blob: dda1b5e9ea52615c5d6f41086b024dea86d4f3e3 [file] [log] [blame]
// Copyright 2024 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// TODO(https://github.com/rust-lang/rust/issues/39371): remove
#![allow(non_upper_case_globals)]
use crate::bpf::syscalls::BpfTypeFormat;
use crate::bpf::{BpfMapHandle, ProgramHandle};
use crate::mm::memory::MemoryObject;
use crate::mm::{PAGE_SIZE, ProtectionFlags};
use crate::security::{self, PermissionFlags};
use crate::task::{
CurrentTask, EventHandler, SignalHandler, SignalHandlerInner, Task, WaitCanceler, Waiter,
};
use crate::vfs::buffers::{InputBuffer, OutputBuffer};
use crate::vfs::{
CacheMode, CheckAccessReason, FdNumber, FileObject, FileOps, FileSystem, FileSystemHandle,
FileSystemOps, FileSystemOptions, FsNode, FsNodeHandle, FsNodeInfo, FsNodeOps, FsStr,
MemoryDirectoryFile, MemoryXattrStorage, NamespaceNode, XattrStorage as _,
fileops_impl_nonseekable, fileops_impl_noop_sync, fs_node_impl_not_dir,
fs_node_impl_xattr_delegate,
};
use bstr::BStr;
use ebpf::{MapFlags, MapSchema};
use ebpf_api::{RINGBUF_SIGNAL, compute_map_storage_size};
use starnix_logging::track_stub;
use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
use starnix_types::vfs::default_statfs;
use starnix_uapi::auth::FsCred;
use starnix_uapi::device_type::DeviceType;
use starnix_uapi::errors::Errno;
use starnix_uapi::file_mode::{FileMode, mode};
use starnix_uapi::math::round_up_to_increment;
use starnix_uapi::open_flags::OpenFlags;
use starnix_uapi::vfs::FdEvents;
use starnix_uapi::{
BPF_FS_MAGIC, bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_RINGBUF, errno, error,
statfs,
};
use std::sync::Arc;
/// A reference to a BPF object that can be stored in either an FD or an entry in the /sys/fs/bpf
/// filesystem.
///
/// The type implements `FileOps`, so a handle can back an open file directly, and it is
/// wrapped in a `BpfFsObject` when pinned into the filesystem.
#[derive(Debug, Clone)]
pub enum BpfHandle {
/// A BPF program.
Program(ProgramHandle),
/// Stub used to fake loading of programs of unknown types.
// NOTE(review): the `u32` payload is presumably the program type that was requested — confirm.
ProgramStub(u32),
/// A BPF map.
Map(BpfMapHandle),
/// A shared BPF type format object.
BpfTypeFormat(Arc<BpfTypeFormat>),
}
impl BpfHandle {
pub fn as_map(&self) -> Result<&BpfMapHandle, Errno> {
match self {
Self::Map(map) => Ok(map),
_ => error!(EINVAL),
}
}
pub fn as_program(&self) -> Result<&ProgramHandle, Errno> {
match self {
Self::Program(program) => Ok(program),
_ => error!(EINVAL),
}
}
pub fn into_program(self) -> Result<ProgramHandle, Errno> {
match self {
Self::Program(program) => Ok(program),
_ => error!(EINVAL),
}
}
// Returns VMO and schema if this handle references a map.
fn get_map_vmo(&self) -> Result<(&Arc<zx::Vmo>, MapSchema), Errno> {
match self {
Self::Map(map) => Ok((map.vmo(), map.schema)),
_ => error!(ENODEV),
}
}
pub fn type_name(&self) -> &'static str {
match self {
Self::Map(_) => "bpf-map",
Self::Program(_) | Self::ProgramStub(_) => "bpf-prog",
Self::BpfTypeFormat(_) => "bpf-type",
}
}
/// Performs security-related checks when opening a BPF map. If
/// `permission_flags` is `None`, then they are inferred from the map's
/// schema. `permission_flags` is ignored for programs.
pub(super) fn security_check_open_fd(
&self,
current_task: &CurrentTask,
permission_flags: Option<PermissionFlags>,
) -> Result<(), Errno> {
match self {
Self::Map(bpf_map) => security::check_bpf_map_access(
current_task,
&bpf_map,
permission_flags.unwrap_or_else(|| bpf_map.schema.flags.into()),
),
Self::Program(program) => security::check_bpf_prog_access(current_task, &program),
_ => Ok(()),
}
}
}
impl From<ProgramHandle> for BpfHandle {
fn from(program: ProgramHandle) -> Self {
Self::Program(program)
}
}
impl From<BpfMapHandle> for BpfHandle {
fn from(map: BpfMapHandle) -> Self {
Self::Map(map)
}
}
impl From<BpfTypeFormat> for BpfHandle {
fn from(format: BpfTypeFormat) -> Self {
Self::BpfTypeFormat(Arc::new(format))
}
}
// File operations for an FD that refers to a BPF object.
//
// Reading and writing are stubbed out. Memory mapping is supported for ring buffer maps and
// for array maps created with the `Mmapable` flag. Polling is only meaningful for ring buffer
// maps.
impl FileOps for BpfHandle {
fileops_impl_nonseekable!();
fileops_impl_noop_sync!();
/// Not implemented: records a stub hit and fails with `EINVAL`.
fn read(
&self,
_locked: &mut Locked<FileOpsCore>,
_file: &FileObject,
_current_task: &crate::task::CurrentTask,
_offset: usize,
_data: &mut dyn OutputBuffer,
) -> Result<usize, Errno> {
track_stub!(TODO("https://fxbug.dev/322874229"), "bpf handle read");
error!(EINVAL)
}
/// Not implemented: records a stub hit and fails with `EINVAL`.
fn write(
&self,
_locked: &mut Locked<FileOpsCore>,
_file: &FileObject,
_current_task: &crate::task::CurrentTask,
_offset: usize,
_data: &mut dyn InputBuffer,
) -> Result<usize, Errno> {
track_stub!(TODO("https://fxbug.dev/322873841"), "bpf handle write");
error!(EINVAL)
}
/// Returns the memory object used to map this BPF map into a process.
///
/// The checks run in the order listed, so the first failing one decides the errno:
///
/// * `ENODEV` - the handle is not a map.
/// * `EINVAL` - `length` is `None`.
/// * `EPERM` - `prot` requests an executable mapping.
/// * Ring buffer maps: `EPERM` if more than one page is mapped writable, `EINVAL` if
///   `length` exceeds two pages plus twice `max_entries`.
/// * Array maps: `EPERM` if the map lacks the `Mmapable` flag, `EINVAL` if the storage size
///   cannot be computed or `length` exceeds the page-rounded storage size.
/// * `ENODEV` - any other map type.
/// * `EIO` - creating the child VMO or duplicating the VMO handle failed.
fn get_memory(
&self,
locked: &mut Locked<FileOpsCore>,
_file: &FileObject,
_current_task: &CurrentTask,
length: Option<usize>,
prot: ProtectionFlags,
) -> Result<Arc<MemoryObject>, Errno> {
// Fails with ENODEV for anything that is not a map.
let (vmo, schema) = self.get_map_vmo()?;
// Because of the specific condition needed to map this object, the size must be known.
let length = length.ok_or_else(|| errno!(EINVAL))?;
// This cannot be mapped executable.
if prot.contains(ProtectionFlags::EXEC) {
return error!(EPERM);
}
match schema.map_type {
bpf_map_type_BPF_MAP_TYPE_RINGBUF => {
let page_size = *PAGE_SIZE as usize;
// Starting from the second page, this cannot be mapped writable.
if length > page_size {
if prot.contains(ProtectionFlags::WRITE) {
return error!(EPERM);
}
// This cannot be mapped outside of the 2 control pages and the 2 data sections.
if length > 2 * page_size + 2 * schema.max_entries as usize {
return error!(EINVAL);
}
}
// The closure builds the memory object for this map.
// NOTE(review): the map's `get_memory` presumably calls it lazily and caches the
// result — confirm against its definition.
self.as_map()?.get_memory(locked, || {
// The first page of the ring buffer VMO is not visible to
// user-space processes. Return a VMO slice that doesn't
// include the first page.
// The slice covers two pages plus `max_entries` bytes, starting one page into the VMO.
let clone_size = 2 * page_size + schema.max_entries as usize;
let vmo_dup = vmo
.create_child(
zx::VmoChildOptions::SLICE,
page_size as u64,
clone_size as u64,
)
.map_err(|_| errno!(EIO))?
.into();
Ok(Arc::new(MemoryObject::RingBuf(vmo_dup)))
})
}
bpf_map_type_BPF_MAP_TYPE_ARRAY => {
// Arrays can only be mapped when they were created with the `Mmapable` flag.
if !schema.flags.contains(MapFlags::Mmapable) {
return error!(EPERM);
}
// The mappable size is the map's storage size rounded up to a whole number of pages.
let array_size = round_up_to_increment(
compute_map_storage_size(&schema).map_err(|_| errno!(EINVAL))?,
*PAGE_SIZE as usize,
)?;
if length > array_size {
return error!(EINVAL);
}
self.as_map()?.get_memory(locked, || {
// Unlike the ring buffer case, the whole VMO is exposed through a duplicate handle
// carrying the same rights.
let vmo_dup = vmo
.as_handle_ref()
.duplicate(zx::Rights::SAME_RIGHTS)
.map_err(|_| errno!(EIO))?
.into();
Ok(Arc::new(MemoryObject::Vmo(vmo_dup)))
})
}
// Other maps cannot be mmap'ed.
_ => error!(ENODEV),
}
}
/// Registers `waiter` to be notified when a ring buffer map becomes readable.
///
/// Returns `None` when the handle is not a map. For maps that are not ring buffers, or when
/// `events` does not include `POLLIN`, a no-op canceler is returned and nothing is
/// registered. Otherwise `RINGBUF_SIGNAL` is cleared on the map's VMO and the waiter is
/// subscribed to that signal, which is reported to `handler` as `POLLIN`.
///
/// Panics if clearing the signal or registering the wait fails.
fn wait_async(
&self,
_locked: &mut Locked<FileOpsCore>,
_file: &FileObject,
_current_task: &CurrentTask,
waiter: &Waiter,
events: FdEvents,
handler: EventHandler,
) -> Option<WaitCanceler> {
// `.ok()?` turns the ENODEV returned for non-map handles into `None`.
let (vmo, schema) = self.get_map_vmo().ok()?;
// Only ringbuffers can be polled for POLLIN.
if schema.map_type != bpf_map_type_BPF_MAP_TYPE_RINGBUF
|| !events.contains(FdEvents::POLLIN)
{
return Some(WaitCanceler::new_noop());
}
// Translate the Zircon signal into the FD event delivered to `handler`.
let handler = SignalHandler {
inner: SignalHandlerInner::ZxHandle(|signals| {
if signals.contains(RINGBUF_SIGNAL) { FdEvents::POLLIN } else { FdEvents::empty() }
}),
event_handler: handler,
err_code: None,
};
// Reset the signal before waiting. The case when the ring buffer already has some data
// is handled by the caller: it should call `query_events` after starting the waiter.
vmo.as_handle_ref()
.signal(RINGBUF_SIGNAL, zx::Signals::empty())
.expect("Failed to set signal or a ring buffer VMO");
let canceler = waiter
.wake_on_zircon_signals(&vmo.as_handle_ref(), RINGBUF_SIGNAL, handler)
.expect("Failed to wait for signals on ringbuf VMO");
Some(WaitCanceler::new_port(canceler))
}
/// Reports the events currently pending on this handle.
///
/// For maps the result follows `can_read()`: `Some(true)` yields `POLLIN`, `Some(false)`
/// yields no events and `None` yields `POLLERR`. Every other kind of handle fails with
/// `EPERM`.
fn query_events(
&self,
_locked: &mut Locked<FileOpsCore>,
_file: &FileObject,
_current_task: &CurrentTask,
) -> Result<FdEvents, Errno> {
match self {
Self::Map(map) => {
// NOTE(review): `can_read()` presumably returns `None` for map types that cannot be
// polled — confirm against its definition.
let events = match map.can_read() {
Some(true) => FdEvents::POLLIN,
Some(false) => FdEvents::empty(),
None => FdEvents::POLLERR,
};
Ok(events)
}
_ => error!(EPERM),
}
}
}
/// Looks up `fd` in `task`'s file table and returns a clone of the BPF handle behind it.
///
/// Errors from the file table lookup are propagated unchanged. When the file exists but is
/// not backed by a `BpfHandle`, the call fails with `EBADF`.
pub fn get_bpf_object(task: &Task, fd: FdNumber) -> Result<BpfHandle, Errno> {
    let file = task.files.get(fd)?;
    let handle = file.downcast_file::<BpfHandle>().ok_or_else(|| errno!(EBADF))?;
    Ok((*handle).clone())
}
/// Filesystem operations for the BPF filesystem. Instances of the filesystem are built with
/// `BpfFs::new_fs`, and the `FileSystemOps` implementation reports the name "bpf".
pub struct BpfFs;
impl BpfFs {
pub fn new_fs(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
options: FileSystemOptions,
) -> Result<FileSystemHandle, Errno> {
let kernel = current_task.kernel();
let fs = FileSystem::new(locked, kernel, CacheMode::Permanent, BpfFs, options)?;
let root_ino = fs.allocate_ino();
fs.create_root_with_info(
root_ino,
BpfFsDir::new(),
FsNodeInfo::new(mode!(IFDIR, 0o777) | FileMode::ISVTX, FsCred::root()),
);
Ok(fs)
}
}
impl FileSystemOps for BpfFs {
    /// Reports default filesystem statistics tagged with `BPF_FS_MAGIC`.
    fn statfs(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _fs: &FileSystem,
        _current_task: &CurrentTask,
    ) -> Result<statfs, Errno> {
        let stats = default_statfs(BPF_FS_MAGIC);
        Ok(stats)
    }

    /// Returns the filesystem type name, `bpf`.
    fn name(&self) -> &'static FsStr {
        let fs_name: &'static FsStr = "bpf".into();
        fs_name
    }

    /// Accepts every rename: this hook performs no work of its own and always succeeds.
    fn rename(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _fs: &FileSystem,
        _current_task: &CurrentTask,
        _old_parent: &FsNodeHandle,
        _old_name: &FsStr,
        _new_parent: &FsNodeHandle,
        _new_name: &FsStr,
        _renamed: &FsNodeHandle,
        _replaced: Option<&FsNodeHandle>,
    ) -> Result<(), Errno> {
        Ok(())
    }
}
/// Node operations for a directory in the BPF filesystem.
pub struct BpfFsDir {
/// Storage that this node's extended-attribute operations are delegated to.
xattrs: MemoryXattrStorage,
}
impl BpfFsDir {
    /// Creates directory ops with default (empty) extended-attribute storage.
    fn new() -> Self {
        let xattrs = MemoryXattrStorage::default();
        Self { xattrs }
    }

    /// Pins `object` under `name` inside the directory referenced by `node`.
    ///
    /// A new entry is created in `node`. Its node wraps `object` in a `BpfFsObject` and is a
    /// regular file with mode `0600`, owned by the current task's filesystem credentials.
    /// Errors from creating the entry are propagated. `self` is not consulted.
    pub fn register_pin<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        node: &NamespaceNode,
        name: &FsStr,
        object: BpfHandle,
    ) -> Result<(), Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        let parent = &node.entry;
        parent.create_entry(
            locked,
            current_task,
            &node.mount,
            name,
            |_locked, dir, _mount, _name| {
                let filesystem = dir.fs();
                let pinned = BpfFsObject::new(object);
                let info = FsNodeInfo::new(mode!(IFREG, 0o600), current_task.current_fscred());
                Ok(filesystem.create_node_and_allocate_node_id(pinned, info))
            },
        )?;
        Ok(())
    }
}
impl FsNodeOps for BpfFsDir {
    // Extended-attribute operations are forwarded to the in-memory storage.
    fs_node_impl_xattr_delegate!(self, self.xattrs);

    /// Opens the directory as a `MemoryDirectoryFile`.
    fn create_file_ops(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _flags: OpenFlags,
    ) -> Result<Box<dyn FileOps>, Errno> {
        let directory = MemoryDirectoryFile::new();
        Ok(Box::new(directory))
    }

    /// Creates a child directory node owned by `owner`.
    ///
    /// The sticky bit is always added to the requested `mode`.
    fn mkdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        mode: FileMode,
        owner: FsCred,
    ) -> Result<FsNodeHandle, Errno> {
        let filesystem = node.fs();
        let child_ops = BpfFsDir::new();
        let child_info = FsNodeInfo::new(mode | FileMode::ISVTX, owner);
        let child = filesystem.create_node_and_allocate_node_id(child_ops, child_info);
        Ok(child)
    }

    /// Creating nodes with `mknod` is not permitted; always fails with `EPERM`.
    fn mknod(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _mode: FileMode,
        _dev: DeviceType,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno> {
        error!(EPERM)
    }

    /// Symlinks are not permitted; always fails with `EPERM`.
    fn create_symlink(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _target: &FsStr,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno> {
        error!(EPERM)
    }

    /// Accepts every link request; this hook does no work of its own.
    fn link(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _child: &FsNodeHandle,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Accepts every unlink request; this hook does no work of its own.
    fn unlink(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _child: &FsNodeHandle,
    ) -> Result<(), Errno> {
        Ok(())
    }
}
/// Node operations for a BPF object pinned in the BPF filesystem.
pub struct BpfFsObject {
/// The pinned BPF object.
pub handle: BpfHandle,
/// Storage that this node's extended-attribute operations are delegated to.
xattrs: MemoryXattrStorage,
}
impl BpfFsObject {
    /// Wraps `handle` in node ops with default (empty) extended-attribute storage.
    fn new(handle: BpfHandle) -> Self {
        let xattrs = MemoryXattrStorage::default();
        Self { handle, xattrs }
    }
}
// Node operations for a pinned BPF object: a non-directory node whose extended attributes
// live in `self.xattrs`.
impl FsNodeOps for BpfFsObject {
fs_node_impl_not_dir!();
// Extended-attribute operations are forwarded to the in-memory storage.
fs_node_impl_xattr_delegate!(self, self.xattrs);
/// Always fails with `EIO`: this node never produces file ops. Within this file the pinned
/// handle is reached through `downcast_ops::<BpfFsObject>()` in `resolve_pinned_bpf_object`.
fn create_file_ops(
&self,
_locked: &mut Locked<FileOpsCore>,
_node: &FsNode,
_current_task: &CurrentTask,
_flags: OpenFlags,
) -> Result<Box<dyn FileOps>, Errno> {
error!(EIO)
}
}
/// Resolves a pinned BPF object from a path, returning the handle and the node.
/// Performs DAC and MAC checks using the specified `open_flags `. Also updates
/// atime unless `NOATIME` flag is set.
pub fn resolve_pinned_bpf_object(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
path: &BStr,
open_flags: OpenFlags,
) -> Result<(BpfHandle, NamespaceNode), Errno> {
let node = current_task.lookup_path_from_root(locked, path.as_ref())?;
let permission_flags = PermissionFlags::from(open_flags);
node.check_access(locked, current_task, permission_flags, CheckAccessReason::Access)?;
let object = node.entry.node.downcast_ops::<BpfFsObject>().ok_or_else(|| errno!(EPERM))?;
object.handle.security_check_open_fd(current_task, Some(permission_flags))?;
if !open_flags.contains(OpenFlags::NOATIME) {
node.update_atime();
}
Ok((object.handle.clone(), node))
}