| // Copyright 2024 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| // TODO(https://github.com/rust-lang/rust/issues/39371): remove |
| #![allow(non_upper_case_globals)] |
| |
| use crate::bpf::syscalls::BpfTypeFormat; |
| use crate::bpf::{BpfMapHandle, ProgramHandle}; |
| use crate::mm::memory::MemoryObject; |
| use crate::mm::{PAGE_SIZE, ProtectionFlags}; |
| use crate::security::{self, PermissionFlags}; |
| use crate::task::{ |
| CurrentTask, EventHandler, SignalHandler, SignalHandlerInner, Task, WaitCanceler, Waiter, |
| }; |
| use crate::vfs::buffers::{InputBuffer, OutputBuffer}; |
| use crate::vfs::{ |
| CacheMode, CheckAccessReason, FdNumber, FileObject, FileOps, FileSystem, FileSystemHandle, |
| FileSystemOps, FileSystemOptions, FsNode, FsNodeHandle, FsNodeInfo, FsNodeOps, FsStr, |
| MemoryDirectoryFile, MemoryXattrStorage, NamespaceNode, XattrStorage as _, |
| fileops_impl_nonseekable, fileops_impl_noop_sync, fs_node_impl_not_dir, |
| fs_node_impl_xattr_delegate, |
| }; |
| use bstr::BStr; |
| use ebpf::{MapFlags, MapSchema}; |
| use ebpf_api::{RINGBUF_SIGNAL, compute_map_storage_size}; |
| use starnix_logging::track_stub; |
| use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked}; |
| use starnix_types::vfs::default_statfs; |
| use starnix_uapi::auth::FsCred; |
| use starnix_uapi::device_type::DeviceType; |
| use starnix_uapi::errors::Errno; |
| use starnix_uapi::file_mode::{FileMode, mode}; |
| use starnix_uapi::math::round_up_to_increment; |
| use starnix_uapi::open_flags::OpenFlags; |
| use starnix_uapi::vfs::FdEvents; |
| use starnix_uapi::{ |
| BPF_FS_MAGIC, bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_RINGBUF, errno, error, |
| statfs, |
| }; |
| use std::sync::Arc; |
| |
| /// A reference to a BPF object that can be stored in either an FD or an entry in the /sys/fs/bpf |
| /// filesystem. |
| #[derive(Debug, Clone)] |
| pub enum BpfHandle { |
| Program(ProgramHandle), |
| |
| // Stub used to fake loading of programs of unknown types. |
| ProgramStub(u32), |
| |
| Map(BpfMapHandle), |
| BpfTypeFormat(Arc<BpfTypeFormat>), |
| } |
| |
| impl BpfHandle { |
| pub fn as_map(&self) -> Result<&BpfMapHandle, Errno> { |
| match self { |
| Self::Map(map) => Ok(map), |
| _ => error!(EINVAL), |
| } |
| } |
| pub fn as_program(&self) -> Result<&ProgramHandle, Errno> { |
| match self { |
| Self::Program(program) => Ok(program), |
| _ => error!(EINVAL), |
| } |
| } |
| |
| pub fn into_program(self) -> Result<ProgramHandle, Errno> { |
| match self { |
| Self::Program(program) => Ok(program), |
| _ => error!(EINVAL), |
| } |
| } |
| |
| // Returns VMO and schema if this handle references a map. |
| fn get_map_vmo(&self) -> Result<(&Arc<zx::Vmo>, MapSchema), Errno> { |
| match self { |
| Self::Map(map) => Ok((map.vmo(), map.schema)), |
| _ => error!(ENODEV), |
| } |
| } |
| |
| pub fn type_name(&self) -> &'static str { |
| match self { |
| Self::Map(_) => "bpf-map", |
| Self::Program(_) | Self::ProgramStub(_) => "bpf-prog", |
| Self::BpfTypeFormat(_) => "bpf-type", |
| } |
| } |
| |
| /// Performs security-related checks when opening a BPF map. If |
| /// `permission_flags` is `None`, then they are inferred from the map's |
| /// schema. `permission_flags` is ignored for programs. |
| pub(super) fn security_check_open_fd( |
| &self, |
| current_task: &CurrentTask, |
| permission_flags: Option<PermissionFlags>, |
| ) -> Result<(), Errno> { |
| match self { |
| Self::Map(bpf_map) => security::check_bpf_map_access( |
| current_task, |
| &bpf_map, |
| permission_flags.unwrap_or_else(|| bpf_map.schema.flags.into()), |
| ), |
| Self::Program(program) => security::check_bpf_prog_access(current_task, &program), |
| _ => Ok(()), |
| } |
| } |
| } |
| |
| impl From<ProgramHandle> for BpfHandle { |
| fn from(program: ProgramHandle) -> Self { |
| Self::Program(program) |
| } |
| } |
| |
| impl From<BpfMapHandle> for BpfHandle { |
| fn from(map: BpfMapHandle) -> Self { |
| Self::Map(map) |
| } |
| } |
| |
| impl From<BpfTypeFormat> for BpfHandle { |
| fn from(format: BpfTypeFormat) -> Self { |
| Self::BpfTypeFormat(Arc::new(format)) |
| } |
| } |
| |
| impl FileOps for BpfHandle { |
| fileops_impl_nonseekable!(); |
| fileops_impl_noop_sync!(); |
| fn read( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _file: &FileObject, |
| _current_task: &crate::task::CurrentTask, |
| _offset: usize, |
| _data: &mut dyn OutputBuffer, |
| ) -> Result<usize, Errno> { |
| track_stub!(TODO("https://fxbug.dev/322874229"), "bpf handle read"); |
| error!(EINVAL) |
| } |
| fn write( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _file: &FileObject, |
| _current_task: &crate::task::CurrentTask, |
| _offset: usize, |
| _data: &mut dyn InputBuffer, |
| ) -> Result<usize, Errno> { |
| track_stub!(TODO("https://fxbug.dev/322873841"), "bpf handle write"); |
| error!(EINVAL) |
| } |
| |
| fn get_memory( |
| &self, |
| locked: &mut Locked<FileOpsCore>, |
| _file: &FileObject, |
| _current_task: &CurrentTask, |
| length: Option<usize>, |
| prot: ProtectionFlags, |
| ) -> Result<Arc<MemoryObject>, Errno> { |
| let (vmo, schema) = self.get_map_vmo()?; |
| |
| // Because of the specific condition needed to map this object, the size must be known. |
| let length = length.ok_or_else(|| errno!(EINVAL))?; |
| |
| // This cannot be mapped executable. |
| if prot.contains(ProtectionFlags::EXEC) { |
| return error!(EPERM); |
| } |
| |
| match schema.map_type { |
| bpf_map_type_BPF_MAP_TYPE_RINGBUF => { |
| let page_size = *PAGE_SIZE as usize; |
| // Starting from the second page, this cannot be mapped writable. |
| if length > page_size { |
| if prot.contains(ProtectionFlags::WRITE) { |
| return error!(EPERM); |
| } |
| // This cannot be mapped outside of the 2 control pages and the 2 data sections. |
| if length > 2 * page_size + 2 * schema.max_entries as usize { |
| return error!(EINVAL); |
| } |
| } |
| |
| self.as_map()?.get_memory(locked, || { |
| // The first page of the ring buffer VMO is not visible to |
| // user-space processes. Return a VMO slice that doesn't |
| // include the first page. |
| let clone_size = 2 * page_size + schema.max_entries as usize; |
| let vmo_dup = vmo |
| .create_child( |
| zx::VmoChildOptions::SLICE, |
| page_size as u64, |
| clone_size as u64, |
| ) |
| .map_err(|_| errno!(EIO))? |
| .into(); |
| Ok(Arc::new(MemoryObject::RingBuf(vmo_dup))) |
| }) |
| } |
| |
| bpf_map_type_BPF_MAP_TYPE_ARRAY => { |
| if !schema.flags.contains(MapFlags::Mmapable) { |
| return error!(EPERM); |
| } |
| |
| let array_size = round_up_to_increment( |
| compute_map_storage_size(&schema).map_err(|_| errno!(EINVAL))?, |
| *PAGE_SIZE as usize, |
| )?; |
| if length > array_size { |
| return error!(EINVAL); |
| } |
| |
| self.as_map()?.get_memory(locked, || { |
| let vmo_dup = vmo |
| .as_handle_ref() |
| .duplicate(zx::Rights::SAME_RIGHTS) |
| .map_err(|_| errno!(EIO))? |
| .into(); |
| Ok(Arc::new(MemoryObject::Vmo(vmo_dup))) |
| }) |
| } |
| |
| // Other maps cannot be mmap'ed. |
| _ => error!(ENODEV), |
| } |
| } |
| |
| fn wait_async( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _file: &FileObject, |
| _current_task: &CurrentTask, |
| waiter: &Waiter, |
| events: FdEvents, |
| handler: EventHandler, |
| ) -> Option<WaitCanceler> { |
| let (vmo, schema) = self.get_map_vmo().ok()?; |
| |
| // Only ringbuffers can be polled for POLLIN. |
| if schema.map_type != bpf_map_type_BPF_MAP_TYPE_RINGBUF |
| || !events.contains(FdEvents::POLLIN) |
| { |
| return Some(WaitCanceler::new_noop()); |
| } |
| |
| let handler = SignalHandler { |
| inner: SignalHandlerInner::ZxHandle(|signals| { |
| if signals.contains(RINGBUF_SIGNAL) { FdEvents::POLLIN } else { FdEvents::empty() } |
| }), |
| event_handler: handler, |
| err_code: None, |
| }; |
| |
| // Reset the signal before waiting. The case when the ring buffer already has some data |
| // is handled by the caller: it should call `query_events` after starting the waiter. |
| vmo.as_handle_ref() |
| .signal(RINGBUF_SIGNAL, zx::Signals::empty()) |
| .expect("Failed to set signal or a ring buffer VMO"); |
| |
| let canceler = waiter |
| .wake_on_zircon_signals(&vmo.as_handle_ref(), RINGBUF_SIGNAL, handler) |
| .expect("Failed to wait for signals on ringbuf VMO"); |
| Some(WaitCanceler::new_port(canceler)) |
| } |
| |
| fn query_events( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _file: &FileObject, |
| _current_task: &CurrentTask, |
| ) -> Result<FdEvents, Errno> { |
| match self { |
| Self::Map(map) => { |
| let events = match map.can_read() { |
| Some(true) => FdEvents::POLLIN, |
| Some(false) => FdEvents::empty(), |
| None => FdEvents::POLLERR, |
| }; |
| Ok(events) |
| } |
| _ => error!(EPERM), |
| } |
| } |
| } |
| |
| pub fn get_bpf_object(task: &Task, fd: FdNumber) -> Result<BpfHandle, Errno> { |
| Ok((*task.files.get(fd)?.downcast_file::<BpfHandle>().ok_or_else(|| errno!(EBADF))?).clone()) |
| } |
| pub struct BpfFs; |
| impl BpfFs { |
| pub fn new_fs( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| options: FileSystemOptions, |
| ) -> Result<FileSystemHandle, Errno> { |
| let kernel = current_task.kernel(); |
| let fs = FileSystem::new(locked, kernel, CacheMode::Permanent, BpfFs, options)?; |
| let root_ino = fs.allocate_ino(); |
| fs.create_root_with_info( |
| root_ino, |
| BpfFsDir::new(), |
| FsNodeInfo::new(mode!(IFDIR, 0o777) | FileMode::ISVTX, FsCred::root()), |
| ); |
| Ok(fs) |
| } |
| } |
| |
| impl FileSystemOps for BpfFs { |
| fn statfs( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _fs: &FileSystem, |
| _current_task: &CurrentTask, |
| ) -> Result<statfs, Errno> { |
| Ok(default_statfs(BPF_FS_MAGIC)) |
| } |
| fn name(&self) -> &'static FsStr { |
| "bpf".into() |
| } |
| |
| fn rename( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _fs: &FileSystem, |
| _current_task: &CurrentTask, |
| _old_parent: &FsNodeHandle, |
| _old_name: &FsStr, |
| _new_parent: &FsNodeHandle, |
| _new_name: &FsStr, |
| _renamed: &FsNodeHandle, |
| _replaced: Option<&FsNodeHandle>, |
| ) -> Result<(), Errno> { |
| Ok(()) |
| } |
| } |
| |
| pub struct BpfFsDir { |
| xattrs: MemoryXattrStorage, |
| } |
| |
| impl BpfFsDir { |
| fn new() -> Self { |
| Self { xattrs: MemoryXattrStorage::default() } |
| } |
| |
| pub fn register_pin<L>( |
| &self, |
| locked: &mut Locked<L>, |
| current_task: &CurrentTask, |
| node: &NamespaceNode, |
| name: &FsStr, |
| object: BpfHandle, |
| ) -> Result<(), Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| node.entry.create_entry( |
| locked, |
| current_task, |
| &node.mount, |
| name, |
| |_locked, dir, _mount, _name| { |
| Ok(dir.fs().create_node_and_allocate_node_id( |
| BpfFsObject::new(object), |
| FsNodeInfo::new(mode!(IFREG, 0o600), current_task.current_fscred()), |
| )) |
| }, |
| )?; |
| Ok(()) |
| } |
| } |
| |
| impl FsNodeOps for BpfFsDir { |
| fs_node_impl_xattr_delegate!(self, self.xattrs); |
| |
| fn create_file_ops( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _node: &FsNode, |
| _current_task: &CurrentTask, |
| _flags: OpenFlags, |
| ) -> Result<Box<dyn FileOps>, Errno> { |
| Ok(Box::new(MemoryDirectoryFile::new())) |
| } |
| |
| fn mkdir( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| node: &FsNode, |
| _current_task: &CurrentTask, |
| _name: &FsStr, |
| mode: FileMode, |
| owner: FsCred, |
| ) -> Result<FsNodeHandle, Errno> { |
| Ok(node.fs().create_node_and_allocate_node_id( |
| BpfFsDir::new(), |
| FsNodeInfo::new(mode | FileMode::ISVTX, owner), |
| )) |
| } |
| |
| fn mknod( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _node: &FsNode, |
| _current_task: &CurrentTask, |
| _name: &FsStr, |
| _mode: FileMode, |
| _dev: DeviceType, |
| _owner: FsCred, |
| ) -> Result<FsNodeHandle, Errno> { |
| error!(EPERM) |
| } |
| |
| fn create_symlink( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _node: &FsNode, |
| _current_task: &CurrentTask, |
| _name: &FsStr, |
| _target: &FsStr, |
| _owner: FsCred, |
| ) -> Result<FsNodeHandle, Errno> { |
| error!(EPERM) |
| } |
| |
| fn link( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _node: &FsNode, |
| _current_task: &CurrentTask, |
| _name: &FsStr, |
| _child: &FsNodeHandle, |
| ) -> Result<(), Errno> { |
| Ok(()) |
| } |
| |
| fn unlink( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _node: &FsNode, |
| _current_task: &CurrentTask, |
| _name: &FsStr, |
| _child: &FsNodeHandle, |
| ) -> Result<(), Errno> { |
| Ok(()) |
| } |
| } |
| |
| pub struct BpfFsObject { |
| pub handle: BpfHandle, |
| xattrs: MemoryXattrStorage, |
| } |
| |
| impl BpfFsObject { |
| fn new(handle: BpfHandle) -> Self { |
| Self { handle, xattrs: MemoryXattrStorage::default() } |
| } |
| } |
| |
| impl FsNodeOps for BpfFsObject { |
| fs_node_impl_not_dir!(); |
| fs_node_impl_xattr_delegate!(self, self.xattrs); |
| |
| fn create_file_ops( |
| &self, |
| _locked: &mut Locked<FileOpsCore>, |
| _node: &FsNode, |
| _current_task: &CurrentTask, |
| _flags: OpenFlags, |
| ) -> Result<Box<dyn FileOps>, Errno> { |
| error!(EIO) |
| } |
| } |
| |
| /// Resolves a pinned BPF object from a path, returning the handle and the node. |
| /// Performs DAC and MAC checks using the specified `open_flags `. Also updates |
| /// atime unless `NOATIME` flag is set. |
| pub fn resolve_pinned_bpf_object( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path: &BStr, |
| open_flags: OpenFlags, |
| ) -> Result<(BpfHandle, NamespaceNode), Errno> { |
| let node = current_task.lookup_path_from_root(locked, path.as_ref())?; |
| |
| let permission_flags = PermissionFlags::from(open_flags); |
| node.check_access(locked, current_task, permission_flags, CheckAccessReason::Access)?; |
| |
| let object = node.entry.node.downcast_ops::<BpfFsObject>().ok_or_else(|| errno!(EPERM))?; |
| object.handle.security_check_open_fd(current_task, Some(permission_flags))?; |
| |
| if !open_flags.contains(OpenFlags::NOATIME) { |
| node.update_atime(); |
| } |
| |
| Ok((object.handle.clone(), node)) |
| } |