blob: b6a748269b8ca022e7c6ccc37c7fd3b7b0d7e236 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use fuchsia_zircon as zx;
use std::fmt;
use std::sync::Arc;
use crate::fs::*;
use crate::lock::Mutex;
use crate::logging::{impossible_error, not_implemented};
use crate::mm::{DesiredAddress, MappedVmo, MappingOptions};
use crate::syscalls::SyscallResult;
use crate::task::*;
use crate::types::as_any::*;
use crate::types::*;
pub const MAX_LFS_FILESIZE: usize = 0x7fffffffffffffff;
pub enum SeekOrigin {
impl SeekOrigin {
/// Converts a raw `whence` value into a [`SeekOrigin`].
///
/// `SEEK_SET`, `SEEK_CUR` and `SEEK_END` map to the corresponding variant; any other value
/// fails with `EINVAL`.
pub fn from_raw(whence: u32) -> Result<SeekOrigin, Errno> {
match whence {
SEEK_SET => Ok(SeekOrigin::SET),
SEEK_CUR => Ok(SeekOrigin::CUR),
SEEK_END => Ok(SeekOrigin::END),
// Unknown origin values are rejected rather than defaulted.
_ => error!(EINVAL),
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
pub trait FileOps: Send + Sync + AsAny {
/// Called when the FileObject is closed.
fn close(&self, _file: &FileObject) {}
/// Read from the file without an offset. If your file is seekable, consider implementing this
/// with [`fileops_impl_seekable`].
fn read(
file: &FileObject,
current_task: &CurrentTask,
data: &[UserBuffer],
) -> Result<usize, Errno>;
/// Read from the file at an offset. If your file is not seekable, consider implementing this
/// with [`fileops_impl_nonseekable`].
fn read_at(
file: &FileObject,
current_task: &CurrentTask,
offset: usize,
data: &[UserBuffer],
) -> Result<usize, Errno>;
/// Write to the file without an offset. If your file is seekable, consider implementing this
/// with [`fileops_impl_seekable`].
fn write(
file: &FileObject,
current_task: &CurrentTask,
data: &[UserBuffer],
) -> Result<usize, Errno>;
/// Write to the file at an offset. If your file is nonseekable, consider implementing this with
/// [`fileops_impl_nonseekable`].
fn write_at(
file: &FileObject,
current_task: &CurrentTask,
offset: usize,
data: &[UserBuffer],
) -> Result<usize, Errno>;
/// Adjust the seek offset if the file is seekable.
fn seek(
file: &FileObject,
current_task: &CurrentTask,
offset: off_t,
whence: SeekOrigin,
) -> Result<off_t, Errno>;
/// Returns a VMO representing this file. At least the requested protection flags must
/// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
/// possible given the requested protection, an error must be returned.
/// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
/// smaller than the requested length.
/// This method is typically called by [`Self::mmap`].
fn get_vmo(
_file: &FileObject,
_current_task: &CurrentTask,
_length: Option<usize>,
_prot: zx::VmarFlags,
) -> Result<zx::Vmo, Errno> {
/// Responds to an mmap call. The default implementation calls [`Self::get_vmo`] to get a VMO
/// and then maps it with [`crate::mm::MemoryManager::map`].
/// Only implement this trait method if your file needs to control mapping, or record where
/// a VMO gets mapped.
fn mmap(
file: &FileObject,
current_task: &CurrentTask,
addr: DesiredAddress,
vmo_offset: u64,
length: usize,
flags: zx::VmarFlags,
options: MappingOptions,
filename: NamespaceNode,
) -> Result<MappedVmo, Errno> {
// Sanitize the protection flags to only include PERM_READ, PERM_WRITE, and PERM_EXECUTE.
let zx_prot_flags = flags
& (zx::VmarFlags::PERM_READ | zx::VmarFlags::PERM_WRITE | zx::VmarFlags::PERM_EXECUTE);
let vmo = Arc::new(if options.contains(MappingOptions::SHARED) {
self.get_vmo(file, current_task, Some(length), zx_prot_flags)?
} else {
// TODO(tbodt): Use VMO_FLAG_PRIVATE to have the filesystem server do the clone for us.
let vmo = self.get_vmo(
zx_prot_flags - zx::VmarFlags::PERM_WRITE,
let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
if !zx_prot_flags.contains(zx::VmarFlags::PERM_WRITE) {
clone_flags |= zx::VmoChildOptions::NO_WRITE;
vmo.create_child(clone_flags, 0, vmo.get_size().map_err(impossible_error)?)
let addr =
Ok(MappedVmo::new(vmo, addr))
fn readdir(
_file: &FileObject,
_current_task: &CurrentTask,
_sink: &mut dyn DirentSink,
) -> Result<(), Errno> {
/// Establish a one-shot, asynchronous wait for the given FdEvents for the given file and task.
/// If the events are already active at the time of calling, the handler will be called
/// immediately on the next wait.
fn wait_async(
_file: &FileObject,
_current_task: &CurrentTask,
_waiter: &Arc<Waiter>,
_events: FdEvents,
_handler: EventHandler,
) -> WaitKey;
/// Cancel a wait set up by wait_async.
/// This method does not return a value, so callers are not told whether the wait had already
/// been activated.
fn cancel_wait(&self, _current_task: &CurrentTask, _waiter: &Arc<Waiter>, _key: WaitKey);
fn query_events(&self, current_task: &CurrentTask) -> FdEvents;
fn ioctl(
_file: &FileObject,
_current_task: &CurrentTask,
request: u32,
_user_addr: UserAddress,
) -> Result<SyscallResult, Errno> {
fn fcntl(
_file: &FileObject,
_current_task: &CurrentTask,
_cmd: u32,
_arg: u64,
) -> Result<SyscallResult, Errno> {
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
/// You must implement [`FileOps::read`] and [`FileOps::write`].
macro_rules! fileops_impl_nonseekable {
() => {
fn read_at(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_offset: usize,
_data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
use crate::types::errno::*;
fn write_at(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_offset: usize,
_data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
use crate::types::errno::*;
fn seek(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_offset: crate::types::off_t,
_whence: crate::fs::SeekOrigin,
) -> Result<crate::types::off_t, crate::types::Errno> {
use crate::types::errno::*;
/// Implements [`FileOps`] methods in a way that makes sense for seekable files.
/// You must implement [`FileOps::read_at`] and [`FileOps::write_at`].
///
/// The generated `read` and `write` operate at the file's current offset and advance it by the
/// number of bytes transferred; the generated `seek` updates that offset.
macro_rules! fileops_impl_seekable {
() => {
fn read(
file: &crate::fs::FileObject,
current_task: &crate::task::CurrentTask,
data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
// The offset lock is taken before `read_at` and the same guard is used to advance the
// offset afterwards, so the read and the offset update happen under one lock.
let mut offset = file.offset.lock();
let size = self.read_at(file, current_task, *offset as usize, data)?;
*offset += size as crate::types::off_t;
fn write(
file: &crate::fs::FileObject,
current_task: &crate::task::CurrentTask,
data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
let mut offset = file.offset.lock();
// In append mode the write starts at the node's current size instead of the stored offset.
// NOTE(review): unlike the other project types used in this macro, `OpenFlags` is not
// crate-qualified, so it must be in scope wherever the macro is expanded.
if file.flags().contains(OpenFlags::APPEND) {
*offset = file.node().info().size as crate::types::off_t;
let size = self.write_at(file, current_task, *offset as usize, data)?;
*offset += size as crate::types::off_t;
fn seek(
file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
offset: crate::types::off_t,
whence: crate::fs::SeekOrigin,
) -> Result<crate::types::off_t, crate::types::Errno> {
use crate::types::errno::*;
let mut current_offset = file.offset.lock();
// `checked_add` is used so that an overflowing target offset is detected rather than wrapped.
let new_offset = match whence {
crate::fs::SeekOrigin::SET => Some(offset),
crate::fs::SeekOrigin::CUR => (*current_offset).checked_add(offset),
crate::fs::SeekOrigin::END => {
// Seeking relative to the end uses the size reported by the node's stat().
let stat = file.node().stat()?;
offset.checked_add(stat.st_size as crate::types::off_t)
// Negative offsets are rejected with EINVAL.
if new_offset < 0 {
return error!(EINVAL);
*current_offset = new_offset;
/// Implements [`FileOps`] methods in a way that makes sense for files that ignore
/// seeking operations and always read/write at offset 0.
/// You must implement [`FileOps::read_at`] and [`FileOps::write_at`].
macro_rules! fileops_impl_seekless {
() => {
fn read(
file: &crate::fs::FileObject,
current_task: &crate::task::CurrentTask,
data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
self.read_at(file, current_task, 0, data)
fn write(
file: &crate::fs::FileObject,
current_task: &crate::task::CurrentTask,
data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
self.write_at(file, current_task, 0, data)
fn seek(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_offset: crate::types::off_t,
_whence: crate::fs::SeekOrigin,
) -> Result<crate::types::off_t, crate::types::Errno> {
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
macro_rules! fileops_impl_directory {
() => {
fn read(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
use crate::types::errno::*;
fn read_at(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_offset: usize,
_data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
use crate::types::errno::*;
fn write(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
use crate::types::errno::*;
fn write_at(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_offset: usize,
_data: &[crate::types::UserBuffer],
) -> Result<usize, crate::types::Errno> {
use crate::types::errno::*;
/// Implements [`FileOps`] methods in a way that makes sense for files that never block
/// while reading/writing. The [`FileOps::wait_async`], [`FileOps::cancel_wait`] and
/// [`FileOps::query_events`] methods are implemented for you.
///
/// Every project type named here is crate-qualified so that the macro can be expanded in a
/// module without requiring any particular `use` declarations at the expansion site.
macro_rules! fileops_impl_nonblocking {
() => {
fn wait_async(
_file: &crate::fs::FileObject,
_current_task: &crate::task::CurrentTask,
_waiter: &std::sync::Arc<crate::task::Waiter>,
_events: crate::fs::FdEvents,
_handler: crate::task::EventHandler,
) -> crate::task::WaitKey {
fn cancel_wait(
_current_task: &crate::task::CurrentTask,
_waiter: &std::sync::Arc<crate::task::Waiter>,
_key: crate::task::WaitKey,
) {
// A file that never blocks always reports itself as both readable and writable.
fn query_events(&self, _current_task: &crate::task::CurrentTask) -> crate::fs::FdEvents {
crate::fs::FdEvents::POLLIN | crate::fs::FdEvents::POLLOUT
// Public re-export of macros allows them to be used like regular rust items.
pub(crate) use fileops_impl_directory;
pub(crate) use fileops_impl_nonblocking;
pub(crate) use fileops_impl_nonseekable;
pub(crate) use fileops_impl_seekable;
pub(crate) use fileops_impl_seekless;
/// Handles an ioctl request that has no specific implementation.
///
/// Reports the request through `not_implemented!`, formatting the request number in hex.
pub fn default_ioctl(request: u32) -> Result<SyscallResult, Errno> {
not_implemented!("ioctl: request=0x{:x}", request);
pub struct OPathOps {}
impl OPathOps {
pub fn new() -> OPathOps {
OPathOps {}
impl FileOps for OPathOps {
fn read(
_file: &FileObject,
_current_task: &CurrentTask,
_data: &[UserBuffer],
) -> Result<usize, Errno> {
fn read_at(
_file: &FileObject,
_current_task: &CurrentTask,
_offset: usize,
_data: &[UserBuffer],
) -> Result<usize, Errno> {
fn write(
_file: &FileObject,
_current_task: &CurrentTask,
_data: &[UserBuffer],
) -> Result<usize, Errno> {
fn write_at(
_file: &FileObject,
_current_task: &CurrentTask,
_offset: usize,
_data: &[UserBuffer],
) -> Result<usize, Errno> {
fn seek(
_file: &FileObject,
_current_task: &CurrentTask,
_offset: off_t,
_whence: SeekOrigin,
) -> Result<off_t, Errno> {
fn get_vmo(
_file: &FileObject,
_current_task: &CurrentTask,
_length: Option<usize>,
_prot: zx::VmarFlags,
) -> Result<zx::Vmo, Errno> {
fn readdir(
_file: &FileObject,
_current_task: &CurrentTask,
_sink: &mut dyn DirentSink,
) -> Result<(), Errno> {
fn ioctl(
_file: &FileObject,
_current_task: &CurrentTask,
_request: u32,
_user_addr: UserAddress,
) -> Result<SyscallResult, Errno> {
fn fcntl(
_file: &FileObject,
_current_task: &CurrentTask,
_cmd: u32,
_arg: u64,
) -> Result<SyscallResult, Errno> {
// Note: this can be a valid operation for files opened with O_PATH.
/// A session with a file object.
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
/// The [`FileOps`] implementation that this file's operations are dispatched to.
ops: Box<dyn FileOps>,
/// The NamespaceNode associated with this FileObject.
/// Represents the name the process used to open this file.
pub name: NamespaceNode,
/// The file system this file belongs to (presumably the one obtained from `name`'s node when
/// the FileObject is created).
pub fs: FileSystemHandle,
/// The current seek offset of this session. A new FileObject starts at offset 0.
pub offset: Mutex<off_t>,
/// The open flags of this session. They can be changed after creation with
/// `update_file_flags`.
flags: Mutex<OpenFlags>,
/// The async owner of this file, see fcntl(F_GETOWN) and fcntl(F_SETOWN). Initially 0.
async_owner: Mutex<pid_t>,
pub type FileHandle = Arc<FileObject>;
impl FileObject {
/// Create a FileObject that is not mounted in a namespace.
/// The returned FileObject does not have a name.
pub fn new_anonymous(
ops: Box<dyn FileOps>,
node: FsNodeHandle,
flags: OpenFlags,
) -> FileHandle {
Self::new(ops, NamespaceNode::new_anonymous(node), flags)
/// Create a FileObject with an associated NamespaceNode.
/// This function is not typically called directly. Instead, consider
/// calling NamespaceNode::open.
pub fn new(ops: Box<dyn FileOps>, name: NamespaceNode, flags: OpenFlags) -> FileHandle {
let fs = name.entry.node.fs();
Arc::new(Self {
offset: Mutex::new(0),
flags: Mutex::new(flags),
async_owner: Mutex::new(0),
/// The FsNode from which this FileObject was created.
pub fn node(&self) -> &FsNodeHandle {
pub fn can_read(&self) -> bool {
// TODO: Consider caching the access mode outside of this lock
// because it cannot change.
pub fn can_write(&self) -> bool {
// TODO: Consider caching the access mode outside of this lock
// because it cannot change.
fn ops(&self) -> &dyn FileOps {
/// Returns the `FileObject`'s `FileOps` as a `&T`, or `None` if the downcast fails.
/// This is useful for syscalls that only operate on a certain type of file.
pub fn downcast_file<T>(&self) -> Option<&T>
T: 'static,
/// Runs `op`, blocking and retrying while it fails with `EAGAIN`.
///
/// - `op`: The operation to attempt. It may be called several times.
/// - `events`: The events to wait for before `op` is attempted again.
/// - `deadline`: When to stop waiting. `None` waits without a time limit.
///
/// If the first attempt fails with `EAGAIN` and the file has `OpenFlags::NONBLOCK` set, the
/// `EAGAIN` is returned to the caller without waiting. A wait that ends with `ETIMEDOUT` is
/// reported as `EAGAIN`; any other wait error is returned unchanged.
pub fn blocking_op<T, Op>(
current_task: &CurrentTask,
mut op: Op,
events: FdEvents,
deadline: Option<zx::Time>,
) -> Result<T, Errno>
Op: FnMut() -> Result<T, Errno>,
// First attempt: only an EAGAIN on a file that is allowed to block falls through to the
// wait loop; every other outcome (including success) is returned directly.
match op() {
Err(errno) if errno == EAGAIN && !self.flags().contains(OpenFlags::NONBLOCK) => {}
result => return result,
let waiter = Waiter::new();
loop {
// The wait is registered before `op` is retried, presumably so that an event arriving
// between the retry and the wait is not missed.
self.ops().wait_async(self, current_task, &waiter, events, WaitCallback::none());
match op() {
Err(errno) if errno == EAGAIN => waiter
.wait_until(current_task, deadline.unwrap_or(zx::Time::INFINITE))
// A timed-out wait is surfaced to the caller as EAGAIN.
.map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?,
result => return result,
pub fn read(&self, current_task: &CurrentTask, data: &[UserBuffer]) -> Result<usize, Errno> {
if !self.can_read() {
return error!(EBADF);
|| self.ops().read(self, current_task, data),
FdEvents::POLLIN | FdEvents::POLLHUP,
pub fn read_at(
current_task: &CurrentTask,
offset: usize,
data: &[UserBuffer],
) -> Result<usize, Errno> {
if !self.can_read() {
return error!(EBADF);
|| self.ops().read_at(self, current_task, offset, data),
FdEvents::POLLIN | FdEvents::POLLHUP,
/// Writes `data` to the file at its current position, blocking until the file is writable
/// unless it is non-blocking.
///
/// Returns the number of bytes written, or `EBADF` if the file was not opened for writing.
///
/// Writes are coordinated through the node's `append_lock`: a file opened with
/// `OpenFlags::APPEND` holds it exclusively for the duration of the write, and every other
/// writer holds it shared, so that a regular write cannot run concurrently with an append.
pub fn write(&self, current_task: &CurrentTask, data: &[UserBuffer]) -> Result<usize, Errno> {
if !self.can_write() {
return error!(EBADF);
|| {
if self.flags().contains(OpenFlags::APPEND) {
let _guard = self.node().append_lock.write();
self.ops().write(self, current_task, data)
} else {
// Take the shared side of the lock. Binding only `self.node()` here would hold a
// plain reference and provide no exclusion against concurrent appends.
let _guard = self.node().append_lock.read();
self.ops().write(self, current_task, data)
FdEvents::POLLOUT | FdEvents::POLLHUP,
/// Writes `data` to the file at `offset`, blocking until the file is writable unless it is
/// non-blocking.
///
/// Returns the number of bytes written, or `EBADF` if the file was not opened for writing.
///
/// The node's `append_lock` is held shared for the duration of the write so that a positioned
/// write cannot run concurrently with an append, which holds the lock exclusively.
pub fn write_at(
current_task: &CurrentTask,
offset: usize,
data: &[UserBuffer],
) -> Result<usize, Errno> {
if !self.can_write() {
return error!(EBADF);
|| {
// Take the shared side of the lock. Binding only `self.node()` here would hold a plain
// reference and provide no exclusion against concurrent appends.
let _guard = self.node().append_lock.read();
self.ops().write_at(self, current_task, offset, data)
FdEvents::POLLOUT | FdEvents::POLLHUP,
pub fn seek(
current_task: &CurrentTask,
offset: off_t,
whence: SeekOrigin,
) -> Result<off_t, Errno> {
self.ops().seek(self, current_task, offset, whence)
/// Returns a VMO for this file after checking the requested protection against the file's
/// access mode.
///
/// Fails with `EACCES` if `prot` requests `PERM_READ` on a file that cannot be read, or
/// `PERM_WRITE` on a file that cannot be written. Otherwise the request is forwarded to the
/// file's [`FileOps::get_vmo`].
pub fn get_vmo(
current_task: &CurrentTask,
length: Option<usize>,
prot: zx::VmarFlags,
) -> Result<zx::Vmo, Errno> {
if prot.contains(zx::VmarFlags::PERM_READ) && !self.can_read() {
return error!(EACCES);
if prot.contains(zx::VmarFlags::PERM_WRITE) && !self.can_write() {
return error!(EACCES);
// TODO: Check for PERM_EXECUTE by checking whether the filesystem is mounted as noexec.
self.ops().get_vmo(self, current_task, length, prot)
/// Maps this file into memory after checking the requested protection against the file's
/// access mode, then forwards the request to the file's [`FileOps::mmap`].
///
/// Fails with `EACCES` if `flags` requests `PERM_READ` on a file that cannot be read, or if it
/// requests `PERM_WRITE` for a `MappingOptions::SHARED` mapping of a file that cannot be
/// written.
pub fn mmap(
current_task: &CurrentTask,
addr: DesiredAddress,
vmo_offset: u64,
length: usize,
flags: zx::VmarFlags,
options: MappingOptions,
filename: NamespaceNode,
) -> Result<MappedVmo, Errno> {
if flags.contains(zx::VmarFlags::PERM_READ) && !self.can_read() {
return error!(EACCES);
// Write access to the file is only required for shared mappings. The default
// `FileOps::mmap` backs a non-shared mapping with a snapshot child of the file's VMO.
if flags.contains(zx::VmarFlags::PERM_WRITE)
&& !self.can_write()
&& options.contains(MappingOptions::SHARED)
return error!(EACCES);
// TODO: Check for PERM_EXECUTE by checking whether the filesystem is mounted as noexec.
self.ops().mmap(self, current_task, addr, vmo_offset, length, flags, options, filename)
/// Reads directory entries from this file into `sink`.
///
/// Running out of space in `sink` after at least one entry has been added is a success. If
/// there was not enough space for even the first entry, the call fails with `EINVAL`. Any other
/// result of the file's [`FileOps::readdir`] is returned unchanged.
pub fn readdir(
current_task: &CurrentTask,
sink: &mut dyn DirentSink,
) -> Result<(), Errno> {
match self.ops().readdir(self, current_task, sink) {
// The ENOSPC we catch here is generated by DirentSink::add. We
// return an error to the caller only if we didn't have space for
// the first directory entry.
// DirentSink::add uses ENOSPC rather than EINVAL to signal this
// condition because EINVAL is a very generic error. We only want
// to perform the ENOSPC-to-EINVAL transformation in exactly the
// case where there was not sufficient space in the DirentSink.
Err(errno) if errno == ENOSPC && sink.actual() > 0 => Ok(()),
Err(errno) if errno == ENOSPC => error!(EINVAL),
result => result,
pub fn ioctl(
current_task: &CurrentTask,
request: u32,
user_addr: UserAddress,
) -> Result<SyscallResult, Errno> {
self.ops().ioctl(self, current_task, request, user_addr)
pub fn fcntl(
current_task: &CurrentTask,
cmd: u32,
arg: u64,
) -> Result<SyscallResult, Errno> {
self.ops().fcntl(self, current_task, cmd, arg)
/// Replaces the flags selected by `mask` with the corresponding bits of `value`.
///
/// Flags outside `mask` keep their current value, and bits of `value` outside `mask` are
/// ignored.
pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
let mask_bits = mask.bits();
let mut flags = self.flags.lock();
// Keep the unmasked bits of the current flags and take the masked bits from `value`.
let bits = (flags.bits() & !mask_bits) | (value.bits() & mask_bits);
// Bits that do not correspond to a known flag are dropped.
*flags = OpenFlags::from_bits_truncate(bits);
pub fn flags(&self) -> OpenFlags {
/// Get the async owner of this file.
/// See fcntl(F_GETOWN)
pub fn get_async_owner(&self) -> pid_t {
/// Set the async owner of this file.
/// See fcntl(F_SETOWN)
pub fn set_async_owner(&self, owner: pid_t) {
*self.async_owner.lock() = owner;
/// Wait on the specified events and call the EventHandler when ready
pub fn wait_async(
current_task: &CurrentTask,
waiter: &Arc<Waiter>,
events: FdEvents,
handler: EventHandler,
) -> WaitKey {
self.ops().wait_async(self, current_task, waiter, events, handler)
// Cancel a wait set up with wait_async
pub fn cancel_wait(&self, current_task: &CurrentTask, waiter: &Arc<Waiter>, key: WaitKey) {
self.ops().cancel_wait(current_task, waiter, key);
// Return the events currently active
pub fn query_events(&self, current_task: &CurrentTask) -> FdEvents {
/// Updates the file's seek offset without an upper bound on the resulting offset.
/// This is useful for files without a defined size.
/// Errors if `whence` is invalid, or the calculated offset is invalid.
/// - `offset`: The target offset from `whence`.
/// - `whence`: The location from which to compute the updated offset.
pub fn unbounded_seek(&self, offset: off_t, whence: SeekOrigin) -> Result<off_t, Errno> {
let mut current_offset = self.offset.lock();
let new_offset = match whence {
SeekOrigin::SET => Some(offset),
// `checked_add` detects an overflowing target offset instead of wrapping.
SeekOrigin::CUR => (*current_offset).checked_add(offset),
// There is no file size to seek relative to, so seeking from the end moves to
// MAX_LFS_FILESIZE regardless of `offset`.
SeekOrigin::END => Some(MAX_LFS_FILESIZE as i64),
// Negative offsets are rejected with EINVAL.
if new_offset < 0 {
return error!(EINVAL);
*current_offset = new_offset;
impl Drop for FileObject {
fn drop(&mut self) {
impl fmt::Debug for FileObject {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
.field("name", &
.field("offset", &self.offset)