| // Copyright 2021 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| use crate::mm::{IOVecPtr, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE}; |
| use crate::security; |
| use crate::syscalls::time::{ITimerSpecPtr, TimeSpecPtr, TimeValPtr}; |
| use crate::task::{ |
| CurrentTask, EventHandler, ProcessEntryRef, ReadyItem, ReadyItemKey, Timeline, TimerWakeup, |
| Waiter, |
| }; |
| use crate::vfs::aio::AioContext; |
| use crate::vfs::buffers::{UserBuffersInputBuffer, UserBuffersOutputBuffer}; |
| use crate::vfs::eventfd::{EventFdType, new_eventfd}; |
| use crate::vfs::fs_args::MountParams; |
| use crate::vfs::inotify::InotifyFileObject; |
| use crate::vfs::io_uring::{IORING_MAX_ENTRIES, IoUringFileObject}; |
| use crate::vfs::pidfd::new_pidfd; |
| use crate::vfs::pipe::{PipeFileObject, new_pipe}; |
| use crate::vfs::timer::TimerFile; |
| use crate::vfs::{ |
| CheckAccessReason, DirentSink64, EpollFileObject, FallocMode, FdFlags, FdNumber, |
| FileAsyncOwner, FileHandle, FileSystemOptions, FlockOperation, FsStr, FsString, LookupContext, |
| NamespaceNode, PathWithReachability, RecordLockCommand, RenameFlags, SeekTarget, StatxFlags, |
| SymlinkMode, SymlinkTarget, TargetFdNumber, TimeUpdateType, UnlinkKind, ValueOrSize, WdNumber, |
| WhatToMount, XattrOp, checked_add_offset_and_length, new_memfd, new_zombie_pidfd, splice, |
| }; |
| use starnix_logging::{log_trace, track_stub}; |
| use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Mutex, Unlocked}; |
| use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult}; |
| use starnix_types::ownership::TempRef; |
| use starnix_types::time::{ |
| duration_from_poll_timeout, duration_from_timespec, time_from_timespec, timespec_from_duration, |
| }; |
| use starnix_types::user_buffer::UserBuffer; |
| use starnix_uapi::auth::{ |
| CAP_BLOCK_SUSPEND, CAP_DAC_READ_SEARCH, CAP_LEASE, CAP_SYS_ADMIN, CAP_WAKE_ALARM, |
| PTRACE_MODE_ATTACH_REALCREDS, |
| }; |
| use starnix_uapi::device_type::DeviceType; |
| use starnix_uapi::errors::{ |
| EFAULT, EINTR, ENAMETOOLONG, ENOTSUP, ETIMEDOUT, Errno, ErrnoResultExt, |
| }; |
| use starnix_uapi::file_lease::FileLeaseType; |
| use starnix_uapi::file_mode::{Access, AccessCheck, FileMode}; |
| use starnix_uapi::inotify_mask::InotifyMask; |
| use starnix_uapi::mount_flags::MountFlags; |
| use starnix_uapi::open_flags::OpenFlags; |
| use starnix_uapi::personality::PersonalityFlags; |
| use starnix_uapi::resource_limits::Resource; |
| use starnix_uapi::seal_flags::SealFlags; |
| use starnix_uapi::signals::SigSet; |
| use starnix_uapi::unmount_flags::UnmountFlags; |
| use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef}; |
| use starnix_uapi::user_value::UserValue; |
| use starnix_uapi::vfs::{EpollEvent, FdEvents, ResolveFlags}; |
| use starnix_uapi::{ |
| __kernel_fd_set, AT_EACCESS, AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_REMOVEDIR, AT_SYMLINK_FOLLOW, |
| AT_SYMLINK_NOFOLLOW, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM, CLOCK_MONOTONIC, CLOCK_REALTIME, |
| CLOCK_REALTIME_ALARM, CLOSE_RANGE_CLOEXEC, CLOSE_RANGE_UNSHARE, EFD_CLOEXEC, EFD_NONBLOCK, |
| EFD_SEMAPHORE, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, F_ADD_SEALS, |
| F_DUPFD, F_DUPFD_CLOEXEC, F_GET_SEALS, F_GETFD, F_GETFL, F_GETLEASE, F_GETLK, F_GETLK64, |
| F_GETOWN, F_GETOWN_EX, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW, F_OWNER_PGRP, F_OWNER_PID, |
| F_OWNER_TID, F_SETFD, F_SETFL, F_SETLEASE, F_SETLK, F_SETLK64, F_SETLKW, F_SETLKW64, F_SETOWN, |
| F_SETOWN_EX, F_SETSIG, FIOCLEX, FIONCLEX, IN_CLOEXEC, IN_NONBLOCK, MFD_ALLOW_SEALING, |
| MFD_CLOEXEC, MFD_EXEC, MFD_HUGE_MASK, MFD_HUGE_SHIFT, MFD_HUGETLB, MFD_NOEXEC_SEAL, NAME_MAX, |
| O_CLOEXEC, O_CREAT, O_NOFOLLOW, O_PATH, O_TMPFILE, PIDFD_NONBLOCK, POLLERR, POLLHUP, POLLIN, |
| POLLOUT, POLLPRI, POLLRDBAND, POLLRDNORM, POLLWRBAND, POLLWRNORM, POSIX_FADV_DONTNEED, |
| POSIX_FADV_NOREUSE, POSIX_FADV_NORMAL, POSIX_FADV_RANDOM, POSIX_FADV_SEQUENTIAL, |
| POSIX_FADV_WILLNEED, RWF_SUPPORTED, TFD_CLOEXEC, TFD_NONBLOCK, TFD_TIMER_ABSTIME, |
| TFD_TIMER_CANCEL_ON_SET, XATTR_CREATE, XATTR_NAME_MAX, XATTR_REPLACE, aio_context_t, errno, |
| error, f_owner_ex, io_event, io_uring_params, |
| io_uring_register_op_IORING_REGISTER_BUFFERS as IORING_REGISTER_BUFFERS, |
| io_uring_register_op_IORING_REGISTER_IOWQ_MAX_WORKERS as IORING_REGISTER_IOWQ_MAX_WORKERS, |
| io_uring_register_op_IORING_REGISTER_PBUF_RING as IORING_REGISTER_PBUF_RING, |
| io_uring_register_op_IORING_REGISTER_PBUF_STATUS as IORING_REGISTER_PBUF_STATUS, |
| io_uring_register_op_IORING_REGISTER_RING_FDS as IORING_REGISTER_RING_FDS, |
| io_uring_register_op_IORING_UNREGISTER_BUFFERS as IORING_UNREGISTER_BUFFERS, |
| io_uring_register_op_IORING_UNREGISTER_PBUF_RING as IORING_UNREGISTER_PBUF_RING, |
| io_uring_register_op_IORING_UNREGISTER_RING_FDS as IORING_UNREGISTER_RING_FDS, iocb, off_t, |
| pid_t, pollfd, pselect6_sigmask, sigset_t, statx, timespec, uapi, uid_t, |
| }; |
| use std::cmp::Ordering; |
| use std::collections::VecDeque; |
| use std::marker::PhantomData; |
| use std::sync::{Arc, atomic}; |
| use std::usize; |
| use zerocopy::{Immutable, IntoBytes}; |
| |
| uapi::check_arch_independent_layout! { |
| pollfd { |
| fd, |
| events, |
| revents, |
| } |
| |
| io_event { |
| data, |
| obj, |
| res, |
| res2, |
| } |
| |
| iocb { |
| aio_data, |
| aio_key, |
| aio_rw_flags, |
| aio_lio_opcode, |
| aio_reqprio, |
| aio_fildes, |
| aio_buf, |
| aio_nbytes, |
| aio_offset, |
| aio_reserved2, |
| aio_flags, |
| aio_resfd, |
| } |
| |
| statx_timestamp { |
| tv_sec, |
| tv_nsec, |
| } |
| |
| statx { |
| stx_mask, |
| stx_blksize, |
| stx_attributes, |
| stx_nlink, |
| stx_uid, |
| stx_gid, |
| stx_mode, |
| stx_ino, |
| stx_size, |
| stx_blocks, |
| stx_attributes_mask, |
| stx_atime, |
| stx_btime, |
| stx_ctime, |
| stx_mtime, |
| stx_rdev_major, |
| stx_rdev_minor, |
| stx_dev_major, |
| stx_dev_minor, |
| stx_mnt_id, |
| stx_dio_mem_align, |
| stx_dio_offset_align, |
| stx_subvol, |
| stx_atomic_write_unit_min, |
| stx_atomic_write_unit_max, |
| stx_atomic_write_segments_max, |
| } |
| |
| io_sqring_offsets { |
| head, |
| tail, |
| ring_mask, |
| ring_entries, |
| flags, |
| dropped, |
| array, |
| resv1, |
| user_addr, |
| } |
| |
| io_cqring_offsets { |
| head, |
| tail, |
| ring_mask, |
| ring_entries, |
| overflow, |
| cqes, |
| flags, |
| resv1, |
| user_addr, |
| } |
| |
| io_uring_params { |
| sq_entries, |
| cq_entries, |
| flags, |
| sq_thread_cpu, |
| sq_thread_idle, |
| features, |
| wq_fd, |
| resv, |
| sq_off, |
| cq_off, |
| } |
| |
| io_uring_rsrc_update { |
| offset, |
| resv, |
| data, |
| } |
| |
| io_uring_buf_reg { |
| ring_addr, |
| ring_entries, |
| bgid, |
| flags, |
| resv, |
| } |
| } |
| |
| // Constants from bionic/libc/include/sys/stat.h |
| const UTIME_NOW: i64 = 0x3fffffff; |
| const UTIME_OMIT: i64 = 0x3ffffffe; |
| |
| pub type OffsetPtr = MultiArchUserRef<uapi::off_t, uapi::arch32::off_t>; |
| pub type IocbPtr = MultiArchUserRef<iocb, iocb>; |
| pub type IocbPtrPtr = MultiArchUserRef<IocbPtr, IocbPtr>; |
| |
| pub fn sys_read( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| address: UserAddress, |
| length: usize, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| file.read( |
| locked, |
| current_task, |
| &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?, |
| ) |
| .map_eintr(|| errno!(ERESTARTSYS)) |
| } |
| |
| pub fn sys_write( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| address: UserAddress, |
| length: usize, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| file.write( |
| locked, |
| current_task, |
| &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?, |
| ) |
| .map_eintr(|| errno!(ERESTARTSYS)) |
| } |
| |
| pub fn sys_close( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| ) -> Result<(), Errno> { |
| current_task.files.close(fd)?; |
| Ok(()) |
| } |
| |
| pub fn sys_close_range( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| first: u32, |
| last: u32, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| if first > last || flags & !(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC) != 0 { |
| return error!(EINVAL); |
| } |
| if flags & CLOSE_RANGE_UNSHARE != 0 { |
| current_task.files.unshare(); |
| } |
| let in_range = |fd: FdNumber| fd.raw() as u32 >= first && fd.raw() as u32 <= last; |
| if flags & CLOSE_RANGE_CLOEXEC != 0 { |
| current_task.files.retain(locked, current_task, |fd, flags| { |
| if in_range(fd) { |
| *flags |= FdFlags::CLOEXEC; |
| } |
| true |
| }); |
| } else { |
| current_task.files.retain(locked, current_task, |fd, _| !in_range(fd)); |
| } |
| Ok(()) |
| } |
| |
| pub fn sys_lseek( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| offset: off_t, |
| whence: u32, |
| ) -> Result<off_t, Errno> { |
| let file = current_task.files.get(fd)?; |
| file.seek(locked, current_task, SeekTarget::from_raw(whence, offset)?) |
| } |
| |
| pub fn sys_fcntl( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| cmd: u32, |
| arg: u64, |
| ) -> Result<SyscallResult, Errno> { |
| let file = match cmd { |
| F_DUPFD | F_DUPFD_CLOEXEC | F_GETFD | F_SETFD | F_GETFL => { |
| current_task.files.get_allowing_opath(fd)? |
| } |
| _ => current_task.files.get(fd)?, |
| }; |
| |
| match cmd { |
| // For the following values of cmd we need to perform more checks before running the |
| // `check_file_fcntl_access` LSM hook. |
| F_SETOWN | F_SETOWN_EX | F_ADD_SEALS | F_SETLEASE => {} |
| _ => { |
| security::check_file_fcntl_access(current_task, &file, cmd, arg)?; |
| } |
| }; |
| |
| match cmd { |
| F_DUPFD | F_DUPFD_CLOEXEC => { |
| let fd_number = arg as i32; |
| let flags = if cmd == F_DUPFD_CLOEXEC { FdFlags::CLOEXEC } else { FdFlags::empty() }; |
| let newfd = current_task.files.duplicate( |
| locked, |
| current_task, |
| fd, |
| TargetFdNumber::Minimum(FdNumber::from_raw(fd_number)), |
| flags, |
| )?; |
| Ok(newfd.into()) |
| } |
| F_GETOWN => match file.get_async_owner() { |
| FileAsyncOwner::Unowned => Ok(0.into()), |
| FileAsyncOwner::Thread(tid) => Ok(tid.into()), |
| FileAsyncOwner::Process(pid) => Ok(pid.into()), |
| FileAsyncOwner::ProcessGroup(pgid) => Ok((-pgid).into()), |
| }, |
| F_GETOWN_EX => { |
| let maybe_owner = match file.get_async_owner() { |
| FileAsyncOwner::Unowned => None, |
| FileAsyncOwner::Thread(tid) => { |
| Some(uapi::f_owner_ex { type_: F_OWNER_TID as i32, pid: tid }) |
| } |
| FileAsyncOwner::Process(pid) => { |
| Some(uapi::f_owner_ex { type_: F_OWNER_PID as i32, pid }) |
| } |
| FileAsyncOwner::ProcessGroup(pgid) => { |
| Some(uapi::f_owner_ex { type_: F_OWNER_PGRP as i32, pid: pgid }) |
| } |
| }; |
| if let Some(owner) = maybe_owner { |
| let user_owner: UserRef<f_owner_ex> = |
| UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg)); |
| current_task.write_object(user_owner, &owner)?; |
| } |
| Ok(SUCCESS) |
| } |
| F_SETOWN => { |
| let pid = (arg as u32) as i32; |
| let owner = match pid.cmp(&0) { |
| Ordering::Equal => FileAsyncOwner::Unowned, |
| Ordering::Greater => FileAsyncOwner::Process(pid), |
| Ordering::Less => { |
| FileAsyncOwner::ProcessGroup(pid.checked_neg().ok_or_else(|| errno!(EINVAL))?) |
| } |
| }; |
| owner.validate(current_task)?; |
| security::check_file_fcntl_access(current_task, &file, cmd, arg)?; |
| file.set_async_owner(owner); |
| Ok(SUCCESS) |
| } |
| F_SETOWN_EX => { |
| let user_owner = UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg)); |
| let requested_owner = current_task.read_object(user_owner)?; |
| let mut owner = match requested_owner.type_ as u32 { |
| F_OWNER_TID => FileAsyncOwner::Thread(requested_owner.pid), |
| F_OWNER_PID => FileAsyncOwner::Process(requested_owner.pid), |
| F_OWNER_PGRP => FileAsyncOwner::ProcessGroup(requested_owner.pid), |
| _ => return error!(EINVAL), |
| }; |
| if requested_owner.pid == 0 { |
| owner = FileAsyncOwner::Unowned; |
| } |
| owner.validate(current_task)?; |
| security::check_file_fcntl_access(current_task, &file, cmd, arg)?; |
| file.set_async_owner(owner); |
| Ok(SUCCESS) |
| } |
| F_GETFD => Ok(current_task.files.get_fd_flags_allowing_opath(fd)?.into()), |
| F_SETFD => { |
| current_task |
| .files |
| .set_fd_flags_allowing_opath(fd, FdFlags::from_bits_truncate(arg as u32))?; |
| Ok(SUCCESS) |
| } |
| F_GETFL => { |
| // O_PATH allowed for: |
| // |
| // Retrieving open file status flags using the fcntl(2) |
| // F_GETFL operation: the returned flags will include the |
| // bit O_PATH. |
| // |
| // See https://man7.org/linux/man-pages/man2/open.2.html |
| Ok(file.flags().into()) |
| } |
| F_SETFL => { |
| let settable_flags = OpenFlags::APPEND |
| | OpenFlags::DIRECT |
| | OpenFlags::NOATIME |
| | OpenFlags::NONBLOCK |
| | OpenFlags::ASYNC; |
| let requested_flags = |
| OpenFlags::from_bits_truncate((arg as u32) & settable_flags.bits()); |
| |
| // If `NOATIME` flag is being set then check that it's allowed. |
| if requested_flags.contains(OpenFlags::NOATIME) |
| && !file.flags().contains(OpenFlags::NOATIME) |
| { |
| file.name.check_o_noatime_allowed(current_task)?; |
| } |
| |
| file.update_file_flags(requested_flags, settable_flags); |
| Ok(SUCCESS) |
| } |
| F_SETLK | F_SETLKW | F_GETLK => { |
| let flock_ref = |
| MultiArchUserRef::<uapi::flock, uapi::arch32::flock>::new(current_task, arg); |
| let flock = current_task.read_multi_arch_object(flock_ref)?; |
| let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?; |
| if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? { |
| current_task.write_multi_arch_object(flock_ref, flock)?; |
| } |
| Ok(SUCCESS) |
| } |
| F_SETLK64 | F_SETLKW64 | F_GETLK64 | F_OFD_GETLK | F_OFD_SETLK | F_OFD_SETLKW => { |
| let flock_ref = |
| MultiArchUserRef::<uapi::flock, uapi::arch32::flock64>::new(current_task, arg); |
| let flock = current_task.read_multi_arch_object(flock_ref)?; |
| let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?; |
| if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? { |
| current_task.write_multi_arch_object(flock_ref, flock)?; |
| } |
| Ok(SUCCESS) |
| } |
| F_ADD_SEALS => { |
| if !file.can_write() { |
| // Cannot add seals if the file is not writable |
| return error!(EPERM); |
| } |
| security::check_file_fcntl_access(current_task, &file, cmd, arg)?; |
| let mut state = file.name.entry.node.write_guard_state.lock(); |
| let flags = SealFlags::from_bits_truncate(arg as u32); |
| state.try_add_seal(flags)?; |
| Ok(SUCCESS) |
| } |
| F_GET_SEALS => { |
| let state = file.name.entry.node.write_guard_state.lock(); |
| Ok(state.get_seals()?.into()) |
| } |
| F_SETLEASE => { |
| let fsuid = current_task.with_current_creds(|creds| creds.fsuid); |
| if fsuid != file.node().info().uid { |
| security::check_task_capable(current_task, CAP_LEASE)?; |
| } |
| let lease = FileLeaseType::from_bits(arg as u32)?; |
| security::check_file_fcntl_access(current_task, &file, cmd, arg)?; |
| file.set_lease(current_task, lease)?; |
| Ok(SUCCESS) |
| } |
| F_GETLEASE => Ok(file.get_lease(current_task).into()), |
| F_SETSIG => { |
| track_stub!(TODO("https://fxbug.dev/437972675"), "F_SETSIG"); |
| return error!(EOPNOTSUPP); |
| } |
| _ => file.fcntl(current_task, cmd, arg), |
| } |
| } |
| |
| pub fn sys_pread64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| address: UserAddress, |
| length: usize, |
| offset: off_t, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| let offset = offset.try_into().map_err(|_| errno!(EINVAL))?; |
| file.read_at( |
| locked, |
| current_task, |
| offset, |
| &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?, |
| ) |
| } |
| |
| pub fn sys_pwrite64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| address: UserAddress, |
| length: usize, |
| offset: off_t, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| let offset = offset.try_into().map_err(|_| errno!(EINVAL))?; |
| file.write_at( |
| locked, |
| current_task, |
| offset, |
| &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?, |
| ) |
| } |
| |
| fn do_readv( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| offset: Option<off_t>, |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| if flags & !RWF_SUPPORTED != 0 { |
| return error!(EOPNOTSUPP); |
| } |
| if flags != 0 { |
| track_stub!(TODO("https://fxbug.dev/322875072"), "preadv2 flags", flags); |
| } |
| let file = current_task.files.get(fd)?; |
| let iovec = current_task.read_iovec(iovec_addr, iovec_count)?; |
| let mut data = UserBuffersOutputBuffer::unified_new(current_task, iovec)?; |
| if let Some(offset) = offset { |
| file.read_at( |
| locked, |
| current_task, |
| offset.try_into().map_err(|_| errno!(EINVAL))?, |
| &mut data, |
| ) |
| } else { |
| file.read(locked, current_task, &mut data) |
| } |
| } |
| |
| pub fn sys_readv( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| ) -> Result<usize, Errno> { |
| do_readv(locked, current_task, fd, iovec_addr, iovec_count, None, 0) |
| } |
| |
| pub fn sys_preadv( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| offset: off_t, |
| ) -> Result<usize, Errno> { |
| do_readv(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0) |
| } |
| |
| pub fn sys_preadv2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| offset: off_t, |
| _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset. |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| let offset = if offset == -1 { None } else { Some(offset) }; |
| do_readv(locked, current_task, fd, iovec_addr, iovec_count, offset, flags) |
| } |
| |
| fn do_writev( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| offset: Option<off_t>, |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| if flags & !RWF_SUPPORTED != 0 { |
| return error!(EOPNOTSUPP); |
| } |
| if flags != 0 { |
| track_stub!(TODO("https://fxbug.dev/322874523"), "pwritev2 flags", flags); |
| } |
| |
| let file = current_task.files.get(fd)?; |
| let iovec = current_task.read_iovec(iovec_addr, iovec_count)?; |
| let mut data = UserBuffersInputBuffer::unified_new(current_task, iovec)?; |
| let res = if let Some(offset) = offset { |
| file.write_at( |
| locked, |
| current_task, |
| offset.try_into().map_err(|_| errno!(EINVAL))?, |
| &mut data, |
| ) |
| } else { |
| file.write(locked, current_task, &mut data) |
| }; |
| |
| match &res { |
| Err(e) if e.code == EFAULT => { |
| track_stub!(TODO("https://fxbug.dev/297370529"), "allow partial writes") |
| } |
| _ => (), |
| } |
| |
| res |
| } |
| |
| pub fn sys_writev( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| ) -> Result<usize, Errno> { |
| do_writev(locked, current_task, fd, iovec_addr, iovec_count, None, 0) |
| } |
| |
| pub fn sys_pwritev( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| offset: off_t, |
| ) -> Result<usize, Errno> { |
| do_writev(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0) |
| } |
| |
| pub fn sys_pwritev2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| offset: off_t, |
| _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset. |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| let offset = if offset == -1 { None } else { Some(offset) }; |
| do_writev(locked, current_task, fd, iovec_addr, iovec_count, offset, flags) |
| } |
| |
| type StatFsPtr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs>; |
| |
| pub fn fstatfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| user_buf: MultiArchUserRef<uapi::statfs, T32>, |
| ) -> Result<(), Errno> { |
| // O_PATH allowed for: |
| // |
| // fstatfs(2) (since Linux 3.12). |
| // |
| // See https://man7.org/linux/man-pages/man2/open.2.html |
| let file = current_task.files.get_allowing_opath(fd)?; |
| let mut stat = file.fs.statfs(locked, current_task)?; |
| stat.f_flags |= file.name.mount.flags().bits() as i64; |
| current_task.write_multi_arch_object(user_buf, stat)?; |
| Ok(()) |
| } |
| |
| pub fn sys_fstatfs( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| user_buf: StatFsPtr, |
| ) -> Result<(), Errno> { |
| fstatfs(locked, current_task, fd, user_buf) |
| } |
| |
| fn statfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| user_buf: MultiArchUserRef<uapi::statfs, T32>, |
| ) -> Result<(), Errno> { |
| let name = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?; |
| let fs = name.entry.node.fs(); |
| let mut stat = fs.statfs(locked, current_task)?; |
| stat.f_flags |= name.mount.flags().bits() as i64; |
| current_task.write_multi_arch_object(user_buf, stat)?; |
| Ok(()) |
| } |
| |
| pub fn sys_statfs( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| user_buf: StatFsPtr, |
| ) -> Result<(), Errno> { |
| statfs(locked, current_task, user_path, user_buf) |
| } |
| |
| pub fn sys_sendfile( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| out_fd: FdNumber, |
| in_fd: FdNumber, |
| user_offset: OffsetPtr, |
| count: i32, |
| ) -> Result<usize, Errno> { |
| splice::sendfile(locked, current_task, out_fd, in_fd, user_offset, count) |
| } |
| |
| /// A convenient wrapper for Task::open_file_at. |
| /// |
| /// Reads user_path from user memory and then calls through to Task::open_file_at. |
| fn open_file_at( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| flags: u32, |
| mode: FileMode, |
| resolve_flags: ResolveFlags, |
| ) -> Result<FileHandle, Errno> { |
| let path = current_task.read_path(user_path)?; |
| log_trace!(dir_fd:%, path:%; "open_file_at"); |
| current_task.open_file_at( |
| locked, |
| dir_fd, |
| path.as_ref(), |
| OpenFlags::from_bits_truncate(flags), |
| mode, |
| resolve_flags, |
| AccessCheck::default(), |
| ) |
| } |
| |
| fn lookup_parent_at<T, F>( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| callback: F, |
| ) -> Result<T, Errno> |
| where |
| F: Fn(&mut Locked<Unlocked>, LookupContext, NamespaceNode, &FsStr) -> Result<T, Errno>, |
| { |
| let path = current_task.read_path(user_path)?; |
| log_trace!(dir_fd:%, path:%; "lookup_parent_at"); |
| if path.is_empty() { |
| return error!(ENOENT); |
| } |
| let mut context = LookupContext::default(); |
| let (parent, basename) = |
| current_task.lookup_parent_at(locked, &mut context, dir_fd, path.as_ref())?; |
| callback(locked, context, parent, basename) |
| } |
| |
| /// Options for lookup_at. |
| #[derive(Debug, Default, Copy, Clone)] |
| pub struct LookupFlags { |
| /// Whether AT_EMPTY_PATH was supplied. |
| allow_empty_path: bool, |
| |
| /// Used to implement AT_SYMLINK_NOFOLLOW. |
| symlink_mode: SymlinkMode, |
| |
| /// Automount directories on the path. |
| // TODO(https://fxbug.dev/297370602): Support the `AT_NO_AUTOMOUNT` flag. |
| #[allow(dead_code)] |
| automount: bool, |
| } |
| |
| impl LookupFlags { |
| fn no_follow() -> Self { |
| Self { symlink_mode: SymlinkMode::NoFollow, ..Default::default() } |
| } |
| |
| fn from_bits(flags: u32, allowed_flags: u32) -> Result<Self, Errno> { |
| if flags & !allowed_flags != 0 { |
| return error!(EINVAL); |
| } |
| let follow_symlinks = if allowed_flags & AT_SYMLINK_FOLLOW != 0 { |
| flags & AT_SYMLINK_FOLLOW != 0 |
| } else { |
| flags & AT_SYMLINK_NOFOLLOW == 0 |
| }; |
| let automount = |
| if allowed_flags & AT_NO_AUTOMOUNT != 0 { flags & AT_NO_AUTOMOUNT == 0 } else { false }; |
| if automount { |
| track_stub!(TODO("https://fxbug.dev/297370602"), "LookupFlags::automount"); |
| } |
| Ok(LookupFlags { |
| allow_empty_path: (flags & AT_EMPTY_PATH != 0) |
| || (flags & O_PATH != 0 && flags & O_NOFOLLOW != 0), |
| symlink_mode: if follow_symlinks { SymlinkMode::Follow } else { SymlinkMode::NoFollow }, |
| automount, |
| }) |
| } |
| } |
| |
| impl From<StatxFlags> for LookupFlags { |
| fn from(flags: StatxFlags) -> Self { |
| let lookup_flags = StatxFlags::AT_SYMLINK_NOFOLLOW |
| | StatxFlags::AT_EMPTY_PATH |
| | StatxFlags::AT_NO_AUTOMOUNT; |
| Self::from_bits((flags & lookup_flags).bits(), lookup_flags.bits()).unwrap() |
| } |
| } |
| |
| pub fn lookup_at<L>( |
| locked: &mut Locked<L>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| options: LookupFlags, |
| ) -> Result<NamespaceNode, Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| let path = current_task.read_path(user_path)?; |
| log_trace!(dir_fd:%, path:%; "lookup_at"); |
| if path.is_empty() { |
| if options.allow_empty_path { |
| let (node, _) = current_task.resolve_dir_fd( |
| locked, |
| dir_fd, |
| path.as_ref(), |
| ResolveFlags::empty(), |
| )?; |
| return Ok(node); |
| } |
| return error!(ENOENT); |
| } |
| |
| let mut parent_context = LookupContext::default(); |
| let (parent, basename) = |
| current_task.lookup_parent_at(locked, &mut parent_context, dir_fd, path.as_ref())?; |
| |
| let mut child_context = if parent_context.must_be_directory { |
| // The child must resolve to a directory. This is because a trailing slash |
| // was found in the path. If the child is a symlink, we should follow it. |
| // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75 |
| parent_context.with(SymlinkMode::Follow) |
| } else { |
| parent_context.with(options.symlink_mode) |
| }; |
| |
| parent.lookup_child(locked, current_task, &mut child_context, basename) |
| } |
| |
| fn do_openat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| flags: u32, |
| mode: FileMode, |
| resolve_flags: ResolveFlags, |
| ) -> Result<FdNumber, Errno> { |
| let file = open_file_at(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)?; |
| let fd_flags = get_fd_flags(flags); |
| current_task.add_file(locked, file, fd_flags) |
| } |
| |
| pub fn sys_openat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| flags: u32, |
| mode: FileMode, |
| ) -> Result<FdNumber, Errno> { |
| do_openat(locked, current_task, dir_fd, user_path, flags, mode, ResolveFlags::empty()) |
| } |
| |
| pub fn sys_openat2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| how_ref: UserRef<uapi::open_how>, |
| size: usize, |
| ) -> Result<FdNumber, Errno> { |
| const EXPECTED_SIZE: usize = std::mem::size_of::<uapi::open_how>(); |
| if size < EXPECTED_SIZE { |
| return error!(EINVAL); |
| } |
| |
| let how = current_task.read_object(how_ref)?; |
| |
| // If the `size` is greater than expected, then we need to check that any extra bytes after |
| // `open_how` are set to 0. This is needed to properly handle the case when `open_how` is |
| // extended with new fields in the future. There is no upper limit on the buffer size, so we |
| // limit size of each read to one page. |
| let mut pos = EXPECTED_SIZE; |
| while pos < size { |
| let length = std::cmp::min(size - pos, *PAGE_SIZE as usize); |
| let extra_bytes = |
| current_task.read_buffer(&UserBuffer { address: (how_ref.addr() + pos)?, length })?; |
| for b in extra_bytes { |
| if b != 0 { |
| return error!(E2BIG); |
| } |
| } |
| pos += length; |
| } |
| |
| let flags: u32 = how.flags.try_into().map_err(|_| errno!(EINVAL))?; |
| |
| // `mode` can be specified only with `O_CREAT` or `O_TMPFILE`. |
| let allowed_mode_flags = if (flags & (O_CREAT | O_TMPFILE)) > 0 { 0o7777 } else { 0 }; |
| if (how.mode & !allowed_mode_flags) != 0 { |
| return error!(EINVAL); |
| } |
| |
| let mode = FileMode::from_bits(how.mode.try_into().map_err(|_| errno!(EINVAL))?); |
| let resolve_flags = |
| ResolveFlags::from_bits(how.resolve.try_into().map_err(|_| errno!(EINVAL))?) |
| .ok_or_else(|| errno!(EINVAL))?; |
| |
| if resolve_flags.contains(ResolveFlags::CACHED) { |
| track_stub!(TODO("https://fxbug.dev/326474574"), "openat2: RESOLVE_CACHED"); |
| return error!(EAGAIN); |
| } |
| |
| do_openat(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags) |
| } |
| |
| pub fn sys_faccessat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| mode: u32, |
| ) -> Result<(), Errno> { |
| sys_faccessat2(locked, current_task, dir_fd, user_path, mode, 0) |
| } |
| |
| pub fn sys_faccessat2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| mode: u32, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| current_task.override_creds( |
| |creds| { |
| // Unless `AT_ACCESS` is set, perform lookup & access-checking using real UID & GID. |
| if flags & AT_EACCESS == 0 { |
| creds.creds.fsuid = creds.creds.uid; |
| creds.creds.fsgid = creds.creds.gid; |
| } |
| }, |
| || { |
| let mode = Access::try_from(mode)?; |
| let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW | AT_EACCESS)?; |
| let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?; |
| name.check_access(locked, current_task, mode, CheckAccessReason::Access) |
| }, |
| ) |
| } |
| |
| pub fn sys_getdents64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| user_buffer: UserAddress, |
| user_capacity: usize, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| let mut offset = file.offset.lock(); |
| let mut sink = DirentSink64::new(current_task, &mut offset, user_buffer, user_capacity); |
| let result = file.readdir(locked, current_task, &mut sink); |
| sink.map_result_with_actual(result) |
| } |
| |
| pub fn sys_chroot( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| ) -> Result<(), Errno> { |
| let name = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?; |
| if !name.entry.node.is_dir() { |
| return error!(ENOTDIR); |
| } |
| |
| current_task.fs().chroot(locked, current_task, name)?; |
| Ok(()) |
| } |
| |
| pub fn sys_chdir( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| ) -> Result<(), Errno> { |
| let name = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?; |
| if !name.entry.node.is_dir() { |
| return error!(ENOTDIR); |
| } |
| current_task.fs().chdir(locked, current_task, name) |
| } |
| |
| pub fn sys_fchdir( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| ) -> Result<(), Errno> { |
| // O_PATH allowed for: |
| // |
| // fchdir(2), if the file descriptor refers to a directory |
| // (since Linux 3.5). |
| // |
| // See https://man7.org/linux/man-pages/man2/open.2.html |
| let file = current_task.files.get_allowing_opath(fd)?; |
| if !file.name.entry.node.is_dir() { |
| return error!(ENOTDIR); |
| } |
| current_task.fs().chdir(locked, current_task, file.name.to_passive()) |
| } |
| |
| pub fn sys_fstat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| buffer: UserRef<uapi::stat>, |
| ) -> Result<(), Errno> { |
| // O_PATH allowed for: |
| // |
| // fstat(2) (since Linux 3.6). |
| // |
| // See https://man7.org/linux/man-pages/man2/open.2.html |
| let file = current_task.files.get_allowing_opath(fd)?; |
| let result = file.node().stat(locked, current_task)?; |
| current_task.write_object(buffer, &result)?; |
| Ok(()) |
| } |
| |
| type StatPtr = MultiArchUserRef<uapi::stat, uapi::arch32::stat64>; |
| |
| pub fn sys_fstatat64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| buffer: StatPtr, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let flags = |
| LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)?; |
| let name = lookup_at(locked, current_task, dir_fd, user_path, flags)?; |
| let result = name.entry.node.stat(locked, current_task)?; |
| current_task.write_multi_arch_object(buffer, result)?; |
| Ok(()) |
| } |
| |
| pub use sys_fstatat64 as sys_newfstatat; |
| |
| pub fn sys_statx( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| flags: u32, |
| mask: u32, |
| statxbuf: UserRef<statx>, |
| ) -> Result<(), Errno> { |
| let flags = StatxFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?; |
| if flags & (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC) |
| == (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC) |
| { |
| return error!(EINVAL); |
| } |
| |
| let name = lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::from(flags))?; |
| let result = name.entry.node.statx(locked, current_task, flags, mask)?; |
| current_task.write_object(statxbuf, &result)?; |
| Ok(()) |
| } |
| |
| pub fn sys_readlinkat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| buffer: UserAddress, |
| buffer_size: usize, |
| ) -> Result<usize, Errno> { |
| let path = current_task.read_path(user_path)?; |
| let lookup_flags = if path.is_empty() { |
| if dir_fd == FdNumber::AT_FDCWD { |
| return error!(ENOENT); |
| } |
| LookupFlags { |
| allow_empty_path: true, |
| symlink_mode: SymlinkMode::NoFollow, |
| ..Default::default() |
| } |
| } else { |
| LookupFlags::no_follow() |
| }; |
| let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?; |
| |
| let target = match name.readlink(locked, current_task)? { |
| SymlinkTarget::Path(path) => path, |
| SymlinkTarget::Node(node) => node.path(current_task), |
| }; |
| |
| if buffer_size == 0 { |
| return error!(EINVAL); |
| } |
| // Cap the returned length at buffer_size. |
| let length = std::cmp::min(buffer_size, target.len()); |
| current_task.write_memory(buffer, &target[..length])?; |
| Ok(length) |
| } |
| |
| pub fn sys_truncate( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| length: off_t, |
| ) -> Result<(), Errno> { |
| let length = length.try_into().map_err(|_| errno!(EINVAL))?; |
| let name = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?; |
| name.truncate(locked, current_task, length)?; |
| Ok(()) |
| } |
| |
| pub fn sys_ftruncate( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| length: off_t, |
| ) -> Result<(), Errno> { |
| let length = length.try_into().map_err(|_| errno!(EINVAL))?; |
| let file = current_task.files.get(fd)?; |
| file.ftruncate(locked, current_task, length)?; |
| Ok(()) |
| } |
| |
| pub fn sys_mkdirat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| mode: FileMode, |
| ) -> Result<(), Errno> { |
| let path = current_task.read_path(user_path)?; |
| |
| if path.is_empty() { |
| return error!(ENOENT); |
| } |
| let (parent, basename) = current_task.lookup_parent_at( |
| locked, |
| &mut LookupContext::default(), |
| dir_fd, |
| path.as_ref(), |
| )?; |
| parent.create_node( |
| locked, |
| current_task, |
| basename, |
| mode.with_type(FileMode::IFDIR), |
| DeviceType::NONE, |
| )?; |
| Ok(()) |
| } |
| |
| pub fn sys_mknodat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| mode: FileMode, |
| dev: DeviceType, |
| ) -> Result<(), Errno> { |
| let file_type = match mode.fmt() { |
| FileMode::IFREG |
| | FileMode::IFCHR |
| | FileMode::IFBLK |
| | FileMode::IFIFO |
| | FileMode::IFSOCK => mode.fmt(), |
| FileMode::EMPTY => FileMode::IFREG, |
| _ => return error!(EINVAL), |
| }; |
| lookup_parent_at(locked, current_task, dir_fd, user_path, |locked, _, parent, basename| { |
| parent.create_node(locked, current_task, basename, mode.with_type(file_type), dev) |
| })?; |
| Ok(()) |
| } |
| |
| pub fn sys_linkat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| old_dir_fd: FdNumber, |
| old_user_path: UserCString, |
| new_dir_fd: FdNumber, |
| new_user_path: UserCString, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| if flags & !(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH) != 0 { |
| track_stub!(TODO("https://fxbug.dev/322875706"), "linkat unknown flags", flags); |
| return error!(EINVAL); |
| } |
| |
| if flags & AT_EMPTY_PATH != 0 { |
| security::check_task_capable(current_task, CAP_DAC_READ_SEARCH) |
| .map_err(|_| errno!(ENOENT))?; |
| } |
| |
| let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_FOLLOW)?; |
| let target = lookup_at(locked, current_task, old_dir_fd, old_user_path, flags)?; |
| lookup_parent_at( |
| locked, |
| current_task, |
| new_dir_fd, |
| new_user_path, |
| |locked, context, parent, basename| { |
| // The path to a new link cannot end in `/`. That would imply that we are dereferencing |
| // the link to a directory. |
| if context.must_be_directory { |
| return error!(ENOENT); |
| } |
| if target.mount != parent.mount { |
| return error!(EXDEV); |
| } |
| parent.link(locked, current_task, basename, &target.entry.node) |
| }, |
| )?; |
| |
| Ok(()) |
| } |
| |
| pub fn sys_unlinkat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| if flags & !AT_REMOVEDIR != 0 { |
| return error!(EINVAL); |
| } |
| let kind = |
| if flags & AT_REMOVEDIR != 0 { UnlinkKind::Directory } else { UnlinkKind::NonDirectory }; |
| lookup_parent_at( |
| locked, |
| current_task, |
| dir_fd, |
| user_path, |
| |locked, context, parent, basename| { |
| parent.unlink(locked, current_task, basename, kind, context.must_be_directory) |
| }, |
| )?; |
| Ok(()) |
| } |
| |
| pub fn sys_renameat2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| old_dir_fd: FdNumber, |
| old_user_path: UserCString, |
| new_dir_fd: FdNumber, |
| new_user_path: UserCString, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let flags = RenameFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?; |
| if flags.intersects(RenameFlags::INTERNAL) { |
| return error!(EINVAL); |
| }; |
| |
| // RENAME_EXCHANGE cannot be combined with the other flags. |
| if flags.contains(RenameFlags::EXCHANGE) |
| && flags.intersects(RenameFlags::NOREPLACE | RenameFlags::WHITEOUT) |
| { |
| return error!(EINVAL); |
| } |
| |
| // RENAME_WHITEOUT is not supported. |
| if flags.contains(RenameFlags::WHITEOUT) { |
| track_stub!(TODO("https://fxbug.dev/322875416"), "RENAME_WHITEOUT"); |
| return error!(ENOSYS); |
| }; |
| |
| let mut lookup = |dir_fd, user_path| { |
| lookup_parent_at(locked, current_task, dir_fd, user_path, |_, _, parent, basename| { |
| Ok((parent, basename.to_owned())) |
| }) |
| }; |
| |
| let (old_parent, old_basename) = lookup(old_dir_fd, old_user_path)?; |
| let (new_parent, new_basename) = lookup(new_dir_fd, new_user_path)?; |
| |
| if new_basename.len() > NAME_MAX as usize { |
| return error!(ENAMETOOLONG); |
| } |
| |
| NamespaceNode::rename( |
| locked, |
| current_task, |
| &old_parent, |
| old_basename.as_ref(), |
| &new_parent, |
| new_basename.as_ref(), |
| flags, |
| ) |
| } |
| |
| pub fn sys_fchmod( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| mode: FileMode, |
| ) -> Result<(), Errno> { |
| // Remove the filetype from the mode. |
| let mode = mode & FileMode::PERMISSIONS; |
| let file = current_task.files.get(fd)?; |
| file.name.entry.node.chmod(locked, current_task, &file.name.mount, mode)?; |
| file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB); |
| Ok(()) |
| } |
| |
| pub fn sys_fchmodat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| mode: FileMode, |
| ) -> Result<(), Errno> { |
| // Remove the filetype from the mode. |
| let mode = mode & FileMode::PERMISSIONS; |
| let name = lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::default())?; |
| name.entry.node.chmod(locked, current_task, &name.mount, mode)?; |
| name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB); |
| Ok(()) |
| } |
| |
| fn maybe_uid(id: u32) -> Option<uid_t> { |
| if id == u32::MAX { None } else { Some(id) } |
| } |
| |
| pub fn sys_fchown( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| owner: u32, |
| group: u32, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| file.name.entry.node.chown( |
| locked, |
| current_task, |
| &file.name.mount, |
| maybe_uid(owner), |
| maybe_uid(group), |
| )?; |
| file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB); |
| Ok(()) |
| } |
| |
| pub fn sys_fchownat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| owner: u32, |
| group: u32, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)?; |
| let name = lookup_at(locked, current_task, dir_fd, user_path, flags)?; |
| name.entry.node.chown(locked, current_task, &name.mount, maybe_uid(owner), maybe_uid(group))?; |
| name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB); |
| Ok(()) |
| } |
| |
| fn read_xattr_name(current_task: &CurrentTask, name_addr: UserCString) -> Result<FsString, Errno> { |
| let name = current_task |
| .read_c_string_to_vec(name_addr, XATTR_NAME_MAX as usize + 1) |
| .map_err(|e| if e == ENAMETOOLONG { errno!(ERANGE) } else { e })?; |
| if name.is_empty() { |
| return error!(ERANGE); |
| } |
| let dot_index = memchr::memchr(b'.', &name).ok_or_else(|| errno!(ENOTSUP))?; |
| if name[dot_index + 1..].is_empty() { |
| return error!(EINVAL); |
| } |
| match &name[..dot_index] { |
| b"user" | b"security" | b"trusted" | b"system" => {} |
| _ => return error!(ENOTSUP), |
| } |
| Ok(name) |
| } |
| |
| fn do_getxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| node: &NamespaceNode, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let name = read_xattr_name(current_task, name_addr)?; |
| let value = |
| match node.entry.node.get_xattr(locked, current_task, &node.mount, name.as_ref(), size)? { |
| ValueOrSize::Size(s) => return Ok(s), |
| ValueOrSize::Value(v) => v, |
| }; |
| if size == 0 { |
| return Ok(value.len()); |
| } |
| if size < value.len() { |
| return error!(ERANGE); |
| } |
| current_task.write_memory(value_addr, &value) |
| } |
| |
| pub fn sys_getxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?; |
| do_getxattr(locked, current_task, &node, name_addr, value_addr, size) |
| } |
| |
| pub fn sys_fgetxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| do_getxattr(locked, current_task, &file.name, name_addr, value_addr, size) |
| } |
| |
| pub fn sys_lgetxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?; |
| do_getxattr(locked, current_task, &node, name_addr, value_addr, size) |
| } |
| |
| fn do_setxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| node: &NamespaceNode, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| if size > XATTR_NAME_MAX as usize { |
| return error!(E2BIG); |
| } |
| |
| let op = match flags { |
| 0 => XattrOp::Set, |
| XATTR_CREATE => XattrOp::Create, |
| XATTR_REPLACE => XattrOp::Replace, |
| _ => return error!(EINVAL), |
| }; |
| let name = read_xattr_name(current_task, name_addr)?; |
| let value = FsString::from(current_task.read_memory_to_vec(value_addr, size)?); |
| node.entry.node.set_xattr(locked, current_task, &node.mount, name.as_ref(), value.as_ref(), op) |
| } |
| |
| pub fn sys_fsetxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| do_setxattr(locked, current_task, &file.name, name_addr, value_addr, size, flags) |
| } |
| |
| pub fn sys_lsetxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?; |
| do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags) |
| } |
| |
| pub fn sys_setxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| name_addr: UserCString, |
| value_addr: UserAddress, |
| size: usize, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?; |
| do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags) |
| } |
| |
| fn do_removexattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| node: &NamespaceNode, |
| name_addr: UserCString, |
| ) -> Result<(), Errno> { |
| let mode = node.entry.node.info().mode; |
| if mode.is_chr() || mode.is_fifo() { |
| return error!(EPERM); |
| } |
| let name = read_xattr_name(current_task, name_addr)?; |
| node.entry.node.remove_xattr(locked, current_task, &node.mount, name.as_ref()) |
| } |
| |
| pub fn sys_removexattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| name_addr: UserCString, |
| ) -> Result<(), Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?; |
| do_removexattr(locked, current_task, &node, name_addr) |
| } |
| |
| pub fn sys_lremovexattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| name_addr: UserCString, |
| ) -> Result<(), Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?; |
| do_removexattr(locked, current_task, &node, name_addr) |
| } |
| |
| pub fn sys_fremovexattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| name_addr: UserCString, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| do_removexattr(locked, current_task, &file.name, name_addr) |
| } |
| |
| fn do_listxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| node: &NamespaceNode, |
| list_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let security_xattr = security::fs_node_listsecurity(current_task, &node.entry.node); |
| let xattrs = match node.entry.node.list_xattrs(locked, current_task, size) { |
| Ok(ValueOrSize::Size(s)) => return Ok(s + security_xattr.map_or(0, |s| s.len() + 1)), |
| Ok(ValueOrSize::Value(mut v)) => { |
| if let Some(security_value) = security_xattr { |
| if !v.contains(&security_value) { |
| v.push(security_value); |
| } |
| } |
| v |
| } |
| Err(e) => { |
| if e.code != ENOTSUP || security_xattr.is_none() { |
| return Err(e); |
| } |
| vec![security_xattr.unwrap()] |
| } |
| }; |
| |
| let mut list = vec![]; |
| for name in xattrs.iter() { |
| list.extend_from_slice(name); |
| list.push(b'\0'); |
| } |
| if size == 0 { |
| return Ok(list.len()); |
| } |
| if size < list.len() { |
| return error!(ERANGE); |
| } |
| current_task.write_memory(list_addr, &list) |
| } |
| |
| pub fn sys_listxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| list_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?; |
| do_listxattr(locked, current_task, &node, list_addr, size) |
| } |
| |
| pub fn sys_llistxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| path_addr: UserCString, |
| list_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let node = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?; |
| do_listxattr(locked, current_task, &node, list_addr, size) |
| } |
| |
| pub fn sys_flistxattr( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| list_addr: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(fd)?; |
| do_listxattr(locked, current_task, &file.name, list_addr, size) |
| } |
| |
| pub fn sys_getcwd( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| buf: UserAddress, |
| size: usize, |
| ) -> Result<usize, Errno> { |
| let root = current_task.fs().root(); |
| let cwd = current_task.fs().cwd(); |
| let mut user_cwd = match cwd.path_from_root(Some(&root)) { |
| PathWithReachability::Reachable(path) => path, |
| PathWithReachability::Unreachable(mut path) => { |
| let mut combined = vec![]; |
| combined.extend_from_slice(b"(unreachable)"); |
| combined.append(&mut path); |
| combined.into() |
| } |
| }; |
| user_cwd.push(b'\0'); |
| if user_cwd.len() > size { |
| return error!(ERANGE); |
| } |
| current_task.write_memory(buf, &user_cwd)?; |
| Ok(user_cwd.len()) |
| } |
| |
| pub fn sys_umask( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| umask: FileMode, |
| ) -> Result<FileMode, Errno> { |
| Ok(current_task.fs().set_umask(umask)) |
| } |
| |
| fn get_fd_flags(flags: u32) -> FdFlags { |
| if flags & O_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() } |
| } |
| |
| pub fn sys_pipe2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_pipe: UserRef<FdNumber>, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let supported_file_flags = OpenFlags::NONBLOCK | OpenFlags::DIRECT; |
| if flags & !(O_CLOEXEC | supported_file_flags.bits()) != 0 { |
| return error!(EINVAL); |
| } |
| let (read, write) = new_pipe(locked, current_task)?; |
| |
| let file_flags = OpenFlags::from_bits_truncate(flags & supported_file_flags.bits()); |
| read.update_file_flags(file_flags, supported_file_flags); |
| write.update_file_flags(file_flags, supported_file_flags); |
| |
| let fd_flags = get_fd_flags(flags); |
| let fd_read = current_task.add_file(locked, read, fd_flags)?; |
| let fd_write = current_task.add_file(locked, write, fd_flags)?; |
| log_trace!("pipe2 -> [{:#x}, {:#x}]", fd_read.raw(), fd_write.raw()); |
| |
| current_task.write_object(user_pipe, &fd_read)?; |
| let user_pipe = user_pipe.next()?; |
| current_task.write_object(user_pipe, &fd_write)?; |
| |
| Ok(()) |
| } |
| |
| pub fn sys_ioctl( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| request: u32, |
| arg: SyscallArg, |
| ) -> Result<SyscallResult, Errno> { |
| match request { |
| FIOCLEX => { |
| current_task.files.set_fd_flags(fd, FdFlags::CLOEXEC)?; |
| Ok(SUCCESS) |
| } |
| FIONCLEX => { |
| current_task.files.set_fd_flags(fd, FdFlags::empty())?; |
| Ok(SUCCESS) |
| } |
| _ => { |
| let file = current_task.files.get(fd)?; |
| file.ioctl(locked, current_task, request, arg) |
| } |
| } |
| } |
| |
| pub fn sys_symlinkat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_target: UserCString, |
| new_dir_fd: FdNumber, |
| user_path: UserCString, |
| ) -> Result<(), Errno> { |
| let target = current_task.read_path(user_target)?; |
| if target.is_empty() { |
| return error!(ENOENT); |
| } |
| |
| let path = current_task.read_path(user_path)?; |
| // TODO: This check could probably be moved into parent.symlink(..). |
| if path.is_empty() { |
| return error!(ENOENT); |
| } |
| |
| let res = lookup_parent_at( |
| locked, |
| current_task, |
| new_dir_fd, |
| user_path, |
| |locked, context, parent, basename| { |
| // The path to a new symlink cannot end in `/`. That would imply that we are dereferencing |
| // the symlink to a directory. |
| // |
| // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75 |
| if context.must_be_directory { |
| return error!(ENOENT); |
| } |
| parent.create_symlink(locked, current_task, basename, target.as_ref()) |
| }, |
| ); |
| res?; |
| Ok(()) |
| } |
| |
| pub fn sys_dup( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| oldfd: FdNumber, |
| ) -> Result<FdNumber, Errno> { |
| current_task.files.duplicate( |
| locked, |
| current_task, |
| oldfd, |
| TargetFdNumber::Default, |
| FdFlags::empty(), |
| ) |
| } |
| |
| pub fn sys_dup3( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| oldfd: FdNumber, |
| newfd: FdNumber, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| if oldfd == newfd { |
| return error!(EINVAL); |
| } |
| if flags & !O_CLOEXEC != 0 { |
| return error!(EINVAL); |
| } |
| let fd_flags = get_fd_flags(flags); |
| current_task.files.duplicate( |
| locked, |
| current_task, |
| oldfd, |
| TargetFdNumber::Specific(newfd), |
| fd_flags, |
| )?; |
| Ok(newfd) |
| } |
| |
| /// A memfd file descriptor cannot have a name longer than 250 bytes, including |
| /// the null terminator. |
| /// |
| /// See Errors section of https://man7.org/linux/man-pages/man2/memfd_create.2.html |
| const MEMFD_NAME_MAX_LEN: usize = 250; |
| |
| pub fn sys_memfd_create( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_name: UserCString, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| const HUGE_SHIFTED_MASK: u32 = MFD_HUGE_MASK << MFD_HUGE_SHIFT; |
| |
| if flags |
| & !(MFD_CLOEXEC |
| | MFD_ALLOW_SEALING |
| | MFD_HUGETLB |
| | HUGE_SHIFTED_MASK |
| | MFD_NOEXEC_SEAL |
| | MFD_EXEC) |
| != 0 |
| { |
| track_stub!(TODO("https://fxbug.dev/322875665"), "memfd_create unknown flags", flags); |
| return error!(EINVAL); |
| } |
| |
| let _huge_page_size = if flags & MFD_HUGETLB != 0 { |
| Some(flags & HUGE_SHIFTED_MASK) |
| } else { |
| if flags & HUGE_SHIFTED_MASK != 0 { |
| return error!(EINVAL); |
| } |
| None |
| }; |
| |
| let name = current_task |
| .read_c_string_to_vec(user_name, MEMFD_NAME_MAX_LEN) |
| .map_err(|e| if e == ENAMETOOLONG { errno!(EINVAL) } else { e })?; |
| |
| // This behavior matches MEMFD_NOEXEC_SCOPE_EXEC, which states: |
| // > memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set. |
| // |
| // This behavior can be changed on Linux via sysctl vm.memfd_noexec, which is pid namespaced. |
| // We do not currently support changing this behavior. |
| let seals = if flags & MFD_NOEXEC_SEAL != 0 { |
| SealFlags::NO_EXEC |
| } else if flags & MFD_ALLOW_SEALING != 0 { |
| SealFlags::empty() |
| } else { |
| // Forbid sealing, by sealing the seal operation. |
| SealFlags::SEAL |
| }; |
| |
| let file = new_memfd(locked, current_task, name, seals, OpenFlags::RDWR)?; |
| |
| let mut fd_flags = FdFlags::empty(); |
| if flags & MFD_CLOEXEC != 0 { |
| fd_flags |= FdFlags::CLOEXEC; |
| } |
| let fd = current_task.add_file(locked, file, fd_flags)?; |
| Ok(fd) |
| } |
| |
| pub fn sys_mount( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| source_addr: UserCString, |
| target_addr: UserCString, |
| filesystemtype_addr: UserCString, |
| flags: u32, |
| data_addr: UserCString, |
| ) -> Result<(), Errno> { |
| security::check_task_capable(current_task, CAP_SYS_ADMIN)?; |
| |
| let flags = MountFlags::from_bits(flags).ok_or_else(|| { |
| track_stub!( |
| TODO("https://fxbug.dev/322875327"), |
| "mount unknown flags", |
| flags & !MountFlags::from_bits_truncate(flags).bits() |
| ); |
| errno!(EINVAL) |
| })?; |
| |
| let target = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, LookupFlags::default())?; |
| |
| security::sb_mount(current_task, &target, flags)?; |
| |
| if flags.contains(MountFlags::REMOUNT) { |
| do_mount_remount(current_task, target, flags, data_addr) |
| } else if flags.contains(MountFlags::BIND) { |
| do_mount_bind(locked, current_task, source_addr, target, flags) |
| } else if flags.intersects(MountFlags::SHARED | MountFlags::PRIVATE | MountFlags::DOWNSTREAM) { |
| do_mount_change_propagation_type(current_task, target, flags) |
| } else { |
| do_mount_create( |
| locked, |
| current_task, |
| source_addr, |
| target, |
| filesystemtype_addr, |
| data_addr, |
| flags, |
| ) |
| } |
| } |
| |
| fn do_mount_remount( |
| current_task: &CurrentTask, |
| target: NamespaceNode, |
| flags: MountFlags, |
| data_addr: UserCString, |
| ) -> Result<(), Errno> { |
| if !data_addr.is_null() { |
| track_stub!(TODO("https://fxbug.dev/322875506"), "MS_REMOUNT: Updating data"); |
| } |
| let mount = target.mount_if_root()?; |
| |
| let data = current_task.read_path_if_non_null(data_addr)?; |
| let mount_options = |
| security::sb_eat_lsm_opts(current_task.kernel(), &mut MountParams::parse(data.as_ref())?)?; |
| security::sb_remount(current_task, &mount, mount_options)?; |
| let updated_flags = flags & MountFlags::CHANGEABLE_WITH_REMOUNT; |
| mount.update_flags(updated_flags); |
| if !flags.contains(MountFlags::BIND) { |
| // From <https://man7.org/linux/man-pages/man2/mount.2.html> |
| // |
| // Since Linux 2.6.26, the MS_REMOUNT flag can be used with MS_BIND |
| // to modify only the per-mount-point flags. This is particularly |
| // useful for setting or clearing the "read-only" flag on a mount |
| // without changing the underlying filesystem. |
| track_stub!(TODO("https://fxbug.dev/322875215"), "MS_REMOUNT: Updating superblock flags"); |
| } |
| Ok(()) |
| } |
| |
| fn do_mount_bind( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| source_addr: UserCString, |
| target: NamespaceNode, |
| flags: MountFlags, |
| ) -> Result<(), Errno> { |
| let source = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, source_addr, LookupFlags::default())?; |
| log_trace!( |
| source:% = source.path(current_task), |
| target:% = target.path(current_task), |
| flags:?; |
| "do_mount_bind", |
| ); |
| target.mount(WhatToMount::Bind(source), flags) |
| } |
| |
| fn do_mount_change_propagation_type( |
| current_task: &CurrentTask, |
| target: NamespaceNode, |
| flags: MountFlags, |
| ) -> Result<(), Errno> { |
| log_trace!( |
| target:% = target.path(current_task), |
| flags:?; |
| "do_mount_change_propagation_type", |
| ); |
| |
| // Flag validation. Of the three propagation type flags, exactly one must be passed. The only |
| // valid flags other than propagation type are MS_SILENT and MS_REC. |
| // |
| // Use if statements to find the first propagation type flag, then check for valid flags using |
| // only the first propagation flag and MS_REC / MS_SILENT as valid flags. |
| let propagation_flag = if flags.contains(MountFlags::SHARED) { |
| MountFlags::SHARED |
| } else if flags.contains(MountFlags::PRIVATE) { |
| MountFlags::PRIVATE |
| } else if flags.contains(MountFlags::DOWNSTREAM) { |
| MountFlags::DOWNSTREAM |
| } else { |
| return error!(EINVAL); |
| }; |
| if flags.intersects(!(propagation_flag | MountFlags::REC | MountFlags::SILENT)) { |
| return error!(EINVAL); |
| } |
| |
| let mount = target.mount_if_root()?; |
| mount.change_propagation(propagation_flag, flags.contains(MountFlags::REC)); |
| Ok(()) |
| } |
| |
| fn do_mount_create( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| source_addr: UserCString, |
| target: NamespaceNode, |
| filesystemtype_addr: UserCString, |
| data_addr: UserCString, |
| flags: MountFlags, |
| ) -> Result<(), Errno> { |
| let source = current_task.read_path_if_non_null(source_addr)?; |
| let fs_type = current_task.read_path(filesystemtype_addr)?; |
| let data = current_task.read_path_if_non_null(data_addr)?; |
| log_trace!( |
| source:%, |
| target:% = target.path(current_task), |
| fs_type:%, |
| data:%; |
| "do_mount_create", |
| ); |
| |
| let options = FileSystemOptions { |
| source: source.into(), |
| flags: flags & MountFlags::STORED_ON_FILESYSTEM, |
| params: MountParams::parse(data.as_ref())?, |
| }; |
| |
| let fs = current_task.create_filesystem(locked, fs_type.as_ref(), options)?; |
| |
| security::sb_kern_mount(current_task, &fs)?; |
| target.mount(WhatToMount::Fs(fs), flags) |
| } |
| |
| pub fn sys_umount2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| target_addr: UserCString, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| security::check_task_capable(current_task, CAP_SYS_ADMIN)?; |
| |
| let unmount_flags = UnmountFlags::from_bits(flags).ok_or_else(|| { |
| track_stub!( |
| TODO("https://fxbug.dev/322875327"), |
| "unmount unknown flags", |
| flags & !UnmountFlags::from_bits_truncate(flags).bits() |
| ); |
| errno!(EINVAL) |
| })?; |
| |
| if unmount_flags.contains(UnmountFlags::EXPIRE) |
| && (unmount_flags.contains(UnmountFlags::FORCE) |
| || unmount_flags.contains(UnmountFlags::DETACH)) |
| { |
| return error!(EINVAL); |
| } |
| |
| let lookup_flags = if unmount_flags.contains(UnmountFlags::NOFOLLOW) { |
| LookupFlags::no_follow() |
| } else { |
| LookupFlags::default() |
| }; |
| let target = lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, lookup_flags)?; |
| |
| security::sb_umount(current_task, &target, unmount_flags)?; |
| |
| target.unmount(unmount_flags) |
| } |
| |
| pub fn sys_eventfd2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| value: u32, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| if flags & !(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE) != 0 { |
| return error!(EINVAL); |
| } |
| let blocking = (flags & EFD_NONBLOCK) == 0; |
| let eventfd_type = |
| if (flags & EFD_SEMAPHORE) == 0 { EventFdType::Counter } else { EventFdType::Semaphore }; |
| let file = new_eventfd(locked, current_task, value, eventfd_type, blocking); |
| let fd_flags = if flags & EFD_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }; |
| let fd = current_task.add_file(locked, file, fd_flags)?; |
| Ok(fd) |
| } |
| |
| pub fn sys_pidfd_open( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| pid: pid_t, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| if flags & !PIDFD_NONBLOCK != 0 { |
| return error!(EINVAL); |
| } |
| if pid <= 0 { |
| return error!(EINVAL); |
| } |
| |
| let file = { |
| let pid_table = current_task.kernel().pids.read(); |
| |
| let blocking = (flags & PIDFD_NONBLOCK) == 0; |
| let open_flags = if blocking { OpenFlags::empty() } else { OpenFlags::NONBLOCK }; |
| |
| // Validate that a process (and not just a task) entry exists for the PID. |
| let task = pid_table.get_task(pid); |
| let file = match (pid_table.get_process(pid), task.upgrade()) { |
| (Some(ProcessEntryRef::Process(proc)), Some(task)) => { |
| new_pidfd(locked, current_task, &proc, &*task.mm()?, open_flags) |
| } |
| (Some(ProcessEntryRef::Zombie(_)), _) => { |
| new_zombie_pidfd(locked, current_task, open_flags) |
| } |
| (None, Some(_)) => return error!(EINVAL), |
| _ => return error!(ESRCH), |
| }; |
| file |
| }; |
| |
| current_task.add_file(locked, file, FdFlags::CLOEXEC) |
| } |
| |
| pub fn sys_pidfd_getfd( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| pidfd: FdNumber, |
| targetfd: FdNumber, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| if flags != 0 { |
| return error!(EINVAL); |
| } |
| |
| let file = current_task.files.get(pidfd)?; |
| let tg = file.as_thread_group_key()?; |
| let tg = tg.upgrade().ok_or_else(|| errno!(ESRCH))?; |
| let task = TempRef::into_static(tg.read().tasks().next().ok_or_else(|| errno!(ESRCH))?); |
| |
| current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &task)?; |
| |
| let target_file = task.files.get(targetfd)?; |
| current_task.add_file(locked, target_file, FdFlags::CLOEXEC) |
| } |
| |
| pub fn sys_timerfd_create( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| clock_id: u32, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| let timeline = match clock_id { |
| CLOCK_MONOTONIC => Timeline::Monotonic, |
| CLOCK_BOOTTIME | CLOCK_BOOTTIME_ALARM => Timeline::BootInstant, |
| CLOCK_REALTIME | CLOCK_REALTIME_ALARM => Timeline::RealTime, |
| _ => return error!(EINVAL), |
| }; |
| let timer_type = match clock_id { |
| CLOCK_MONOTONIC | CLOCK_BOOTTIME | CLOCK_REALTIME => TimerWakeup::Regular, |
| CLOCK_BOOTTIME_ALARM | CLOCK_REALTIME_ALARM => { |
| security::check_task_capable(current_task, CAP_WAKE_ALARM)?; |
| TimerWakeup::Alarm |
| } |
| _ => return error!(EINVAL), |
| }; |
| if flags & !(TFD_NONBLOCK | TFD_CLOEXEC) != 0 { |
| track_stub!(TODO("https://fxbug.dev/322875488"), "timerfd_create unknown flags", flags); |
| return error!(EINVAL); |
| } |
| log_trace!("timerfd_create(clock_id={:?}, flags={:#x})", clock_id, flags); |
| |
| let mut open_flags = OpenFlags::RDWR; |
| if flags & TFD_NONBLOCK != 0 { |
| open_flags |= OpenFlags::NONBLOCK; |
| } |
| |
| let mut fd_flags = FdFlags::empty(); |
| if flags & TFD_CLOEXEC != 0 { |
| fd_flags |= FdFlags::CLOEXEC; |
| }; |
| |
| let timer = TimerFile::new_file(locked, current_task, timer_type, timeline, open_flags)?; |
| let fd = current_task.add_file(locked, timer, fd_flags)?; |
| Ok(fd) |
| } |
| |
| pub fn sys_timerfd_gettime( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| user_current_value: ITimerSpecPtr, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?; |
| let timer_info = timer_file.current_timer_spec(); |
| log_trace!("timerfd_gettime(fd={:?}, current_value={:?})", fd, timer_info); |
| current_task.write_multi_arch_object(user_current_value, timer_info)?; |
| Ok(()) |
| } |
| |
| pub fn sys_timerfd_settime( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| flags: u32, |
| user_new_value: ITimerSpecPtr, |
| user_old_value: ITimerSpecPtr, |
| ) -> Result<(), Errno> { |
| if flags & !(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) != 0 { |
| track_stub!(TODO("https://fxbug.dev/322874722"), "timerfd_settime unknown flags", flags); |
| return error!(EINVAL); |
| } |
| |
| let file = current_task.files.get(fd)?; |
| let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?; |
| |
| let new_timer_spec = current_task.read_multi_arch_object(user_new_value)?; |
| let old_timer_spec = timer_file.set_timer_spec(current_task, &file, new_timer_spec, flags)?; |
| log_trace!( |
| "timerfd_settime(fd={:?}, flags={:#x}, new_value={:?}, current_value={:?})", |
| fd, |
| flags, |
| new_timer_spec, |
| old_timer_spec |
| ); |
| if !user_old_value.is_null() { |
| current_task.write_multi_arch_object(user_old_value, old_timer_spec)?; |
| } |
| Ok(()) |
| } |
| |
| fn deadline_after_timespec( |
| current_task: &CurrentTask, |
| user_timespec: TimeSpecPtr, |
| ) -> Result<zx::MonotonicInstant, Errno> { |
| if user_timespec.is_null() { |
| Ok(zx::MonotonicInstant::INFINITE) |
| } else { |
| let timespec = current_task.read_multi_arch_object(user_timespec)?; |
| Ok(zx::MonotonicInstant::after(duration_from_timespec(timespec)?)) |
| } |
| } |
| |
| static_assertions::assert_eq_size!(uapi::__kernel_fd_set, uapi::arch32::__kernel_fd_set); |
| |
| fn select( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| nfds: u32, |
| readfds_addr: UserRef<__kernel_fd_set>, |
| writefds_addr: UserRef<__kernel_fd_set>, |
| exceptfds_addr: UserRef<__kernel_fd_set>, |
| deadline: zx::MonotonicInstant, |
| sigmask_addr: UserRef<pselect6_sigmask>, |
| ) -> Result<i32, Errno> { |
| const BITS_PER_BYTE: usize = 8; |
| |
| fn sizeof<T>(_: &T) -> usize { |
| BITS_PER_BYTE * std::mem::size_of::<T>() |
| } |
| fn is_fd_set(set: &__kernel_fd_set, fd: usize) -> bool { |
| let index = fd / sizeof(&set.fds_bits[0]); |
| let remainder = fd % sizeof(&set.fds_bits[0]); |
| set.fds_bits[index] & (1 << remainder) > 0 |
| } |
| fn add_fd_to_set(set: &mut __kernel_fd_set, fd: usize) { |
| let index = fd / sizeof(&set.fds_bits[0]); |
| let remainder = fd % sizeof(&set.fds_bits[0]); |
| |
| set.fds_bits[index] |= 1 << remainder; |
| } |
| let read_fd_set = |addr: UserRef<__kernel_fd_set>| { |
| if addr.is_null() { Ok(Default::default()) } else { current_task.read_object(addr) } |
| }; |
| |
| if nfds as usize > BITS_PER_BYTE * std::mem::size_of::<__kernel_fd_set>() { |
| return error!(EINVAL); |
| } |
| |
| let read_events = |
| FdEvents::from_bits_truncate(POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR); |
| let write_events = FdEvents::from_bits_truncate(POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR); |
| let except_events = FdEvents::from_bits_truncate(POLLPRI); |
| |
| let readfds = read_fd_set(readfds_addr)?; |
| let writefds = read_fd_set(writefds_addr)?; |
| let exceptfds = read_fd_set(exceptfds_addr)?; |
| |
| let sets = &[(read_events, &readfds), (write_events, &writefds), (except_events, &exceptfds)]; |
| let waiter = FileWaiter::<FdNumber>::default(); |
| |
| for fd in 0..nfds { |
| let mut aggregated_events = FdEvents::empty(); |
| for (events, fds) in sets.iter() { |
| if is_fd_set(fds, fd as usize) { |
| aggregated_events |= *events; |
| } |
| } |
| if !aggregated_events.is_empty() { |
| let fd = FdNumber::from_raw(fd as i32); |
| let file = current_task.files.get(fd)?; |
| waiter.add(locked, current_task, fd, Some(&file), aggregated_events)?; |
| } |
| } |
| |
| let mask = if !sigmask_addr.is_null() { |
| let sigmask = current_task.read_object(sigmask_addr)?; |
| let mask = if sigmask.ss.is_null() { |
| current_task.read().signal_mask() |
| } else { |
| if sigmask.ss_len < std::mem::size_of::<sigset_t>() { |
| return error!(EINVAL); |
| } |
| current_task.read_object(sigmask.ss.into())? |
| }; |
| Some(mask) |
| } else { |
| None |
| }; |
| |
| waiter.wait(locked, current_task, mask, deadline)?; |
| |
| let mut num_fds = 0; |
| let mut readfds_out: __kernel_fd_set = Default::default(); |
| let mut writefds_out: __kernel_fd_set = Default::default(); |
| let mut exceptfds_out: __kernel_fd_set = Default::default(); |
| let mut sets = [ |
| (read_events, &readfds, &mut readfds_out), |
| (write_events, &writefds, &mut writefds_out), |
| (except_events, &exceptfds, &mut exceptfds_out), |
| ]; |
| let mut ready_items = waiter.ready_items.lock(); |
| for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) { |
| let ready_key = assert_matches::assert_matches!( |
| ready_key, |
| ReadyItemKey::FdNumber(v) => v |
| ); |
| |
| sets.iter_mut().for_each(|(events, fds, fds_out)| { |
| let fd = ready_key.raw() as usize; |
| if events.intersects(ready_events) && is_fd_set(fds, fd) { |
| add_fd_to_set(fds_out, fd); |
| num_fds += 1; |
| } |
| }); |
| } |
| |
| let write_fd_set = |
| |addr: UserRef<__kernel_fd_set>, value: __kernel_fd_set| -> Result<(), Errno> { |
| if !addr.is_null() { |
| current_task.write_object(addr, &value)?; |
| } |
| Ok(()) |
| }; |
| write_fd_set(readfds_addr, readfds_out)?; |
| write_fd_set(writefds_addr, writefds_out)?; |
| write_fd_set(exceptfds_addr, exceptfds_out)?; |
| Ok(num_fds) |
| } |
| |
| pub fn sys_pselect6( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| nfds: u32, |
| readfds_addr: UserRef<__kernel_fd_set>, |
| writefds_addr: UserRef<__kernel_fd_set>, |
| exceptfds_addr: UserRef<__kernel_fd_set>, |
| timeout_addr: TimeSpecPtr, |
| sigmask_addr: UserRef<pselect6_sigmask>, |
| ) -> Result<i32, Errno> { |
| let deadline = deadline_after_timespec(current_task, timeout_addr)?; |
| |
| let num_fds = select( |
| locked, |
| current_task, |
| nfds, |
| readfds_addr, |
| writefds_addr, |
| exceptfds_addr, |
| deadline, |
| sigmask_addr, |
| )?; |
| |
| if !timeout_addr.is_null() |
| && !current_task |
| .thread_group() |
| .read() |
| .personality |
| .contains(PersonalityFlags::STICKY_TIMEOUTS) |
| { |
| let now = zx::MonotonicInstant::get(); |
| let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0)); |
| current_task.write_multi_arch_object(timeout_addr, timespec_from_duration(remaining))?; |
| } |
| |
| Ok(num_fds) |
| } |
| |
| pub fn sys_select( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| nfds: u32, |
| readfds_addr: UserRef<__kernel_fd_set>, |
| writefds_addr: UserRef<__kernel_fd_set>, |
| exceptfds_addr: UserRef<__kernel_fd_set>, |
| timeout_addr: TimeValPtr, |
| ) -> Result<i32, Errno> { |
| let start_time = zx::MonotonicInstant::get(); |
| |
| let deadline = if timeout_addr.is_null() { |
| zx::MonotonicInstant::INFINITE |
| } else { |
| let timeval = current_task.read_multi_arch_object(timeout_addr)?; |
| start_time + starnix_types::time::duration_from_timeval(timeval)? |
| }; |
| |
| let num_fds = select( |
| locked, |
| current_task, |
| nfds, |
| readfds_addr, |
| writefds_addr, |
| exceptfds_addr, |
| deadline, |
| UserRef::<pselect6_sigmask>::default(), |
| )?; |
| |
| if !timeout_addr.is_null() |
| && !current_task |
| .thread_group() |
| .read() |
| .personality |
| .contains(PersonalityFlags::STICKY_TIMEOUTS) |
| { |
| let now = zx::MonotonicInstant::get(); |
| let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0)); |
| current_task.write_multi_arch_object( |
| timeout_addr, |
| starnix_types::time::timeval_from_duration(remaining), |
| )?; |
| } |
| |
| Ok(num_fds) |
| } |
| |
| pub fn sys_epoll_create1( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| if flags & !EPOLL_CLOEXEC != 0 { |
| return error!(EINVAL); |
| } |
| let ep_file = EpollFileObject::new_file(locked, current_task); |
| let fd_flags = if flags & EPOLL_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }; |
| let fd = current_task.add_file(locked, ep_file, fd_flags)?; |
| Ok(fd) |
| } |
| |
| pub fn sys_epoll_ctl( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| epfd: FdNumber, |
| op: u32, |
| fd: FdNumber, |
| event: UserRef<EpollEvent>, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(epfd)?; |
| let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?; |
| let operand_file = current_task.files.get(fd)?; |
| |
| if Arc::ptr_eq(&file, &operand_file) { |
| return error!(EINVAL); |
| } |
| |
| let epoll_event = match current_task.read_object(event) { |
| Ok(mut epoll_event) => { |
| // If EPOLLWAKEUP is specified in flags, but the caller does not have the CAP_BLOCK_SUSPEND |
| // capability, then the EPOLLWAKEUP flag is silently ignored. |
| // See https://man7.org/linux/man-pages/man2/epoll_ctl.2.html |
| if epoll_event.events().contains(FdEvents::EPOLLWAKEUP) { |
| if !security::is_task_capable_noaudit(current_task, CAP_BLOCK_SUSPEND) { |
| epoll_event.ignore(FdEvents::EPOLLWAKEUP); |
| } |
| } |
| Ok(epoll_event) |
| } |
| result => result, |
| }; |
| |
| match op { |
| EPOLL_CTL_ADD => { |
| epoll_file.add(locked, current_task, &operand_file, &file, epoll_event?)?; |
| operand_file.register_epfd(&file); |
| } |
| EPOLL_CTL_MOD => { |
| epoll_file.modify(locked, current_task, &operand_file, epoll_event?)?; |
| } |
| EPOLL_CTL_DEL => { |
| epoll_file.delete(&operand_file)?; |
| current_task |
| .kernel() |
| .suspend_resume_manager |
| .remove_epoll(operand_file.id.as_epoll_key()); |
| operand_file.unregister_epfd(&file); |
| } |
| _ => return error!(EINVAL), |
| } |
| Ok(()) |
| } |
| |
| // Backend for sys_epoll_pwait and sys_epoll_pwait2 that takes an already-decoded deadline. |
| fn do_epoll_pwait( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| epfd: FdNumber, |
| events: UserRef<EpollEvent>, |
| unvalidated_max_events: i32, |
| deadline: zx::MonotonicInstant, |
| user_sigmask: UserRef<SigSet>, |
| ) -> Result<usize, Errno> { |
| let file = current_task.files.get(epfd)?; |
| let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?; |
| |
| // Max_events must be greater than 0. |
| let max_events: usize = unvalidated_max_events.try_into().map_err(|_| errno!(EINVAL))?; |
| if max_events == 0 { |
| return error!(EINVAL); |
| } |
| |
| // Return early if the user passes an obviously invalid pointer. This avoids dropping events |
| // for common pointer errors. When we catch bad pointers after the wait is complete when the |
| // memory is actually written, the events will be lost. This check is not a guarantee. |
| current_task |
| .mm()? |
| .check_plausible(events.addr(), max_events * std::mem::size_of::<EpollEvent>())?; |
| |
| let active_events = if !user_sigmask.is_null() { |
| let signal_mask = current_task.read_object(user_sigmask)?; |
| current_task.wait_with_temporary_mask(locked, signal_mask, |locked, current_task| { |
| epoll_file.wait(locked, current_task, max_events, deadline) |
| })? |
| } else { |
| epoll_file.wait(locked, current_task, max_events, deadline)? |
| }; |
| |
| current_task.write_objects(events, &active_events)?; |
| Ok(active_events.len()) |
| } |
| |
| pub fn sys_epoll_pwait( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| epfd: FdNumber, |
| events: UserRef<EpollEvent>, |
| max_events: i32, |
| timeout: i32, |
| user_sigmask: UserRef<SigSet>, |
| ) -> Result<usize, Errno> { |
| let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?); |
| do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask) |
| } |
| |
| pub fn sys_epoll_pwait2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| epfd: FdNumber, |
| events: UserRef<EpollEvent>, |
| max_events: i32, |
| user_timespec: TimeSpecPtr, |
| user_sigmask: UserRef<SigSet>, |
| ) -> Result<usize, Errno> { |
| let deadline = deadline_after_timespec(current_task, user_timespec)?; |
| do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask) |
| } |
| |
| struct FileWaiter<Key: Into<ReadyItemKey>> { |
| waiter: Waiter, |
| ready_items: Arc<Mutex<VecDeque<ReadyItem>>>, |
| _marker: PhantomData<Key>, |
| } |
| |
| impl<Key: Into<ReadyItemKey>> Default for FileWaiter<Key> { |
| fn default() -> Self { |
| Self { waiter: Waiter::new(), ready_items: Default::default(), _marker: PhantomData } |
| } |
| } |
| |
| impl<Key: Into<ReadyItemKey>> FileWaiter<Key> { |
| fn add<L>( |
| &self, |
| locked: &mut Locked<L>, |
| current_task: &CurrentTask, |
| key: Key, |
| file: Option<&FileHandle>, |
| requested_events: FdEvents, |
| ) -> Result<(), Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| let key = key.into(); |
| |
| if let Some(file) = file { |
| let sought_events = requested_events | FdEvents::POLLERR | FdEvents::POLLHUP; |
| |
| let handler = |
| EventHandler::Enqueue { key, queue: self.ready_items.clone(), sought_events }; |
| file.wait_async(locked, current_task, &self.waiter, sought_events, handler); |
| let current_events = file.query_events(locked, current_task)? & sought_events; |
| if !current_events.is_empty() { |
| self.ready_items.lock().push_back(ReadyItem { key, events: current_events }); |
| } |
| } else { |
| self.ready_items.lock().push_back(ReadyItem { key, events: FdEvents::POLLNVAL }); |
| } |
| Ok(()) |
| } |
| |
| fn wait<L>( |
| &self, |
| locked: &mut Locked<L>, |
| current_task: &mut CurrentTask, |
| signal_mask: Option<SigSet>, |
| deadline: zx::MonotonicInstant, |
| ) -> Result<(), Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| if self.ready_items.lock().is_empty() { |
| // When wait_until() returns Ok() it means there was a wake up; however there may not |
| // be a ready item, for example if waiting on a sync file with multiple sync points. |
| // Keep waiting until there's at least one ready item. |
| let signal_mask = signal_mask.unwrap_or_else(|| current_task.read().signal_mask()); |
| let mut result = current_task.wait_with_temporary_mask( |
| locked, |
| signal_mask, |
| |locked, current_task| self.waiter.wait_until(locked, current_task, deadline), |
| ); |
| loop { |
| match result { |
| Err(err) if err == ETIMEDOUT => return Ok(()), |
| Ok(()) => { |
| if !self.ready_items.lock().is_empty() { |
| break; |
| } |
| } |
| result => result?, |
| }; |
| result = self.waiter.wait_until(locked, current_task, deadline); |
| } |
| } |
| Ok(()) |
| } |
| } |
| |
| pub fn poll( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| user_pollfds: UserRef<pollfd>, |
| num_fds: i32, |
| mask: Option<SigSet>, |
| deadline: zx::MonotonicInstant, |
| ) -> Result<usize, Errno> { |
| if num_fds < 0 |
| || num_fds as u64 > current_task.thread_group().get_rlimit(locked, Resource::NOFILE) |
| { |
| return error!(EINVAL); |
| } |
| |
| let mut pollfds = vec![pollfd::default(); num_fds as usize]; |
| let waiter = FileWaiter::<usize>::default(); |
| |
| for (index, poll_descriptor) in pollfds.iter_mut().enumerate() { |
| *poll_descriptor = current_task.read_object(user_pollfds.at(index)?)?; |
| poll_descriptor.revents = 0; |
| if poll_descriptor.fd < 0 { |
| continue; |
| } |
| let file = current_task.files.get(FdNumber::from_raw(poll_descriptor.fd)).ok(); |
| waiter.add( |
| locked, |
| current_task, |
| index, |
| file.as_ref(), |
| FdEvents::from_bits_truncate(poll_descriptor.events as u32), |
| )?; |
| } |
| |
| waiter.wait(locked, current_task, mask, deadline)?; |
| |
| let mut ready_items = waiter.ready_items.lock(); |
| let mut unique_ready_items = |
| bit_vec::BitVec::from_elem(usize::try_from(num_fds).unwrap(), false); |
| for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) { |
| let ready_key = assert_matches::assert_matches!( |
| ready_key, |
| ReadyItemKey::Usize(v) => v |
| ); |
| let interested_events = FdEvents::from_bits_truncate(pollfds[ready_key].events as u32) |
| | FdEvents::POLLERR |
| | FdEvents::POLLHUP |
| | FdEvents::POLLNVAL; |
| let return_events = (interested_events & ready_events).bits(); |
| pollfds[ready_key].revents = return_events as i16; |
| unique_ready_items.set(ready_key, true); |
| } |
| |
| for (index, poll_descriptor) in pollfds.iter().enumerate() { |
| current_task.write_object(user_pollfds.at(index)?, poll_descriptor)?; |
| } |
| |
| Ok(unique_ready_items.into_iter().filter(Clone::clone).count()) |
| } |
| |
| pub fn sys_ppoll( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| user_fds: UserRef<pollfd>, |
| num_fds: i32, |
| user_timespec: TimeSpecPtr, |
| user_mask: UserRef<SigSet>, |
| sigset_size: usize, |
| ) -> Result<usize, Errno> { |
| let start_time = zx::MonotonicInstant::get(); |
| |
| let timeout = if user_timespec.is_null() { |
| // Passing -1 to poll is equivalent to an infinite timeout. |
| -1 |
| } else { |
| let ts = current_task.read_multi_arch_object(user_timespec)?; |
| duration_from_timespec::<zx::MonotonicTimeline>(ts)?.into_millis() as i32 |
| }; |
| |
| let deadline = start_time + duration_from_poll_timeout(timeout)?; |
| |
| let mask = if !user_mask.is_null() { |
| if sigset_size != std::mem::size_of::<SigSet>() { |
| return error!(EINVAL); |
| } |
| let mask = current_task.read_object(user_mask)?; |
| Some(mask) |
| } else { |
| None |
| }; |
| |
| let poll_result = poll(locked, current_task, user_fds, num_fds, mask, deadline); |
| |
| if user_timespec.is_null() { |
| return poll_result; |
| } |
| |
| let now = zx::MonotonicInstant::get(); |
| let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0)); |
| let remaining_timespec = timespec_from_duration(remaining); |
| |
| // From gVisor: "ppoll is normally restartable if interrupted by something other than a signal |
| // handled by the application (i.e. returns ERESTARTNOHAND). However, if |
| // [copy out] failed, then the restarted ppoll would use the wrong timeout, so the |
| // error should be left as EINTR." |
| match (current_task.write_multi_arch_object(user_timespec, remaining_timespec), poll_result) { |
| // If write was ok, and poll was ok, return poll result. |
| (Ok(_), Ok(num_events)) => Ok(num_events), |
| (Ok(_), Err(e)) if e == EINTR => { |
| error!(ERESTARTNOHAND) |
| } |
| (Ok(_), poll_result) => poll_result, |
| // If write was a failure, return the poll result unchanged. |
| (Err(_), poll_result) => poll_result, |
| } |
| } |
| |
| pub fn sys_flock( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| operation: u32, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| let operation = FlockOperation::from_flags(operation)?; |
| security::check_file_lock_access(current_task, &file)?; |
| file.flock(locked, current_task, operation) |
| } |
| |
| pub fn sys_sync(_locked: &mut Locked<Unlocked>, _current_task: &CurrentTask) -> Result<(), Errno> { |
| track_stub!(TODO("https://fxbug.dev/322875826"), "sync()"); |
| Ok(()) |
| } |
| |
| pub fn sys_syncfs( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| ) -> Result<(), Errno> { |
| let _file = current_task.files.get(fd)?; |
| track_stub!(TODO("https://fxbug.dev/322875646"), "syncfs"); |
| Ok(()) |
| } |
| |
| pub fn sys_fsync( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| file.sync(current_task) |
| } |
| |
| pub fn sys_fdatasync( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| file.data_sync(current_task) |
| } |
| |
| pub fn sys_sync_file_range( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| offset: off_t, |
| length: off_t, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| const KNOWN_FLAGS: u32 = uapi::SYNC_FILE_RANGE_WAIT_BEFORE |
| | uapi::SYNC_FILE_RANGE_WRITE |
| | uapi::SYNC_FILE_RANGE_WAIT_AFTER; |
| if flags & !KNOWN_FLAGS != 0 { |
| return error!(EINVAL); |
| } |
| |
| let file = current_task.files.get(fd)?; |
| |
| if offset < 0 || length < 0 { |
| return error!(EINVAL); |
| } |
| |
| checked_add_offset_and_length(offset as usize, length as usize)?; |
| |
| // From <https://linux.die.net/man/2/sync_file_range>: |
| // |
| // fd refers to something other than a regular file, a block device, a directory, or a symbolic link. |
| let mode = file.node().info().mode; |
| if !mode.is_reg() && !mode.is_blk() && !mode.is_dir() && !mode.is_lnk() { |
| return error!(ESPIPE); |
| } |
| |
| if flags == 0 { |
| return Ok(()); |
| } |
| |
| // Syncing the whole file is much more than we need for sync_file_range, which only needs to |
| // sync the specified data range. |
| file.data_sync(current_task) |
| } |
| |
| pub fn sys_fadvise64( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| offset: off_t, |
| len: off_t, |
| advice: u32, |
| ) -> Result<(), Errno> { |
| match advice { |
| POSIX_FADV_NORMAL => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NORMAL"), |
| POSIX_FADV_RANDOM => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_RANDOM"), |
| POSIX_FADV_SEQUENTIAL => { |
| track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_SEQUENTIAL") |
| } |
| POSIX_FADV_WILLNEED => { |
| track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_WILLNEED") |
| } |
| POSIX_FADV_DONTNEED => { |
| track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_DONTNEED") |
| } |
| POSIX_FADV_NOREUSE => { |
| track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NOREUSE") |
| } |
| _ => { |
| track_stub!(TODO("https://fxbug.dev/322875684"), "fadvise64 unknown advice", advice); |
| return error!(EINVAL); |
| } |
| } |
| |
| if offset < 0 || len < 0 { |
| return error!(EINVAL); |
| } |
| |
| let file = current_task.files.get(fd)?; |
| // fadvise does not work on pipes. |
| if file.downcast_file::<PipeFileObject>().is_some() { |
| return error!(ESPIPE); |
| } |
| |
| // fadvise does not work on paths. |
| if file.flags().contains(OpenFlags::PATH) { |
| return error!(EBADF); |
| } |
| |
| Ok(()) |
| } |
| |
| pub fn sys_fallocate( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| mode: u32, |
| offset: off_t, |
| len: off_t, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| |
| // Offset must not be less than 0. |
| // Length must not be less than or equal to 0. |
| // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS |
| if offset < 0 || len <= 0 { |
| return error!(EINVAL); |
| } |
| |
| let mode = FallocMode::from_bits(mode).ok_or_else(|| errno!(EINVAL))?; |
| file.fallocate(locked, current_task, mode, offset as u64, len as u64)?; |
| |
| Ok(()) |
| } |
| |
| pub fn sys_inotify_init1( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| flags: u32, |
| ) -> Result<FdNumber, Errno> { |
| if flags & !(IN_NONBLOCK | IN_CLOEXEC) != 0 { |
| return error!(EINVAL); |
| } |
| let non_blocking = flags & IN_NONBLOCK != 0; |
| let close_on_exec = flags & IN_CLOEXEC != 0; |
| let inotify_file = InotifyFileObject::new_file(locked, current_task, non_blocking); |
| let fd_flags = if close_on_exec { FdFlags::CLOEXEC } else { FdFlags::empty() }; |
| current_task.add_file(locked, inotify_file, fd_flags) |
| } |
| |
| pub fn sys_inotify_add_watch( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| user_path: UserCString, |
| mask: u32, |
| ) -> Result<WdNumber, Errno> { |
| let mask = InotifyMask::from_bits(mask).ok_or_else(|| errno!(EINVAL))?; |
| if !mask.intersects(InotifyMask::ALL_EVENTS) { |
| // Mask must include at least 1 event. |
| return error!(EINVAL); |
| } |
| let file = current_task.files.get(fd)?; |
| let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?; |
| let options = if mask.contains(InotifyMask::DONT_FOLLOW) { |
| LookupFlags::no_follow() |
| } else { |
| LookupFlags::default() |
| }; |
| let watched_node = lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, options)?; |
| if mask.contains(InotifyMask::ONLYDIR) && !watched_node.entry.node.is_dir() { |
| return error!(ENOTDIR); |
| } |
| inotify_file.add_watch(watched_node.entry, mask, &file) |
| } |
| |
| pub fn sys_inotify_rm_watch( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| watch_id: WdNumber, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?; |
| inotify_file.remove_watch(watch_id, &file) |
| } |
| |
| pub fn sys_utimensat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| dir_fd: FdNumber, |
| user_path: UserCString, |
| user_times: TimeSpecPtr, |
| flags: u32, |
| ) -> Result<(), Errno> { |
| let (atime, mtime) = if user_times.addr().is_null() { |
| // If user_times is null, the timestamps are updated to the current time. |
| (TimeUpdateType::Now, TimeUpdateType::Now) |
| } else { |
| let ts = current_task.read_multi_arch_objects_to_vec(user_times, 2)?; |
| let atime = ts[0]; |
| let mtime = ts[1]; |
| let parse_timespec = |spec: timespec| match spec.tv_nsec { |
| UTIME_NOW => Ok(TimeUpdateType::Now), |
| UTIME_OMIT => Ok(TimeUpdateType::Omit), |
| _ => time_from_timespec(spec).map(TimeUpdateType::Time), |
| }; |
| (parse_timespec(atime)?, parse_timespec(mtime)?) |
| }; |
| |
| if let (TimeUpdateType::Omit, TimeUpdateType::Omit) = (atime, mtime) { |
| return Ok(()); |
| }; |
| |
| // Non-standard feature: if user_path is null, the timestamps are updated on the file referred |
| // to by dir_fd. |
| // See https://man7.org/linux/man-pages/man2/utimensat.2.html |
| let name = if user_path.addr().is_null() { |
| if dir_fd == FdNumber::AT_FDCWD { |
| return error!(EFAULT); |
| } |
| let (node, _) = current_task.resolve_dir_fd( |
| locked, |
| dir_fd, |
| Default::default(), |
| ResolveFlags::empty(), |
| )?; |
| node |
| } else { |
| let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW)?; |
| lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)? |
| }; |
| name.entry.node.update_atime_mtime(locked, current_task, &name.mount, atime, mtime)?; |
| let event_mask = match (atime, mtime) { |
| (_, TimeUpdateType::Omit) => InotifyMask::ACCESS, |
| (TimeUpdateType::Omit, _) => InotifyMask::MODIFY, |
| (_, _) => InotifyMask::ATTRIB, |
| }; |
| name.entry.notify_ignoring_excl_unlink(event_mask); |
| Ok(()) |
| } |
| |
| pub fn sys_splice( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd_in: FdNumber, |
| off_in: OffsetPtr, |
| fd_out: FdNumber, |
| off_out: OffsetPtr, |
| len: usize, |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| splice::splice(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags) |
| } |
| |
| pub fn sys_vmsplice( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| iovec_addr: IOVecPtr, |
| iovec_count: UserValue<i32>, |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| splice::vmsplice(locked, current_task, fd, iovec_addr, iovec_count, flags) |
| } |
| |
| pub fn sys_copy_file_range( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd_in: FdNumber, |
| off_in: OffsetPtr, |
| fd_out: FdNumber, |
| off_out: OffsetPtr, |
| len: usize, |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| splice::copy_file_range(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags) |
| } |
| |
| pub fn sys_tee( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd_in: FdNumber, |
| fd_out: FdNumber, |
| len: usize, |
| flags: u32, |
| ) -> Result<usize, Errno> { |
| splice::tee(locked, current_task, fd_in, fd_out, len, flags) |
| } |
| |
| pub fn sys_readahead( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| offset: off_t, |
| length: usize, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get(fd)?; |
| // Allow only non-negative values of `offset`. Some versions of Linux allow it to be negative, |
| // but GVisor tests require `readahead()` to fail in this case. |
| let offset: usize = offset.try_into().map_err(|_| errno!(EINVAL))?; |
| file.readahead(current_task, offset, length) |
| } |
| |
| pub fn sys_io_setup( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_nr_events: UserValue<u32>, |
| user_ctx_idp: MultiArchUserRef<uapi::aio_context_t, uapi::arch32::aio_context_t>, |
| ) -> Result<(), Errno> { |
| // From https://man7.org/linux/man-pages/man2/io_setup.2.html: |
| // |
| // EINVAL ctx_idp is not initialized, or the specified nr_events |
| // exceeds internal limits. nr_events should be greater than |
| // 0. |
| // |
| // TODO: Determine what "internal limits" means. |
| let max_operations = |
| user_nr_events.validate(0..(i32::MAX as u32)).ok_or_else(|| errno!(EINVAL))? as usize; |
| if current_task.read_multi_arch_object(user_ctx_idp)? != 0 { |
| return error!(EINVAL); |
| } |
| let ctx_id = AioContext::create(current_task, max_operations)?; |
| current_task.write_multi_arch_object(user_ctx_idp, ctx_id).map_err(|e| { |
| let _ = current_task |
| .mm() |
| .expect("previous sys_io_setup code verified mm exists") |
| .destroy_aio_context(ctx_id.into()); |
| e |
| })?; |
| Ok(()) |
| } |
| |
| pub fn sys_io_submit( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| ctx_id: aio_context_t, |
| user_nr: UserValue<i32>, |
| mut iocb_addrs: IocbPtrPtr, |
| ) -> Result<i32, Errno> { |
| let nr = user_nr.validate(0..i32::MAX).ok_or_else(|| errno!(EINVAL))?; |
| if nr == 0 { |
| return Ok(0); |
| } |
| let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?; |
| |
| // `iocbpp` is an array of addresses to iocb's. |
| let mut num_submitted: i32 = 0; |
| loop { |
| let iocb_ref = current_task.read_multi_arch_ptr(iocb_addrs)?; |
| let control_block = current_task.read_multi_arch_object(iocb_ref)?; |
| |
| match (num_submitted, ctx.submit(current_task, control_block, iocb_ref)) { |
| (0, Err(e)) => return Err(e), |
| (_, Err(_)) => break, |
| (_, Ok(())) => { |
| num_submitted += 1; |
| if num_submitted == nr { |
| break; |
| } |
| } |
| }; |
| |
| iocb_addrs = iocb_addrs.next()?; |
| } |
| |
| Ok(num_submitted) |
| } |
| |
| pub fn sys_io_getevents( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| ctx_id: aio_context_t, |
| min_nr: i64, |
| nr: i64, |
| events_ref: UserRef<io_event>, |
| user_timeout: TimeSpecPtr, |
| ) -> Result<i32, Errno> { |
| if min_nr < 0 || min_nr > nr || nr < 0 { |
| return error!(EINVAL); |
| } |
| let min_results = min_nr as usize; |
| let max_results = nr as usize; |
| let deadline = deadline_after_timespec(current_task, user_timeout)?; |
| |
| let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?; |
| let events = ctx.get_events(current_task, min_results, max_results, deadline)?; |
| current_task.write_objects(events_ref, &events)?; |
| |
| Ok(events.len() as i32) |
| } |
| |
| pub fn sys_io_cancel( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| ctx_id: aio_context_t, |
| user_iocb: IocbPtr, |
| _result: UserRef<io_event>, |
| ) -> Result<(), Errno> { |
| let iocb = current_task.read_multi_arch_object(user_iocb)?; |
| let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?; |
| |
| ctx.cancel(current_task, iocb, user_iocb)?; |
| // TODO: Correctly handle return. If the operation is successfully canceled, the event should be copied into the memory pointed to by result without being placed into the completion queue. |
| track_stub!(TODO("https://fxbug.dev/297433877"), "io_cancel"); |
| Ok(()) |
| } |
| |
| pub fn sys_io_destroy( |
| _locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| ctx_id: aio_context_t, |
| ) -> Result<(), Errno> { |
| let aio_context = current_task.mm()?.destroy_aio_context(ctx_id.into())?; |
| std::mem::drop(aio_context); |
| Ok(()) |
| } |
| |
| pub fn sys_io_uring_setup( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_entries: UserValue<u32>, |
| user_params: UserRef<io_uring_params>, |
| ) -> Result<FdNumber, Errno> { |
| // TODO: https://fxbug.dev/397186254 - we will want to do a no-audit CAP_IPC_LOCK capability |
| // check; see "If not granted CAP_IPC_LOCK io_uring operations are accounted against the user's |
| // RLIMIT_MEMLOCK limit" at |
| // https://github.com/SELinuxProject/selinux-notebook/blob/main/src/auditing.md#capability-audit-exemptions |
| |
| if !current_task.kernel().features.io_uring { |
| return error!(ENOSYS); |
| } |
| |
| // Apply policy from /proc/sys/kernel/io_uring_disabled |
| let limits = ¤t_task.kernel().system_limits; |
| match limits.io_uring_disabled.load(atomic::Ordering::Relaxed) { |
| 0 => (), |
| 1 => { |
| let io_uring_group = limits.io_uring_group.load(atomic::Ordering::Relaxed).try_into(); |
| if io_uring_group.is_err() |
| || !current_task |
| .with_current_creds(|creds| creds.is_in_group(io_uring_group.unwrap())) |
| { |
| security::check_task_capable(current_task, CAP_SYS_ADMIN)?; |
| } |
| } |
| _ => { |
| return error!(EPERM); |
| } |
| } |
| |
| let entries = user_entries.validate(1..IORING_MAX_ENTRIES).ok_or_else(|| errno!(EINVAL))?; |
| |
| let mut params = current_task.read_object(user_params)?; |
| for byte in params.resv { |
| if byte != 0 { |
| return error!(EINVAL); |
| } |
| } |
| |
| let file = IoUringFileObject::new_file(locked, current_task, entries, &mut params)?; |
| |
| // io_uring file descriptors are always created with CLOEXEC. |
| let fd = current_task.add_file(locked, file, FdFlags::CLOEXEC)?; |
| current_task.write_object(user_params, ¶ms)?; |
| Ok(fd) |
| } |
| |
| pub fn sys_io_uring_enter( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| to_submit: u32, |
| min_complete: u32, |
| flags: u32, |
| _sig: UserRef<SigSet>, |
| sigset_size: usize, |
| ) -> Result<u32, Errno> { |
| if !current_task.kernel().features.io_uring { |
| return error!(ENOSYS); |
| } |
| if !_sig.is_null() { |
| if sigset_size != std::mem::size_of::<SigSet>() { |
| return error!(EINVAL); |
| } |
| } |
| let file = current_task.files.get(fd)?; |
| let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?; |
| // TODO(https://fxbug.dev/297431387): Use `_sig` to change the signal mask for `current_task`. |
| io_uring.enter(locked, current_task, to_submit, min_complete, flags) |
| } |
| |
| pub fn sys_io_uring_register( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| opcode: u32, |
| arg: UserAddress, |
| nr_args: UserValue<u32>, |
| ) -> Result<SyscallResult, Errno> { |
| if !current_task.kernel().features.io_uring { |
| return error!(ENOSYS); |
| } |
| let file = current_task.files.get(fd)?; |
| let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?; |
| match opcode { |
| IORING_REGISTER_BUFFERS => { |
| // TODO(https://fxbug.dev/297431387): Check nr_args for zero and return EINVAL here. |
| let iovec = IOVecPtr::new(current_task, arg); |
| let buffers = current_task.read_iovec(iovec, nr_args)?; |
| io_uring.register_buffers(locked, buffers); |
| return Ok(SUCCESS); |
| } |
| IORING_UNREGISTER_BUFFERS => { |
| if !arg.is_null() { |
| return error!(EINVAL); |
| } |
| io_uring.unregister_buffers(locked); |
| return Ok(SUCCESS); |
| } |
| IORING_REGISTER_IOWQ_MAX_WORKERS => { |
| track_stub!( |
| TODO("https://fxbug.dev/297431387"), |
| "io_uring_register IORING_REGISTER_IOWQ_MAX_WORKERS", |
| opcode |
| ); |
| // The current implementation only ever use 1 worker for read and 1 for write. |
| return Ok(SUCCESS); |
| } |
| IORING_REGISTER_RING_FDS => { |
| track_stub!( |
| TODO("https://fxbug.dev/297431387"), |
| "io_uring_register IORING_REGISTER_RING_FDS", |
| opcode |
| ); |
| // The current implementation doesn't use any thread local specific identifier for |
| // performance. Instead, when registering a fd, just return the passed fd as the value |
| // to use. |
| let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?; |
| if nr_args > 16 { |
| return error!(EINVAL); |
| } |
| let updates_addr = UserRef::<uapi::io_uring_rsrc_update>::from(arg); |
| let mut updates = current_task |
| .read_objects_to_smallvec::<uapi::io_uring_rsrc_update, 1>(updates_addr, nr_args)?; |
| let mut result = 0; |
| for update in updates.iter_mut() { |
| if update.offset == u32::MAX { |
| update.offset = update.data.try_into().map_err(|_| errno!(EINVAL))?; |
| result += 1; |
| } |
| } |
| current_task.write_objects(updates_addr, &updates)?; |
| return Ok(result.into()); |
| } |
| IORING_UNREGISTER_RING_FDS => { |
| track_stub!( |
| TODO("https://fxbug.dev/297431387"), |
| "io_uring_register IORING_UNREGISTER_RING_FDS", |
| opcode |
| ); |
| // Because registering a fd doesn't use any resource currently, unregistering is free. |
| return Ok(SUCCESS); |
| } |
| IORING_REGISTER_PBUF_RING => { |
| let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?; |
| if nr_args != 1 { |
| return error!(EINVAL); |
| } |
| let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?; |
| io_uring.register_ring_buffers(locked, buffer_definition)?; |
| return Ok(SUCCESS); |
| } |
| |
| IORING_UNREGISTER_PBUF_RING => { |
| let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?; |
| if nr_args != 1 { |
| return error!(EINVAL); |
| } |
| let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?; |
| io_uring.unregister_ring_buffers(locked, buffer_definition)?; |
| return Ok(SUCCESS); |
| } |
| |
| IORING_REGISTER_PBUF_STATUS => { |
| let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?; |
| if nr_args != 1 { |
| return error!(EINVAL); |
| } |
| let buffer_status_addr = UserRef::<uapi::io_uring_buf_status>::from(arg); |
| let mut buffer_status: uapi::io_uring_buf_status = |
| current_task.read_object(buffer_status_addr)?; |
| io_uring.ring_buffer_status(locked, &mut buffer_status)?; |
| current_task.write_object(buffer_status_addr, &buffer_status)?; |
| return Ok(SUCCESS); |
| } |
| |
| _ => { |
| track_stub!( |
| TODO("https://fxbug.dev/297431387"), |
| "io_uring_register unknown op", |
| opcode |
| ); |
| return error!(EINVAL); |
| } |
| } |
| } |
| |
| // Syscalls for arch32 usage |
| #[cfg(target_arch = "aarch64")] |
| mod arch32 { |
| use crate::mm::MemoryAccessorExt; |
| use crate::task::CurrentTask; |
| use crate::vfs::syscalls::{ |
| LookupFlags, OpenFlags, lookup_at, sys_dup3, sys_faccessat, sys_fallocate, sys_lseek, |
| sys_mkdirat, sys_openat, sys_readlinkat, sys_unlinkat, |
| }; |
| use crate::vfs::{FdNumber, FsNode}; |
| use linux_uapi::off_t; |
| use starnix_sync::{Locked, Unlocked}; |
| use starnix_syscalls::SyscallArg; |
| use starnix_types::time::duration_from_poll_timeout; |
| use starnix_uapi::errors::Errno; |
| use starnix_uapi::file_mode::FileMode; |
| use starnix_uapi::signals::SigSet; |
| use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef}; |
| use starnix_uapi::vfs::EpollEvent; |
| use starnix_uapi::{AT_REMOVEDIR, errno, error, uapi}; |
| |
| type StatFs64Ptr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs64>; |
| |
| fn merge_low_and_high(low: u32, high: u32) -> off_t { |
| ((high as off_t) << 32) | (low as off_t) |
| } |
| |
| pub fn sys_arch32_open( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| flags: u32, |
| mode: FileMode, |
| ) -> Result<FdNumber, Errno> { |
| sys_openat(locked, current_task, FdNumber::AT_FDCWD, user_path, flags, mode) |
| } |
| |
| pub fn sys_arch32_access( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| mode: u32, |
| ) -> Result<(), Errno> { |
| sys_faccessat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode) |
| } |
| pub fn stat64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| node: &FsNode, |
| arch32_stat_buf: UserRef<uapi::arch32::stat64>, |
| ) -> Result<(), Errno> { |
| let stat_buffer = node.stat(locked, current_task)?; |
| let result: uapi::arch32::stat64 = stat_buffer.try_into().map_err(|_| errno!(EINVAL))?; |
| // Now we copy to the arch32 version and write. |
| current_task.write_object(arch32_stat_buf, &result)?; |
| Ok(()) |
| } |
| |
| pub fn sys_arch32_fstat64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| arch32_stat_buf: UserRef<uapi::arch32::stat64>, |
| ) -> Result<(), Errno> { |
| let file = current_task.files.get_allowing_opath(fd)?; |
| stat64(locked, current_task, file.node(), arch32_stat_buf) |
| } |
| |
| pub fn sys_arch32_fallocate( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| mode: u32, |
| offset_low: u32, |
| offset_high: u32, |
| len_low: u32, |
| len_high: u32, |
| ) -> Result<(), Errno> { |
| let offset = merge_low_and_high(offset_low, offset_high); |
| let len = merge_low_and_high(len_low, len_high); |
| sys_fallocate(locked, current_task, fd, mode, offset, len) |
| } |
| |
| pub fn sys_arch32_stat64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| arch32_stat_buf: UserRef<uapi::arch32::stat64>, |
| ) -> Result<(), Errno> { |
| let name = |
| lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?; |
| stat64(locked, current_task, &name.entry.node, arch32_stat_buf) |
| } |
| |
| pub fn sys_arch32_readlink( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| buffer: UserAddress, |
| buffer_size: usize, |
| ) -> Result<usize, Errno> { |
| sys_readlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, buffer, buffer_size) |
| } |
| |
| pub fn sys_arch32_mkdir( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| mode: FileMode, |
| ) -> Result<(), Errno> { |
| sys_mkdirat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode) |
| } |
| |
| pub fn sys_arch32_rmdir( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| ) -> Result<(), Errno> { |
| sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, AT_REMOVEDIR) |
| } |
| |
| #[allow(non_snake_case)] |
| pub fn sys_arch32__llseek( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| offset_high: u32, |
| offset_low: u32, |
| result: UserRef<off_t>, |
| whence: u32, |
| ) -> Result<(), Errno> { |
| let offset = merge_low_and_high(offset_low, offset_high); |
| let result_value = sys_lseek(locked, current_task, fd, offset, whence)?; |
| current_task.write_object(result, &result_value).map(|_| ()) |
| } |
| |
| pub fn sys_arch32_dup2( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| oldfd: FdNumber, |
| newfd: FdNumber, |
| ) -> Result<FdNumber, Errno> { |
| if oldfd == newfd { |
| // O_PATH allowed for: |
| // |
| // Duplicating the file descriptor (dup(2), fcntl(2) |
| // F_DUPFD, etc.). |
| // |
| // See https://man7.org/linux/man-pages/man2/open.2.html |
| current_task.files.get_allowing_opath(oldfd)?; |
| return Ok(newfd); |
| } |
| sys_dup3(locked, current_task, oldfd, newfd, 0) |
| } |
| |
| pub fn sys_arch32_unlink( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| ) -> Result<(), Errno> { |
| sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, 0) |
| } |
| |
| pub fn sys_arch32_pread64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| address: UserAddress, |
| length: usize, |
| _: SyscallArg, |
| offset_low: u32, |
| offset_high: u32, |
| ) -> Result<usize, Errno> { |
| super::sys_pread64( |
| locked, |
| current_task, |
| fd, |
| address, |
| length, |
| merge_low_and_high(offset_low, offset_high), |
| ) |
| } |
| |
| pub fn sys_arch32_pwrite64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| address: UserAddress, |
| length: usize, |
| _: SyscallArg, |
| offset_low: u32, |
| offset_high: u32, |
| ) -> Result<usize, Errno> { |
| super::sys_pwrite64( |
| locked, |
| current_task, |
| fd, |
| address, |
| length, |
| merge_low_and_high(offset_low, offset_high), |
| ) |
| } |
| |
| pub fn sys_arch32_truncate64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| _unused: SyscallArg, |
| length_low: u32, |
| length_high: u32, |
| ) -> Result<(), Errno> { |
| super::sys_truncate( |
| locked, |
| current_task, |
| user_path, |
| merge_low_and_high(length_low, length_high), |
| ) |
| } |
| |
| pub fn sys_arch32_ftruncate64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| _: SyscallArg, |
| length_low: u32, |
| length_high: u32, |
| ) -> Result<(), Errno> { |
| super::sys_ftruncate(locked, current_task, fd, merge_low_and_high(length_low, length_high)) |
| } |
| |
| pub fn sys_arch32_chmod( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| mode: FileMode, |
| ) -> Result<(), Errno> { |
| super::sys_fchmodat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode) |
| } |
| |
| pub fn sys_arch32_chown32( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| owner: uapi::arch32::__kernel_uid32_t, |
| group: uapi::arch32::__kernel_uid32_t, |
| ) -> Result<(), Errno> { |
| super::sys_fchownat(locked, current_task, FdNumber::AT_FDCWD, user_path, owner, group, 0) |
| } |
| |
| pub fn sys_arch32_poll( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| user_fds: UserRef<uapi::pollfd>, |
| num_fds: i32, |
| timeout: i32, |
| ) -> Result<usize, Errno> { |
| let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?); |
| super::poll(locked, current_task, user_fds, num_fds, None, deadline) |
| } |
| |
| pub fn sys_arch32_epoll_create( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| size: i32, |
| ) -> Result<FdNumber, Errno> { |
| if size < 1 { |
| // The man page for epoll_create says the size was used in a previous implementation as |
| // a hint but no longer does anything. But it's still required to be >= 1 to ensure |
| // programs are backwards-compatible. |
| return error!(EINVAL); |
| } |
| super::sys_epoll_create1(locked, current_task, 0) |
| } |
| |
| pub fn sys_arch32_epoll_wait( |
| locked: &mut Locked<Unlocked>, |
| current_task: &mut CurrentTask, |
| epfd: FdNumber, |
| events: UserRef<EpollEvent>, |
| max_events: i32, |
| timeout: i32, |
| ) -> Result<usize, Errno> { |
| super::sys_epoll_pwait( |
| locked, |
| current_task, |
| epfd, |
| events, |
| max_events, |
| timeout, |
| UserRef::<SigSet>::default(), |
| ) |
| } |
| |
| pub fn sys_arch32_rename( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| old_user_path: UserCString, |
| new_user_path: UserCString, |
| ) -> Result<(), Errno> { |
| super::sys_renameat2( |
| locked, |
| current_task, |
| FdNumber::AT_FDCWD, |
| old_user_path, |
| FdNumber::AT_FDCWD, |
| new_user_path, |
| 0, |
| ) |
| } |
| |
| pub fn sys_arch32_creat( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| mode: FileMode, |
| ) -> Result<FdNumber, Errno> { |
| super::sys_openat( |
| locked, |
| current_task, |
| FdNumber::AT_FDCWD, |
| user_path, |
| (OpenFlags::WRONLY | OpenFlags::CREAT | OpenFlags::TRUNC).bits(), |
| mode, |
| ) |
| } |
| |
| pub fn sys_arch32_symlink( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_target: UserCString, |
| user_path: UserCString, |
| ) -> Result<(), Errno> { |
| super::sys_symlinkat(locked, current_task, user_target, FdNumber::AT_FDCWD, user_path) |
| } |
| |
| pub fn sys_arch32_eventfd( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| value: u32, |
| ) -> Result<FdNumber, Errno> { |
| super::sys_eventfd2(locked, current_task, value, 0) |
| } |
| |
| pub fn sys_arch32_inotify_init( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| ) -> Result<FdNumber, Errno> { |
| super::sys_inotify_init1(locked, current_task, 0) |
| } |
| |
| pub fn sys_arch32_link( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| old_user_path: UserCString, |
| new_user_path: UserCString, |
| ) -> Result<(), Errno> { |
| super::sys_linkat( |
| locked, |
| current_task, |
| FdNumber::AT_FDCWD, |
| old_user_path, |
| FdNumber::AT_FDCWD, |
| new_user_path, |
| 0, |
| ) |
| } |
| |
| pub fn sys_arch32_fstatfs64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| user_buf_len: u32, |
| user_buf: StatFs64Ptr, |
| ) -> Result<(), Errno> { |
| if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() { |
| return error!(EINVAL); |
| } |
| super::fstatfs(locked, current_task, fd, user_buf) |
| } |
| |
| pub fn sys_arch32_statfs64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| user_path: UserCString, |
| user_buf_len: u32, |
| user_buf: StatFs64Ptr, |
| ) -> Result<(), Errno> { |
| if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() { |
| return error!(EINVAL); |
| } |
| super::statfs(locked, current_task, user_path, user_buf) |
| } |
| |
| pub fn sys_arch32_arm_fadvise64_64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| fd: FdNumber, |
| advice: u32, |
| offset_low: u32, |
| offset_high: u32, |
| len_low: u32, |
| len_high: u32, |
| ) -> Result<(), Errno> { |
| let offset = merge_low_and_high(offset_low, offset_high); |
| let len = merge_low_and_high(len_low, len_high); |
| super::sys_fadvise64(locked, current_task, fd, offset, len, advice) |
| } |
| |
| pub fn sys_arch32_sendfile64( |
| locked: &mut Locked<Unlocked>, |
| current_task: &CurrentTask, |
| out_fd: FdNumber, |
| in_fd: FdNumber, |
| user_offset: UserRef<uapi::off_t>, |
| count: i32, |
| ) -> Result<usize, Errno> { |
| super::sys_sendfile(locked, current_task, out_fd, in_fd, user_offset.into(), count) |
| } |
| |
| pub use super::{ |
| sys_chdir as sys_arch32_chdir, sys_chroot as sys_arch32_chroot, |
| sys_copy_file_range as sys_arch32_copy_file_range, sys_dup3 as sys_arch32_dup3, |
| sys_epoll_create1 as sys_arch32_epoll_create1, sys_epoll_ctl as sys_arch32_epoll_ctl, |
| sys_epoll_pwait as sys_arch32_epoll_pwait, sys_epoll_pwait2 as sys_arch32_epoll_pwait2, |
| sys_eventfd2 as sys_arch32_eventfd2, sys_fchmod as sys_arch32_fchmod, |
| sys_fchmodat as sys_arch32_fchmodat, sys_fchown as sys_arch32_fchown32, |
| sys_fchown as sys_arch32_fchown, sys_fchownat as sys_arch32_fchownat, |
| sys_fdatasync as sys_arch32_fdatasync, sys_flock as sys_arch32_flock, |
| sys_fsetxattr as sys_arch32_fsetxattr, sys_fstatat64 as sys_arch32_fstatat64, |
| sys_fstatfs as sys_arch32_fstatfs, sys_fsync as sys_arch32_fsync, |
| sys_ftruncate as sys_arch32_ftruncate, |
| sys_inotify_add_watch as sys_arch32_inotify_add_watch, |
| sys_inotify_init1 as sys_arch32_inotify_init1, |
| sys_inotify_rm_watch as sys_arch32_inotify_rm_watch, sys_io_cancel as sys_arch32_io_cancel, |
| sys_io_destroy as sys_arch32_io_destroy, sys_io_getevents as sys_arch32_io_getevents, |
| sys_io_setup as sys_arch32_io_setup, sys_io_submit as sys_arch32_io_submit, |
| sys_io_uring_enter as sys_arch32_io_uring_enter, |
| sys_io_uring_register as sys_arch32_io_uring_register, |
| sys_io_uring_setup as sys_arch32_io_uring_setup, sys_lgetxattr as sys_arch32_lgetxattr, |
| sys_linkat as sys_arch32_linkat, sys_listxattr as sys_arch32_listxattr, |
| sys_llistxattr as sys_arch32_llistxattr, sys_lsetxattr as sys_arch32_lsetxattr, |
| sys_mkdirat as sys_arch32_mkdirat, sys_mknodat as sys_arch32_mknodat, |
| sys_pidfd_getfd as sys_arch32_pidfd_getfd, sys_pidfd_open as sys_arch32_pidfd_open, |
| sys_ppoll as sys_arch32_ppoll, sys_preadv as sys_arch32_preadv, |
| sys_pselect6 as sys_arch32_pselect6, sys_readv as sys_arch32_readv, |
| sys_removexattr as sys_arch32_removexattr, sys_renameat2 as sys_arch32_renameat2, |
| sys_select as sys_arch32__newselect, sys_sendfile as sys_arch32_sendfile, |
| sys_setxattr as sys_arch32_setxattr, sys_splice as sys_arch32_splice, |
| sys_statfs as sys_arch32_statfs, sys_statx as sys_arch32_statx, |
| sys_symlinkat as sys_arch32_symlinkat, sys_sync as sys_arch32_sync, |
| sys_tee as sys_arch32_tee, sys_timerfd_create as sys_arch32_timerfd_create, |
| sys_timerfd_gettime as sys_arch32_timerfd_gettime, |
| sys_timerfd_settime as sys_arch32_timerfd_settime, sys_truncate as sys_arch32_truncate, |
| sys_umask as sys_arch32_umask, sys_utimensat as sys_arch32_utimensat, |
| sys_vmsplice as sys_arch32_vmsplice, |
| }; |
| } |
| |
| #[cfg(target_arch = "aarch64")] |
| pub use arch32::*; |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use crate::testing::*; |
| use starnix_types::vfs::default_statfs; |
| use starnix_uapi::{O_RDONLY, SEEK_CUR, SEEK_END, SEEK_SET}; |
| use zerocopy::IntoBytes; |
| |
| #[::fuchsia::test] |
| async fn test_sys_lseek() -> Result<(), Errno> { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| let fd = FdNumber::from_raw(10); |
| let file_handle = |
| current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?; |
| let file_size = file_handle.node().stat(locked, current_task).unwrap().st_size; |
| current_task.files.insert(locked, current_task, fd, file_handle).unwrap(); |
| |
| assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, 0); |
| assert_eq!(sys_lseek(locked, current_task, fd, 1, SEEK_CUR)?, 1); |
| assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3); |
| assert_eq!(sys_lseek(locked, current_task, fd, -3, SEEK_CUR)?, 0); |
| assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_END)?, file_size); |
| assert_eq!(sys_lseek(locked, current_task, fd, -5, SEEK_SET), error!(EINVAL)); |
| |
| // Make sure that the failed call above did not change the offset. |
| assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, file_size); |
| |
| // Prepare for an overflow. |
| assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3); |
| |
| // Check for overflow. |
| assert_eq!(sys_lseek(locked, current_task, fd, i64::MAX, SEEK_CUR), error!(EINVAL)); |
| |
| Ok(()) |
| }) |
| .await |
| } |
| |
| #[::fuchsia::test] |
| async fn test_sys_dup() -> Result<(), Errno> { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| let file_handle = |
| current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?; |
| let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?; |
| let newfd = sys_dup(locked, current_task, oldfd)?; |
| |
| assert_ne!(oldfd, newfd); |
| let files = ¤t_task.files; |
| assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap())); |
| |
| assert_eq!(sys_dup(locked, current_task, FdNumber::from_raw(3)), error!(EBADF)); |
| |
| Ok(()) |
| }) |
| .await |
| } |
| |
| #[::fuchsia::test] |
| async fn test_sys_dup3() -> Result<(), Errno> { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| let file_handle = |
| current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?; |
| let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?; |
| let newfd = FdNumber::from_raw(2); |
| sys_dup3(locked, current_task, oldfd, newfd, O_CLOEXEC)?; |
| |
| assert_ne!(oldfd, newfd); |
| let files = ¤t_task.files; |
| assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap())); |
| assert_eq!(files.get_fd_flags_allowing_opath(oldfd).unwrap(), FdFlags::empty()); |
| assert_eq!(files.get_fd_flags_allowing_opath(newfd).unwrap(), FdFlags::CLOEXEC); |
| |
| assert_eq!(sys_dup3(locked, current_task, oldfd, oldfd, O_CLOEXEC), error!(EINVAL)); |
| |
| // Pass invalid flags. |
| let invalid_flags = 1234; |
| assert_eq!(sys_dup3(locked, current_task, oldfd, newfd, invalid_flags), error!(EINVAL)); |
| |
| // Makes sure that dup closes the old file handle before the fd points |
| // to the new file handle. |
| let second_file_handle = |
| current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?; |
| let different_file_fd = |
| current_task.add_file(locked, second_file_handle, FdFlags::empty())?; |
| assert!(!Arc::ptr_eq( |
| &files.get(oldfd).unwrap(), |
| &files.get(different_file_fd).unwrap() |
| )); |
| sys_dup3(locked, current_task, oldfd, different_file_fd, O_CLOEXEC)?; |
| assert!(Arc::ptr_eq( |
| &files.get(oldfd).unwrap(), |
| &files.get(different_file_fd).unwrap() |
| )); |
| |
| Ok(()) |
| }) |
| .await |
| } |
| |
| #[::fuchsia::test] |
| async fn test_sys_open_cloexec() -> Result<(), Errno> { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE); |
| let path = b"data/testfile.txt\0"; |
| current_task.write_memory(path_addr, path)?; |
| let fd = sys_openat( |
| locked, |
| ¤t_task, |
| FdNumber::AT_FDCWD, |
| UserCString::new(current_task, path_addr), |
| O_RDONLY | O_CLOEXEC, |
| FileMode::default(), |
| )?; |
| assert!(current_task.files.get_fd_flags_allowing_opath(fd)?.contains(FdFlags::CLOEXEC)); |
| Ok(()) |
| }) |
| .await |
| } |
| |
| #[::fuchsia::test] |
| async fn test_sys_epoll() -> Result<(), Errno> { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| let epoll_fd = |
| sys_epoll_create1(locked, current_task, 0).expect("sys_epoll_create1 failed"); |
| sys_close(locked, current_task, epoll_fd).expect("sys_close failed"); |
| |
| Ok(()) |
| }) |
| .await |
| } |
| |
| #[::fuchsia::test] |
| async fn test_fstat_tmp_file() { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| // Create the file that will be used to stat. |
| let file_path = "data/testfile.txt"; |
| let _file_handle = |
| current_task.open_file(locked, file_path.into(), OpenFlags::RDONLY).unwrap(); |
| |
| // Write the path to user memory. |
| let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE); |
| current_task |
| .write_memory(path_addr, file_path.as_bytes()) |
| .expect("failed to clear struct"); |
| |
| let memory_len = (path_addr + file_path.len()).expect("OOB memory allocation!"); |
| let user_stat = UserRef::new(memory_len); |
| current_task |
| .write_object(user_stat, &default_statfs(0)) |
| .expect("failed to clear struct"); |
| |
| let user_path = UserCString::new(current_task, path_addr); |
| |
| assert_eq!(sys_statfs(locked, current_task, user_path, user_stat.into()), Ok(())); |
| |
| let returned_stat = current_task.read_object(user_stat).expect("failed to read struct"); |
| assert_eq!( |
| returned_stat.as_bytes(), |
| default_statfs(u32::from_be_bytes(*b"f.io")).as_bytes() |
| ); |
| }) |
| .await; |
| } |
| |
| #[::fuchsia::test] |
| async fn test_unlinkat_dir() { |
| spawn_kernel_and_run(async |locked, current_task| { |
| // Create the dir that we will attempt to unlink later. |
| let no_slash_path = b"testdir"; |
| let no_slash_path_addr = |
| map_memory(locked, ¤t_task, UserAddress::default(), *PAGE_SIZE); |
| current_task |
| .write_memory(no_slash_path_addr, no_slash_path) |
| .expect("failed to write path"); |
| let no_slash_user_path = UserCString::new(current_task, no_slash_path_addr); |
| sys_mkdirat( |
| locked, |
| ¤t_task, |
| FdNumber::AT_FDCWD, |
| no_slash_user_path, |
| FileMode::ALLOW_ALL.with_type(FileMode::IFDIR), |
| ) |
| .unwrap(); |
| |
| let slash_path = b"testdir/"; |
| let slash_path_addr = |
| map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE); |
| current_task.write_memory(slash_path_addr, slash_path).expect("failed to write path"); |
| let slash_user_path = UserCString::new(current_task, slash_path_addr); |
| |
| // Try to remove a directory without specifying AT_REMOVEDIR. |
| // This should fail with EISDIR, irrespective of the terminating slash. |
| let error = sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, 0) |
| .unwrap_err(); |
| assert_eq!(error, errno!(EISDIR)); |
| let error = |
| sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, no_slash_user_path, 0) |
| .unwrap_err(); |
| assert_eq!(error, errno!(EISDIR)); |
| |
| // Success with AT_REMOVEDIR. |
| sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, AT_REMOVEDIR) |
| .unwrap(); |
| }) |
| .await; |
| } |
| |
| #[::fuchsia::test] |
| async fn test_rename_noreplace() { |
| spawn_kernel_and_run_with_pkgfs(async |locked, current_task| { |
| // Create the file that will be renamed. |
| let old_user_path = "data/testfile.txt"; |
| let _old_file_handle = |
| current_task.open_file(locked, old_user_path.into(), OpenFlags::RDONLY).unwrap(); |
| |
| // Write the path to user memory. |
| let old_path_addr = |
| map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE); |
| current_task |
| .write_memory(old_path_addr, old_user_path.as_bytes()) |
| .expect("failed to clear struct"); |
| |
| // Create a second file that we will attempt to rename to. |
| let new_user_path = "data/testfile2.txt"; |
| let _new_file_handle = |
| current_task.open_file(locked, new_user_path.into(), OpenFlags::RDONLY).unwrap(); |
| |
| // Write the path to user memory. |
| let new_path_addr = |
| map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE); |
| current_task |
| .write_memory(new_path_addr, new_user_path.as_bytes()) |
| .expect("failed to clear struct"); |
| |
| // Try to rename first file to second file's name with RENAME_NOREPLACE flag. |
| // This should fail with EEXIST. |
| let error = sys_renameat2( |
| locked, |
| ¤t_task, |
| FdNumber::AT_FDCWD, |
| UserCString::new(current_task, old_path_addr), |
| FdNumber::AT_FDCWD, |
| UserCString::new(current_task, new_path_addr), |
| RenameFlags::NOREPLACE.bits(), |
| ) |
| .unwrap_err(); |
| assert_eq!(error, errno!(EEXIST)); |
| }) |
| .await; |
| } |
| } |