// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use crate::execution::execute_task;
use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
use crate::ptrace::{
PR_SET_PTRACER_ANY, PtraceAllowedPtracers, PtraceAttachType, PtraceOptions, ptrace_attach,
ptrace_dispatch, ptrace_traceme,
};
use crate::security;
use crate::signals::syscalls::RUsagePtr;
use crate::task::{
CurrentTask, ExitStatus, NormalPriority, SchedulingPolicy, SeccompAction, SeccompStateValue,
SyslogAccess, Task, ThreadGroup, max_priority_for_sched_policy, min_priority_for_sched_policy,
};
use crate::vfs::{
FdNumber, FileHandle, MountNamespaceFile, PidFdFileObject, UserBuffersOutputBuffer,
VecOutputBuffer,
};
use starnix_logging::{log_error, log_info, log_trace, track_stub};
use starnix_sync::{Locked, RwLock, Unlocked};
use starnix_syscalls::SyscallResult;
use starnix_task_command::TaskCommand;
use starnix_types::ownership::WeakRef;
use starnix_types::time::timeval_from_duration;
use starnix_uapi::auth::{
CAP_SETGID, CAP_SETPCAP, CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SYS_RESOURCE,
CAP_SYS_TTY_CONFIG, Capabilities, Credentials, PTRACE_MODE_READ_REALCREDS, SecureBits,
};
use starnix_uapi::errors::{ENAMETOOLONG, Errno};
use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
use starnix_uapi::kcmp::KcmpResource;
use starnix_uapi::open_flags::OpenFlags;
use starnix_uapi::resource_limits::Resource;
use starnix_uapi::signals::{Signal, UncheckedSignal};
use starnix_uapi::syslog::SyslogAction;
use starnix_uapi::user_address::{
ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserCString,
UserCStringPtr, UserRef,
};
use starnix_uapi::vfs::ResolveFlags;
use starnix_uapi::{
__user_cap_data_struct, __user_cap_header_struct, _LINUX_CAPABILITY_VERSION_1,
_LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3, AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW,
BPF_MAXINSNS, CLONE_ARGS_SIZE_VER0, CLONE_ARGS_SIZE_VER1, CLONE_ARGS_SIZE_VER2, CLONE_FILES,
CLONE_FS, CLONE_NEWNS, CLONE_NEWUTS, CLONE_SETTLS, CLONE_VFORK, NGROUPS_MAX, PR_CAP_AMBIENT,
PR_CAP_AMBIENT_CLEAR_ALL, PR_CAP_AMBIENT_IS_SET, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE,
PR_CAPBSET_DROP, PR_CAPBSET_READ, PR_GET_CHILD_SUBREAPER, PR_GET_DUMPABLE, PR_GET_KEEPCAPS,
PR_GET_NAME, PR_GET_NO_NEW_PRIVS, PR_GET_SECCOMP, PR_GET_SECUREBITS, PR_SET_CHILD_SUBREAPER,
PR_SET_DUMPABLE, PR_SET_KEEPCAPS, PR_SET_NAME, PR_SET_NO_NEW_PRIVS, PR_SET_PDEATHSIG,
PR_SET_PTRACER, PR_SET_SECCOMP, PR_SET_SECUREBITS, PR_SET_TIMERSLACK, PR_SET_VMA,
PR_SET_VMA_ANON_NAME, PRIO_PROCESS, PTRACE_ATTACH, PTRACE_SEIZE, PTRACE_TRACEME,
RUSAGE_CHILDREN, SCHED_RESET_ON_FORK, SECCOMP_FILTER_FLAG_LOG,
SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SECCOMP_GET_ACTION_AVAIL, SECCOMP_GET_NOTIF_SIZES,
SECCOMP_MODE_FILTER, SECCOMP_MODE_STRICT, SECCOMP_SET_MODE_FILTER, SECCOMP_SET_MODE_STRICT,
c_char, c_int, clone_args, errno, error, gid_t, pid_t, rlimit, rusage, sched_param,
sock_filter, uapi, uid_t,
};
use static_assertions::const_assert;
use std::cmp;
use std::ffi::CString;
use std::sync::{Arc, LazyLock};
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
#[cfg(target_arch = "aarch64")]
use starnix_uapi::{PR_GET_TAGGED_ADDR_CTRL, PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE};
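/// Userspace pointer to a BPF filter program (`sock_fprog`), readable from both native and
/// arch32 tasks.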
pub type SockFProgPtr =
MappingMultiArchUserRef<SockFProg, uapi::sock_fprog, uapi::arch32::sock_fprog>;
pub type SockFilterPtr = MultiArchUserRef<uapi::sock_filter, uapi::arch32::sock_filter>;
pub struct SockFProg {
pub len: u32,
pub filter: SockFilterPtr,
}
uapi::arch_map_data! {
BidiTryFrom<SockFProg, sock_fprog> {
len = len;
filter = filter;
}
}
uapi::check_arch_independent_layout! {
sched_param {
sched_priority,
}
}
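/// Shared implementation of `clone()` and `clone3()`: creates the child task, zeroes its return
/// register, and applies the requested stack pointer and TLS before scheduling it. Emits the
/// corresponding ptrace events, and for `CLONE_VFORK` waits for the child to exec before
/// returning.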
pub fn do_clone(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
args: &clone_args,
) -> Result<pid_t, Errno> {
security::check_task_create_access(current_task)?;
let child_exit_signal = if args.exit_signal == 0 {
None
} else {
Some(Signal::try_from(UncheckedSignal::new(args.exit_signal))?)
};
let mut new_task = current_task.clone_task(
locked,
args.flags,
child_exit_signal,
UserRef::<pid_t>::new(UserAddress::from(args.parent_tid)),
UserRef::<pid_t>::new(UserAddress::from(args.child_tid)),
UserRef::<FdNumber>::new(UserAddress::from(args.pidfd)),
)?;
// Set the result register to 0 for the return value from clone in the
// cloned process.
new_task.thread_state.registers.set_return_register(0);
let (trace_kind, ptrace_state) = current_task.get_ptrace_core_state_for_clone(args);
if args.stack != 0 {
// In clone() the `stack` argument points to the top of the stack, while in clone3()
// `stack` points to the bottom of the stack. Therefore, in clone3() we need to add
// `stack_size` to calculate the stack pointer. Note that in clone() `stack_size` is 0.
new_task
.thread_state
.registers
.set_stack_pointer_register(args.stack.wrapping_add(args.stack_size));
}
if args.flags & (CLONE_SETTLS as u64) != 0 {
new_task.thread_state.registers.set_thread_pointer_register(args.tls);
}
let tid = new_task.task.tid;
let task_ref = WeakRef::from(&new_task.task);
execute_task(locked, new_task, |_, _| Ok(()), |_| {}, ptrace_state)?;
current_task.ptrace_event(locked, trace_kind, tid as u64);
if args.flags & (CLONE_VFORK as u64) != 0 {
current_task.wait_for_execve(task_ref)?;
current_task.ptrace_event(locked, PtraceOptions::TRACEVFORKDONE, tid as u64);
}
Ok(tid)
}
pub fn sys_clone3(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
user_clone_args: UserRef<clone_args>,
user_clone_args_size: usize,
) -> Result<pid_t, Errno> {
// Only these specific sized versions are supported.
if !(user_clone_args_size == CLONE_ARGS_SIZE_VER0 as usize
|| user_clone_args_size == CLONE_ARGS_SIZE_VER1 as usize
|| user_clone_args_size == CLONE_ARGS_SIZE_VER2 as usize)
{
return error!(EINVAL);
}
// The size of the most recent version of the struct should match our definition.
const_assert!(std::mem::size_of::<clone_args>() == CLONE_ARGS_SIZE_VER2 as usize);
let clone_args = current_task.read_object_partial(user_clone_args, user_clone_args_size)?;
do_clone(locked, current_task, &clone_args)
}
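/// Reads a NULL-terminated vector of C string pointers (e.g. `argv` or `environ`) from
/// userspace. Each string may be at most `elem_limit` bytes (longer strings yield `E2BIG`), and
/// the total size of the vector, including NUL terminators, may not exceed `vec_limit`. Returns
/// the strings along with their accumulated size.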
fn read_c_string_vector(
mm: &CurrentTask,
user_vector: UserCStringPtr,
elem_limit: usize,
vec_limit: usize,
) -> Result<(Vec<CString>, usize), Errno> {
let mut user_current = user_vector;
let mut vector: Vec<CString> = vec![];
let mut vec_size: usize = 0;
loop {
let user_string = mm.read_multi_arch_ptr(user_current)?;
if user_string.is_null() {
break;
}
let string = mm
.read_c_string_to_vec(user_string, elem_limit)
.map_err(|e| if e.code == ENAMETOOLONG { errno!(E2BIG) } else { e })?;
let cstring = CString::new(string).map_err(|_| errno!(EINVAL))?;
vec_size =
vec_size.checked_add(cstring.as_bytes_with_nul().len()).ok_or_else(|| errno!(E2BIG))?;
if vec_size > vec_limit {
return error!(E2BIG);
}
vector.push(cstring);
user_current = user_current.next()?;
}
Ok((vector, vec_size))
}
pub fn sys_execve(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
user_path: UserCString,
user_argv: UserCStringPtr,
user_environ: UserCStringPtr,
) -> Result<(), Errno> {
sys_execveat(locked, current_task, FdNumber::AT_FDCWD, user_path, user_argv, user_environ, 0)
}
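/// Implements `execveat()`: resolves the executable relative to `dir_fd` (honoring
/// `AT_EMPTY_PATH` and `AT_SYMLINK_NOFOLLOW`), enforces the argv/environ size limits from
/// execve(2), and then replaces the current program image.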
pub fn sys_execveat(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
dir_fd: FdNumber,
user_path: UserCString,
user_argv: UserCStringPtr,
user_environ: UserCStringPtr,
flags: u32,
) -> Result<(), Errno> {
if flags & !(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) != 0 {
return error!(EINVAL);
}
// Calculate the limit for argv and environ size as 1/4 of the stack size, floored at 32 pages.
// See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
const PAGE_LIMIT: usize = 32;
let page_limit_size: usize = PAGE_LIMIT * *PAGE_SIZE as usize;
let rlimit = current_task.thread_group().get_rlimit(locked, Resource::STACK);
let stack_limit = rlimit / 4;
let argv_env_limit = cmp::max(page_limit_size, stack_limit as usize);
// The limit per argument or environment variable is 32 pages.
// See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
let (argv, argv_size) = if user_argv.is_null() {
(Vec::new(), 0)
} else {
read_c_string_vector(current_task, user_argv, page_limit_size, argv_env_limit)?
};
let (environ, _) = if user_environ.is_null() {
(Vec::new(), 0)
} else {
read_c_string_vector(
current_task,
user_environ,
page_limit_size,
argv_env_limit - argv_size,
)?
};
let path = &current_task.read_path(user_path)?;
log_trace!(argv:?, environ:?, flags:?; "execveat({dir_fd}, {path})");
let mut open_flags = OpenFlags::RDONLY;
if flags & AT_SYMLINK_NOFOLLOW != 0 {
open_flags |= OpenFlags::NOFOLLOW;
}
let executable = if path.is_empty() {
if flags & AT_EMPTY_PATH == 0 {
// If AT_EMPTY_PATH is not set, this is an error.
return error!(ENOENT);
}
// O_PATH allowed for:
//
// Passing the file descriptor as the dirfd argument of
// openat() and the other "*at()" system calls. This
// includes linkat(2) with AT_EMPTY_PATH (or via procfs
// using AT_SYMLINK_FOLLOW) even if the file is not a
// directory.
//
// See https://man7.org/linux/man-pages/man2/open.2.html
let file = current_task.files.get_allowing_opath(dir_fd)?;
// We are forced to reopen the file with O_RDONLY to get access to the underlying VMO.
// Note that we skip the access check in the arguments in case the file mode does
// not actually have the read permission bit set.
//
// This can happen because a file could have --x--x--x mode permissions and then
// be opened with O_PATH. Internally, the file operations would all be stubbed out
// for that file, which is undesirable here.
//
// See https://man7.org/linux/man-pages/man3/fexecve.3.html#DESCRIPTION
file.name.open(
locked,
current_task,
OpenFlags::RDONLY,
AccessCheck::check_for(Access::EXEC),
)?
} else {
current_task.open_file_at(
locked,
dir_fd,
path.as_ref(),
open_flags,
FileMode::default(),
ResolveFlags::empty(),
AccessCheck::check_for(Access::EXEC),
)?
};
// This path can affect script resolution (the path is appended to the script args)
// and the auxiliary value `AT_EXECFN` returned by `getauxval()`.
let path = if dir_fd == FdNumber::AT_FDCWD {
// The file descriptor is CWD, so the path is exactly
// what the user specified.
path.to_vec()
} else {
// The path is `/dev/fd/N/P` where N is the file descriptor
// number and P is the user-provided path (if relative and non-empty).
//
// See https://man7.org/linux/man-pages/man2/execveat.2.html#NOTES
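// For example, dir_fd 5 with the relative path "bin/sh" yields "/dev/fd/5/bin/sh",
// while an empty path yields "/dev/fd/5".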
match path.first() {
Some(b'/') => {
// The user-provided path is absolute, so dir_fd is ignored.
path.to_vec()
}
Some(_) => {
// User-provided path is relative, append it.
let mut new_path = format!("/dev/fd/{}/", dir_fd.raw()).into_bytes();
new_path.append(&mut path.to_vec());
new_path
}
// User-provided path is empty
None => format!("/dev/fd/{}", dir_fd.raw()).into_bytes(),
}
};
let path = CString::new(path).map_err(|_| errno!(EINVAL))?;
current_task.exec(locked, executable, path, argv, environ)?;
Ok(())
}
pub fn sys_getcpu(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
cpu_out: UserRef<u32>,
node_out: UserRef<u32>,
) -> Result<(), Errno> {
// "When either cpu or node is NULL nothing is written to the respective pointer."
// from https://man7.org/linux/man-pages/man2/getcpu.2.html
if !cpu_out.is_null() {
let thread_stats = current_task
.thread
.read()
.as_ref()
.expect("current thread is never None when executing")
.stats()
.map_err(|e| errno!(EINVAL, format!("getting thread stats failed {e:?}")))?;
current_task.write_object(cpu_out, &thread_stats.last_scheduled_cpu)?;
}
if !node_out.is_null() {
// Zircon does not yet have a concept of NUMA task scheduling, so always tell userspace that
// it's on the "first" node, which is true for non-NUMA systems.
track_stub!(TODO("https://fxbug.dev/325643815"), "getcpu() numa node");
current_task.write_object(node_out, &0)?;
}
Ok(())
}
pub fn sys_getpid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<pid_t, Errno> {
Ok(current_task.get_pid())
}
pub fn sys_gettid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<pid_t, Errno> {
Ok(current_task.get_tid())
}
pub fn sys_getppid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<pid_t, Errno> {
Ok(current_task.thread_group().read().get_ppid())
}
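/// Returns the current task if `pid` is 0, otherwise looks up the task with the given `pid`.
/// The returned reference is weak; callers upgrade it with `Task::from_weak`, which fails if
/// the task no longer exists.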
fn get_task_or_current(current_task: &CurrentTask, pid: pid_t) -> WeakRef<Task> {
if pid == 0 { current_task.weak_task() } else { current_task.get_task(pid) }
}
pub fn sys_getsid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
) -> Result<pid_t, Errno> {
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
security::check_task_getsid(current_task, &target_task)?;
let sid = target_task.thread_group().read().process_group.session.leader;
Ok(sid)
}
pub fn sys_getpgid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
) -> Result<pid_t, Errno> {
let weak = get_task_or_current(current_task, pid);
let task = Task::from_weak(&weak)?;
security::check_getpgid_access(current_task, &task)?;
let pgid = task.thread_group().read().process_group.leader;
Ok(pgid)
}
pub fn sys_setpgid(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
pgid: pid_t,
) -> Result<(), Errno> {
let weak = get_task_or_current(current_task, pid);
let task = Task::from_weak(&weak)?;
current_task.thread_group().setpgid(locked, current_task, &task, pgid)?;
Ok(())
}
impl CurrentTask {
/// Returns true if the `current_task`'s effective user ID (EUID) is the same as the
/// EUID or UID of the `target_task`. We describe this as the current task being
/// "EUID-friendly" to the target and it enables actions to be performed that would
/// otherwise require additional privileges.
///
/// See "The caller needs an effective user ID equal to the real user ID or effective
/// user ID of the [target]" at sched_setaffinity(2), comparable language at
/// setpriority(2), more ambiguous language at sched_setscheduler(2), and no
/// particular specification at sched_setparam(2).
fn is_euid_friendly_with(&self, target_task: &Task) -> bool {
let self_creds = self.current_creds();
let target_creds = target_task.real_creds();
self_creds.euid == target_creds.uid || self_creds.euid == target_creds.euid
}
}
// A non-root process is allowed to set any of its three uids to the value of any other. The
// CAP_SETUID capability bypasses these checks and allows setting any uid to any integer. Likewise
// for gids.
fn new_uid_allowed(current_task: &CurrentTask, uid: uid_t) -> bool {
let current_creds = current_task.current_creds();
uid == current_creds.uid
|| uid == current_creds.euid
|| uid == current_creds.saved_uid
|| security::is_task_capable_noaudit(current_task, CAP_SETUID)
}
fn new_gid_allowed(current_task: &CurrentTask, gid: gid_t) -> bool {
let current_creds = current_task.current_creds();
gid == current_creds.gid
|| gid == current_creds.egid
|| gid == current_creds.saved_gid
|| security::is_task_capable_noaudit(current_task, CAP_SETGID)
}
pub fn sys_getuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<uid_t, Errno> {
Ok(current_task.current_creds().uid)
}
pub fn sys_getgid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<gid_t, Errno> {
Ok(current_task.current_creds().gid)
}
pub fn sys_setuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
uid: uid_t,
) -> Result<(), Errno> {
if uid == gid_t::MAX {
return error!(EINVAL);
}
if !new_uid_allowed(&current_task, uid) {
return error!(EPERM);
}
let mut creds = Credentials::clone(&current_task.current_creds());
let prev = creds.copy_user_credentials();
creds.euid = uid;
creds.fsuid = uid;
if security::is_task_capable_noaudit(current_task, CAP_SETUID) {
creds.uid = uid;
creds.saved_uid = uid;
}
creds.update_capabilities(prev);
current_task.set_creds(creds);
Ok(())
}
pub fn sys_setgid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
gid: gid_t,
) -> Result<(), Errno> {
if gid == gid_t::MAX {
return error!(EINVAL);
}
if !new_gid_allowed(&current_task, gid) {
return error!(EPERM);
}
let mut creds = Credentials::clone(&current_task.current_creds());
creds.egid = gid;
creds.fsgid = gid;
if security::is_task_capable_noaudit(current_task, CAP_SETGID) {
creds.gid = gid;
creds.saved_gid = gid;
}
current_task.set_creds(creds);
Ok(())
}
pub fn sys_geteuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<uid_t, Errno> {
Ok(current_task.current_creds().euid)
}
pub fn sys_getegid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<gid_t, Errno> {
Ok(current_task.current_creds().egid)
}
pub fn sys_setfsuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fsuid: uid_t,
) -> Result<uid_t, Errno> {
let mut creds = Credentials::clone(&current_task.current_creds());
let prev = creds.copy_user_credentials();
if fsuid != u32::MAX && new_uid_allowed(&current_task, fsuid) {
creds.fsuid = fsuid;
creds.update_capabilities(prev);
current_task.set_creds(creds);
}
Ok(prev.fsuid)
}
pub fn sys_setfsgid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
fsgid: gid_t,
) -> Result<gid_t, Errno> {
let mut creds = Credentials::clone(&current_task.current_creds());
let prev = creds.copy_user_credentials();
let prev_fsgid = creds.fsgid;
if fsgid != u32::MAX && new_gid_allowed(&current_task, fsgid) {
creds.fsgid = fsgid;
creds.update_capabilities(prev);
current_task.set_creds(creds);
}
Ok(prev_fsgid)
}
pub fn sys_getresuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
ruid_addr: UserRef<uid_t>,
euid_addr: UserRef<uid_t>,
suid_addr: UserRef<uid_t>,
) -> Result<(), Errno> {
let creds = current_task.current_creds();
current_task.write_object(ruid_addr, &creds.uid)?;
current_task.write_object(euid_addr, &creds.euid)?;
current_task.write_object(suid_addr, &creds.saved_uid)?;
Ok(())
}
pub fn sys_getresgid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
rgid_addr: UserRef<gid_t>,
egid_addr: UserRef<gid_t>,
sgid_addr: UserRef<gid_t>,
) -> Result<(), Errno> {
let creds = current_task.current_creds();
current_task.write_object(rgid_addr, &creds.gid)?;
current_task.write_object(egid_addr, &creds.egid)?;
current_task.write_object(sgid_addr, &creds.saved_gid)?;
Ok(())
}
pub fn sys_setreuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
ruid: uid_t,
euid: uid_t,
) -> Result<(), Errno> {
let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
if !allowed(ruid) || !allowed(euid) {
return error!(EPERM);
}
let mut creds = Credentials::clone(&current_task.current_creds());
let prev = creds.copy_user_credentials();
let mut is_ruid_set = false;
if ruid != u32::MAX {
creds.uid = ruid;
is_ruid_set = true;
}
if euid != u32::MAX {
creds.euid = euid;
creds.fsuid = euid;
}
if is_ruid_set || prev.uid != euid {
creds.saved_uid = creds.euid;
}
creds.update_capabilities(prev);
current_task.set_creds(creds);
Ok(())
}
pub fn sys_setregid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
rgid: gid_t,
egid: gid_t,
) -> Result<(), Errno> {
let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
if !allowed(rgid) || !allowed(egid) {
return error!(EPERM);
}
let mut creds = Credentials::clone(&current_task.current_creds());
let previous_rgid = creds.gid;
let mut is_rgid_set = false;
if rgid != u32::MAX {
creds.gid = rgid;
is_rgid_set = true;
}
if egid != u32::MAX {
creds.egid = egid;
creds.fsgid = egid;
}
if is_rgid_set || previous_rgid != egid {
creds.saved_gid = creds.egid;
}
current_task.set_creds(creds);
Ok(())
}
pub fn sys_setresuid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
ruid: uid_t,
euid: uid_t,
suid: uid_t,
) -> Result<(), Errno> {
let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
if !allowed(ruid) || !allowed(euid) || !allowed(suid) {
return error!(EPERM);
}
let mut creds = Credentials::clone(&current_task.current_creds());
let prev = creds.copy_user_credentials();
if ruid != u32::MAX {
creds.uid = ruid;
}
if euid != u32::MAX {
creds.euid = euid;
creds.fsuid = euid;
}
if suid != u32::MAX {
creds.saved_uid = suid;
}
creds.update_capabilities(prev);
current_task.set_creds(creds);
Ok(())
}
pub fn sys_setresgid(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
rgid: gid_t,
egid: gid_t,
sgid: gid_t,
) -> Result<(), Errno> {
let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
if !allowed(rgid) || !allowed(egid) || !allowed(sgid) {
return error!(EPERM);
}
let mut creds = Credentials::clone(&current_task.current_creds());
if rgid != u32::MAX {
creds.gid = rgid;
}
if egid != u32::MAX {
creds.egid = egid;
creds.fsgid = egid;
}
if sgid != u32::MAX {
creds.saved_gid = sgid;
}
current_task.set_creds(creds);
Ok(())
}
pub fn sys_exit(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
code: i32,
) -> Result<(), Errno> {
// Only change the current exit status if it has not already been set by exit_group, which
// otherwise takes priority.
current_task.write().set_exit_status_if_not_already(ExitStatus::Exit(code as u8));
Ok(())
}
pub fn sys_exit_group(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
code: i32,
) -> Result<(), Errno> {
current_task.thread_group_exit(locked, ExitStatus::Exit(code as u8));
Ok(())
}
pub fn sys_sched_getscheduler(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
) -> Result<u32, Errno> {
if pid < 0 {
return error!(EINVAL);
}
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
security::check_getsched_access(current_task, target_task.as_ref())?;
let current_scheduler_state = target_task.read().scheduler_state;
Ok(current_scheduler_state.policy_for_sched_getscheduler())
}
pub fn sys_sched_setscheduler(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
policy: u32,
param: UserRef<sched_param>,
) -> Result<(), Errno> {
// Parse & validate the arguments.
if pid < 0 || param.is_null() {
return error!(EINVAL);
}
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
let reset_on_fork = policy & SCHED_RESET_ON_FORK != 0;
let policy = SchedulingPolicy::try_from(policy & !SCHED_RESET_ON_FORK)?;
let realtime_priority =
policy.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
// TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
let current_state = target_task.read().scheduler_state;
// Check capabilities and permissions, if required, for the operation.
let euid_friendly = current_task.is_euid_friendly_with(&target_task);
let strengthening = current_state.realtime_priority < realtime_priority;
let rlimited = strengthening
&& realtime_priority
.exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
let clearing_reset_on_fork = current_state.reset_on_fork && !reset_on_fork;
let caught_in_idle_trap = current_state.policy == SchedulingPolicy::Idle
&& policy != SchedulingPolicy::Idle
&& current_state
.normal_priority
.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
if !euid_friendly || rlimited || clearing_reset_on_fork || caught_in_idle_trap {
security::check_task_capable(current_task, CAP_SYS_NICE)?;
}
security::check_setsched_access(current_task, &target_task)?;
// Apply the new scheduler configuration to the task.
target_task.set_scheduler_policy_priority_and_reset_on_fork(
policy,
realtime_priority,
reset_on_fork,
)?;
Ok(())
}
const CPU_SET_SIZE: usize = 128;
#[repr(C)]
#[derive(Debug, Copy, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct CpuSet {
bits: [u8; CPU_SET_SIZE],
}
impl Default for CpuSet {
fn default() -> Self {
Self { bits: [0; CPU_SET_SIZE] }
}
}
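/// Validates the `cpusetsize` argument of the affinity syscalls: it must be at least one
/// machine word and a multiple of the word size (4 bytes for arch32 tasks, 8 otherwise).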
fn check_cpu_set_alignment(current_task: &CurrentTask, cpusetsize: u32) -> Result<(), Errno> {
let alignment = if current_task.is_arch32() { 4 } else { 8 };
if cpusetsize < alignment || cpusetsize % alignment != 0 {
return error!(EINVAL);
}
Ok(())
}
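/// Builds an affinity mask with one bit set per CPU reported by Zircon, filling whole bytes at
/// a time; counts above `CPU_SET_SIZE * 8` are clamped. For example, on a 10-CPU system the
/// mask starts with the bytes 0xff, 0x03.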
fn get_default_cpu_set() -> CpuSet {
let mut result = CpuSet::default();
let mut cpus_count = zx::system_get_num_cpus();
let cpus_count_max = (CPU_SET_SIZE * 8) as u32;
if cpus_count > cpus_count_max {
log_error!("cpus_count={cpus_count}, greater than the {cpus_count_max} max supported.");
cpus_count = cpus_count_max;
}
let mut index = 0;
while cpus_count > 0 {
let count = std::cmp::min(cpus_count, 8);
let (shl, overflow) = 1_u8.overflowing_shl(count);
let mask = if overflow { u8::MAX } else { shl - 1 };
result.bits[index] = mask;
index += 1;
cpus_count -= count;
}
result
}
pub fn sys_sched_getaffinity(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
cpusetsize: u32,
user_mask: UserAddress,
) -> Result<usize, Errno> {
if pid < 0 {
return error!(EINVAL);
}
check_cpu_set_alignment(current_task, cpusetsize)?;
let weak = get_task_or_current(current_task, pid);
let _task = Task::from_weak(&weak)?;
// sched_setaffinity() is not implemented. Fake affinity mask based on the number of CPUs.
let mask = get_default_cpu_set();
let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
current_task.write_memory(user_mask, &mask.bits[..mask_size])?;
track_stub!(TODO("https://fxbug.dev/322874659"), "sched_getaffinity");
Ok(mask_size)
}
pub fn sys_sched_setaffinity(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
cpusetsize: u32,
user_mask: UserAddress,
) -> Result<(), Errno> {
if pid < 0 {
return error!(EINVAL);
}
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
check_cpu_set_alignment(current_task, cpusetsize)?;
let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
let mut mask = CpuSet::default();
current_task.read_memory_to_slice(user_mask, &mut mask.bits[..mask_size])?;
// Specified mask must include at least one valid CPU.
let max_mask = get_default_cpu_set();
let mut has_valid_cpu_in_mask = false;
for (l1, l2) in std::iter::zip(max_mask.bits, mask.bits) {
has_valid_cpu_in_mask = has_valid_cpu_in_mask || (l1 & l2 > 0);
}
if !has_valid_cpu_in_mask {
return error!(EINVAL);
}
if !current_task.is_euid_friendly_with(&target_task) {
security::check_task_capable(current_task, CAP_SYS_NICE)?;
}
// Currently, we ignore the mask and act as if the system immediately reset the mask
// to allow all CPUs.
track_stub!(TODO("https://fxbug.dev/322874889"), "sched_setaffinity");
Ok(())
}
pub fn sys_sched_getparam(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
param: UserRef<sched_param>,
) -> Result<(), Errno> {
if pid < 0 || param.is_null() {
return error!(EINVAL);
}
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
let param_value = target_task.read().scheduler_state.get_sched_param();
current_task.write_object(param, &param_value)?;
Ok(())
}
pub fn sys_sched_setparam(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
param: UserRef<sched_param>,
) -> Result<(), Errno> {
// Parse & validate the arguments.
if pid < 0 || param.is_null() {
return error!(EINVAL);
}
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
// TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
let current_state = target_task.read().scheduler_state;
let realtime_priority = current_state
.policy
.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
// Check capabilities and permissions, if required, for the operation.
let euid_friendly = current_task.is_euid_friendly_with(&target_task);
let strengthening = current_state.realtime_priority < realtime_priority;
let rlimited = strengthening
&& realtime_priority
.exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
if !euid_friendly || rlimited {
security::check_task_capable(current_task, CAP_SYS_NICE)?;
}
security::check_setsched_access(current_task, &target_task)?;
// Apply the new scheduler configuration to the task.
target_task.set_scheduler_priority(realtime_priority)?;
Ok(())
}
pub fn sys_sched_get_priority_min(
_locked: &mut Locked<Unlocked>,
_ctx: &CurrentTask,
policy: u32,
) -> Result<u8, Errno> {
min_priority_for_sched_policy(policy)
}
pub fn sys_sched_get_priority_max(
_locked: &mut Locked<Unlocked>,
_ctx: &CurrentTask,
policy: u32,
) -> Result<u8, Errno> {
max_priority_for_sched_policy(policy)
}
pub fn sys_ioprio_set(
_locked: &mut Locked<Unlocked>,
_current_task: &mut CurrentTask,
_which: i32,
_who: i32,
_ioprio: i32,
) -> Result<(), Errno> {
track_stub!(TODO("https://fxbug.dev/297591758"), "ioprio_set()");
error!(ENOSYS)
}
pub fn sys_prctl(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
option: u32,
arg2: u64,
arg3: u64,
arg4: u64,
arg5: u64,
) -> Result<SyscallResult, Errno> {
match option {
PR_SET_VMA => {
if arg2 != PR_SET_VMA_ANON_NAME as u64 {
track_stub!(TODO("https://fxbug.dev/322874826"), "prctl PR_SET_VMA", arg2);
return error!(ENOSYS);
}
let addr = UserAddress::from(arg3);
let length = arg4 as usize;
let name_addr = UserAddress::from(arg5);
let name = if name_addr.is_null() {
None
} else {
let name = UserCString::new(current_task, UserAddress::from(arg5));
let name = current_task.read_c_string_to_vec(name, 256).map_err(|e| {
// An overly long name produces EINVAL and not ENAMETOOLONG in Linux 5.15.
if e.code == ENAMETOOLONG { errno!(EINVAL) } else { e }
})?;
// Some characters are forbidden in VMA names.
if name.iter().any(|b| {
matches!(b,
0..=0x1f |
0x7f..=0xff |
b'\\' | b'`' | b'$' | b'[' | b']'
)
}) {
return error!(EINVAL);
}
Some(name)
};
current_task.mm()?.set_mapping_name(addr, length, name)?;
Ok(().into())
}
PR_SET_DUMPABLE => {
let mm = current_task.mm()?;
let mut dumpable = mm.dumpable.lock(locked);
*dumpable = if arg2 == 1 { DumpPolicy::User } else { DumpPolicy::Disable };
Ok(().into())
}
PR_GET_DUMPABLE => {
let mm = current_task.mm()?;
let dumpable = mm.dumpable.lock(locked);
Ok(match *dumpable {
DumpPolicy::Disable => 0.into(),
DumpPolicy::User => 1.into(),
})
}
PR_SET_PDEATHSIG => {
track_stub!(TODO("https://fxbug.dev/322874397"), "PR_SET_PDEATHSIG");
Ok(().into())
}
PR_SET_NAME => {
let addr = UserAddress::from(arg2);
let name = TaskCommand::new(&current_task.read_memory_to_array::<16>(addr)?);
current_task.set_command_name(name);
Ok(0.into())
}
PR_GET_NAME => {
let addr = UserAddress::from(arg2);
let name = current_task.command().prctl_name();
current_task.write_memory(addr, &name[..])?;
Ok(().into())
}
PR_SET_PTRACER => {
let allowed_ptracers = if arg2 == PR_SET_PTRACER_ANY as u64 {
PtraceAllowedPtracers::Any
} else if arg2 == 0 {
PtraceAllowedPtracers::None
} else {
if current_task.kernel().pids.read().get_task(arg2 as i32).upgrade().is_none() {
return error!(EINVAL);
}
PtraceAllowedPtracers::Some(arg2 as pid_t)
};
current_task.thread_group().write().allowed_ptracers = allowed_ptracers;
Ok(().into())
}
PR_GET_KEEPCAPS => {
Ok(current_task.current_creds().securebits.contains(SecureBits::KEEP_CAPS).into())
}
PR_SET_KEEPCAPS => {
if arg2 != 0 && arg2 != 1 {
return error!(EINVAL);
}
let mut creds = Credentials::clone(&current_task.current_creds());
creds.securebits.set(SecureBits::KEEP_CAPS, arg2 != 0);
current_task.set_creds(creds);
Ok(().into())
}
PR_SET_NO_NEW_PRIVS => {
// arg2 must be 1 and all other args must be 0; otherwise this should return EINVAL.
if arg2 != 1 || arg3 != 0 || arg4 != 0 || arg5 != 0 {
return error!(EINVAL);
}
current_task.write().enable_no_new_privs();
Ok(().into())
}
PR_GET_NO_NEW_PRIVS => {
// If any args are set, this should return EINVAL.
if arg2 != 0 || arg3 != 0 || arg4 != 0 {
return error!(EINVAL);
}
Ok(current_task.read().no_new_privs().into())
}
PR_GET_SECCOMP => {
if current_task.seccomp_filter_state.get() == SeccompStateValue::None {
Ok(0.into())
} else {
Ok(2.into())
}
}
PR_SET_SECCOMP => {
if arg2 == SECCOMP_MODE_STRICT as u64 {
return sys_seccomp(
locked,
current_task,
SECCOMP_SET_MODE_STRICT,
0,
UserAddress::NULL,
);
} else if arg2 == SECCOMP_MODE_FILTER as u64 {
return sys_seccomp(locked, current_task, SECCOMP_SET_MODE_FILTER, 0, arg3.into());
}
Ok(().into())
}
PR_GET_CHILD_SUBREAPER => {
let addr = UserAddress::from(arg2);
#[allow(clippy::bool_to_int_with_if)]
let value: i32 =
if current_task.thread_group().read().is_child_subreaper { 1 } else { 0 };
current_task.write_object(addr.into(), &value)?;
Ok(().into())
}
PR_SET_CHILD_SUBREAPER => {
current_task.thread_group().write().is_child_subreaper = arg2 != 0;
Ok(().into())
}
PR_GET_SECUREBITS => Ok(current_task.current_creds().securebits.bits().into()),
PR_SET_SECUREBITS => {
// TODO(security): This does not yet respect locked flags.
let mut creds = Credentials::clone(&current_task.current_creds());
security::check_task_capable(current_task, CAP_SETPCAP)?;
let securebits = SecureBits::from_bits(arg2 as u32).ok_or_else(|| {
track_stub!(TODO("https://fxbug.dev/322875244"), "PR_SET_SECUREBITS", arg2);
errno!(ENOSYS)
})?;
creds.securebits = securebits;
current_task.set_creds(creds);
Ok(().into())
}
PR_CAPBSET_READ => {
let cap = Capabilities::try_from(arg2)?;
Ok(current_task.current_creds().cap_bounding.contains(cap).into())
}
PR_CAPBSET_DROP => {
let mut creds = Credentials::clone(&current_task.current_creds());
security::check_task_capable(current_task, CAP_SETPCAP)?;
creds.cap_bounding.remove(Capabilities::try_from(arg2)?);
current_task.set_creds(creds);
Ok(().into())
}
PR_CAP_AMBIENT => {
let operation = arg2 as u32;
let capability_arg = Capabilities::try_from(arg3)?;
if arg4 != 0 || arg5 != 0 {
return error!(EINVAL);
}
// TODO(security): We don't currently validate capabilities, but this should return an
// error if the capability_arg is invalid.
match operation {
PR_CAP_AMBIENT_RAISE => {
let mut creds = Credentials::clone(&current_task.current_creds());
if !(creds.cap_permitted.contains(capability_arg)
&& creds.cap_inheritable.contains(capability_arg))
{
return error!(EPERM);
}
if creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE)
|| creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE_LOCKED)
{
return error!(EPERM);
}
creds.cap_ambient.insert(capability_arg);
current_task.set_creds(creds);
Ok(().into())
}
PR_CAP_AMBIENT_LOWER => {
let mut creds = Credentials::clone(&current_task.current_creds());
creds.cap_ambient.remove(capability_arg);
current_task.set_creds(creds);
Ok(().into())
}
PR_CAP_AMBIENT_IS_SET => {
Ok(current_task.current_creds().cap_ambient.contains(capability_arg).into())
}
PR_CAP_AMBIENT_CLEAR_ALL => {
if arg3 != 0 {
return error!(EINVAL);
}
let mut creds = Credentials::clone(&current_task.current_creds());
creds.cap_ambient = Capabilities::empty();
current_task.set_creds(creds);
Ok(().into())
}
_ => error!(EINVAL),
}
}
PR_SET_TIMERSLACK => {
current_task.write().set_timerslack_ns(arg2);
Ok(().into())
}
#[cfg(target_arch = "aarch64")]
PR_GET_TAGGED_ADDR_CTRL => {
track_stub!(TODO("https://fxbug.dev/408554469"), "PR_GET_TAGGED_ADDR_CTRL");
Ok(0.into())
}
#[cfg(target_arch = "aarch64")]
PR_SET_TAGGED_ADDR_CTRL => match u32::try_from(arg2).map_err(|_| errno!(EINVAL))? {
// Only untagged pointers are allowed, the default.
0 => Ok(().into()),
PR_TAGGED_ADDR_ENABLE => {
track_stub!(TODO("https://fxbug.dev/408554469"), "PR_TAGGED_ADDR_ENABLE");
error!(EINVAL)
}
unknown_mode => {
track_stub!(
TODO("https://fxbug.dev/408554469"),
"PR_SET_TAGGED_ADDR_CTRL unknown mode",
unknown_mode,
);
error!(EINVAL)
}
},
_ => {
track_stub!(TODO("https://fxbug.dev/322874733"), "prctl fallthrough", option);
error!(ENOSYS)
}
}
}
pub fn sys_ptrace(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
request: u32,
pid: pid_t,
addr: UserAddress,
data: UserAddress,
) -> Result<SyscallResult, Errno> {
match request {
PTRACE_TRACEME => ptrace_traceme(current_task),
PTRACE_ATTACH => ptrace_attach(locked, current_task, pid, PtraceAttachType::Attach, data),
PTRACE_SEIZE => ptrace_attach(locked, current_task, pid, PtraceAttachType::Seize, data),
_ => ptrace_dispatch(locked, current_task, request, pid, addr, data),
}
}
pub fn sys_set_tid_address(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
user_tid: UserRef<pid_t>,
) -> Result<pid_t, Errno> {
current_task.write().clear_child_tid = user_tid;
Ok(current_task.get_tid())
}
pub fn sys_getrusage(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
who: i32,
user_usage: RUsagePtr,
) -> Result<(), Errno> {
const RUSAGE_SELF: i32 = starnix_uapi::uapi::RUSAGE_SELF as i32;
const RUSAGE_THREAD: i32 = starnix_uapi::uapi::RUSAGE_THREAD as i32;
track_stub!(TODO("https://fxbug.dev/297370242"), "real rusage");
let time_stats = match who {
RUSAGE_CHILDREN => current_task.task.thread_group().read().children_time_stats,
RUSAGE_SELF => current_task.task.thread_group().time_stats(),
RUSAGE_THREAD => current_task.task.time_stats(),
_ => return error!(EINVAL),
};
let usage = rusage {
ru_utime: timeval_from_duration(time_stats.user_time),
ru_stime: timeval_from_duration(time_stats.system_time),
..rusage::default()
};
current_task.write_multi_arch_object(user_usage, usage)?;
Ok(())
}
type PrLimitRef = MultiArchUserRef<uapi::rlimit, uapi::arch32::rlimit>;
pub fn sys_getrlimit(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
resource: u32,
user_rlimit: PrLimitRef,
) -> Result<(), Errno> {
do_prlimit64(locked, current_task, 0, resource, PrLimitRef::null(current_task), user_rlimit)
}
pub fn sys_setrlimit(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
resource: u32,
user_rlimit: PrLimitRef,
) -> Result<(), Errno> {
do_prlimit64(locked, current_task, 0, resource, user_rlimit, PrLimitRef::null(current_task))
}
pub fn sys_prlimit64(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
user_resource: u32,
new_limit_ref: UserRef<uapi::rlimit>,
old_limit_ref: UserRef<uapi::rlimit>,
) -> Result<(), Errno> {
do_prlimit64::<uapi::rlimit>(
locked,
current_task,
pid,
user_resource,
new_limit_ref.into(),
old_limit_ref.into(),
)
}
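/// Shared implementation of `getrlimit()`, `setrlimit()`, and `prlimit64()`. A `pid` of 0 means
/// the current task. When `new_limit_ref` is non-null the limit is updated (subject to the
/// permission checks below), and when `old_limit_ref` is non-null the previous limit is written
/// back. `RLIMIT_STACK` is special-cased to report the fixed stack size of the target's address
/// space.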
pub fn do_prlimit64<T>(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid: pid_t,
user_resource: u32,
new_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
old_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
) -> Result<(), Errno>
where
T: FromBytes + IntoBytes + Immutable + From<uapi::rlimit> + Into<uapi::rlimit>,
{
let weak = get_task_or_current(current_task, pid);
let target_task = Task::from_weak(&weak)?;
// To get or set a resource limit of a process other than itself, the caller must have either:
// * the same `uid`, `euid`, `saved_uid`, `gid`, `egid`, and `saved_gid` as the target, or
// * the CAP_SYS_RESOURCE capability.
if current_task.get_pid() != target_task.get_pid() {
let self_creds = current_task.current_creds();
let target_creds = target_task.real_creds();
if self_creds.uid != target_creds.uid
|| self_creds.euid != target_creds.euid
|| self_creds.saved_uid != target_creds.saved_uid
|| self_creds.gid != target_creds.gid
|| self_creds.egid != target_creds.egid
|| self_creds.saved_gid != target_creds.saved_gid
{
security::check_task_capable(current_task, CAP_SYS_RESOURCE)?;
}
security::task_prlimit(
current_task,
&target_task,
!old_limit_ref.is_null(),
!new_limit_ref.is_null(),
)?;
}
let resource = Resource::from_raw(user_resource)?;
let old_limit = match resource {
// TODO: Integrate Resource::STACK with generic ResourceLimits machinery.
Resource::STACK => {
if !new_limit_ref.is_null() {
track_stub!(
TODO("https://fxbug.dev/322874791"),
"prlimit64 cannot set RLIMIT_STACK"
);
}
// The stack size is fixed at the moment, but if MAP_GROWSDOWN is implemented
// this should report the limit to which the stack can be grown.
let mm = target_task.mm()?;
let mm_state = mm.state.read();
let stack_size = mm_state.stack_size as u64;
rlimit { rlim_cur: stack_size, rlim_max: stack_size }
}
_ => {
let new_limit = if new_limit_ref.is_null() {
None
} else {
let new_limit = current_task.read_multi_arch_object(new_limit_ref)?;
if new_limit.rlim_cur > new_limit.rlim_max {
return error!(EINVAL);
}
Some(new_limit)
};
ThreadGroup::adjust_rlimits(locked, current_task, &target_task, resource, new_limit)?
}
};
if !old_limit_ref.is_null() {
current_task.write_multi_arch_object(old_limit_ref, old_limit)?;
}
Ok(())
}
pub fn sys_quotactl(
_locked: &mut Locked<Unlocked>,
_current_task: &CurrentTask,
_cmd: i32,
_special: UserRef<c_char>,
_id: i32,
_addr: UserRef<c_char>,
) -> Result<SyscallResult, Errno> {
track_stub!(TODO("https://fxbug.dev/297302197"), "quotacl()");
error!(ENOSYS)
}
pub fn sys_capget(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
user_header: UserRef<__user_cap_header_struct>,
user_data: UserRef<__user_cap_data_struct>,
) -> Result<(), Errno> {
let mut header = current_task.read_object(user_header)?;
let is_version_valid =
[_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
.contains(&header.version);
if !is_version_valid {
header.version = _LINUX_CAPABILITY_VERSION_3;
current_task.write_object(user_header, &header)?;
}
if user_data.is_null() {
return Ok(());
}
if !is_version_valid || header.pid < 0 {
return error!(EINVAL);
}
let weak = get_task_or_current(current_task, header.pid);
let target_task = Task::from_weak(&weak)?;
security::check_getcap_access(current_task, &target_task)?;
let (permitted, effective, inheritable) = {
let creds = &target_task.real_creds();
(creds.cap_permitted, creds.cap_effective, creds.cap_inheritable)
};
match header.version {
_LINUX_CAPABILITY_VERSION_1 => {
let data: [__user_cap_data_struct; 1] = [__user_cap_data_struct {
effective: effective.as_abi_v1(),
inheritable: inheritable.as_abi_v1(),
permitted: permitted.as_abi_v1(),
}];
current_task.write_objects(user_data, &data)?;
}
_LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
// Return 64 bit capabilities as two sets of 32 bit capabilities, little endian
let (permitted, effective, inheritable) =
(permitted.as_abi_v3(), effective.as_abi_v3(), inheritable.as_abi_v3());
let data: [__user_cap_data_struct; 2] = [
__user_cap_data_struct {
effective: effective.0,
inheritable: inheritable.0,
permitted: permitted.0,
},
__user_cap_data_struct {
effective: effective.1,
inheritable: inheritable.1,
permitted: permitted.1,
},
];
current_task.write_objects(user_data, &data)?;
}
_ => {
unreachable!("already returned if Linux capability version is not valid")
}
}
Ok(())
}
pub fn sys_capset(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
user_header: UserRef<__user_cap_header_struct>,
user_data: UserRef<__user_cap_data_struct>,
) -> Result<(), Errno> {
let mut header = current_task.read_object(user_header)?;
let is_version_valid =
[_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
.contains(&header.version);
if !is_version_valid {
header.version = _LINUX_CAPABILITY_VERSION_3;
current_task.write_object(user_header, &header)?;
return error!(EINVAL);
}
if header.pid != 0 && header.pid != current_task.tid {
return error!(EPERM);
}
let (new_permitted, new_effective, new_inheritable) = match header.version {
_LINUX_CAPABILITY_VERSION_1 => {
let data = current_task.read_object(user_data)?;
(
Capabilities::from_abi_v1(data.permitted),
Capabilities::from_abi_v1(data.effective),
Capabilities::from_abi_v1(data.inheritable),
)
}
_LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
let data =
current_task.read_objects_to_array::<__user_cap_data_struct, 2>(user_data)?;
(
Capabilities::from_abi_v3((data[0].permitted, data[1].permitted)),
Capabilities::from_abi_v3((data[0].effective, data[1].effective)),
Capabilities::from_abi_v3((data[0].inheritable, data[1].inheritable)),
)
}
_ => {
unreachable!("already returned if Linux capability version is not valid")
}
};
// Permission checks. Copied out of TLPI section 39.7.
let mut creds = Credentials::clone(&current_task.current_creds());
{
log_trace!(
"Capabilities({{permitted={:?} from {:?}, effective={:?} from {:?}, inheritable={:?} from {:?}}}, bounding={:?})",
new_permitted,
creds.cap_permitted,
new_effective,
creds.cap_effective,
new_inheritable,
creds.cap_inheritable,
creds.cap_bounding
);
if !creds.cap_inheritable.union(creds.cap_permitted).contains(new_inheritable) {
security::check_task_capable(current_task, CAP_SETPCAP)?;
}
if !creds.cap_inheritable.union(creds.cap_bounding).contains(new_inheritable) {
return error!(EPERM);
}
if !creds.cap_permitted.contains(new_permitted) {
return error!(EPERM);
}
if !new_permitted.contains(new_effective) {
return error!(EPERM);
}
}
let weak = get_task_or_current(current_task, header.pid);
let target_task = Task::from_weak(&weak)?;
security::check_setcap_access(current_task, &target_task)?;
creds.cap_permitted = new_permitted;
creds.cap_effective = new_effective;
creds.cap_inheritable = new_inheritable;
creds.cap_ambient = new_permitted & new_inheritable & creds.cap_ambient;
current_task.set_creds(creds);
Ok(())
}
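/// Dispatches the `seccomp()` operations. As on Linux, installing a filter with
/// `SECCOMP_SET_MODE_FILTER` requires either the `no_new_privs` attribute or `CAP_SYS_ADMIN`;
/// the filter program itself must contain between 1 and `BPF_MAXINSNS` instructions.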
pub fn sys_seccomp(
locked: &mut Locked<Unlocked>,
current_task: &mut CurrentTask,
operation: u32,
flags: u32,
args: UserAddress,
) -> Result<SyscallResult, Errno> {
match operation {
SECCOMP_SET_MODE_STRICT => {
if flags != 0 || args != UserAddress::NULL {
return error!(EINVAL);
}
current_task.set_seccomp_state(SeccompStateValue::Strict)?;
Ok(().into())
}
SECCOMP_SET_MODE_FILTER => {
if flags
& (SECCOMP_FILTER_FLAG_LOG
| SECCOMP_FILTER_FLAG_NEW_LISTENER
| SECCOMP_FILTER_FLAG_SPEC_ALLOW
| SECCOMP_FILTER_FLAG_TSYNC
| SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
!= flags
{
return error!(EINVAL);
}
if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0)
&& (flags & SECCOMP_FILTER_FLAG_TSYNC != 0)
&& (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH == 0)
{
return error!(EINVAL);
}
let fprog =
current_task.read_multi_arch_object(SockFProgPtr::new(current_task, args))?;
if fprog.len > BPF_MAXINSNS || fprog.len == 0 {
return error!(EINVAL);
}
let code: Vec<sock_filter> =
current_task.read_multi_arch_objects_to_vec(fprog.filter, fprog.len as usize)?;
if !current_task.read().no_new_privs() {
security::check_task_capable(current_task, CAP_SYS_ADMIN)
.map_err(|_| errno!(EACCES))?;
}
current_task.add_seccomp_filter(locked, code, flags)
}
SECCOMP_GET_ACTION_AVAIL => {
if flags != 0 || args.is_null() {
return error!(EINVAL);
}
let action: u32 = current_task.read_object(UserRef::new(args))?;
SeccompAction::is_action_available(action)
}
SECCOMP_GET_NOTIF_SIZES => {
if flags != 0 {
return error!(EINVAL);
}
track_stub!(TODO("https://fxbug.dev/322874791"), "SECCOMP_GET_NOTIF_SIZES");
error!(ENOSYS)
}
_ => {
track_stub!(TODO("https://fxbug.dev/322874916"), "seccomp fallthrough", operation);
error!(EINVAL)
}
}
}
pub fn sys_setgroups(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
size: usize,
groups_addr: UserAddress,
) -> Result<(), Errno> {
if size > NGROUPS_MAX as usize {
return error!(EINVAL);
}
let groups = current_task.read_objects_to_vec::<gid_t>(groups_addr.into(), size)?;
let mut creds = Credentials::clone(&current_task.current_creds());
if !creds.is_superuser() {
return error!(EPERM);
}
creds.groups = groups;
current_task.set_creds(creds);
Ok(())
}
pub fn sys_getgroups(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
size: usize,
groups_addr: UserAddress,
) -> Result<usize, Errno> {
if size > NGROUPS_MAX as usize {
return error!(EINVAL);
}
let creds = current_task.current_creds();
if size != 0 {
if size < creds.groups.len() {
return error!(EINVAL);
}
current_task.write_memory(groups_addr, creds.groups.as_slice().as_bytes())?;
}
Ok(creds.groups.len())
}
pub fn sys_setsid(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<pid_t, Errno> {
current_task.thread_group().setsid(locked)?;
Ok(current_task.get_pid())
}
// Note the asymmetry with sys_setpriority: this returns "kernel nice" which ranges
// from 1 (weakest) to 40 (strongest). (It is part of Linux history that this syscall
// deals with niceness but has "priority" in its name.)
pub fn sys_getpriority(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
which: u32,
who: i32,
) -> Result<u8, Errno> {
match which {
PRIO_PROCESS => {}
// TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
_ => return error!(EINVAL),
}
track_stub!(TODO("https://fxbug.dev/322893809"), "getpriority permissions");
let weak = get_task_or_current(current_task, who);
let target_task = Task::from_weak(&weak)?;
let state = target_task.read();
Ok(state.scheduler_state.normal_priority.raw_priority())
}
// Note the asymmetry with sys_getpriority: this call's `priority` parameter is a
// "user nice" which ranges from -20 (strongest) to 19 (weakest) (other values can be
// passed and are clamped to that range and interpretation). (It is part of Linux
// history that this syscall deals with niceness but has "priority" in its name.)
pub fn sys_setpriority(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
which: u32,
who: i32,
priority: i32,
) -> Result<(), Errno> {
// Parse & validate the arguments.
match which {
PRIO_PROCESS => {}
// TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
_ => return error!(EINVAL),
}
let weak = get_task_or_current(current_task, who);
let target_task = Task::from_weak(&weak)?;
let normal_priority = NormalPriority::from_setpriority_syscall(priority);
// TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
let current_state = target_task.read().scheduler_state;
// Check capabilities and permissions, if required, for the operation.
let euid_friendly = current_task.is_euid_friendly_with(&target_task);
let strengthening = current_state.normal_priority < normal_priority;
let rlimited = strengthening
&& normal_priority.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
if !euid_friendly {
security::check_task_capable(current_task, CAP_SYS_NICE)?;
} else if rlimited {
security::check_task_capable(current_task, CAP_SYS_NICE).map_err(|_| errno!(EACCES))?;
}
security::check_setsched_access(current_task, &target_task)?;
// Apply the new scheduler configuration to the task.
target_task.set_scheduler_nice(normal_priority)?;
Ok(())
}
pub fn sys_setns(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
ns_fd: FdNumber,
ns_type: c_int,
) -> Result<(), Errno> {
let file_handle = current_task.task.files.get(ns_fd)?;
// According to the man pages this is not quite right, because some namespace types require
// additional capabilities or require this capability in multiple namespaces, but it covers our
// current test cases and can be made more nuanced once more namespace types are supported.
security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
if let Some(mount_ns) = file_handle.downcast_file::<MountNamespaceFile>() {
if !(ns_type == 0 || ns_type == CLONE_NEWNS as i32) {
log_trace!("invalid type");
return error!(EINVAL);
}
track_stub!(TODO("https://fxbug.dev/297312091"), "setns CLONE_FS limitations");
current_task.task.fs().set_namespace(mount_ns.0.clone())?;
return Ok(());
}
if let Some(_pidfd) = file_handle.downcast_file::<PidFdFileObject>() {
track_stub!(TODO("https://fxbug.dev/297312844"), "setns w/ pidfd");
return error!(ENOSYS);
}
track_stub!(TODO("https://fxbug.dev/322893829"), "unknown ns file for setns, see logs");
log_info!("ns_fd was not a supported namespace file: {}", file_handle.ops_type_name());
error!(EINVAL)
}
pub fn sys_unshare(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
flags: u32,
) -> Result<(), Errno> {
const IMPLEMENTED_FLAGS: u32 = CLONE_FILES | CLONE_FS | CLONE_NEWNS | CLONE_NEWUTS;
if flags & !IMPLEMENTED_FLAGS != 0 {
track_stub!(TODO("https://fxbug.dev/322893372"), "unshare", flags & !IMPLEMENTED_FLAGS);
return error!(EINVAL);
}
if (flags & CLONE_FILES) != 0 {
current_task.files.unshare();
}
if (flags & CLONE_FS) != 0 {
current_task.unshare_fs();
}
if (flags & CLONE_NEWNS) != 0 {
security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
current_task.fs().unshare_namespace();
}
if (flags & CLONE_NEWUTS) != 0 {
security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
// Fork the UTS namespace.
let mut task_state = current_task.write();
let new_uts_ns = task_state.uts_ns.read().clone();
task_state.uts_ns = Arc::new(RwLock::new(new_uts_ns));
}
Ok(())
}
pub fn sys_swapon(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
user_path: UserCString,
_flags: i32,
) -> Result<(), Errno> {
const MAX_SWAPFILES: usize = 32; // See https://man7.org/linux/man-pages/man2/swapon.2.html
security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
track_stub!(TODO("https://fxbug.dev/322893905"), "swapon validate flags");
let path = current_task.read_path(user_path)?;
let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
let node = file.node();
let mode = node.info().mode;
if !mode.is_reg() && !mode.is_blk() {
return error!(EINVAL);
}
// We determined this magic number by using the mkswap tool and the file tool. The mkswap tool
// populates a few bytes in the file, including a UUID, which can be replaced with zeros while
// still being recognized by the file tool. This string appears at a fixed offset
// (MAGIC_OFFSET) in the file, making it look quite like a magic number.
const MAGIC_OFFSET: usize = 0xff6;
let swap_magic = b"SWAPSPACE2";
let mut buffer = VecOutputBuffer::new(swap_magic.len());
if file.read_at(locked, current_task, MAGIC_OFFSET, &mut buffer)? != swap_magic.len()
|| buffer.data() != swap_magic
{
return error!(EINVAL);
}
let mut swap_files = current_task.kernel().swap_files.lock(locked);
for swap_node in swap_files.iter() {
if Arc::ptr_eq(swap_node, node) {
return error!(EBUSY);
}
}
if swap_files.len() >= MAX_SWAPFILES {
return error!(EPERM);
}
swap_files.push(node.clone());
Ok(())
}
pub fn sys_swapoff(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
user_path: UserCString,
) -> Result<(), Errno> {
security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
let path = current_task.read_path(user_path)?;
let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
let node = file.node();
let mut swap_files = current_task.kernel().swap_files.lock(locked);
let original_length = swap_files.len();
swap_files.retain(|swap_node| !Arc::ptr_eq(swap_node, node));
if swap_files.len() == original_length {
return error!(EINVAL);
}
Ok(())
}
#[derive(Default, Debug, IntoBytes, KnownLayout, FromBytes, Immutable)]
#[repr(C)]
struct KcmpParams {
mask: usize,
shuffle: usize,
}
static KCMP_PARAMS: LazyLock<KcmpParams> = LazyLock::new(|| {
let mut params = KcmpParams::default();
zx::cprng_draw(params.as_mut_bytes());
// Ensure the shuffle is odd so that multiplying a usize by this value is a permutation.
params.shuffle |= 1;
params
});
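/// Obfuscates a kernel value before it is compared for `kcmp()`: XOR with a random mask, then
/// multiply by a random odd constant. Multiplication by an odd constant is a bijection on
/// `usize`, so equality (and thus kcmp's notion of sharing) is preserved while the resulting
/// ordering reveals nothing about the raw kernel pointers.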
fn obfuscate_value(value: usize) -> usize {
let KcmpParams { mask, shuffle } = *KCMP_PARAMS;
(value ^ mask).wrapping_mul(shuffle)
}
fn obfuscate_ptr<T>(ptr: *const T) -> usize {
obfuscate_value(ptr as usize)
}
fn obfuscate_arc<T>(arc: &Arc<T>) -> usize {
obfuscate_ptr(Arc::as_ptr(arc))
}
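/// Compares whether two processes share a given kernel resource (see kcmp(2)). Both targets
/// must pass a `PTRACE_MODE_READ_REALCREDS` access check; resources are compared via their
/// obfuscated pointers, so the result is stable but leaks no addresses to userspace.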
pub fn sys_kcmp(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
pid1: pid_t,
pid2: pid_t,
resource_type: u32,
index1: u64,
index2: u64,
) -> Result<u32, Errno> {
let weak1 = current_task.get_task(pid1);
let weak2 = current_task.get_task(pid2);
let task1 = Task::from_weak(&weak1)?;
let task2 = Task::from_weak(&weak2)?;
current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task1)?;
current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task2)?;
let resource_type = KcmpResource::from_raw(resource_type)?;
// Output encoding (see <https://man7.org/linux/man-pages/man2/kcmp.2.html>):
//
// 0 v1 is equal to v2; in other words, the two processes share the resource.
// 1 v1 is less than v2.
// 2 v1 is greater than v2.
// 3 v1 is not equal to v2, but ordering information is unavailable.
//
fn encode_ordering(value: cmp::Ordering) -> u32 {
match value {
cmp::Ordering::Equal => 0,
cmp::Ordering::Less => 1,
cmp::Ordering::Greater => 2,
}
}
match resource_type {
KcmpResource::FILE => {
fn get_file(task: &Task, index: u64) -> Result<FileHandle, Errno> {
// TODO: Test whether O_PATH is allowed here. Conceptually, it seems like
// O_PATH should be allowed, but we haven't tested it yet.
task.files.get_allowing_opath(FdNumber::from_raw(
index.try_into().map_err(|_| errno!(EBADF))?,
))
}
let file1 = get_file(&task1, index1)?;
let file2 = get_file(&task2, index2)?;
Ok(encode_ordering(obfuscate_arc(&file1).cmp(&obfuscate_arc(&file2))))
}
KcmpResource::FILES => Ok(encode_ordering(
obfuscate_value(task1.files.id().raw()).cmp(&obfuscate_value(task2.files.id().raw())),
)),
KcmpResource::FS => {
Ok(encode_ordering(obfuscate_arc(&task1.fs()).cmp(&obfuscate_arc(&task2.fs()))))
}
KcmpResource::SIGHAND => Ok(encode_ordering(
obfuscate_arc(&task1.thread_group().signal_actions)
.cmp(&obfuscate_arc(&task2.thread_group().signal_actions)),
)),
KcmpResource::VM => {
Ok(encode_ordering(obfuscate_arc(&task1.mm()?).cmp(&obfuscate_arc(&task2.mm()?))))
}
_ => error!(EINVAL),
}
}
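/// syslog(2): dispatches on the requested `SyslogAction` after checking syslog access for the
/// calling task. Read and ReadAll copy log data into the user buffer at `address`; SizeUnread
/// and SizeBuffer report sizes; the clear and console actions are currently stubs that report
/// success.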
pub fn sys_syslog(
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
action_type: i32,
address: UserAddress,
length: i32,
) -> Result<i32, Errno> {
let action = SyslogAction::try_from(action_type)?;
let syslog =
current_task.kernel().syslog.access(&current_task, SyslogAccess::Syscall(action))?;
match action {
SyslogAction::Read => {
if address.is_null() || length < 0 {
return error!(EINVAL);
}
let mut output_buffer =
UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
syslog.blocking_read(locked, current_task, &mut output_buffer)
}
SyslogAction::ReadAll => {
if address.is_null() || length < 0 {
return error!(EINVAL);
}
let mut output_buffer =
UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
syslog.read_all(current_task, &mut output_buffer)
}
SyslogAction::SizeUnread => syslog.size_unread(),
SyslogAction::SizeBuffer => syslog.size_buffer(),
SyslogAction::Close | SyslogAction::Open => Ok(0),
SyslogAction::ReadClear => {
track_stub!(TODO("https://fxbug.dev/322894145"), "syslog: read clear");
Ok(0)
}
SyslogAction::Clear => {
track_stub!(TODO("https://fxbug.dev/322893673"), "syslog: clear");
Ok(0)
}
SyslogAction::ConsoleOff => {
track_stub!(TODO("https://fxbug.dev/322894399"), "syslog: console off");
Ok(0)
}
SyslogAction::ConsoleOn => {
track_stub!(TODO("https://fxbug.dev/322894106"), "syslog: console on");
Ok(0)
}
SyslogAction::ConsoleLevel => {
if length <= 0 || length >= 8 {
return error!(EINVAL);
}
track_stub!(TODO("https://fxbug.dev/322894199"), "syslog: console level");
Ok(0)
}
}
}
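/// vhangup(2): requires CAP_SYS_TTY_CONFIG; performing the actual terminal hangup is not yet
/// implemented.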
pub fn sys_vhangup(
_locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
) -> Result<(), Errno> {
security::check_task_capable(current_task, CAP_SYS_TTY_CONFIG)?;
track_stub!(TODO("https://fxbug.dev/324079257"), "vhangup");
Ok(())
}
// Re-export syscalls under their arch32 (compat) names for aarch64.
#[cfg(target_arch = "aarch64")]
mod arch32 {
pub use super::{
sys_execve as sys_arch32_execve, sys_getegid as sys_arch32_getegid32,
sys_geteuid as sys_arch32_geteuid32, sys_getgid as sys_arch32_getgid32,
sys_getgroups as sys_arch32_getgroups32, sys_getpgid as sys_arch32_getpgid,
sys_getppid as sys_arch32_getppid, sys_getpriority as sys_arch32_getpriority,
sys_getresgid as sys_arch32_getresgid32, sys_getresuid as sys_arch32_getresuid32,
sys_getrlimit as sys_arch32_ugetrlimit, sys_getrusage as sys_arch32_getrusage,
sys_getuid as sys_arch32_getuid32, sys_ioprio_set as sys_arch32_ioprio_set,
sys_ptrace as sys_arch32_ptrace, sys_quotactl as sys_arch32_quotactl,
sys_sched_get_priority_max as sys_arch32_sched_get_priority_max,
sys_sched_get_priority_min as sys_arch32_sched_get_priority_min,
sys_sched_getaffinity as sys_arch32_sched_getaffinity,
sys_sched_getparam as sys_arch32_sched_getparam,
sys_sched_setaffinity as sys_arch32_sched_setaffinity,
sys_sched_setparam as sys_arch32_sched_setparam,
sys_sched_setscheduler as sys_arch32_sched_setscheduler, sys_seccomp as sys_arch32_seccomp,
sys_setfsuid as sys_arch32_setfsuid, sys_setfsuid as sys_arch32_setfsuid32,
sys_setgid as sys_arch32_setgid32, sys_setgroups as sys_arch32_setgroups32,
sys_setns as sys_arch32_setns, sys_setpgid as sys_arch32_setpgid,
sys_setpriority as sys_arch32_setpriority, sys_setregid as sys_arch32_setregid32,
sys_setresgid as sys_arch32_setresgid32, sys_setresuid as sys_arch32_setresuid32,
sys_setreuid as sys_arch32_setreuid32, sys_setreuid as sys_arch32_setreuid,
sys_setrlimit as sys_arch32_setrlimit, sys_setsid as sys_arch32_setsid,
sys_syslog as sys_arch32_syslog, sys_unshare as sys_arch32_unshare,
};
}
#[cfg(target_arch = "aarch64")]
pub use arch32::*;
#[cfg(test)]
mod tests {
use super::*;
use crate::mm::syscalls::sys_munmap;
use crate::testing::{AutoReleasableTask, map_memory, spawn_kernel_and_run};
use starnix_syscalls::SUCCESS;
use starnix_task_command::TaskCommand;
use starnix_uapi::auth::Credentials;
use starnix_uapi::{SCHED_FIFO, SCHED_NORMAL};
use std::ffi::CString;
#[::fuchsia::test]
async fn test_prctl_set_vma_anon_name() {
spawn_kernel_and_run(async |locked, current_task| {
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let name_addr = (mapped_address + 128u64).unwrap();
let name = "test-name\0";
current_task.write_memory(name_addr, name.as_bytes()).expect("failed to write name");
sys_prctl(
locked,
current_task,
PR_SET_VMA,
PR_SET_VMA_ANON_NAME as u64,
mapped_address.ptr() as u64,
32,
name_addr.ptr() as u64,
)
.expect("failed to set name");
assert_eq!(
"test-name",
current_task
.mm()
.unwrap()
.get_mapping_name((mapped_address + 24u64).unwrap())
.expect("failed to get address")
.unwrap()
.to_string(),
);
sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize)
.expect("failed to unmap memory");
assert_eq!(
error!(EFAULT),
current_task.mm().unwrap().get_mapping_name((mapped_address + 24u64).unwrap())
);
})
.await;
}
#[::fuchsia::test]
async fn test_set_vma_name_special_chars() {
spawn_kernel_and_run(async |locked, current_task| {
let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let mapping_addr =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
for c in 1..255 {
let vma_name = CString::new([c]).unwrap();
current_task.write_memory(name_addr, vma_name.as_bytes_with_nul()).unwrap();
let result = sys_prctl(
locked,
current_task,
PR_SET_VMA,
PR_SET_VMA_ANON_NAME as u64,
mapping_addr.ptr() as u64,
*PAGE_SIZE,
name_addr.ptr() as u64,
);
if c > 0x1f
&& c < 0x7f
&& c != b'\\'
&& c != b'`'
&& c != b'$'
&& c != b'['
&& c != b']'
{
assert_eq!(result, Ok(SUCCESS));
} else {
assert_eq!(result, error!(EINVAL));
}
}
})
.await;
}
#[::fuchsia::test]
async fn test_set_vma_name_long() {
spawn_kernel_and_run(async |locked, current_task| {
let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let mapping_addr =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let name_too_long = CString::new(vec![b'a'; 256]).unwrap();
current_task.write_memory(name_addr, name_too_long.as_bytes_with_nul()).unwrap();
assert_eq!(
sys_prctl(
locked,
current_task,
PR_SET_VMA,
PR_SET_VMA_ANON_NAME as u64,
mapping_addr.ptr() as u64,
*PAGE_SIZE,
name_addr.ptr() as u64,
),
error!(EINVAL)
);
let name_just_long_enough = CString::new(vec![b'a'; 255]).unwrap();
current_task
.write_memory(name_addr, name_just_long_enough.as_bytes_with_nul())
.unwrap();
assert_eq!(
sys_prctl(
locked,
current_task,
PR_SET_VMA,
PR_SET_VMA_ANON_NAME as u64,
mapping_addr.ptr() as u64,
*PAGE_SIZE,
name_addr.ptr() as u64,
),
Ok(SUCCESS)
);
})
.await;
}
#[::fuchsia::test]
async fn test_set_vma_name_misaligned() {
spawn_kernel_and_run(async |locked, current_task| {
let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let mapping_addr =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let name = CString::new("name").unwrap();
current_task.write_memory(name_addr, name.as_bytes_with_nul()).unwrap();
// Passing a misaligned pointer to the start of the named region fails.
assert_eq!(
sys_prctl(
locked,
current_task,
PR_SET_VMA,
PR_SET_VMA_ANON_NAME as u64,
1 + mapping_addr.ptr() as u64,
*PAGE_SIZE - 1,
name_addr.ptr() as u64,
),
error!(EINVAL)
);
// Passing an unaligned length does work, however.
assert_eq!(
sys_prctl(
locked,
current_task,
PR_SET_VMA,
PR_SET_VMA_ANON_NAME as u64,
mapping_addr.ptr() as u64,
*PAGE_SIZE - 1,
name_addr.ptr() as u64,
),
Ok(SUCCESS)
);
})
.await;
}
#[::fuchsia::test]
async fn test_prctl_get_set_dumpable() {
spawn_kernel_and_run(async |locked, current_task| {
sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
.expect("failed to get dumpable");
sys_prctl(locked, current_task, PR_SET_DUMPABLE, 1, 0, 0, 0)
.expect("failed to set dumpable");
sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
.expect("failed to get dumpable");
// SUID_DUMP_ROOT is not supported.
sys_prctl(locked, current_task, PR_SET_DUMPABLE, 2, 0, 0, 0)
.expect("failed to set dumpable");
sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
.expect("failed to get dumpable");
})
.await;
}
#[::fuchsia::test]
async fn test_sys_getsid() {
spawn_kernel_and_run(async |locked, current_task| {
let kernel = current_task.kernel();
assert_eq!(
current_task.get_tid(),
sys_getsid(locked, &current_task, 0).expect("failed to get sid")
);
let second_task = crate::execution::create_init_child_process(
locked,
&kernel.weak_self.upgrade().unwrap(),
TaskCommand::new(b"second task"),
Some(&CString::new("#kernel").unwrap()),
)
.expect("failed to create second task");
second_task
.mm()
.unwrap()
.initialize_mmap_layout_for_test(starnix_types::arch::ArchWidth::Arch64);
let second_current = AutoReleasableTask::from(second_task);
assert_eq!(
second_current.get_tid(),
sys_getsid(locked, &current_task, second_current.get_tid())
.expect("failed to get sid")
);
})
.await;
}
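// sched_getaffinity requires the cpuset buffer size to be a multiple of the word size and
// copies at most size_of::<CpuSet>() bytes.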
#[::fuchsia::test]
async fn test_get_affinity_size() {
spawn_kernel_and_run(async |locked, current_task| {
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let pid = current_task.get_pid();
assert_eq!(
sys_sched_getaffinity(locked, &current_task, pid, 16, mapped_address),
Ok(16)
);
assert_eq!(
sys_sched_getaffinity(locked, &current_task, pid, 1024, mapped_address),
Ok(std::mem::size_of::<CpuSet>())
);
assert_eq!(
sys_sched_getaffinity(locked, &current_task, pid, 1, mapped_address),
error!(EINVAL)
);
assert_eq!(
sys_sched_getaffinity(locked, &current_task, pid, 9, mapped_address),
error!(EINVAL)
);
})
.await;
}
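// sched_setaffinity accepts oversized masks; an undersized (1-byte) buffer is rejected with
// EINVAL.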
#[::fuchsia::test]
async fn test_set_affinity_size() {
spawn_kernel_and_run(async |locked, current_task| {
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
current_task.write_memory(mapped_address, &[0xffu8]).expect("failed to write cpumask");
let pid = current_task.get_pid();
assert_eq!(
sys_sched_setaffinity(
locked,
&current_task,
pid,
*PAGE_SIZE as u32,
mapped_address
),
Ok(())
);
assert_eq!(
sys_sched_setaffinity(locked, &current_task, pid, 1, mapped_address),
error!(EINVAL)
);
})
.await;
}
#[::fuchsia::test]
async fn test_task_name() {
spawn_kernel_and_run(async |locked, current_task| {
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let name = "my-task-name\0";
current_task
.write_memory(mapped_address, name.as_bytes())
.expect("failed to write name");
let result =
sys_prctl(locked, current_task, PR_SET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
.unwrap();
assert_eq!(SUCCESS, result);
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let result =
sys_prctl(locked, current_task, PR_GET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
.unwrap();
assert_eq!(SUCCESS, result);
let name_length = name.len();
let out_name = current_task.read_memory_to_vec(mapped_address, name_length).unwrap();
assert_eq!(name.as_bytes(), &out_name);
})
.await;
}
#[::fuchsia::test]
async fn test_sched_get_priority_min_max() {
spawn_kernel_and_run(async |locked, current_task| {
let non_rt_min =
sys_sched_get_priority_min(locked, &current_task, SCHED_NORMAL).unwrap();
assert_eq!(non_rt_min, 0);
let non_rt_max =
sys_sched_get_priority_max(locked, &current_task, SCHED_NORMAL).unwrap();
assert_eq!(non_rt_max, 0);
let rt_min = sys_sched_get_priority_min(locked, &current_task, SCHED_FIFO).unwrap();
assert_eq!(rt_min, 1);
let rt_max = sys_sched_get_priority_max(locked, &current_task, SCHED_FIFO).unwrap();
assert_eq!(rt_max, 99);
let min_bad_policy_error =
sys_sched_get_priority_min(locked, &current_task, u32::MAX).unwrap_err();
assert_eq!(min_bad_policy_error, errno!(EINVAL));
let max_bad_policy_error =
sys_sched_get_priority_max(locked, &current_task, u32::MAX).unwrap_err();
assert_eq!(max_bad_policy_error, errno!(EINVAL));
})
.await;
}
#[::fuchsia::test]
async fn test_sched_setscheduler() {
spawn_kernel_and_run(async |locked, current_task| {
current_task
.thread_group()
.limits
.lock(locked)
.set(Resource::RTPRIO, rlimit { rlim_cur: 255, rlim_max: 255 });
let scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
assert_eq!(scheduler, SCHED_NORMAL, "tasks should have normal scheduler by default");
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let requested_params = sched_param { sched_priority: 15 };
current_task.write_object(mapped_address.into(), &requested_params).unwrap();
sys_sched_setscheduler(locked, &current_task, 0, SCHED_FIFO, mapped_address.into())
.unwrap();
let new_scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
assert_eq!(new_scheduler, SCHED_FIFO, "task should have been assigned fifo scheduler");
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
.expect("sched_getparam");
let param_value: sched_param =
current_task.read_object(mapped_address.into()).expect("read_object");
assert_eq!(param_value.sched_priority, 15);
})
.await;
}
#[::fuchsia::test]
async fn test_sched_getparam() {
spawn_kernel_and_run(async |locked, current_task| {
let mapped_address =
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
.expect("sched_getparam");
let param_value: sched_param =
current_task.read_object(mapped_address.into()).expect("read_object");
assert_eq!(param_value.sched_priority, 0);
})
.await;
}
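// With CAP_SETUID (e.g. as root), setuid() sets the real, effective, and saved uids; without
// it, only the effective uid may change, and only to the current real, effective, or saved
// uid.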
#[::fuchsia::test]
async fn test_setuid() {
spawn_kernel_and_run(async |locked, current_task| {
// Test for root.
current_task.set_creds(Credentials::clone(&Credentials::root()));
sys_setuid(locked, &current_task, 42).expect("setuid");
let mut creds = Credentials::clone(&current_task.current_creds());
assert_eq!(creds.euid, 42);
assert_eq!(creds.uid, 42);
assert_eq!(creds.saved_uid, 42);
// Remove the CAP_SETUID capability so the permission checks below are not bypassed.
creds.cap_effective.remove(CAP_SETUID);
current_task.set_creds(creds);
// Test for non-root, which the task now is.
assert_eq!(sys_setuid(locked, &current_task, 0), error!(EPERM));
assert_eq!(sys_setuid(locked, &current_task, 43), error!(EPERM));
sys_setuid(locked, &current_task, 42).expect("setuid");
let creds = current_task.clone_creds();
assert_eq!(creds.euid, 42);
assert_eq!(creds.uid, 42);
assert_eq!(creds.saved_uid, 42);
// Change uid and saved_uid, and check that one can set the euid to these.
let mut creds = Credentials::clone(&current_task.current_creds());
creds.uid = 41;
creds.euid = 42;
creds.saved_uid = 43;
current_task.set_creds(creds);
sys_setuid(locked, &current_task, 41).expect("setuid");
let creds = current_task.clone_creds();
assert_eq!(creds.euid, 41);
assert_eq!(creds.uid, 41);
assert_eq!(creds.saved_uid, 43);
let mut creds = Credentials::clone(&current_task.current_creds());
creds.uid = 41;
creds.euid = 42;
creds.saved_uid = 43;
current_task.set_creds(creds);
sys_setuid(locked, &current_task, 43).expect("setuid");
let creds = current_task.clone_creds();
assert_eq!(creds.euid, 43);
assert_eq!(creds.uid, 41);
assert_eq!(creds.saved_uid, 43);
})
.await;
}
#[::fuchsia::test]
async fn test_read_c_string_vector() {
spawn_kernel_and_run(async |locked, current_task| {
let arg_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
let arg = b"test-arg\0";
current_task.write_memory(arg_addr, arg).expect("failed to write test arg");
let arg_usercstr = UserCString::new(current_task, arg_addr);
let null_usercstr = UserCString::null(current_task);
let argv_addr = UserCStringPtr::new(
current_task,
map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE),
);
current_task
.write_multi_arch_ptr(argv_addr.addr(), arg_usercstr)
.expect("failed to write UserCString");
current_task
.write_multi_arch_ptr(argv_addr.next().unwrap().addr(), null_usercstr)
.expect("failed to write UserCString");
// The arguments size limit should include the null terminator.
assert!(read_c_string_vector(&current_task, argv_addr, 100, arg.len()).is_ok());
assert_eq!(
read_c_string_vector(
&current_task,
argv_addr,
100,
std::str::from_utf8(arg).unwrap().trim_matches('\0').len()
),
error!(E2BIG)
);
})
.await;
}
}