| // Copyright 2023 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| use crate::arch::task::{decode_page_fault_exception_report, get_signal_for_general_exception}; |
| use crate::execution::{TaskInfo, create_zircon_process}; |
| use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, TaskMemoryAccessor}; |
| use crate::ptrace::{PtraceCoreState, PtraceEvent, PtraceEventData, PtraceOptions, StopState}; |
| use crate::security; |
| use crate::signals::{RunState, SignalInfo, send_signal_first, send_standard_signal}; |
| use crate::task::loader::{ResolvedElf, load_executable, resolve_executable}; |
| use crate::task::waiter::WaiterOptions; |
| use crate::task::{ |
| ExitStatus, RobustListHeadPtr, SeccompFilter, SeccompFilterContainer, SeccompNotifierHandle, |
| SeccompState, SeccompStateValue, Task, TaskFlags, Waiter, |
| }; |
| use crate::vfs::{ |
| CheckAccessReason, FdFlags, FdNumber, FileHandle, FsStr, LookupContext, MAX_SYMLINK_FOLLOWS, |
| NamespaceNode, ResolveBase, SymlinkMode, SymlinkTarget, new_pidfd, |
| }; |
| use extended_pstate::ExtendedPstateState; |
| use futures::FutureExt; |
| use linux_uapi::CLONE_PIDFD; |
| use starnix_logging::{log_error, log_warn, track_file_not_found, track_stub}; |
| use starnix_registers::{HeapRegs, RegisterState, RegisterStorage, RegisterStorageEnum}; |
| use starnix_stack::clean_stack; |
| use starnix_sync::{ |
| EventWaitGuard, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, MmDumpable, |
| ProcessGroupState, TaskRelease, Unlocked, WakeReason, |
| }; |
| use starnix_syscalls::SyscallResult; |
| use starnix_syscalls::decls::Syscall; |
| use starnix_task_command::TaskCommand; |
| use starnix_types::arch::ArchWidth; |
| use starnix_types::futex_address::FutexAddress; |
| use starnix_types::ownership::{OwnedRef, Releasable, TempRef, WeakRef, release_on_error}; |
| use starnix_uapi::auth::{ |
| CAP_KILL, CAP_SYS_ADMIN, CAP_SYS_PTRACE, Credentials, FsCred, PTRACE_MODE_FSCREDS, |
| PTRACE_MODE_REALCREDS, PtraceAccessMode, UserAndOrGroupId, |
| }; |
| use starnix_uapi::device_type::DeviceType; |
| use starnix_uapi::errors::{Errno, ErrnoCode}; |
| use starnix_uapi::file_mode::{Access, AccessCheck, FileMode}; |
| use starnix_uapi::open_flags::OpenFlags; |
| use starnix_uapi::signals::{ |
| SIGBUS, SIGCHLD, SIGCONT, SIGILL, SIGKILL, SIGSEGV, SIGSYS, SIGTRAP, SigSet, Signal, |
| UncheckedSignal, |
| }; |
| use starnix_uapi::user_address::{ArchSpecific, UserAddress, UserRef}; |
| use starnix_uapi::vfs::ResolveFlags; |
| use starnix_uapi::{ |
| CLONE_CHILD_CLEARTID, CLONE_CHILD_SETTID, CLONE_FILES, CLONE_FS, CLONE_INTO_CGROUP, |
| CLONE_NEWUTS, CLONE_PARENT, CLONE_PARENT_SETTID, CLONE_PTRACE, CLONE_SETTLS, CLONE_SIGHAND, |
| CLONE_SYSVSEM, CLONE_THREAD, CLONE_VFORK, CLONE_VM, FUTEX_OWNER_DIED, FUTEX_TID_MASK, |
| ROBUST_LIST_LIMIT, SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_NEW_LISTENER, |
| SECCOMP_FILTER_FLAG_TSYNC, SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SI_KERNEL, clone_args, errno, |
| error, from_status_like_fdio, pid_t, sock_filter, ucred, |
| }; |
| use std::cell::{Ref, RefCell}; |
| use std::collections::VecDeque; |
| use std::ffi::CString; |
| use std::fmt; |
| use std::marker::PhantomData; |
| use std::mem::MaybeUninit; |
| use std::sync::Arc; |
| use zx::sys::zx_restricted_state_t; |
| |
| use super::ThreadGroupLifecycleWaitValue; |
| |
/// A partially-constructed [`Task`] together with the thread state needed to
/// start executing it. Converted into a `CurrentTask` when the task is bound
/// to its own thread (see `From<TaskBuilder> for CurrentTask`).
pub struct TaskBuilder {
    /// The underlying task object.
    pub task: OwnedRef<Task>,

    /// Thread-local register and syscall-restart state, heap-backed until the
    /// task starts running.
    pub thread_state: ThreadState<HeapRegs>,
}
| |
| impl TaskBuilder { |
| pub fn new(task: OwnedRef<Task>) -> Self { |
| Self { task, thread_state: Default::default() } |
| } |
| |
| #[inline(always)] |
| pub fn release<L>(self, locked: &mut Locked<L>) |
| where |
| L: LockBefore<TaskRelease>, |
| { |
| let locked = locked.cast_locked::<TaskRelease>(); |
| Releasable::release(self, locked); |
| } |
| } |
| |
| impl From<TaskBuilder> for CurrentTask { |
| fn from(builder: TaskBuilder) -> Self { |
| Self::new(builder.task, builder.thread_state.into()) |
| } |
| } |
| |
impl Releasable for TaskBuilder {
    type Context<'a> = &'a mut Locked<TaskRelease>;

    /// Tears down a task that was built but never promoted to a `CurrentTask`.
    fn release<'a>(self, locked: Self::Context<'a>) {
        // Clone the kernel handle first: `self.task` is consumed below.
        let kernel = Arc::clone(self.kernel());
        let mut pids = kernel.pids.write();

        // We remove from the thread group here because the WeakRef in the pid
        // table to this task must be valid until this task is removed from the
        // thread group, and the code below will invalidate it.
        // Moreover, this requires a OwnedRef of the task to ensure the tasks of
        // the thread group are always valid.
        self.task.thread_group().remove(locked, &mut pids, &self.task);

        // Release the task itself, handing over the thread state (converted to
        // its enum form), the lock token, and the pid-table write guard.
        let context = (self.thread_state.into(), locked, pids);
        self.task.release(context);
    }
}
| |
| impl std::ops::Deref for TaskBuilder { |
| type Target = Task; |
| fn deref(&self) -> &Self::Target { |
| &self.task |
| } |
| } |
| |
/// Task permissions are determined from the task's credentials and, if
/// enabled, from its SEStarnix security state.
#[derive(Debug, Clone)]
pub struct FullCredentials {
    /// The task's credentials (uid/gid and related state; see `Credentials`).
    pub creds: Arc<Credentials>,
    /// The SEStarnix security state associated with the task.
    pub security_state: security::TaskState,
}
| |
| impl FullCredentials { |
| pub fn for_kernel() -> Self { |
| Self { creds: Credentials::root(), security_state: security::task_alloc_for_kernel() } |
| } |
| } |
| |
/// The task object associated with the currently executing thread.
///
/// We often pass the `CurrentTask` as the first argument to functions if those functions need to
/// know contextual information about the thread on which they are running. For example, we often
/// use the `CurrentTask` to perform access checks, which ensures that the caller is authorized to
/// perform the requested operation.
///
/// The `CurrentTask` also has state that can be referenced only on the currently executing thread,
/// such as the register state for that thread. Syscalls are given a mutable reference to the
/// `CurrentTask`, which lets them manipulate this state.
///
/// See also `Task` for more information about tasks.
pub struct CurrentTask {
    /// The underlying task object.
    pub task: OwnedRef<Task>,

    /// Register, extended-processor, and syscall-restart state for the thread
    /// running this task.
    pub thread_state: ThreadState<RegisterStorageEnum>,

    /// The current subjective credentials of the task.
    // TODO(https://fxbug.dev/433548348): Avoid interior mutability here by passing a
    // &mut CurrentTask around instead of &CurrentTask.
    pub current_creds: RefCell<CurrentCreds>,

    /// Makes CurrentTask neither Sync nor Send.
    _local_marker: PhantomData<*mut u8>,
}
| |
/// Represents the current state of the task's subjective credentials.
pub enum CurrentCreds {
    /// The task does not have overridden credentials, the subjective creds are identical to the
    /// objective creds. Since credentials are often accessed from the current task, we hold a
    /// reference here that does not necessitate going through the Rcu machinery to read.
    /// The subjective security state is stored on the Task.
    Cached(Arc<Credentials>),
    /// The task has overridden credentials, with the given credentials and security state.
    /// Installed and removed by `CurrentTask::override_creds_async`.
    // TODO(https://fxbug.dev/433463756): TaskState will soon move into Credentials.
    Overridden(Arc<Credentials>, security::TaskState),
}
| |
| impl CurrentCreds { |
| fn creds(&self) -> &Arc<Credentials> { |
| match self { |
| CurrentCreds::Cached(creds) => creds, |
| CurrentCreds::Overridden(creds, _) => creds, |
| } |
| } |
| } |
| |
/// The thread related information of a `CurrentTask`. The information should never be used outside
/// of the thread owning the `CurrentTask`.
#[derive(Default)]
pub struct ThreadState<T: RegisterStorage> {
    /// A copy of the registers associated with the Zircon thread. Up-to-date values can be read
    /// from `self.handle.read_state_general_regs()`. To write these values back to the thread, call
    /// `self.handle.write_state_general_regs(self.thread_state.registers.into())`.
    /// `T` selects the storage backing (heap vs. enum) for the registers.
    pub registers: RegisterState<T>,

    /// Copy of the current extended processor state including floating point and vector registers.
    pub extended_pstate: ExtendedPstateState,

    /// The errno code (if any) that indicated this task should restart a syscall.
    pub restart_code: Option<ErrnoCode>,

    /// A custom function to resume a syscall that has been interrupted by SIGSTOP.
    /// To use, call set_syscall_restart_func and return ERESTART_RESTARTBLOCK. sys_restart_syscall
    /// will eventually call it.
    pub syscall_restart_func: Option<Box<SyscallRestartFunc>>,

    /// An architecture agnostic enum indicating the width (32 or 64 bits) of the execution
    /// environment in use.
    pub arch_width: ArchWidth,
}
| |
impl<T: RegisterStorage> ThreadState<T> {
    /// Returns a new `ThreadState` with the same `registers` as this one.
    ///
    /// Unlike [`Self::extended_snapshot`], the extended processor state is
    /// reset to its default rather than copied; the restart function is
    /// likewise not carried over.
    fn snapshot<R: RegisterStorage>(&self) -> ThreadState<R>
    where
        RegisterState<R>: From<RegisterState<T>>,
    {
        ThreadState::<R> {
            registers: self.registers.clone().into(),
            extended_pstate: Default::default(),
            restart_code: self.restart_code,
            syscall_restart_func: None,
            arch_width: self.arch_width,
        }
    }

    /// Like [`Self::snapshot`], but also copies the extended processor state
    /// (floating point / vector registers).
    pub fn extended_snapshot<R: RegisterStorage>(&self) -> ThreadState<R>
    where
        RegisterState<R>: From<RegisterState<T>>,
    {
        ThreadState::<R> {
            registers: self.registers.clone().into(),
            extended_pstate: self.extended_pstate.clone(),
            restart_code: self.restart_code,
            syscall_restart_func: None,
            arch_width: self.arch_width,
        }
    }

    /// Overwrites this state's registers, extended processor state and
    /// architecture width with `other`'s. Restart state is left untouched.
    pub fn replace_registers<O: RegisterStorage>(&mut self, other: &ThreadState<O>) {
        self.registers.load(*other.registers);
        self.extended_pstate = other.extended_pstate;
        self.arch_width = other.arch_width;
    }

    /// Reads the register identified by byte `offset` as a `usize`.
    /// Fails if `offset` does not name a valid user-visible register.
    // NOTE(review): presumably used for ptrace-style user-register access —
    // confirm against callers.
    pub fn get_user_register(&mut self, offset: usize) -> Result<usize, Errno> {
        let mut result: usize = 0;
        self.registers.apply_user_register(offset, &mut |register| result = *register as usize)?;
        Ok(result)
    }

    /// Writes `value` to the register identified by byte `offset`.
    /// Fails if `offset` does not name a valid user-visible register.
    pub fn set_user_register(&mut self, offset: usize, value: usize) -> Result<(), Errno> {
        self.registers.apply_user_register(offset, &mut |register| *register = value as u64)
    }
}
| |
| impl From<ThreadState<HeapRegs>> for ThreadState<RegisterStorageEnum> { |
| fn from(value: ThreadState<HeapRegs>) -> Self { |
| ThreadState { |
| registers: value.registers.into(), |
| extended_pstate: value.extended_pstate, |
| restart_code: value.restart_code, |
| syscall_restart_func: value.syscall_restart_func, |
| arch_width: value.arch_width, |
| } |
| } |
| } |
| |
impl<T: RegisterStorage> ArchSpecific for ThreadState<T> {
    /// Whether this thread state describes a 32-bit execution environment,
    /// delegating to the stored `ArchWidth`.
    fn is_arch32(&self) -> bool {
        self.arch_width.is_arch32()
    }
}
| |
/// Signature of a deferred syscall-restart callback; stored in
/// `ThreadState::syscall_restart_func` and invoked by `sys_restart_syscall`.
type SyscallRestartFunc = dyn FnOnce(&mut Locked<Unlocked>, &mut CurrentTask) -> Result<SyscallResult, Errno>
    + Send
    + Sync;
| |
impl Releasable for CurrentTask {
    type Context<'a> = &'a mut Locked<TaskRelease>;

    /// Tears down the task as its thread exits.
    fn release<'a>(self, locked: Self::Context<'a>) {
        // Perform exit-time userspace notifications before the task goes away:
        // process the robust futex list and clear the child-tid address.
        // Failure to clear the tid is ignored (the address space may be gone).
        self.notify_robust_list();
        let _ignored = self.clear_child_tid_if_needed(locked);

        // Clone the kernel handle first: `self.task` is consumed below.
        let kernel = Arc::clone(self.kernel());
        let mut pids = kernel.pids.write();

        // We remove from the thread group here because the WeakRef in the pid
        // table to this task must be valid until this task is removed from the
        // thread group, and the code below will invalidate it.
        // Moreover, this requires a OwnedRef of the task to ensure the tasks of
        // the thread group are always valid.
        self.task.thread_group().remove(locked, &mut pids, &self.task);

        // Release the task itself, handing over the thread state, the lock
        // token, and the pid-table write guard.
        let context = (self.thread_state, locked, pids);
        self.task.release(context);
    }
}
| |
| impl std::ops::Deref for CurrentTask { |
| type Target = Task; |
| fn deref(&self) -> &Self::Target { |
| &self.task |
| } |
| } |
| |
impl fmt::Debug for CurrentTask {
    /// Debug output delegates to the underlying task.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.task.fmt(f)
    }
}
| |
| impl CurrentTask { |
| pub fn new(task: OwnedRef<Task>, thread_state: ThreadState<RegisterStorageEnum>) -> Self { |
| let current_creds = RefCell::new(CurrentCreds::Cached(task.clone_creds())); |
| Self { task, thread_state, current_creds, _local_marker: Default::default() } |
| } |
| |
| /// Returns the current subjective credentials of the task. |
| /// |
| /// The subjective credentials are the credentials that are used to check permissions for |
| /// actions performed by the task. |
| pub fn current_creds(&self) -> Ref<'_, Arc<Credentials>> { |
| Ref::map(self.current_creds.borrow(), CurrentCreds::creds) |
| } |
| |
| /// Returns the current subjective credentials of the task, including the security state. |
| pub fn full_current_creds(&self) -> FullCredentials { |
| match *self.current_creds.borrow() { |
| CurrentCreds::Cached(ref creds) => FullCredentials { |
| creds: creds.clone(), |
| security_state: self.security_state.clone(), |
| }, |
| CurrentCreds::Overridden(ref creds, ref security_state) => { |
| FullCredentials { creds: creds.clone(), security_state: security_state.clone() } |
| } |
| } |
| } |
| |
| pub fn current_fscred(&self) -> FsCred { |
| self.current_creds().as_fscred() |
| } |
| |
| pub fn current_ucred(&self) -> ucred { |
| let creds = self.current_creds(); |
| ucred { pid: self.get_pid(), uid: creds.uid, gid: creds.gid } |
| } |
| |
    /// Temporarily replaces the subjective creds and security state with
    /// `new_creds` for the duration of `callback`.
    ///
    /// The creds and security state are restored to their original values when
    /// `callback` completes. Only the "subjective" state of the CurrentTask,
    /// accessed with `current_creds()` and used to check permissions for
    /// actions performed by the task, is altered. The "objective" state,
    /// accessed through `Task::real_creds()` by other tasks and used to check
    /// permissions for actions performed on the task, is not altered, and
    /// changes to the credentials are not externally visible.
    ///
    /// NOTE(review): the saved creds are restored only after the future
    /// completes; if the future is dropped mid-await, the override stays in
    /// place — confirm callers never drop it early.
    pub async fn override_creds_async<R>(
        &self,
        new_creds: FullCredentials,
        callback: impl AsyncFnOnce() -> R,
    ) -> R {
        // Install the override, remembering whatever was there before.
        let saved = self
            .current_creds
            .replace(CurrentCreds::Overridden(new_creds.creds, new_creds.security_state));
        let result = callback().await;
        // Restore the previous subjective state.
        self.current_creds.replace(saved);
        result
    }
| |
    /// Synchronous form of [`Self::override_creds_async`]: runs `callback`
    /// with `new_creds` installed as the subjective creds and security state,
    /// restoring the originals afterwards.
    ///
    /// Only the "subjective" state of the CurrentTask, accessed with
    /// `current_creds()` and used to check permissions for actions performed
    /// by the task, is altered. The "objective" state, accessed through
    /// `Task::real_creds()` by other tasks and used to check permissions for
    /// actions performed on the task, is not altered, and changes to the
    /// credentials are not externally visible.
    pub fn override_creds<R>(&self, new_creds: FullCredentials, callback: impl FnOnce() -> R) -> R {
        // The wrapped future never awaits, so it completes on the first poll.
        self.override_creds_async(new_creds, async move || callback())
            .now_or_never()
            .expect("Future should be ready")
    }
| |
| pub fn has_overridden_creds(&self) -> bool { |
| matches!(*self.current_creds.borrow(), CurrentCreds::Overridden(_, _)) |
| } |
| |
| pub fn trigger_delayed_releaser<L>(&self, locked: &mut Locked<L>) |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| let locked = locked.cast_locked::<FileOpsCore>(); |
| self.kernel().delayed_releaser.apply(locked, self); |
| } |
| |
| pub fn weak_task(&self) -> WeakRef<Task> { |
| WeakRef::from(&self.task) |
| } |
| |
| pub fn temp_task(&self) -> TempRef<'_, Task> { |
| TempRef::from(&self.task) |
| } |
| |
    /// Change the current and real creds of the task. This is invalid to call while temporary
    /// credentials are present.
    pub fn set_creds(&self, creds: Credentials) {
        // Overridden subjective creds would be silently discarded by the
        // cache update below, so forbid this combination.
        assert!(!self.has_overridden_creds());

        let creds = Arc::new(creds);
        // Update the cached subjective creds first so `current_creds()`
        // immediately observes the new value.
        let mut current_creds = self.current_creds.borrow_mut();
        *current_creds = CurrentCreds::Cached(creds.clone());

        // SAFETY: this is allowed because we are the CurrentTask.
        unsafe {
            self.persistent_info.write_creds().update(creds);
        }
        // The /proc/pid directory's ownership is updated when the task's euid
        // or egid changes. See proc(5).
        let maybe_node = self.proc_pid_directory_cache.lock();
        if let Some(node) = &*maybe_node {
            let creds = self.real_creds().euid_as_fscred();
            // SAFETY: The /proc/pid directory held by `proc_pid_directory_cache` represents the
            // current task. It's owner and group are supposed to track the current task's euid and
            // egid.
            unsafe {
                node.force_chown(creds);
            }
        }
    }
| |
| #[inline(always)] |
| pub fn release<L>(self, locked: &mut Locked<L>) |
| where |
| L: LockBefore<TaskRelease>, |
| { |
| let locked = locked.cast_locked::<TaskRelease>(); |
| Releasable::release(self, locked); |
| } |
| |
| pub fn set_syscall_restart_func<R: Into<SyscallResult>>( |
| &mut self, |
| f: impl FnOnce(&mut Locked<Unlocked>, &mut CurrentTask) -> Result<R, Errno> |
| + Send |
| + Sync |
| + 'static, |
| ) { |
| self.thread_state.syscall_restart_func = |
| Some(Box::new(|locked, current_task| Ok(f(locked, current_task)?.into()))); |
| } |
| |
    /// Installs `file` into this task's fd table with the given flags,
    /// returning the newly allocated fd number.
    pub fn add_file<L>(
        &self,
        locked: &mut Locked<L>,
        file: FileHandle,
        flags: FdFlags,
    ) -> Result<FdNumber, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.files.add(locked, self, file, flags)
    }
| |
    /// Sets the task's signal mask to `signal_mask` and runs `wait_function`.
    ///
    /// Signals are dequeued prior to the original signal mask being restored. This is done by the
    /// signal machinery in the syscall dispatch loop.
    ///
    /// The returned result is the result returned from the wait function.
    pub fn wait_with_temporary_mask<F, T, L>(
        &mut self,
        locked: &mut Locked<L>,
        signal_mask: SigSet,
        wait_function: F,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        F: FnOnce(&mut Locked<L>, &CurrentTask) -> Result<T, Errno>,
    {
        {
            // Mark the mask as temporary and install it; the syscall dispatch
            // loop restores the original mask once signals are dequeued.
            let mut state = self.write();
            state.set_flags(TaskFlags::TEMPORARY_SIGNAL_MASK, true);
            state.set_temporary_signal_mask(signal_mask);
        }
        wait_function(locked, self)
    }
| |
    /// If waking, promotes from waking to awake. If not waking, make waiter async
    /// wait until woken. Returns true if woken.
    pub fn wake_or_wait_until_unstopped_async(&self, waiter: &Waiter) -> bool {
        // Take the thread-group state before the task state.
        let group_state = self.thread_group().read();
        let mut task_state = self.write();

        // Wake up if
        // a) we should wake up, meaning:
        //    i) we're in group stop, and the thread group has exited group stop, or
        //    ii) we're waking up,
        // b) and ptrace isn't stopping us from waking up, but
        // c) always wake up if we got a SIGKILL.
        let task_stop_state = self.load_stopped();
        let group_stop_state = self.thread_group().load_stopped();
        if ((task_stop_state == StopState::GroupStopped && group_stop_state.is_waking_or_awake())
            || task_stop_state.is_waking_or_awake())
            && (!task_state.is_ptrace_listening() || task_stop_state.is_force())
        {
            // Finalize the task-level stop state if it was the one waking;
            // otherwise finalize the group-level state.
            let new_state = if task_stop_state.is_waking_or_awake() {
                task_stop_state.finalize()
            } else {
                group_stop_state.finalize()
            };
            if let Ok(new_state) = new_state {
                task_state.set_stopped(new_state, None, Some(self), None);
                // Drop both guards before touching the thread group below.
                drop(group_state);
                drop(task_state);
                // It is possible for the stop state to be changed by another
                // thread between when it is checked above and the following
                // invocation, but set_stopped does sufficient checking while
                // holding the lock to make sure that such a change won't result
                // in corrupted state.
                self.thread_group().set_stopped(new_state, None, false);
                return true;
            }
        }

        // We will wait.
        if self.thread_group().load_stopped().is_stopped() || task_stop_state.is_stopped() {
            // If we've stopped or PTRACE_LISTEN has been sent, wait for a
            // signal or instructions from the tracer.
            group_state
                .lifecycle_waiters
                .wait_async_value(&waiter, ThreadGroupLifecycleWaitValue::Stopped);
            task_state.wait_on_ptracer(&waiter);
        } else if task_state.can_accept_ptrace_commands() {
            // If we're stopped because a tracer has seen the stop and not taken
            // further action, wait for further instructions from the tracer.
            task_state.wait_on_ptracer(&waiter);
        } else if task_state.is_ptrace_listening() {
            // A PTRACE_LISTEN is a state where we can get signals and notify a
            // ptracer, but otherwise remain blocked.
            if let Some(ptrace) = &mut task_state.ptrace {
                ptrace.set_last_signal(Some(SignalInfo::default(SIGTRAP)));
                ptrace.set_last_event(Some(PtraceEventData::new_from_event(PtraceEvent::Stop, 0)));
            }
            task_state.wait_on_ptracer(&waiter);
            task_state.notify_ptracers();
        }
        false
    }
| |
    /// Set the RunState for the current task to the given value and then call the given callback.
    ///
    /// When the callback is done, the run_state is restored to `RunState::Running`.
    ///
    /// This function is typically used just before blocking the current task on some operation.
    /// The given `run_state` registers the mechanism for interrupting the blocking operation with
    /// the task and the given `callback` actually blocks the task.
    ///
    /// This function can only be called in the `RunState::Running` state and cannot set the
    /// run state to `RunState::Running`. For this reason, this function cannot be reentered.
    pub fn run_in_state<F, T>(&self, run_state: RunState, callback: F) -> Result<T, Errno>
    where
        F: FnOnce() -> Result<T, Errno>,
    {
        assert_ne!(run_state, RunState::Running);

        // As an optimization, decommit unused pages of the stack to reduce memory pressure while
        // the thread is blocked.
        clean_stack();

        {
            let mut state = self.write();
            // Reentrancy guard: a task may not block while already blocked.
            assert!(!state.is_blocked());

            if matches!(run_state, RunState::Frozen(_)) {
                // Freeze is a kernel signal and is handled before other user signals. A frozen task
                // ignores all other signals except SIGKILL until it is thawed.
                if state.has_signal_pending(SIGKILL) {
                    return error!(EINTR);
                }
            } else if state.is_any_signal_pending() && !state.is_ptrace_listening() {
                // A note on PTRACE_LISTEN - the thread cannot be scheduled
                // regardless of pending signals.
                return error!(EINTR);
            }
            state.set_run_state(run_state.clone());
        }

        let result = callback();

        {
            // Restore to Running; the run state must not have been changed by
            // anyone else while the callback was blocking.
            let mut state = self.write();
            assert_eq!(
                state.run_state(),
                run_state,
                "SignalState run state changed while waiting!"
            );
            state.set_run_state(RunState::Running);
        };

        result
    }
| |
| pub fn block_until( |
| &self, |
| guard: EventWaitGuard<'_>, |
| deadline: zx::MonotonicInstant, |
| ) -> Result<(), Errno> { |
| self.run_in_state(RunState::Event(guard.event().clone()), move || { |
| guard.block_until(None, deadline).map_err(|e| match e { |
| WakeReason::Interrupted => errno!(EINTR), |
| WakeReason::DeadlineExpired => errno!(ETIMEDOUT), |
| }) |
| }) |
| } |
| |
| pub fn block_with_owner_until( |
| &self, |
| guard: EventWaitGuard<'_>, |
| new_owner: &zx::Thread, |
| deadline: zx::MonotonicInstant, |
| ) -> Result<(), Errno> { |
| self.run_in_state(RunState::Event(guard.event().clone()), move || { |
| guard.block_until(Some(new_owner), deadline).map_err(|e| match e { |
| WakeReason::Interrupted => errno!(EINTR), |
| WakeReason::DeadlineExpired => errno!(ETIMEDOUT), |
| }) |
| }) |
| } |
| |
    /// Determine namespace node indicated by the dir_fd.
    ///
    /// Returns the namespace node and the path to use relative to that node.
    pub fn resolve_dir_fd<'a, L>(
        &self,
        locked: &mut Locked<L>,
        dir_fd: FdNumber,
        mut path: &'a FsStr,
        flags: ResolveFlags,
    ) -> Result<(NamespaceNode, &'a FsStr), Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        let path_is_absolute = path.starts_with(b"/");
        if path_is_absolute {
            // RESOLVE_BENEATH forbids absolute paths outright.
            if flags.contains(ResolveFlags::BENEATH) {
                return error!(EXDEV);
            }
            // Strip the leading '/' so lookup proceeds relative to `dir`.
            path = &path[1..];
        }

        // Pick the starting directory: the root for absolute paths (unless
        // RESOLVE_IN_ROOT re-anchors them at dir_fd), the cwd for AT_FDCWD,
        // otherwise the directory named by dir_fd.
        let dir = if path_is_absolute && !flags.contains(ResolveFlags::IN_ROOT) {
            self.fs().root()
        } else if dir_fd == FdNumber::AT_FDCWD {
            self.fs().cwd()
        } else {
            // O_PATH allowed for:
            //
            //   Passing the file descriptor as the dirfd argument of
            //   openat() and the other "*at()" system calls.  This
            //   includes linkat(2) with AT_EMPTY_PATH (or via procfs
            //   using AT_SYMLINK_FOLLOW) even if the file is not a
            //   directory.
            //
            // See https://man7.org/linux/man-pages/man2/open.2.html
            let file = self.files.get_allowing_opath(dir_fd)?;
            file.name.to_passive()
        };

        if !path.is_empty() {
            // A non-empty relative path requires `dir` to be a searchable
            // directory.
            if !dir.entry.node.is_dir() {
                return error!(ENOTDIR);
            }
            dir.check_access(
                locked,
                self,
                Access::EXEC,
                CheckAccessReason::InternalPermissionChecks,
            )?;
        }
        Ok((dir, path.into()))
    }
| |
| /// A convenient wrapper for opening files relative to FdNumber::AT_FDCWD. |
| /// |
| /// Returns a FileHandle but does not install the FileHandle in the FdTable |
| /// for this task. |
| pub fn open_file( |
| &self, |
| locked: &mut Locked<Unlocked>, |
| path: &FsStr, |
| flags: OpenFlags, |
| ) -> Result<FileHandle, Errno> { |
| if flags.contains(OpenFlags::CREAT) { |
| // In order to support OpenFlags::CREAT we would need to take a |
| // FileMode argument. |
| return error!(EINVAL); |
| } |
| self.open_file_at( |
| locked, |
| FdNumber::AT_FDCWD, |
| path, |
| flags, |
| FileMode::default(), |
| ResolveFlags::empty(), |
| AccessCheck::default(), |
| ) |
| } |
| |
    /// Resolves a path for open.
    ///
    /// If the final path component points to a symlink, the symlink is followed (as long as
    /// the symlink traversal limit has not been reached).
    ///
    /// If the final path component (after following any symlinks, if enabled) does not exist,
    /// and `flags` contains `OpenFlags::CREAT`, a new node is created at the location of the
    /// final path component.
    ///
    /// This returns the resolved node, and a boolean indicating whether the node has been created.
    fn resolve_open_path<L>(
        &self,
        locked: &mut Locked<L>,
        context: &mut LookupContext,
        dir: &NamespaceNode,
        path: &FsStr,
        mode: FileMode,
        flags: OpenFlags,
    ) -> Result<(NamespaceNode, bool), Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        context.update_for_path(path);
        // Resolve everything up to the last component, following symlinks.
        let mut parent_content = context.with(SymlinkMode::Follow);
        let (parent, basename) = self.lookup_parent(locked, &mut parent_content, dir, path)?;
        // Carry over how many symlink follows the parent walk consumed.
        context.remaining_follows = parent_content.remaining_follows;

        // O_CREAT|O_EXCL means the final component must not already exist.
        let must_create = flags.contains(OpenFlags::CREAT) && flags.contains(OpenFlags::EXCL);

        // Lookup the child, without following a symlink or expecting it to be a directory.
        let mut child_context = context.with(SymlinkMode::NoFollow);
        child_context.must_be_directory = false;

        match parent.lookup_child(locked, self, &mut child_context, basename) {
            Ok(name) => {
                if name.entry.node.is_lnk() {
                    if flags.contains(OpenFlags::PATH)
                        && context.symlink_mode == SymlinkMode::NoFollow
                    {
                        // When O_PATH is specified in flags, if pathname is a symbolic link
                        // and the O_NOFOLLOW flag is also specified, then the call returns
                        // a file descriptor referring to the symbolic link.
                        // See https://man7.org/linux/man-pages/man2/openat.2.html
                        //
                        // If the trailing component (i.e., basename) of
                        // pathname is a symbolic link, how.resolve contains
                        // RESOLVE_NO_SYMLINKS, and how.flags contains both
                        // O_PATH and O_NOFOLLOW, then an O_PATH file
                        // descriptor referencing the symbolic link will be
                        // returned.
                        // See https://man7.org/linux/man-pages/man2/openat2.2.html
                        return Ok((name, false));
                    }

                    if (!flags.contains(OpenFlags::PATH)
                        && context.symlink_mode == SymlinkMode::NoFollow)
                        || context.resolve_flags.contains(ResolveFlags::NO_SYMLINKS)
                        || context.remaining_follows == 0
                    {
                        if must_create {
                            // Since `must_create` is set, and a node was found, this returns EEXIST
                            // instead of ELOOP.
                            return error!(EEXIST);
                        }
                        // A symlink was found, but one of the following is true:
                        // * flags specified O_NOFOLLOW but not O_PATH.
                        // * how.resolve contains RESOLVE_NO_SYMLINKS
                        // * too many symlink traversals have been attempted
                        return error!(ELOOP);
                    }

                    // Consume one follow and recurse on the link target.
                    context.remaining_follows -= 1;
                    match name.readlink(locked, self)? {
                        SymlinkTarget::Path(path) => {
                            // Absolute targets restart at the root; relative
                            // targets resolve from the symlink's parent.
                            let dir = if path[0] == b'/' { self.fs().root() } else { parent };
                            self.resolve_open_path(
                                locked,
                                context,
                                &dir,
                                path.as_ref(),
                                mode,
                                flags,
                            )
                        }
                        SymlinkTarget::Node(name) => {
                            // Magic-link targets are rejected under
                            // RESOLVE_NO_MAGICLINKS, and a link-to-link here
                            // is treated as a loop.
                            if context.resolve_flags.contains(ResolveFlags::NO_MAGICLINKS)
                                || name.entry.node.is_lnk()
                            {
                                error!(ELOOP)
                            } else {
                                Ok((name, false))
                            }
                        }
                    }
                } else {
                    if must_create {
                        return error!(EEXIST);
                    }
                    Ok((name, false))
                }
            }
            Err(e) if e == errno!(ENOENT) && flags.contains(OpenFlags::CREAT) => {
                // The final component is missing and O_CREAT was given:
                // create a regular file there (never a directory).
                if context.must_be_directory {
                    return error!(EISDIR);
                }
                Ok((
                    parent.open_create_node(
                        locked,
                        self,
                        basename,
                        mode.with_type(FileMode::IFREG),
                        DeviceType::NONE,
                        flags,
                    )?,
                    true,
                ))
            }
            Err(e) => Err(e),
        }
    }
| |
| /// The primary entry point for opening files relative to a task. |
| /// |
| /// Absolute paths are resolve relative to the root of the FsContext for |
| /// this task. Relative paths are resolve relative to dir_fd. To resolve |
| /// relative to the current working directory, pass FdNumber::AT_FDCWD for |
| /// dir_fd. |
| /// |
| /// Returns a FileHandle but does not install the FileHandle in the FdTable |
| /// for this task. |
| pub fn open_file_at( |
| &self, |
| locked: &mut Locked<Unlocked>, |
| dir_fd: FdNumber, |
| path: &FsStr, |
| flags: OpenFlags, |
| mode: FileMode, |
| resolve_flags: ResolveFlags, |
| access_check: AccessCheck, |
| ) -> Result<FileHandle, Errno> { |
| if path.is_empty() { |
| return error!(ENOENT); |
| } |
| |
| let (dir, path) = self.resolve_dir_fd(locked, dir_fd, path, resolve_flags)?; |
| self.open_namespace_node_at(locked, dir, path, flags, mode, resolve_flags, access_check) |
| } |
| |
/// Opens `path` relative to the already-resolved namespace node `dir`.
///
/// This is the second half of `open_file_at`: it normalizes the open flags
/// (`O_LARGEFILE`, `O_PATH`, `O_TMPFILE`, ...), resolves the path honoring
/// the requested symlink-following and `RESOLVE_*` semantics, applies the
/// directory/truncation rules, and performs the final access check.
///
/// Returns a FileHandle but does not install it in the FdTable for this
/// task.
pub fn open_namespace_node_at(
    &self,
    locked: &mut Locked<Unlocked>,
    dir: NamespaceNode,
    path: &FsStr,
    flags: OpenFlags,
    mode: FileMode,
    mut resolve_flags: ResolveFlags,
    access_check: AccessCheck,
) -> Result<FileHandle, Errno> {
    // 64-bit kernels force the O_LARGEFILE flag to be on.
    let mut flags = flags | OpenFlags::LARGEFILE;
    let opath = flags.contains(OpenFlags::PATH);
    if opath {
        // When O_PATH is specified in flags, flag bits other than O_CLOEXEC,
        // O_DIRECTORY, and O_NOFOLLOW are ignored.
        const ALLOWED_FLAGS: OpenFlags = OpenFlags::from_bits_truncate(
            OpenFlags::PATH.bits()
                | OpenFlags::CLOEXEC.bits()
                | OpenFlags::DIRECTORY.bits()
                | OpenFlags::NOFOLLOW.bits(),
        );
        flags &= ALLOWED_FLAGS;
    }

    // O_TMPFILE creates an anonymous file for writing, so it requires a
    // writable open mode.
    if flags.contains(OpenFlags::TMPFILE) && !flags.can_write() {
        return error!(EINVAL);
    }

    let nofollow = flags.contains(OpenFlags::NOFOLLOW);
    let must_create = flags.contains(OpenFlags::CREAT) && flags.contains(OpenFlags::EXCL);

    // O_CREAT|O_EXCL must not follow a trailing symlink: the link itself is
    // the lookup result so that creation through a dangling link fails.
    let symlink_mode =
        if nofollow || must_create { SymlinkMode::NoFollow } else { SymlinkMode::Follow };

    let resolve_base = match (
        resolve_flags.contains(ResolveFlags::BENEATH),
        resolve_flags.contains(ResolveFlags::IN_ROOT),
    ) {
        (false, false) => ResolveBase::None,
        (true, false) => ResolveBase::Beneath(dir.clone()),
        (false, true) => ResolveBase::InRoot(dir.clone()),
        // RESOLVE_BENEATH and RESOLVE_IN_ROOT are mutually exclusive.
        (true, true) => return error!(EINVAL),
    };

    // `RESOLVE_BENEATH` and `RESOLVE_IN_ROOT` imply `RESOLVE_NO_MAGICLINKS`. This matches
    // Linux behavior. Strictly speaking it is not really required, but it's hard to
    // implement `BENEATH` and `IN_ROOT` flags correctly otherwise.
    if resolve_base != ResolveBase::None {
        resolve_flags.insert(ResolveFlags::NO_MAGICLINKS);
    }

    let mut context = LookupContext {
        symlink_mode,
        remaining_follows: MAX_SYMLINK_FOLLOWS,
        must_be_directory: flags.contains(OpenFlags::DIRECTORY),
        resolve_flags,
        resolve_base,
    };
    let (name, created) =
        match self.resolve_open_path(locked, &mut context, &dir, path, mode, flags) {
            Ok((n, c)) => (n, c),
            Err(e) => {
                // Record the absolute path that failed to resolve for
                // diagnostics before propagating the error.
                let mut abs_path = dir.path(&self.task);
                abs_path.extend(&**path);
                track_file_not_found(abs_path);
                return Err(e);
            }
        };

    let name = if flags.contains(OpenFlags::TMPFILE) {
        // `O_TMPFILE` is incompatible with `O_CREAT`
        if flags.contains(OpenFlags::CREAT) {
            return error!(EINVAL);
        }
        name.create_tmpfile(locked, self, mode.with_type(FileMode::IFREG), flags)?
    } else {
        let mode = name.entry.node.info().mode;

        // These checks are not needed in the `O_TMPFILE` case because `mode` refers to the
        // file we are opening. With `O_TMPFILE`, that file is the regular file we just
        // created rather than the node we found by resolving the path.
        //
        // For example, we do not need to produce `ENOTDIR` when `must_be_directory` is set
        // because `must_be_directory` refers to the node we found by resolving the path.
        // If that node was not a directory, then `create_tmpfile` will produce an error.
        //
        // Similarly, we never need to call `truncate` because `O_TMPFILE` is newly created
        // and therefore already an empty file.

        // O_NOFOLLOW on a symlink fails with ELOOP — unless O_PATH is set,
        // which opens the link itself.
        if !opath && nofollow && mode.is_lnk() {
            return error!(ELOOP);
        }

        if mode.is_dir() {
            // Directories may not be opened for writing, creation, or
            // truncation, and do not support O_DIRECT.
            if flags.can_write()
                || flags.contains(OpenFlags::CREAT)
                || flags.contains(OpenFlags::TRUNC)
            {
                return error!(EISDIR);
            }
            if flags.contains(OpenFlags::DIRECT) {
                return error!(EINVAL);
            }
        } else if context.must_be_directory {
            return error!(ENOTDIR);
        }

        if flags.contains(OpenFlags::TRUNC) && mode.is_reg() && !created {
            // You might think we should check file.can_write() at this
            // point, which is what the docs suggest, but apparently we
            // are supposed to truncate the file if this task can write
            // to the underlying node, even if we are opening the file
            // as read-only. See OpenTest.CanTruncateReadOnly.
            name.truncate(locked, self, 0)?;
        }

        name
    };

    // If the node has been created, the open operation should not verify access right:
    // From <https://man7.org/linux/man-pages/man2/open.2.html>
    //
    // > Note that mode applies only to future accesses of the newly created file; the
    // > open() call that creates a read-only file may well return a read/write file
    // > descriptor.

    let access_check = if created { AccessCheck::skip() } else { access_check };
    name.open(locked, self, flags, access_check)
}
| |
| /// A wrapper for FsContext::lookup_parent_at that resolves the given |
| /// dir_fd to a NamespaceNode. |
| /// |
| /// Absolute paths are resolve relative to the root of the FsContext for |
| /// this task. Relative paths are resolve relative to dir_fd. To resolve |
| /// relative to the current working directory, pass FdNumber::AT_FDCWD for |
| /// dir_fd. |
| pub fn lookup_parent_at<'a, L>( |
| &self, |
| locked: &mut Locked<L>, |
| context: &mut LookupContext, |
| dir_fd: FdNumber, |
| path: &'a FsStr, |
| ) -> Result<(NamespaceNode, &'a FsStr), Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| let (dir, path) = self.resolve_dir_fd(locked, dir_fd, path, ResolveFlags::empty())?; |
| self.lookup_parent(locked, context, &dir, path) |
| } |
| |
| /// Lookup the parent of a namespace node. |
| /// |
| /// Consider using Task::open_file_at or Task::lookup_parent_at rather than |
| /// calling this function directly. |
| /// |
| /// This function resolves all but the last component of the given path. |
| /// The function returns the parent directory of the last component as well |
| /// as the last component. |
| /// |
| /// If path is empty, this function returns dir and an empty path. |
| /// Similarly, if path ends with "." or "..", these components will be |
| /// returned along with the parent. |
| /// |
| /// The returned parent might not be a directory. |
| pub fn lookup_parent<'a, L>( |
| &self, |
| locked: &mut Locked<L>, |
| context: &mut LookupContext, |
| dir: &NamespaceNode, |
| path: &'a FsStr, |
| ) -> Result<(NamespaceNode, &'a FsStr), Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| context.update_for_path(path); |
| |
| let mut current_node = dir.clone(); |
| let mut it = path.split(|c| *c == b'/').filter(|p| !p.is_empty()).map(<&FsStr>::from); |
| let mut current_path_component = it.next().unwrap_or_default(); |
| for next_path_component in it { |
| current_node = |
| current_node.lookup_child(locked, self, context, current_path_component)?; |
| current_path_component = next_path_component; |
| } |
| Ok((current_node, current_path_component)) |
| } |
| |
| /// Lookup a namespace node. |
| /// |
| /// Consider using Task::open_file_at or Task::lookup_parent_at rather than |
| /// calling this function directly. |
| /// |
| /// This function resolves the component of the given path. |
| pub fn lookup_path<L>( |
| &self, |
| locked: &mut Locked<L>, |
| context: &mut LookupContext, |
| dir: NamespaceNode, |
| path: &FsStr, |
| ) -> Result<NamespaceNode, Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| let (parent, basename) = self.lookup_parent(locked, context, &dir, path)?; |
| parent.lookup_child(locked, self, context, basename) |
| } |
| |
| /// Lookup a namespace node starting at the root directory. |
| /// |
| /// Resolves symlinks. |
| pub fn lookup_path_from_root<L>( |
| &self, |
| locked: &mut Locked<L>, |
| path: &FsStr, |
| ) -> Result<NamespaceNode, Errno> |
| where |
| L: LockEqualOrBefore<FileOpsCore>, |
| { |
| let mut context = LookupContext::default(); |
| self.lookup_path(locked, &mut context, self.fs().root(), path) |
| } |
| |
/// Replaces this task's program image with `executable`, as for execve(2).
///
/// Checks that `executable` is a regular file with execute permission,
/// resolves it into a loadable image, computes any set-uid/set-gid
/// transition, and then performs the irreversible half in `finish_exec`.
///
/// Returns EACCES if the file is not regular or not executable, and EINVAL
/// if the process has more than one thread (exec on a multi-threaded
/// process is not yet implemented).
pub fn exec(
    &mut self,
    locked: &mut Locked<Unlocked>,
    executable: FileHandle,
    path: CString,
    argv: Vec<CString>,
    environ: Vec<CString>,
) -> Result<(), Errno> {
    // Executable must be a regular file
    if !executable.name.entry.node.is_reg() {
        return error!(EACCES);
    }

    // File node must have EXEC mode permissions.
    // Note that the ability to execute a file is unrelated to the flags
    // used in the `open` call.
    executable.name.check_access(locked, self, Access::EXEC, CheckAccessReason::Exec)?;

    // Ask the security module for the credentials the new image should run
    // with, before the image is resolved.
    let elf_security_state = security::bprm_creds_for_exec(self, &executable.name)?;

    let resolved_elf = resolve_executable(
        locked,
        self,
        executable,
        path.clone(),
        argv,
        environ,
        elf_security_state,
    )?;

    // Compute the set-uid/set-gid transition only when the kernel's suid
    // feature is enabled; otherwise no id change occurs.
    let maybe_set_id = if self.kernel().features.enable_suid {
        resolved_elf.file.name.suid_and_sgid(&self)?
    } else {
        Default::default()
    };

    if self.thread_group().read().tasks_count() > 1 {
        track_stub!(TODO("https://fxbug.dev/297434895"), "exec on multithread process");
        return error!(EINVAL);
    }

    // `finish_exec` tears down the old address space, so any error it
    // returns is unrecoverable: force-deliver SIGSEGV to kill the process.
    if let Err(err) = self.finish_exec(locked, path, resolved_elf, maybe_set_id) {
        log_warn!("unrecoverable error in exec: {err:?}");

        send_standard_signal(
            locked,
            self,
            SignalInfo { code: SI_KERNEL as i32, force: true, ..SignalInfo::default(SIGSEGV) },
        );
        return Err(err);
    }

    // Report the exec to any ptracer and wake a vfork parent, if present.
    self.ptrace_event(locked, PtraceOptions::TRACEEXEC, self.task.tid as u64);
    self.signal_vfork();

    Ok(())
}
| |
/// After the memory is unmapped, any failure in exec is unrecoverable and results in the
/// process crashing. This function is for that second half; any error returned from this
/// function will be considered unrecoverable.
///
/// Responsibilities: notify robust-list futex owners, replace the memory
/// manager, apply credential/dumpable transitions for `maybe_set_id`, load
/// the new executable into registers, reset signal state, unshare the fd
/// table, and update the command name.
fn finish_exec(
    &mut self,
    locked: &mut Locked<Unlocked>,
    path: CString,
    resolved_elf: ResolvedElf,
    mut maybe_set_id: UserAndOrGroupId,
) -> Result<(), Errno> {
    // Now that the exec will definitely finish (or crash), notify owners of
    // locked futexes for the current process, which will be impossible to
    // update after process image is replaced. See get_robust_list(2).
    self.notify_robust_list();

    // Passing arch32 information here ensures the replacement memory
    // layout matches the elf being executed.
    let mm = {
        let mm = self.mm()?;
        let new_mm = mm
            .exec(resolved_elf.file.name.to_passive(), resolved_elf.arch_width)
            .map_err(|status| from_status_like_fdio!(status))?;
        self.mm.update(Some(new_mm.clone()));
        new_mm
    };

    {
        let mut state = self.write();

        // From <https://man7.org/linux/man-pages/man2/execve.2.html>:
        //
        //   The aforementioned transformations of the effective IDs are not
        //   performed (i.e., the set-user-ID and set-group-ID bits are
        //   ignored) if any of the following is true:
        //
        //   * the no_new_privs attribute is set for the calling thread (see
        //      prctl(2));
        //
        //   * the underlying filesystem is mounted nosuid (the MS_NOSUID
        //     flag for mount(2)); or
        //
        //   * the calling process is being ptraced.
        //
        // The MS_NOSUID check is in `NamespaceNode::suid_and_sgid()`.
        if state.no_new_privs() || state.is_ptraced() {
            maybe_set_id.clear();
        }

        // From <https://man7.org/linux/man-pages/man2/execve.2.html>:
        //
        //   The process's "dumpable" attribute is set to the value 1,
        //   unless a set-user-ID program, a set-group-ID program, or a
        //   program with capabilities is being executed, in which case the
        //   dumpable flag may instead be reset to the value in
        //   /proc/sys/fs/suid_dumpable, in the circumstances described
        //   under PR_SET_DUMPABLE in prctl(2).
        let dumpable =
            if maybe_set_id.is_none() { DumpPolicy::User } else { DumpPolicy::Disable };
        *mm.dumpable.lock(locked) = dumpable;

        // TODO(https://fxbug.dev/433463756): Figure out whether this is the right place to
        // take the lock.
        // SAFETY: this is allowed because we are the CurrentTask.
        let mut writable_creds = unsafe { self.persistent_info.write_creds() };
        state.set_sigaltstack(None);
        state.robust_list_head = RobustListHeadPtr::null(self);

        // From <https://man7.org/linux/man-pages/man2/execve.2.html>:
        //
        //   If a set-user-ID or set-group-ID
        //   program is being executed, then the parent death signal set by
        //   prctl(2) PR_SET_PDEATHSIG flag is cleared.
        //
        // TODO(https://fxbug.dev/356684424): Implement the behavior above once we support
        // the PR_SET_PDEATHSIG flag.

        // TODO(tbodt): Check whether capability xattrs are set on the file, and grant/limit
        // capabilities accordingly.
        let mut new_creds = Credentials::clone(&self.current_creds());
        new_creds.exec(maybe_set_id);
        let new_creds = Arc::new(new_creds);
        writable_creds.update(new_creds.clone());
        *self.current_creds.borrow_mut() = CurrentCreds::Cached(new_creds);
    }

    let security_state = resolved_elf.security_state.clone();

    let start_info = load_executable(self, resolved_elf, &path)?;
    // Before consuming start_info below, note if the task is 32-bit.
    self.thread_state.arch_width = start_info.arch_width;

    // Point the thread's registers at the new image's entry point and reset
    // the extended processor state and signal dispositions.
    let regs: zx_restricted_state_t = start_info.into();
    self.thread_state.registers.load(regs);
    self.thread_state.extended_pstate.reset();
    self.thread_group().signal_actions.reset_for_exec();

    // The exit signal (and that of the children) is reset to SIGCHLD.
    let mut thread_group_state = self.thread_group().write();
    thread_group_state.exit_signal = Some(SIGCHLD);
    for (_, weak_child) in &mut thread_group_state.children {
        if let Some(child) = weak_child.upgrade() {
            let mut child_state = child.write();
            child_state.exit_signal = Some(SIGCHLD);
        }
    }

    // Release the thread-group lock before touching the fd table below.
    std::mem::drop(thread_group_state);

    // TODO(https://fxbug.dev/42082680): All threads other than the calling thread are destroyed.

    // TODO: POSIX timers are not preserved.

    // TODO: Ensure that the filesystem context is un-shared, undoing the effect of CLONE_FS.

    // The file descriptor table is unshared, undoing the effect of the CLONE_FILES flag of
    // clone(2).
    self.files.unshare();
    self.files.exec(locked, self);

    // If SELinux is enabled, enforce permissions related to inheritance of file descriptors
    // and resource limits. Then update the current task's SID.
    //
    // TODO: https://fxbug.dev/378655436 - After the above, enforce permissions related to
    // signal state inheritance.
    //
    // This needs to be called after closing any files marked "close-on-exec".
    security::exec_binprm(locked, self, &security_state);

    self.thread_group().write().did_exec = true;

    self.set_command_name(TaskCommand::from_path_bytes(path.to_bytes()));

    Ok(())
}
| |
| pub fn set_command_name(&self, new_name: TaskCommand) { |
| // set_command_name needs to run before leader_command() in cases where self is the leader. |
| self.task.set_command_name(new_name.clone()); |
| let leader_command = self.thread_group().read().leader_command(); |
| starnix_logging::set_current_task_info( |
| new_name, |
| leader_command, |
| self.thread_group().leader, |
| self.tid, |
| ); |
| } |
| |
/// Installs a seccomp filter compiled from the cBPF program `code`,
/// implementing seccomp(SECCOMP_SET_MODE_FILTER, flags, ...).
///
/// Supported `flags`: SECCOMP_FILTER_FLAG_LOG, SECCOMP_FILTER_FLAG_NEW_LISTENER
/// (returns a notifier fd as the syscall result), and SECCOMP_FILTER_FLAG_TSYNC
/// (applies the resulting filter set to every thread in the process).
pub fn add_seccomp_filter(
    &mut self,
    locked: &mut Locked<Unlocked>,
    code: Vec<sock_filter>,
    flags: u32,
) -> Result<SyscallResult, Errno> {
    let new_filter = Arc::new(SeccompFilter::from_cbpf(
        &code,
        self.thread_group().next_seccomp_filter_id.add(1),
        flags & SECCOMP_FILTER_FLAG_LOG != 0,
    )?);

    let mut maybe_fd: Option<FdNumber> = None;

    // Create the user-notification listener fd before taking the process
    // lock below.
    if flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0 {
        maybe_fd = Some(SeccompFilterContainer::create_listener(locked, self)?);
    }

    // We take the process lock here because we can't change any of the threads
    // while doing a tsync. So, you hold the process lock while making any changes.
    let state = self.thread_group().write();

    if flags & SECCOMP_FILTER_FLAG_TSYNC != 0 {
        // TSYNC synchronizes all filters for all threads in the current process to
        // the current thread's

        // We collect the filters for the current task upfront to save us acquiring
        // the task's lock a lot of times below.
        let mut filters: SeccompFilterContainer = self.read().seccomp_filters.clone();

        // For TSYNC to work, all of the other thread filters in this process have to
        // be a prefix of this thread's filters, and none of them can be in
        // strict mode.
        let tasks = state.tasks().collect::<Vec<_>>();
        for task in &tasks {
            if task.tid == self.tid {
                continue;
            }
            let other_task_state = task.read();

            // Target threads cannot be in SECCOMP_MODE_STRICT
            if task.seccomp_filter_state.get() == SeccompStateValue::Strict {
                return Self::seccomp_tsync_error(task.tid, flags);
            }

            // Target threads' filters must be a subsequence of this thread's
            if !other_task_state.seccomp_filters.can_sync_to(&filters) {
                return Self::seccomp_tsync_error(task.tid, flags);
            }
        }

        // Now that we're sure we're allowed to do so, add the filter to all threads.
        filters.add_filter(new_filter, code.len() as u16)?;

        for task in &tasks {
            let mut other_task_state = task.write();

            other_task_state.enable_no_new_privs();
            other_task_state.seccomp_filters = filters.clone();
            task.set_seccomp_state(SeccompStateValue::UserDefined)?;
        }
    } else {
        // Non-TSYNC: only the current task's filter list changes.
        let mut task_state = self.task.write();

        task_state.seccomp_filters.add_filter(new_filter, code.len() as u16)?;
        self.set_seccomp_state(SeccompStateValue::UserDefined)?;
    }

    // With NEW_LISTENER, the syscall result is the listener fd; otherwise 0.
    if let Some(fd) = maybe_fd { Ok(fd.into()) } else { Ok(().into()) }
}
| |
| pub fn run_seccomp_filters( |
| &mut self, |
| locked: &mut Locked<Unlocked>, |
| syscall: &Syscall, |
| ) -> Option<Result<SyscallResult, Errno>> { |
| // Implementation of SECCOMP_FILTER_STRICT, which has slightly different semantics |
| // from user-defined seccomp filters. |
| if self.seccomp_filter_state.get() == SeccompStateValue::Strict { |
| return SeccompState::do_strict(locked, self, syscall); |
| } |
| |
| // Run user-defined seccomp filters |
| let result = self.task.read().seccomp_filters.run_all(self, syscall); |
| |
| SeccompState::do_user_defined(locked, result, self, syscall) |
| } |
| |
| fn seccomp_tsync_error(id: i32, flags: u32) -> Result<SyscallResult, Errno> { |
| // By default, TSYNC indicates failure state by returning the first thread |
| // id not to be able to sync, rather than by returning -1 and setting |
| // errno. However, if TSYNC_ESRCH is set, it returns ESRCH. This |
| // prevents conflicts with fact that SECCOMP_FILTER_FLAG_NEW_LISTENER |
| // makes seccomp return an fd. |
| if flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH != 0 { error!(ESRCH) } else { Ok(id.into()) } |
| } |
| |
| // Notify all futexes in robust list. The robust list is in user space, so we |
| // are very careful about walking it, and there are a lot of quiet returns if |
| // we fail to walk it. |
| // TODO(https://fxbug.dev/42079081): This only sets the FUTEX_OWNER_DIED bit; it does |
| // not wake up a waiter. |
| pub fn notify_robust_list(&self) { |
| let task_state = self.write(); |
| let robust_list_addr = task_state.robust_list_head.addr(); |
| if robust_list_addr == UserAddress::NULL { |
| // No one has called set_robust_list. |
| return; |
| } |
| let robust_list_res = self.read_multi_arch_object(task_state.robust_list_head); |
| |
| let head = if let Ok(head) = robust_list_res { |
| head |
| } else { |
| return; |
| }; |
| |
| let offset = head.futex_offset; |
| |
| let mut entries_count = 0; |
| let mut curr_ptr = head.list.next; |
| while curr_ptr.addr() != robust_list_addr.into() && entries_count < ROBUST_LIST_LIMIT { |
| let curr_ref = self.read_multi_arch_object(curr_ptr); |
| |
| let curr = if let Ok(curr) = curr_ref { |
| curr |
| } else { |
| return; |
| }; |
| |
| let Some(futex_base) = curr_ptr.addr().checked_add_signed(offset) else { |
| return; |
| }; |
| |
| let futex_addr = match FutexAddress::try_from(futex_base) { |
| Ok(addr) => addr, |
| Err(_) => { |
| return; |
| } |
| }; |
| |
| let Ok(mm) = self.mm() else { |
| log_error!("Asked to notify robust list futexes in system task."); |
| return; |
| }; |
| let futex = if let Ok(futex) = mm.atomic_load_u32_relaxed(futex_addr) { |
| futex |
| } else { |
| return; |
| }; |
| |
| if (futex & FUTEX_TID_MASK) as i32 == self.tid { |
| let owner_died = FUTEX_OWNER_DIED | futex; |
| if mm.atomic_store_u32_relaxed(futex_addr, owner_died).is_err() { |
| return; |
| } |
| } |
| curr_ptr = curr.next; |
| entries_count += 1; |
| } |
| } |
| |
/// Returns a clone of this thread's `SeccompNotifier` handle, if one is
/// installed.
pub fn get_seccomp_notifier(&mut self) -> Option<SeccompNotifierHandle> {
    self.task.write().seccomp_filters.notifier.clone()
}
| |
/// Installs (or, with `None`, removes) this thread's `SeccompNotifier`.
pub fn set_seccomp_notifier(&mut self, notifier: Option<SeccompNotifierHandle>) {
    self.task.write().seccomp_filters.notifier = notifier;
}
| |
/// Processes a Zircon exception associated with this task.
///
/// Maps each Zircon exception type either to a POSIX signal to deliver to
/// the task, to a handled/ignored outcome, or — for fatal page faults — to
/// whatever the memory manager decides after attempting to service the
/// fault.
pub fn process_exception(
    &self,
    locked: &mut Locked<Unlocked>,
    report: &zx::ExceptionReport,
) -> ExceptionResult {
    match report.ty {
        // Architecture-specific decoding picks the signal; anything we do
        // not recognize is treated as an illegal instruction.
        zx::ExceptionType::General => match get_signal_for_general_exception(&report.arch) {
            Some(sig) => ExceptionResult::Signal(SignalInfo::default(sig)),
            None => {
                log_error!("Unrecognized general exception: {:?}", report);
                ExceptionResult::Signal(SignalInfo::default(SIGILL))
            }
        },
        zx::ExceptionType::FatalPageFault { status } => {
            // Let the memory manager try to service the fault (e.g. demand
            // paging); it determines the final outcome.
            let report = decode_page_fault_exception_report(&report.arch);
            if let Ok(mm) = self.mm() {
                mm.handle_page_fault(locked, report, status)
            } else {
                panic!(
                    "system task is handling a major page fault status={:?}, report={:?}",
                    status, report
                );
            }
        }
        zx::ExceptionType::UndefinedInstruction => {
            ExceptionResult::Signal(SignalInfo::default(SIGILL))
        }
        zx::ExceptionType::UnalignedAccess => {
            ExceptionResult::Signal(SignalInfo::default(SIGBUS))
        }
        zx::ExceptionType::SoftwareBreakpoint | zx::ExceptionType::HardwareBreakpoint => {
            ExceptionResult::Signal(SignalInfo::default(SIGTRAP))
        }
        zx::ExceptionType::ProcessNameChanged => {
            log_error!("Received unexpected process name changed exception");
            ExceptionResult::Handled
        }
        // Lifecycle and unknown exception types should not reach this path;
        // log and signal SIGSYS.
        zx::ExceptionType::ProcessStarting
        | zx::ExceptionType::ThreadStarting
        | zx::ExceptionType::ThreadExiting => {
            log_error!("Received unexpected task lifecycle exception");
            ExceptionResult::Signal(SignalInfo::default(SIGSYS))
        }
        zx::ExceptionType::PolicyError(policy_code) => {
            log_error!(policy_code:?; "Received Zircon policy error exception");
            ExceptionResult::Signal(SignalInfo::default(SIGSYS))
        }
        zx::ExceptionType::UnknownUserGenerated { code, data } => {
            log_error!(code:?, data:?; "Received unexpected unknown user generated exception");
            ExceptionResult::Signal(SignalInfo::default(SIGSYS))
        }
        zx::ExceptionType::Unknown { ty, code, data } => {
            log_error!(ty:?, code:?, data:?; "Received unexpected exception");
            ExceptionResult::Signal(SignalInfo::default(SIGSYS))
        }
    }
}
| |
| /// Clone this task. |
| /// |
| /// Creates a new task object that shares some state with this task |
| /// according to the given flags. |
| /// |
| /// Used by the clone() syscall to create both processes and threads. |
| /// |
| /// The exit signal is broken out from the flags parameter like clone3() rather than being |
| /// bitwise-ORed like clone(). |
| pub fn clone_task<L>( |
| &self, |
| locked: &mut Locked<L>, |
| flags: u64, |
| child_exit_signal: Option<Signal>, |
| user_parent_tid: UserRef<pid_t>, |
| user_child_tid: UserRef<pid_t>, |
| user_pidfd: UserRef<FdNumber>, |
| ) -> Result<TaskBuilder, Errno> |
| where |
| L: LockBefore<MmDumpable>, |
| L: LockBefore<TaskRelease>, |
| L: LockBefore<ProcessGroupState>, |
| { |
| const IMPLEMENTED_FLAGS: u64 = (CLONE_VM |
| | CLONE_FS |
| | CLONE_FILES |
| | CLONE_SIGHAND |
| | CLONE_THREAD |
| | CLONE_SYSVSEM |
| | CLONE_SETTLS |
| | CLONE_PARENT |
| | CLONE_PARENT_SETTID |
| | CLONE_PIDFD |
| | CLONE_CHILD_CLEARTID |
| | CLONE_CHILD_SETTID |
| | CLONE_VFORK |
| | CLONE_NEWUTS |
| | CLONE_PTRACE) as u64; |
| |
| // A mask with all valid flags set, because we want to return a different error code for an |
| // invalid flag vs an unimplemented flag. Subtracting 1 from the largest valid flag gives a |
| // mask with all flags below it set. Shift up by one to make sure the largest flag is also |
| // set. |
| const VALID_FLAGS: u64 = (CLONE_INTO_CGROUP << 1) - 1; |
| |
| // CLONE_SETTLS is implemented by sys_clone. |
| |
| let clone_files = flags & (CLONE_FILES as u64) != 0; |
| let clone_fs = flags & (CLONE_FS as u64) != 0; |
| let clone_parent = flags & (CLONE_PARENT as u64) != 0; |
| let clone_parent_settid = flags & (CLONE_PARENT_SETTID as u64) != 0; |
| let clone_pidfd = flags & (CLONE_PIDFD as u64) != 0; |
| let clone_child_cleartid = flags & (CLONE_CHILD_CLEARTID as u64) != 0; |
| let clone_child_settid = flags & (CLONE_CHILD_SETTID as u64) != 0; |
| let clone_sysvsem = flags & (CLONE_SYSVSEM as u64) != 0; |
| let clone_ptrace = flags & (CLONE_PTRACE as u64) != 0; |
| let clone_thread = flags & (CLONE_THREAD as u64) != 0; |
| let clone_vm = flags & (CLONE_VM as u64) != 0; |
| let clone_sighand = flags & (CLONE_SIGHAND as u64) != 0; |
| let clone_vfork = flags & (CLONE_VFORK as u64) != 0; |
| let clone_newuts = flags & (CLONE_NEWUTS as u64) != 0; |
| let clone_into_cgroup = flags & CLONE_INTO_CGROUP != 0; |
| |
| if clone_ptrace { |
| track_stub!(TODO("https://fxbug.dev/322874630"), "CLONE_PTRACE"); |
| } |
| |
| if clone_sysvsem { |
| track_stub!(TODO("https://fxbug.dev/322875185"), "CLONE_SYSVSEM"); |
| } |
| |
| if clone_into_cgroup { |
| track_stub!(TODO("https://fxbug.dev/403612570"), "CLONE_INTO_CGROUP"); |
| } |
| |
| if clone_sighand && !clone_vm { |
| return error!(EINVAL); |
| } |
| if clone_thread && !clone_sighand { |
| return error!(EINVAL); |
| } |
| |
| if clone_pidfd && clone_thread { |
| return error!(EINVAL); |
| } |
| if clone_pidfd && clone_parent_settid && user_parent_tid.addr() == user_pidfd.addr() { |
| // `clone()` uses the same out-argument for these, so error out if they have the same |
| // user address. |
| return error!(EINVAL); |
| } |
| |
| if flags & !VALID_FLAGS != 0 { |
| return error!(EINVAL); |
| } |
| |
| if clone_vm && !clone_thread { |
| // TODO(https://fxbug.dev/42066087) Implement CLONE_VM for child processes (not just child |
| // threads). Currently this executes CLONE_VM (explicitly passed to clone() or as |
| // used by vfork()) as a fork (the VM in the child is copy-on-write) which is almost |
| // always OK. |
| // |
| // CLONE_VM is primarily as an optimization to avoid making a copy-on-write version of a |
| // process' VM that will be immediately replaced with a call to exec(). The main users |
| // (libc and language runtimes) don't actually rely on the memory being shared between |
| // the two processes. And the vfork() man page explicitly allows vfork() to be |
| // implemented as fork() which is what we do here. |
| if !clone_vfork { |
| track_stub!( |
| TODO("https://fxbug.dev/322875227"), |
| "CLONE_VM without CLONE_THREAD or CLONE_VFORK" |
| ); |
| } |
| } else if clone_thread && !clone_vm { |
| track_stub!(TODO("https://fxbug.dev/322875167"), "CLONE_THREAD without CLONE_VM"); |
| return error!(ENOSYS); |
| } |
| |
| if flags & !IMPLEMENTED_FLAGS != 0 { |
| track_stub!( |
| TODO("https://fxbug.dev/322875130"), |
| "clone unknown flags", |
| flags & !IMPLEMENTED_FLAGS |
| ); |
| return error!(ENOSYS); |
| } |
| |
| let fs = if clone_fs { self.fs() } else { self.fs().fork() }; |
| let files = if clone_files { self.files.clone() } else { self.files.fork() }; |
| |
| let kernel = self.kernel(); |
| |
| let mut pids = kernel.pids.write(); |
| |
| // Lock the cgroup process hierarchy so that the parent process cannot move to a different |
| // cgroup while a new task or thread_group is created. This may be unnecessary if |
| // CLONE_INTO_CGROUP is implemented and passed in. |
| let mut cgroup2_pid_table = kernel.cgroups.lock_cgroup2_pid_table(); |
| // Create a `KernelSignal::Freeze` to put onto the new task, if the cgroup is frozen. |
| let child_kernel_signals = cgroup2_pid_table |
| .maybe_create_freeze_signal(self.thread_group()) |
| .into_iter() |
| .collect::<VecDeque<_>>(); |
| |
| let pid; |
| let command; |
| let creds; |
| let scheduler_state; |
| let no_new_privs; |
| let seccomp_filters; |
| let robust_list_head = RobustListHeadPtr::null(self); |
| let child_signal_mask; |
| let timerslack_ns; |
| let uts_ns; |
| let security_state = security::task_alloc(&self, flags); |
| |
| let TaskInfo { thread, thread_group, memory_manager } = { |
| // These variables hold the original parent in case we need to switch the parent of the |
| // new task because of CLONE_PARENT. |
| let weak_original_parent; |
| let original_parent; |
| |
| // Make sure to drop these locks ASAP to avoid inversion |
| let thread_group_state = { |
| let thread_group_state = self.thread_group().write(); |
| if clone_parent { |
| // With the CLONE_PARENT flag, the parent of the new task is our parent |
| // instead of ourselves. |
| weak_original_parent = |
| thread_group_state.parent.clone().ok_or_else(|| errno!(EINVAL))?; |
| std::mem::drop(thread_group_state); |
| original_parent = weak_original_parent.upgrade(); |
| original_parent.write() |
| } else { |
| thread_group_state |
| } |
| }; |
| |
| let state = self.read(); |
| |
| no_new_privs = state.no_new_privs(); |
| seccomp_filters = state.seccomp_filters.clone(); |
| child_signal_mask = state.signal_mask(); |
| |
| pid = pids.allocate_pid(); |
| command = self.command(); |
| creds = self.current_creds().clone(); |
| scheduler_state = state.scheduler_state.fork(); |
| timerslack_ns = state.timerslack_ns; |
| |
| uts_ns = if clone_newuts { |
| security::check_task_capable(self, CAP_SYS_ADMIN)?; |
| state.uts_ns.read().fork() |
| } else { |
| state.uts_ns.clone() |
| }; |
| |
| if clone_thread { |
| TaskInfo { |
| thread: None, |
| thread_group: self.thread_group().clone(), |
| memory_manager: self.mm().ok(), |
| } |
| } else { |
| // Drop the lock on this task before entering `create_zircon_process`, because it will |
| // take a lock on the new thread group, and locks on thread groups have a higher |
| // priority than locks on the task in the thread group. |
| std::mem::drop(state); |
| let signal_actions = if clone_sighand { |
| self.thread_group().signal_actions.clone() |
| } else { |
| self.thread_group().signal_actions.fork() |
| }; |
| let process_group = thread_group_state.process_group.clone(); |
| |
| let task_info = create_zircon_process( |
| locked, |
| kernel, |
| Some(thread_group_state), |
| pid, |
| child_exit_signal, |
| process_group, |
| signal_actions, |
| command.clone(), |
| )?; |
| |
| cgroup2_pid_table.inherit_cgroup(self.thread_group(), &task_info.thread_group); |
| |
| task_info |
| } |
| }; |
| |
| // Drop the lock on the cgroup pid_table before creating the TaskBuilder. |
| // If the TaskBuilder creation fails, the TaskBuilder is dropped, which calls |
| // ThreadGroup::remove. ThreadGroup::remove takes the cgroup pid_table lock, causing |
| // a cyclic lock dependency. |
| std::mem::drop(cgroup2_pid_table); |
| |
| // Only create the vfork event when the caller requested CLONE_VFORK. |
| let vfork_event = if clone_vfork { Some(Arc::new(zx::Event::create())) } else { None }; |
| |
| let mut child = TaskBuilder::new(Task::new( |
| pid, |
| command, |
| thread_group, |
| thread, |
| files, |
| memory_manager, |
| fs, |
| creds, |
| self.abstract_socket_namespace.clone(), |
| self.abstract_vsock_namespace.clone(), |
| child_signal_mask, |
| child_kernel_signals, |
| vfork_event, |
| scheduler_state, |
| uts_ns, |
| no_new_privs, |
| SeccompState::from(&self.seccomp_filter_state), |
| seccomp_filters, |
| robust_list_head, |
| timerslack_ns, |
| security_state, |
| )); |
| |
| release_on_error!(child, locked, { |
| let child_task = TempRef::from(&child.task); |
| // Drop the pids lock as soon as possible after creating the child. Destroying the child |
| // and removing it from the pids table itself requires the pids lock, so if an early exit |
| // takes place we have a self deadlock. |
| pids.add_task(&child_task); |
| std::mem::drop(pids); |
| |
| // Child lock must be taken before this lock. Drop the lock on the task, take a writable |
| // lock on the child and take the current state back. |
| |
| #[cfg(any(test, debug_assertions))] |
| { |
| // Take the lock on the thread group and its child in the correct order to ensure any wrong ordering |
| // will trigger the tracing-mutex at the right call site. |
| if !clone_thread { |
| let _l1 = self.thread_group().read(); |
| let _l2 = child.thread_group().read(); |
| } |
| } |
| |
| if clone_thread { |
| self.thread_group().add(&child_task)?; |
| } else { |
| child.thread_group().add(&child_task)?; |
| |
| // These manipulations of the signal handling state appear to be related to |
| // CLONE_SIGHAND and CLONE_VM rather than CLONE_THREAD. However, we do not support |
| // all the combinations of these flags, which means doing these operations here |
| // might actually be correct. However, if you find a test that fails because of the |
| // placement of this logic here, we might need to move it. |
| let mut child_state = child.write(); |
| let state = self.read(); |
| child_state.set_sigaltstack(state.sigaltstack()); |
| child_state.set_signal_mask(state.signal_mask()); |
| } |
| |
| if !clone_vm { |
| // We do not support running threads in the same process with different |
| // MemoryManagers. |
| assert!(!clone_thread); |
| self.mm()?.snapshot_to(locked, &child.mm()?)?; |
| } |
| |
| if clone_parent_settid { |
| self.write_object(user_parent_tid, &child.tid)?; |
| } |
| |
| if clone_child_cleartid { |
| child.write().clear_child_tid = user_child_tid; |
| } |
| |
| if clone_child_settid { |
| child.write_object(user_child_tid, &child.tid)?; |
| } |
| |
| if clone_pidfd { |
| let locked = locked.cast_locked::<TaskRelease>(); |
| let file = new_pidfd( |
| locked, |
| self, |
| child.thread_group(), |
| &*child.mm()?, |
| OpenFlags::empty(), |
| ); |
| let pidfd = self.add_file(locked, file, FdFlags::CLOEXEC)?; |
| self.write_object(user_pidfd, &pidfd)?; |
| } |
| |
| // TODO(https://fxbug.dev/42066087): We do not support running different processes with |
| // the same MemoryManager. Instead, we implement a rough approximation of that behavior |
| // by making a copy-on-write clone of the memory from the original process. |
| if clone_vm && !clone_thread { |
| self.mm()?.snapshot_to(locked, &child.mm()?)?; |
| } |
| |
| child.thread_state = self.thread_state.snapshot::<HeapRegs>(); |
| Ok(()) |
| }); |
| |
| // Take the lock on thread group and task in the correct order to ensure any wrong ordering |
| // will trigger the tracing-mutex at the right call site. |
| #[cfg(any(test, debug_assertions))] |
| { |
| let _l1 = child.thread_group().read(); |
| let _l2 = child.read(); |
| } |
| |
| Ok(child) |
| } |
| |
    /// Sets the stop state (per set_stopped), and also notifies all listeners,
    /// including the parent process and the tracer if appropriate.
    pub fn set_stopped_and_notify(&self, stopped: StopState, siginfo: Option<SignalInfo>) {
        let maybe_signal_info = {
            let mut state = self.write();
            // Capture this task's live state into the mutable state so observers
            // (e.g. a tracer inspecting the stop) see current values.
            state.copy_state_from(self);
            state.set_stopped(stopped, siginfo, Some(self), None);
            // Compute, while still holding the task lock, whether a tracer needs
            // to be signalled about this stop transition.
            state.prepare_signal_info(stopped)
        };

        // Signal the tracer after releasing this task's lock; the tracer may be
        // gone, in which case the weak reference fails to upgrade.
        if let Some((tracer, signal_info)) = maybe_signal_info {
            if let Some(tracer) = tracer.upgrade() {
                tracer.write().send_signal(signal_info);
            }
        }

        // Once the transition has fully completed (not merely "in progress"),
        // wake any parent blocked waiting on a child status change.
        if !stopped.is_in_progress() {
            let parent = self.thread_group().read().parent.clone();
            if let Some(parent) = parent {
                parent
                    .upgrade()
                    .write()
                    .lifecycle_waiters
                    .notify_value(ThreadGroupLifecycleWaitValue::ChildStatus);
            }
        }
    }
| |
    /// If the task is stopping, set it as stopped. return whether the caller
    /// should stop. The task might also be waking up.
    pub fn finalize_stop_state(&mut self) -> bool {
        let stopped = self.load_stopped();

        if !stopped.is_stopping_or_stopped() {
            // If we are waking up, potentially write back state a tracer may have modified.
            let captured_state = self.write().take_captured_state();
            if let Some(captured) = captured_state {
                if captured.dirty {
                    // A tracer modified our registers while we were stopped;
                    // adopt those values as the live thread state.
                    self.thread_state.replace_registers(&captured.thread_state);
                }
            }
        }

        // Stopping because the thread group is stopping.
        // Try to flip to GroupStopped - will fail if we shouldn't.
        if self.thread_group().set_stopped(StopState::GroupStopped, None, true)
            == StopState::GroupStopped
        {
            let signal = self.thread_group().read().last_signal.clone();
            // stopping because the thread group has stopped
            let event = Some(PtraceEventData::new_from_event(PtraceEvent::Stop, 0));
            self.write().set_stopped(StopState::GroupStopped, signal, Some(self), event);
            return true;
        }

        // Stopping because the task is stopping
        if stopped.is_stopping_or_stopped() {
            // finalize() converts a "stopping" state into the corresponding
            // terminal "stopped" state; failure means no transition is needed.
            if let Ok(stopped) = stopped.finalize() {
                self.set_stopped_and_notify(stopped, None);
            }
            return true;
        }

        false
    }
| |
    /// Block the execution of `current_task` as long as the task is stopped and
    /// not terminated.
    pub fn block_while_stopped(&mut self, locked: &mut Locked<Unlocked>) {
        // Upgrade the state from stopping to stopped if needed. Return if the task
        // should not be stopped.
        if !self.finalize_stop_state() {
            return;
        }

        // IGNORE_SIGNALS: a stopped task must not be woken by ordinary signal
        // delivery; only state changes below end the wait.
        let waiter = Waiter::with_options(WaiterOptions::IGNORE_SIGNALS);
        loop {
            // If we've exited, unstop the threads and return without notifying
            // waiters.
            if self.is_exitted() {
                self.thread_group().set_stopped(StopState::ForceAwake, None, false);
                self.write().set_stopped(StopState::ForceAwake, None, Some(self), None);
                return;
            }

            // Registers the waiter for wake-up; returns true if the task is no
            // longer stopped and we can resume execution.
            if self.wake_or_wait_until_unstopped_async(&waiter) {
                return;
            }

            // Do the wait. Result is not needed, as this is not in a syscall.
            let _: Result<(), Errno> = waiter.wait(locked, self);

            // Maybe go from stopping to stopped, if we are currently stopping
            // again.
            self.finalize_stop_state();
        }
    }
| |
| /// For traced tasks, this will return the data neceessary for a cloned task |
| /// to attach to the same tracer. |
| pub fn get_ptrace_core_state_for_clone( |
| &mut self, |
| clone_args: &clone_args, |
| ) -> (PtraceOptions, Option<PtraceCoreState>) { |
| let state = self.write(); |
| if let Some(ptrace) = &state.ptrace { |
| ptrace.get_core_state_for_clone(clone_args) |
| } else { |
| (PtraceOptions::empty(), None) |
| } |
| } |
| |
    /// If currently being ptraced with the given option, emit the appropriate
    /// event. PTRACE_EVENTMSG will return the given message. Also emits the
    /// appropriate event for execve in the absence of TRACEEXEC.
    ///
    /// Note that the Linux kernel has a documented bug where, if TRACEEXIT is
    /// enabled, SIGKILL will trigger an event. We do not exhibit this
    /// behavior.
    pub fn ptrace_event(
        &mut self,
        locked: &mut Locked<Unlocked>,
        trace_kind: PtraceOptions,
        msg: u64,
    ) {
        if !trace_kind.is_empty() {
            // Inner scope ensures the task state lock is released before
            // block_while_stopped(), which must run unlocked.
            {
                let mut state = self.write();
                if let Some(ptrace) = &mut state.ptrace {
                    if !ptrace.has_option(trace_kind) {
                        // If this would be a TRACEEXEC, but TRACEEXEC is not
                        // turned on, then send a SIGTRAP.
                        if trace_kind == PtraceOptions::TRACEEXEC && !ptrace.is_seized() {
                            // Send a SIGTRAP so that the parent can gain control.
                            // Note: `state` is moved into this call.
                            send_signal_first(locked, self, state, SignalInfo::default(SIGTRAP));
                        }

                        return;
                    }
                    // Encode the ptrace event in the upper bits of si_code, with
                    // SIGTRAP in the low byte, matching Linux's (event << 8) | SIGTRAP.
                    let mut siginfo = SignalInfo::default(starnix_uapi::signals::SIGTRAP);
                    siginfo.code = (((PtraceEvent::from_option(&trace_kind) as u32) << 8)
                        | linux_uapi::SIGTRAP) as i32;
                    state.set_stopped(
                        StopState::PtraceEventStopping,
                        Some(siginfo),
                        None,
                        Some(PtraceEventData::new(trace_kind, msg)),
                    );
                } else {
                    // Not traced: nothing to report.
                    return;
                }
            }
            // Block until the tracer resumes us from the event stop.
            self.block_while_stopped(locked);
        }
    }
| |
| /// Causes the current thread's thread group to exit, notifying any ptracer |
| /// of this task first. |
| pub fn thread_group_exit(&mut self, locked: &mut Locked<Unlocked>, exit_status: ExitStatus) { |
| self.ptrace_event( |
| locked, |
| PtraceOptions::TRACEEXIT, |
| exit_status.signal_info_status() as u64, |
| ); |
| self.thread_group().exit(locked, exit_status, None); |
| } |
| |
| /// The flags indicates only the flags as in clone3(), and does not use the low 8 bits for the |
| /// exit signal as in clone(). |
| pub fn clone_task_for_test<L>( |
| &self, |
| locked: &mut Locked<L>, |
| flags: u64, |
| exit_signal: Option<Signal>, |
| ) -> crate::testing::AutoReleasableTask |
| where |
| L: LockBefore<MmDumpable>, |
| L: LockBefore<TaskRelease>, |
| L: LockBefore<ProcessGroupState>, |
| { |
| let result = self |
| .clone_task( |
| locked, |
| flags, |
| exit_signal, |
| UserRef::default(), |
| UserRef::default(), |
| UserRef::default(), |
| ) |
| .expect("failed to create task in test"); |
| |
| result.into() |
| } |
| |
    /// See "Ptrace access mode checking" in https://man7.org/linux/man-pages/man2/ptrace.2.html
    ///
    /// Returns `Ok(())` if `self` may access `target` under `mode`, or an errno
    /// otherwise. The numbered comments below mirror the man page's steps.
    pub fn check_ptrace_access_mode<L>(
        &self,
        locked: &mut Locked<L>,
        mode: PtraceAccessMode,
        target: &Task,
    ) -> Result<(), Errno>
    where
        L: LockBefore<MmDumpable>,
    {
        // (1) If the calling thread and the target thread are in the same
        //     thread group, access is always allowed.
        if self.thread_group().leader == target.thread_group().leader {
            return Ok(());
        }

        // (2) If the access mode specifies PTRACE_MODE_FSCREDS, then, for
        //     the check in the next step, employ the caller's filesystem
        //     UID and GID.  (As noted in credentials(7), the filesystem
        //     UID and GID almost always have the same values as the
        //     corresponding effective IDs.)
        //
        //     Otherwise, the access mode specifies PTRACE_MODE_REALCREDS,
        //     so use the caller's real UID and GID for the checks in the
        //     next step.  (Most APIs that check the caller's UID and GID
        //     use the effective IDs.  For historical reasons, the
        //     PTRACE_MODE_REALCREDS check uses the real IDs instead.)
        let (uid, gid) = if mode.contains(PTRACE_MODE_FSCREDS) {
            let fscred = self.current_creds().as_fscred();
            (fscred.uid, fscred.gid)
        } else if mode.contains(PTRACE_MODE_REALCREDS) {
            let creds = self.current_creds();
            (creds.uid, creds.gid)
        } else {
            // Callers must always pass one of FSCREDS or REALCREDS.
            unreachable!();
        };

        // (3) Deny access if neither of the following is true:
        //
        //     -  The real, effective, and saved-set user IDs of the target
        //        match the caller's user ID, and the real, effective, and
        //        saved-set group IDs of the target match the caller's
        //        group ID.
        //
        //     -  The caller has the CAP_SYS_PTRACE capability in the user
        //        namespace of the target.
        let target_creds = target.real_creds();
        if !(target_creds.uid == uid
            && target_creds.euid == uid
            && target_creds.saved_uid == uid
            && target_creds.gid == gid
            && target_creds.egid == gid
            && target_creds.saved_gid == gid)
        {
            security::check_task_capable(self, CAP_SYS_PTRACE)?;
        }

        // (4) Deny access if the target process "dumpable" attribute has a
        //     value other than 1 (SUID_DUMP_USER; see the discussion of
        //     PR_SET_DUMPABLE in prctl(2)), and the caller does not have
        //     the CAP_SYS_PTRACE capability in the user namespace of the
        //     target process.
        let dumpable = *target.mm()?.dumpable.lock(locked);
        match dumpable {
            DumpPolicy::User => (),
            DumpPolicy::Disable => security::check_task_capable(self, CAP_SYS_PTRACE)?,
        }

        // (5) The kernel LSM security_ptrace_access_check() interface is
        //     invoked to see if ptrace access is permitted.
        security::ptrace_access_check(self, target, mode)?;

        // (6) If access has not been denied by any of the preceding steps,
        //     then access is allowed.
        Ok(())
    }
| |
| pub fn can_signal( |
| &self, |
| target: &Task, |
| unchecked_signal: UncheckedSignal, |
| ) -> Result<(), Errno> { |
| // If both the tasks share a thread group the signal can be sent. This is not documented |
| // in kill(2) because kill does not support task-level granularity in signal sending. |
| if self.thread_group == target.thread_group { |
| return Ok(()); |
| } |
| |
| let self_creds = self.current_creds(); |
| let target_creds = target.real_creds(); |
| // From https://man7.org/linux/man-pages/man2/kill.2.html: |
| // |
| // > For a process to have permission to send a signal, it must either be |
| // > privileged (under Linux: have the CAP_KILL capability in the user |
| // > namespace of the target process), or the real or effective user ID of |
| // > the sending process must equal the real or saved set- user-ID of the |
| // > target process. |
| // |
| // Returns true if the credentials are considered to have the same user ID. |
| if self_creds.euid == target_creds.saved_uid |
| || self_creds.euid == target_creds.uid |
| || self_creds.uid == target_creds.uid |
| || self_creds.uid == target_creds.saved_uid |
| { |
| return Ok(()); |
| } |
| |
| if Signal::try_from(unchecked_signal) == Ok(SIGCONT) { |
| let target_session = target.thread_group().read().process_group.session.leader; |
| let self_session = self.thread_group().read().process_group.session.leader; |
| if target_session == self_session { |
| return Ok(()); |
| } |
| } |
| |
| security::check_task_capable(self, CAP_KILL) |
| } |
| } |
| |
impl ArchSpecific for CurrentTask {
    // Delegates to the thread state, which tracks whether this task is running
    // a 32-bit (compat) architecture.
    fn is_arch32(&self) -> bool {
        self.thread_state.is_arch32()
    }
}
| |
// All memory accesses delegate to the task's memory manager "unified" entry
// points, passing `self` so the mm can use this task's address space directly.
// Each method fails with an errno if the task has no memory manager.
impl MemoryAccessor for CurrentTask {
    fn read_memory<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm()?.unified_read_memory(self, addr, bytes)
    }

    fn read_memory_partial_until_null_byte<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm()?.unified_read_memory_partial_until_null_byte(self, addr, bytes)
    }

    fn read_memory_partial<'a>(
        &self,
        addr: UserAddress,
        bytes: &'a mut [MaybeUninit<u8>],
    ) -> Result<&'a mut [u8], Errno> {
        self.mm()?.unified_read_memory_partial(self, addr, bytes)
    }

    fn write_memory(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.mm()?.unified_write_memory(self, addr, bytes)
    }

    fn write_memory_partial(&self, addr: UserAddress, bytes: &[u8]) -> Result<usize, Errno> {
        self.mm()?.unified_write_memory_partial(self, addr, bytes)
    }

    fn zero(&self, addr: UserAddress, length: usize) -> Result<usize, Errno> {
        self.mm()?.unified_zero(self, addr, length)
    }
}
| |
| impl TaskMemoryAccessor for CurrentTask { |
| fn maximum_valid_address(&self) -> Option<UserAddress> { |
| self.mm().ok().map(|mm| mm.maximum_valid_user_address) |
| } |
| } |
| |
/// Outcome of handling a hardware exception on behalf of a task.
pub enum ExceptionResult {
    /// The exception was handled and no further action is required.
    Handled,

    /// The exception generated a signal that should be delivered.
    Signal(SignalInfo),
}
| |
#[cfg(test)]
mod tests {
    use crate::task::FullCredentials;
    use crate::testing::spawn_kernel_and_run;

    // This test will run `override_creds` and check it doesn't crash. This ensures that the
    // delegation to `override_creds_async` is correct.
    #[::fuchsia::test]
    async fn test_override_creds_can_delegate_to_async_version() {
        spawn_kernel_and_run(async move |_, current_task| {
            // The closure simply returns 0; we only assert the value round-trips
            // through the credential override unchanged.
            assert_eq!(current_task.override_creds(FullCredentials::for_kernel(), || 0), 0);
        })
        .await;
    }
}