blob: b89b162ff1c8bf34821bf7f9b650a48ad6543bf5 [file] [log] [blame]
// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use crate::device::DeviceMode;
use crate::mm::PAGE_SIZE;
use crate::security::{self, Auditable, PermissionFlags};
use crate::signals::{SignalInfo, send_standard_signal};
use crate::task::{CurrentTask, CurrentTaskAndLocked, WaitQueue, Waiter, register_delayed_release};
use crate::time::utc;
use crate::vfs::fsverity::FsVerityState;
use crate::vfs::pipe::{Pipe, PipeHandle};
use crate::vfs::rw_queue::{RwQueue, RwQueueReadGuard};
use crate::vfs::socket::SocketHandle;
use crate::vfs::{
DefaultDirEntryOps, DirEntryOps, FileObject, FileObjectState, FileOps, FileSystem,
FileSystemHandle, FileWriteGuardState, FsStr, FsString, MAX_LFS_FILESIZE, MountInfo,
NamespaceNode, OPathOps, RecordLockCommand, RecordLockOwner, RecordLocks, WeakFileHandle,
checked_add_offset_and_length, inotify,
};
use bitflags::bitflags;
use fuchsia_runtime::UtcInstant;
use linux_uapi::{XATTR_SECURITY_PREFIX, XATTR_SYSTEM_PREFIX, XATTR_TRUSTED_PREFIX};
use once_cell::race::OnceBool;
use starnix_crypt::EncryptionKeyId;
use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
use starnix_logging::{log_error, track_stub};
use starnix_sync::{
BeforeFsNodeAppend, FileOpsCore, FsNodeAppend, LockBefore, LockEqualOrBefore, Locked, Mutex,
RwLock, RwLockReadGuard, Unlocked,
};
use starnix_types::ownership::{Releasable, ReleaseGuard};
use starnix_types::time::{NANOS_PER_SECOND, timespec_from_time};
use starnix_uapi::as_any::AsAny;
use starnix_uapi::auth::{
CAP_CHOWN, CAP_DAC_OVERRIDE, CAP_DAC_READ_SEARCH, CAP_FOWNER, CAP_FSETID, CAP_MKNOD,
CAP_SYS_ADMIN, CAP_SYS_RESOURCE, FsCred, UserAndOrGroupId,
};
use starnix_uapi::device_type::DeviceType;
use starnix_uapi::errors::{EACCES, ENOTSUP, EPERM, Errno};
use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
use starnix_uapi::inotify_mask::InotifyMask;
use starnix_uapi::mount_flags::MountFlags;
use starnix_uapi::open_flags::OpenFlags;
use starnix_uapi::resource_limits::Resource;
use starnix_uapi::seal_flags::SealFlags;
use starnix_uapi::signals::SIGXFSZ;
use starnix_uapi::{
FALLOC_FL_COLLAPSE_RANGE, FALLOC_FL_INSERT_RANGE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE,
FALLOC_FL_UNSHARE_RANGE, FALLOC_FL_ZERO_RANGE, LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN,
STATX__RESERVED, STATX_ATIME, STATX_ATTR_VERITY, STATX_BASIC_STATS, STATX_BLOCKS, STATX_CTIME,
STATX_GID, STATX_INO, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_UID, XATTR_USER_PREFIX,
errno, error, fsverity_descriptor, gid_t, ino_t, statx, statx_timestamp, timespec, uapi, uid_t,
};
use std::sync::atomic::Ordering;
use std::sync::{Arc, OnceLock, Weak};
use syncio::zxio_node_attr_has_t;
/// Whether a node may be linked into a directory.
///
/// The default is [`FsNodeLinkBehavior::Allowed`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum FsNodeLinkBehavior {
    /// The node may be linked into a directory.
    #[default]
    Allowed,
    /// The node may not be linked into a directory.
    Disallowed,
}
/// Evidence that the append lock of an `FsNode` is held for reading.
///
/// A reference to this guard is passed to `FsNodeOps::truncate` and `FsNodeOps::allocate`.
pub enum AppendLockGuard<'a> {
/// A read guard on the append lock.
Read(RwQueueReadGuard<'a, FsNodeAppend>),
/// A reference to a guard obtained earlier, reused instead of acquiring the lock again.
AlreadyLocked(&'a AppendLockGuard<'a>),
}
/// Strategy for obtaining the append lock of an `FsNode`.
///
/// `L` is the lock level the caller currently holds. The implementations in this file either
/// acquire the lock (`RealAppendLockStrategy`) or hand back a guard that is already held
/// (`AlreadyLockedAppendLockStrategy`).
pub trait AppendLockStrategy<L> {
/// Helper method for acquiring append lock in `truncate`/`allocate`. Acquires the lock when it's not already acquired.
///
/// On success returns the guard together with a `Locked<FileOpsCore>` token.
fn lock<'a>(
&'a self,
locked: &'a mut Locked<L>,
current_task: &CurrentTask,
node: &'a FsNode,
) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno>;
}
/// [`AppendLockStrategy`] that actually takes the node's append lock for reading.
struct RealAppendLockStrategy {}

impl AppendLockStrategy<BeforeFsNodeAppend> for RealAppendLockStrategy {
    /// Acquires the lock through [`FsNodeOps::append_lock_read`] and wraps the resulting guard
    /// in [`AppendLockGuard::Read`].
    fn lock<'a>(
        &'a self,
        locked: &'a mut Locked<BeforeFsNodeAppend>,
        current_task: &CurrentTask,
        node: &'a FsNode,
    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno> {
        let (read_guard, locked_after_append) =
            node.ops().append_lock_read(locked, node, current_task)?;
        let locked_for_file_ops = locked_after_append.cast_locked::<FileOpsCore>();
        Ok((AppendLockGuard::Read(read_guard), locked_for_file_ops))
    }
}
/// [`AppendLockStrategy`] for callers that already hold the append lock.
pub struct AlreadyLockedAppendLockStrategy<'a> {
    // The guard acquired earlier. Every call to `lock` hands out a reference to it instead of
    // taking the append lock again.
    guard: &'a AppendLockGuard<'a>,
}

impl<'a> AlreadyLockedAppendLockStrategy<'a> {
    /// Creates a strategy that reuses `guard`.
    pub fn new(guard: &'a AppendLockGuard<'a>) -> Self {
        AlreadyLockedAppendLockStrategy { guard }
    }
}

impl AppendLockStrategy<FileOpsCore> for AlreadyLockedAppendLockStrategy<'_> {
    /// Never fails: returns [`AppendLockGuard::AlreadyLocked`] pointing at the stored guard.
    fn lock<'a>(
        &'a self,
        locked: &'a mut Locked<FileOpsCore>,
        _current_task: &CurrentTask,
        _node: &'a FsNode,
    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno> {
        let reused_guard = AppendLockGuard::AlreadyLocked(self.guard);
        let same_level = locked.cast_locked::<FileOpsCore>();
        Ok((reused_guard, same_level))
    }
}
/// A node in a file system.
///
/// Pairs the file-system-specific operations (`ops`) with the node's metadata (`info`) and the
/// locks and cached state that are tracked per node.
pub struct FsNode {
/// The inode number for this FsNode.
pub ino: ino_t,
/// The FsNodeOps for this FsNode.
///
/// The FsNodeOps are implemented by the individual file systems to provide
/// specific behaviors for this FsNode.
ops: Box<dyn FsNodeOps>,
/// The FileSystem that owns this FsNode's tree.
fs: Weak<FileSystem>,
/// A RwLock to synchronize append operations for this node.
///
/// FileObjects writing with O_APPEND should grab a write() lock on this
/// field to ensure they operate sequentially. FileObjects writing without
/// O_APPEND should grab read() lock so that they can operate in parallel.
pub append_lock: RwQueue<FsNodeAppend>,
/// Mutable information about this node.
///
/// This data is used to populate the uapi::stat structure.
info: RwLock<FsNodeInfo>,
/// Data associated with an FsNode that is rarely needed.
rare_data: OnceLock<Box<FsNodeRareData>>,
/// Tracks lock state for this file.
pub write_guard_state: Mutex<FileWriteGuardState>,
/// Cached FsVerity state associated with this node.
pub fsverity: Mutex<FsVerityState>,
/// The security state associated with this node. Must always be acquired last
/// relative to other `FsNode` locks.
pub security_state: security::FsNodeState,
}
/// Data associated with an `FsNode` that is rarely needed.
///
/// Stored boxed behind `FsNode::rare_data`.
#[derive(Default)]
struct FsNodeRareData {
/// The pipe located at this node, if any.
///
/// Used if, and only if, the node has a mode of FileMode::IFIFO.
fifo: OnceLock<PipeHandle>,
/// The UNIX domain socket bound to this node, if any.
bound_socket: OnceLock<SocketHandle>,
/// Advisory lock (`flock`) state for this node.
///
/// No other lock on this object may be taken while this lock is held.
flock_info: Mutex<FlockInfo>,
/// Records locks associated with this node.
record_locks: RecordLocks,
/// Whether this node can be linked into a directory.
///
/// Only set for nodes created with `O_TMPFILE`.
link_behavior: OnceLock<FsNodeLinkBehavior>,
/// Inotify watchers on this node. See inotify(7).
watchers: inotify::InotifyWatchers,
}
impl FsNodeRareData {
    /// Returns the pipe stored at this node, creating it on first use.
    ///
    /// A newly created pipe gets a capacity of 16 pages. Unless `current_task` has
    /// `CAP_SYS_RESOURCE`, that capacity is additionally capped at the kernel's
    /// `system_limits.pipe_max_size`.
    fn ensure_fifo(&self, current_task: &CurrentTask) -> &PipeHandle {
        self.fifo.get_or_init(|| {
            let sixteen_pages = (*PAGE_SIZE * 16) as usize;
            let capacity = if security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE) {
                sixteen_pages
            } else {
                let pipe_max_size =
                    current_task.kernel().system_limits.pipe_max_size.load(Ordering::Relaxed);
                std::cmp::min(sixteen_pages, pipe_max_size)
            };
            Pipe::new(capacity)
        })
    }
}
/// Release action for `FsNode`: hands the node over to `register_delayed_release`.
pub enum FsNodeReleaserAction {}
impl ReleaserAction<FsNode> for FsNodeReleaserAction {
fn release(fs_node: ReleaseGuard<FsNode>) {
register_delayed_release(fs_node);
}
}
/// An `FsNode` wrapped in an `ObjectReleaser` that uses `FsNodeReleaserAction`.
pub type FsNodeReleaser = ObjectReleaser<FsNode, FsNodeReleaserAction>;
/// Shared (`Arc`) handle to an `FsNodeReleaser`.
pub type FsNodeHandle = Arc<FsNodeReleaser>;
/// Weak counterpart of `FsNodeHandle`.
pub type WeakFsNodeHandle = Weak<FsNodeReleaser>;
/// Mutable metadata of an `FsNode`.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct FsNodeInfo {
/// File type and permission bits.
pub mode: FileMode,
/// Number of links to the node. `FsNodeInfo::new` starts it at 2 for directories and 1
/// otherwise.
pub link_count: usize,
/// User id of the owner.
pub uid: uid_t,
/// Group id of the owner.
pub gid: gid_t,
/// Device type of the node. NOTE(review): presumably only meaningful for device nodes —
/// confirm.
pub rdev: DeviceType,
/// Size of the node. NOTE(review): presumably in bytes — confirm.
pub size: usize,
/// Block size; `storage_size()` multiplies it by `blocks`.
pub blksize: usize,
/// Number of blocks; `storage_size()` multiplies it by `blksize`.
pub blocks: usize,
/// Time of the last status change.
pub time_status_change: UtcInstant,
/// Time of the last access.
pub time_access: UtcInstant,
/// Time of the last modification.
pub time_modify: UtcInstant,
/// NOTE(review): presumably marks the node as using case-insensitive names — confirm.
pub casefold: bool,
// If this node is fscrypt encrypted, stores the id of the user wrapping key used to encrypt it.
pub wrapping_key_id: Option<[u8; 16]>,
// Used to indicate to filesystems that manage timestamps that an access has occurred and to
// update the node's atime.
// This only impacts accesses within Starnix. Most Fuchsia programs are not expected to maintain
// access times. If the file handle is transferred out of Starnix, there may be inconsistencies.
pub pending_time_access_update: bool,
}
impl FsNodeInfo {
    /// Creates the info for a new node with the given `mode` and `owner`.
    ///
    /// The link count starts at 2 for directories and 1 for everything else, the block size is
    /// [`DEFAULT_BYTES_PER_BLOCK`], and all three timestamps are set to the value returned by
    /// `utc::utc_now()`. Every other field takes its `Default` value.
    pub fn new(mode: FileMode, owner: FsCred) -> Self {
        let now = utc::utc_now();
        Self {
            mode,
            link_count: if mode.is_dir() { 2 } else { 1 },
            uid: owner.uid,
            gid: owner.gid,
            blksize: DEFAULT_BYTES_PER_BLOCK,
            time_status_change: now,
            time_access: now,
            time_modify: now,
            ..Default::default()
        }
    }

    /// Returns `blksize * blocks`, saturating at `usize::MAX` instead of overflowing.
    pub fn storage_size(&self) -> usize {
        self.blksize.saturating_mul(self.blocks)
    }

    /// Replaces the bits covered by `FileMode::PERMISSIONS` with those of `mode`.
    ///
    /// All other bits of the current mode are preserved.
    pub fn chmod(&mut self, mode: FileMode) {
        self.mode = (self.mode & !FileMode::PERMISSIONS) | (mode & FileMode::PERMISSIONS);
    }

    /// Updates the owner and/or group; a `None` argument leaves that id unchanged.
    ///
    /// For regular files the set-ID bits are then dropped, even when both arguments are `None`.
    pub fn chown(&mut self, owner: Option<uid_t>, group: Option<gid_t>) {
        if let Some(owner) = owner {
            self.uid = owner;
        }
        if let Some(group) = group {
            self.gid = group;
        }
        // For regular files, always clear the setuid bit, and clear the setgid bit unless it
        // denotes mandatory locking (see `clear_sgid_bit`).
        if self.mode.is_reg() {
            self.clear_suid_and_sgid_bits();
        }
    }

    /// Clears the setgid bit, but only when the group execute bit is set.
    fn clear_sgid_bit(&mut self) {
        // If the group execute bit is not set, the setgid bit actually indicates mandatory
        // locking and should not be cleared.
        if self.mode.intersects(FileMode::IXGRP) {
            self.mode &= !FileMode::ISGID;
        }
    }

    /// Clears the setuid bit unconditionally and the setgid bit as per `clear_sgid_bit`.
    fn clear_suid_and_sgid_bits(&mut self) {
        self.mode &= !FileMode::ISUID;
        self.clear_sgid_bit();
    }

    /// Returns the owner and group of the node as an `FsCred`.
    pub fn cred(&self) -> FsCred {
        FsCred { uid: self.uid, gid: self.gid }
    }

    /// Returns the ids that executing this node should switch to.
    ///
    /// The uid is present when the setuid bit is set. The gid is present when both the setgid
    /// and the group execute bits are set. When at least one of them is present, an `EXEC`
    /// access check is run against the node's own uid, gid and mode, and its error, if any, is
    /// returned instead.
    pub fn suid_and_sgid(
        &self,
        current_task: &CurrentTask,
        fs_node: &FsNode,
    ) -> Result<UserAndOrGroupId, Errno> {
        let uid = self.mode.contains(FileMode::ISUID).then_some(self.uid);

        // See <https://man7.org/linux/man-pages/man7/inode.7.html>:
        //
        //   For an executable file, the set-group-ID bit causes the
        //   effective group ID of a process that executes the file to change
        //   as described in execve(2).  For a file that does not have the
        //   group execution bit (S_IXGRP) set, the set-group-ID bit indicates
        //   mandatory file/record locking.
        let gid = self.mode.contains(FileMode::ISGID | FileMode::IXGRP).then_some(self.gid);

        let maybe_set_id = UserAndOrGroupId { uid, gid };
        if maybe_set_id.is_some() {
            // Check that uid and gid actually have execute access before
            // returning them as the SUID or SGID.
            check_access(
                fs_node,
                current_task,
                security::PermissionFlags::EXEC,
                self.uid,
                self.gid,
                self.mode,
            )?;
        }
        Ok(maybe_set_id)
    }
}
/// Advisory (`flock`) lock state of a node.
#[derive(Default)]
struct FlockInfo {
    /// Current lock mode of the node:
    /// - `None`: the node is not locked.
    /// - `Some(false)`: the node is locked non-exclusively.
    /// - `Some(true)`: the node is locked exclusively.
    locked_exclusive: Option<bool>,
    /// The `FileObject`s that hold the lock.
    locking_handles: Vec<WeakFileHandle>,
    /// The queue used to notify processes waiting on the lock.
    wait_queue: WaitQueue,
}

impl FlockInfo {
    /// Keeps only the lock holders that are still alive and for which `predicate` returns
    /// `true`. If this leaves no holder, the node is unlocked and all waiters are notified.
    ///
    /// Does nothing, and notifies nobody, when there was no holder to begin with.
    pub fn retain<F>(&mut self, predicate: F)
    where
        F: Fn(&FileObject) -> bool,
    {
        if self.locking_handles.is_empty() {
            return;
        }
        self.locking_handles.retain(|weak| match weak.upgrade() {
            Some(file) => predicate(&file),
            None => false,
        });
        if !self.locking_handles.is_empty() {
            return;
        }
        self.locked_exclusive = None;
        self.wait_queue.notify_all();
    }
}
/// Default value of `FsNodeInfo::blksize`, in bytes.
///
/// NOTE(review): this comment used to read "`st_blksize` is measured in units of 512 bytes".
/// In stat(2) it is `st_blocks` that is counted in 512-byte units — confirm which was intended.
pub const DEFAULT_BYTES_PER_BLOCK: usize = 512;
/// A validated `flock(2)` operation word.
pub struct FlockOperation {
    operation: u32,
}

impl FlockOperation {
    /// Validates the raw `operation` bits.
    ///
    /// Fails with `EINVAL` when a bit other than `LOCK_SH`, `LOCK_EX`, `LOCK_UN` or `LOCK_NB`
    /// is set, or when the number of `LOCK_SH`/`LOCK_EX`/`LOCK_UN` bits set is not exactly one.
    pub fn from_flags(operation: u32) -> Result<Self, Errno> {
        let known_bits = LOCK_SH | LOCK_EX | LOCK_UN | LOCK_NB;
        if operation & !known_bits != 0 {
            return error!(EINVAL);
        }
        let mut selected_kinds = 0;
        for kind in [LOCK_SH, LOCK_EX, LOCK_UN] {
            if operation & kind == kind {
                selected_kinds += 1;
            }
        }
        if selected_kinds != 1 {
            return error!(EINVAL);
        }
        Ok(FlockOperation { operation })
    }

    /// Whether `LOCK_UN` is set.
    pub fn is_unlock(&self) -> bool {
        self.operation & LOCK_UN != 0
    }

    /// Whether `LOCK_EX` is set.
    pub fn is_lock_exclusive(&self) -> bool {
        self.operation & LOCK_EX != 0
    }

    /// Whether the operation may block, i.e. `LOCK_NB` is not set.
    pub fn is_blocking(&self) -> bool {
        self.operation & LOCK_NB == 0
    }
}
impl FileObject {
    /// Applies or removes an advisory lock on the node behind this file object.
    ///
    /// Fails with `EBADF` for files opened with `O_PATH`, and with `EAGAIN` when a non-blocking
    /// request cannot be satisfied immediately. A blocking request waits until the lock state
    /// changes and then retries.
    ///
    /// See flock(2).
    pub fn flock(
        &self,
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        operation: FlockOperation,
    ) -> Result<(), Errno> {
        if self.flags().contains(OpenFlags::PATH) {
            return error!(EBADF);
        }
        loop {
            let mut flock_info = self.name.entry.node.ensure_rare_data().flock_info.lock();

            if operation.is_unlock() {
                flock_info.retain(|file| !std::ptr::eq(file, self));
                return Ok(());
            }

            // From here on the operation is a locking operation.
            let wants_exclusive = operation.is_lock_exclusive();
            let current_mode = flock_info.locked_exclusive;

            // 1. Nobody holds the lock: take it.
            let Some(held_exclusive) = current_mode else {
                flock_info.locked_exclusive = Some(wants_exclusive);
                flock_info.locking_handles.push(self.weak_handle.clone());
                return Ok(());
            };

            // 2. The lock is held and this file object is one of the holders.
            let already_holder = flock_info.locking_handles.iter().any(|weak| {
                weak.upgrade().map_or(false, |file| std::ptr::eq(&file as &FileObject, self))
            });
            if already_holder {
                if wants_exclusive == held_exclusive {
                    // The requested kind of lock is already held.
                    return Ok(());
                }
                // The other kind of lock is held. Give it up and go around the loop to acquire
                // the requested one; flock doesn't guarantee atomic lock type switching.
                flock_info.retain(|file| !std::ptr::eq(file, self));
                continue;
            }

            // 3. The lock is held by others only, but it is shared and so is the request.
            if !held_exclusive && !wants_exclusive {
                flock_info.locking_handles.push(self.weak_handle.clone());
                return Ok(());
            }

            // 4. The request cannot be granted right now.
            if !operation.is_blocking() {
                return error!(EAGAIN);
            }

            // Register for the release notification while still holding the FlockInfo lock,
            // then let go of it and wait.
            let waiter = Waiter::new();
            flock_info.wait_queue.wait_async(&waiter);
            drop(flock_info);
            waiter.wait(locked, current_task)?;
        }
    }
}
// The inner mod is required because bitflags cannot pass the attribute through to the single
// variant, and attributes cannot be applied to macro invocations.
mod inner_flags {
// Part of the code for the AT_STATX_SYNC_AS_STAT case that's produced by the macro triggers the
// lint, but as a whole, the produced code is still correct.
#![allow(clippy::bad_bit_mask)] // TODO(b/303500202) Remove once addressed in bitflags.
use super::{bitflags, uapi};
bitflags! {
/// Flag bits built from the `uapi` `AT_*` constants and `STATX_ATTR_VERITY`.
///
/// NOTE(review): presumably the `flags` argument of statx(2) — confirm.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct StatxFlags: u32 {
const AT_SYMLINK_NOFOLLOW = uapi::AT_SYMLINK_NOFOLLOW;
const AT_EMPTY_PATH = uapi::AT_EMPTY_PATH;
const AT_NO_AUTOMOUNT = uapi::AT_NO_AUTOMOUNT;
const AT_STATX_SYNC_AS_STAT = uapi::AT_STATX_SYNC_AS_STAT;
const AT_STATX_FORCE_SYNC = uapi::AT_STATX_FORCE_SYNC;
const AT_STATX_DONT_SYNC = uapi::AT_STATX_DONT_SYNC;
const STATX_ATTR_VERITY = uapi::STATX_ATTR_VERITY;
}
}
}
// Re-exported so that users can name the type without going through the inner module.
pub use inner_flags::StatxFlags;
/// Whether an unlink operation targets a directory or a non-directory child.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum UnlinkKind {
/// Unlink a directory.
Directory,
/// Unlink a non-directory.
NonDirectory,
}
/// The target of a symlink, as returned by `FsNodeOps::readlink`.
pub enum SymlinkTarget {
/// The target expressed as a path.
Path(FsString),
/// The target given directly as a namespace node.
Node(NamespaceNode),
}
/// How `set_xattr` should treat an extended attribute that does or does not already exist.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum XattrOp {
/// Set the value of the extended attribute regardless of whether it exists.
Set,
/// Create a new extended attribute. Fail if it already exists.
Create,
/// Replace the value of the extended attribute. Fail if it doesn't exist.
Replace,
}
impl XattrOp {
/// Converts the operation into flag bits: `0` for `Set`, `uapi::XATTR_CREATE` for `Create`
/// and `uapi::XATTR_REPLACE` for `Replace`.
pub fn into_flags(self) -> u32 {
match self {
Self::Set => 0,
Self::Create => uapi::XATTR_CREATE,
Self::Replace => uapi::XATTR_REPLACE,
}
}
}
/// Either a value, or the size required to contain it.
#[derive(Clone, Debug, PartialEq)]
pub enum ValueOrSize<T> {
    /// The value itself.
    Value(T),
    /// The size required to contain the value.
    Size(usize),
}

impl<T> ValueOrSize<T> {
    /// Applies `f` to a `Value` and returns the result as a `Value`; a `Size` is passed through
    /// unchanged and `f` is not called.
    pub fn map<F, U>(self, f: F) -> ValueOrSize<U>
    where
        F: FnOnce(T) -> U,
    {
        let value = match self {
            Self::Value(value) => value,
            Self::Size(size) => return ValueOrSize::Size(size),
        };
        ValueOrSize::Value(f(value))
    }

    /// Returns the contained value. Only available in test builds.
    ///
    /// # Panics
    ///
    /// Panics if `self` is a `Size`.
    #[cfg(test)]
    pub fn unwrap(self) -> T {
        if let Self::Value(value) = self {
            return value;
        }
        panic!("Unwrap ValueOrSize that is a Size")
    }
}

impl<T> From<T> for ValueOrSize<T> {
    /// Wraps `value` in `ValueOrSize::Value`.
    fn from(value: T) -> Self {
        ValueOrSize::Value(value)
    }
}
/// The operation selected by the `mode` argument of `fallocate()`.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
pub enum FallocMode {
    Allocate { keep_size: bool },
    PunchHole,
    Collapse,
    Zero { keep_size: bool },
    InsertRange,
    UnshareRange,
}

impl FallocMode {
    /// Decodes the raw `mode` bits, returning `None` for any value that is not one of the
    /// accepted combinations.
    pub fn from_bits(mode: u32) -> Option<Self> {
        // Flag combinations get names of their own because inside a pattern `A | B` means
        // "`A` or `B`", not the bitwise union.
        const PUNCH_HOLE_KEEP_SIZE: u32 = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
        const ZERO_RANGE_KEEP_SIZE: u32 = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;

        // `fallocate()` allows only the following values for `mode`.
        match mode {
            0 => Some(Self::Allocate { keep_size: false }),
            FALLOC_FL_KEEP_SIZE => Some(Self::Allocate { keep_size: true }),
            PUNCH_HOLE_KEEP_SIZE => Some(Self::PunchHole),
            FALLOC_FL_COLLAPSE_RANGE => Some(Self::Collapse),
            FALLOC_FL_ZERO_RANGE => Some(Self::Zero { keep_size: false }),
            ZERO_RANGE_KEEP_SIZE => Some(Self::Zero { keep_size: true }),
            FALLOC_FL_INSERT_RANGE => Some(Self::InsertRange),
            FALLOC_FL_UNSHARE_RANGE => Some(Self::UnshareRange),
            _ => None,
        }
    }
}
/// Why an access check is being performed; passed as `reason` to `FsNodeOps::check_access`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum CheckAccessReason {
Access,
Chdir,
Chroot,
Exec,
// NOTE(review): `now` presumably tells whether the timestamps are being set to the current
// time — confirm against the callers.
ChangeTimestamps { now: bool },
InternalPermissionChecks,
}
/// File-system-specific operations backing an [`FsNode`].
///
/// Individual file systems implement this trait to provide the behavior of their nodes. Only
/// `create_file_ops`, `mknod`, `mkdir`, `create_symlink` and `unlink` are required; every other
/// method has a default implementation.
pub trait FsNodeOps: Send + Sync + AsAny + 'static {
    /// Delegate the access check to the node.
    ///
    /// The default implementation defers to `FsNode::default_check_access_impl`.
    fn check_access(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        node: &FsNode,
        current_task: &CurrentTask,
        access: security::PermissionFlags,
        info: &RwLock<FsNodeInfo>,
        reason: CheckAccessReason,
        audit_context: security::Auditable<'_>,
    ) -> Result<(), Errno> {
        node.default_check_access_impl(current_task, access, reason, info.read(), audit_context)
    }

    /// Build the [`DirEntryOps`] for a new [`DirEntry`] that will be associated
    /// to this node.
    fn create_dir_entry_ops(&self) -> Box<dyn DirEntryOps> {
        Box::new(DefaultDirEntryOps)
    }

    /// Build the `FileOps` for the file associated to this node.
    ///
    /// The returned FileOps will be used to create a FileObject, which might
    /// be assigned an FdNumber.
    fn create_file_ops(
        &self,
        locked: &mut Locked<FileOpsCore>,
        node: &FsNode,
        _current_task: &CurrentTask,
        flags: OpenFlags,
    ) -> Result<Box<dyn FileOps>, Errno>;

    /// Find an existing child node named `name` and return it.
    ///
    /// The default implementation fails with `ENOENT`.
    fn lookup(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        name: &FsStr,
    ) -> Result<FsNodeHandle, Errno> {
        // The default implementation here is suitable for filesystems that have permanent entries;
        // entries that already exist will get found in the cache and shouldn't get this far.
        error!(ENOENT, format!("looking for {name}"))
    }

    /// Create and return the given child node.
    ///
    /// `_mode` indicates what kind of child to create.
    ///
    /// This function is never called with FileMode::IFDIR. The mkdir function
    /// is used to create directories instead.
    fn mknod(
        &self,
        locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _mode: FileMode,
        _dev: DeviceType,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno>;

    /// Create and return the given child node as a subdirectory.
    fn mkdir(
        &self,
        locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _mode: FileMode,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno>;

    /// Creates a symlink with the given `target` path.
    fn create_symlink(
        &self,
        locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _target: &FsStr,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno>;

    /// Creates an anonymous file.
    ///
    /// The FileMode::IFMT of the FileMode is always FileMode::IFREG.
    ///
    /// Used by O_TMPFILE. The default implementation fails with `EOPNOTSUPP`.
    fn create_tmpfile(
        &self,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _mode: FileMode,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno> {
        error!(EOPNOTSUPP)
    }

    /// Reads the symlink from this node.
    ///
    /// The default implementation fails with `EINVAL`.
    fn readlink(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
    ) -> Result<SymlinkTarget, Errno> {
        error!(EINVAL)
    }

    /// Create a hard link with the given name to the given child.
    ///
    /// The default implementation fails with `EPERM`.
    fn link(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _child: &FsNodeHandle,
    ) -> Result<(), Errno> {
        error!(EPERM)
    }

    /// Remove the child with the given name, if the child exists.
    ///
    /// The UnlinkKind parameter indicates whether the caller intends to unlink
    /// a directory or a non-directory child.
    fn unlink(
        &self,
        locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _child: &FsNodeHandle,
    ) -> Result<(), Errno>;

    /// Acquire the necessary append lock for the operations that depend on them.
    /// Should be done before calling `allocate` or `truncate` to avoid lock ordering issues.
    ///
    /// The default implementation takes a read lock on `node.append_lock`.
    fn append_lock_read<'a>(
        &'a self,
        locked: &'a mut Locked<BeforeFsNodeAppend>,
        node: &'a FsNode,
        current_task: &CurrentTask,
    ) -> Result<(RwQueueReadGuard<'a, FsNodeAppend>, &'a mut Locked<FsNodeAppend>), Errno> {
        node.append_lock.read_and(locked, current_task)
    }

    /// Change the length of the file.
    ///
    /// The default implementation fails with `EINVAL`.
    fn truncate(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _guard: &AppendLockGuard<'_>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _length: u64,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Manipulate allocated disk space for the file.
    ///
    /// The default implementation fails with `EINVAL`.
    fn allocate(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _guard: &AppendLockGuard<'_>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _mode: FallocMode,
        _offset: u64,
        _length: u64,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Update the supplied info with initial state (e.g. size) for the node.
    ///
    /// FsNode calls this method when created, to allow the FsNodeOps to
    /// set appropriate initial values in the FsNodeInfo.
    fn initial_info(&self, _info: &mut FsNodeInfo) {}

    /// Update node.info as needed.
    ///
    /// FsNode calls this method before converting the FsNodeInfo struct into
    /// the uapi::stat struct to give the file system a chance to update this data
    /// before it is used by clients.
    ///
    /// File systems that keep the FsNodeInfo up-to-date do not need to
    /// override this function.
    ///
    /// Return a read guard for the updated information.
    fn fetch_and_refresh_info<'a>(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        info: &'a RwLock<FsNodeInfo>,
    ) -> Result<RwLockReadGuard<'a, FsNodeInfo>, Errno> {
        Ok(info.read())
    }

    /// Update node attributes persistently.
    ///
    /// The default implementation does nothing and succeeds.
    fn update_attributes(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _current_task: &CurrentTask,
        _info: &FsNodeInfo,
        _has: zxio_node_attr_has_t,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Get an extended attribute on the node.
    ///
    /// An implementation can systematically return a value. Otherwise, if `max_size` is 0, it can
    /// instead return the size of the attribute, and can return an ERANGE error if max_size is not
    /// 0, and lesser than the required size.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn get_xattr(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _max_size: usize,
    ) -> Result<ValueOrSize<FsString>, Errno> {
        error!(ENOTSUP)
    }

    /// Set an extended attribute on the node.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn set_xattr(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _value: &FsStr,
        _op: XattrOp,
    ) -> Result<(), Errno> {
        error!(ENOTSUP)
    }

    /// Remove an extended attribute from the node.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn remove_xattr(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
    ) -> Result<(), Errno> {
        error!(ENOTSUP)
    }

    /// List the extended attributes of the node.
    ///
    /// An implementation can systematically return a value. Otherwise, if `max_size` is 0, it can
    /// instead return the size of the 0 separated string needed to represent the value, and can
    /// return an ERANGE error if max_size is not 0, and lesser than the required size.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn list_xattrs(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _max_size: usize,
    ) -> Result<ValueOrSize<Vec<FsString>>, Errno> {
        error!(ENOTSUP)
    }

    /// Called when the FsNode is freed by the Kernel.
    fn forget(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _current_task: &CurrentTask,
        _info: FsNodeInfo,
    ) -> Result<(), Errno> {
        Ok(())
    }

    ////////////////////
    // FS-Verity operations

    /// Marks that FS-Verity is being built. Writes fsverity descriptor and merkle tree, the latter
    /// computed by the filesystem.
    /// This should ensure there are no writable file handles. Returns EEXIST if the file was
    /// already fsverity-enabled. Returns EBUSY if this ioctl was already running on this file.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn enable_fsverity(&self, _descriptor: &fsverity_descriptor) -> Result<(), Errno> {
        error!(ENOTSUP)
    }

    /// Read fsverity descriptor, if the node is fsverity-enabled. Else returns ENODATA.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn get_fsverity_descriptor(&self, _log_blocksize: u8) -> Result<fsverity_descriptor, Errno> {
        error!(ENOTSUP)
    }

    /// Returns a descriptive name for this node, suitable to report to userspace in situations
    /// where the node's path is unavailable (e.g. because it is anonymous, and has no path).
    /// If no name is returned then a default name of the form "<class:[<node_id>]" will be used.
    fn internal_name(&self, _node: &FsNode) -> Option<FsString> {
        None
    }

    /// The key used to identify this node in the file system's node cache.
    ///
    /// For many file systems, this will be the same as the inode number. However, some file
    /// systems, such as FUSE, sometimes use different `node_key` and inode numbers.
    fn node_key(&self, node: &FsNode) -> ino_t {
        node.ino
    }
}
/// Lets any concrete [`FsNodeOps`] implementation be passed where a `Box<dyn FsNodeOps>` is
/// expected, via `.into()`.
impl<T: FsNodeOps> From<T> for Box<dyn FsNodeOps> {
    fn from(ops: T) -> Self {
        let boxed: Box<dyn FsNodeOps> = Box::new(ops);
        boxed
    }
}
/// Implements [`FsNodeOps`] methods in a way that makes sense for symlinks.
/// You must implement [`FsNodeOps::readlink`].
///
/// Expands to [`fs_node_impl_not_dir`] plus a `create_file_ops` that panics when called.
///
/// Every type in the generated signatures is written with a full path so that the macro can be
/// used without importing anything at the call site.
#[macro_export]
macro_rules! fs_node_impl_symlink {
    () => {
        $crate::vfs::fs_node_impl_not_dir!();

        fn create_file_ops(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _flags: starnix_uapi::open_flags::OpenFlags,
        ) -> Result<Box<dyn $crate::vfs::FileOps>, starnix_uapi::errors::Errno> {
            assert!(node.is_lnk());
            unreachable!("Symlink nodes cannot be opened.");
        }
    };
}
/// Implements the [`FsNodeOps`] methods that modify a directory so that they all fail with
/// `EROFS`, and makes `check_access` reject any request that includes write access.
///
/// Every type in the generated signatures is written with a full path so that the macro can be
/// used without importing anything at the call site.
#[macro_export]
macro_rules! fs_node_impl_dir_readonly {
    () => {
        fn check_access(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            node: &$crate::vfs::FsNode,
            current_task: &$crate::task::CurrentTask,
            permission_flags: $crate::security::PermissionFlags,
            info: &starnix_sync::RwLock<$crate::vfs::FsNodeInfo>,
            reason: $crate::vfs::CheckAccessReason,
            audit_context: $crate::security::Auditable<'_>,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let access = permission_flags.as_access();
            if access.contains(starnix_uapi::file_mode::Access::WRITE) {
                return starnix_uapi::error!(
                    EROFS,
                    format!("check_access failed: read-only directory")
                );
            }
            node.default_check_access_impl(
                current_task,
                permission_flags,
                reason,
                info.read(),
                audit_context,
            )
        }

        fn mkdir(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("mkdir failed: {:?}", name))
        }

        fn mknod(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _dev: starnix_uapi::device_type::DeviceType,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("mknod failed: {:?}", name))
        }

        fn create_symlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _target: &$crate::vfs::FsStr,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("symlink failed: {:?}", name))
        }

        fn link(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("link failed: {:?}", name))
        }

        fn unlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("unlink failed: {:?}", name))
        }
    };
}
/// Trait that objects can implement if they need to handle extended attribute storage. Allows
/// delegating extended attribute operations in [`FsNodeOps`] to another object.
///
/// Compared with the corresponding [`FsNodeOps`] methods, the methods here take no node, task or
/// size-limit argument, and `get_xattr`/`list_xattrs` always return the value itself.
///
/// See [`fs_node_impl_xattr_delegate`] for usage details.
pub trait XattrStorage {
/// Delegate for [`FsNodeOps::get_xattr`].
fn get_xattr(&self, locked: &mut Locked<FileOpsCore>, name: &FsStr) -> Result<FsString, Errno>;
/// Delegate for [`FsNodeOps::set_xattr`].
fn set_xattr(
&self,
locked: &mut Locked<FileOpsCore>,
name: &FsStr,
value: &FsStr,
op: XattrOp,
) -> Result<(), Errno>;
/// Delegate for [`FsNodeOps::remove_xattr`].
fn remove_xattr(&self, locked: &mut Locked<FileOpsCore>, name: &FsStr) -> Result<(), Errno>;
/// Delegate for [`FsNodeOps::list_xattrs`].
fn list_xattrs(&self, locked: &mut Locked<FileOpsCore>) -> Result<Vec<FsString>, Errno>;
}
/// Implements extended attribute ops for [`FsNodeOps`] by delegating to another object which
/// implements the [`XattrStorage`] trait or a similar interface. For example:
///
/// ```ignore
/// struct Xattrs {}
///
/// impl XattrStorage for Xattrs {
///     // implement XattrStorage
/// }
///
/// struct Node {
///     xattrs: Xattrs
/// }
///
/// impl FsNodeOps for Node {
///     // Delegate extended attribute ops in FsNodeOps to self.xattrs
///     fs_node_impl_xattr_delegate!(self, self.xattrs);
///
///     // add other FsNodeOps impls here
/// }
/// ```
///
/// The generated `get_xattr` and `list_xattrs` ignore the size limit and always return the
/// value itself.
///
/// Every type in the generated signatures is written with a full path so that the macro can be
/// used without importing anything at the call site.
#[macro_export]
macro_rules! fs_node_impl_xattr_delegate {
    ($self:ident, $delegate:expr) => {
        fn get_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _size: usize,
        ) -> Result<$crate::vfs::ValueOrSize<$crate::vfs::FsString>, starnix_uapi::errors::Errno> {
            Ok($delegate.get_xattr(locked, name)?.into())
        }

        fn set_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            value: &$crate::vfs::FsStr,
            op: $crate::vfs::XattrOp,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            $delegate.set_xattr(locked, name, value, op)
        }

        fn remove_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            $delegate.remove_xattr(locked, name)
        }

        fn list_xattrs(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _size: usize,
        ) -> Result<$crate::vfs::ValueOrSize<Vec<$crate::vfs::FsString>>, starnix_uapi::errors::Errno> {
            Ok($delegate.list_xattrs(locked)?.into())
        }
    };
}
/// Stubs out [`FsNodeOps`] methods that only apply to directories.
///
/// The generated `lookup`, `mknod`, `mkdir`, `create_symlink` and `unlink` all fail with
/// `ENOTDIR`.
#[macro_export]
macro_rules! fs_node_impl_not_dir {
() => {
fn lookup(
&self,
_locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
_node: &$crate::vfs::FsNode,
_current_task: &$crate::task::CurrentTask,
_name: &$crate::vfs::FsStr,
) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
starnix_uapi::error!(ENOTDIR)
}
fn mknod(
&self,
_locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
_node: &$crate::vfs::FsNode,
_current_task: &$crate::task::CurrentTask,
_name: &$crate::vfs::FsStr,
_mode: starnix_uapi::file_mode::FileMode,
_dev: starnix_uapi::device_type::DeviceType,
_owner: starnix_uapi::auth::FsCred,
) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
starnix_uapi::error!(ENOTDIR)
}
fn mkdir(
&self,
_locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
_node: &$crate::vfs::FsNode,
_current_task: &$crate::task::CurrentTask,
_name: &$crate::vfs::FsStr,
_mode: starnix_uapi::file_mode::FileMode,
_owner: starnix_uapi::auth::FsCred,
) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
starnix_uapi::error!(ENOTDIR)
}
fn create_symlink(
&self,
_locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
_node: &$crate::vfs::FsNode,
_current_task: &$crate::task::CurrentTask,
_name: &$crate::vfs::FsStr,
_target: &$crate::vfs::FsStr,
_owner: starnix_uapi::auth::FsCred,
) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
starnix_uapi::error!(ENOTDIR)
}
fn unlink(
&self,
_locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
_node: &$crate::vfs::FsNode,
_current_task: &$crate::task::CurrentTask,
_name: &$crate::vfs::FsStr,
_child: &$crate::vfs::FsNodeHandle,
) -> Result<(), starnix_uapi::errors::Errno> {
starnix_uapi::error!(ENOTDIR)
}
};
}
/// Describes how a single timestamp should be treated by a time-update request.
// NOTE(review): the variant names suggest the `UTIME_NOW` / `UTIME_OMIT` / explicit-value cases
// of utimensat(2) — confirm against the callers, which are outside this part of the file.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TimeUpdateType {
/// Presumably: set the timestamp to the current time — TODO confirm.
Now,
/// Presumably: leave the timestamp unchanged — TODO confirm.
Omit,
/// Carries an explicit UTC instant, presumably the value to store — TODO confirm.
Time(UtcInstant),
}
// Public re-export of macros allows them to be used like regular rust items.
pub use {
fs_node_impl_dir_readonly, fs_node_impl_not_dir, fs_node_impl_symlink,
fs_node_impl_xattr_delegate,
};
/// Stateless [`FsNodeOps`] implementation whose directory operations all fail with `ENOTDIR`
/// and whose `create_file_ops` must never be reached.
pub struct SpecialNode;
impl FsNodeOps for SpecialNode {
// `lookup`, `mknod`, `mkdir`, `create_symlink` and `unlink` all return ENOTDIR.
fs_node_impl_not_dir!();
/// Never expected to be called.
///
/// # Panics
///
/// Always panics with "Special nodes cannot be opened.".
fn create_file_ops(
&self,
_locked: &mut Locked<FileOpsCore>,
_node: &FsNode,
_current_task: &CurrentTask,
_flags: OpenFlags,
) -> Result<Box<dyn FileOps>, Errno> {
unreachable!("Special nodes cannot be opened.");
}
}
impl FsNode {
/// Builds a node and wraps it in a handle, bypassing the FileSystem node cache.
///
/// This is rarely the right constructor! It is intended for filesystems that manage node
/// identity themselves through `get_or_create_node`, like ext4.
///
/// The node keeps only a weak reference to `fs`.
pub fn new_uncached(
    ino: ino_t,
    ops: impl Into<Box<dyn FsNodeOps>>,
    fs: &FileSystemHandle,
    info: FsNodeInfo,
) -> FsNodeHandle {
    let weak_fs = Arc::downgrade(fs);
    let node = Self::new_internal(ino, ops.into(), weak_fs, info);
    FsNodeHandle::new(node.into())
}
/// Assembles an `FsNode` from its inode number, ops, weak filesystem reference and initial
/// info. Every other field starts from its default, except `fsverity`, which starts as
/// `FsVerityState::None`.
fn new_internal(
ino: ino_t,
ops: Box<dyn FsNodeOps>,
fs: Weak<FileSystem>,
info: FsNodeInfo,
) -> Self {
// Allow the FsNodeOps to populate initial info.
let info = {
let mut info = info;
ops.initial_info(&mut info);
info
};
// The linter will fail in non test mode as it will not see the lock check.
#[allow(clippy::let_and_return)]
{
let result = Self {
ino,
ops,
fs,
info: RwLock::new(info),
append_lock: Default::default(),
rare_data: Default::default(),
write_guard_state: Default::default(),
fsverity: Mutex::new(FsVerityState::None),
security_state: Default::default(),
};
// In test and debug builds only, take each of the node's locks once, in a fixed order, and
// hold them all until the end of this block.
// NOTE(review): presumably this teaches the lock-ordering checker the intended order
// (append_lock -> info -> write_guard_state -> fsverity -> security_state) — confirm.
#[cfg(any(test, debug_assertions))]
{
#[allow(
clippy::undocumented_unsafe_blocks,
reason = "Force documented unsafe blocks in Starnix"
)]
let locked = unsafe { Unlocked::new() };
let _l1 = result.append_lock.read_for_lock_ordering(locked);
let _l2 = result.info.read();
let _l3 = result.write_guard_state.lock();
let _l4 = result.fsverity.lock();
// TODO(https://fxbug.dev/367585803): Add lock levels to SELinux implementation.
let _l5 = result.security_state.lock();
}
result
}
}
/// Returns a strong handle to the filesystem this node belongs to.
///
/// # Panics
///
/// Panics if the filesystem has already been dropped, since the node only holds a weak
/// reference to it.
pub fn fs(&self) -> FileSystemHandle {
    let filesystem = self.fs.upgrade();
    filesystem.expect("FileSystem did not live long enough")
}
/// Borrows the `FsNodeOps` implementation backing this node.
pub fn ops(&self) -> &dyn FsNodeOps {
    &*self.ops
}
/// Returns an error if this node is encrypted and locked. Does not require
/// fetch_and_refresh_info because FS_IOC_SET_ENCRYPTION_POLICY updates info and once a node is
/// encrypted, it remains encrypted forever.
pub fn fail_if_locked(&self, _current_task: &CurrentTask) -> Result<(), Errno> {
let node_info = self.info();
if let Some(wrapping_key_id) = node_info.wrapping_key_id {
let crypt_service = self.fs().crypt_service().ok_or_else(|| errno!(ENOKEY))?;
if !crypt_service.contains_key(EncryptionKeyId::from(wrapping_key_id)) {
return error!(ENOKEY);
}
}
Ok(())
}
/// Attempts to view this node's `FsNodeOps` as the concrete type `T`.
///
/// Yields `None` when the ops are not a `T`.
pub fn downcast_ops<T>(&self) -> Option<&T>
where
    T: 'static,
{
    let any = self.ops().as_any();
    any.downcast_ref::<T>()
}
/// Releases the locks that `file` held on this node once the file has been closed.
pub fn on_file_closed(&self, file: &FileObjectState) {
    if let Some(rare_data) = self.rare_data.get() {
        // Re-running `retain` is what drops the flock belonging to `file`: its
        // `WeakFileHandle` no longer upgrades to a `FileHandle`.
        rare_data.flock_info.lock().retain(|_| true);
    }
    let owner = RecordLockOwner::FileObject(file.id);
    self.record_lock_release(owner);
}
/// Forwards a record-lock command for `file` to this node's record-lock table, creating the
/// node's rare data first if it does not exist yet.
pub fn record_lock(
    &self,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    file: &FileObject,
    cmd: RecordLockCommand,
    flock: uapi::flock,
) -> Result<Option<uapi::flock>, Errno> {
    let rare_data = self.ensure_rare_data();
    rare_data.record_locks.lock(locked, current_task, file, cmd, flock)
}
/// Drops every record lock that `owner` acquired on this node.
///
/// Does nothing when the node never allocated its rare data.
pub fn record_lock_release(&self, owner: RecordLockOwner) {
    let Some(rare_data) = self.rare_data.get() else {
        return;
    };
    rare_data.record_locks.release_locks(owner);
}
/// Returns the `DirEntryOps` produced by this node's `FsNodeOps::create_dir_entry_ops`.
pub fn create_dir_entry_ops(&self) -> Box<dyn DirEntryOps> {
self.ops().create_dir_entry_ops()
}
/// Asks this node's `FsNodeOps` to build the `FileOps` for a file opened with `flags`.
///
/// No permission or node-type checks happen here.
pub fn create_file_ops<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    flags: OpenFlags,
) -> Result<Box<dyn FileOps>, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let node_ops = self.ops();
    node_ops.create_file_ops(locked.cast_locked::<FileOpsCore>(), self, current_task, flags)
}
pub fn open(
&self,
locked: &mut Locked<Unlocked>,
current_task: &CurrentTask,
namespace_node: &NamespaceNode,
flags: OpenFlags,
access_check: AccessCheck,
) -> Result<Box<dyn FileOps>, Errno> {
// If O_PATH is set, there is no need to create a real FileOps because
// most file operations are disabled.
if flags.contains(OpenFlags::PATH) {
return Ok(Box::new(OPathOps::new()));
}
let access = access_check.resolve(flags);
if access.is_nontrivial() {
if flags.contains(OpenFlags::NOATIME) {
self.check_o_noatime_allowed(current_task)?;
}
// `flags` doesn't contain any information about the EXEC permission. Instead the syscalls
// used to execute a file (`sys_execve` and `sys_execveat`) call `open()` with the EXEC
// permission request in `access`.
let mut permission_flags = PermissionFlags::from(access);
// The `APPEND` flag exists only in `flags`, to modify the behaviour of
// `PermissionFlags::WRITE`
if flags.contains(OpenFlags::APPEND) {
permission_flags |= security::PermissionFlags::APPEND;
}
// TODO: https://fxbug.dev/455782510 - Remove this once non-open() checks are fully
// enforced.
permission_flags |= security::PermissionFlags::FOR_OPEN;
self.check_access(
locked,
current_task,
&namespace_node.mount,
permission_flags,
CheckAccessReason::InternalPermissionChecks,
namespace_node,
)?;
}
let (mode, rdev) = {
// Don't hold the info lock while calling into open_device or self.ops().
// TODO: The mode and rdev are immutable and shouldn't require a lock to read.
let info = self.info();
(info.mode, info.rdev)
};
match mode & FileMode::IFMT {
FileMode::IFCHR => {
if namespace_node.mount.flags().contains(MountFlags::NODEV) {
return error!(EACCES);
}
current_task.kernel().open_device(
locked,
current_task,
namespace_node,
flags,
rdev,
DeviceMode::Char,
)
}
FileMode::IFBLK => {
if namespace_node.mount.flags().contains(MountFlags::NODEV) {
return error!(EACCES);
}
current_task.kernel().open_device(
locked,
current_task,
namespace_node,
flags,
rdev,
DeviceMode::Block,
)
}
FileMode::IFIFO => Pipe::open(locked, current_task, self.fifo(current_task), flags),
// UNIX domain sockets can't be opened.
FileMode::IFSOCK => error!(ENXIO),
_ => self.create_file_ops(locked, current_task, flags),
}
}
/// Looks up the child called `name` through this node's `FsNodeOps`.
///
/// The caller must hold `EXEC` (search) access on this node; the access check runs before the
/// lookup is delegated, and its error is returned unchanged.
pub fn lookup<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mount: &MountInfo,
name: &FsStr,
) -> Result<FsNodeHandle, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
self.check_access(
locked,
current_task,
mount,
Access::EXEC,
CheckAccessReason::InternalPermissionChecks,
// The audit context records both the looked-up name and a source location.
&[Auditable::Name(name), std::panic::Location::caller().into()],
)?;
let locked = locked.cast_locked::<FileOpsCore>();
self.ops().lookup(locked, self, current_task, name)
}
/// Creates a child named `name` in this directory with the type and permissions in `mode`.
///
/// The sequence is: check `WRITE` access on this node, run the security hook matching the node
/// type, adjust `mode`/`owner` for set-group-ID inheritance, create the node through `mkdir`
/// (directories) or `mknod` (everything else), and finally initialise the new node's security
/// label.
///
/// # Panics
///
/// Panics if `mode` carries no file-type bits.
///
/// # Errors
///
/// Returns the first error from any of the checks above, including the `CAP_MKNOD` check
/// that applies to anything other than a regular file, FIFO or socket.
pub fn create_node<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mount: &MountInfo,
name: &FsStr,
mut mode: FileMode,
dev: DeviceType,
mut owner: FsCred,
) -> Result<FsNodeHandle, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
assert!(mode & FileMode::IFMT != FileMode::EMPTY, "mknod called without node type.");
self.check_access(
locked,
current_task,
mount,
Access::WRITE,
CheckAccessReason::InternalPermissionChecks,
security::Auditable::Name(name),
)?;
if mode.is_reg() {
security::check_fs_node_create_access(current_task, self, mode, name)?;
} else if mode.is_dir() {
// Even though the man page for mknod(2) says that mknod "cannot be used to create
// directories" in starnix the mkdir syscall (`sys_mkdirat`) ends up calling
// create_node.
security::check_fs_node_mkdir_access(current_task, self, mode, name)?;
// NOTE(review): as written, the mknod hook runs only for types that are NOT character
// devices, block devices, FIFOs or sockets, so those four types reach `mknod` below without
// a type-specific security hook. This looks inverted — confirm whether `!matches!` is
// intended.
} else if !matches!(
mode.fmt(),
FileMode::IFCHR | FileMode::IFBLK | FileMode::IFIFO | FileMode::IFSOCK
) {
security::check_fs_node_mknod_access(current_task, self, mode, name, dev)?;
}
// May rewrite the owner gid and the set-group-ID bit based on this directory's mode.
self.update_metadata_for_child(current_task, &mut mode, &mut owner);
let new_node = if mode.is_dir() {
let locked = locked.cast_locked::<FileOpsCore>();
self.ops().mkdir(locked, self, current_task, name, mode, owner)?
} else {
// https://man7.org/linux/man-pages/man2/mknod.2.html says on error EPERM:
//
// mode requested creation of something other than a regular
// file, FIFO (named pipe), or UNIX domain socket, and the
// caller is not privileged (Linux: does not have the
// CAP_MKNOD capability); also returned if the filesystem
// containing pathname does not support the type of node
// requested.
if !matches!(mode.fmt(), FileMode::IFREG | FileMode::IFIFO | FileMode::IFSOCK) {
security::check_task_capable(current_task, CAP_MKNOD)?;
}
let locked = locked.cast_locked::<FileOpsCore>();
self.ops().mknod(locked, self, current_task, name, mode, dev, owner)?
};
self.init_new_node_security_on_create(locked, current_task, &new_node, name)?;
Ok(new_node)
}
/// Creates a symbolic link named `name` in this directory that points at `target`.
///
/// Requires `WRITE` access on this node and approval from the symlink security hook. After
/// the filesystem creates the node, its security label is initialised.
///
/// # Errors
///
/// Returns the first error from the access check, the security hook, the filesystem's
/// `create_symlink`, or the security initialisation.
pub fn create_symlink<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mount: &MountInfo,
name: &FsStr,
target: &FsStr,
owner: FsCred,
) -> Result<FsNodeHandle, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
self.check_access(
locked,
current_task,
mount,
Access::WRITE,
CheckAccessReason::InternalPermissionChecks,
security::Auditable::Name(name),
)?;
security::check_fs_node_symlink_access(current_task, self, name, target)?;
let locked = locked.cast_locked::<FileOpsCore>();
let new_node =
self.ops().create_symlink(locked, self, current_task, name, target, owner)?;
self.init_new_node_security_on_create(locked, current_task, &new_node, name)?;
Ok(new_node)
}
/// Asks the LSM to initialise a security label for `new_node`, a freshly created child of this
/// node, and persists the label when the LSM hands back an extended attribute for it.
///
/// Succeeds without writing anything when the LSM returns no extended attribute. Also succeeds
/// when writing the attribute fails with `ENOTSUP`. Every other failure is returned and should
/// be propagated by the caller.
fn init_new_node_security_on_create<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    new_node: &FsNode,
    name: &FsStr,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let locked = locked.cast_locked::<FileOpsCore>();
    let Some(xattr) = security::fs_node_init_on_create(current_task, new_node, self, name)?
    else {
        return Ok(());
    };
    let outcome = new_node.ops().set_xattr(
        locked,
        new_node,
        current_task,
        xattr.name,
        xattr.value.as_slice().into(),
        XattrOp::Create,
    );
    match outcome {
        // This should only occur if a task has an "fscreate" context set, and creates a new
        // file in a filesystem that does not support xattrs.
        Err(e) if e.code == ENOTSUP => Ok(()),
        other => other,
    }
}
/// Creates an unnamed temporary file under this directory node.
///
/// Requires `WRITE` access on this node. `mode` and `owner` are adjusted for set-group-ID
/// inheritance before the filesystem creates the node, and the node's security label is then
/// initialised with an empty name. When `link_behavior` is `Disallowed`, that is recorded on
/// the new node; `FsNode::link` later refuses to link such a node.
///
/// # Panics
///
/// Panics if the new node's link behaviour was already set.
pub fn create_tmpfile<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mount: &MountInfo,
mut mode: FileMode,
mut owner: FsCred,
link_behavior: FsNodeLinkBehavior,
) -> Result<FsNodeHandle, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
self.check_access(
locked,
current_task,
mount,
Access::WRITE,
CheckAccessReason::InternalPermissionChecks,
security::Auditable::Location(std::panic::Location::caller()),
)?;
self.update_metadata_for_child(current_task, &mut mode, &mut owner);
let node = self.ops().create_tmpfile(self, current_task, mode, owner)?;
// The file has no name, so the security hook is given an empty one.
self.init_new_node_security_on_create(locked, current_task, &node, "".into())?;
// Only the `Disallowed` case is recorded; any other behaviour leaves the rare data untouched.
if link_behavior == FsNodeLinkBehavior::Disallowed {
node.ensure_rare_data().link_behavior.set(link_behavior).unwrap();
}
Ok(node)
}
/// Reads the target of this symbolic link through the node's `FsNodeOps`.
///
/// The node's atime is not updated here. Use `NamespaceNode::readlink`, which checks the
/// mount flags and updates the atime accordingly.
pub fn readlink<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
) -> Result<SymlinkTarget, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let locked = locked.cast_locked::<FileOpsCore>();
    // TODO: 378864856 - Is there a permission check here other than security checks?
    security::check_fs_node_read_link_access(current_task, self)?;
    self.ops().readlink(locked, self, current_task)
}
/// Creates a hard link named `name` in this directory that refers to `child`.
///
/// Returns a clone of `child` on success.
///
/// # Errors
///
/// * The error from `check_access` if the caller may not write to this directory.
/// * `EPERM` if `child` is a directory.
/// * `ENOENT` if `child` was created with `FsNodeLinkBehavior::Disallowed`.
/// * `EPERM` if the caller neither owns `child` nor holds `CAP_FOWNER`, unless `child` is a
///   regular file without set-ID bits that the caller can both read and write.
/// * Any error from the link security hook or from the filesystem's `link`.
pub fn link<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    name: &FsStr,
    child: &FsNodeHandle,
) -> Result<FsNodeHandle, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    self.check_access(
        locked,
        current_task,
        mount,
        Access::WRITE,
        CheckAccessReason::InternalPermissionChecks,
        security::Auditable::Location(std::panic::Location::caller()),
    )?;

    if child.is_dir() {
        return error!(EPERM);
    }

    if let Some(child_rare_data) = child.rare_data.get() {
        if matches!(child_rare_data.link_behavior.get(), Some(FsNodeLinkBehavior::Disallowed)) {
            return error!(ENOENT);
        }
    }

    // Check that `current_task` has permission to create the hard link.
    //
    // See description of /proc/sys/fs/protected_hardlinks in
    // https://man7.org/linux/man-pages/man5/proc.5.html for details of the security
    // vulnerabilities.
    let fsuid = current_task.with_current_creds(|creds| creds.fsuid);
    let (child_uid, mode) = {
        let info = child.info();
        (info.uid, info.mode)
    };

    // Check that the filesystem UID of the calling process (`current_task`) is the same as
    // the UID of the existing file. The check can be bypassed if the calling process has
    // `CAP_FOWNER` capability.
    if child_uid != fsuid && !security::is_task_capable_noaudit(current_task, CAP_FOWNER) {
        // If current_task is not the user of the existing file, it needs to have read and write
        // access to the existing file.
        child
            .check_access(
                locked,
                current_task,
                mount,
                Access::READ | Access::WRITE,
                CheckAccessReason::InternalPermissionChecks,
                security::Auditable::Name(name),
            )
            .map_err(|e| {
                // `check_access(..)` returns EACCES when the access rights don't match - change
                // it to EPERM to match Linux standards.
                if e == EACCES { errno!(EPERM) } else { e }
            })?;

        // There are also security issues that may arise when users link to setuid, setgid, or
        // special files.
        if mode.contains(FileMode::ISGID | FileMode::IXGRP) {
            return error!(EPERM);
        }
        if mode.contains(FileMode::ISUID) {
            return error!(EPERM);
        }
        // The file type is an enumerated value inside the IFMT field, not an independent bit:
        // symlinks and sockets also have the IFREG bit set, so a `contains(IFREG)` test would
        // accept them. Compare the whole type field instead.
        if !mode.is_reg() {
            return error!(EPERM);
        }
    }

    security::check_fs_node_link_access(current_task, self, child)?;

    let locked = locked.cast_locked::<FileOpsCore>();
    self.ops().link(locked, self, current_task, name, child)?;
    Ok(child.clone())
}
/// Removes the entry `name`, which refers to `child`, from this directory.
///
/// Requires `EXEC` and `WRITE` access on this node, must pass the sticky-bit rule, and must be
/// approved by the rmdir or unlink security hook depending on whether `child` is a directory.
/// On success this node's ctime and mtime are updated.
///
/// # Errors
///
/// Returns the first error from the checks above or from the filesystem's `unlink`.
pub fn unlink<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mount: &MountInfo,
name: &FsStr,
child: &FsNodeHandle,
) -> Result<(), Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
// The user must be able to search and write to the directory.
self.check_access(
locked,
current_task,
mount,
Access::EXEC | Access::WRITE,
CheckAccessReason::InternalPermissionChecks,
security::Auditable::Name(name),
)?;
self.check_sticky_bit(current_task, child)?;
// Directories and non-directories are vetted by different security hooks.
if child.is_dir() {
security::check_fs_node_rmdir_access(current_task, self, child, name)?;
} else {
security::check_fs_node_unlink_access(current_task, self, child, name)?;
}
let locked = locked.cast_locked::<FileOpsCore>();
self.ops().unlink(locked, self, current_task, name, child)?;
self.update_ctime_mtime();
Ok(())
}
/// Truncates this node to `length` bytes after checking that the caller may write to it.
///
/// Delegates to `truncate_with_strategy` with `RealAppendLockStrategy`; see that method for
/// the checks performed and the errors returned.
pub fn truncate<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mount: &MountInfo,
length: u64,
) -> Result<(), Errno>
where
L: LockEqualOrBefore<BeforeFsNodeAppend>,
{
self.truncate_with_strategy(locked, RealAppendLockStrategy {}, current_task, mount, length)
}
/// Path-based truncate: checks write access, then resizes the node to `length` bytes using
/// the supplied append-lock `strategy`.
///
/// # Errors
///
/// * `EISDIR` if this node is a directory.
/// * The error from the `WRITE` access check.
/// * Any error from `truncate_common`.
pub fn truncate_with_strategy<L, M>(
    &self,
    locked: &mut Locked<L>,
    strategy: impl AppendLockStrategy<M>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    length: u64,
) -> Result<(), Errno>
where
    M: LockEqualOrBefore<FileOpsCore>,
    L: LockEqualOrBefore<M>,
{
    if self.is_dir() {
        return error!(EISDIR);
    }

    // The cast borrow only lasts for this statement, so `locked` is free again afterwards.
    self.check_access(
        locked.cast_locked::<M>(),
        current_task,
        mount,
        Access::WRITE,
        CheckAccessReason::InternalPermissionChecks,
        security::Auditable::Location(std::panic::Location::caller()),
    )?;

    self.truncate_common(locked, strategy, current_task, length)
}
/// Avoid calling this method directly. You probably want to call `FileObject::ftruncate()`
/// which will also perform all file-descriptor based verifications.
///
/// Truncates this node to `length` bytes without checking that the node is writable.
///
/// # Errors
///
/// * `EINVAL` if this node is a directory.
/// * Any error from `truncate_common`.
pub fn ftruncate<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
length: u64,
) -> Result<(), Errno>
where
L: LockEqualOrBefore<BeforeFsNodeAppend>,
{
if self.is_dir() {
// When truncating a file descriptor, if the descriptor references a directory,
// return EINVAL. This is different from the truncate() syscall which returns EISDIR.
//
// See https://man7.org/linux/man-pages/man2/ftruncate.2.html#ERRORS
return error!(EINVAL);
}
// For ftruncate, we do not need to check that the file node is writable.
//
// The file object that calls this method must verify that the file was opened
// with write permissions.
//
// This matters because a file could be opened with O_CREAT + O_RDWR + 0444 mode.
// The file descriptor returned from such an operation can be truncated, even
// though the file was created with a read-only mode.
//
// See https://man7.org/linux/man-pages/man2/ftruncate.2.html#DESCRIPTION
// which says:
//
// "With ftruncate(), the file must be open for writing; with truncate(),
// the file must be writable."
self.truncate_common(locked, RealAppendLockStrategy {}, current_task, length)
}
// Called by `truncate` and `ftruncate` above.
//
// Validates `length`, clears the set-ID bits, takes the append lock through `strategy`, asks
// the filesystem to truncate, and finally updates ctime and mtime.
//
// Errors:
// * EINVAL if `length` exceeds MAX_LFS_FILESIZE.
// * EFBIG if `length` exceeds the task's FSIZE resource limit; SIGXFSZ is sent first.
// * Any error from clearing the set-ID bits, taking the append lock, or the filesystem.
fn truncate_common<L, M>(
&self,
locked: &mut Locked<L>,
strategy: impl AppendLockStrategy<M>,
current_task: &CurrentTask,
length: u64,
) -> Result<(), Errno>
where
M: LockEqualOrBefore<FileOpsCore>,
L: LockEqualOrBefore<M>,
{
if length > MAX_LFS_FILESIZE as u64 {
return error!(EINVAL);
}
{
// Scoped so that this cast borrow of `locked` ends before `locked` is cast again below.
let locked = locked.cast_locked::<M>().cast_locked::<FileOpsCore>();
if length > current_task.thread_group().get_rlimit(locked, Resource::FSIZE) {
send_standard_signal(locked, current_task, SignalInfo::default(SIGXFSZ));
return error!(EFBIG);
}
}
let locked = locked.cast_locked::<M>();
self.clear_suid_and_sgid_bits(locked, current_task)?;
// We have to take the append lock since otherwise it would be possible to truncate and for
// an append to continue using the old size.
let (guard, locked) = strategy.lock(locked, current_task, self)?;
self.ops().truncate(locked, &guard, self, current_task, length)?;
self.update_ctime_mtime();
Ok(())
}
/// Avoid calling this method directly. You probably want to call `FileObject::fallocate()`
/// which will also perform additional verifications.
///
/// Delegates to `fallocate_with_strategy` with `RealAppendLockStrategy`; see that method for
/// the checks performed and the errors returned.
pub fn fallocate<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
mode: FallocMode,
offset: u64,
length: u64,
) -> Result<(), Errno>
where
L: LockBefore<BeforeFsNodeAppend>,
{
self.fallocate_with_strategy(
locked,
RealAppendLockStrategy {},
current_task,
mode,
offset,
length,
)
}
/// Performs the `mode` allocation for the byte range starting at `offset` and spanning
/// `length` bytes, taking the append lock through `strategy`.
///
/// The set-ID bits are cleared before the filesystem's `allocate` runs, and ctime and mtime
/// are updated afterwards.
///
/// # Errors
///
/// * `EFBIG` if `offset + length` fails `checked_add_offset_and_length`.
/// * `EFBIG` if `offset + length` exceeds the task's `FSIZE` resource limit; `SIGXFSZ` is sent
///   to the task first.
/// * Any error from clearing the set-ID bits, taking the append lock, or the filesystem.
pub fn fallocate_with_strategy<L, M>(
&self,
locked: &mut Locked<L>,
strategy: impl AppendLockStrategy<M>,
current_task: &CurrentTask,
mode: FallocMode,
offset: u64,
length: u64,
) -> Result<(), Errno>
where
M: LockEqualOrBefore<FileOpsCore>,
L: LockEqualOrBefore<M>,
{
// End of the requested range; this is the value compared against the size limit.
let allocate_size = checked_add_offset_and_length(offset as usize, length as usize)
.map_err(|_| errno!(EFBIG))? as u64;
{
// Scoped so that this cast borrow of `locked` ends before `locked` is cast again below.
let locked = locked.cast_locked::<M>().cast_locked::<FileOpsCore>();
if allocate_size > current_task.thread_group().get_rlimit(locked, Resource::FSIZE) {
send_standard_signal(locked, current_task, SignalInfo::default(SIGXFSZ));
return error!(EFBIG);
}
}
let locked = locked.cast_locked::<M>();
self.clear_suid_and_sgid_bits(locked, current_task)?;
let (guard, locked) = strategy.lock(locked, current_task, self)?;
self.ops().allocate(locked, &guard, self, current_task, mode, offset, length)?;
self.update_ctime_mtime();
Ok(())
}
/// Adjusts the `mode` and `owner` of a child about to be created in this directory.
///
/// When this directory is set-group-ID, the child takes the directory's gid, and a child
/// directory also takes the set-group-ID bit. For a non-directory child, the set-group-ID bit
/// is then dropped unless the task's fsgid equals the child's gid, the task is in that group,
/// or the task holds `CAP_FOWNER`.
fn update_metadata_for_child(
    &self,
    current_task: &CurrentTask,
    mode: &mut FileMode,
    owner: &mut FsCred,
) {
    // Copy out what is needed so the info lock is not held longer than necessary.
    let (parent_mode, parent_gid) = {
        let parent_info = self.info();
        (parent_info.mode, parent_info.gid)
    };

    // The setgid bit on a directory causes the gid to be inherited by new children and the
    // setgid bit to be inherited by new child directories. See SetgidDirTest in gvisor.
    if parent_mode.contains(FileMode::ISGID) {
        owner.gid = parent_gid;
        if mode.is_dir() {
            *mode |= FileMode::ISGID;
        }
    }

    if mode.is_dir() {
        return;
    }

    // https://man7.org/linux/man-pages/man7/inode.7.html says:
    //
    //   For an executable file, the set-group-ID bit causes the
    //   effective group ID of a process that executes the file to change
    //   as described in execve(2).
    //
    // We need to check whether the current task has permission to create such a file.
    // See a similar check in `FsNode::chmod`.
    let (fsgid, is_in_group) =
        current_task.with_current_creds(|creds| (creds.fsgid, creds.is_in_group(owner.gid)));
    let may_keep_setgid = owner.gid == fsgid
        || is_in_group
        || security::is_task_capable_noaudit(current_task, CAP_FOWNER);
    if !may_keep_setgid {
        *mode &= !FileMode::ISGID;
    }
}
/// Checks whether `current_task` may open this node with `O_NOATIME`.
///
/// Per open(2),
///
///   O_NOATIME (since Linux 2.6.8)
///      ...
///
///      This flag can be employed only if one of the following
///      conditions is true:
///
///      *  The effective UID of the process matches the owner UID
///         of the file.
///
///      *  The calling process has the CAP_FOWNER capability in
///         its user namespace and the owner UID of the file has a
///         mapping in the namespace.
///
/// Here the comparison uses the task's fsuid; on a mismatch the result of the `CAP_FOWNER`
/// capability check is returned.
pub fn check_o_noatime_allowed(&self, current_task: &CurrentTask) -> Result<(), Errno> {
    let fsuid = current_task.with_current_creds(|creds| creds.fsuid);
    let owner_uid = self.info().uid;
    if fsuid == owner_uid {
        return Ok(());
    }
    security::check_task_capable(current_task, CAP_FOWNER)?;
    Ok(())
}
/// Default access-check policy for a node.
///
/// Takes ownership of the `info` read guard and releases it immediately after copying out the
/// node's uid, gid and mode, so no node lock is held during the checks.
///
/// For `CheckAccessReason::ChangeTimestamps` the timestamp rules below are applied first.
/// Unless they decide the outcome, the mode-based `check_access` runs, followed by the
/// security module's `fs_node_permission` hook.
pub fn default_check_access_impl(
    &self,
    current_task: &CurrentTask,
    permission_flags: security::PermissionFlags,
    reason: CheckAccessReason,
    info: RwLockReadGuard<'_, FsNodeInfo>,
    audit_context: Auditable<'_>,
) -> Result<(), Errno> {
    let node_uid = info.uid;
    let node_gid = info.gid;
    let mode = info.mode;
    drop(info);

    if let CheckAccessReason::ChangeTimestamps { now } = reason {
        // To set the timestamps to the current time the caller must either have write access to
        // the file, be the file owner, or hold the CAP_DAC_OVERRIDE or CAP_FOWNER capability.
        // To set the timestamps to other values the caller must either be the file owner or hold
        // the CAP_FOWNER capability.
        let fsuid = current_task.with_current_creds(|creds| creds.fsuid);
        if fsuid == node_uid {
            return Ok(());
        }
        if !now {
            security::check_task_capable(current_task, CAP_FOWNER)?;
            return Ok(());
        }
        if security::is_task_capable_noaudit(current_task, CAP_FOWNER) {
            return Ok(());
        }
        // Otherwise fall through: write access is still enough to set "now".
    }

    check_access(self, current_task, permission_flags, node_uid, node_gid, mode)?;
    security::fs_node_permission(current_task, self, permission_flags, audit_context)
}
/// Decides whether `current_task` may access this node with the requested `access` flags
/// (read, write, or exec), taking capabilities, ownership and group membership into account.
///
/// Mount restrictions are applied first: a `WRITE` request is rejected by a read-only mount,
/// and an `EXEC` request on a non-directory is rejected by a no-exec mount. The decision is
/// then delegated to the node's `FsNodeOps::check_access`. When `reason` is
/// `CheckAccessReason::Access`, the `ACCESS` permission flag is added before delegating.
pub fn check_access<'a, L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    access: impl Into<security::PermissionFlags>,
    reason: CheckAccessReason,
    audit_context: impl Into<security::Auditable<'a>>,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let mut permission_flags: security::PermissionFlags = access.into();
    let wants_write = permission_flags.contains(security::PermissionFlags::WRITE);
    let wants_exec = permission_flags.contains(security::PermissionFlags::EXEC);

    if wants_write {
        mount.check_readonly_filesystem()?;
    }
    if wants_exec && !self.is_dir() {
        mount.check_noexec_filesystem()?;
    }
    if reason == CheckAccessReason::Access {
        permission_flags |= PermissionFlags::ACCESS;
    }

    let locked = locked.cast_locked::<FileOpsCore>();
    let audit_context: security::Auditable<'a> = audit_context.into();
    self.ops().check_access(
        locked,
        self,
        current_task,
        permission_flags,
        &self.info,
        reason,
        audit_context,
    )
}
/// Checks whether the sticky bit, `S_ISVTX`, forbids `current_task` from removing `child`
/// from this directory.
///
/// When this node has `S_ISVTX` set and `child` is not owned by the task's `fsuid`, the task
/// must hold `CAP_FOWNER`. In every other case the check passes.
pub fn check_sticky_bit(
    &self,
    current_task: &CurrentTask,
    child: &FsNodeHandle,
) -> Result<(), Errno> {
    let fsuid = current_task.with_current_creds(|creds| creds.fsuid);
    let is_sticky = self.info().mode.contains(FileMode::ISVTX);
    if is_sticky && child.info().uid != fsuid {
        security::check_task_capable(current_task, CAP_FOWNER)?;
    }
    Ok(())
}
/// Returns the pipe associated with this FIFO node, obtained through the rare data's
/// `ensure_fifo` (presumably creating it on first use — TODO confirm).
///
/// # Panics
///
/// Panics if this node is not a FIFO.
pub fn fifo(&self, current_task: &CurrentTask) -> &PipeHandle {
assert!(self.is_fifo());
self.ensure_rare_data().ensure_fifo(current_task)
}
/// Returns the UNIX domain socket bound to this node.
///
/// Yields `None` when no socket has been bound or the node has no rare data.
pub fn bound_socket(&self) -> Option<&SocketHandle> {
    self.rare_data.get().and_then(|rare_data| rare_data.bound_socket.get())
}
/// Register the provided socket as the UNIX domain socket bound to this node.
///
/// It is a fatal error to call this method again if it has already been called on this node.
///
/// # Panics
///
/// Panics if a socket has already been registered on this node.
pub fn set_bound_socket(&self, socket: SocketHandle) {
// `set` fails if a socket is already stored; the assert turns that into a panic.
assert!(self.ensure_rare_data().bound_socket.set(socket).is_ok());
}
/// Applies `mutator` to a copy of this node's info and commits the result if it is accepted.
///
/// The info write lock is held for the whole call. The copy replaces the stored info only
/// after every check has passed and the filesystem has persisted the change. If the mutator
/// changed nothing, the call returns early without touching the status-change time.
///
/// # Errors
///
/// * Any error returned by `mutator`.
/// * `EPERM` if the new mode grants execute permission while the `NO_EXEC` seal is set.
/// * Any error from the setattr security hook or from the filesystem's `update_attributes`.
///
/// # Panics
///
/// Panics if `mutator` modified `time_status_change`, which this method manages itself.
pub fn update_attributes<L, F>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mutator: F,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
    F: FnOnce(&mut FsNodeInfo) -> Result<(), Errno>,
{
    let mut info = self.info.write();
    let mut new_info = info.clone();
    mutator(&mut new_info)?;

    let new_mode = new_info.mode;
    let new_access = new_mode.user_access() | new_mode.group_access() | new_mode.other_access();
    if new_access.intersects(Access::EXEC) {
        let write_guard_state = self.write_guard_state.lock();
        // Failing to read the seals is not an error here; only an explicit NO_EXEC seal
        // blocks the change.
        if matches!(
            write_guard_state.get_seals(),
            Ok(seals) if seals.contains(SealFlags::NO_EXEC)
        ) {
            return error!(EPERM);
        }
    }

    // `mutator`s should not update the attribute change time, which is managed by this API.
    assert_eq!(info.time_status_change, new_info.time_status_change);
    if *info == new_info {
        return Ok(());
    }
    new_info.time_status_change = utc::utc_now();

    // Record which of the persisted attributes actually changed.
    let has = zxio_node_attr_has_t {
        modification_time: info.time_modify != new_info.time_modify,
        access_time: info.time_access != new_info.time_access,
        mode: info.mode != new_info.mode,
        uid: info.uid != new_info.uid,
        gid: info.gid != new_info.gid,
        rdev: info.rdev != new_info.rdev,
        casefold: info.casefold != new_info.casefold,
        wrapping_key_id: info.wrapping_key_id != new_info.wrapping_key_id,
        ..Default::default()
    };
    security::check_fs_node_setattr_access(current_task, self, &has)?;

    // Call `update_attributes(..)` to persist the changes for the following fields.
    let must_persist = has.modification_time
        || has.access_time
        || has.mode
        || has.uid
        || has.gid
        || has.rdev
        || has.casefold
        || has.wrapping_key_id;
    if must_persist {
        let locked = locked.cast_locked::<FileOpsCore>();
        self.ops().update_attributes(locked, current_task, &new_info, has)?;
    }

    *info = new_info;
    Ok(())
}
/// Changes the permission bits of this FsNode to those in `mode`.
///
/// Does not change the IFMT of the node.
///
/// A caller whose euid differs from the node's owner must hold `CAP_FOWNER`. An owner who is
/// not in the node's group and lacks `CAP_FOWNER` has the set-group-ID bit dropped from the
/// requested mode.
///
/// # Errors
///
/// Fails if the mount is read-only, if the capability check fails, or if
/// `update_attributes` fails.
pub fn chmod<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    mut mode: FileMode,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    mount.check_readonly_filesystem()?;
    self.update_attributes(locked, current_task, |info| {
        let (euid, egid, in_group) = current_task
            .with_current_creds(|creds| (creds.euid, creds.egid, creds.is_in_group(info.gid)));
        if info.uid != euid {
            security::check_task_capable(current_task, CAP_FOWNER)?;
        } else {
            let outside_group = info.gid != egid && !in_group;
            if outside_group
                && mode.intersects(FileMode::ISGID)
                && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
            {
                mode &= !FileMode::ISGID;
            }
        }
        info.chmod(mode);
        Ok(())
    })
}
/// Changes the owner and/or group of this FsNode; `None` leaves the respective id untouched.
///
/// A task holding `CAP_CHOWN` may make any change. Without it, the owner cannot be changed at
/// all, and only the file's owner may change the group, and only to a group it belongs to.
/// Any other user may only issue the no-op `chown(file, -1, -1)`, and not on regular files
/// carrying set-ID bits.
///
/// # Errors
///
/// Fails if the mount is read-only, with `EPERM` when the rules above are violated, or with
/// any error from `update_attributes`.
pub fn chown<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    owner: Option<uid_t>,
    group: Option<gid_t>,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    mount.check_readonly_filesystem()?;
    self.update_attributes(locked, current_task, |info| {
        if security::is_task_capable_noaudit(current_task, CAP_CHOWN) {
            info.chown(owner, group);
            return Ok(());
        }

        // Nobody can change the owner.
        if owner.is_some_and(|uid| info.uid != uid) {
            return error!(EPERM);
        }

        let (euid, is_in_group) = current_task
            .with_current_creds(|creds| (creds.euid, group.map(|gid| creds.is_in_group(gid))));

        // The owner can change the group.
        if info.uid == euid {
            // To a group that it belongs.
            if is_in_group == Some(false) {
                return error!(EPERM);
            }
            info.chown(None, group);
            return Ok(());
        }

        // Any other user can call chown(file, -1, -1)
        if owner.is_some() || group.is_some() {
            return error!(EPERM);
        }

        // But not on set-user-ID or set-group-ID files.
        // If we were to chown them, they would drop the set-ID bit.
        if info.mode.is_reg()
            && (info.mode.contains(FileMode::ISUID)
                || info.mode.contains(FileMode::ISGID | FileMode::IXGRP))
        {
            return error!(EPERM);
        }

        info.chown(None, None);
        Ok(())
    })
}
/// Forcefully change the owner and group of this node.
///
/// Both the uid and the gid are overwritten with the values from `creds`.
///
/// # Safety
///
/// This function skips all the security checks and just updates the owner and group. Also, does
/// not check if the filesystem is read-only and does not update the attribute change time.
///
/// This function is used to set the owner and group of /proc/pid to the credentials of the
/// current task. Please consider carefully whether you want to use this function for another
/// purpose.
pub unsafe fn force_chown(&self, creds: FsCred) {
// Goes through `update_info` rather than `update_attributes`, bypassing the checks the latter
// performs.
self.update_info(|info| {
info.chown(Some(creds.uid), Some(creds.gid));
});
}
/// Returns `true` when the node's mode marks it as a regular file.
pub fn is_reg(&self) -> bool {
    let mode = self.info().mode;
    mode.is_reg()
}
/// Returns `true` when the node's mode marks it as a directory.
pub fn is_dir(&self) -> bool {
    let mode = self.info().mode;
    mode.is_dir()
}
/// Returns `true` when the node's mode marks it as a socket.
pub fn is_sock(&self) -> bool {
    let mode = self.info().mode;
    mode.is_sock()
}
/// Returns `true` when the node's mode marks it as a FIFO.
pub fn is_fifo(&self) -> bool {
    let mode = self.info().mode;
    mode.is_fifo()
}
/// Returns `true` when the node's mode marks it as a symbolic link.
pub fn is_lnk(&self) -> bool {
    let mode = self.info().mode;
    mode.is_lnk()
}
/// Returns the device id of the filesystem this node belongs to.
///
/// # Panics
///
/// Panics if the filesystem has already been dropped (see `FsNode::fs`).
pub fn dev(&self) -> DeviceType {
    let filesystem = self.fs();
    filesystem.dev_id
}
/// Builds a `uapi::stat` for this node from freshly refreshed node info.
///
/// # Errors
///
/// * Any error from the getattr security hook or from refreshing the node info.
/// * `EINVAL` if a timestamp, the link count, the size, the block size or the block count does
///   not fit the corresponding `stat` field.
pub fn stat<L>(
&self,
locked: &mut Locked<L>,
current_task: &CurrentTask,
) -> Result<uapi::stat, Errno>
where
L: LockEqualOrBefore<FileOpsCore>,
{
security::check_fs_node_getattr_access(current_task, self)?;
let info = self.fetch_and_refresh_info(locked, current_task)?;
// Splits a time into the (seconds, nanoseconds) pair used by the `stat` fields, failing with
// EINVAL if either part does not fit the field's integer type.
let time_to_kernel_timespec_pair = |t| {
let timespec { tv_sec, tv_nsec } = timespec_from_time(t);
let time = tv_sec.try_into().map_err(|_| errno!(EINVAL))?;
let time_nsec = tv_nsec.try_into().map_err(|_| errno!(EINVAL))?;
Ok((time, time_nsec))
};
let (st_atime, st_atime_nsec) = time_to_kernel_timespec_pair(info.time_access)?;
let (st_mtime, st_mtime_nsec) = time_to_kernel_timespec_pair(info.time_modify)?;
let (st_ctime, st_ctime_nsec) = time_to_kernel_timespec_pair(info.time_status_change)?;
Ok(uapi::stat {
// The device id comes from the owning filesystem, not from the node info.
st_dev: self.dev().bits(),
st_ino: self.ino,
st_nlink: info.link_count.try_into().map_err(|_| errno!(EINVAL))?,
st_mode: info.mode.bits(),
st_uid: info.uid,
st_gid: info.gid,
st_rdev: info.rdev.bits(),
st_size: info.size.try_into().map_err(|_| errno!(EINVAL))?,
st_blksize: info.blksize.try_into().map_err(|_| errno!(EINVAL))?,
st_blocks: info.blocks.try_into().map_err(|_| errno!(EINVAL))?,
st_atime,
st_atime_nsec,
st_mtime,
st_mtime_nsec,
st_ctime,
st_ctime_nsec,
..Default::default()
})
}
// TODO(https://fxbug.dev/454730248): This is probably the wrong way to implement O_APPEND.
/// Refreshes the node info and returns the recorded size as a `usize`.
///
/// Fails with `EINVAL` if the size cannot be represented as a `usize`, or with whatever error
/// `fetch_and_refresh_info()` reports.
pub fn get_size<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
) -> Result<usize, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let info = self.fetch_and_refresh_info(locked, current_task)?;
    let size = info.size.try_into().map_err(|_| errno!(EINVAL))?;
    Ok(size)
}
/// Converts `time` into the `statx_timestamp` representation (whole seconds plus a nanosecond
/// remainder).
///
/// Euclidean division is used so that the nanosecond part is always within
/// `0..NANOS_PER_SECOND`, including for instants before the epoch (negative nanosecond
/// counts). With truncating division such an instant would produce a negative remainder,
/// which wraps to a huge value when stored in the unsigned `tv_nsec` field.
fn statx_timestamp_from_time(time: UtcInstant) -> statx_timestamp {
    let nanos = time.into_nanos();
    statx_timestamp {
        // Rounds towards negative infinity, pairing with the non-negative remainder below.
        tv_sec: nanos.div_euclid(NANOS_PER_SECOND),
        // Always in `0..NANOS_PER_SECOND`, so the cast to `u32` cannot lose information.
        tv_nsec: nanos.rem_euclid(NANOS_PER_SECOND) as u32,
        ..Default::default()
    }
}
/// Builds the `statx` record describing this node.
///
/// The getattr security check runs first. The node info is then read as-is when `flags`
/// contains `AT_STATX_DONT_SYNC`, and refreshed through `fetch_and_refresh_info()` otherwise.
/// A `mask` with all of the `STATX__RESERVED` bits set is rejected with `EINVAL`, as is any
/// value that does not fit its `statx` field.
pub fn statx<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    flags: StatxFlags,
    mask: u32,
) -> Result<statx, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    security::check_fs_node_getattr_access(current_task, self)?;

    // `mask` does not select fields for now: all of the fields below are always filled in.
    let skip_refresh = flags.contains(StatxFlags::AT_STATX_DONT_SYNC);
    let info =
        if skip_refresh { self.info() } else { self.fetch_and_refresh_info(locked, current_task)? };

    let reserved_requested = mask & STATX__RESERVED == STATX__RESERVED;
    if reserved_requested {
        return error!(EINVAL);
    }

    track_stub!(TODO("https://fxbug.dev/302594110"), "statx attributes");
    let stx_mnt_id = 0;

    // fs-verity is the only attribute reported: it is always present in the attribute mask and
    // set in the attributes only when the node is in the `FsVerity` state.
    let stx_attributes_mask = STATX_ATTR_VERITY as u64;
    let verity_enabled = matches!(*self.fsverity.lock(), FsVerityState::FsVerity);
    let stx_attributes = if verity_enabled { STATX_ATTR_VERITY as u64 } else { 0 };

    // Narrowing conversions; each one fails with EINVAL when the value is out of range.
    let stx_blksize = info.blksize.try_into().map_err(|_| errno!(EINVAL))?;
    let stx_nlink = info.link_count.try_into().map_err(|_| errno!(EINVAL))?;
    let stx_mode = info.mode.bits().try_into().map_err(|_| errno!(EINVAL))?;
    let stx_size = info.size.try_into().map_err(|_| errno!(EINVAL))?;
    let stx_blocks = info.blocks.try_into().map_err(|_| errno!(EINVAL))?;

    let dev_id = self.fs().dev_id;

    Ok(statx {
        stx_mask: STATX_NLINK
            | STATX_UID
            | STATX_GID
            | STATX_ATIME
            | STATX_MTIME
            | STATX_CTIME
            | STATX_INO
            | STATX_SIZE
            | STATX_BLOCKS
            | STATX_BASIC_STATS,
        stx_blksize,
        stx_attributes,
        stx_nlink,
        stx_uid: info.uid,
        stx_gid: info.gid,
        stx_mode,
        stx_ino: self.ino,
        stx_size,
        stx_blocks,
        stx_attributes_mask,
        stx_ctime: Self::statx_timestamp_from_time(info.time_status_change),
        stx_mtime: Self::statx_timestamp_from_time(info.time_modify),
        stx_atime: Self::statx_timestamp_from_time(info.time_access),
        stx_rdev_major: info.rdev.major(),
        stx_rdev_minor: info.rdev.minor(),
        stx_dev_major: dev_id.major(),
        stx_dev_minor: dev_id.minor(),
        stx_mnt_id,
        ..Default::default()
    })
}
/// Verifies that `current_task` may perform `access` on the extended attribute `name`.
///
/// `access` must be exactly `Access::READ` or `Access::WRITE`. The check applied depends on the
/// namespace prefix of `name`; a failed `EPERM` check is reported as `ENODATA` when `access` is
/// a read.
///
/// # Panics
///
/// Panics if `access` is neither `READ` nor `WRITE`, or if `name` does not start with one of
/// the user, trusted, system or security prefixes.
fn check_xattr_access<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    name: &FsStr,
    access: Access,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    assert!(access == Access::READ || access == Access::WRITE);

    // For reads, a permission failure is surfaced as the attribute not existing.
    let hide_eperm_on_read =
        |e: Errno| if access == Access::READ && e.code == EPERM { errno!(ENODATA) } else { e };

    // man xattr(7) describes the different access checks applied to each extended attribute
    // namespace. Each namespace either completes its own checks here, or reports that
    // `CAP_SYS_ADMIN` is needed.
    let needs_sys_admin = if name.starts_with(XATTR_USER_PREFIX.to_bytes()) {
        // The info guard is confined to this inner block so that it is released before
        // `check_access()` runs.
        let is_reg_or_dir = {
            let info = self.info();
            info.mode.is_reg() || info.mode.is_dir()
        };
        if !is_reg_or_dir {
            return Err(hide_eperm_on_read(errno!(EPERM)));
        }
        // TODO: https://fxbug.dev/460734830 - Perform capability check(s) if file has sticky
        // bit set.
        self.check_access(
            locked,
            current_task,
            mount,
            access,
            CheckAccessReason::InternalPermissionChecks,
            security::Auditable::Name(name),
        )?;
        false
    } else if name.starts_with(XATTR_TRUSTED_PREFIX.to_bytes()) {
        // Trusted extended attributes require `CAP_SYS_ADMIN` to read or write.
        true
    } else if name.starts_with(XATTR_SYSTEM_PREFIX.to_bytes()) {
        // System extended attributes have attribute-specific access policy.
        // TODO: https://fxbug.dev/460734830 - Revise how system extended attributes are
        // access-controlled.
        true
    } else if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
        // Only writes require `CAP_SYS_ADMIN`, and only when the LSM owning `name` does not
        // specify that the capability check should be skipped.
        access == Access::WRITE && !security::fs_node_xattr_skipcap(current_task, name)
    } else {
        panic!("Unknown extended attribute prefix: {}", name);
    };

    if needs_sys_admin {
        security::check_task_capable(current_task, CAP_SYS_ADMIN).map_err(hide_eperm_on_read)?;
    }
    Ok(())
}
/// Reads the extended attribute `name` of this node.
///
/// The discretionary checks for the attribute's namespace run first, followed by the LSM
/// getxattr check. The read itself is served by the security module for `security.*`
/// attributes and by the node's `FsNodeOps` for everything else.
pub fn get_xattr<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    name: &FsStr,
    max_size: usize,
) -> Result<ValueOrSize<FsString>, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Discretionary capability & access checks appropriate to the xattr prefix come first.
    self.check_xattr_access(locked, current_task, mount, name, Access::READ)?;

    // LSM access checks must be performed after discretionary checks.
    security::check_fs_node_getxattr_access(current_task, self, name)?;

    if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
        // Attributes in the security.* domain go to the LSM, which either handles the request
        // or delegates to `FsNodeOps::get_xattr()`.
        return security::fs_node_getsecurity(locked, current_task, self, name, max_size);
    }

    // Attributes outside security.* are read directly through the `FsNodeOps`.
    self.ops().get_xattr(
        locked.cast_locked::<FileOpsCore>(),
        self,
        current_task,
        name,
        max_size,
    )
}
/// Writes `value` to the extended attribute `name` of this node, according to `op`.
///
/// The discretionary checks for the attribute's namespace run first, followed by the LSM
/// setxattr check. The write itself is served by the security module for `security.*`
/// attributes and by the node's `FsNodeOps` for everything else.
pub fn set_xattr<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    name: &FsStr,
    value: &FsStr,
    op: XattrOp,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Discretionary capability & access checks appropriate to the xattr prefix come first.
    self.check_xattr_access(locked, current_task, mount, name, Access::WRITE)?;

    // LSM access checks must be performed after discretionary checks.
    security::check_fs_node_setxattr_access(current_task, self, name, value, op)?;

    if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
        // Attributes in the security.* domain go to the LSM, which either handles the request
        // or delegates to `FsNodeOps::set_xattr()`.
        return security::fs_node_setsecurity(locked, current_task, self, name, value, op);
    }

    // Attributes outside security.* are written directly through the `FsNodeOps`.
    self.ops().set_xattr(
        locked.cast_locked::<FileOpsCore>(),
        self,
        current_task,
        name,
        value,
        op,
    )
}
/// Removes the extended attribute `name` from this node.
///
/// Removal is treated as a write for the purpose of the discretionary checks, which run before
/// the LSM removexattr check. The removal itself is performed by the node's `FsNodeOps`.
pub fn remove_xattr<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    name: &FsStr,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Discretionary capability & access checks appropriate to the xattr prefix come first.
    self.check_xattr_access(locked, current_task, mount, name, Access::WRITE)?;

    // LSM access checks must be performed after discretionary checks.
    security::check_fs_node_removexattr_access(current_task, self, name)?;

    let ops = self.ops();
    ops.remove_xattr(locked.cast_locked::<FileOpsCore>(), self, current_task, name)
}
/// Lists the extended attribute names of this node.
///
/// The LSM listxattr check runs first and the listing is then obtained from the node's
/// `FsNodeOps`. When a list of names is returned, `trusted.*` names are removed from it unless
/// `current_task` holds `CAP_SYS_ADMIN`.
pub fn list_xattrs<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    max_size: usize,
) -> Result<ValueOrSize<Vec<FsString>>, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    security::check_fs_node_listxattr_access(current_task, self)?;

    let listing = self.ops().list_xattrs(
        locked.cast_locked::<FileOpsCore>(),
        self,
        current_task,
        max_size,
    )?;

    let visible = listing.map(|mut names| {
        // Extended attributes may be listed even if the caller would not be able to read
        // (or modify) the attribute's value.
        // trusted.* attributes are only accessible with CAP_SYS_ADMIN and are omitted by
        // `listxattr()` unless the caller holds CAP_SYS_ADMIN.
        let sees_trusted = security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN);
        if !sees_trusted {
            names.retain(|name| !name.starts_with(XATTR_TRUSTED_PREFIX.to_bytes()));
        }
        names
    });
    Ok(visible)
}
/// Returns a read guard over the current `FsNodeInfo`, without refreshing it.
pub fn info(&self) -> RwLockReadGuard<'_, FsNodeInfo> {
    let info_lock = &self.info;
    info_lock.read()
}
/// Asks the node's `FsNodeOps` to refresh the `FsNodeInfo` if necessary, and returns a read
/// guard over it.
pub fn fetch_and_refresh_info<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
) -> Result<RwLockReadGuard<'_, FsNodeInfo>, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    let ops = self.ops();
    let locked = locked.cast_locked::<FileOpsCore>();
    ops.fetch_and_refresh_info(locked, self, current_task, &self.info)
}
/// Runs `mutator` on the `FsNodeInfo` while holding its write lock, and returns whatever
/// `mutator` returns.
pub fn update_info<F, T>(&self, mutator: F) -> T
where
    F: FnOnce(&mut FsNodeInfo) -> T,
{
    let mut guard = self.info.write();
    let info: &mut FsNodeInfo = &mut guard;
    mutator(info)
}
/// Clears the SUID and SGID bits of this node, unless `current_task` holds `CAP_FSETID`, in
/// which case nothing is changed.
pub fn clear_suid_and_sgid_bits<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Holders of CAP_FSETID keep the bits.
    if security::is_task_capable_noaudit(current_task, CAP_FSETID) {
        return Ok(());
    }
    self.update_attributes(locked, current_task, |info| {
        info.clear_suid_and_sgid_bits();
        Ok(())
    })?;
    Ok(())
}
/// Sets both the status-change time and the modification time of this node to the current UTC
/// time.
///
/// Does nothing when the filesystem reports that it manages timestamps itself.
pub fn update_ctime_mtime(&self) {
    if !self.fs().manages_timestamps() {
        self.update_info(|info| {
            // Both fields receive the very same instant.
            let stamp = utc::utc_now();
            info.time_status_change = stamp;
            info.time_modify = stamp;
        });
    }
}
/// Sets the status-change time of this node to the current UTC time.
///
/// Does nothing when the filesystem reports that it manages timestamps itself.
pub fn update_ctime(&self) {
    if !self.fs().manages_timestamps() {
        self.update_info(|info| {
            info.time_status_change = utc::utc_now();
        });
    }
}
/// Update the atime and mtime if the `current_task` has write access, is the file owner, or
/// holds either the CAP_DAC_OVERRIDE or CAP_FOWNER capability.
///
/// The permission decision is delegated to `check_access()` with the
/// `CheckAccessReason::ChangeTimestamps` reason. A read-only filesystem is rejected before any
/// permission check. Each of `atime` and `mtime` can be set to the current time, set to an
/// explicit time, or left unchanged.
pub fn update_atime_mtime<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    mount: &MountInfo,
    atime: TimeUpdateType,
    mtime: TimeUpdateType,
) -> Result<(), Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // If the filesystem is read-only, this always fails.
    mount.check_readonly_filesystem()?;

    // The access check is told whether both timestamps are being set to the current time.
    let both_now = matches!((atime, mtime), (TimeUpdateType::Now, TimeUpdateType::Now));
    self.check_access(
        locked,
        current_task,
        mount,
        Access::WRITE,
        CheckAccessReason::ChangeTimestamps { now: both_now },
        security::Auditable::Location(std::panic::Location::caller()),
    )?;

    // Nothing to write when both timestamps are omitted.
    if matches!((atime, mtime), (TimeUpdateType::Omit, TimeUpdateType::Omit)) {
        return Ok(());
    }

    // This function is called by `utimes(..)` which will update the access and
    // modification time. We need to call `update_attributes()` to update the mtime of
    // filesystems that manage file timestamps.
    self.update_attributes(locked, current_task, |info| {
        let current = utc::utc_now();
        let resolve = |requested: TimeUpdateType| match requested {
            TimeUpdateType::Now => Some(current),
            TimeUpdateType::Time(explicit) => Some(explicit),
            TimeUpdateType::Omit => None,
        };
        if let Some(access_time) = resolve(atime) {
            info.time_access = access_time;
        }
        if let Some(modify_time) = resolve(mtime) {
            info.time_modify = modify_time;
        }
        Ok(())
    })?;
    Ok(())
}
/// Returns the name used for this `FsNode` in "/proc/../fd" entries of anonymous file
/// descriptors.
///
/// A name supplied by `FsNodeOps::internal_name()` takes precedence. Otherwise the name has the
/// form:
///   <class>:[<node_id>]
/// where the class is "socket" for sockets, "pipe" for FIFOs and "file" for everything else.
pub fn internal_name(&self) -> FsString {
    self.ops().internal_name(self).unwrap_or_else(|| {
        let class = if self.is_sock() {
            "socket"
        } else if self.is_fifo() {
            "pipe"
        } else {
            "file"
        };
        format!("{}:[{}]", class, self.ino).into()
    })
}
/// Returns the key under which this node is stored in the file system's node cache, as
/// reported by the node's `FsNodeOps`.
///
/// Many file systems use the inode number as the key, but this is not guaranteed: some, such as
/// FUSE, sometimes use a `node_key` that differs from the inode number.
pub fn node_key(&self) -> ino_t {
    let ops = self.ops();
    ops.node_key(self)
}
fn ensure_rare_data(&self) -> &FsNodeRareData {
self.rare_data.get_or_init(|| Box::new(FsNodeRareData::default()))
}
/// Returns the inotify watchers of this node, creating the backing rare data if it does not
/// exist yet.
///
/// Call this only when the node really needs to store a list of watchers. To merely notify
/// watchers that may or may not exist, use `notify` instead.
pub fn ensure_watchers(&self) -> &inotify::InotifyWatchers {
    let rare_data = self.ensure_rare_data();
    &rare_data.watchers
}
/// Forwards the given event to the watchers of this node.
///
/// This is a no-op when the node's rare data has never been initialized.
pub fn notify(
    &self,
    event_mask: InotifyMask,
    cookie: u32,
    name: &FsStr,
    mode: FileMode,
    is_dead: bool,
) {
    let Some(rare_data) = self.rare_data.get() else {
        return;
    };
    rare_data.watchers.notify(event_mask, cookie, name, mode, is_dead);
}
}
/// Debug output for an `FsNode`: the name of its filesystem, its current info and the type
/// name of its ops.
impl std::fmt::Debug for FsNode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("FsNode");
        out.field("fs", &self.fs().name());
        out.field("info", &*self.info());
        out.field("ops_ty", &self.ops().type_name());
        out.finish()
    }
}
/// Release logic for an `FsNode`.
impl Releasable for FsNode {
    type Context<'a> = CurrentTaskAndLocked<'a>;

    /// Removes the node from its filesystem, if the filesystem can still be upgraded from the
    /// node's reference to it, and then hands the node's info to `FsNodeOps::forget()`.
    ///
    /// A failure of `forget()` is logged and otherwise ignored.
    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
        let (locked, current_task) = context;

        if let Some(fs) = self.fs.upgrade() {
            fs.remove_node(&self);
        }

        match self.ops.forget(
            locked.cast_locked::<FileOpsCore>(),
            current_task,
            self.info.into_inner(),
        ) {
            Ok(_) => {}
            Err(err) => {
                log_error!("Error on FsNodeOps::forget: {err:?}");
            }
        }
    }
}
/// Checks whether `current_task` may access a node with the given owner, group and `mode`.
///
/// The access bits that apply to the task are chosen from `mode`: the user bits when the task's
/// fsuid equals `node_uid`, otherwise the group bits when the task is in `node_gid`, otherwise
/// the "other" bits. If those bits do not cover the access requested by `permission_flags`, the
/// missing bits may still be granted through `CAP_DAC_READ_SEARCH` and then `CAP_DAC_OVERRIDE`.
///
/// `fs_node` is only used to look up a `dontaudit` rule when the check originates from
/// `access()` or a variant (`PermissionFlags::ACCESS`).
///
/// Returns `EACCES` if some requested access bit remains ungranted.
fn check_access(
fs_node: &FsNode,
current_task: &CurrentTask,
permission_flags: security::PermissionFlags,
node_uid: uid_t,
node_gid: gid_t,
mode: FileMode,
) -> Result<(), Errno> {
// Determine which of the access bits apply to the `current_task`.
let (fsuid, is_in_group) =
current_task.with_current_creds(|creds| (creds.fsuid, creds.is_in_group(node_gid)));
let granted = if fsuid == node_uid {
mode.user_access()
} else if is_in_group {
mode.group_access()
} else {
mode.other_access()
};
let access = permission_flags.as_access();
if granted.contains(access) {
return Ok(());
}
// Only the access bits that the mode bits did not grant remain to be covered by the
// capability checks below.
let mut requested = access & !granted;
// If this check was triggered by `access()`, or a variant, then check for a `dontaudit`
// statement for the `audit_access` permission for this caller & file.
// The `dontaudit` lookup is performed lazily, at most once, on the first capability check.
let have_dont_audit = OnceBool::new();
let has_capability = move |current_task, capability| {
let dont_audit = have_dont_audit.get_or_init(|| {
permission_flags.contains(PermissionFlags::ACCESS)
&& security::has_dontaudit_access(current_task, fs_node)
});
if dont_audit {
security::is_task_capable_noaudit(current_task, capability)
} else {
security::check_task_capable(current_task, capability).is_ok()
}
};
// CAP_DAC_READ_SEARCH allows bypass of read checks, and directory traverse (eXecute) checks.
let dac_read_search_access =
if mode.is_dir() { Access::READ | Access::EXEC } else { Access::READ };
// The capability is only queried when it could grant at least one of the missing bits.
if dac_read_search_access.intersects(requested)
&& has_capability(current_task, CAP_DAC_READ_SEARCH)
{
requested.remove(dac_read_search_access);
}
if requested.is_empty() {
return Ok(());
}
// CAP_DAC_OVERRIDE allows bypass of all checks (though see the comment for file-execute).
let mut dac_override_access = Access::READ | Access::WRITE;
dac_override_access |= if mode.is_dir() {
Access::EXEC
} else {
// File execute access checks may not be bypassed unless at least one executable bit is set.
(mode.user_access() | mode.group_access() | mode.other_access()) & Access::EXEC
};
// As above, the capability is only queried when it could grant a missing bit.
if dac_override_access.intersects(requested) && has_capability(current_task, CAP_DAC_OVERRIDE) {
requested.remove(dac_override_access);
}
if requested.is_empty() {
return Ok(());
}
// Some requested access is granted neither by the mode bits nor by a capability.
return error!(EACCES);
}
#[cfg(test)]
mod tests {
use super::*;
use crate::device::mem::mem_device_init;
use crate::testing::*;
use crate::vfs::buffers::VecOutputBuffer;
use starnix_uapi::auth::Credentials;
use starnix_uapi::file_mode::mode;
/// A character device node pointing at the `zero` device can be opened, and reading from it
/// yields zero bytes.
#[::fuchsia::test]
async fn open_device_file() {
    const CONTENT_LEN: usize = 10;

    spawn_kernel_and_run(async |locked, current_task| {
        mem_device_init(locked, &*current_task).expect("mem_device_init");

        // Create a device file that points to the `zero` device (which is automatically
        // registered in the kernel).
        let root = current_task.fs().root();
        root.create_node(
            locked,
            &current_task,
            "zero".into(),
            mode!(IFCHR, 0o666),
            DeviceType::ZERO,
        )
        .expect("create_node");

        // Read from the zero device into a buffer of CONTENT_LEN bytes.
        let mut sink = VecOutputBuffer::new(CONTENT_LEN);
        let zero_file = current_task
            .open_file(locked, "zero".into(), OpenFlags::RDONLY)
            .expect("open device file");
        zero_file.read(locked, &current_task, &mut sink).expect("read from zero");

        // Everything that was read must be zero.
        assert_eq!(&[0; CONTENT_LEN], sink.data());
    })
    .await;
}
/// Values written to a node's `FsNodeInfo` are reported back unchanged by `stat()`.
#[::fuchsia::test]
async fn node_info_is_reflected_in_stat() {
    spawn_kernel_and_run(async |locked, current_task| {
        // Create a node.
        let created = current_task
            .fs()
            .root()
            .create_node(
                locked,
                &current_task,
                "zero".into(),
                FileMode::IFCHR,
                DeviceType::ZERO,
            )
            .expect("create_node");
        let node = &created.entry.node;

        // Overwrite every attribute that `stat()` reports with a distinct value.
        let rdev = DeviceType::new(13, 13);
        node.update_info(|info| {
            info.mode = FileMode::IFSOCK;
            info.size = 1;
            info.blocks = 2;
            info.blksize = 4;
            info.uid = 9;
            info.gid = 10;
            info.link_count = 11;
            info.time_status_change = UtcInstant::from_nanos(1);
            info.time_access = UtcInstant::from_nanos(2);
            info.time_modify = UtcInstant::from_nanos(3);
            info.rdev = rdev;
        });

        let observed = node.stat(locked, &current_task).expect("stat");

        assert_eq!(observed.st_mode, FileMode::IFSOCK.bits());
        assert_eq!(observed.st_size, 1);
        assert_eq!(observed.st_blksize, 4);
        assert_eq!(observed.st_blocks, 2);
        assert_eq!(observed.st_uid, 9);
        assert_eq!(observed.st_gid, 10);
        assert_eq!(observed.st_nlink, 11);
        // The timestamps are a few nanoseconds: zero whole seconds plus the remainder.
        assert_eq!(observed.st_ctime, 0);
        assert_eq!(observed.st_ctime_nsec, 1);
        assert_eq!(observed.st_atime, 0);
        assert_eq!(observed.st_atime_nsec, 2);
        assert_eq!(observed.st_mtime, 0);
        assert_eq!(observed.st_mtime_nsec, 3);
        assert_eq!(observed.st_rdev, rdev.bits());
    })
    .await;
}
/// `FlockOperation::from_flags` rejects invalid flag sets and decodes valid ones.
#[::fuchsia::test]
fn test_flock_operation() {
    // Neither "no flags" nor "all flags" is a valid operation.
    assert!(FlockOperation::from_flags(0).is_err());
    assert!(FlockOperation::from_flags(u32::MAX).is_err());

    // Shared lock, blocking.
    let shared = FlockOperation::from_flags(LOCK_SH).expect("from_flags");
    assert!(!shared.is_unlock());
    assert!(!shared.is_lock_exclusive());
    assert!(shared.is_blocking());

    // Exclusive lock, non-blocking.
    let exclusive_nonblocking =
        FlockOperation::from_flags(LOCK_EX | LOCK_NB).expect("from_flags");
    assert!(!exclusive_nonblocking.is_unlock());
    assert!(exclusive_nonblocking.is_lock_exclusive());
    assert!(!exclusive_nonblocking.is_blocking());

    // Unlock, blocking.
    let unlock = FlockOperation::from_flags(LOCK_UN).expect("from_flags");
    assert!(unlock.is_unlock());
    assert!(!unlock.is_lock_exclusive());
    assert!(unlock.is_blocking());
}
// Exercises `FsNode::check_access` for a task with uid 1, gid 2 and supplementary groups 3
// and 4, against a regular file whose owner, group and permission bits are varied per case.
#[::fuchsia::test]
async fn test_check_access() {
spawn_kernel_and_run(async |locked, current_task| {
let mut creds = Credentials::with_ids(1, 2);
creds.groups = vec![3, 4];
current_task.set_creds(creds);
// Create a node.
let node = &current_task
.fs()
.root()
.create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
.expect("create_node")
.entry
.node;
// Rewrites the node's mode, owner and group, then runs the access check for `access`.
let check_access = |locked: &mut Locked<Unlocked>,
uid: uid_t,
gid: gid_t,
perm: u32,
access: Access| {
node.update_info(|info| {
info.mode = mode!(IFREG, perm);
info.uid = uid;
info.gid = gid;
});
node.check_access(
locked,
&current_task,
&MountInfo::detached(),
access,
CheckAccessReason::InternalPermissionChecks,
security::Auditable::Location(std::panic::Location::caller()),
)
};
// File owned by uid 0 / gid 0: the task is neither the owner nor in the group, so only the
// "other" bits apply.
assert_eq!(check_access(locked, 0, 0, 0o700, Access::EXEC), error!(EACCES));
assert_eq!(check_access(locked, 0, 0, 0o700, Access::READ), error!(EACCES));
assert_eq!(check_access(locked, 0, 0, 0o700, Access::WRITE), error!(EACCES));
assert_eq!(check_access(locked, 0, 0, 0o070, Access::EXEC), error!(EACCES));
assert_eq!(check_access(locked, 0, 0, 0o070, Access::READ), error!(EACCES));
assert_eq!(check_access(locked, 0, 0, 0o070, Access::WRITE), error!(EACCES));
assert_eq!(check_access(locked, 0, 0, 0o007, Access::EXEC), Ok(()));
assert_eq!(check_access(locked, 0, 0, 0o007, Access::READ), Ok(()));
assert_eq!(check_access(locked, 0, 0, 0o007, Access::WRITE), Ok(()));
// File owned by uid 1 (the task's uid): the user bits apply, one permission at a time.
assert_eq!(check_access(locked, 1, 0, 0o700, Access::EXEC), Ok(()));
assert_eq!(check_access(locked, 1, 0, 0o700, Access::READ), Ok(()));
assert_eq!(check_access(locked, 1, 0, 0o700, Access::WRITE), Ok(()));
assert_eq!(check_access(locked, 1, 0, 0o100, Access::EXEC), Ok(()));
assert_eq!(check_access(locked, 1, 0, 0o100, Access::READ), error!(EACCES));
assert_eq!(check_access(locked, 1, 0, 0o100, Access::WRITE), error!(EACCES));
assert_eq!(check_access(locked, 1, 0, 0o200, Access::EXEC), error!(EACCES));
assert_eq!(check_access(locked, 1, 0, 0o200, Access::READ), error!(EACCES));
assert_eq!(check_access(locked, 1, 0, 0o200, Access::WRITE), Ok(()));
assert_eq!(check_access(locked, 1, 0, 0o400, Access::EXEC), error!(EACCES));
assert_eq!(check_access(locked, 1, 0, 0o400, Access::READ), Ok(()));
assert_eq!(check_access(locked, 1, 0, 0o400, Access::WRITE), error!(EACCES));
// File with gid 2 (the task's primary gid): the group bits apply.
assert_eq!(check_access(locked, 0, 2, 0o700, Access::EXEC), error!(EACCES));
assert_eq!(check_access(locked, 0, 2, 0o700, Access::READ), error!(EACCES));
assert_eq!(check_access(locked, 0, 2, 0o700, Access::WRITE), error!(EACCES));
assert_eq!(check_access(locked, 0, 2, 0o070, Access::EXEC), Ok(()));
assert_eq!(check_access(locked, 0, 2, 0o070, Access::READ), Ok(()));
assert_eq!(check_access(locked, 0, 2, 0o070, Access::WRITE), Ok(()));
// File with gid 3 (one of the task's supplementary groups): the group bits apply as well.
assert_eq!(check_access(locked, 0, 3, 0o070, Access::EXEC), Ok(()));
assert_eq!(check_access(locked, 0, 3, 0o070, Access::READ), Ok(()));
assert_eq!(check_access(locked, 0, 3, 0o070, Access::WRITE), Ok(()));
})
.await;
}
#[::fuchsia::test]
async fn set_security_xattr_fails_without_security_module_or_root() {
spawn_kernel_and_run(async |locked, current_task| {
let mut creds = Credentials::with_ids(1, 2);
creds.groups = vec![3, 4];
current_task.set_creds(creds);
// Create a node.
let node = &current_task
.fs()
.root()
.create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
.expect("create_node")
.entry
.node;
// Give read-write-execute access.
node.update_info(|info| info.mode = mode!(IFREG, 0o777));
// Without a security module, and without CAP_SYS_ADMIN capabilities, setting the xattr
// should fail.
assert_eq!(
node.set_xattr(
locked,
&current_task,
&MountInfo::detached(),
"security.name".into(),
"security_label".into(),
XattrOp::Create,
),
error!(EPERM)
);
})
.await;
}
#[::fuchsia::test]
async fn set_non_user_xattr_fails_without_security_module_or_root() {
spawn_kernel_and_run(async |locked, current_task| {
let mut creds = Credentials::with_ids(1, 2);
creds.groups = vec![3, 4];
current_task.set_creds(creds);
// Create a node.
let node = &current_task
.fs()
.root()
.create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
.expect("create_node")
.entry
.node;
// Give read-write-execute access.
node.update_info(|info| info.mode = mode!(IFREG, 0o777));
// Without a security module, and without CAP_SYS_ADMIN capabilities, setting the xattr
// should fail.
assert_eq!(
node.set_xattr(
locked,
&current_task,
&MountInfo::detached(),
"trusted.name".into(),
"some data".into(),
XattrOp::Create,
),
error!(EPERM)
);
})
.await;
}
/// A `security.*` attribute written by root can be read back by a non-root task, even though
/// the file's mode grants no read permission.
#[::fuchsia::test]
async fn get_security_xattr_succeeds_without_read_access() {
    spawn_kernel_and_run(async |locked, current_task| {
        let mut unprivileged = Credentials::with_ids(1, 2);
        unprivileged.groups = vec![3, 4];
        current_task.set_creds(unprivileged);

        // Create a node.
        let created = current_task
            .fs()
            .root()
            .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
            .expect("create_node");
        let node = &created.entry.node;

        // Leave only the owner-execute bit set, so that the mode grants neither read nor write
        // access to anyone, then switch the current task to root credentials.
        node.update_info(|info| info.mode = mode!(IFREG, 0o100));
        current_task.set_creds(Credentials::root());

        let mount = MountInfo::detached();

        // Setting the label should succeed even without write access to the file.
        let set_outcome = node.set_xattr(
            locked,
            &current_task,
            &mount,
            "security.name".into(),
            "security_label".into(),
            XattrOp::Create,
        );
        assert_eq!(set_outcome, Ok(()));

        // Remove root access from the current task.
        current_task.set_creds(Credentials::with_ids(1, 1));

        // Getting the label should succeed even without read access to the file.
        let get_outcome =
            node.get_xattr(locked, &current_task, &mount, "security.name".into(), 4096);
        assert_eq!(get_outcome, Ok(ValueOrSize::Value("security_label".into())));
    })
    .await;
}
}