blob: 85532aef38642d7d33d2c6ae199ca8746f0c396f [file] [log] [blame] [edit]
// Copyright 2023 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use bstr::ByteSlice;
use starnix_lock::Mutex;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, AtomicU8, Ordering};
use std::sync::Arc;
use ubpf::{
converter::{bpf_addressing_mode, bpf_class},
program::EbpfProgram,
};
use crate::{
fs::buffers::{InputBuffer, OutputBuffer},
fs::{fileops_impl_nonseekable, Anon, FdEvents, FdFlags, FdNumber, FileObject, FileOps},
logging::log_warn,
mm::MemoryAccessorExt,
signals::{send_signal, SignalDetail, SignalInfo},
syscalls::{decls::Syscall, SyscallArg, SyscallResult},
task::{
CurrentTask, EventHandler, ExitStatus, Kernel, Task, TaskFlags, WaitCanceler, WaitQueue,
Waiter,
},
types::*,
};
/// A single installed seccomp filter: the program to run plus the per-filter bookkeeping
/// (identity, notification cookie counter, logging flag) stored alongside it.
pub struct SeccompFilter {
/// The BPF program associated with this filter.
program: EbpfProgram,
/// The unique-to-this-process id of this filter. SECCOMP_FILTER_FLAG_TSYNC only works if all
/// threads in this process have filters that are a prefix of the filters of the thread
/// attempting to do the TSYNC. Identical filters attached in separate seccomp calls are treated
/// as different from each other for this purpose, so we need a way of distinguishing them.
unique_id: u64,
/// The next cookie (unique id for this syscall), as used by SECCOMP_RET_USER_NOTIF
cookie: AtomicU64,
/// Whether to log the results of this filter
log: bool,
}
/// The result of running a set of seccomp filters.
pub struct SeccompFilterResult {
/// The action indicated by the seccomp filter with the highest priority result.
action: SeccompAction,
/// The filter that returned the highest priority result, as used by SECCOMP_RET_USER_NOTIF,
/// which has to have access to its cookie value. `None` when no filter produced the result
/// (the initial "allow" value used by `SeccompFilterContainer::run_all`).
filter: Option<Arc<SeccompFilter>>,
}
impl SeccompFilter {
    /// Creates a SeccompFilter object from the given sock_filter. Associates the user-provided
    /// id with it, which is intended to be unique to this process.
    ///
    /// `should_log` is stored on the filter and later consulted when deciding whether an action
    /// produced by this filter is logged.
    ///
    /// Returns EINVAL if an absolute load/store is misaligned or out of bounds, or if the
    /// program cannot be converted by `EbpfProgram::from_cbpf` (the conversion error is logged
    /// as a warning).
    pub fn from_cbpf(
        code: &Vec<sock_filter>,
        maybe_unique_id: u64,
        should_log: bool,
    ) -> Result<Self, Errno> {
        // If an instruction loads from / stores to an absolute address, that address has to be
        // 32-bit aligned and inside the struct seccomp_data passed in. An offset equal to the
        // size of the struct is already one past its end, so it must be rejected as well.
        let data_size = std::mem::size_of::<seccomp_data>();
        for insn in code {
            let class = bpf_class(insn);
            if (class == BPF_LD || class == BPF_ST)
                && (bpf_addressing_mode(insn) == BPF_ABS)
                && (insn.k & 0x3 != 0 || insn.k as usize >= data_size)
            {
                return error!(EINVAL);
            }
        }
        match EbpfProgram::from_cbpf(code) {
            Ok(program) => Ok(SeccompFilter {
                program,
                unique_id: maybe_unique_id,
                cookie: AtomicU64::new(0),
                log: should_log,
            }),
            Err(errmsg) => {
                log_warn!("{}", errmsg);
                error!(EINVAL)
            }
        }
    }

    /// Runs the filter program over `data` and returns its result truncated to 32 bits.
    ///
    /// If the program reports an error, SECCOMP_RET_ALLOW is returned.
    // NOTE(review): falling back to ALLOW means a filter that fails to execute lets the syscall
    // through. Confirm this fail-open behavior is intended.
    pub fn run(&self, data: &mut seccomp_data) -> u32 {
        if let Ok(r) = self.program.run(data) {
            return r as u32;
        }
        SECCOMP_RET_ALLOW
    }
}
/// Upper bound on the accumulated instruction count of a filter container (each added filter
/// is charged its original length plus 4); enforced by `SeccompFilterContainer::add_filter`.
const SECCOMP_MAX_INSNS_PER_PATH: u16 = 32768;
/// A list of seccomp filters, intended to be associated with a specific process.
///
/// Cloning and dropping a container adjusts the thread count of the attached notifier, if any
/// (see the `Clone` and `Drop` impls).
#[derive(Default)]
pub struct SeccompFilterContainer {
/// List of currently installed seccomp_filters; most recently added is last.
pub filters: Vec<Arc<SeccompFilter>>,
/// The total length of the provided seccomp filters, which cannot
/// exceed SECCOMP_MAX_INSNS_PER_PATH - 4 * the number of filters. This is stored
/// instead of computed because we store seccomp filters in an
/// expanded form, and it is impossible to get the original length.
pub provided_instructions: u16,
/// Data needed by SECCOMP_RET_USER_NOTIF
pub notifier: Option<SeccompNotifierHandle>,
}
impl Clone for SeccompFilterContainer {
fn clone(&self) -> Self {
if let Some(n) = &self.notifier {
n.lock().add_thread();
}
SeccompFilterContainer {
filters: self.filters.clone(),
provided_instructions: self.provided_instructions,
notifier: self.notifier.clone(),
}
}
}
impl Drop for SeccompFilterContainer {
    /// Unregisters this container from its notifier, if it has one.
    fn drop(&mut self) {
        // The notifier has to send a HUP once nobody references it any more, so it must be
        // told that this user is going away.
        if let Some(handle) = self.notifier.as_ref() {
            handle.lock().remove_thread();
        }
    }
}
/// Builds the `seccomp_data` record describing `syscall`: its number, the audit architecture
/// constant of the build target, the instruction pointer `ip`, and the six raw arguments.
fn make_seccomp_data(syscall: &Syscall, ip: u64) -> seccomp_data {
    // Exactly one of these bindings survives conditional compilation on a supported target.
    #[cfg(target_arch = "x86_64")]
    let arch = AUDIT_ARCH_X86_64;
    #[cfg(target_arch = "aarch64")]
    let arch = AUDIT_ARCH_AARCH64;
    #[cfg(target_arch = "riscv64")]
    let arch = AUDIT_ARCH_RISCV64;

    let nr = syscall.decl.number as i32;
    let args = [
        syscall.arg0.raw(),
        syscall.arg1.raw(),
        syscall.arg2.raw(),
        syscall.arg3.raw(),
        syscall.arg4.raw(),
        syscall.arg5.raw(),
    ];
    seccomp_data { nr, arch, instruction_pointer: ip, args }
}
impl SeccompFilterContainer {
/// Checks whether this set of seccomp filters can be "synced to" `source`, i.e. whether our
/// filters form a prefix of the filters in `source` (compared by their unique ids).
pub fn can_sync_to(&self, source: &SeccompFilterContainer) -> bool {
    // A longer list can never be a prefix of a shorter one.
    self.filters.len() <= source.filters.len()
        && self
            .filters
            .iter()
            .zip(source.filters.iter())
            .all(|(ours, theirs)| ours.unique_id == theirs.unique_id)
}
/// Adds the given filter to this list. The original_length parameter is the length of
/// the originally provided BPF (i.e., the number of sock_filter instructions), used
/// to ensure the total length does not exceed SECCOMP_MAX_INSNS_PER_PATH
///
/// Each filter is charged `original_length + 4` instructions. Returns ENOMEM, leaving the
/// container untouched, when the new total would exceed the limit.
pub fn add_filter(
    &mut self,
    filter: Arc<SeccompFilter>,
    original_length: u16,
) -> Result<(), Errno> {
    // Use checked arithmetic: with plain `+` a large `original_length` overflows u16, which
    // panics in debug builds and wraps around (sneaking under the limit) in release builds.
    // An overflow necessarily means the limit is exceeded.
    let maybe_new_length = self
        .provided_instructions
        .checked_add(original_length)
        .and_then(|length| length.checked_add(4))
        .filter(|length| *length <= SECCOMP_MAX_INSNS_PER_PATH)
        .ok_or_else(|| errno!(ENOMEM))?;
    self.provided_instructions = maybe_new_length;
    self.filters.push(filter);
    Ok(())
}
/// Runs all of the seccomp filters in this container, most-to-least recent. Returns the
/// highest priority result (which contains a reference to the filter that generated it)
pub fn run_all(&self, current_task: &CurrentTask, syscall: &Syscall) -> SeccompFilterResult {
let mut r = SeccompFilterResult { action: SeccompAction::Allow, filter: None };
// VDSO calls can't be caught by seccomp, so most seccomp filters forget to declare them.
// But our VDSO implementation is incomplete, and most of the calls forward to the actual
// syscalls. So seccomp should ignore them until they're implemented correctly in the VDSO.
#[cfg(target_arch = "x86_64")] // The set of VDSO calls is arch dependent.
#[allow(non_upper_case_globals)]
if let __NR_clock_gettime | __NR_getcpu | __NR_gettimeofday | __NR_time =
syscall.decl.number as u32
{
return r;
}
#[cfg(target_arch = "aarch64")]
#[allow(non_upper_case_globals)]
if let __NR_clock_gettime | __NR_clock_getres | __NR_gettimeofday =
syscall.decl.number as u32
{
return r;
}
// Filters are executed in reverse order of addition
for filter in self.filters.iter().rev() {
let mut data =
make_seccomp_data(syscall, current_task.registers.instruction_pointer_register());
let new_result = filter.run(&mut data);
let action = SeccompAction::from_u32(new_result).unwrap_or(SeccompAction::KillProcess);
if SeccompAction::has_prio(&action, &r.action) == std::cmp::Ordering::Less {
r = SeccompFilterResult { action, filter: Some(filter.clone()) };
}
}
r
}
/// Creates a new listener for use by SECCOMP_RET_USER_NOTIF. Returns its fd.
pub fn create_listener(&mut self, current_task: &CurrentTask) -> Result<FdNumber, Errno> {
if self.notifier.is_some() {
return Err(errno!(EBUSY));
}
let the_notifier = SeccompNotifier::new();
let handle = Anon::new_file(
current_task,
Box::new(SeccompNotifierFileObject { notifier: the_notifier.clone() }),
OpenFlags::RDWR,
);
let fd = current_task.add_file(handle, FdFlags::CLOEXEC)?;
{
let mut state = the_notifier.lock();
state.add_thread();
}
self.notifier = Some(the_notifier);
Ok(fd)
}
}
/// Possible values for the current status of the seccomp filters for
/// this process.
///
/// The explicit discriminants are the values stored in `SeccompState::filter_state`.
#[repr(u8)]
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompStateValue {
// No seccomp mode has been set; this is the only state `SeccompState::set` can leave.
None = 0,
// User-provided filters are in effect (see `SeccompState::do_user_defined`).
UserDefined = 1,
// Strict mode is in effect (see `SeccompState::do_strict`).
Strict = 2,
}
/// Per-process state that cannot be stored in the container (e.g., whether there is a container).
#[derive(Default)]
pub struct SeccompState {
// This AtomicU8 corresponds to a SeccompStateValue. The derived default of 0 corresponds to
// SeccompStateValue::None.
filter_state: AtomicU8,
}
impl SeccompState {
/// Creates an independent `SeccompState` holding the value currently stored in `state`.
pub fn from(state: &SeccompState) -> SeccompState {
    let snapshot = state.filter_state.load(Ordering::Acquire);
    SeccompState { filter_state: AtomicU8::new(snapshot) }
}
/// Decodes the raw byte kept in `filter_state` back into a `SeccompStateValue`.
///
/// Panics (via `unreachable!`) on a byte that is not the discriminant of any variant.
fn from_u8(value: u8) -> SeccompStateValue {
    const NONE: u8 = SeccompStateValue::None as u8;
    const USER_DEFINED: u8 = SeccompStateValue::UserDefined as u8;
    const STRICT: u8 = SeccompStateValue::Strict as u8;
    match value {
        NONE => SeccompStateValue::None,
        USER_DEFINED => SeccompStateValue::UserDefined,
        STRICT => SeccompStateValue::Strict,
        _ => unreachable!(),
    }
}
/// Returns the current seccomp mode.
pub fn get(&self) -> SeccompStateValue {
    let raw = self.filter_state.load(Ordering::Acquire);
    Self::from_u8(raw)
}
/// Moves the seccomp mode to `state`.
///
/// Succeeds if the mode already equals `state`, or if it is `None` and can be swapped to
/// `state`. Any other current mode yields EINVAL. The swap is retried if another thread
/// changes the value in between.
pub fn set(&self, state: &SeccompStateValue) -> Result<(), Errno> {
    let desired = *state;
    loop {
        match self.get() {
            // Already in the requested mode: nothing to do.
            current if current == desired => return Ok(()),
            SeccompStateValue::None => {
                let swapped = self.filter_state.compare_exchange(
                    SeccompStateValue::None as u8,
                    desired as u8,
                    Ordering::Release,
                    Ordering::Acquire,
                );
                if swapped.is_ok() {
                    return Ok(());
                }
                // Lost a race; re-read the value and decide again.
            }
            // A mode other than None cannot be changed to a different one.
            _ => return Err(errno!(EINVAL)),
        }
    }
}
/// Checks whether this syscall is allowed in STRICT mode and, if it is not, sends the task a
/// SIGKILL.
///
/// Returns `None` when the syscall may proceed (exit, read or write); otherwise returns the
/// result the syscall is forced to produce.
pub fn do_strict(task: &Task, syscall: &Syscall) -> Option<Result<SyscallResult, Errno>> {
    let number = syscall.decl.number as u32;
    let permitted = [__NR_exit, __NR_read, __NR_write].contains(&number);
    if permitted {
        return None;
    }
    send_signal(task, SignalInfo::default(SIGKILL));
    Some(Err(errno_from_code!(0)))
}
// This is supposed to be put in the audit log, but starnix does not yet have an
// audit log. Also, it does not match the Linux format. Still, the machinery
// is in place for when we have to support it for real.
//
// Emits one info-level line with the task's credentials, thread group leader, command name,
// the syscall number and name, the instruction pointer and the build architecture.
fn log_action(task: &CurrentTask, syscall: &Syscall) {
    let creds = task.creds();
    let uid = creds.uid;
    let gid = creds.gid;
    let comm_r = task.command();
    // A command name that is not valid UTF-8 is replaced by a placeholder.
    let comm = comm_r.to_str().unwrap_or("???");
    // riscv64 is named explicitly so it is not reported as "unknown": the rest of this file
    // already handles that target (see AUDIT_ARCH_RISCV64).
    let arch = if cfg!(target_arch = "x86_64") {
        "x86_64"
    } else if cfg!(target_arch = "aarch64") {
        "aarch64"
    } else if cfg!(target_arch = "riscv64") {
        "riscv64"
    } else {
        "unknown"
    };
    crate::logging::log_info!(
        "type=SECCOMP: uid={} gid={} pid={} comm={} syscall={} ip={} ARCH={} SYSCALL={}",
        uid,
        gid,
        task.thread_group.leader,
        comm,
        syscall.decl.number,
        task.registers.instruction_pointer_register(),
        arch,
        syscall.decl.name
    );
}
/// Take the given |action| on the given |task|. The action is one of the SECCOMP_RET values
/// (ALLOW, LOG, KILL, KILL_PROCESS, TRAP, ERRNO, USER_NOTIF, TRACE). |task| is the thread that
/// invoked the syscall, and |syscall| is the syscall that was invoked.
/// Returns the result that the syscall will be forced to return by this
/// filter, or None, if the syscall should return its actual return value.
// NB: Allow warning below so that it is clear what we are doing on KILL_PROCESS
#[allow(clippy::wildcard_in_or_patterns)]
pub fn do_user_defined(
result: SeccompFilterResult,
current_task: &mut CurrentTask,
syscall: &Syscall,
) -> Option<Result<SyscallResult, Errno>> {
let action = result.action;
if let Some(filter) = result.filter.as_ref() {
if action.is_logged(current_task.kernel(), filter.log) {
Self::log_action(current_task, syscall);
}
}
match action {
SeccompAction::Allow => None,
SeccompAction::Errno(code) => Some(Err(errno_from_code!(code as i16))),
SeccompAction::KillThread => {
let siginfo = SignalInfo::default(SIGSYS);
let is_last_thread = current_task.thread_group.read().tasks_count() == 1;
let mut task_state = current_task.write();
if is_last_thread {
current_task.set_flags(&mut *task_state, TaskFlags::DUMP_ON_EXIT, true);
current_task.set_exit_status_if_not_already(
&mut *task_state,
ExitStatus::CoreDump(siginfo),
);
} else {
current_task.set_exit_status_if_not_already(
&mut *task_state,
ExitStatus::Kill(siginfo),
);
}
Some(Err(errno_from_code!(0)))
}
SeccompAction::KillProcess => {
current_task.thread_group.exit(ExitStatus::CoreDump(SignalInfo::default(SIGSYS)));
Some(Err(errno_from_code!(0)))
}
SeccompAction::Log => {
Self::log_action(current_task, syscall);
None
}
SeccompAction::Trace => {
// TODO(fxbug.dev/76810): Because there is no ptrace support, this returns ENOSYS
Some(Err(errno!(ENOSYS)))
}
SeccompAction::Trap(errno) => {
#[cfg(target_arch = "x86_64")]
let arch_val = AUDIT_ARCH_X86_64;
#[cfg(target_arch = "aarch64")]
let arch_val = AUDIT_ARCH_AARCH64;
#[cfg(target_arch = "riscv64")]
let arch_val = AUDIT_ARCH_RISCV64;
let siginfo = SignalInfo {
signal: SIGSYS,
errno: errno as i32,
code: SYS_SECCOMP as i32,
detail: SignalDetail::SigSys {
call_addr: current_task.registers.instruction_pointer_register().into(),
syscall: syscall.decl.number as i32,
arch: arch_val,
},
force: true,
};
send_signal(current_task, siginfo);
Some(Err(errno_from_code!(-(syscall.decl.number as i16))))
}
SeccompAction::UserNotif => {
if let Some(notifier) = current_task.get_seccomp_notifier() {
let cookie =
result.filter.as_ref().unwrap().cookie.fetch_add(1, Ordering::Relaxed);
let msg = seccomp_notif {
id: cookie,
pid: current_task.id as u32,
flags: 0,
data: make_seccomp_data(
syscall,
current_task.registers.instruction_pointer_register(),
),
};
// First, add a pending notification, and wake up the supervisor waiting for it.
let waiter = Waiter::new();
{
let mut notifier = notifier.lock();
if notifier.is_closed {
// Someone explicitly close()d the fd with the notifier, which does not
// clear the thread-local notifier. Do it now.
drop(notifier);
current_task.set_seccomp_notifier(None);
return Some(Err(errno!(ENOSYS)));
}
notifier.create_notification(cookie, msg);
notifier.waiters.wait_async_value(&waiter, cookie);
}
// Next, wait for a response from the supervisor
if let Err(e) = waiter.wait(current_task) {
return Some(Err(e));
}
// Fetch the response.
let resp: Option<seccomp_notif_resp>;
{
let mut notifier = notifier.lock();
resp = notifier.get_response(cookie);
notifier.delete_notification(cookie);
}
// The response indicates what you are supposed to do with this syscall.
if let Some(response) = resp {
if response.val != 0 {
return Some(Ok(response.val.into()));
}
if response.error != 0 {
if response.error > 0 {
return Some(Ok(response.error.into()));
} else {
return Some(Err(errno_from_code!(-response.error as i16)));
}
}
if response.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
return None;
}
}
Some(Ok(0.into()))
} else {
Some(Err(errno!(ENOSYS)))
}
}
}
}
}
/// The decoded form of a seccomp filter return value (one of the SECCOMP_RET_* actions).
/// `Errno` and `Trap` carry the data bits of the return value; see `SeccompAction::from_u32`.
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompAction {
Allow,
Errno(u32),
KillProcess,
KillThread,
Log,
Trap(u32),
Trace,
UserNotif,
}
impl SeccompAction {
pub fn is_action_available(action: u32) -> Result<SyscallResult, Errno> {
if SeccompAction::from_u32(action).is_none() {
return error!(EOPNOTSUPP);
}
Ok(0.into())
}
pub fn from_u32(action: u32) -> Option<SeccompAction> {
match action & !SECCOMP_RET_DATA {
linux_uapi::SECCOMP_RET_ALLOW => Some(Self::Allow),
linux_uapi::SECCOMP_RET_ERRNO => {
let mut action = action & SECCOMP_RET_DATA;
// Linux kernel compatibility: if errno exceeds 0xfff, it is capped at 0xfff.
action = std::cmp::min(action & 0xffff, 0xfff);
Some(Self::Errno(action))
}
linux_uapi::SECCOMP_RET_KILL_PROCESS => Some(Self::KillProcess),
linux_uapi::SECCOMP_RET_KILL_THREAD => Some(Self::KillThread),
linux_uapi::SECCOMP_RET_LOG => Some(Self::Log),
linux_uapi::SECCOMP_RET_TRACE => Some(Self::Trace),
linux_uapi::SECCOMP_RET_TRAP => Some(Self::Trap(action & SECCOMP_RET_DATA)),
linux_uapi::SECCOMP_RET_USER_NOTIF => Some(Self::UserNotif),
_ => None,
}
}
pub fn to_isize(self) -> isize {
match self {
Self::Allow => linux_uapi::SECCOMP_RET_ALLOW as isize,
Self::Errno(x) => (linux_uapi::SECCOMP_RET_ERRNO | x) as isize,
Self::KillProcess => linux_uapi::SECCOMP_RET_KILL_PROCESS as isize,
Self::KillThread => linux_uapi::SECCOMP_RET_KILL_THREAD as isize,
Self::Log => linux_uapi::SECCOMP_RET_LOG as isize,
Self::Trace => linux_uapi::SECCOMP_RET_TRACE as isize,
Self::Trap(x) => (linux_uapi::SECCOMP_RET_TRAP | x) as isize,
Self::UserNotif => linux_uapi::SECCOMP_RET_USER_NOTIF as isize,
}
}
pub fn canonical_name(self) -> &'static str {
match self {
Self::Allow => &"allow",
Self::Errno(_) => &"errno",
Self::KillProcess => &"kill_process",
Self::KillThread => &"kill_thread",
Self::Log => &"log",
Self::Trace => &"trace",
Self::Trap(_) => &"trap",
Self::UserNotif => &"user_notif",
}
}
pub fn has_prio(a: &SeccompAction, b: &SeccompAction) -> std::cmp::Ordering {
let anum = a.to_isize() as i32;
let bnum = b.to_isize() as i32;
let fullnum = SECCOMP_RET_ACTION_FULL as i32;
let aval = anum & fullnum;
let bval = bnum & fullnum;
aval.cmp(&bval)
}
/// Returns a vector of all available actions, sorted by priority.
pub fn all_actions() -> Vec<SeccompAction> {
let mut result = vec![
Self::Allow,
Self::Errno(0),
Self::KillProcess,
Self::KillThread,
Self::Log,
Self::Trace,
Self::Trap(0),
Self::UserNotif,
];
result.sort_by(Self::has_prio);
result
}
/// Gets the contents of /proc/sys/kernel/seccomp/actions_avail
pub fn get_actions_avail_file() -> Vec<u8> {
let all_actions = Self::all_actions();
if all_actions.len() == 0 {
return vec![];
}
let mut result = String::from(all_actions[0].canonical_name());
for i in 1..all_actions.len() {
result.push_str(" ");
result.push_str(all_actions[i].canonical_name());
}
result.push('\n');
result.into_bytes()
}
fn logged_bit_offset(&self) -> u32 {
match self {
Self::Allow => 1,
Self::Errno(_) => 2,
Self::KillProcess => 3,
Self::KillThread => 4,
Self::Log => 5,
Self::Trace => 6,
Self::Trap(_) => 7,
Self::UserNotif => 8,
}
}
fn set_logged_bit(&self, dst: &mut u16) {
*dst |= 1 << self.logged_bit_offset();
}
pub fn is_logged(&self, kernel: &Arc<Kernel>, filter_flag: bool) -> bool {
if kernel.actions_logged.load(Ordering::Relaxed) & (1 << self.logged_bit_offset()) != 0 {
match self {
// Per the documentation on audit logging of seccomp actions in
// seccomp(2), just because it is listed as logged, that doesn't
// mean we actually log it.
// If it is KILL_PROCESS or KILL_THREAD, return true
Self::KillProcess | Self::KillThread => true,
// If it is one of these and the filter flag was set, return true.
Self::Errno(_) | Self::Log | Self::Trap(_) | Self::UserNotif => filter_flag,
// Never log ALLOW
_ => false,
}
} else {
false
}
}
pub fn set_actions_logged(kernel: &Arc<Kernel>, data: &[u8]) -> Result<(), Errno> {
let mut new_actions_logged: u16 = 0;
for action_res in data.fields_with(|c| c.is_ascii_whitespace()).collect::<Vec<_>>() {
if let Ok(action) = action_res.to_str() {
match action {
"errno" => Self::Errno(0).set_logged_bit(&mut new_actions_logged),
"kill_process" => Self::KillProcess.set_logged_bit(&mut new_actions_logged),
"kill_thread" => Self::KillThread.set_logged_bit(&mut new_actions_logged),
"log" => Self::Log.set_logged_bit(&mut new_actions_logged),
"trace" => Self::Trace.set_logged_bit(&mut new_actions_logged),
"trap" => Self::Trap(0).set_logged_bit(&mut new_actions_logged),
"user_notif" => Self::UserNotif.set_logged_bit(&mut new_actions_logged),
// Not allowed to write anything other than the approved actions to that list.
_ => return Err(errno!(EINVAL)),
}
} else {
return Err(errno!(EINVAL));
}
}
kernel.actions_logged.store(new_actions_logged, Ordering::Relaxed);
Ok(())
}
pub fn get_actions_logged(kernel: &Arc<Kernel>) -> Vec<u8> {
let al = kernel.actions_logged.load(Ordering::Relaxed);
let mut result: String = "".to_string();
for action in Self::all_actions() {
if (al & (1 << action.logged_bit_offset())) != 0 {
result.push_str(action.canonical_name());
result.push(' ');
}
}
if !result.is_empty() {
// remove trailing whitespace.
result.pop();
}
result.into_bytes()
}
}
/// This struct contains data that needs to be shuttled back and forth between the thread doing
/// a USER_NOTIF and the supervisor thread responding to it.
#[derive(Default)]
struct SeccompNotification {
/// notif is the notification set by the filter. When this is set, the associated fd will
/// be set to POLLIN.
notif: seccomp_notif,
/// Consumed indicates whether a supervisor process has read this notification (and so it
/// can no longer be consumed by any other SECCOMP_IOCTL_NOTIF_RECV ioctl). When the notif
/// is consumed, the associated fd will be set to POLLOUT, indicating that it is ready to
/// receive a response.
consumed: bool,
/// resp is the response that the supervisor sends. When this is set, an event will be sent
/// to SeccompNotifier::waiters corresponding to the unique id of the notification. This
/// will wake up the filter that is waiting for this particular response.
resp: Option<seccomp_notif_resp>,
}
impl SeccompNotification {
fn new(data: seccomp_notif) -> SeccompNotification {
SeccompNotification { notif: data, resp: None, consumed: false }
}
}
/// The underlying implementation of the file descriptor that connects a process that triggers a
/// SECCOMP_RET_USER_NOTIF with the monitoring process. This supports seccomp's ability to notify
/// a user-space process on specific syscall triggers. See seccomp_unotify(2) for the semantics.
pub struct SeccompNotifier {
// Queue used both for fd events (poll-style waiters on the listener fd) and for per-cookie
// value events (the thread waiting for the response to its notification).
waiters: WaitQueue,
// Notifications that have been created and not yet deleted, keyed by cookie.
pending_notifications: HashMap<u64, SeccompNotification>,
// This keeps track of the number of threads using this notifier as a filter. If that hits
// zero, the listeners need to receive a HUP.
num_active_threads: u64,
// notifiers are referenced both by fds and in SeccompFilterContainer. If the file no longer
// has fds referring to it, it will be closed, and the SeccompFilterContainers should stop
// using it.
pub is_closed: bool,
}
/// Shared, mutex-protected handle to a `SeccompNotifier`.
pub type SeccompNotifierHandle = Arc<Mutex<SeccompNotifier>>;
impl SeccompNotifier {
    /// Creates an empty notifier (no pending notifications, no active threads, not closed)
    /// and returns a shared handle to it.
    pub fn new() -> SeccompNotifierHandle {
        let notifier = SeccompNotifier {
            waiters: WaitQueue::default(),
            pending_notifications: HashMap::default(),
            num_active_threads: 0,
            is_closed: false,
        };
        Arc::new(Mutex::new(notifier))
    }

    /// Records one more thread using this notifier.
    fn add_thread(&mut self) {
        self.num_active_threads += 1;
    }

    /// Records that a thread stopped using this notifier; when the last one goes away, fd
    /// waiters are sent POLLHUP.
    fn remove_thread(&mut self) {
        self.num_active_threads -= 1;
        let nobody_left = self.num_active_threads == 0;
        if nobody_left {
            self.waiters.notify_fd_events(FdEvents::POLLHUP);
        }
    }

    /// Creates a pending notification for communication between the target thread and a
    /// supervisor, and notifies readers there is an opportunity to read.
    fn create_notification(&mut self, cookie: u64, notif: seccomp_notif) {
        let pending = SeccompNotification::new(notif);
        self.pending_notifications.insert(cookie, pending);
        self.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
    }

    /// Picks a notification that has not been consumed yet, marks it consumed, and notifies
    /// waiters that there is an opportunity to write. Returns `None` if every pending
    /// notification has already been consumed.
    fn consume_some_notification(&mut self) -> Option<seccomp_notif> {
        let entry = self.pending_notifications.values_mut().find(|entry| !entry.consumed)?;
        entry.consumed = true;
        self.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
        Some(entry.notif)
    }

    /// Marks the notification as not consumed again, in case something goes wrong after we
    /// consume it. Does nothing for an unknown cookie.
    fn unconsume(&mut self, cookie: u64) {
        if let Some(entry) = self.pending_notifications.get_mut(&cookie) {
            entry.consumed = false;
        }
    }

    /// Returns the appropriate events if someone is waiting with poll/epoll/select.
    fn get_fd_notifications(&self) -> FdEvents {
        let mut events = FdEvents::empty();
        for entry in self.pending_notifications.values() {
            match (entry.consumed, entry.resp.is_none()) {
                // Not read by a supervisor yet: readable.
                (false, _) => events |= FdEvents::POLLIN | FdEvents::POLLRDNORM,
                // Read but not answered yet: writable.
                (true, true) => events |= FdEvents::POLLOUT | FdEvents::POLLWRNORM,
                // Read and answered: contributes nothing.
                (true, false) => {}
            }
        }
        if self.num_active_threads == 0 {
            events |= FdEvents::POLLHUP;
        }
        events
    }

    /// Sets the value read by the target in response to this notification. Intended for use
    /// by the supervisor. Notifies the filter there is a response to this request.
    ///
    /// Returns `None` on success, EINPROGRESS if a response was already set, and EINVAL if
    /// the cookie is unknown.
    fn set_response(&mut self, cookie: u64, resp: seccomp_notif_resp) -> Option<Errno> {
        match self.pending_notifications.get_mut(&cookie) {
            None => Some(errno!(EINVAL)),
            Some(entry) if entry.resp.is_some() => Some(errno!(EINPROGRESS)),
            Some(entry) => {
                entry.resp = Some(resp);
                self.waiters.notify_value(resp.id);
                None
            }
        }
    }

    /// Gets the value set by the supervisor for the target to read, if any.
    fn get_response(&self, cookie: u64) -> Option<seccomp_notif_resp> {
        self.pending_notifications.get(&cookie).and_then(|entry| entry.resp)
    }

    /// Returns whether the cookie represents an active notification.
    fn notification_pending(&self, cookie: u64) -> bool {
        self.pending_notifications.get(&cookie).is_some()
    }

    /// Deletes the notification, when the target is done processing it.
    fn delete_notification(&mut self, cookie: u64) {
        self.pending_notifications.remove(&cookie);
    }
}
/// File object behind the listener fd handed out by `SeccompFilterContainer::create_listener`;
/// its `FileOps` implementation operates on the shared notifier.
struct SeccompNotifierFileObject {
// Handle to the notifier this file operates on.
notifier: SeccompNotifierHandle,
}
impl FileOps for SeccompNotifierFileObject {
fileops_impl_nonseekable!();
fn close(&self, _file: &FileObject) {
let mut state = self.notifier.lock();
for (cookie, notification) in state.pending_notifications.iter() {
if !notification.consumed {
state.waiters.notify_value(*cookie);
state.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
} else if notification.resp.is_none() {
state.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
}
}
state.waiters.notify_fd_events(FdEvents::POLLHUP);
state.pending_notifications.clear();
state.is_closed = true;
}
fn read(
&self,
_file: &FileObject,
_current_task: &CurrentTask,
_offset: usize,
_usize: &mut dyn OutputBuffer,
) -> Result<usize, Errno> {
Err(errno!(EINVAL))
}
fn write(
&self,
_file: &FileObject,
_current_task: &CurrentTask,
_offset: usize,
_buffer: &mut dyn InputBuffer,
) -> Result<usize, Errno> {
Err(errno!(EINVAL))
}
/// Handles the SECCOMP_IOCTL_NOTIF_* requests on the listener fd.
///
/// * RECV hands the next unconsumed notification to the caller, waiting for one if needed.
/// * SEND stores the supervisor's response for a notification.
/// * ID_VALID reports whether a notification is still pending (ENOENT if not).
/// * ADDFD and any other request fail with EINVAL.
fn ioctl(
    &self,
    _file: &FileObject,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    let user_addr = UserAddress::from(arg);
    match request {
        SECCOMP_IOCTL_NOTIF_RECV => {
            // If the destination buffer can be read, it must be all zeroes.
            if let Ok(bytes) = current_task
                .mm
                .read_memory_to_vec(user_addr, std::mem::size_of::<seccomp_notif>())
            {
                if bytes.iter().any(|byte| *byte != 0) {
                    return error!(EINVAL);
                }
            }
            // A RECV reads a notification, optionally waiting for one to become available.
            let notif = loop {
                // Grab a notification or wait for one to become readable.
                let waiter = Waiter::new();
                {
                    let mut notifier = self.notifier.lock();
                    if let Some(pending) = notifier.consume_some_notification() {
                        break pending;
                    }
                    notifier.waiters.wait_async_fd_events(
                        &waiter,
                        FdEvents::POLLIN | FdEvents::POLLHUP,
                        EventHandler::None,
                    );
                }
                // The lock is released before blocking.
                waiter.wait(current_task)?;
            };
            let destination = UserRef::<seccomp_notif>::new(user_addr);
            if let Err(e) = current_task.mm.write_object(destination, &notif) {
                // The supervisor never saw the notification; make it available again.
                self.notifier.lock().unconsume(notif.id);
                return Err(e);
            }
            Ok(0.into())
        }
        SECCOMP_IOCTL_NOTIF_SEND => {
            // A SEND sends a response to a previously received notification.
            let resp: seccomp_notif_resp =
                current_task.mm.read_object(UserRef::new(user_addr))?;
            let has_unknown_flags = (resp.flags & !SECCOMP_USER_NOTIF_FLAG_CONTINUE) != 0;
            if has_unknown_flags {
                return error!(EINVAL);
            }
            // CONTINUE cannot be combined with an explicit error or return value.
            let wants_continue = (resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE) != 0;
            if wants_continue && (resp.error != 0 || resp.val != 0) {
                return error!(EINVAL);
            }
            let failure = self.notifier.lock().set_response(resp.id, resp);
            match failure {
                Some(err) => Err(err),
                None => Ok(0.into()),
            }
        }
        SECCOMP_IOCTL_NOTIF_ID_VALID => {
            // An ID_VALID indicates that the notification is still in progress.
            let cookie: u64 = current_task.mm.read_object(UserRef::new(user_addr))?;
            let still_pending = self.notifier.lock().notification_pending(cookie);
            if still_pending {
                Ok(0.into())
            } else {
                error!(ENOENT)
            }
        }
        SECCOMP_IOCTL_NOTIF_ADDFD => error!(EINVAL),
        _ => error!(EINVAL),
    }
}
/// Registers `waiter` for `events` on the notifier's wait queue and returns the canceler.
fn wait_async(
    &self,
    _file: &FileObject,
    _current_task: &CurrentTask,
    waiter: &Waiter,
    events: FdEvents,
    handler: EventHandler,
) -> Option<WaitCanceler> {
    let canceler = self.notifier.lock().waiters.wait_async_fd_events(waiter, events, handler);
    Some(canceler)
}
/// Reports the fd events currently signalled by the notifier.
fn query_events(
    &self,
    _file: &FileObject,
    _current_task: &CurrentTask,
) -> Result<FdEvents, Errno> {
    let events = self.notifier.lock().get_fd_notifications();
    Ok(events)
}
}
#[cfg(test)]
mod test {
    use crate::task::SeccompAction;
    use crate::testing::create_kernel_and_task;

    /// The full actions_avail list (which contains "allow") must be rejected by
    /// actions_logged, while the same list without "allow" must be accepted.
    #[::fuchsia::test]
    async fn test_actions_logged_accepts_legal_string() {
        let (kernel, _) = create_kernel_and_task();
        let mut avail = SeccompAction::get_actions_avail_file();

        // This is a test in Rust instead of a syscall test because we don't want to change the
        // global config in a test.
        let rejected = SeccompAction::set_actions_logged(&kernel, &avail[..]);
        assert!(rejected.is_err(), "Should not be able to write allow to actions_logged file");

        // Cut "allow" out of the list, leaving everything else in place.
        let as_text = std::string::String::from_utf8(avail.clone()).unwrap();
        if let Some(start) = as_text.find("allow") {
            let end = start + "allow".len();
            avail.drain(start..end);
        }

        let write_result = SeccompAction::set_actions_logged(&kernel, &avail[..]);
        assert!(
            write_result.is_ok(),
            "Could not write legal string \"{}\" to actions_logged file: error {}",
            std::string::String::from_utf8(avail.clone()).unwrap(),
            write_result.unwrap_err()
        );
    }
}