blob: c258be78f76470234752132f37370b2939987f38 [file] [log] [blame]
use crate::helpers::check_min_arg_count;
use crate::*;
/// Implementation of the SYS_futex syscall.
/// `args` is the arguments *including* the syscall number.
pub fn futex<'tcx>(
this: &mut MiriInterpCx<'tcx>,
args: &[OpTy<'tcx>],
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx> {
// The amount of arguments used depends on the type of futex operation.
// The full futex syscall takes six arguments (excluding the syscall
// number), which is also the maximum amount of arguments a linux syscall
// can take on most architectures.
// However, not all futex operations use all six arguments. The unused ones
// may or may not be left out from the `syscall()` call.
// Therefore we don't use `check_arg_count` here, but only check for the
// number of arguments to fall within a range.
let [_, addr, op, val] = check_min_arg_count("`syscall(SYS_futex, ...)`", args)?;
// The first three arguments (after the syscall number itself) are the same to all futex operations:
// (int *addr, int op, int val).
// We checked above that these definitely exist.
let addr = this.read_pointer(addr)?;
let op = this.read_scalar(op)?.to_i32()?;
let val = this.read_scalar(val)?.to_i32()?;
// This is a vararg function so we have to bring our own type for this pointer.
let addr = this.ptr_to_mplace(addr, this.machine.layouts.i32);
let addr_usize = addr.ptr().addr().bytes();
let futex_private = this.eval_libc_i32("FUTEX_PRIVATE_FLAG");
let futex_wait = this.eval_libc_i32("FUTEX_WAIT");
let futex_wait_bitset = this.eval_libc_i32("FUTEX_WAIT_BITSET");
let futex_wake = this.eval_libc_i32("FUTEX_WAKE");
let futex_wake_bitset = this.eval_libc_i32("FUTEX_WAKE_BITSET");
let futex_realtime = this.eval_libc_i32("FUTEX_CLOCK_REALTIME");
// FUTEX_PRIVATE enables an optimization that stops it from working across processes.
// Miri doesn't support that anyway, so we ignore that flag.
match op & !futex_private {
// FUTEX_WAIT: (int *addr, int op = FUTEX_WAIT, int val, const timespec *timeout)
// Blocks the thread if *addr still equals val. Wakes up when FUTEX_WAKE is called on the same address,
// or *timeout expires. `timeout == null` for an infinite timeout.
//
// FUTEX_WAIT_BITSET: (int *addr, int op = FUTEX_WAIT_BITSET, int val, const timespec *timeout, int *_ignored, unsigned int bitset)
// This is identical to FUTEX_WAIT, except:
// - The timeout is absolute rather than relative.
// - You can specify the bitset to selecting what WAKE operations to respond to.
op if op & !futex_realtime == futex_wait || op & !futex_realtime == futex_wait_bitset => {
let wait_bitset = op & !futex_realtime == futex_wait_bitset;
let (timeout, bitset) = if wait_bitset {
let [_, _, _, _, timeout, uaddr2, bitset] =
check_min_arg_count("`syscall(SYS_futex, FUTEX_WAIT_BITSET, ...)`", args)?;
let _timeout = this.read_pointer(timeout)?;
let _uaddr2 = this.read_pointer(uaddr2)?;
(timeout, this.read_scalar(bitset)?.to_u32()?)
} else {
let [_, _, _, _, timeout] =
check_min_arg_count("`syscall(SYS_futex, FUTEX_WAIT, ...)`", args)?;
(timeout, u32::MAX)
};
if bitset == 0 {
this.set_last_error_and_return(LibcError("EINVAL"), dest)?;
return interp_ok(());
}
let timeout = this.deref_pointer_as(timeout, this.libc_ty_layout("timespec"))?;
let timeout = if this.ptr_is_null(timeout.ptr())? {
None
} else {
let duration = match this.read_timespec(&timeout)? {
Some(duration) => duration,
None => {
return this.set_last_error_and_return(LibcError("EINVAL"), dest);
}
};
let timeout_clock = if op & futex_realtime == futex_realtime {
this.check_no_isolation(
"`futex` syscall with `op=FUTEX_WAIT` and non-null timeout with `FUTEX_CLOCK_REALTIME`",
)?;
TimeoutClock::RealTime
} else {
TimeoutClock::Monotonic
};
let timeout_anchor = if wait_bitset {
// FUTEX_WAIT_BITSET uses an absolute timestamp.
TimeoutAnchor::Absolute
} else {
// FUTEX_WAIT uses a relative timestamp.
TimeoutAnchor::Relative
};
Some((timeout_clock, timeout_anchor, duration))
};
// There may be a concurrent thread changing the value of addr
// and then invoking the FUTEX_WAKE syscall. It is critical that the
// effects of this and the other thread are correctly observed,
// otherwise we will deadlock.
//
// There are two scenarios to consider:
// 1. If we (FUTEX_WAIT) execute first, we'll push ourselves into
// the waiters queue and go to sleep. They (addr write & FUTEX_WAKE)
// will see us in the queue and wake us up.
// 2. If they (addr write & FUTEX_WAKE) execute first, we must observe
// addr's new value. If we see an outdated value that happens to equal
// the expected val, then we'll put ourselves to sleep with no one to wake us
// up, so we end up with a deadlock. This is prevented by having a SeqCst
// fence inside FUTEX_WAKE syscall, and another SeqCst fence
// below, the atomic read on addr after the SeqCst fence is guaranteed
// not to see any value older than the addr write immediately before
// calling FUTEX_WAKE. We'll see futex_val != val and return without
// sleeping.
//
// Note that the fences do not create any happens-before relationship.
// The read sees the write immediately before the fence not because
// one happens after the other, but is instead due to a guarantee unique
// to SeqCst fences that restricts what an atomic read placed AFTER the
// fence can see. The read still has to be atomic, otherwise it's a data
// race. This guarantee cannot be achieved with acquire-release fences
// since they only talk about reads placed BEFORE a fence - and places
// no restrictions on what the read itself can see, only that there is
// a happens-before between the fences IF the read happens to see the
// right value. This is useless to us, since we need the read itself
// to see an up-to-date value.
//
// The above case distinction is valid since both FUTEX_WAIT and FUTEX_WAKE
// contain a SeqCst fence, therefore inducing a total order between the operations.
// It is also critical that the fence, the atomic load, and the comparison in FUTEX_WAIT
// altogether happen atomically. If the other thread's fence in FUTEX_WAKE
// gets interleaved after our fence, then we lose the guarantee on the
// atomic load being up-to-date; if the other thread's write on addr and FUTEX_WAKE
// call are interleaved after the load but before the comparison, then we get a TOCTOU
// race condition, and go to sleep thinking the other thread will wake us up,
// even though they have already finished.
//
// Thankfully, preemptions cannot happen inside a Miri shim, so we do not need to
// do anything special to guarantee fence-load-comparison atomicity.
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
// Read an `i32` through the pointer, regardless of any wrapper types.
// It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`.
let futex_val = this.read_scalar_atomic(&addr, AtomicReadOrd::Relaxed)?.to_i32()?;
if val == futex_val {
// The value still matches, so we block the thread and make it wait for FUTEX_WAKE.
this.futex_wait(
addr_usize,
bitset,
timeout,
Scalar::from_target_isize(0, this), // retval_succ
Scalar::from_target_isize(-1, this), // retval_timeout
dest.clone(),
this.eval_libc("ETIMEDOUT"), // errno_timeout
);
} else {
// The futex value doesn't match the expected value, so we return failure
// right away without sleeping: -1 and errno set to EAGAIN.
return this.set_last_error_and_return(LibcError("EAGAIN"), dest);
}
}
// FUTEX_WAKE: (int *addr, int op = FUTEX_WAKE, int val)
// Wakes at most `val` threads waiting on the futex at `addr`.
// Returns the amount of threads woken up.
// Does not access the futex value at *addr.
// FUTEX_WAKE_BITSET: (int *addr, int op = FUTEX_WAKE, int val, const timespect *_unused, int *_unused, unsigned int bitset)
// Same as FUTEX_WAKE, but allows you to specify a bitset to select which threads to wake up.
op if op == futex_wake || op == futex_wake_bitset => {
let bitset = if op == futex_wake_bitset {
let [_, _, _, _, timeout, uaddr2, bitset] =
check_min_arg_count("`syscall(SYS_futex, FUTEX_WAKE_BITSET, ...)`", args)?;
let _timeout = this.read_pointer(timeout)?;
let _uaddr2 = this.read_pointer(uaddr2)?;
this.read_scalar(bitset)?.to_u32()?
} else {
u32::MAX
};
if bitset == 0 {
return this.set_last_error_and_return(LibcError("EINVAL"), dest);
}
// Together with the SeqCst fence in futex_wait, this makes sure that futex_wait
// will see the latest value on addr which could be changed by our caller
// before doing the syscall.
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
let mut n = 0;
#[allow(clippy::arithmetic_side_effects)]
for _ in 0..val {
if this.futex_wake(addr_usize, bitset)? {
n += 1;
} else {
break;
}
}
this.write_scalar(Scalar::from_target_isize(n, this), dest)?;
}
op => throw_unsup_format!("Miri does not support `futex` syscall with op={}", op),
}
interp_ok(())
}