// blob: 76c68add8cdc9240209aa5c6b4507e26560739c9 [file] [log] [blame]
//! This module is responsible for managing the absolute addresses that allocations are located at,
//! and for casting between pointers and integers based on those addresses.
mod reuse_pool;
use std::cell::RefCell;
use std::cmp::max;
use std::collections::hash_map::Entry;
use rand::Rng;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_span::Span;
use rustc_target::abi::{Align, Size};
use crate::{concurrency::VClock, *};
use self::reuse_pool::ReusePool;
/// How integer-to-pointer casts (`with_exposed_provenance`) are treated.
///
/// `Default` and `Permissive` both resolve such casts via "wildcard" provenance and differ only
/// in whether a warning is emitted; `Strict` rejects the cast outright.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum ProvenanceMode {
/// We support `expose_provenance`/`with_exposed_provenance` via "wildcard" provenance.
/// However, we warn on `with_exposed_provenance` to alert the user of the precision loss.
Default,
/// Like `Default`, but without the warning.
Permissive,
/// We error on `with_exposed_provenance`, ensuring no precision loss.
Strict,
}
/// The address-assignment state, wrapped in a `RefCell` so that it can be borrowed mutably
/// through a shared reference to the interpreter context.
pub type GlobalState = RefCell<GlobalStateInner>;
/// Bookkeeping for the absolute addresses assigned to allocations, and for which allocations
/// have been exposed to integer-to-pointer casts.
#[derive(Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
/// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
/// from the base address, and we need to find the `AllocId` it belongs to. This is not the
/// *full* inverse of `base_addr`; dead allocations have been removed.
int_to_ptr_map: Vec<(u64, AllocId)>,
/// The base address for each allocation. We cannot put that into
/// `AllocExtra` because function pointers also have a base address, and
/// they do not have an `AllocExtra`.
/// For live allocations this is the inverse of `int_to_ptr_map`; entries of freed allocations
/// remain here (see `free_alloc_id`) until `remove_unreachable_allocs` prunes them.
base_addr: FxHashMap<AllocId, u64>,
/// Temporarily store prepared memory space for global allocations the first time their memory
/// address is required. This is used to ensure that the memory is allocated before Miri assigns
/// it an internal address, which is important for matching the internal address to the machine
/// address so FFI can read from pointers.
prepared_alloc_bytes: FxHashMap<AllocId, MiriAllocBytes>,
/// A pool of addresses we can reuse for future allocations.
reuse: ReusePool,
/// Whether an allocation has been exposed or not. This cannot be put
/// into `AllocExtra` for the same reason as `base_addr`.
exposed: FxHashSet<AllocId>,
/// This is used as a memory address when a new pointer is cast to an integer. It
/// is always larger than any address that was previously made part of a block.
next_base_addr: u64,
/// The provenance mode to use for int2ptr casts.
provenance_mode: ProvenanceMode,
}
impl VisitProvenance for GlobalStateInner {
    /// Deliberately visits nothing; the comment in the body explains why each `AllocId`-bearing
    /// field can be skipped.
    fn visit_provenance(&self, _visit: &mut VisitWith<'_>) {
        // The pattern lists every field (no `..`), so adding a field to the struct forces this
        // function to be revisited.
        let Self {
            int_to_ptr_map: _,
            base_addr: _,
            prepared_alloc_bytes: _,
            reuse: _,
            exposed: _,
            next_base_addr: _,
            provenance_mode: _,
        } = self;
        // `base_addr`, `int_to_ptr_map`, and `exposed` all contain `AllocId`s, but none of them
        // is visited:
        // - `int_to_ptr_map` and `exposed` must contain only live allocations, and those are
        //   never garbage collected.
        // - `base_addr` is only relevant if we have a pointer to an `AllocId` and need to look
        //   up its base address; so if an `AllocId` is not reachable from somewhere else we can
        //   remove it here.
    }
}
impl GlobalStateInner {
    /// Creates the initial state: no allocation has an address yet, and fresh addresses are
    /// handed out starting at `stack_addr`. The reuse pool and the provenance mode are both
    /// derived from `config`.
    pub fn new(config: &MiriConfig, stack_addr: u64) -> Self {
        Self {
            int_to_ptr_map: Vec::new(),
            base_addr: FxHashMap::default(),
            prepared_alloc_bytes: FxHashMap::default(),
            reuse: ReusePool::new(config),
            exposed: FxHashSet::default(),
            next_base_addr: stack_addr,
            provenance_mode: config.provenance_mode,
        }
    }

    /// Drops the cached base address of every allocation that `allocs` does not report as live.
    pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) {
        // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation
        // is freed, so `base_addr` is the only one we have to clean up based on the GC.
        self.base_addr.retain(|&id, _| allocs.is_live(id));
    }
}
/// Rounds `addr` up to the smallest multiple of `align` that is greater than or equal to `addr`.
/// An already-aligned `addr` is returned unchanged.
///
/// Panics if `align` is zero, or if `addr + align` overflows `u64`.
fn align_addr(addr: u64, align: u64) -> u64 {
    let rem = addr % align;
    if rem == 0 {
        return addr;
    }
    // `rem < align`, so the subtraction cannot underflow once the addition has succeeded.
    addr.strict_add(align) - rem
}
// The interpreter context gets all private helpers from the trait's provided method bodies.
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
/// Private helpers that translate between `AllocId`s and absolute addresses.
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Returns the exposed `AllocId` that corresponds to the specified `addr`,
    /// or `None` if `addr` is out of bounds or the allocation found there is not exposed.
    ///
    /// If `size` is strictly negative, the lookup is done for `addr - 1` (saturating at zero).
    ///
    /// Panics if the provenance mode is `Strict`.
    fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> {
        let ecx = self.eval_context_ref();
        let global_state = ecx.machine.alloc_addresses.borrow();
        assert!(global_state.provenance_mode != ProvenanceMode::Strict);

        // We always search the allocation to the right of this address. So if the size is
        // strictly negative, we have to search for `addr-1` instead.
        let addr = if size < 0 { addr.saturating_sub(1) } else { addr };

        // Determine the in-bounds provenance for this pointer.
        let map = &global_state.int_to_ptr_map;
        let alloc_id = match map.binary_search_by_key(&addr, |&(base, _)| base) {
            // `addr` is exactly the base address of an allocation.
            Ok(idx) => map[idx].1,
            // `addr` lies below every base address we know about.
            Err(0) => return None,
            Err(idx) => {
                // This is the largest of the base addresses smaller than `addr`,
                // i.e. the greatest lower bound (glb).
                let (glb, candidate) = map[idx - 1];
                // This never overflows because `addr >= glb`.
                let offset = addr - glb;
                // We require this to be strict in-bounds of the allocation. This arm is only
                // entered for addresses that are not the base address, so even zero-sized
                // allocations will get recognized at their base address -- but all other
                // allocations will *not* be recognized at their "end" address.
                let alloc_size = ecx.get_alloc_info(candidate).0;
                if offset >= alloc_size.bytes() {
                    return None;
                }
                candidate
            }
        };

        // We only use this provenance if it has been exposed.
        if !global_state.exposed.contains(&alloc_id) {
            return None;
        }
        // This must still be live, since we remove allocations from `int_to_ptr_map` when they
        // get freed.
        debug_assert!(ecx.is_alloc_live(alloc_id));
        Some(alloc_id)
    }

    /// Returns the base address of `alloc_id`. If the allocation has no address yet, one is
    /// chosen and recorded in both `base_addr` and `int_to_ptr_map`.
    ///
    /// `memory_kind` is only passed on to the reuse pool when looking for a reusable address.
    /// Fails with `AddressSpaceFull` if no fresh address fits into the target's address space.
    fn addr_from_alloc_id(
        &self,
        alloc_id: AllocId,
        memory_kind: MemoryKind,
    ) -> InterpResult<'tcx, u64> {
        let ecx = self.eval_context_ref();
        let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
        let global_state = &mut *global_state;

        // If an address has been assigned before, it is cached and we are done.
        let entry = match global_state.base_addr.entry(alloc_id) {
            Entry::Occupied(entry) => return Ok(*entry.get()),
            Entry::Vacant(entry) => entry,
        };

        let mut rng = ecx.machine.rng.borrow_mut();
        let (size, align, kind) = ecx.get_alloc_info(alloc_id);
        // This is either called immediately after allocation (and then cached), or when
        // adjusting `tcx` pointers (which never get freed). So assert that we are looking
        // at a live allocation. This also ensures that we never re-assign an address to an
        // allocation that previously had an address, but then was freed and the address
        // information was removed.
        assert!(!matches!(kind, AllocKind::Dead));

        // This allocation does not have a base address yet, pick or reuse one.
        let base_addr = if ecx.machine.native_lib.is_some() {
            // In native lib mode, we use the "real" address of the bytes for this allocation.
            // This ensures the interpreted program and native code have the same view of memory.
            match kind {
                AllocKind::LiveData => {
                    let ptr = if ecx.tcx.try_get_global_alloc(alloc_id).is_some() {
                        // For new global allocations, we always pre-allocate the memory to be
                        // able to use the machine address directly.
                        let Some(prepared_bytes) = MiriAllocBytes::zeroed(size, align) else {
                            panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes")
                        };
                        let ptr = prepared_bytes.as_ptr();
                        // Store prepared allocation space to be picked up for use later.
                        global_state
                            .prepared_alloc_bytes
                            .try_insert(alloc_id, prepared_bytes)
                            .unwrap();
                        ptr
                    } else {
                        ecx.get_alloc_bytes_unchecked_raw(alloc_id)?
                    };
                    // Ensure this pointer's provenance is exposed, so that it can be used by
                    // FFI code.
                    ptr.expose_provenance().try_into().unwrap()
                }
                AllocKind::Function | AllocKind::VTable => {
                    // Allocate some dummy memory to get a unique address for this
                    // function/vtable.
                    let alloc_bytes =
                        MiriAllocBytes::from_bytes(&[0u8; 1], Align::from_bytes(1).unwrap());
                    // We don't need to expose these bytes as nobody is allowed to access them.
                    let addr = alloc_bytes.as_ptr().addr().try_into().unwrap();
                    // Leak the underlying memory to ensure it remains unique.
                    std::mem::forget(alloc_bytes);
                    addr
                }
                AllocKind::Dead => unreachable!(),
            }
        } else if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
            &mut *rng,
            size,
            align,
            memory_kind,
            ecx.active_thread(),
        ) {
            if let Some(clock) = clock {
                ecx.acquire_clock(&clock);
            }
            reuse_addr
        } else {
            // We have to pick a fresh address.
            // Leave some space to the previous allocation, to give it some chance to be less
            // aligned. We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly
            // distributed.
            let slack = rng.gen_range(0..16);
            // From next_base_addr + slack, round up to adjust for alignment.
            let unaligned = global_state
                .next_base_addr
                .checked_add(slack)
                .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
            let fresh_addr = align_addr(unaligned, align.bytes());

            // Remember next base address. If this allocation is zero-sized, leave a gap
            // of at least 1 to avoid two allocations having the same base address.
            // (The logic in `alloc_id_from_addr` assumes unique addresses, and different
            // function/vtable pointers need to be distinguishable!)
            global_state.next_base_addr = fresh_addr
                .checked_add(max(size.bytes(), 1))
                .ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
            // Even if `Size` didn't overflow, we might still have filled up the address space.
            if global_state.next_base_addr > ecx.target_usize_max() {
                throw_exhaust!(AddressSpaceFull);
            }

            fresh_addr
        };
        trace!(
            "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})",
            base_addr,
            alloc_id,
            size.bytes(),
            align.bytes(),
        );

        // Store address in cache.
        entry.insert(base_addr);

        // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted.
        // We have a fast-path for the common case that this address is bigger than all
        // previous ones.
        let pos = match global_state.int_to_ptr_map.last() {
            Some(&(last_addr, _)) if last_addr < base_addr => global_state.int_to_ptr_map.len(),
            _ =>
                global_state
                    .int_to_ptr_map
                    .binary_search_by_key(&base_addr, |&(addr, _)| addr)
                    .unwrap_err(),
        };
        global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id));

        Ok(base_addr)
    }
}
// The interpreter context gets all public helpers from the trait's provided method bodies.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.alloc_addresses.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode == ProvenanceMode::Strict {
return Ok(());
}
// Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation
// via int2ptr.
if !ecx.is_alloc_live(alloc_id) {
return Ok(());
}
trace!("Exposing allocation id {alloc_id:?}");
let global_state = ecx.machine.alloc_addresses.get_mut();
global_state.exposed.insert(alloc_id);
if ecx.machine.borrow_tracker.is_some() {
ecx.expose_tag(alloc_id, tag)?;
}
Ok(())
}
fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> {
trace!("Casting {:#x} to a pointer", addr);
let ecx = self.eval_context_ref();
let global_state = ecx.machine.alloc_addresses.borrow();
// Potentially emit a warning.
match global_state.provenance_mode {
ProvenanceMode::Default => {
// The first time this happens at a particular location, print a warning.
thread_local! {
// `Span` is non-`Send`, so we use a thread-local instead.
static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default();
}
PAST_WARNINGS.with_borrow_mut(|past_warnings| {
let first = past_warnings.is_empty();
if past_warnings.insert(ecx.cur_span()) {
// Newly inserted, so first time we see this span.
ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first });
}
});
}
ProvenanceMode::Strict => {
throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance);
}
ProvenanceMode::Permissive => {}
}
// We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is
// completely legal to do a cast and then `wrapping_offset` to another allocation and only
// *then* do a memory access. So the allocation that the pointer happens to point to on a
// cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that
// *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed)
// allocation it might be referencing.
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
}
/// Convert a relative (tcx) pointer to a Miri pointer.
fn adjust_alloc_root_pointer(
&self,
ptr: interpret::Pointer<CtfeProvenance>,
tag: BorTag,
kind: MemoryKind,
) -> InterpResult<'tcx, interpret::Pointer<Provenance>> {
let ecx = self.eval_context_ref();
let (prov, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
let alloc_id = prov.alloc_id();
// Get a pointer to the beginning of this allocation.
let base_addr = ecx.addr_from_alloc_id(alloc_id, kind)?;
let base_ptr = interpret::Pointer::new(
Provenance::Concrete { alloc_id, tag },
Size::from_bytes(base_addr),
);
// Add offset with the right kind of pointer-overflowing arithmetic.
Ok(base_ptr.wrapping_offset(offset, ecx))
}
// This returns some prepared `MiriAllocBytes`, either because `addr_from_alloc_id` reserved
// memory space in the past, or by doing the pre-allocation right upon being called.
fn get_global_alloc_bytes(&self, id: AllocId, kind: MemoryKind, bytes: &[u8], align: Align) -> InterpResult<'tcx, MiriAllocBytes> {
let ecx = self.eval_context_ref();
Ok(if ecx.machine.native_lib.is_some() {
// In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`.
// This additional call ensures that some `MiriAllocBytes` are always prepared.
ecx.addr_from_alloc_id(id, kind)?;
let mut global_state = ecx.machine.alloc_addresses.borrow_mut();
// The memory we need here will have already been allocated during an earlier call to
// `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead
// fetch the previously prepared bytes from `prepared_alloc_bytes`.
let mut prepared_alloc_bytes = global_state
.prepared_alloc_bytes
.remove(&id)
.unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared"));
// Sanity-check that the prepared allocation has the right size and alignment.
assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize()));
assert_eq!(prepared_alloc_bytes.len(), bytes.len());
// Copy allocation contents into prepared memory.
prepared_alloc_bytes.copy_from_slice(bytes);
prepared_alloc_bytes
} else {
MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(&*bytes), align)
})
}
/// When a pointer is used for a memory access, this computes where in which allocation the
/// access is going.
fn ptr_get_alloc(
&self,
ptr: interpret::Pointer<Provenance>,
size: i64,
) -> Option<(AllocId, Size)> {
let ecx = self.eval_context_ref();
let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
alloc_id
} else {
// A wildcard pointer.
ecx.alloc_id_from_addr(addr.bytes(), size)?
};
// This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer
// must have been called in the past, so we can just look up the address in the map.
let base_addr = *ecx.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap();
// Wrapping "addr - base_addr"
let rel_offset = ecx.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr));
Some((alloc_id, Size::from_bytes(rel_offset)))
}
}
impl<'tcx> MiriMachine<'tcx> {
    /// Updates the address bookkeeping after the allocation `dead_id` has been freed.
    ///
    /// The allocation is removed from `int_to_ptr_map` and `exposed` but kept in `base_addr`,
    /// and its address is offered to the reuse pool together with `size`, `align` and `kind`.
    ///
    /// Panics if `dead_id` has no recorded base address or is missing from `int_to_ptr_map`.
    pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) {
        let global_state = self.alloc_addresses.get_mut();
        let rng = self.rng.get_mut();

        // We can *not* remove this from `base_addr`, since the interpreter design requires that we
        // be able to retrieve an AllocId + offset for any memory access *before* we check if the
        // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory
        // access to determine the allocation ID and offset -- and there can still be pointers with
        // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return
        // `None` only if the pointer truly has no provenance (this ensures consistent error
        // messages).
        // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
        // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
        // returns a dead allocation.
        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
        // `int_to_ptr_map`.
        let addr = global_state.base_addr.get(&dead_id).copied().unwrap();
        let pos = global_state
            .int_to_ptr_map
            .binary_search_by_key(&addr, |&(base, _)| base)
            .unwrap();
        let removed = global_state.int_to_ptr_map.remove(pos);
        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing

        // We can also remove it from `exposed`, since this allocation can anyway not be returned by
        // `alloc_id_from_addr` any more.
        global_state.exposed.remove(&dead_id);

        // Also remember this address for future reuse.
        let thread = self.threads.active_thread();
        // The closure supplies the clock stored alongside the address: the data-race detector's
        // release clock if there is one, otherwise a default clock.
        global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
            match &self.data_race {
                Some(data_race) => {
                    data_race.release_clock(&self.threads).clone()
                }
                None => {
                    VClock::default()
                }
            }
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `align_addr` rounds up to the next multiple of the alignment and leaves addresses that
    /// are already aligned untouched.
    #[test]
    fn test_align_addr() {
        assert_eq!(align_addr(37, 4), 40);
        assert_eq!(align_addr(44, 4), 44);

        // Boundary cases: zero address, one below / one above a multiple, trivial alignment.
        assert_eq!(align_addr(0, 16), 0);
        assert_eq!(align_addr(1, 16), 16);
        assert_eq!(align_addr(15, 16), 16);
        assert_eq!(align_addr(17, 16), 32);
        assert_eq!(align_addr(123, 1), 123);
    }
}