| //! This module is responsible for managing the absolute addresses that allocations are located at, |
| //! and for casting between pointers and integers based on those addresses. |
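| //!
| //! For illustration, this is the kind of user-side code whose behavior this module
| //! determines (a sketch using std's exposed-provenance APIs, not code from this crate):
| //!
| //! ```rust,ignore
| //! let x: u8 = 42;
| //! // Exposing the pointer marks x's allocation as exposed (see `expose_ptr` below).
| //! let addr = std::ptr::addr_of!(x).expose_provenance();
| //! // Casting back produces a pointer with "wildcard" provenance (see `ptr_from_addr_cast`);
| //! // every use of that pointer is resolved against the set of exposed allocations.
| //! let ptr = std::ptr::with_exposed_provenance::<u8>(addr);
| //! assert_eq!(unsafe { ptr.read() }, 42);
| //! ```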
| |
| mod reuse_pool; |
| |
| use std::cell::RefCell; |
| use std::cmp::max; |
| use std::collections::hash_map::Entry; |
| |
| use rand::Rng; |
| |
| use rustc_data_structures::fx::{FxHashMap, FxHashSet}; |
| use rustc_span::Span; |
| use rustc_target::abi::{Align, Size}; |
| |
| use crate::{concurrency::VClock, *}; |
| |
| use self::reuse_pool::ReusePool; |
| |
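| /// How we handle int2ptr casts, i.e. `expose_provenance`/`with_exposed_provenance`.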
| #[derive(Copy, Clone, Debug, PartialEq, Eq)] |
| pub enum ProvenanceMode { |
| /// We support `expose_provenance`/`with_exposed_provenance` via "wildcard" provenance. |
| /// However, we warn on `with_exposed_provenance` to alert the user of the precision loss. |
| Default, |
| /// Like `Default`, but without the warning. |
| Permissive, |
| /// We error on `with_exposed_provenance`, ensuring no precision loss. |
| Strict, |
| } |
| |
| pub type GlobalState = RefCell<GlobalStateInner>; |
| |
| #[derive(Debug)] |
| pub struct GlobalStateInner { |
| /// This is used as a map between the address of each allocation and its `AllocId`. It is always |
| /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset |
| /// from the base address, and we need to find the `AllocId` it belongs to. This is not the |
| /// *full* inverse of `base_addr`; dead allocations have been removed. |
| int_to_ptr_map: Vec<(u64, AllocId)>, |
| /// The base address for each allocation. We cannot put that into |
| /// `AllocExtra` because function pointers also have a base address, and |
| /// they do not have an `AllocExtra`. |
| /// This is the inverse of `int_to_ptr_map`. |
| base_addr: FxHashMap<AllocId, u64>, |
| /// Temporarily store prepared memory space for global allocations the first time their memory |
| /// address is required. This is used to ensure that the memory is allocated before Miri assigns |
| /// it an internal address, which is important for matching the internal address to the machine |
| /// address so FFI can read from pointers. |
| prepared_alloc_bytes: FxHashMap<AllocId, MiriAllocBytes>, |
| /// A pool of addresses we can reuse for future allocations. |
| reuse: ReusePool, |
| /// Whether an allocation has been exposed or not. This cannot be put |
| /// into `AllocExtra` for the same reason as `base_addr`. |
| exposed: FxHashSet<AllocId>, |
| /// The base address to use for the next fresh allocation that does not reuse an old
| /// address. It is always larger than any address that was previously assigned to an
| /// allocation.
| next_base_addr: u64,
| /// The provenance mode to use for int2ptr casts.
| provenance_mode: ProvenanceMode,
| } |
| |
| impl VisitProvenance for GlobalStateInner { |
| fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { |
| let GlobalStateInner { |
| int_to_ptr_map: _, |
| base_addr: _, |
| prepared_alloc_bytes: _, |
| reuse: _, |
| exposed: _, |
| next_base_addr: _, |
| provenance_mode: _, |
| } = self; |
| // Though base_addr, int_to_ptr_map, and exposed contain AllocIds, we do not want to visit them. |
| // int_to_ptr_map and exposed must contain only live allocations, and those |
| // are never garbage collected. |
| // base_addr is only relevant if we have a pointer to an AllocId and need to look up its |
| // base address; so if an AllocId is not reachable from somewhere else we can remove it |
| // here. |
| } |
| } |
| |
| impl GlobalStateInner { |
| pub fn new(config: &MiriConfig, stack_addr: u64) -> Self { |
| GlobalStateInner { |
| int_to_ptr_map: Vec::default(), |
| base_addr: FxHashMap::default(), |
| prepared_alloc_bytes: FxHashMap::default(), |
| reuse: ReusePool::new(config), |
| exposed: FxHashSet::default(), |
| next_base_addr: stack_addr, |
| provenance_mode: config.provenance_mode, |
| } |
| } |
| |
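| /// Removes the address information of allocations that the garbage collector found to be
| /// unreachable.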
| pub fn remove_unreachable_allocs(&mut self, allocs: &LiveAllocs<'_, '_>) { |
| // `exposed` and `int_to_ptr_map` are cleared immediately when an allocation |
| // is freed, so `base_addr` is the only one we have to clean up based on the GC. |
| self.base_addr.retain(|id, _| allocs.is_live(*id)); |
| } |
| } |
| |
| /// Rounds `addr` up to alignment `align`, i.e. returns the smallest multiple of `align`
| /// that is greater than or equal to `addr`.
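| /// For example, `align_addr(37, 4)` returns `40`, while `align_addr(44, 4)` is already
| /// aligned and is returned unchanged (see the tests at the bottom of this file).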
| fn align_addr(addr: u64, align: u64) -> u64 { |
| match addr % align { |
| 0 => addr, |
| rem => addr.strict_add(align) - rem, |
| } |
| } |
| |
| impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} |
| trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { |
| /// Returns the exposed `AllocId` that corresponds to the specified address,
| /// or `None` if the address is out of bounds.
| fn alloc_id_from_addr(&self, addr: u64, size: i64) -> Option<AllocId> { |
| let ecx = self.eval_context_ref(); |
| let global_state = ecx.machine.alloc_addresses.borrow(); |
| assert!(global_state.provenance_mode != ProvenanceMode::Strict); |
| |
| // We always search the allocation to the right of this address. So if the size is strictly
| // negative, we have to search for `addr-1` instead.
| let addr = if size >= 0 { addr } else { addr.saturating_sub(1) }; |
| let pos = global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr); |
| |
| // Determine the in-bounds provenance for this pointer. |
| let alloc_id = match pos { |
| Ok(pos) => Some(global_state.int_to_ptr_map[pos].1), |
| Err(0) => None, |
| Err(pos) => { |
| // This is the largest of the addresses smaller than `addr`,
| // i.e. the greatest lower bound (glb).
| let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1]; |
| // This never overflows because `addr >= glb` |
| let offset = addr - glb; |
| // We require this to be strict in-bounds of the allocation. This arm is only |
| // entered for addresses that are not the base address, so even zero-sized |
| // allocations will get recognized at their base address -- but all other |
| // allocations will *not* be recognized at their "end" address. |
| let size = ecx.get_alloc_info(alloc_id).0; |
| if offset < size.bytes() { Some(alloc_id) } else { None } |
| } |
| }?; |
| |
| // We only use this provenance if it has been exposed. |
| if global_state.exposed.contains(&alloc_id) { |
| // This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed. |
| debug_assert!(ecx.is_alloc_live(alloc_id)); |
| Some(alloc_id) |
| } else { |
| None |
| } |
| } |
| |
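| /// Returns the base address of the given allocation, picking (and caching) a fresh address
| /// if this allocation does not have one yet.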
| fn addr_from_alloc_id( |
| &self, |
| alloc_id: AllocId, |
| memory_kind: MemoryKind, |
| ) -> InterpResult<'tcx, u64> { |
| let ecx = self.eval_context_ref(); |
| let mut global_state = ecx.machine.alloc_addresses.borrow_mut(); |
| let global_state = &mut *global_state; |
| |
| Ok(match global_state.base_addr.entry(alloc_id) { |
| Entry::Occupied(entry) => *entry.get(), |
| Entry::Vacant(entry) => { |
| let mut rng = ecx.machine.rng.borrow_mut(); |
| let (size, align, kind) = ecx.get_alloc_info(alloc_id); |
| // This is either called immediately after allocation (and then cached), or when |
| // adjusting `tcx` pointers (which never get freed). So assert that we are looking |
| // at a live allocation. This also ensures that we never re-assign an address to an |
| // allocation that previously had an address, but then was freed and the address |
| // information was removed. |
| assert!(!matches!(kind, AllocKind::Dead)); |
| |
| // This allocation does not have a base address yet, pick or reuse one. |
| let base_addr = if ecx.machine.native_lib.is_some() { |
| // In native lib mode, we use the "real" address of the bytes for this allocation. |
| // This ensures the interpreted program and native code have the same view of memory. |
| match kind { |
| AllocKind::LiveData => { |
| let ptr = if ecx.tcx.try_get_global_alloc(alloc_id).is_some() { |
| // For new global allocations, we always pre-allocate the memory to be able to use the machine address directly.
| let prepared_bytes = MiriAllocBytes::zeroed(size, align) |
| .unwrap_or_else(|| { |
| panic!("Miri ran out of memory: cannot create allocation of {size:?} bytes") |
| }); |
| let ptr = prepared_bytes.as_ptr(); |
| // Store prepared allocation space to be picked up for use later. |
| global_state.prepared_alloc_bytes.try_insert(alloc_id, prepared_bytes).unwrap(); |
| ptr |
| } else { |
| ecx.get_alloc_bytes_unchecked_raw(alloc_id)? |
| }; |
| // Ensure this pointer's provenance is exposed, so that it can be used by FFI code. |
| ptr.expose_provenance().try_into().unwrap() |
| } |
| AllocKind::Function | AllocKind::VTable => { |
| // Allocate some dummy memory to get a unique address for this function/vtable. |
| let alloc_bytes = MiriAllocBytes::from_bytes(&[0u8; 1], Align::from_bytes(1).unwrap()); |
| // We don't need to expose these bytes as nobody is allowed to access them. |
| let addr = alloc_bytes.as_ptr().addr().try_into().unwrap(); |
| // Leak the underlying memory to ensure it remains unique. |
| std::mem::forget(alloc_bytes); |
| addr |
| } |
| AllocKind::Dead => unreachable!(),
| } |
| } else if let Some((reuse_addr, clock)) = global_state.reuse.take_addr( |
| &mut *rng, |
| size, |
| align, |
| memory_kind, |
| ecx.active_thread(), |
| ) { |
| if let Some(clock) = clock { |
| ecx.acquire_clock(&clock); |
| } |
| reuse_addr |
| } else { |
| // We have to pick a fresh address. |
| // Leave some space to the previous allocation, to give it some chance to be less aligned. |
| // We ensure that `(global_state.next_base_addr + slack) % 16` is uniformly distributed. |
| let slack = rng.gen_range(0..16); |
| // From next_base_addr + slack, round up to adjust for alignment. |
| let base_addr = global_state |
| .next_base_addr |
| .checked_add(slack) |
| .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; |
| let base_addr = align_addr(base_addr, align.bytes()); |
| |
| // Remember next base address. If this allocation is zero-sized, leave a gap |
| // of at least 1 to avoid two allocations having the same base address. |
| // (The logic in `alloc_id_from_addr` assumes unique addresses, and different |
| // function/vtable pointers need to be distinguishable!) |
| global_state.next_base_addr = base_addr |
| .checked_add(max(size.bytes(), 1)) |
| .ok_or_else(|| err_exhaust!(AddressSpaceFull))?; |
| // Even if `Size` didn't overflow, we might still have filled up the address space. |
| if global_state.next_base_addr > ecx.target_usize_max() { |
| throw_exhaust!(AddressSpaceFull); |
| } |
| |
| base_addr |
| }; |
| trace!( |
| "Assigning base address {:#x} to allocation {:?} (size: {}, align: {})", |
| base_addr, |
| alloc_id, |
| size.bytes(), |
| align.bytes(), |
| ); |
| |
| // Store address in cache. |
| entry.insert(base_addr); |
| |
| // Also maintain the opposite mapping in `int_to_ptr_map`, ensuring we keep it sorted. |
| // We have a fast-path for the common case that this address is bigger than all previous ones. |
| let pos = if global_state |
| .int_to_ptr_map |
| .last() |
| .is_some_and(|(last_addr, _)| *last_addr < base_addr) |
| { |
| global_state.int_to_ptr_map.len() |
| } else { |
| global_state |
| .int_to_ptr_map |
| .binary_search_by_key(&base_addr, |(addr, _)| *addr) |
| .unwrap_err() |
| }; |
| global_state.int_to_ptr_map.insert(pos, (base_addr, alloc_id)); |
| |
| base_addr |
| } |
| }) |
| } |
| } |
| |
| impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} |
| pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { |
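| /// Marks the given allocation as exposed (and, if a borrow tracker is active, also exposes
| /// the given tag), so that wildcard pointers may later resolve to it.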
| fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> { |
| let ecx = self.eval_context_mut(); |
| let global_state = ecx.machine.alloc_addresses.get_mut(); |
| // In strict mode, we don't need this, so we can save some cycles by not tracking it. |
| if global_state.provenance_mode == ProvenanceMode::Strict { |
| return Ok(()); |
| } |
| // Exposing a dead alloc is a no-op, because it's not possible to get a dead allocation |
| // via int2ptr. |
| if !ecx.is_alloc_live(alloc_id) { |
| return Ok(()); |
| } |
| trace!("Exposing allocation id {alloc_id:?}"); |
| let global_state = ecx.machine.alloc_addresses.get_mut(); |
| global_state.exposed.insert(alloc_id); |
| if ecx.machine.borrow_tracker.is_some() { |
| ecx.expose_tag(alloc_id, tag)?; |
| } |
| Ok(()) |
| } |
| |
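| /// Turns an integer into a pointer with "wildcard" provenance, warning or erroring first
| /// depending on the provenance mode. The address is *not* resolved to an allocation here;
| /// that happens lazily whenever the pointer is used.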
| fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer> { |
| trace!("Casting {:#x} to a pointer", addr); |
| |
| let ecx = self.eval_context_ref(); |
| let global_state = ecx.machine.alloc_addresses.borrow(); |
| |
| // Potentially emit a warning. |
| match global_state.provenance_mode { |
| ProvenanceMode::Default => { |
| // The first time this happens at a particular location, print a warning. |
| thread_local! { |
| // `Span` is non-`Send`, so we use a thread-local instead. |
| static PAST_WARNINGS: RefCell<FxHashSet<Span>> = RefCell::default(); |
| } |
| PAST_WARNINGS.with_borrow_mut(|past_warnings| { |
| let first = past_warnings.is_empty(); |
| if past_warnings.insert(ecx.cur_span()) { |
| // Newly inserted, so first time we see this span. |
| ecx.emit_diagnostic(NonHaltingDiagnostic::Int2Ptr { details: first }); |
| } |
| }); |
| } |
| ProvenanceMode::Strict => { |
| throw_machine_stop!(TerminationInfo::Int2PtrWithStrictProvenance); |
| } |
| ProvenanceMode::Permissive => {} |
| } |
| |
| // We do *not* look up the `AllocId` here! This is a `ptr as usize` cast, and it is |
| // completely legal to do a cast and then `wrapping_offset` to another allocation and only |
| // *then* do a memory access. So the allocation that the pointer happens to point to on a |
| // cast is fairly irrelevant. Instead we generate this as a "wildcard" pointer, such that |
| // *every time the pointer is used*, we do an `AllocId` lookup to find the (exposed) |
| // allocation it might be referencing. |
| Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr))) |
| } |
| |
| /// Convert a relative (tcx) pointer to a Miri pointer. |
| fn adjust_alloc_root_pointer( |
| &self, |
| ptr: interpret::Pointer<CtfeProvenance>, |
| tag: BorTag, |
| kind: MemoryKind, |
| ) -> InterpResult<'tcx, interpret::Pointer<Provenance>> { |
| let ecx = self.eval_context_ref(); |
| |
| let (prov, offset) = ptr.into_parts(); // offset is relative (AllocId provenance) |
| let alloc_id = prov.alloc_id(); |
| |
| // Get a pointer to the beginning of this allocation. |
| let base_addr = ecx.addr_from_alloc_id(alloc_id, kind)?; |
| let base_ptr = interpret::Pointer::new( |
| Provenance::Concrete { alloc_id, tag }, |
| Size::from_bytes(base_addr), |
| ); |
| // Add offset with the right kind of pointer-overflowing arithmetic. |
| Ok(base_ptr.wrapping_offset(offset, ecx)) |
| } |
| |
| /// Returns the prepared `MiriAllocBytes` for a global allocation: either memory space that
| /// `addr_from_alloc_id` reserved in the past, or bytes that are pre-allocated right when this
| /// is called.
| fn get_global_alloc_bytes(&self, id: AllocId, kind: MemoryKind, bytes: &[u8], align: Align) -> InterpResult<'tcx, MiriAllocBytes> { |
| let ecx = self.eval_context_ref(); |
| Ok(if ecx.machine.native_lib.is_some() { |
| // In native lib mode, MiriAllocBytes for global allocations are handled via `prepared_alloc_bytes`. |
| // This additional call ensures that some `MiriAllocBytes` are always prepared. |
| ecx.addr_from_alloc_id(id, kind)?; |
| let mut global_state = ecx.machine.alloc_addresses.borrow_mut(); |
| // The memory we need here will have already been allocated during an earlier call to |
| // `addr_from_alloc_id` for this allocation. So don't create a new `MiriAllocBytes` here, instead |
| // fetch the previously prepared bytes from `prepared_alloc_bytes`. |
| let mut prepared_alloc_bytes = global_state |
| .prepared_alloc_bytes |
| .remove(&id) |
| .unwrap_or_else(|| panic!("alloc bytes for {id:?} have not been prepared")); |
| // Sanity-check that the prepared allocation has the right size and alignment. |
| assert!(prepared_alloc_bytes.as_ptr().is_aligned_to(align.bytes_usize())); |
| assert_eq!(prepared_alloc_bytes.len(), bytes.len()); |
| // Copy allocation contents into prepared memory. |
| prepared_alloc_bytes.copy_from_slice(bytes); |
| prepared_alloc_bytes |
| } else { |
| MiriAllocBytes::from_bytes(std::borrow::Cow::Borrowed(&*bytes), align) |
| }) |
| } |
| |
| /// When a pointer is used for a memory access, this computes in which allocation the access
| /// is taking place, and at which offset within that allocation.
| fn ptr_get_alloc( |
| &self, |
| ptr: interpret::Pointer<Provenance>, |
| size: i64, |
| ) -> Option<(AllocId, Size)> { |
| let ecx = self.eval_context_ref(); |
| |
| let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance) |
| |
| let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag { |
| alloc_id |
| } else { |
| // A wildcard pointer. |
| ecx.alloc_id_from_addr(addr.bytes(), size)? |
| }; |
| |
| // This cannot fail: since we already have a pointer with that provenance, adjust_alloc_root_pointer |
| // must have been called in the past, so we can just look up the address in the map. |
| let base_addr = *ecx.machine.alloc_addresses.borrow().base_addr.get(&alloc_id).unwrap(); |
| |
| // Wrapping "addr - base_addr" |
| let rel_offset = ecx.truncate_to_target_usize(addr.bytes().wrapping_sub(base_addr)); |
| Some((alloc_id, Size::from_bytes(rel_offset))) |
| } |
| } |
| |
| impl<'tcx> MiriMachine<'tcx> { |
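| /// Called when an allocation is freed: removes its address from `int_to_ptr_map` and
| /// `exposed` (but not from `base_addr`), and remembers the address range for future reuse.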
| pub fn free_alloc_id(&mut self, dead_id: AllocId, size: Size, align: Align, kind: MemoryKind) { |
| let global_state = self.alloc_addresses.get_mut(); |
| let rng = self.rng.get_mut(); |
| |
| // We can *not* remove this from `base_addr`, since the interpreter design requires that we |
| // be able to retrieve an AllocId + offset for any memory access *before* we check if the |
| // access is valid. Specifically, `ptr_get_alloc` is called on each attempt at a memory |
| // access to determine the allocation ID and offset -- and there can still be pointers with |
| // `dead_id` that one can attempt to use for a memory access. `ptr_get_alloc` may return |
| // `None` only if the pointer truly has no provenance (this ensures consistent error |
| // messages). |
| // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist |
| // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never |
| // returns a dead allocation. |
| // To avoid a linear scan we first look up the address in `base_addr`, and then find it in |
| // `int_to_ptr_map`. |
| let addr = *global_state.base_addr.get(&dead_id).unwrap(); |
| let pos = |
| global_state.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap(); |
| let removed = global_state.int_to_ptr_map.remove(pos); |
| assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing |
| // We can also remove it from `exposed`, since this allocation can anyway not be returned by |
| // `alloc_id_from_addr` any more. |
| global_state.exposed.remove(&dead_id); |
| // Also remember this address for future reuse. |
| let thread = self.threads.active_thread(); |
| global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || { |
| if let Some(data_race) = &self.data_race { |
| data_race.release_clock(&self.threads).clone() |
| } else { |
| VClock::default() |
| } |
| }) |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| #[test] |
| fn test_align_addr() { |
| assert_eq!(align_addr(37, 4), 40); |
| assert_eq!(align_addr(44, 4), 44); |
| } |
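|
| // A few more straightforward cases of `align_addr`, spelled out as a sanity check.
| #[test]
| fn test_align_addr_edge_cases() {
| // Alignment 1 never changes the address.
| assert_eq!(align_addr(123, 1), 123);
| // Already-aligned addresses are returned unchanged.
| assert_eq!(align_addr(0, 16), 0);
| assert_eq!(align_addr(64, 16), 64);
| // Otherwise the address is rounded up to the next multiple of the alignment.
| assert_eq!(align_addr(1, 16), 16);
| }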
| } |