blob: 1dbcfe5588e48c75e421e113bbb9e2a5ab116b3b [file] [log] [blame]
//! Check the validity invariant of a given value, and tell the user
//! where in the value it got violated.
//! In const context, this goes even further and tries to approximate const safety.
//! That's useful because it means other passes (e.g. promotion) can rely on `const`s
//! to be const-safe.
use std::fmt::Write;
use std::ops::RangeInclusive;
use syntax_pos::symbol::{sym, Symbol};
use rustc::hir;
use rustc::ty::layout::{self, TyLayout, LayoutOf, VariantIdx};
use rustc::ty;
use rustc_data_structures::fx::FxHashSet;
use std::hash::Hash;
use super::{
GlobalAlloc, InterpResult, CheckInAllocMsg,
Scalar, OpTy, Machine, InterpCx, ValueVisitor, MPlaceTy,
};
macro_rules! throw_validation_failure {
($what:expr, $where:expr, $details:expr) => {{
let mut msg = format!("encountered {}", $what);
let where_ = &$where;
if !where_.is_empty() {
msg.push_str(" at ");
write_path(&mut msg, where_);
}
write!(&mut msg, ", but expected {}", $details).unwrap();
throw_unsup!(ValidationFailure(msg))
}};
($what:expr, $where:expr) => {{
let mut msg = format!("encountered {}", $what);
let where_ = &$where;
if !where_.is_empty() {
msg.push_str(" at ");
write_path(&mut msg, where_);
}
throw_unsup!(ValidationFailure(msg))
}};
}
macro_rules! try_validation {
($e:expr, $what:expr, $where:expr, $details:expr) => {{
match $e {
Ok(x) => x,
Err(_) => throw_validation_failure!($what, $where, $details),
}
}};
($e:expr, $what:expr, $where:expr) => {{
match $e {
Ok(x) => x,
Err(_) => throw_validation_failure!($what, $where),
}
}};
}
/// We want to show a nice path to the invalid field for diagnostics,
/// but avoid string operations in the happy case where no error happens.
/// So we track a `Vec<PathElem>` where `PathElem` contains all the data we
/// need to later print something for the user.
#[derive(Copy, Clone, Debug)]
pub enum PathElem {
Field(Symbol),
Variant(Symbol),
GeneratorState(VariantIdx),
ClosureVar(Symbol),
ArrayElem(usize),
TupleElem(usize),
Deref,
Tag,
DynDowncast,
}
/// State for tracking recursive validation of references
pub struct RefTracking<T, PATH = ()> {
pub seen: FxHashSet<T>,
pub todo: Vec<(T, PATH)>,
}
impl<T: Copy + Eq + Hash + std::fmt::Debug, PATH: Default> RefTracking<T, PATH> {
pub fn empty() -> Self {
RefTracking {
seen: FxHashSet::default(),
todo: vec![],
}
}
pub fn new(op: T) -> Self {
let mut ref_tracking_for_consts = RefTracking {
seen: FxHashSet::default(),
todo: vec![(op, PATH::default())],
};
ref_tracking_for_consts.seen.insert(op);
ref_tracking_for_consts
}
pub fn track(&mut self, op: T, path: impl FnOnce() -> PATH) {
if self.seen.insert(op) {
trace!("Recursing below ptr {:#?}", op);
let path = path();
// Remember to come back to this later.
self.todo.push((op, path));
}
}
}
/// Format a path
fn write_path(out: &mut String, path: &Vec<PathElem>) {
use self::PathElem::*;
for elem in path.iter() {
match elem {
Field(name) => write!(out, ".{}", name),
Variant(name) => write!(out, ".<downcast-variant({})>", name),
GeneratorState(idx) => write!(out, ".<generator-state({})>", idx.index()),
ClosureVar(name) => write!(out, ".<closure-var({})>", name),
TupleElem(idx) => write!(out, ".{}", idx),
ArrayElem(idx) => write!(out, "[{}]", idx),
Deref =>
// This does not match Rust syntax, but it is more readable for long paths -- and
// some of the other items here also are not Rust syntax. Actually we can't
// even use the usual syntax because we are just showing the projections,
// not the root.
write!(out, ".<deref>"),
Tag => write!(out, ".<enum-tag>"),
DynDowncast => write!(out, ".<dyn-downcast>"),
}.unwrap()
}
}
// Test if a range that wraps at overflow contains `test`
fn wrapping_range_contains(r: &RangeInclusive<u128>, test: u128) -> bool {
let (lo, hi) = r.clone().into_inner();
if lo > hi {
// Wrapped
(..=hi).contains(&test) || (lo..).contains(&test)
} else {
// Normal
r.contains(&test)
}
}
// Formats such that a sentence like "expected something {}" to mean
// "expected something <in the given range>" makes sense.
fn wrapping_range_format(r: &RangeInclusive<u128>, max_hi: u128) -> String {
let (lo, hi) = r.clone().into_inner();
debug_assert!(hi <= max_hi);
if lo > hi {
format!("less or equal to {}, or greater or equal to {}", hi, lo)
} else if lo == hi {
format!("equal to {}", lo)
} else if lo == 0 {
debug_assert!(hi < max_hi, "should not be printing if the range covers everything");
format!("less or equal to {}", hi)
} else if hi == max_hi {
debug_assert!(lo > 0, "should not be printing if the range covers everything");
format!("greater or equal to {}", lo)
} else {
format!("in the range {:?}", r)
}
}
struct ValidityVisitor<'rt, 'mir, 'tcx, M: Machine<'mir, 'tcx>> {
/// The `path` may be pushed to, but the part that is present when a function
/// starts must not be changed! `visit_fields` and `visit_array` rely on
/// this stack discipline.
path: Vec<PathElem>,
ref_tracking_for_consts: Option<&'rt mut RefTracking<
MPlaceTy<'tcx, M::PointerTag>,
Vec<PathElem>,
>>,
ecx: &'rt InterpCx<'mir, 'tcx, M>,
}
impl<'rt, 'mir, 'tcx, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, 'tcx, M> {
fn aggregate_field_path_elem(
&mut self,
layout: TyLayout<'tcx>,
field: usize,
) -> PathElem {
match layout.ty.kind {
// generators and closures.
ty::Closure(def_id, _) | ty::Generator(def_id, _, _) => {
let mut name = None;
if def_id.is_local() {
let tables = self.ecx.tcx.typeck_tables_of(def_id);
if let Some(upvars) = tables.upvar_list.get(&def_id) {
// Sometimes the index is beyond the number of upvars (seen
// for a generator).
if let Some((&var_hir_id, _)) = upvars.get_index(field) {
let node = self.ecx.tcx.hir().get(var_hir_id);
if let hir::Node::Binding(pat) = node {
if let hir::PatKind::Binding(_, _, ident, _) = pat.kind {
name = Some(ident.name);
}
}
}
}
}
PathElem::ClosureVar(name.unwrap_or_else(|| {
// Fall back to showing the field index.
sym::integer(field)
}))
}
// tuples
ty::Tuple(_) => PathElem::TupleElem(field),
// enums
ty::Adt(def, ..) if def.is_enum() => {
// we might be projecting *to* a variant, or to a field *in*a variant.
match layout.variants {
layout::Variants::Single { index } =>
// Inside a variant
PathElem::Field(def.variants[index].fields[field].ident.name),
_ => bug!(),
}
}
// other ADTs
ty::Adt(def, _) => PathElem::Field(def.non_enum_variant().fields[field].ident.name),
// arrays/slices
ty::Array(..) | ty::Slice(..) => PathElem::ArrayElem(field),
// dyn traits
ty::Dynamic(..) => PathElem::DynDowncast,
// nothing else has an aggregate layout
_ => bug!("aggregate_field_path_elem: got non-aggregate type {:?}", layout.ty),
}
}
fn visit_elem(
&mut self,
new_op: OpTy<'tcx, M::PointerTag>,
elem: PathElem,
) -> InterpResult<'tcx> {
// Remember the old state
let path_len = self.path.len();
// Perform operation
self.path.push(elem);
self.visit_value(new_op)?;
// Undo changes
self.path.truncate(path_len);
Ok(())
}
fn check_wide_ptr_meta(
&mut self,
meta: Option<Scalar<M::PointerTag>>,
pointee: TyLayout<'tcx>,
) -> InterpResult<'tcx> {
let tail = self.ecx.tcx.struct_tail_erasing_lifetimes(pointee.ty, self.ecx.param_env);
match tail.kind {
ty::Dynamic(..) => {
let vtable = meta.unwrap();
try_validation!(
self.ecx.memory.check_ptr_access(
vtable,
3*self.ecx.tcx.data_layout.pointer_size, // drop, size, align
self.ecx.tcx.data_layout.pointer_align.abi,
),
"dangling or unaligned vtable pointer in wide pointer or too small vtable",
self.path
);
try_validation!(self.ecx.read_drop_type_from_vtable(vtable),
"invalid drop fn in vtable", self.path);
try_validation!(self.ecx.read_size_and_align_from_vtable(vtable),
"invalid size or align in vtable", self.path);
// FIXME: More checks for the vtable.
}
ty::Slice(..) | ty::Str => {
let _len = try_validation!(meta.unwrap().to_machine_usize(self.ecx),
"non-integer slice length in wide pointer", self.path);
// We do not check that `len * elem_size <= isize::MAX`:
// that is only required for references, and there it falls out of the
// "dereferenceable" check performed by Stacked Borrows.
}
ty::Foreign(..) => {
// Unsized, but not wide.
}
_ =>
bug!("Unexpected unsized type tail: {:?}", tail),
}
Ok(())
}
}
impl<'rt, 'mir, 'tcx, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
for ValidityVisitor<'rt, 'mir, 'tcx, M>
{
type V = OpTy<'tcx, M::PointerTag>;
#[inline(always)]
fn ecx(&self) -> &InterpCx<'mir, 'tcx, M> {
&self.ecx
}
#[inline]
fn visit_field(
&mut self,
old_op: OpTy<'tcx, M::PointerTag>,
field: usize,
new_op: OpTy<'tcx, M::PointerTag>
) -> InterpResult<'tcx> {
let elem = self.aggregate_field_path_elem(old_op.layout, field);
self.visit_elem(new_op, elem)
}
#[inline]
fn visit_variant(
&mut self,
old_op: OpTy<'tcx, M::PointerTag>,
variant_id: VariantIdx,
new_op: OpTy<'tcx, M::PointerTag>
) -> InterpResult<'tcx> {
let name = match old_op.layout.ty.kind {
ty::Adt(adt, _) => PathElem::Variant(adt.variants[variant_id].ident.name),
// Generators also have variants
ty::Generator(..) => PathElem::GeneratorState(variant_id),
_ => bug!("Unexpected type with variant: {:?}", old_op.layout.ty),
};
self.visit_elem(new_op, name)
}
#[inline]
fn visit_value(&mut self, op: OpTy<'tcx, M::PointerTag>) -> InterpResult<'tcx>
{
trace!("visit_value: {:?}, {:?}", *op, op.layout);
// Translate some possible errors to something nicer.
match self.walk_value(op) {
Ok(()) => Ok(()),
Err(err) => match err.kind {
err_ub!(InvalidDiscriminant(val)) =>
throw_validation_failure!(
val, self.path, "a valid enum discriminant"
),
err_unsup!(ReadPointerAsBytes) =>
throw_validation_failure!(
"a pointer", self.path, "plain (non-pointer) bytes"
),
_ => Err(err),
}
}
}
fn visit_primitive(&mut self, value: OpTy<'tcx, M::PointerTag>) -> InterpResult<'tcx>
{
let value = self.ecx.read_immediate(value)?;
// Go over all the primitive types
let ty = value.layout.ty;
match ty.kind {
ty::Bool => {
let value = value.to_scalar_or_undef();
try_validation!(value.to_bool(),
value, self.path, "a boolean");
},
ty::Char => {
let value = value.to_scalar_or_undef();
try_validation!(value.to_char(),
value, self.path, "a valid unicode codepoint");
},
ty::Float(_) | ty::Int(_) | ty::Uint(_) => {
// NOTE: Keep this in sync with the array optimization for int/float
// types below!
let size = value.layout.size;
let value = value.to_scalar_or_undef();
if self.ref_tracking_for_consts.is_some() {
// Integers/floats in CTFE: Must be scalar bits, pointers are dangerous
try_validation!(value.to_bits(size),
value, self.path, "initialized plain (non-pointer) bytes");
} else {
// At run-time, for now, we accept *anything* for these types, including
// undef. We should fix that, but let's start low.
}
}
ty::RawPtr(..) => {
// We are conservative with undef for integers, but try to
// actually enforce our current rules for raw pointers.
let place = try_validation!(self.ecx.ref_to_mplace(value),
"undefined pointer", self.path);
if place.layout.is_unsized() {
self.check_wide_ptr_meta(place.meta, place.layout)?;
}
}
_ if ty.is_box() || ty.is_region_ptr() => {
// Handle wide pointers.
// Check metadata early, for better diagnostics
let place = try_validation!(self.ecx.ref_to_mplace(value),
"undefined pointer", self.path);
if place.layout.is_unsized() {
self.check_wide_ptr_meta(place.meta, place.layout)?;
}
// Make sure this is dereferenceable and all.
let (size, align) = self.ecx.size_and_align_of(place.meta, place.layout)?
// for the purpose of validity, consider foreign types to have
// alignment and size determined by the layout (size will be 0,
// alignment should take attributes into account).
.unwrap_or_else(|| (place.layout.size, place.layout.align.abi));
let ptr: Option<_> = match
self.ecx.memory.check_ptr_access_align(
place.ptr,
size,
Some(align),
CheckInAllocMsg::InboundsTest,
)
{
Ok(ptr) => ptr,
Err(err) => {
info!(
"{:?} did not pass access check for size {:?}, align {:?}",
place.ptr, size, align
);
match err.kind {
err_unsup!(InvalidNullPointerUsage) =>
throw_validation_failure!("NULL reference", self.path),
err_unsup!(AlignmentCheckFailed { required, has }) =>
throw_validation_failure!(format_args!("unaligned reference \
(required {} byte alignment but found {})",
required.bytes(), has.bytes()), self.path),
err_unsup!(ReadBytesAsPointer) =>
throw_validation_failure!(
"dangling reference (created from integer)",
self.path
),
_ =>
throw_validation_failure!(
"dangling reference (not entirely in bounds)",
self.path
),
}
}
};
// Recursive checking
if let Some(ref mut ref_tracking) = self.ref_tracking_for_consts {
if let Some(ptr) = ptr { // not a ZST
// Skip validation entirely for some external statics
let alloc_kind = self.ecx.tcx.alloc_map.lock().get(ptr.alloc_id);
if let Some(GlobalAlloc::Static(did)) = alloc_kind {
// `extern static` cannot be validated as they have no body.
// FIXME: Statics from other crates are also skipped.
// They might be checked at a different type, but for now we
// want to avoid recursing too deeply. This is not sound!
if !did.is_local() || self.ecx.tcx.is_foreign_item(did) {
return Ok(());
}
}
}
// Proceed recursively even for ZST, no reason to skip them!
// `!` is a ZST and we want to validate it.
// Normalize before handing `place` to tracking because that will
// check for duplicates.
let place = if size.bytes() > 0 {
self.ecx.force_mplace_ptr(place)
.expect("we already bounds-checked")
} else {
place
};
let path = &self.path;
ref_tracking.track(place, || {
// We need to clone the path anyway, make sure it gets created
// with enough space for the additional `Deref`.
let mut new_path = Vec::with_capacity(path.len() + 1);
new_path.clone_from(path);
new_path.push(PathElem::Deref);
new_path
});
}
}
ty::FnPtr(_sig) => {
let value = value.to_scalar_or_undef();
let _fn = try_validation!(
value.not_undef().and_then(|ptr| self.ecx.memory.get_fn(ptr)),
value, self.path, "a function pointer"
);
// FIXME: Check if the signature matches
}
// This should be all the primitive types
_ => bug!("Unexpected primitive type {}", value.layout.ty)
}
Ok(())
}
fn visit_uninhabited(&mut self) -> InterpResult<'tcx>
{
throw_validation_failure!("a value of an uninhabited type", self.path)
}
fn visit_scalar(
&mut self,
op: OpTy<'tcx, M::PointerTag>,
layout: &layout::Scalar,
) -> InterpResult<'tcx> {
let value = self.ecx.read_scalar(op)?;
// Determine the allowed range
let (lo, hi) = layout.valid_range.clone().into_inner();
// `max_hi` is as big as the size fits
let max_hi = u128::max_value() >> (128 - op.layout.size.bits());
assert!(hi <= max_hi);
// We could also write `(hi + 1) % (max_hi + 1) == lo` but `max_hi + 1` overflows for `u128`
if (lo == 0 && hi == max_hi) || (hi + 1 == lo) {
// Nothing to check
return Ok(());
}
// At least one value is excluded. Get the bits.
let value = try_validation!(value.not_undef(),
value,
self.path,
format_args!(
"something {}",
wrapping_range_format(&layout.valid_range, max_hi),
)
);
let bits = match value.to_bits_or_ptr(op.layout.size, self.ecx) {
Err(ptr) => {
if lo == 1 && hi == max_hi {
// Only NULL is the niche. So make sure the ptr is NOT NULL.
if self.ecx.memory.ptr_may_be_null(ptr) {
throw_validation_failure!(
"a potentially NULL pointer",
self.path,
format_args!(
"something that cannot possibly fail to be {}",
wrapping_range_format(&layout.valid_range, max_hi)
)
)
}
return Ok(());
} else {
// Conservatively, we reject, because the pointer *could* have a bad
// value.
throw_validation_failure!(
"a pointer",
self.path,
format_args!(
"something that cannot possibly fail to be {}",
wrapping_range_format(&layout.valid_range, max_hi)
)
)
}
}
Ok(data) =>
data
};
// Now compare. This is slightly subtle because this is a special "wrap-around" range.
if wrapping_range_contains(&layout.valid_range, bits) {
Ok(())
} else {
throw_validation_failure!(
bits,
self.path,
format_args!("something {}", wrapping_range_format(&layout.valid_range, max_hi))
)
}
}
fn visit_aggregate(
&mut self,
op: OpTy<'tcx, M::PointerTag>,
fields: impl Iterator<Item=InterpResult<'tcx, Self::V>>,
) -> InterpResult<'tcx> {
match op.layout.ty.kind {
ty::Str => {
let mplace = op.assert_mem_place(); // strings are never immediate
try_validation!(self.ecx.read_str(mplace),
"uninitialized or non-UTF-8 data in str", self.path);
}
ty::Array(tys, ..) | ty::Slice(tys) if {
// This optimization applies only for integer and floating point types
// (i.e., types that can hold arbitrary bytes).
match tys.kind {
ty::Int(..) | ty::Uint(..) | ty::Float(..) => true,
_ => false,
}
} => {
// Optimized handling for arrays of integer/float type.
// bailing out for zsts is ok, since the array element type can only be int/float
if op.layout.is_zst() {
return Ok(());
}
// non-ZST array cannot be immediate, slices are never immediate
let mplace = op.assert_mem_place();
// This is the length of the array/slice.
let len = mplace.len(self.ecx)?;
// zero length slices have nothing to be checked
if len == 0 {
return Ok(());
}
// This is the element type size.
let ty_size = self.ecx.layout_of(tys)?.size;
// This is the size in bytes of the whole array.
let size = ty_size * len;
// Size is not 0, get a pointer.
let ptr = self.ecx.force_ptr(mplace.ptr)?;
// This is the optimization: we just check the entire range at once.
// NOTE: Keep this in sync with the handling of integer and float
// types above, in `visit_primitive`.
// In run-time mode, we accept pointers in here. This is actually more
// permissive than a per-element check would be, e.g., we accept
// an &[u8] that contains a pointer even though bytewise checking would
// reject it. However, that's good: We don't inherently want
// to reject those pointers, we just do not have the machinery to
// talk about parts of a pointer.
// We also accept undef, for consistency with the slow path.
match self.ecx.memory.get_raw(ptr.alloc_id)?.check_bytes(
self.ecx,
ptr,
size,
/*allow_ptr_and_undef*/ self.ref_tracking_for_consts.is_none(),
) {
// In the happy case, we needn't check anything else.
Ok(()) => {},
// Some error happened, try to provide a more detailed description.
Err(err) => {
// For some errors we might be able to provide extra information
match err.kind {
err_unsup!(ReadUndefBytes(offset)) => {
// Some byte was undefined, determine which
// element that byte belongs to so we can
// provide an index.
let i = (offset.bytes() / ty_size.bytes()) as usize;
self.path.push(PathElem::ArrayElem(i));
throw_validation_failure!("undefined bytes", self.path)
},
// Other errors shouldn't be possible
_ => return Err(err),
}
}
}
}
_ => {
self.walk_aggregate(op, fields)? // default handler
}
}
Ok(())
}
}
impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
/// This function checks the data at `op`. `op` is assumed to cover valid memory if it
/// is an indirect operand.
/// It will error if the bits at the destination do not match the ones described by the layout.
///
/// `ref_tracking_for_consts` can be `None` to avoid recursive checking below references.
/// This also toggles between "run-time" (no recursion) and "compile-time" (with recursion)
/// validation (e.g., pointer values are fine in integers at runtime) and various other const
/// specific validation checks.
pub fn validate_operand(
&self,
op: OpTy<'tcx, M::PointerTag>,
path: Vec<PathElem>,
ref_tracking_for_consts: Option<&mut RefTracking<
MPlaceTy<'tcx, M::PointerTag>,
Vec<PathElem>,
>>,
) -> InterpResult<'tcx> {
trace!("validate_operand: {:?}, {:?}", *op, op.layout.ty);
// Construct a visitor
let mut visitor = ValidityVisitor {
path,
ref_tracking_for_consts,
ecx: self,
};
// Try to cast to ptr *once* instead of all the time.
let op = self.force_op_ptr(op).unwrap_or(op);
// Run it
visitor.visit_value(op)
}
}