Skip to content

Commit

Permalink
Auto merge of #128742 - RalfJung:miri-vtable-uniqueness, r=saethlin
Browse files Browse the repository at this point in the history
miri: make vtable addresses not globally unique

Miri currently gives vtables a unique global address. That's not actually matching reality though. So this PR enables Miri to generate different addresses for the same type-trait pair.

To avoid generating an unbounded number of `AllocId` (and consuming unbounded amounts of memory), we use the "salt" technique that we also already use for giving constants non-unique addresses: the cache is keyed on a "salt" value n top of the actually relevant key, and Miri picks a random salt (currently in the range `0..16`) each time it needs to choose an `AllocId` for one of these globals -- that means we'll get up to 16 different addresses for each vtable. The salt scheme is integrated into the global allocation deduplication logic in `tcx`, and also used for functions and string literals. (So this also fixes the problem that casting the same function to a fn ptr over and over will consume unbounded memory.)

r? `@saethlin`
Fixes rust-lang/miri#3737
  • Loading branch information
bors committed Aug 13, 2024
2 parents e9e27ab + 4763d12 commit 591ecb8
Show file tree
Hide file tree
Showing 24 changed files with 320 additions and 194 deletions.
6 changes: 6 additions & 0 deletions compiler/rustc_const_eval/src/interpret/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
}
(ty::Dynamic(data_a, _, ty::Dyn), ty::Dynamic(data_b, _, ty::Dyn)) => {
let val = self.read_immediate(src)?;
// MIR building generates odd NOP casts, prevent them from causing unexpected trouble.
// See <https://github.com/rust-lang/rust/issues/128880>.
// FIXME: ideally we wouldn't have to do this.
if data_a == data_b {
return self.write_immediate(*val, dest);
}
// Take apart the old pointer, and find the dynamic type.
let (old_data, old_vptr) = val.to_scalar_pair();
let old_data = old_data.to_pointer(self)?;
Expand Down
18 changes: 17 additions & 1 deletion compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use rustc_target::spec::abi::Abi as CallAbi;
use super::{
throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation,
ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT,
};

/// Data returned by [`Machine::after_stack_pop`], and consumed by
Expand Down Expand Up @@ -575,6 +575,14 @@ pub trait Machine<'tcx>: Sized {
{
eval(ecx, val, span, layout)
}

/// Returns the salt to be used for a deduplicated global alloation.
/// If the allocation is for a function, the instance is provided as well
/// (this lets Miri ensure unique addresses for some functions).
fn get_global_alloc_salt(
ecx: &InterpCx<'tcx, Self>,
instance: Option<ty::Instance<'tcx>>,
) -> usize;
}

/// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines
Expand Down Expand Up @@ -677,4 +685,12 @@ pub macro compile_time_machine(<$tcx: lifetime>) {
let (prov, offset) = ptr.into_parts();
Some((prov.alloc_id(), offset, prov.immutable()))
}

#[inline(always)]
fn get_global_alloc_salt(
_ecx: &InterpCx<$tcx, Self>,
_instance: Option<ty::Instance<$tcx>>,
) -> usize {
CTFE_ALLOC_SALT
}
}
5 changes: 4 additions & 1 deletion compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {

pub fn fn_ptr(&mut self, fn_val: FnVal<'tcx, M::ExtraFnVal>) -> Pointer<M::Provenance> {
let id = match fn_val {
FnVal::Instance(instance) => self.tcx.reserve_and_set_fn_alloc(instance),
FnVal::Instance(instance) => {
let salt = M::get_global_alloc_salt(self, Some(instance));
self.tcx.reserve_and_set_fn_alloc(instance, salt)
}
FnVal::Other(extra) => {
// FIXME(RalfJung): Should we have a cache here?
let id = self.tcx.reserve_alloc_id();
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_const_eval/src/interpret/place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,8 @@ where
// Use cache for immutable strings.
let ptr = if mutbl.is_not() {
// Use dedup'd allocation function.
let id = tcx.allocate_bytes_dedup(str.as_bytes());
let salt = M::get_global_alloc_salt(self, None);
let id = tcx.allocate_bytes_dedup(str.as_bytes(), salt);

// Turn untagged "global" pointers (obtained via `tcx`) into the machine pointer to the allocation.
M::adjust_alloc_root_pointer(&self, Pointer::from(id), Some(kind))?
Expand Down
4 changes: 3 additions & 1 deletion compiler/rustc_const_eval/src/interpret/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
ensure_monomorphic_enough(*self.tcx, ty)?;
ensure_monomorphic_enough(*self.tcx, poly_trait_ref)?;

let vtable_symbolic_allocation = self.tcx.reserve_and_set_vtable_alloc(ty, poly_trait_ref);
let salt = M::get_global_alloc_salt(self, None);
let vtable_symbolic_allocation =
self.tcx.reserve_and_set_vtable_alloc(ty, poly_trait_ref, salt);
let vtable_ptr = self.global_root_pointer(Pointer::from(vtable_symbolic_allocation))?;
Ok(vtable_ptr.into())
}
Expand Down
145 changes: 51 additions & 94 deletions compiler/rustc_middle/src/mir/interpret/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use std::num::NonZero;
use std::{fmt, io};

use rustc_ast::LitKind;
use rustc_attr::InlineAttr;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lock;
use rustc_errors::ErrorGuaranteed;
Expand Down Expand Up @@ -46,7 +45,7 @@ pub use self::pointer::{CtfeProvenance, Pointer, PointerArithmetic, Provenance};
pub use self::value::Scalar;
use crate::mir;
use crate::ty::codec::{TyDecoder, TyEncoder};
use crate::ty::{self, GenericArgKind, Instance, Ty, TyCtxt};
use crate::ty::{self, Instance, Ty, TyCtxt};

/// Uniquely identifies one of the following:
/// - A constant
Expand Down Expand Up @@ -126,11 +125,10 @@ pub fn specialized_encode_alloc_id<'tcx, E: TyEncoder<I = TyCtxt<'tcx>>>(
AllocDiscriminant::Alloc.encode(encoder);
alloc.encode(encoder);
}
GlobalAlloc::Function { instance, unique } => {
GlobalAlloc::Function { instance } => {
trace!("encoding {:?} with {:#?}", alloc_id, instance);
AllocDiscriminant::Fn.encode(encoder);
instance.encode(encoder);
unique.encode(encoder);
}
GlobalAlloc::VTable(ty, poly_trait_ref) => {
trace!("encoding {:?} with {ty:#?}, {poly_trait_ref:#?}", alloc_id);
Expand Down Expand Up @@ -219,38 +217,32 @@ impl<'s> AllocDecodingSession<'s> {
}

// Now decode the actual data.
let alloc_id = decoder.with_position(pos, |decoder| {
match alloc_kind {
AllocDiscriminant::Alloc => {
trace!("creating memory alloc ID");
let alloc = <ConstAllocation<'tcx> as Decodable<_>>::decode(decoder);
trace!("decoded alloc {:?}", alloc);
decoder.interner().reserve_and_set_memory_alloc(alloc)
}
AllocDiscriminant::Fn => {
trace!("creating fn alloc ID");
let instance = ty::Instance::decode(decoder);
trace!("decoded fn alloc instance: {:?}", instance);
let unique = bool::decode(decoder);
// Here we cannot call `reserve_and_set_fn_alloc` as that would use a query, which
// is not possible in this context. That's why the allocation stores
// whether it is unique or not.
decoder.interner().reserve_and_set_fn_alloc_internal(instance, unique)
}
AllocDiscriminant::VTable => {
trace!("creating vtable alloc ID");
let ty = <Ty<'_> as Decodable<D>>::decode(decoder);
let poly_trait_ref =
<Option<ty::PolyExistentialTraitRef<'_>> as Decodable<D>>::decode(decoder);
trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}");
decoder.interner().reserve_and_set_vtable_alloc(ty, poly_trait_ref)
}
AllocDiscriminant::Static => {
trace!("creating extern static alloc ID");
let did = <DefId as Decodable<D>>::decode(decoder);
trace!("decoded static def-ID: {:?}", did);
decoder.interner().reserve_and_set_static_alloc(did)
}
let alloc_id = decoder.with_position(pos, |decoder| match alloc_kind {
AllocDiscriminant::Alloc => {
trace!("creating memory alloc ID");
let alloc = <ConstAllocation<'tcx> as Decodable<_>>::decode(decoder);
trace!("decoded alloc {:?}", alloc);
decoder.interner().reserve_and_set_memory_alloc(alloc)
}
AllocDiscriminant::Fn => {
trace!("creating fn alloc ID");
let instance = ty::Instance::decode(decoder);
trace!("decoded fn alloc instance: {:?}", instance);
decoder.interner().reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT)
}
AllocDiscriminant::VTable => {
trace!("creating vtable alloc ID");
let ty = <Ty<'_> as Decodable<D>>::decode(decoder);
let poly_trait_ref =
<Option<ty::PolyExistentialTraitRef<'_>> as Decodable<D>>::decode(decoder);
trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}");
decoder.interner().reserve_and_set_vtable_alloc(ty, poly_trait_ref, CTFE_ALLOC_SALT)
}
AllocDiscriminant::Static => {
trace!("creating extern static alloc ID");
let did = <DefId as Decodable<D>>::decode(decoder);
trace!("decoded static def-ID: {:?}", did);
decoder.interner().reserve_and_set_static_alloc(did)
}
});

Expand All @@ -265,12 +257,7 @@ impl<'s> AllocDecodingSession<'s> {
#[derive(Debug, Clone, Eq, PartialEq, Hash, TyDecodable, TyEncodable, HashStable)]
pub enum GlobalAlloc<'tcx> {
/// The alloc ID is used as a function pointer.
Function {
instance: Instance<'tcx>,
/// Stores whether this instance is unique, i.e. all pointers to this function use the same
/// alloc ID.
unique: bool,
},
Function { instance: Instance<'tcx> },
/// This alloc ID points to a symbolic (not-reified) vtable.
VTable(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>),
/// The alloc ID points to a "lazy" static variable that did not get computed (yet).
Expand Down Expand Up @@ -323,14 +310,17 @@ impl<'tcx> GlobalAlloc<'tcx> {
}
}

pub const CTFE_ALLOC_SALT: usize = 0;

pub(crate) struct AllocMap<'tcx> {
/// Maps `AllocId`s to their corresponding allocations.
alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,

/// Used to ensure that statics and functions only get one associated `AllocId`.
//
// FIXME: Should we just have two separate dedup maps for statics and functions each?
dedup: FxHashMap<GlobalAlloc<'tcx>, AllocId>,
/// Used to deduplicate global allocations: functions, vtables, string literals, ...
///
/// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
/// the actual guarantees.
dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,

/// The `AllocId` to assign to the next requested ID.
/// Always incremented; never gets smaller.
Expand Down Expand Up @@ -368,83 +358,50 @@ impl<'tcx> TyCtxt<'tcx> {

/// Reserves a new ID *if* this allocation has not been dedup-reserved before.
/// Should not be used for mutable memory.
fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>) -> AllocId {
fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
let mut alloc_map = self.alloc_map.lock();
if let GlobalAlloc::Memory(mem) = alloc {
if mem.inner().mutability.is_mut() {
bug!("trying to dedup-reserve mutable memory");
}
}
if let Some(&alloc_id) = alloc_map.dedup.get(&alloc) {
let alloc_salt = (alloc, salt);
if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
return alloc_id;
}
let id = alloc_map.reserve();
debug!("creating alloc {alloc:?} with id {id:?}");
alloc_map.alloc_map.insert(id, alloc.clone());
alloc_map.dedup.insert(alloc, id);
debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
alloc_map.dedup.insert(alloc_salt, id);
id
}

/// Generates an `AllocId` for a memory allocation. If the exact same memory has been
/// allocated before, this will return the same `AllocId`.
pub fn reserve_and_set_memory_dedup(self, mem: ConstAllocation<'tcx>) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Memory(mem))
pub fn reserve_and_set_memory_dedup(self, mem: ConstAllocation<'tcx>, salt: usize) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Memory(mem), salt)
}

/// Generates an `AllocId` for a static or return a cached one in case this function has been
/// called on the same static before.
pub fn reserve_and_set_static_alloc(self, static_id: DefId) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Static(static_id))
}

/// Generates an `AllocId` for a function. The caller must already have decided whether this
/// function obtains a unique AllocId or gets de-duplicated via the cache.
fn reserve_and_set_fn_alloc_internal(self, instance: Instance<'tcx>, unique: bool) -> AllocId {
let alloc = GlobalAlloc::Function { instance, unique };
if unique {
// Deduplicate.
self.reserve_and_set_dedup(alloc)
} else {
// Get a fresh ID.
let mut alloc_map = self.alloc_map.lock();
let id = alloc_map.reserve();
alloc_map.alloc_map.insert(id, alloc);
id
}
let salt = 0; // Statics have a guaranteed unique address, no salt added.
self.reserve_and_set_dedup(GlobalAlloc::Static(static_id), salt)
}

/// Generates an `AllocId` for a function. Depending on the function type,
/// this might get deduplicated or assigned a new ID each time.
pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>) -> AllocId {
// Functions cannot be identified by pointers, as asm-equal functions can get deduplicated
// by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be
// duplicated across crates. We thus generate a new `AllocId` for every mention of a
// function. This means that `main as fn() == main as fn()` is false, while `let x = main as
// fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify
// certain functions uniquely, e.g. for backtraces. So we identify whether codegen will
// actually emit duplicate functions. It does that when they have non-lifetime generics, or
// when they can be inlined. All other functions are given a unique address.
// This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied
// upon for anything. But if we don't do this, backtraces look terrible.
let is_generic = instance
.args
.into_iter()
.any(|kind| !matches!(kind.unpack(), GenericArgKind::Lifetime(_)));
let can_be_inlined = match self.codegen_fn_attrs(instance.def_id()).inline {
InlineAttr::Never => false,
_ => true,
};
let unique = !is_generic && !can_be_inlined;
self.reserve_and_set_fn_alloc_internal(instance, unique)
/// Generates an `AllocId` for a function. Will get deduplicated.
pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>, salt: usize) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Function { instance }, salt)
}

/// Generates an `AllocId` for a (symbolic, not-reified) vtable. Will get deduplicated.
pub fn reserve_and_set_vtable_alloc(
self,
ty: Ty<'tcx>,
poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
salt: usize,
) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref))
self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref), salt)
}

/// Interns the `Allocation` and return a new `AllocId`, even if there's already an identical
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_middle/src/ty/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1438,11 +1438,11 @@ impl<'tcx> TyCtxt<'tcx> {

/// Allocates a read-only byte or string literal for `mir::interpret`.
/// Returns the same `AllocId` if called again with the same bytes.
pub fn allocate_bytes_dedup(self, bytes: &[u8]) -> interpret::AllocId {
pub fn allocate_bytes_dedup(self, bytes: &[u8], salt: usize) -> interpret::AllocId {
// Create an allocation that just contains these bytes.
let alloc = interpret::Allocation::from_bytes_byte_aligned_immutable(bytes);
let alloc = self.mk_const_alloc(alloc);
self.reserve_and_set_memory_dedup(alloc)
self.reserve_and_set_memory_dedup(alloc, salt)
}

/// Returns a range of the start/end indices specified with the
Expand Down
11 changes: 8 additions & 3 deletions compiler/rustc_middle/src/ty/vtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::fmt;
use rustc_ast::Mutability;
use rustc_macros::HashStable;

use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar};
use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar, CTFE_ALLOC_SALT};
use crate::ty::{self, Instance, PolyTraitRef, Ty, TyCtxt};

#[derive(Clone, Copy, PartialEq, HashStable)]
Expand Down Expand Up @@ -73,6 +73,11 @@ pub(crate) fn vtable_min_entries<'tcx>(

/// Retrieves an allocation that represents the contents of a vtable.
/// Since this is a query, allocations are cached and not duplicated.
///
/// This is an "internal" `AllocId` that should never be used as a value in the interpreted program.
/// The interpreter should use `AllocId` that refer to a `GlobalAlloc::VTable` instead.
/// (This is similar to statics, which also have a similar "internal" `AllocId` storing their
/// initial contents.)
pub(super) fn vtable_allocation_provider<'tcx>(
tcx: TyCtxt<'tcx>,
key: (Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>),
Expand Down Expand Up @@ -114,7 +119,7 @@ pub(super) fn vtable_allocation_provider<'tcx>(
VtblEntry::MetadataDropInPlace => {
if ty.needs_drop(tcx, ty::ParamEnv::reveal_all()) {
let instance = ty::Instance::resolve_drop_in_place(tcx, ty);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT);
let fn_ptr = Pointer::from(fn_alloc_id);
Scalar::from_pointer(fn_ptr, &tcx)
} else {
Expand All @@ -127,7 +132,7 @@ pub(super) fn vtable_allocation_provider<'tcx>(
VtblEntry::Method(instance) => {
// Prepare the fn ptr we write into the vtable.
let instance = instance.polymorphize(tcx);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT);
let fn_ptr = Pointer::from(fn_alloc_id);
Scalar::from_pointer(fn_ptr, &tcx)
}
Expand Down
6 changes: 4 additions & 2 deletions compiler/rustc_mir_build/src/build/expr/as_constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

use rustc_ast as ast;
use rustc_hir::LangItem;
use rustc_middle::mir::interpret::{Allocation, LitToConstError, LitToConstInput, Scalar};
use rustc_middle::mir::interpret::{
Allocation, LitToConstError, LitToConstInput, Scalar, CTFE_ALLOC_SALT,
};
use rustc_middle::mir::*;
use rustc_middle::thir::*;
use rustc_middle::ty::{
Expand Down Expand Up @@ -140,7 +142,7 @@ fn lit_to_mir_constant<'tcx>(
ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() }
}
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
let id = tcx.allocate_bytes_dedup(data);
let id = tcx.allocate_bytes_dedup(data, CTFE_ALLOC_SALT);
ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
}
(ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) =>
Expand Down
Loading

0 comments on commit 591ecb8

Please sign in to comment.