diff --git a/compiler/rustc_const_eval/src/interpret/machine.rs b/compiler/rustc_const_eval/src/interpret/machine.rs
index 530e252b7c077..2fb2f47fa8ca0 100644
--- a/compiler/rustc_const_eval/src/interpret/machine.rs
+++ b/compiler/rustc_const_eval/src/interpret/machine.rs
@@ -14,7 +14,7 @@ use rustc_target::abi::Size;
 use rustc_target::spec::abi::Abi as CallAbi;
 
 use super::{
-    AllocId, AllocRange, Allocation, ConstAllocation, Frame, ImmTy, InterpCx, InterpResult,
+    AllocId, AllocRange, Allocation, AllocBytes, ConstAllocation, Frame, ImmTy, InterpCx, InterpResult,
     MemoryKind, OpTy, Operand, PlaceTy, Pointer, Provenance, Scalar, StackPopUnwind,
 };
 
@@ -103,10 +103,13 @@ pub trait Machine<'mir, 'tcx>: Sized {
     /// Extra data stored in every allocation.
     type AllocExtra: Debug + Clone + 'static;
 
+    /// Type for the bytes of the allocation.
+    type Bytes: AllocBytes + 'static;
+
     /// Memory's allocation map
     type MemoryMap: AllocMap<
         AllocId,
-        (MemoryKind<Self::MemoryKind>, Allocation<Self::Provenance, Self::AllocExtra>),
+        (MemoryKind<Self::MemoryKind>, Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>),
     > + Default
         + Clone;
 
@@ -322,7 +325,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
         id: AllocId,
         alloc: Cow<'b, Allocation>,
         kind: Option<MemoryKind<Self::MemoryKind>>,
-    ) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra>>>;
+    ) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>>>;
 
     fn eval_inline_asm(
         _ecx: &mut InterpCx<'mir, 'tcx, Self>,
@@ -431,6 +434,7 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
 
     type AllocExtra = ();
     type FrameExtra = ();
+    type Bytes = Box<[u8]>;
 
     #[inline(always)]
     fn use_addr_for_alignment_check(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool {
diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index ed155fbfef087..16275a6df95e4 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -19,8 +19,9 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TyCtxt};
 use rustc_target::abi::{Align, HasDataLayout, Size};
 
 use super::{
-    alloc_range, AllocId, AllocMap, AllocRange, Allocation, CheckInAllocMsg, GlobalAlloc, InterpCx,
-    InterpResult, Machine, MayLeak, Pointer, PointerArithmetic, Provenance, Scalar,
+    alloc_range, AllocBytes, AllocId, AllocMap, AllocRange, Allocation, CheckInAllocMsg,
+    GlobalAlloc, InterpCx, InterpResult, Machine, MayLeak, Pointer, PointerArithmetic, Provenance,
+    Scalar,
 };
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -112,16 +113,16 @@ pub struct Memory<'mir, 'tcx, M: Machine<'mir, 'tcx>> {
 /// A reference to some allocation that was already bounds-checked for the given region
 /// and had the on-access machine hooks run.
 #[derive(Copy, Clone)]
-pub struct AllocRef<'a, 'tcx, Prov, Extra> {
-    alloc: &'a Allocation<Prov, Extra>,
+pub struct AllocRef<'a, 'tcx, Prov, Extra, Bytes: AllocBytes = Box<[u8]>> {
+    alloc: &'a Allocation<Prov, Extra, Bytes>,
     range: AllocRange,
     tcx: TyCtxt<'tcx>,
     alloc_id: AllocId,
 }
 /// A reference to some allocation that was already bounds-checked for the given region
 /// and had the on-access machine hooks run.
-pub struct AllocRefMut<'a, 'tcx, Prov, Extra> {
-    alloc: &'a mut Allocation<Prov, Extra>,
+pub struct AllocRefMut<'a, 'tcx, Prov, Extra, Bytes: AllocBytes = Box<[u8]>> {
+    alloc: &'a mut Allocation<Prov, Extra, Bytes>,
     range: AllocRange,
     tcx: TyCtxt<'tcx>,
     alloc_id: AllocId,
 }
 
@@ -475,7 +476,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         &self,
         id: AllocId,
         is_write: bool,
-    ) -> InterpResult<'tcx, Cow<'tcx, Allocation<M::Provenance, M::AllocExtra>>> {
+    ) -> InterpResult<'tcx, Cow<'tcx, Allocation<M::Provenance, M::AllocExtra, M::Bytes>>> {
         let (alloc, def_id) = match self.tcx.try_get_global_alloc(id) {
             Some(GlobalAlloc::Memory(mem)) => {
                 // Memory of a constant or promoted or anonymous memory referenced by a static.
@@ -517,6 +518,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         )
     }
 
+    /// Get the base address for the bytes in an `Allocation` specified by the
+    /// `AllocId` passed in; error if no such allocation exists. The address will
+    /// be exposed (on the host system!).
+    ///
+    /// It is up to the caller to take sufficient care when using this address:
+    /// there could be provenance or uninit memory in there, and other memory
+    /// accesses could invalidate the exposed pointer.
+    pub fn expose_alloc_base_addr(&self, id: AllocId) -> InterpResult<'tcx, usize> {
+        let alloc = self.get_alloc_raw(id)?;
+        Ok(alloc.expose_base_addr())
+    }
+
     /// Gives raw access to the `Allocation`, without bounds or alignment checks.
     /// The caller is responsible for calling the access hooks!
     ///
@@ -524,7 +537,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     fn get_alloc_raw(
         &self,
         id: AllocId,
-    ) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra>> {
+    ) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra, M::Bytes>> {
         // The error type of the inner closure here is somewhat funny. We have two
         // ways of "erroring": An actual error, or because we got a reference from
         // `get_global_alloc` that we can actually use directly without inserting anything anywhere.
@@ -560,7 +573,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         ptr: Pointer<Option<M::Provenance>>,
         size: Size,
         align: Align,
-    ) -> InterpResult<'tcx, Option<AllocRef<'_, 'tcx, M::Provenance, M::AllocExtra>>> {
+    ) -> InterpResult<'tcx, Option<AllocRef<'_, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>>
+    {
         let align = M::enforce_alignment(self).then_some(align);
         let ptr_and_alloc = self.check_and_deref_ptr(
             ptr,
@@ -603,7 +617,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
     fn get_alloc_raw_mut(
         &mut self,
         id: AllocId,
-    ) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra>, &mut M)> {
+    ) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra, M::Bytes>, &mut M)> {
         // We have "NLL problem case #3" here, which cannot be worked around without loss of
         // efficiency even for the common case where the key is in the map.
         //
@@ -632,7 +646,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         ptr: Pointer<Option<M::Provenance>>,
         size: Size,
         align: Align,
-    ) -> InterpResult<'tcx, Option<AllocRefMut<'_, 'tcx, M::Provenance, M::AllocExtra>>> {
+    ) -> InterpResult<'tcx, Option<AllocRefMut<'_, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>>
+    {
         let parts = self.get_ptr_access(ptr, size, align)?;
         if let Some((alloc_id, offset, prov)) = parts {
             let tcx = *self.tcx;
@@ -826,12 +841,12 @@ pub struct DumpAllocs<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> {
 impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> std::fmt::Debug for DumpAllocs<'a, 'mir, 'tcx, M> {
     fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        // Cannot be a closure because it is generic in `Prov`, `Extra`.
-        fn write_allocation_track_relocs<'tcx, Prov: Provenance, Extra>(
+        // Cannot be a closure because it is generic in `Prov`, `Extra`, and `Bytes`.
+        fn write_allocation_track_relocs<'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>(
             fmt: &mut std::fmt::Formatter<'_>,
             tcx: TyCtxt<'tcx>,
             allocs_to_print: &mut VecDeque<AllocId>,
-            alloc: &Allocation<Prov, Extra>,
+            alloc: &Allocation<Prov, Extra, Bytes>,
         ) -> std::fmt::Result {
             for alloc_id in alloc.provenance().values().filter_map(|prov| prov.get_alloc_id()) {
                 allocs_to_print.push_back(alloc_id);
             }
@@ -898,7 +913,9 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> std::fmt::Debug for DumpAllocs<'a,
 }
 
 /// Reading and writing.
-impl<'tcx, 'a, Prov: Provenance, Extra> AllocRefMut<'a, 'tcx, Prov, Extra> {
+impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
+    AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
+{
     /// `range` is relative to this allocation reference, not the base of the allocation.
     pub fn write_scalar(&mut self, range: AllocRange, val: Scalar<Prov>) -> InterpResult<'tcx> {
         let range = self.range.subrange(range);
@@ -923,7 +940,7 @@ impl<'tcx, 'a, Prov: Provenance, Extra> AllocRefMut<'a, 'tcx, Prov, Extra> {
     }
 }
 
-impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> {
+impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> {
     /// `range` is relative to this allocation reference, not the base of the allocation.
     pub fn read_scalar(
         &self,
diff --git a/compiler/rustc_const_eval/src/interpret/place.rs b/compiler/rustc_const_eval/src/interpret/place.rs
index b328892906df2..4cebc84198717 100644
--- a/compiler/rustc_const_eval/src/interpret/place.rs
+++ b/compiler/rustc_const_eval/src/interpret/place.rs
@@ -339,7 +339,7 @@ where
     pub(super) fn get_place_alloc(
         &self,
         place: &MPlaceTy<'tcx, M::Provenance>,
-    ) -> InterpResult<'tcx, Option<AllocRef<'_, 'tcx, M::Provenance, M::AllocExtra>>> {
+    ) -> InterpResult<'tcx, Option<AllocRef<'_, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>> {
         assert!(!place.layout.is_unsized());
         assert!(!place.meta.has_meta());
         let size = place.layout.size;
@@ -350,7 +350,7 @@ where
     pub(super) fn get_place_alloc_mut(
         &mut self,
         place: &MPlaceTy<'tcx, M::Provenance>,
-    ) -> InterpResult<'tcx, Option<AllocRefMut<'_, 'tcx, M::Provenance, M::AllocExtra>>> {
+    ) -> InterpResult<'tcx, Option<AllocRefMut<'_, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>> {
         assert!(!place.layout.is_unsized());
         assert!(!place.meta.has_meta());
         let size = place.layout.size;
diff --git a/compiler/rustc_middle/src/lib.rs b/compiler/rustc_middle/src/lib.rs
index 4064a8f97334b..e9677ec459fe8 100644
--- a/compiler/rustc_middle/src/lib.rs
+++ b/compiler/rustc_middle/src/lib.rs
@@ -23,6 +23,7 @@
 //! This API is completely unstable and subject to change.
 
 #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
+#![feature(maybe_uninit_write_slice)]
 #![feature(allocator_api)]
 #![feature(array_windows)]
 #![feature(assert_matches)]
@@ -59,6 +60,7 @@
 #![feature(intra_doc_pointers)]
 #![feature(yeet_expr)]
 #![feature(const_option)]
+#![feature(vec_into_raw_parts)]
 #![recursion_limit = "512"]
 #![allow(rustc::potential_query_instability)]
diff --git a/compiler/rustc_middle/src/mir/interpret/allocation.rs b/compiler/rustc_middle/src/mir/interpret/allocation.rs
index 37ec04b07f847..b64f43aa6121e 100644
--- a/compiler/rustc_middle/src/mir/interpret/allocation.rs
+++ b/compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -4,8 +4,9 @@ use std::borrow::Cow;
 use std::convert::{TryFrom, TryInto};
 use std::fmt;
 use std::hash;
+use std::hash::Hash;
 use std::iter;
-use std::ops::{Deref, Range};
+use std::ops::{Deref, DerefMut, Range};
 use std::ptr;
 
 use rustc_ast::Mutability;
@@ -21,6 +22,61 @@ use super::{
 };
 use crate::ty;
 
+/// Functionality required for the bytes of an `Allocation`.
+pub trait AllocBytes:
+    Clone
+    + core::fmt::Debug
+    + Eq
+    + PartialEq
+    + PartialOrd
+    + Ord
+    + core::hash::Hash
+    + Deref<Target = [u8]>
+    + DerefMut<Target = [u8]>
+{
+    /// The address of the bytes.
+    fn expose_addr(&self) -> u64;
+    /// Adjust the bytes to the specified alignment -- by default, this is a no-op.
+    fn adjust_to_align(self, _align: Align) -> Self;
+    /// Create an `AllocBytes` from a slice of `u8`.
+    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self;
+    /// Create an uninitialized `AllocBytes` of the specified size and alignment;
+    /// call the callback error handler if there is an error in allocating the memory.
+    fn uninit<'tcx, F: Fn() -> InterpError<'tcx>>(
+        size: Size,
+        _align: Align,
+        handle_alloc_fail: F,
+    ) -> Result<Self, InterpError<'tcx>>;
+}
+
+// Default `bytes` for `Allocation` is a `Box<[u8]>`.
+impl AllocBytes for Box<[u8]> {
+    fn uninit<'tcx, F: Fn() -> InterpError<'tcx>>(
+        size: Size,
+        _align: Align,
+        handle_alloc_fail: F,
+    ) -> Result<Self, InterpError<'tcx>> {
+        let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize())
+            .map_err(|_| handle_alloc_fail())?;
+        // SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
+        let bytes = unsafe { bytes.assume_init() };
+        Ok(bytes)
+    }
+
+    fn adjust_to_align(self, _align: Align) -> Self {
+        self
+    }
+
+    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self {
+        Box::<[u8]>::from(slice.into())
+    }
+
+    /// The real address of the bytes.
+    fn expose_addr(&self) -> u64 {
+        self.as_ptr() as u64
+    }
+}
+
 /// This type represents an Allocation in the Miri/CTFE core engine.
 ///
 /// Its public API is rather low-level, working directly with allocation offsets and a custom error
@@ -30,10 +86,10 @@ use crate::ty;
 // hashed. (see the `Hash` impl below for more details), so the impl is not derived.
 #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
 #[derive(HashStable)]
-pub struct Allocation<Prov = AllocId, Extra = ()> {
+pub struct Allocation<Prov = AllocId, Extra = (), Bytes = Box<[u8]>> {
     /// The actual bytes of the allocation.
     /// Note that the bytes of a pointer represent the offset of the pointer.
-    bytes: Box<[u8]>,
+    bytes: Bytes,
     /// Maps from byte addresses to extra provenance data for each pointer.
     /// Only the first byte of a pointer is inserted into the map; i.e.,
     /// every entry in this map applies to `pointer_size` consecutive bytes starting
@@ -205,15 +261,17 @@ impl AllocRange {
 }
 
 // The constructors are all without extra; the extra gets added by a machine hook later.
-impl<Prov> Allocation<Prov> {
+impl<Prov, Bytes: AllocBytes> Allocation<Prov, (), Bytes> {
     /// Creates an allocation initialized by the given bytes
+    // FIXME: make this fully generic over `Bytes`
     pub fn from_bytes<'a>(
         slice: impl Into<Cow<'a, [u8]>>,
         align: Align,
         mutability: Mutability,
     ) -> Self {
-        let bytes = Box::<[u8]>::from(slice.into());
+        let bytes = Bytes::from_bytes(slice, align);
         let size = Size::from_bytes(bytes.len());
+
         Self {
             bytes,
             provenance: ProvenanceMap::new(),
@@ -224,6 +282,10 @@ impl<Prov> Allocation<Prov> {
         }
     }
 
+    // FIXME: add a new constructor that just takes a `Bytes`; then build the
+    // allocation that way in `ffi_support`, and add it to the interpreter's
+    // memory with `allocate_raw_ptr`.
+
     pub fn from_bytes_byte_aligned_immutable<'a>(slice: impl Into<Cow<'a, [u8]>>) -> Self {
         Allocation::from_bytes(slice, Align::ONE, Mutability::Not)
     }
@@ -233,7 +295,7 @@ impl<Prov> Allocation<Prov> {
     ///
     /// If `panic_on_fail` is true, this will never return `Err`.
     pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> {
-        let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| {
+        let handle_alloc_fail = || -> InterpError<'tcx> {
             // This results in an error that can happen non-deterministically, since the memory
             // available to the compiler can change between runs. Normally queries are always
             // deterministic. However, we can be non-deterministic here because all uses of const
@@ -246,9 +308,10 @@ impl<Prov> Allocation<Prov> {
                 tcx.sess.delay_span_bug(DUMMY_SP, "exhausted memory during interpretation")
             });
             InterpError::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted)
-        })?;
-        // SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
-        let bytes = unsafe { bytes.assume_init() };
+        };
+
+        let bytes = Bytes::uninit(size, align, handle_alloc_fail)?;
+
         Ok(Allocation {
             bytes,
             provenance: ProvenanceMap::new(),
@@ -260,17 +323,20 @@ impl<Prov> Allocation<Prov> {
     }
 }
 
-impl Allocation {
+impl<Bytes: AllocBytes> Allocation<AllocId, (), Bytes> {
     /// Adjust allocation from the ones in tcx to a custom Machine instance
     /// with a different Provenance and Extra type.
+    // FIXME: make this generic over `Bytes`
     pub fn adjust_from_tcx<Prov: Provenance, Extra, Err>(
         self,
         cx: &impl HasDataLayout,
         extra: Extra,
         mut adjust_ptr: impl FnMut(Pointer<AllocId>) -> Result<Pointer<Prov>, Err>,
-    ) -> Result<Allocation<Prov, Extra>, Err> {
+    ) -> Result<Allocation<Prov, Extra, Bytes>, Err> {
         // Compute new pointer provenance, which also adjusts the bytes.
-        let mut bytes = self.bytes;
+        // Realign the pointer
+        let mut bytes = self.bytes.adjust_to_align(self.align);
+
         let mut new_provenance = Vec::with_capacity(self.provenance.0.len());
         let ptr_size = cx.data_layout().pointer_size.bytes_usize();
         let endian = cx.data_layout().endian;
@@ -283,6 +349,7 @@ impl Allocation {
                 write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
                 new_provenance.push((offset, ptr_prov));
             }
+
             // Create allocation.
             Ok(Allocation {
                 bytes,
@@ -296,7 +363,7 @@ impl Allocation {
 }
 
 /// Raw accessors. Provide access to otherwise private bytes.
-impl<Prov, Extra> Allocation<Prov, Extra> {
+impl<Prov, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
     pub fn len(&self) -> usize {
         self.bytes.len()
     }
@@ -325,8 +392,17 @@ impl<Prov, Extra> Allocation<Prov, Extra> {
 }
 
 /// Byte accessors.
-impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
-    /// This is the entirely abstraction-violating way to just grab the raw bytes without
+impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
+    /// Get the base address of the bytes.
+    pub fn expose_base_addr(&self) -> usize {
+        self.bytes.expose_addr().try_into().unwrap()
+    }
+
+    /// The last argument controls whether we error out when there are uninitialized or pointer
+    /// bytes. However, we *always* error when there are relocations overlapping the edges of the
+    /// range.
+    ///
+    /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
     /// caring about provenance or initialization.
     ///
     /// This function also guarantees that the resulting pointer will remain stable
@@ -392,7 +468,7 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
 }
 
 /// Reading and writing.
-impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
+impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
     /// Reads a *non-ZST* scalar.
     ///
     /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
@@ -506,7 +582,7 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
 }
 
 /// Provenance.
-impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
+impl<Prov: Copy, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
     /// Returns all provenance overlapping with the given pointer-offset pair.
     fn range_get_provenance(&self, cx: &impl HasDataLayout, range: AllocRange) -> &[(Size, Prov)] {
         // We have to go back `pointer_size - 1` bytes, as that one would still overlap with
@@ -621,7 +697,7 @@ pub struct AllocationProvenance<Prov> {
     dest_provenance: Vec<(Size, Prov)>,
 }
 
-impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
+impl<Prov: Copy, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
     pub fn prepare_provenance_copy(
         &self,
         cx: &impl HasDataLayout,
@@ -1128,7 +1204,7 @@ impl<'a> Iterator for InitChunkIter<'a> {
 }
 
 /// Uninitialized bytes.
-impl<Prov: Copy, Extra> Allocation<Prov, Extra> {
+impl<Prov: Copy, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
     /// Checks whether the given range is entirely initialized.
     ///
     /// Returns `Ok(())` if it's initialized. Otherwise returns the range of byte
@@ -1176,7 +1252,7 @@ impl InitMaskCompressed {
 }
 
 /// Transferring the initialization mask to other allocations.
-impl<Prov, Extra> Allocation<Prov, Extra> {
+impl<Prov, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
     /// Creates a run-length encoding of the initialization mask; panics if range is empty.
     ///
     /// This is essentially a more space-efficient version of
diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs
index 0fc1217d57196..e6b9f2c491b9a 100644
--- a/compiler/rustc_middle/src/mir/interpret/mod.rs
+++ b/compiler/rustc_middle/src/mir/interpret/mod.rs
@@ -127,7 +127,7 @@ pub use self::error::{
 pub use self::value::{get_slice_bytes, ConstAlloc, ConstValue, Scalar};
 
 pub use self::allocation::{
-    alloc_range, AllocRange, Allocation, ConstAllocation, InitChunk, InitChunkIter, InitMask,
+    alloc_range, AllocBytes, AllocRange, Allocation, ConstAllocation, InitChunk, InitChunkIter, InitMask,
     ProvenanceMap,
 };
 
diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs
index 88c16189f1dc6..543f0ef2e1464 100644
--- a/compiler/rustc_middle/src/mir/pretty.rs
+++ b/compiler/rustc_middle/src/mir/pretty.rs
@@ -12,7 +12,7 @@ use rustc_data_structures::fx::FxHashMap;
 use rustc_hir::def_id::DefId;
 use rustc_index::vec::Idx;
 use rustc_middle::mir::interpret::{
-    read_target_uint, AllocId, Allocation, ConstAllocation, ConstValue, GlobalAlloc, Pointer,
+    read_target_uint, AllocBytes, AllocId, Allocation, ConstAllocation, ConstValue, GlobalAlloc, Pointer,
     Provenance,
 };
 use rustc_middle::mir::visit::Visitor;
@@ -779,21 +779,21 @@ pub fn write_allocations<'tcx>(
 /// After the hex dump, an ascii dump follows, replacing all unprintable characters (control
 /// characters or characters whose value is larger than 127) with a `.`
 /// This also prints provenance adequately.
-pub fn display_allocation<'a, 'tcx, Prov, Extra>(
+pub fn display_allocation<'a, 'tcx, Prov, Extra, Bytes: AllocBytes>(
     tcx: TyCtxt<'tcx>,
-    alloc: &'a Allocation<Prov, Extra>,
-) -> RenderAllocation<'a, 'tcx, Prov, Extra> {
+    alloc: &'a Allocation<Prov, Extra, Bytes>,
+) -> RenderAllocation<'a, 'tcx, Prov, Extra, Bytes> {
     RenderAllocation { tcx, alloc }
 }
 
 #[doc(hidden)]
-pub struct RenderAllocation<'a, 'tcx, Prov, Extra> {
+pub struct RenderAllocation<'a, 'tcx, Prov, Extra, Bytes: AllocBytes = Box<[u8]>> {
     tcx: TyCtxt<'tcx>,
-    alloc: &'a Allocation<Prov, Extra>,
+    alloc: &'a Allocation<Prov, Extra, Bytes>,
 }
 
-impl<'a, 'tcx, Prov: Provenance, Extra> std::fmt::Display
-    for RenderAllocation<'a, 'tcx, Prov, Extra>
+impl<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes> std::fmt::Display
+    for RenderAllocation<'a, 'tcx, Prov, Extra, Bytes>
 {
     fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         let RenderAllocation { tcx, alloc } = *self;
@@ -837,9 +837,9 @@ fn write_allocation_newline(
 /// The `prefix` argument allows callers to add an arbitrary prefix before each line (even if there
 /// is only one line). Note that your prefix should contain a trailing space as the lines are
 /// printed directly after it.
-fn write_allocation_bytes<'tcx, Prov: Provenance, Extra>(
+fn write_allocation_bytes<'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>(
     tcx: TyCtxt<'tcx>,
-    alloc: &Allocation<Prov, Extra>,
+    alloc: &Allocation<Prov, Extra, Bytes>,
     w: &mut dyn std::fmt::Write,
     prefix: &str,
 ) -> std::fmt::Result {
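
To make the shape of the new abstraction easier to see outside the diff, here is a standalone sketch (an illustration, not part of the patch). It mirrors the `AllocBytes` surface introduced in `allocation.rs`, but replaces the rustc-internal types (`Size`, `Align`, `InterpError`, and the `'tcx` plumbing) with plain standard-library equivalents; the names `AllocBytesLike` and `AlignedBytes` are hypothetical. `Box<[u8]>` plays the role of the default storage that `compile_time_machine` selects via `type Bytes = Box<[u8]>`, while `AlignedBytes` stands in for the kind of machine-specific storage a Miri-style `Machine` could plug in: one that controls host alignment and can hand its base address to foreign code, which is the use case `expose_alloc_base_addr` serves.

// Standalone sketch: a simplified analogue of the `AllocBytes` abstraction.
// Names, signatures, and error types here are illustrative, not the patch's API.
use std::alloc::{alloc_zeroed, dealloc, Layout};
use std::borrow::Cow;
use std::ops::{Deref, DerefMut};

/// Simplified stand-in for the `AllocBytes` trait added in `allocation.rs`.
trait AllocBytesLike: Deref<Target = [u8]> + DerefMut<Target = [u8]> + Sized {
    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, align: usize) -> Self;
    fn uninit(size: usize, align: usize) -> Result<Self, String>;
    fn expose_addr(&self) -> u64;
}

/// The default storage, mirroring the patch's `impl AllocBytes for Box<[u8]>`.
impl AllocBytesLike for Box<[u8]> {
    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: usize) -> Self {
        Box::<[u8]>::from(slice.into())
    }
    fn uninit(size: usize, _align: usize) -> Result<Self, String> {
        Ok(vec![0u8; size].into_boxed_slice())
    }
    fn expose_addr(&self) -> u64 {
        self.as_ptr() as u64
    }
}

/// A hypothetical machine-specific storage that honours the requested alignment,
/// the kind of type a Miri-style machine could plug in through `type Bytes`.
struct AlignedBytes {
    ptr: *mut u8,
    len: usize,
    layout: Layout,
}

impl Deref for AlignedBytes {
    type Target = [u8];
    fn deref(&self) -> &[u8] {
        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
    }
}

impl DerefMut for AlignedBytes {
    fn deref_mut(&mut self) -> &mut [u8] {
        unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
    }
}

impl Drop for AlignedBytes {
    fn drop(&mut self) {
        unsafe { dealloc(self.ptr, self.layout) }
    }
}

impl AllocBytesLike for AlignedBytes {
    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, align: usize) -> Self {
        let slice = slice.into();
        let mut bytes = Self::uninit(slice.len(), align).expect("host allocation failed");
        bytes.copy_from_slice(&slice);
        bytes
    }
    fn uninit(size: usize, align: usize) -> Result<Self, String> {
        // Zero-size layouts are not valid for the global allocator, so round up to one byte.
        let layout = Layout::from_size_align(size.max(1), align).map_err(|e| e.to_string())?;
        let ptr = unsafe { alloc_zeroed(layout) };
        if ptr.is_null() {
            return Err("exhausted memory".to_string());
        }
        Ok(AlignedBytes { ptr, len: size, layout })
    }
    fn expose_addr(&self) -> u64 {
        self.ptr as u64
    }
}

fn main() {
    // The base address is aligned as requested and could be handed to host code (FFI).
    let bytes = AlignedBytes::from_bytes(&[1u8, 2, 3, 4][..], 4096);
    assert_eq!(bytes.expose_addr() % 4096, 0);
    assert_eq!(&bytes[..], &[1, 2, 3, 4]);
}

Keeping `Deref<Target = [u8]>`/`DerefMut` as supertraits is what lets all of the existing byte-level accessors on `Allocation` keep working unchanged regardless of which storage type a machine chooses.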