Skip to content

Commit

Permalink
Rollup merge of rust-lang#108022 - CraftSpider:align-bytes, r=oli-obk
Browse files Browse the repository at this point in the history
Support allocations with non-Box<[u8]> bytes

This is prep work for allowing miri to support passing pointers to C code, which will require `Allocation`s to be correctly aligned. Currently, it just makes `Allocation` generic and plumbs the necessary changes through the right places.

The follow-up to this will add a type in the Miri interpreter that correctly aligns the bytes, use that type for the Miri engine, and then allow Miri to pass pointers into these allocations to C calls.

Based on rust-lang#100467; credit to @emarteca for the code.
  • Loading branch information
matthiaskrgr authored Mar 2, 2023
2 parents 13471d3 + f26b0a2 commit f75f440
Show file tree
Hide file tree
Showing 7 changed files with 128 additions and 50 deletions.
15 changes: 11 additions & 4 deletions compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ use rustc_target::spec::abi::Abi as CallAbi;
use crate::const_eval::CheckAlignment;

use super::{
AllocId, AllocRange, Allocation, ConstAllocation, Frame, ImmTy, InterpCx, InterpResult,
MemoryKind, OpTy, Operand, PlaceTy, Pointer, Provenance, Scalar, StackPopUnwind,
AllocBytes, AllocId, AllocRange, Allocation, ConstAllocation, Frame, ImmTy, InterpCx,
InterpResult, MemoryKind, OpTy, Operand, PlaceTy, Pointer, Provenance, Scalar, StackPopUnwind,
};

/// Data returned by Machine::stack_pop,
Expand Down Expand Up @@ -105,10 +105,16 @@ pub trait Machine<'mir, 'tcx>: Sized {
/// Extra data stored in every allocation.
type AllocExtra: Debug + Clone + 'static;

/// Type for the bytes of the allocation.
type Bytes: AllocBytes + 'static;

/// Memory's allocation map
type MemoryMap: AllocMap<
AllocId,
(MemoryKind<Self::MemoryKind>, Allocation<Self::Provenance, Self::AllocExtra>),
(
MemoryKind<Self::MemoryKind>,
Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>,
),
> + Default
+ Clone;

Expand Down Expand Up @@ -338,7 +344,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
id: AllocId,
alloc: Cow<'b, Allocation>,
kind: Option<MemoryKind<Self::MemoryKind>>,
) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra>>>;
) -> InterpResult<'tcx, Cow<'b, Allocation<Self::Provenance, Self::AllocExtra, Self::Bytes>>>;

fn eval_inline_asm(
_ecx: &mut InterpCx<'mir, 'tcx, Self>,
Expand Down Expand Up @@ -459,6 +465,7 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {

type AllocExtra = ();
type FrameExtra = ();
type Bytes = Box<[u8]>;

#[inline(always)]
fn use_addr_for_alignment_check(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool {
Expand Down
46 changes: 31 additions & 15 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ use rustc_target::abi::{Align, HasDataLayout, Size};
use crate::const_eval::CheckAlignment;

use super::{
alloc_range, AllocId, AllocMap, AllocRange, Allocation, CheckInAllocMsg, GlobalAlloc, InterpCx,
InterpResult, Machine, MayLeak, Pointer, PointerArithmetic, Provenance, Scalar,
alloc_range, AllocBytes, AllocId, AllocMap, AllocRange, Allocation, CheckInAllocMsg,
GlobalAlloc, InterpCx, InterpResult, Machine, MayLeak, Pointer, PointerArithmetic, Provenance,
Scalar,
};

#[derive(Debug, PartialEq, Copy, Clone)]
Expand Down Expand Up @@ -114,16 +115,16 @@ pub struct Memory<'mir, 'tcx, M: Machine<'mir, 'tcx>> {
/// A reference to some allocation that was already bounds-checked for the given region
/// and had the on-access machine hooks run.
#[derive(Copy, Clone)]
pub struct AllocRef<'a, 'tcx, Prov: Provenance, Extra> {
alloc: &'a Allocation<Prov, Extra>,
pub struct AllocRef<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
alloc: &'a Allocation<Prov, Extra, Bytes>,
range: AllocRange,
tcx: TyCtxt<'tcx>,
alloc_id: AllocId,
}
/// A reference to some allocation that was already bounds-checked for the given region
/// and had the on-access machine hooks run.
pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra> {
alloc: &'a mut Allocation<Prov, Extra>,
pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
alloc: &'a mut Allocation<Prov, Extra, Bytes>,
range: AllocRange,
tcx: TyCtxt<'tcx>,
alloc_id: AllocId,
Expand Down Expand Up @@ -483,7 +484,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
&self,
id: AllocId,
is_write: bool,
) -> InterpResult<'tcx, Cow<'tcx, Allocation<M::Provenance, M::AllocExtra>>> {
) -> InterpResult<'tcx, Cow<'tcx, Allocation<M::Provenance, M::AllocExtra, M::Bytes>>> {
let (alloc, def_id) = match self.tcx.try_get_global_alloc(id) {
Some(GlobalAlloc::Memory(mem)) => {
// Memory of a constant or promoted or anonymous memory referenced by a static.
Expand Down Expand Up @@ -526,14 +527,25 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
)
}

/// Looks up the `Allocation` identified by `id` and returns the address of the
/// first byte of its backing storage. Any error produced by the lookup is
/// passed through to the caller unchanged.
///
/// The caller must treat the returned address with care: the memory behind it
/// may contain provenance or uninitialized bytes, and other memory accesses
/// could invalidate the exposed pointer.
pub fn alloc_base_addr(&self, id: AllocId) -> InterpResult<'tcx, *const u8> {
    self.get_alloc_raw(id).map(|allocation| allocation.base_addr())
}

/// Gives raw access to the `Allocation`, without bounds or alignment checks.
/// The caller is responsible for calling the access hooks!
///
/// You almost certainly want to use `get_ptr_alloc`/`get_ptr_alloc_mut` instead.
fn get_alloc_raw(
&self,
id: AllocId,
) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra>> {
) -> InterpResult<'tcx, &Allocation<M::Provenance, M::AllocExtra, M::Bytes>> {
// The error type of the inner closure here is somewhat funny. We have two
// ways of "erroring": An actual error, or because we got a reference from
// `get_global_alloc` that we can actually use directly without inserting anything anywhere.
Expand Down Expand Up @@ -569,7 +581,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ptr: Pointer<Option<M::Provenance>>,
size: Size,
align: Align,
) -> InterpResult<'tcx, Option<AllocRef<'a, 'tcx, M::Provenance, M::AllocExtra>>> {
) -> InterpResult<'tcx, Option<AllocRef<'a, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>>
{
let ptr_and_alloc = self.check_and_deref_ptr(
ptr,
size,
Expand Down Expand Up @@ -612,7 +625,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
fn get_alloc_raw_mut(
&mut self,
id: AllocId,
) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra>, &mut M)> {
) -> InterpResult<'tcx, (&mut Allocation<M::Provenance, M::AllocExtra, M::Bytes>, &mut M)> {
// We have "NLL problem case #3" here, which cannot be worked around without loss of
// efficiency even for the common case where the key is in the map.
// <https://rust-lang.github.io/rfcs/2094-nll.html#problem-case-3-conditional-control-flow-across-functions>
Expand Down Expand Up @@ -641,7 +654,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ptr: Pointer<Option<M::Provenance>>,
size: Size,
align: Align,
) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::Provenance, M::AllocExtra>>> {
) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>>
{
let parts = self.get_ptr_access(ptr, size, align)?;
if let Some((alloc_id, offset, prov)) = parts {
let tcx = *self.tcx;
Expand Down Expand Up @@ -840,11 +854,11 @@ pub struct DumpAllocs<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> {
impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> std::fmt::Debug for DumpAllocs<'a, 'mir, 'tcx, M> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Cannot be a closure because it is generic in `Prov`, `Extra`, and `Bytes`.
fn write_allocation_track_relocs<'tcx, Prov: Provenance, Extra>(
fn write_allocation_track_relocs<'tcx, Prov: Provenance, Extra, Bytes: AllocBytes>(
fmt: &mut std::fmt::Formatter<'_>,
tcx: TyCtxt<'tcx>,
allocs_to_print: &mut VecDeque<AllocId>,
alloc: &Allocation<Prov, Extra>,
alloc: &Allocation<Prov, Extra, Bytes>,
) -> std::fmt::Result {
for alloc_id in alloc.provenance().provenances().filter_map(|prov| prov.get_alloc_id())
{
Expand Down Expand Up @@ -912,7 +926,9 @@ impl<'a, 'mir, 'tcx, M: Machine<'mir, 'tcx>> std::fmt::Debug for DumpAllocs<'a,
}

/// Reading and writing.
impl<'tcx, 'a, Prov: Provenance, Extra> AllocRefMut<'a, 'tcx, Prov, Extra> {
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
{
/// `range` is relative to this allocation reference, not the base of the allocation.
pub fn write_scalar(&mut self, range: AllocRange, val: Scalar<Prov>) -> InterpResult<'tcx> {
let range = self.range.subrange(range);
Expand All @@ -937,7 +953,7 @@ impl<'tcx, 'a, Prov: Provenance, Extra> AllocRefMut<'a, 'tcx, Prov, Extra> {
}
}

impl<'tcx, 'a, Prov: Provenance, Extra> AllocRef<'a, 'tcx, Prov, Extra> {
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> {
/// `range` is relative to this allocation reference, not the base of the allocation.
pub fn read_scalar(
&self,
Expand Down
6 changes: 4 additions & 2 deletions compiler/rustc_const_eval/src/interpret/place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,8 @@ where
pub(super) fn get_place_alloc(
&self,
place: &MPlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, Option<AllocRef<'_, 'tcx, M::Provenance, M::AllocExtra>>> {
) -> InterpResult<'tcx, Option<AllocRef<'_, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>>
{
assert!(place.layout.is_sized());
assert!(!place.meta.has_meta());
let size = place.layout.size;
Expand All @@ -364,7 +365,8 @@ where
pub(super) fn get_place_alloc_mut(
&mut self,
place: &MPlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, Option<AllocRefMut<'_, 'tcx, M::Provenance, M::AllocExtra>>> {
) -> InterpResult<'tcx, Option<AllocRefMut<'_, 'tcx, M::Provenance, M::AllocExtra, M::Bytes>>>
{
assert!(place.layout.is_sized());
assert!(!place.meta.has_meta());
let size = place.layout.size;
Expand Down
82 changes: 67 additions & 15 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ mod tests;
use std::borrow::Cow;
use std::fmt;
use std::hash;
use std::ops::Range;
use std::hash::Hash;
use std::ops::{Deref, DerefMut, Range};
use std::ptr;

use either::{Left, Right};
Expand All @@ -29,6 +30,39 @@ use provenance_map::*;

pub use init_mask::{InitChunk, InitChunkIter};

/// Functionality required for the bytes of an `Allocation`.
///
/// Implementors expose their contents as a `[u8]` slice through `Deref`/`DerefMut`.
pub trait AllocBytes:
Clone + fmt::Debug + Eq + PartialEq + Hash + Deref<Target = [u8]> + DerefMut<Target = [u8]>
{
/// Adjust the bytes to the specified alignment.
///
/// This is a required method (there is no default body); implementations that impose
/// no alignment requirement on their storage can simply return `self` unchanged.
fn adjust_to_align(self, _align: Align) -> Self;

/// Create an `AllocBytes` from a slice of `u8`.
fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self;

/// Create a zeroed `AllocBytes` of the specified size and alignment.
///
/// Returns `None` if the memory could not be allocated; the caller decides how to
/// report that failure.
fn zeroed(size: Size, _align: Align) -> Option<Self>;
}

// `Box<[u8]>` is the default `Bytes` type of an `Allocation`.
impl AllocBytes for Box<[u8]> {
    /// A plain boxed slice carries no alignment requirement, so the value is
    /// handed back untouched.
    fn adjust_to_align(self, _align: Align) -> Self {
        self
    }

    /// Builds the boxed slice from borrowed or owned bytes; `_align` is ignored.
    fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self {
        let cow: Cow<'a, [u8]> = slice.into();
        cow.into()
    }

    /// Allocates `size` zeroed bytes, yielding `None` when the allocation fails.
    /// `_align` is ignored.
    fn zeroed(size: Size, _align: Align) -> Option<Self> {
        let len = size.bytes_usize();
        match Box::<[u8]>::try_new_zeroed_slice(len) {
            // SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
            Ok(zeroed) => Some(unsafe { zeroed.assume_init() }),
            Err(_) => None,
        }
    }
}

/// This type represents an Allocation in the Miri/CTFE core engine.
///
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
Expand All @@ -38,10 +72,10 @@ pub use init_mask::{InitChunk, InitChunkIter};
// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
#[derive(Clone, Eq, PartialEq, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct Allocation<Prov: Provenance = AllocId, Extra = ()> {
pub struct Allocation<Prov: Provenance = AllocId, Extra = (), Bytes = Box<[u8]>> {
/// The actual bytes of the allocation.
/// Note that the bytes of a pointer represent the offset of the pointer.
bytes: Box<[u8]>,
bytes: Bytes,
/// Maps from byte addresses to extra provenance data for each pointer.
/// Only the first byte of a pointer is inserted into the map; i.e.,
/// every entry in this map applies to `pointer_size` consecutive bytes starting
Expand Down Expand Up @@ -220,14 +254,27 @@ impl AllocRange {
}

// The constructors are all without extra; the extra gets added by a machine hook later.
impl<Prov: Provenance> Allocation<Prov> {
impl<Prov: Provenance, Bytes: AllocBytes> Allocation<Prov, (), Bytes> {
/// Wraps an already-constructed `Bytes` value in an `Allocation` - this is needed for
/// miri FFI support.
///
/// The result has an empty provenance map, an init mask covering `bytes.len()` bytes
/// that is created with the flag `true`, and the given `align` and `mutability`.
pub fn from_raw_bytes(bytes: Bytes, align: Align, mutability: Mutability) -> Self {
    // Read the length before `bytes` is moved into the struct below.
    let init_mask = InitMask::new(Size::from_bytes(bytes.len()), true);
    let provenance = ProvenanceMap::new();
    Self { bytes, provenance, init_mask, align, mutability, extra: () }
}

/// Creates an allocation initialized by the given bytes
pub fn from_bytes<'a>(
slice: impl Into<Cow<'a, [u8]>>,
align: Align,
mutability: Mutability,
) -> Self {
let bytes = Box::<[u8]>::from(slice.into());
let bytes = Bytes::from_bytes(slice, align);
let size = Size::from_bytes(bytes.len());
Self {
bytes,
Expand All @@ -248,7 +295,7 @@ impl<Prov: Provenance> Allocation<Prov> {
///
/// If `panic_on_fail` is true, this will never return `Err`.
pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> {
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| {
let bytes = Bytes::zeroed(size, align).ok_or_else(|| {
// This results in an error that can happen non-deterministically, since the memory
// available to the compiler can change between runs. Normally queries are always
// deterministic. However, we can be non-deterministic here because all uses of const
Expand All @@ -262,8 +309,7 @@ impl<Prov: Provenance> Allocation<Prov> {
});
InterpError::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted)
})?;
// SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
let bytes = unsafe { bytes.assume_init() };

Ok(Allocation {
bytes,
provenance: ProvenanceMap::new(),
Expand All @@ -275,17 +321,19 @@ impl<Prov: Provenance> Allocation<Prov> {
}
}

impl Allocation {
impl<Bytes: AllocBytes> Allocation<AllocId, (), Bytes> {
/// Adjust allocation from the ones in tcx to a custom Machine instance
/// with a different Provenance and Extra type.
pub fn adjust_from_tcx<Prov: Provenance, Extra, Err>(
self,
cx: &impl HasDataLayout,
extra: Extra,
mut adjust_ptr: impl FnMut(Pointer<AllocId>) -> Result<Pointer<Prov>, Err>,
) -> Result<Allocation<Prov, Extra>, Err> {
// Compute new pointer provenance, which also adjusts the bytes.
let mut bytes = self.bytes;
) -> Result<Allocation<Prov, Extra, Bytes>, Err> {
// Compute new pointer provenance, which also adjusts the bytes, and realign the pointer if
// necessary.
let mut bytes = self.bytes.adjust_to_align(self.align);

let mut new_provenance = Vec::with_capacity(self.provenance.ptrs().len());
let ptr_size = cx.data_layout().pointer_size.bytes_usize();
let endian = cx.data_layout().endian;
Expand All @@ -311,7 +359,7 @@ impl Allocation {
}

/// Raw accessors. Provide access to otherwise private bytes.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
/// Returns the number of bytes held by this allocation.
pub fn len(&self) -> usize {
self.bytes.len()
}
Expand Down Expand Up @@ -340,7 +388,11 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
}

/// Byte accessors.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
/// Returns a raw pointer to the start of this allocation's byte storage,
/// obtained by calling `as_ptr()` on the underlying bytes.
///
/// No provenance or initialization checks are performed here.
pub fn base_addr(&self) -> *const u8 {
self.bytes.as_ptr()
}

/// This is the entirely abstraction-violating way to just grab the raw bytes without
/// caring about provenance or initialization.
///
Expand Down Expand Up @@ -412,7 +464,7 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
}

/// Reading and writing.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
/// Sets the init bit for the given range.
fn mark_init(&mut self, range: AllocRange, is_init: bool) {
if range.size.bytes() == 0 {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_middle/src/mir/interpret/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ pub use self::error::{
pub use self::value::{get_slice_bytes, ConstAlloc, ConstValue, Scalar};

pub use self::allocation::{
alloc_range, AllocError, AllocRange, AllocResult, Allocation, ConstAllocation, InitChunk,
InitChunkIter,
alloc_range, AllocBytes, AllocError, AllocRange, AllocResult, Allocation, ConstAllocation,
InitChunk, InitChunkIter,
};

pub use self::pointer::{Pointer, PointerArithmetic, Provenance};
Expand Down
Loading

0 comments on commit f75f440

Please sign in to comment.