Skip to content

Commit

Permalink
Auto merge of #129778 - RalfJung:interp-lossy-typed-copy, r=saethlin
Browse files Browse the repository at this point in the history
interpret: make typed copies lossy wrt provenance and padding

A "typed copy" in Rust can be a lossy process: when copying at type `usize` (or any other non-pointer type), if the original memory had any provenance, that provenance is lost. When copying at pointer type, if the original memory had partial provenance (i.e., not the same provenance for all bytes), that provenance is lost. When copying any type with padding, the contents of padding are lost.

This PR equips our validity-checking pass with the ability to reset provenance and padding according to those rules. Can be reviewed commit-by-commit. The first three commits are just preparation without any functional change.

Fixes #845
Fixes #2182
  • Loading branch information
bors committed Sep 10, 2024
2 parents 9a46223 + ed3e1c4 commit f04fc4d
Show file tree
Hide file tree
Showing 33 changed files with 503 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/concurrency/data_race.rs
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
let old_val = this.run_for_validation(|this| this.read_scalar(dest)).ok();
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
this.validate_atomic_store(dest, atomic)?;
this.buffered_atomic_write(val, dest, atomic, old_val)
Expand Down
8 changes: 4 additions & 4 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Dereference a pointer operand to a place using `layout` instead of the pointer's declared type
fn deref_pointer_as(
&self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
layout: TyAndLayout<'tcx>,
) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
let this = self.eval_context_ref();
Expand All @@ -880,7 +880,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Calculates the MPlaceTy given the offset and layout of an access on an operand
fn deref_pointer_and_offset(
&self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
offset: u64,
base_layout: TyAndLayout<'tcx>,
value_layout: TyAndLayout<'tcx>,
Expand All @@ -897,7 +897,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

fn deref_pointer_and_read(
&self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
offset: u64,
base_layout: TyAndLayout<'tcx>,
value_layout: TyAndLayout<'tcx>,
Expand All @@ -909,7 +909,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

fn deref_pointer_and_write(
&mut self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
offset: u64,
value: impl Into<Scalar>,
base_layout: TyAndLayout<'tcx>,
Expand Down
6 changes: 4 additions & 2 deletions src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// ```
// Would not be considered UB, or the other way around (`is_val_statically_known(0)`).
"is_val_statically_known" => {
let [arg] = check_arg_count(args)?;
this.validate_operand(arg, /*recursive*/ false)?;
let [_arg] = check_arg_count(args)?;
// FIXME: should we check for validity here? It's tricky because we do not have a
// place. Codegen does not seem to set any attributes like `noundef` for intrinsic
// calls, so we don't *have* to do anything.
let branch: bool = this.machine.rng.get_mut().gen();
this.write_scalar(Scalar::from_bool(branch), dest)?;
}
Expand Down
13 changes: 13 additions & 0 deletions src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,9 @@ pub struct MiriMachine<'tcx> {
/// Invariant: the promised alignment will never be less than the native alignment of the
/// allocation.
pub(crate) symbolic_alignment: RefCell<FxHashMap<AllocId, (Size, Align)>>,

/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
}

impl<'tcx> MiriMachine<'tcx> {
Expand Down Expand Up @@ -714,6 +717,7 @@ impl<'tcx> MiriMachine<'tcx> {
allocation_spans: RefCell::new(FxHashMap::default()),
const_cache: RefCell::new(FxHashMap::default()),
symbolic_alignment: RefCell::new(FxHashMap::default()),
union_data_ranges: FxHashMap::default(),
}
}

Expand Down Expand Up @@ -826,6 +830,7 @@ impl VisitProvenance for MiriMachine<'_> {
allocation_spans: _,
const_cache: _,
symbolic_alignment: _,
union_data_ranges: _,
} = self;

threads.visit_provenance(visit);
Expand Down Expand Up @@ -1627,4 +1632,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL
}
}

fn cached_union_data_range<'e>(
ecx: &'e mut InterpCx<'tcx, Self>,
ty: Ty<'tcx>,
compute_range: impl FnOnce() -> RangeSet,
) -> Cow<'e, RangeSet> {
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
}
}
10 changes: 10 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance0.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use std::mem;

// Doing a copy at integer type should lose provenance.
// This tests the unoptimized base case.
fn main() {
let ptrs = [(&42, true)];
let ints: [(usize, bool); 1] = unsafe { mem::transmute(ptrs) };
let ptr = (&raw const ints[0].0).cast::<&i32>();
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}
15 changes: 15 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance0.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance0.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance0.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

10 changes: 10 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
use std::mem;

// Doing a copy at integer type should lose provenance.
// This tests the optimized-array case of integer copies.
fn main() {
let ptrs = [&42];
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
let ptr = (&raw const ints[0]).cast::<&i32>();
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}
15 changes: 15 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance1.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance1.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance1.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

12 changes: 12 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
use std::mem;

// Doing a copy at integer type should lose provenance.
// This tests the case where provenacne is hiding in the metadata of a pointer.
fn main() {
let ptrs = [(&42, &42)];
// Typed copy at wide pointer type (with integer-typed metadata).
let ints: [*const [usize]; 1] = unsafe { mem::transmute(ptrs) };
// Get a pointer to the metadata field.
let ptr = (&raw const ints[0]).wrapping_byte_add(mem::size_of::<*const ()>()).cast::<&i32>();
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}
15 changes: 15 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance2.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance2.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance2.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

29 changes: 29 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance3.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#![feature(strict_provenance)]
use std::mem;

#[repr(C, usize)]
#[allow(unused)]
enum E {
Var1(usize),
Var2(usize),
}

// Doing a copy at integer type should lose provenance.
// This tests the case where provenacne is hiding in the discriminant of an enum.
fn main() {
assert_eq!(mem::size_of::<E>(), 2*mem::size_of::<usize>());

// We want to store provenance in the enum discriminant, but the value still needs to
// be valid atfor the type. So we split provenance and data.
let ptr = &42;
let ptr = ptr as *const i32;
let ptrs = [(ptr.with_addr(0), ptr)];
// Typed copy at the enum type.
let ints: [E; 1] = unsafe { mem::transmute(ptrs) };
// Read the discriminant.
let discr = unsafe { (&raw const ints[0]).cast::<*const i32>().read() };
// Take the provenance from there, together with the original address.
let ptr = discr.with_addr(ptr.addr());
// There should be no provenance is `discr`, so this should be UB.
let _val = unsafe { *ptr }; //~ERROR: dangling
}
15 changes: 15 additions & 0 deletions tests/fail/provenance/int_copy_looses_provenance3.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/int_copy_looses_provenance3.rs:LL:CC
|
LL | let _val = unsafe { *ptr };
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance3.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

18 changes: 18 additions & 0 deletions tests/fail/provenance/ptr_copy_loses_partial_provenance0.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
fn main() {
let half_ptr = std::mem::size_of::<*const ()>() / 2;
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();

unsafe {
// Put a pointer in the middle.
bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42);
// Typed copy of the entire thing as two pointers, but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[*const (); 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned();
// Dereferencing this should fail as the copy has removed the provenance.
let _val = *ptr; //~ERROR: dangling
}
}
15 changes: 15 additions & 0 deletions tests/fail/provenance/ptr_copy_loses_partial_provenance0.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
|
LL | let _val = *ptr;
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

18 changes: 18 additions & 0 deletions tests/fail/provenance/ptr_copy_loses_partial_provenance1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
fn main() {
let half_ptr = std::mem::size_of::<*const ()>() / 2;
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();

unsafe {
// Put a pointer in the middle.
bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42);
// Typed copy of the entire thing as two *function* pointers, but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[fn(); 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned();
// Dereferencing this should fail as the copy has removed the provenance.
let _val = *ptr; //~ERROR: dangling
}
}
15 changes: 15 additions & 0 deletions tests/fail/provenance/ptr_copy_loses_partial_provenance1.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
|
LL | let _val = *ptr;
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

23 changes: 23 additions & 0 deletions tests/fail/uninit/padding-enum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use std::mem;

// We have three fields to avoid the ScalarPair optimization.
#[allow(unused)]
enum E {
None,
Some(&'static (), &'static (), usize),
}

fn main() { unsafe {
let mut p: mem::MaybeUninit<E> = mem::MaybeUninit::zeroed();
// The copy when `E` is returned from `transmute` should destroy padding
// (even when we use `write_unaligned`, which under the hood uses an untyped copy).
p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize, 0usize)));
// This is a `None`, so everything but the discriminant is padding.
assert!(matches!(*p.as_ptr(), E::None));

// Turns out the discriminant is (currently) stored
// in the 2nd pointer, so the first half is padding.
let c = &p as *const _ as *const u8;
let _val = *c.add(0); // Get a padding byte.
//~^ERROR: uninitialized
} }
15 changes: 15 additions & 0 deletions tests/fail/uninit/padding-enum.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/padding-enum.rs:LL:CC
|
LL | let _val = *c.add(0); // Get a padding byte.
| ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-enum.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

25 changes: 25 additions & 0 deletions tests/fail/uninit/padding-pair.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#![feature(core_intrinsics)]

use std::mem::{self, MaybeUninit};

fn main() {
// This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not.
// Ensure that these 9 bytes are indeed initialized, and the rest is indeed not.
// This should be the case even if we write into previously initialized storage.
let mut x: MaybeUninit<Box<[u8]>> = MaybeUninit::zeroed();
let z = std::intrinsics::add_with_overflow(0usize, 0usize);
unsafe { x.as_mut_ptr().cast::<(usize, bool)>().write(z) };
// Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by
// (`ptr_size - 1`) undef bytes (the padding after the bool) in there.
let z: *const u8 = &x as *const _ as *const _;
let first_undef = mem::size_of::<usize>() as isize + 1;
for i in 0..first_undef {
let byte = unsafe { *z.offset(i) };
assert_eq!(byte, 0);
}
let v = unsafe { *z.offset(first_undef) };
//~^ ERROR: uninitialized
if v == 0 {
println!("it is zero");
}
}
15 changes: 15 additions & 0 deletions tests/fail/uninit/padding-pair.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/padding-pair.rs:LL:CC
|
LL | let v = unsafe { *z.offset(first_undef) };
| ^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-pair.rs:LL:CC

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error

Loading

0 comments on commit f04fc4d

Please sign in to comment.