Auto merge of #118324 - RalfJung:ctfe-read-only-pointers, r=saethlin
compile-time evaluation: detect writes through immutable pointers

This has two motivations:
- it unblocks #116745 (and therefore takes a big step towards `const_mut_refs` stabilization), because we can now detect whether the memory that we find in a `const` can be interned as "immutable"
- it would detect the UB that was uncovered in #117905, which was caused by the accidental stabilization of `copy` functions in `const` that can only be called in ways that cause UB

When UB is detected, we emit a future-compat warn-by-default lint. This is not a breaking change, so it is completely in line with [the const-UB RFC](https://rust-lang.github.io/rfcs/3016-const-ub.html), meaning we don't need a t-lang FCP here. I made the lint show up immediately in dependencies, since it is nearly impossible to even trigger it without `const_mut_refs`: the accidentally stabilized `copy` functions are the only way this can happen, so the crates that popped up in #117905 are the only causes of such UB (in the code that crater covers), and the three cases of UB that we know about have all been fixed in their respective crates already.
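
To make the affected pattern concrete, here is a minimal, hypothetical illustration of the kind of const code this targets (not taken from the PR; whether `std::ptr::copy` is actually callable in this position depends on the accidental const-stabilization described above):

```rust
// Hypothetical illustration, not code from this PR: during const evaluation,
// writing through a pointer derived from a shared (immutable) reference is UB.
// With this change, CTFE detects the write and emits the new
// `WRITES_THROUGH_IMMUTABLE_POINTER` future-compat lint instead of silently
// accepting it.
const _: () = unsafe {
    let x = 0u8;
    // `p` inherits "immutable" provenance because it is derived from `&x`.
    let p = &x as *const u8 as *mut u8;
    // One of the accidentally const-stabilized `copy` functions performs the write.
    std::ptr::copy(&1u8 as *const u8, p, 1);
};
```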

The way this is implemented is by making use of the fact that our interpreter is already generic over the notion of provenance. For CTFE we now use the new `CtfeProvenance` type, which is conceptually an `AllocId` plus a boolean `immutable` flag (but packed for a more efficient representation). This means we can mark a pointer as immutable when it is created as a shared reference; the flag is propagated to all pointers derived from it, and we then check it on each write to reject writes through immutable pointers.
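
A minimal sketch of that idea, assuming a plain two-field layout (the real type packs these into a more compact representation, as noted above, and `AllocId` below is only a stand-in for rustc's interner-backed type):

```rust
// Conceptual sketch only, not the rustc definition.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
struct AllocId(u64); // stand-in for the interner-backed allocation id

#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
struct CtfeProvenance {
    alloc_id: AllocId,
    immutable: bool,
}

impl CtfeProvenance {
    fn alloc_id(self) -> AllocId {
        self.alloc_id
    }
    fn immutable(self) -> bool {
        self.immutable
    }
    // Used when a shared reference to a freeze type is created (see
    // `retag_ptr_value` in the machine.rs hunk below); every pointer derived
    // from the result keeps the flag.
    fn as_immutable(self) -> Self {
        CtfeProvenance { immutable: true, ..self }
    }
}
```

With this shape, the write path only needs the boolean: `before_memory_write` (also in the machine.rs hunk below) checks it and emits the lint for any non-empty write through immutable provenance.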

I just hope perf works out.
bors committed Dec 7, 2023
2 parents f16c81f + 8188bd4 commit 0e7f91b
Showing 86 changed files with 784 additions and 462 deletions.
6 changes: 4 additions & 2 deletions compiler/rustc_codegen_cranelift/src/constant.rs
@@ -126,7 +126,8 @@ pub(crate) fn codegen_const_value<'tcx>(
}
}
Scalar::Ptr(ptr, _size) => {
let (alloc_id, offset) = ptr.into_parts(); // we know the `offset` is relative
let (prov, offset) = ptr.into_parts(); // we know the `offset` is relative
let alloc_id = prov.alloc_id();
let base_addr = match fx.tcx.global_alloc(alloc_id) {
GlobalAlloc::Memory(alloc) => {
let data_id = data_id_for_alloc_id(
@@ -374,7 +375,8 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant
let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len()).to_vec();
data.define(bytes.into_boxed_slice());

for &(offset, alloc_id) in alloc.provenance().ptrs().iter() {
for &(offset, prov) in alloc.provenance().ptrs().iter() {
let alloc_id = prov.alloc_id();
let addend = {
let endianness = tcx.data_layout.endian;
let offset = offset.bytes() as usize;
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_gcc/src/common.rs
@@ -199,7 +199,8 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
}
}
Scalar::Ptr(ptr, _size) => {
let (alloc_id, offset) = ptr.into_parts();
let (prov, offset) = ptr.into_parts(); // we know the `offset` is relative
let alloc_id = prov.alloc_id();
let base_addr =
match self.tcx.global_alloc(alloc_id) {
GlobalAlloc::Memory(alloc) => {
5 changes: 3 additions & 2 deletions compiler/rustc_codegen_gcc/src/consts.rs
@@ -285,7 +285,8 @@ pub fn const_alloc_to_gcc<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, alloc: ConstAl
let pointer_size = dl.pointer_size.bytes() as usize;

let mut next_offset = 0;
for &(offset, alloc_id) in alloc.provenance().ptrs().iter() {
for &(offset, prov) in alloc.provenance().ptrs().iter() {
let alloc_id = prov.alloc_id();
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
@@ -313,7 +314,7 @@ pub fn const_alloc_to_gcc<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, alloc: ConstAl

llvals.push(cx.scalar_to_backend(
InterpScalar::from_pointer(
interpret::Pointer::new(alloc_id, Size::from_bytes(ptr_offset)),
interpret::Pointer::new(prov, Size::from_bytes(ptr_offset)),
&cx.tcx,
),
abi::Scalar::Initialized { value: Primitive::Pointer(address_space), valid_range: WrappingRange::full(dl.pointer_size) },
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/common.rs
@@ -246,8 +246,8 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
}
}
Scalar::Ptr(ptr, _size) => {
let (alloc_id, offset) = ptr.into_parts();
let (base_addr, base_addr_space) = match self.tcx.global_alloc(alloc_id) {
let (prov, offset) = ptr.into_parts();
let (base_addr, base_addr_space) = match self.tcx.global_alloc(prov.alloc_id()) {
GlobalAlloc::Memory(alloc) => {
let init = const_alloc_to_llvm(self, alloc);
let alloc = alloc.inner();
9 changes: 3 additions & 6 deletions compiler/rustc_codegen_llvm/src/consts.rs
@@ -72,7 +72,7 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<
}

let mut next_offset = 0;
for &(offset, alloc_id) in alloc.provenance().ptrs().iter() {
for &(offset, prov) in alloc.provenance().ptrs().iter() {
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
@@ -92,13 +92,10 @@ pub fn const_alloc_to_llvm<'ll>(cx: &CodegenCx<'ll, '_>, alloc: ConstAllocation<
.expect("const_alloc_to_llvm: could not read relocation pointer")
as u64;

let address_space = cx.tcx.global_alloc(alloc_id).address_space(cx);
let address_space = cx.tcx.global_alloc(prov.alloc_id()).address_space(cx);

llvals.push(cx.scalar_to_backend(
InterpScalar::from_pointer(
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)),
&cx.tcx,
),
InterpScalar::from_pointer(Pointer::new(prov, Size::from_bytes(ptr_offset)), &cx.tcx),
Scalar::Initialized {
value: Primitive::Pointer(address_space),
valid_range: WrappingRange::full(dl.pointer_size),
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -105,7 +105,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
bug!("from_const: invalid ScalarPair layout: {:#?}", layout);
};
let a = Scalar::from_pointer(
Pointer::new(bx.tcx().reserve_and_set_memory_alloc(data), Size::ZERO),
Pointer::new(bx.tcx().reserve_and_set_memory_alloc(data).into(), Size::ZERO),
&bx.tcx(),
);
let a_llval = bx.scalar_to_backend(
3 changes: 3 additions & 0 deletions compiler/rustc_const_eval/messages.ftl
@@ -461,6 +461,9 @@ const_eval_validation_uninhabited_val = {$front_matter}: encountered a value of
const_eval_validation_uninit = {$front_matter}: encountered uninitialized memory, but {$expected}
const_eval_validation_unsafe_cell = {$front_matter}: encountered `UnsafeCell` in a `const`
const_eval_write_through_immutable_pointer =
writing through a pointer that was derived from a shared (immutable) reference
const_eval_write_to_read_only =
writing to {$allocation} which is read-only
const_eval_zst_pointer_out_of_bounds =
44 changes: 36 additions & 8 deletions compiler/rustc_const_eval/src/const_eval/error.rs
@@ -1,14 +1,16 @@
use std::mem;

use rustc_errors::{DiagnosticArgValue, DiagnosticMessage, IntoDiagnostic, IntoDiagnosticArg};
use rustc_hir::CRATE_HIR_ID;
use rustc_middle::mir::AssertKind;
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty::TyCtxt;
use rustc_middle::ty::{layout::LayoutError, ConstInt};
use rustc_span::{ErrorGuaranteed, Span, Symbol, DUMMY_SP};

use super::InterpCx;
use super::{CompileTimeInterpreter, InterpCx};
use crate::errors::{self, FrameNote, ReportErrorExt};
use crate::interpret::{ErrorHandled, InterpError, InterpErrorInfo, Machine, MachineStopType};
use crate::interpret::{ErrorHandled, InterpError, InterpErrorInfo, MachineStopType};

/// The CTFE machine has some custom error kinds.
#[derive(Clone, Debug)]
@@ -57,16 +59,20 @@ impl<'tcx> Into<InterpErrorInfo<'tcx>> for ConstEvalErrKind {
}
}

pub fn get_span_and_frames<'tcx, 'mir, M: Machine<'mir, 'tcx>>(
ecx: &InterpCx<'mir, 'tcx, M>,
pub fn get_span_and_frames<'tcx, 'mir>(
tcx: TyCtxtAt<'tcx>,
machine: &CompileTimeInterpreter<'mir, 'tcx>,
) -> (Span, Vec<errors::FrameNote>)
where
'tcx: 'mir,
{
let mut stacktrace = ecx.generate_stacktrace();
let mut stacktrace =
InterpCx::<CompileTimeInterpreter<'mir, 'tcx>>::generate_stacktrace_from_stack(
&machine.stack,
);
// Filter out `requires_caller_location` frames.
stacktrace.retain(|frame| !frame.instance.def.requires_caller_location(*ecx.tcx));
let span = stacktrace.first().map(|f| f.span).unwrap_or(ecx.tcx.span);
stacktrace.retain(|frame| !frame.instance.def.requires_caller_location(*tcx));
let span = stacktrace.first().map(|f| f.span).unwrap_or(tcx.span);

let mut frames = Vec::new();

@@ -87,7 +93,7 @@ where

let mut last_frame: Option<errors::FrameNote> = None;
for frame_info in &stacktrace {
let frame = frame_info.as_note(*ecx.tcx);
let frame = frame_info.as_note(*tcx);
match last_frame.as_mut() {
Some(last_frame)
if last_frame.span == frame.span
@@ -156,3 +162,25 @@ where
}
}
}

/// Emit a lint from a const-eval situation.
// Even if this is unused, please don't remove it -- chances are we will need to emit a lint during const-eval again in the future!
pub(super) fn lint<'tcx, 'mir, L>(
tcx: TyCtxtAt<'tcx>,
machine: &CompileTimeInterpreter<'mir, 'tcx>,
lint: &'static rustc_session::lint::Lint,
decorator: impl FnOnce(Vec<errors::FrameNote>) -> L,
) where
L: for<'a> rustc_errors::DecorateLint<'a, ()>,
{
let (span, frames) = get_span_and_frames(tcx, machine);

tcx.emit_spanned_lint(
lint,
// We use the root frame for this so the crate that defines the const defines whether the
// lint is emitted.
machine.stack.first().and_then(|frame| frame.lint_root()).unwrap_or(CRATE_HIR_ID),
span,
decorator(frames),
);
}
14 changes: 7 additions & 7 deletions compiler/rustc_const_eval/src/const_eval/eval_queries.rs
@@ -155,8 +155,8 @@ pub(super) fn op_to_const<'tcx>(
match immediate {
Left(ref mplace) => {
// We know `offset` is relative to the allocation, so we can use `into_parts`.
let (alloc_id, offset) = mplace.ptr().into_parts();
let alloc_id = alloc_id.expect("cannot have `fake` place fot non-ZST type");
let (prov, offset) = mplace.ptr().into_parts();
let alloc_id = prov.expect("cannot have `fake` place for non-ZST type").alloc_id();
ConstValue::Indirect { alloc_id, offset }
}
// see comment on `let force_as_immediate` above
@@ -178,8 +178,8 @@ pub(super) fn op_to_const<'tcx>(
);
let msg = "`op_to_const` on an immediate scalar pair must only be used on slice references to the beginning of an actual allocation";
// We know `offset` is relative to the allocation, so we can use `into_parts`.
let (alloc_id, offset) = a.to_pointer(ecx).expect(msg).into_parts();
let alloc_id = alloc_id.expect(msg);
let (prov, offset) = a.to_pointer(ecx).expect(msg).into_parts();
let alloc_id = prov.expect(msg).alloc_id();
let data = ecx.tcx.global_alloc(alloc_id).unwrap_memory();
assert!(offset == abi::Size::ZERO, "{}", msg);
let meta = b.to_target_usize(ecx).expect(msg);
@@ -338,7 +338,7 @@ pub fn eval_in_interpreter<'mir, 'tcx>(
*ecx.tcx,
error,
None,
|| super::get_span_and_frames(&ecx),
|| super::get_span_and_frames(ecx.tcx, &ecx.machine),
|span, frames| ConstEvalError {
span,
error_kind: kind,
@@ -353,7 +353,7 @@ pub fn eval_in_interpreter<'mir, 'tcx>(
let validation =
const_validate_mplace(&ecx, &mplace, is_static, cid.promoted.is_some());

let alloc_id = mplace.ptr().provenance.unwrap();
let alloc_id = mplace.ptr().provenance.unwrap().alloc_id();

// Validation failed, report an error.
if let Err(error) = validation {
@@ -419,7 +419,7 @@ pub fn const_report_error<'mir, 'tcx>(
*ecx.tcx,
error,
None,
|| crate::const_eval::get_span_and_frames(ecx),
|| crate::const_eval::get_span_and_frames(ecx.tcx, &ecx.machine),
move |span, frames| errors::UndefinedBehavior { span, ub_note, frames, raw_bytes },
)
}
75 changes: 57 additions & 18 deletions compiler/rustc_const_eval/src/const_eval/machine.rs
@@ -1,30 +1,30 @@
use rustc_hir::def::DefKind;
use rustc_hir::LangItem;
use rustc_middle::mir;
use rustc_middle::mir::interpret::PointerArithmetic;
use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout};
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::Span;
use std::borrow::Borrow;
use std::fmt;
use std::hash::Hash;
use std::ops::ControlFlow;

use rustc_ast::Mutability;
use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::fx::IndexEntry;
use std::fmt;

use rustc_ast::Mutability;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::DefId;
use rustc_hir::LangItem;
use rustc_middle::mir;
use rustc_middle::mir::AssertMessage;
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty;
use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout};
use rustc_session::lint::builtin::WRITES_THROUGH_IMMUTABLE_POINTER;
use rustc_span::symbol::{sym, Symbol};
use rustc_span::Span;
use rustc_target::abi::{Align, Size};
use rustc_target::spec::abi::Abi as CallAbi;

use crate::errors::{LongRunning, LongRunningWarn};
use crate::fluent_generated as fluent;
use crate::interpret::{
self, compile_time_machine, AllocId, ConstAllocation, FnArg, FnVal, Frame, ImmTy, InterpCx,
InterpResult, OpTy, PlaceTy, Pointer, Scalar,
self, compile_time_machine, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, FnVal,
Frame, ImmTy, InterpCx, InterpResult, OpTy, PlaceTy, Pointer, PointerArithmetic, Scalar,
};

use super::error::*;
Expand All @@ -49,7 +49,7 @@ pub struct CompileTimeInterpreter<'mir, 'tcx> {
pub(super) num_evaluated_steps: usize,

/// The virtual call stack.
pub(super) stack: Vec<Frame<'mir, 'tcx, AllocId, ()>>,
pub(super) stack: Vec<Frame<'mir, 'tcx>>,

/// We need to make sure consts never point to anything mutable, even recursively. That is
/// relied on for pattern matching on consts with references.
@@ -638,10 +638,7 @@ impl<'mir, 'tcx> interpret::Machine<'mir, 'tcx> for CompileTimeInterpreter<'mir,
}

#[inline(always)]
fn expose_ptr(
_ecx: &mut InterpCx<'mir, 'tcx, Self>,
_ptr: Pointer<AllocId>,
) -> InterpResult<'tcx> {
fn expose_ptr(_ecx: &mut InterpCx<'mir, 'tcx, Self>, _ptr: Pointer) -> InterpResult<'tcx> {
// This is only reachable with -Zunleash-the-miri-inside-of-you.
throw_unsup_format!("exposing pointers is not possible at compile-time")
}
@@ -674,7 +671,7 @@ impl<'mir, 'tcx> interpret::Machine<'mir, 'tcx> for CompileTimeInterpreter<'mir,
}

fn before_access_global(
_tcx: TyCtxt<'tcx>,
_tcx: TyCtxtAt<'tcx>,
machine: &Self,
alloc_id: AllocId,
alloc: ConstAllocation<'tcx>,
@@ -711,6 +708,48 @@ impl<'mir, 'tcx> interpret::Machine<'mir, 'tcx> for CompileTimeInterpreter<'mir,
}
}
}

fn retag_ptr_value(
ecx: &mut InterpCx<'mir, 'tcx, Self>,
_kind: mir::RetagKind,
val: &ImmTy<'tcx, CtfeProvenance>,
) -> InterpResult<'tcx, ImmTy<'tcx, CtfeProvenance>> {
// If it's a frozen shared reference that's not already immutable, make it immutable.
// (Do nothing on `None` provenance, that cannot store immutability anyway.)
if let ty::Ref(_, ty, mutbl) = val.layout.ty.kind()
&& *mutbl == Mutability::Not
&& val.to_scalar_and_meta().0.to_pointer(ecx)?.provenance.is_some_and(|p| !p.immutable())
// That next check is expensive, that's why we have all the guards above.
&& ty.is_freeze(*ecx.tcx, ecx.param_env)
{
let place = ecx.ref_to_mplace(val)?;
let new_place = place.map_provenance(|p| p.map(CtfeProvenance::as_immutable));
Ok(ImmTy::from_immediate(new_place.to_ref(ecx), val.layout))
} else {
Ok(val.clone())
}
}

fn before_memory_write(
tcx: TyCtxtAt<'tcx>,
machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
(_alloc_id, immutable): (AllocId, bool),
range: AllocRange,
) -> InterpResult<'tcx> {
if range.size == Size::ZERO {
// Nothing to check.
return Ok(());
}
// Reject writes through immutable pointers.
if immutable {
super::lint(tcx, machine, WRITES_THROUGH_IMMUTABLE_POINTER, |frames| {
crate::errors::WriteThroughImmutablePointer { frames }
});
}
// Everything else is fine.
Ok(())
}
}

// Please do not add any code below the above `Machine` trait impl. I (oli-obk) plan more cleanups
7 changes: 7 additions & 0 deletions compiler/rustc_const_eval/src/errors.rs
@@ -402,6 +402,13 @@ pub struct ConstEvalError {
pub frame_notes: Vec<FrameNote>,
}

#[derive(LintDiagnostic)]
#[diag(const_eval_write_through_immutable_pointer)]
pub struct WriteThroughImmutablePointer {
#[subdiagnostic]
pub frames: Vec<FrameNote>,
}

#[derive(Diagnostic)]
#[diag(const_eval_nullary_intrinsic_fail)]
pub struct NullaryIntrinsicError {