From 2fee9dedce695c374a7020ff0eb38f2b9f22696e Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 6 Aug 2024 19:02:01 +0200 Subject: [PATCH 1/5] miri: make vtable addresses not globally unique --- src/lib.rs | 1 + src/machine.rs | 47 +++++++++++++++++++++++++++++---- tests/pass/dyn-traits.rs | 10 +++++++ tests/pass/function_pointers.rs | 3 ++- tests/pass/rc.rs | 3 ++- 5 files changed, 57 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2b3ae6df5d..966d38508f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,6 +56,7 @@ extern crate either; extern crate tracing; // The rustc crates we need +extern crate rustc_attr; extern crate rustc_apfloat; extern crate rustc_ast; extern crate rustc_const_eval; diff --git a/src/machine.rs b/src/machine.rs index 3e45a3a7e1..df4154bcb5 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -11,6 +11,7 @@ use rand::rngs::StdRng; use rand::Rng; use rand::SeedableRng; +use rustc_attr::InlineAttr; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; #[allow(unused)] use rustc_data_structures::static_assert_size; @@ -47,10 +48,10 @@ pub const SIGRTMIN: i32 = 34; /// `SIGRTMAX` - `SIGRTMIN` >= 8 (which is the value of `_POSIX_RTSIG_MAX`) pub const SIGRTMAX: i32 = 42; -/// Each const has multiple addresses, but only this many. Since const allocations are never -/// deallocated, choosing a new [`AllocId`] and thus base address for each evaluation would -/// produce unbounded memory usage. -const ADDRS_PER_CONST: usize = 16; +/// Each anonymous global (constant, vtable, function pointer, ...) has multiple addresses, but only +/// this many. Since const allocations are never deallocated, choosing a new [`AllocId`] and thus +/// base address for each evaluation would produce unbounded memory usage. +const ADDRS_PER_ANON_GLOBAL: usize = 32; /// Extra data stored with each stack frame pub struct FrameExtra<'tcx> { @@ -1372,7 +1373,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { catch_unwind: None, timing, is_user_relevant: ecx.machine.is_user_relevant(&frame), - salt: ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_CONST, + salt: ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_ANON_GLOBAL, }; Ok(frame.with_extra(extra)) @@ -1518,4 +1519,40 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { Entry::Occupied(oe) => Ok(oe.get().clone()), } } + + fn get_global_alloc_salt( + ecx: &InterpCx<'tcx, Self>, + instance: Option>, + ) -> usize { + let unique = if let Some(instance) = instance { + // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated + // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be + // duplicated across crates. We thus generate a new `AllocId` for every mention of a + // function. This means that `main as fn() == main as fn()` is false, while `let x = main as + // fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify + // certain functions uniquely, e.g. for backtraces. So we identify whether codegen will + // actually emit duplicate functions. It does that when they have non-lifetime generics, or + // when they can be inlined. All other functions are given a unique address. + // This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied + // upon for anything. But if we don't do this, backtraces look terrible. + let is_generic = instance + .args + .into_iter() + .any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_))); + let can_be_inlined = match ecx.tcx.codegen_fn_attrs(instance.def_id()).inline { + InlineAttr::Never => false, + _ => true, + }; + !is_generic && !can_be_inlined + } else { + // Non-functions are never unique. + false + }; + // Always use the same salt if the allocation is unique. + if unique { + CTFE_ALLOC_SALT + } else { + ecx.machine.rng.borrow_mut().gen::() % ADDRS_PER_ANON_GLOBAL + } + } } diff --git a/tests/pass/dyn-traits.rs b/tests/pass/dyn-traits.rs index 908d521a0d..f622012096 100644 --- a/tests/pass/dyn-traits.rs +++ b/tests/pass/dyn-traits.rs @@ -141,7 +141,17 @@ fn unsized_dyn_autoderef() { } */ +fn vtable_ptr_eq() { + use std::{fmt, ptr}; + + // We don't always get the same vtable when casting this to a wide pointer. + let x = &2; + let x_wide = x as &dyn fmt::Display; + assert!((0..256).any(|_| !ptr::eq(x as &dyn fmt::Display, x_wide))); +} + fn main() { ref_box_dyn(); box_box_trait(); + vtable_ptr_eq(); } diff --git a/tests/pass/function_pointers.rs b/tests/pass/function_pointers.rs index 2aa3ebf2dd..a5c4bc5e0d 100644 --- a/tests/pass/function_pointers.rs +++ b/tests/pass/function_pointers.rs @@ -82,7 +82,8 @@ fn main() { assert!(return_fn_ptr(i) == i); assert!(return_fn_ptr(i) as unsafe fn() -> i32 == i as fn() -> i32 as unsafe fn() -> i32); // Miri gives different addresses to different reifications of a generic function. - assert!(return_fn_ptr(f) != f); + // at least if we try often enough. + assert!((0..256).any(|_| return_fn_ptr(f) != f)); // However, if we only turn `f` into a function pointer and use that pointer, // it is equal to itself. let f2 = f as fn() -> i32; diff --git a/tests/pass/rc.rs b/tests/pass/rc.rs index 6dd1b3aff9..b1470dabc2 100644 --- a/tests/pass/rc.rs +++ b/tests/pass/rc.rs @@ -75,7 +75,8 @@ fn rc_fat_ptr_eq() { let p = Rc::new(1) as Rc; let a: *const dyn Debug = &*p; let r = Rc::into_raw(p); - assert!(a == r); + // Only compare the pointer parts, as the vtable might differ. + assert!(a as *const () == r as *const ()); drop(unsafe { Rc::from_raw(r) }); } From 3215ece199905cbfedbe1cfb3313633fad462d7f Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 6 Aug 2024 22:45:42 +0200 Subject: [PATCH 2/5] do not make function addresses unique with cross_crate_inline_threshold=always (even if that breaks backtraces) --- src/machine.rs | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/src/machine.rs b/src/machine.rs index df4154bcb5..411619ed6b 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -24,6 +24,7 @@ use rustc_middle::{ Instance, Ty, TyCtxt, }, }; +use rustc_session::config::InliningThreshold; use rustc_span::def_id::{CrateNum, DefId}; use rustc_span::{Span, SpanData, Symbol}; use rustc_target::abi::{Align, Size}; @@ -1525,24 +1526,29 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { instance: Option>, ) -> usize { let unique = if let Some(instance) = instance { - // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated - // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be - // duplicated across crates. We thus generate a new `AllocId` for every mention of a - // function. This means that `main as fn() == main as fn()` is false, while `let x = main as - // fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify - // certain functions uniquely, e.g. for backtraces. So we identify whether codegen will - // actually emit duplicate functions. It does that when they have non-lifetime generics, or - // when they can be inlined. All other functions are given a unique address. - // This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied - // upon for anything. But if we don't do this, backtraces look terrible. + // Functions cannot be identified by pointers, as asm-equal functions can get + // deduplicated by the linker (we set the "unnamed_addr" attribute for LLVM) and + // functions can be duplicated across crates. We thus generate a new `AllocId` for every + // mention of a function. This means that `main as fn() == main as fn()` is false, while + // `let x = main as fn(); x == x` is true. However, as a quality-of-life feature it can + // be useful to identify certain functions uniquely, e.g. for backtraces. So we identify + // whether codegen will actually emit duplicate functions. It does that when they have + // non-lifetime generics, or when they can be inlined. All other functions are given a + // unique address. This is not a stable guarantee! The `inline` attribute is a hint and + // cannot be relied upon for anything. But if we don't do this, the + // `__rust_begin_short_backtrace`/`__rust_end_short_backtrace` logic breaks and panic + // backtraces look terrible. let is_generic = instance .args .into_iter() .any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_))); - let can_be_inlined = match ecx.tcx.codegen_fn_attrs(instance.def_id()).inline { - InlineAttr::Never => false, - _ => true, - }; + let can_be_inlined = matches!( + ecx.tcx.sess.opts.unstable_opts.cross_crate_inline_threshold, + InliningThreshold::Always + ) || !matches!( + ecx.tcx.codegen_fn_attrs(instance.def_id()).inline, + InlineAttr::Never + ); !is_generic && !can_be_inlined } else { // Non-functions are never unique. From 0adf88319b11348cb7abfdee9b1d82dcedae482a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 14 Aug 2024 07:43:43 +0200 Subject: [PATCH 3/5] Preparing for merge from rustc --- rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-version b/rust-version index 9dad0df254..d88a4ccc85 100644 --- a/rust-version +++ b/rust-version @@ -1 +1 @@ -1d8f135b20fac63c493d5963ce02963b46ca0986 +e9c965df7b75ab5b1ae8f9a2680839ac1a1a3880 From 35893eb3512a99ab71f6f5a48512860c1dff6400 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 14 Aug 2024 07:46:54 +0200 Subject: [PATCH 4/5] fmt --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 84354c77aa..047ac0f39d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,9 +56,9 @@ extern crate either; extern crate tracing; // The rustc crates we need -extern crate rustc_attr; extern crate rustc_apfloat; extern crate rustc_ast; +extern crate rustc_attr; extern crate rustc_const_eval; extern crate rustc_data_structures; extern crate rustc_errors; From 3dbf6776cc3f2a57555ee923d3f8f5241010ae4a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Wed, 14 Aug 2024 07:47:24 +0200 Subject: [PATCH 5/5] CI: need nightly toolchain for auto-rustup PR --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index efdeaeffe1..22c833a548 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -123,6 +123,8 @@ jobs: run: | git config --global user.name 'The Miri Cronjob Bot' git config --global user.email 'miri@cron.bot' + - name: Install nightly toolchain + run: rustup toolchain install nightly --profile minimal - name: get changes from rustc run: ./miri rustc-pull - name: Install rustup-toolchain-install-master