From 5167b6891ccf05aa7a2191675e6c3da95d84374a Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 22 Mar 2022 01:27:28 -0400 Subject: [PATCH 01/15] Introduce experimental APIs for conforming to "strict provenance". This patch series examines the question: how bad would it be if we adopted an extremely strict pointer provenance model that completely banished all int<->ptr casts. The key insight to making this approach even *vaguely* pallatable is the ptr.with_addr(addr) -> ptr function, which takes a pointer and an address and creates a new pointer with that address and the provenance of the input pointer. In this way the "chain of custody" is completely and dynamically restored, making the model suitable even for dynamic checkers like CHERI and Miri. This is not a formal model, but lots of the docs discussing the model have been updated to try to the *concept* of this design in the hopes that it can be iterated on. --- library/core/src/ptr/const_ptr.rs | 71 +++++++- library/core/src/ptr/mod.rs | 268 +++++++++++++++++++++++++++++- library/core/src/ptr/mut_ptr.rs | 71 +++++++- 3 files changed, 400 insertions(+), 10 deletions(-) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index 74c64370ddcda..617f92e135afb 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -150,6 +150,75 @@ impl *const T { bits as Self } + /// Gets the "address" portion of the pointer. + /// + /// This is equivalent to `self as usize`, which semantically discards + /// *provenance* and *address-space* information. To properly restore that information, + /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr]. + /// + /// On most platforms this information isn't represented at runtime, and so the loss + /// of information is "only" semantic. On more complicated platforms like miri, CHERI, + /// and segmented architectures, this may result in an actual change of representation + /// and the loss of information. + /// + /// This API and its claimed semantics are part of the Strict Provenance experiment, + /// see the [module documentation][crate::ptr] for details. + #[must_use] + #[unstable(feature = "strict_provenance", issue = "95228")] + pub fn addr(self) -> usize + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + self as usize + } + + /// Creates a new pointer with the given address. + /// + /// This performs the same operation as an `addr as ptr` cast, but copies + /// the *address-space* and *provenance* of `self` to the new pointer. + /// This allows us to dynamically preserve and propagate this important + /// information in a way that is otherwise impossible with a unary cast. + /// + /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset `self` to the + /// given address, and therefore has all the same capabilities and restrictions. + /// + /// This API and its claimed semantics are part of the Strict Provenance experiment, + /// see the [module documentation][crate::ptr] for details. + #[must_use] + #[unstable(feature = "strict_provenance", issue = "95228")] + pub fn with_addr(self, addr: usize) -> Self + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // + // In the mean-time, this operation is defined to be "as if" it was + // a wrapping_offset, so we can emulate it as such. This should properly + // restore pointer provenance even under today's compiler. 
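+        //
+        // Illustrative walkthrough (example values, not part of the original comment): if
+        // `self.addr()` is 0x1000 and the requested `addr` is 0x1010, then `offset` below
+        // is 16, and the cast/offset/cast chain yields a pointer at 0x1010 that still
+        // carries `self`'s provenance. The wrapping subtraction keeps the result correct
+        // even when the two addresses sit on opposite sides of `isize::MAX`.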
+ let self_addr = self.addr() as isize; + let dest_addr = addr as isize; + let offset = dest_addr.wrapping_sub(self_addr); + + // This is the canonical desugarring of this operation + self.cast::().wrapping_offset(offset).cast::() + } + + /// Creates a new pointer by mapping `self`'s address to a new one. + /// + /// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details. + /// + /// This API and its claimed semantics are part of the Strict Provenance experiment, + /// see the [module documentation][crate::ptr] for details. + #[must_use] + #[unstable(feature = "strict_provenance", issue = "95228")] + pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self + where + T: Sized, + { + self.with_addr(f(self.addr())) + } + /// Decompose a (possibly wide) pointer into its address and metadata components. /// /// The pointer can be later reconstructed with [`from_raw_parts`]. @@ -1006,7 +1075,7 @@ impl *const [T] { /// use std::ptr; /// /// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3); - /// assert_eq!(slice.as_ptr(), 0 as *const i8); + /// assert_eq!(slice.as_ptr(), ptr::null()); /// ``` #[inline] #[unstable(feature = "slice_ptr_get", issue = "74265")] diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 59b1b4c136752..3e8efc96c88f4 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -63,12 +63,212 @@ //! separate allocated object), heap allocations (each allocation created by the global allocator is //! a separate allocated object), and `static` variables. //! +//! +//! # Strict Provenance +//! +//! This section is *non-normative* and describes an experimental set of APIs that help tools +//! that validate the memory-safety of your program's execution. Notably this includes [miri][] +//! and [CHERI][], which can detect when you access out of bounds memory or otherwise violate +//! Rust's memory model. +//! +//! **The following text is overly strict and insufficiently formal, and is an extremely +//! strict interpretation of provenance.** Provenance must exist in some form for any language +//! higher-level than an assembler, but to our knowledge no one has ever been able to square +//! the notion of provenance with all the operations that programmers believe should be permitted. +//! The [Strict Provenance][] experiment seeks to explore the question: *what if we just said you +//! couldn't do all the nasty operations that make provenance so messy?* +//! +//! What APIs would have to be removed? What APIs would have to be added? How much would code +//! have to change, and is it worse or better now? Would any patterns become truly inexpressible? +//! Could we carve out special exceptions for those patterns? +//! +//! +//! ## Provenance +//! +//! **This section is *non-normative* and is part of the [Strict Provenance][] experiment.** +//! +//! Pointers are not *simply* an "integer" or "address". For instance, it's uncontroversial +//! to say that a Use After Free is clearly Undefined Behaviour, even if you "get lucky" +//! and the freed memory gets reallocated before your read/write (in fact this is the +//! worst-case scenario, UAFs would be much less concerning if this didn't happen!). +//! To rationalize this claim, pointers need to somehow be *more* than just their addresses: +//! they must have provenance. +//! +//! When an allocation is created, that allocation has a unique Original Pointer. For alloc +//! 
APIs this is literally the pointer the call returns, and for variables declarations this +//! is the name of the variable. This is mildly overloading the term "pointer" for the sake +//! of brevity/exposition. +//! +//! The Original Pointer for an allocation is guaranteed to have unique access to the entire +//! allocation and *only* that allocation. In this sense, an allocation can be thought of +//! as a "sandbox" that cannot be broken into or out of. *Provenance* is the permission +//! to access an allocation's sandbox and consists of: +//! +//! * Some kind of globally unique identifier tied to the allocation itself. +//! * A range of bytes in the allocation that the pointer is allowed to access. +//! +//! Provenance is implicitly shared with all pointers transitively derived from +//! The Original Pointer through operations like [`offset`], borrowing, and pointer casts. +//! Some operations may produce a pointer whose provenance has a smaller range +//! than the one it's derived from (i.e. borrowing a subfield and subslicing). +//! +//! Shrinking provenance cannot be undone: even if you "know" there is a larger allocation, you +//! can't derive a pointer with a larger provenance. Similarly, you cannot "recombine" +//! two contiguous provenances back into one (i.e. with a `fn merge(&[T], &[T]) -> &[T]`). +//! +//! A reference to a value always has provenance over exactly the memory that field occupies. +//! A reference slice always has provenance over exactly the range that slice describes. +//! +//! If an allocation is deallocated, all pointers with provenance to that allocation become +//! invalidated, and effectively lose their provenance. +//! +//! +//! ## Pointer Vs Addresses +//! +//! **This section is *non-normative* and is part of the [Strict Provenance][] experiment.** +//! +//! One of the largest historical issues with trying to define provenance is that programmers +//! freely convert between pointers and integers. Once you allow for this, it generally becomes +//! impossible to accurately track and preserve provenance information, and you need to appeal +//! to very complex and unreliable heuristics. But of course, converting between pointers and +//! integers is very useful, so what can we do? +//! +//! Strict Provenance attempts to square this circle by decoupling Rust's traditional conflation +//! of pointers and `usize` (and `isize`), defining a pointer to semantically contain the following +//! information: +//! +//! * The **address-space** it is part of. +//! * The **address** it points to, which can be represented by a `usize`. +//! * The **provenance** it has, defining the memory it has permission to access. +//! +//! Under Strict Provenance, a usize *cannot* accurately represent a pointer, and converting from +//! a pointer to a usize is generally an operation which *only* extracts the address. It is +//! therefore *impossible* to construct a valid pointer from a usize because there is no way +//! to restore the address-space and provenance. +//! +//! The key insight to making this model *at all* viable is the [`with_addr`][] method: +//! +//! ```text +//! /// Creates a new pointer with the given address. +//! /// +//! /// This performs the same operation as an `addr as ptr` cast, but copies +//! /// the *address-space* and *provenance* of `self` to the new pointer. +//! /// This allows us to dynamically preserve and propagate this important +//! /// information in a way that is otherwise impossible with a unary cast. +//! /// +//! 
/// This is equivalent to using `wrapping_offset` to offset `self` to the +//! /// given address, and therefore has all the same capabilities and restrictions. +//! pub fn with_addr(self, addr: usize) -> Self; +//! ``` +//! +//! So you're still able to drop down to the address representation and do whatever +//! clever bit tricks you want *as long as* you're able to keep around a pointer +//! into the allocation you care about that can "reconstitute" the other parts of the pointer. +//! Usually this is very easy, because you only are taking a pointer, messing with the address, +//! and then immediately converting back to a pointer. To make this use case more ergonomic, +//! we provide the [`map_addr`][] method. +//! +//! To help make it clear that code is "following" Strict Provenance semantics, we also +//! provide an [`addr`][] method which is currently equivalent to `ptr as usize`. In the +//! future we may provide a lint for pointer<->integer casts to help you audit if your +//! code conforms to strict provenance. +//! +//! +//! ## Using Strict Provenance +//! +//! Most code needs no changes to conform to strict provenance, as the only really concerning +//! operation that *wasn't* obviously already Undefined Behaviour is casts from usize to a +//! pointer. For code which *does* cast a usize to a pointer, the scope of the change depends +//! on exactly what you're doing. +//! +//! In general you just need to make sure that if you want to convert a usize address to a +//! pointer and then use that pointer to read/write memory, you need to keep around a pointer +//! that has sufficient provenance to perform that read/write itself. In this way all of your +//! casts from an address to a pointer are essentially just applying offsets/indexing. +//! +//! This is generally trivial to do for simple cases like tagged pointers *as long as you +//! represent the tagged pointer as an actual pointer and not a usize*. For instance: +//! +//! ``` +//! #![feature(strict_provenance)] +//! +//! unsafe { +//! // A flag we want to pack into our pointer +//! static HAS_DATA: usize = 0x1; +//! static FLAG_MASK: usize = !HAS_DATA; +//! +//! // Our value, which must have enough alignment to have spare least-significant-bits. +//! let my_precious_data: u32 = 17; +//! assert!(core::mem::align_of::() > 1); +//! +//! // Create a tagged pointer +//! let ptr = &my_precious_data as *const u32; +//! let tagged = ptr.map_addr(|addr| addr | HAS_DATA); +//! +//! // Check the flag: +//! if tagged.addr() & HAS_DATA != 0 { +//! // Untag and read the pointer +//! let data = *tagged.map_addr(|addr| addr & FLAG_MASK); +//! assert_eq!(data, 17); +//! } else { +//! unreachable!() +//! } +//! } +//! ``` +//! +//! Something more complicated and just generally *evil* like a XOR-List requires more significant +//! changes like allocating all nodes in a pre-allocated Vec or Arena and using a pointer +//! to the whole allocation to reconstitute the XORed addresses. +//! +//! Situations where a valid pointer *must* be created from just an address, such as baremetal code +//! accessing a memory-mapped interface at a fixed address, are an open question on how to support. +//! These situations *will* still be allowed, but we might require some kind of "I know what I'm +//! doing" annotation to explain the situation to the compiler. Because those situations require +//! `volatile` accesses anyway, it should be possible to carve out exceptions for them. +//! +//! Under [Strict Provenance] is is Undefined Behaviour to: +//! +//! 
* Access memory through a pointer that does not have provenance over that memory.
+//!
+//! * [`offset`] a pointer to an address it doesn't have provenance over.
+//! This means it's always UB to offset a pointer derived from something deallocated,
+//! even if the offset is 0. Note that a pointer "one past the end" of its provenance
+//! is not actually outside its provenance, it just has 0 bytes it can load/store.
+//!
+//! But it *is* still sound to:
+//!
+//! * Create an invalid pointer from just an address (see [`ptr::invalid`][]). This can
+//! be used for sentinel values like `null` *or* to represent a tagged pointer that will
+//! never be dereferenceable.
+//!
+//! * [`wrapping_offset`][] a pointer outside its provenance. This includes invalid pointers
+//! which have no provenance. Unfortunately there may be practical limits on this for a
+//! particular platform ([CHERI][] may mark your pointers as invalid, but it has a pretty
+//! generous buffer that is *at least* 1KB on each side of the provenance). Note that
+//! least-significant-bit tagging is generally pretty robust, and often doesn't even
+//! go out of bounds because types have a size >= align.
+//!
+//! * Forge an allocation of size zero at any sufficiently aligned non-null address.
+//! i.e. the usual "ZSTs are fake, do what you want" rules apply *but* this only applies
+//! for actual forgery (integers cast to pointers). If you borrow some struct's subfield
+//! that *happens* to be zero-sized, the resulting pointer will have provenance tied to
+//! that allocation and it will still get invalidated if the allocation gets deallocated.
+//! In the future we may introduce an API to make such a forged allocation explicit.
+//!
 //! [aliasing]: ../../nomicon/aliasing.html
 //! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer
 //! [ub]: ../../reference/behavior-considered-undefined.html
 //! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts
 //! [atomic operations]: crate::sync::atomic
 //! [`offset`]: pointer::offset
+//! [`wrapping_offset`]: pointer::wrapping_offset
+//! [`with_addr`]: pointer::with_addr
+//! [`map_addr`]: pointer::map_addr
+//! [`addr`]: pointer::addr
+//! [`ptr::invalid`]: core::ptr::invalid
+//! [miri]: https://github.com/rust-lang/miri
+//! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/
+//! [Strict Provenance]: https://github.com/rust-lang/rust/issues/95228
 
 #![stable(feature = "rust1", since = "1.0.0")]
@@ -210,7 +410,7 @@ pub unsafe fn drop_in_place<T: ?Sized>(to_drop: *mut T) {
 #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")]
 #[rustc_diagnostic_item = "ptr_null"]
 pub const fn null<T>() -> *const T {
-    0 as *const T
+    invalid::<T>(0)
 }
 
 /// Creates a null mutable raw pointer.
@@ -230,7 +430,55 @@ pub const fn null<T>() -> *const T {
 #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")]
 #[rustc_diagnostic_item = "ptr_null_mut"]
 pub const fn null_mut<T>() -> *mut T {
-    0 as *mut T
+    invalid_mut::<T>(0)
+}
+
+/// Creates an invalid pointer with the given address.
+///
+/// This is *currently* equivalent to `addr as *const T` but it expresses the intended semantic
+/// more clearly, and may become important under future memory models.
+///
+/// The module's top-level documentation discusses the precise meaning of an "invalid"
+/// pointer but essentially this expresses that the pointer is not associated
+/// with any actual allocation and is little more than a usize address in disguise.
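+///
+/// For illustration, a hedged sketch of one intended use (example values only):
+///
+/// ```
+/// #![feature(strict_provenance)]
+/// use core::ptr;
+///
+/// // Illustrative only: a sentinel address that is compared against, never dereferenced.
+/// let sentinel: *const u8 = ptr::invalid(1);
+/// assert_eq!(sentinel.addr(), 1);
+/// assert!(!sentinel.is_null());
+/// ```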
+/// +/// This pointer will have no provenance associated with it and is therefore +/// UB to read/write/offset. This mostly exists to facilitate things +/// like ptr::null and NonNull::dangling which make invalid pointers. +/// +/// This API and its claimed semantics are part of the Strict Provenance experiment, +/// see the [module documentation][crate::ptr] for details. +#[inline(always)] +#[must_use] +#[rustc_const_stable(feature = "strict_provenance", since = "1.61.0")] +#[unstable(feature = "strict_provenance", issue = "95228")] +pub const fn invalid(addr: usize) -> *const T { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + addr as *const T +} + +/// Creates an invalid mutable pointer with the given address. +/// +/// This is *currently* equivalent to `addr as *const T` but it expresses the intended semantic +/// more clearly, and may become important under future memory models. +/// +/// The module's to-level documentation discusses the precise meaning of an "invalid" +/// pointer but essentially this expresses that the pointer is not associated +/// with any actual allocation and is little more than a usize address in disguise. +/// +/// This pointer will have no provenance associated with it and is therefore +/// UB to read/write/offset. This mostly exists to facilitate things +/// like ptr::null and NonNull::dangling which make invalid pointers. +/// +/// This API and its claimed semantics are part of the Strict Provenance experiment, +/// see the [module documentation][crate::ptr] for details. +#[inline(always)] +#[must_use] +#[rustc_const_stable(feature = "strict_provenance", since = "1.61.0")] +#[unstable(feature = "strict_provenance", issue = "95228")] +pub const fn invalid_mut(addr: usize) -> *mut T { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + addr as *mut T } /// Forms a raw slice from a pointer and a length. @@ -1110,6 +1358,8 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { unchecked_shl, unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub, }; + let addr = p.addr(); + /// Calculate multiplicative modular inverse of `x` modulo `m`. /// /// This implementation is tailored for `align_offset` and has following preconditions: @@ -1170,13 +1420,10 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { // // which distributes operations around the load-bearing, but pessimizing `and` sufficiently // for LLVM to be able to utilize the various optimizations it knows about. - return wrapping_sub( - wrapping_add(p as usize, a_minus_one) & wrapping_sub(0, a), - p as usize, - ); + return wrapping_sub(wrapping_add(addr, a_minus_one) & wrapping_sub(0, a), addr); } - let pmoda = p as usize & a_minus_one; + let pmoda = addr & a_minus_one; if pmoda == 0 { // Already aligned. Yay! return 0; @@ -1193,7 +1440,7 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { let gcd = unsafe { unchecked_shl(1usize, gcdpow) }; // SAFETY: gcd is always greater or equal to 1. - if p as usize & unsafe { unchecked_sub(gcd, 1) } == 0 { + if addr & unsafe { unchecked_sub(gcd, 1) } == 0 { // This branch solves for the following linear congruence equation: // // ` p + so = 0 mod a ` @@ -1347,6 +1594,11 @@ pub fn hash(hashee: *const T, into: &mut S) { hashee.hash(into); } +// FIXME(strict_provenance_magic): function pointers have buggy codegen that +// necessitates casting to a usize to get the backend to do the right thing. 
+// for now I will break AVR to silence *a billion* lints. We should probably +// have a proper "opaque function pointer type" to handle this kind of thing. + // Impls for function pointers macro_rules! fnptr_impls_safety_abi { ($FnTy: ty, $($Arg: ident),*) => { diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 7e48eac4fe033..ead96147b526b 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -154,6 +154,75 @@ impl *mut T { bits as Self } + /// Gets the "address" portion of the pointer. + /// + /// This is equivalent to `self as usize`, which semantically discards + /// *provenance* and *address-space* information. To properly restore that information, + /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr]. + /// + /// On most platforms this information isn't represented at runtime, and so the loss + /// of information is "only" semantic. On more complicated platforms like miri, CHERI, + /// and segmented architectures, this may result in an actual change of representation + /// and the loss of information. + /// + /// This API and its claimed semantics are part of the Strict Provenance experiment, + /// see the [module documentation][crate::ptr] for details. + #[must_use] + #[unstable(feature = "strict_provenance", issue = "95228")] + pub fn addr(self) -> usize + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + self as usize + } + + /// Creates a new pointer with the given address. + /// + /// This performs the same operation as an `addr as ptr` cast, but copies + /// the *address-space* and *provenance* of `self` to the new pointer. + /// This allows us to dynamically preserve and propagate this important + /// information in a way that is otherwise impossible with a unary cast. + /// + /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset `self` to the + /// given address, and therefore has all the same capabilities and restrictions. + /// + /// This API and its claimed semantics are part of the Strict Provenance experiment, + /// see the [module documentation][crate::ptr] for details. + #[must_use] + #[unstable(feature = "strict_provenance", issue = "95228")] + pub fn with_addr(self, addr: usize) -> Self + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // + // In the mean-time, this operation is defined to be "as if" it was + // a wrapping_offset, so we can emulate it as such. This should properly + // restore pointer provenance even under today's compiler. + let self_addr = self.addr() as isize; + let dest_addr = addr as isize; + let offset = dest_addr.wrapping_sub(self_addr); + + // This is the canonical desugarring of this operation + self.cast::().wrapping_offset(offset).cast::() + } + + /// Creates a new pointer by mapping `self`'s address to a new one. + /// + /// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details. + /// + /// This API and its claimed semantics are part of the Strict Provenance experiment, + /// see the [module documentation][crate::ptr] for details. + #[must_use] + #[unstable(feature = "strict_provenance", issue = "95228")] + pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self + where + T: Sized, + { + self.with_addr(f(self.addr())) + } + /// Decompose a (possibly wide) pointer into its address and metadata components. 
/// /// The pointer can be later reconstructed with [`from_raw_parts_mut`]. @@ -1276,7 +1345,7 @@ impl *mut [T] { /// use std::ptr; /// /// let slice: *mut [i8] = ptr::slice_from_raw_parts_mut(ptr::null_mut(), 3); - /// assert_eq!(slice.as_mut_ptr(), 0 as *mut i8); + /// assert_eq!(slice.as_mut_ptr(), ptr::null_mut()); /// ``` #[inline(always)] #[unstable(feature = "slice_ptr_get", issue = "74265")] From c7de289e1c8d24bd55aaa33813e509920a00c364 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 22 Mar 2022 01:24:55 -0400 Subject: [PATCH 02/15] Make the stdlib largely conform to strict provenance. Some things like the unwinders and system APIs are not fully conformant, this only covers a lot of low-hanging fruit. --- library/alloc/src/lib.rs | 1 + library/alloc/src/rc.rs | 5 +- library/alloc/src/slice.rs | 2 +- library/alloc/src/sync.rs | 2 +- library/alloc/src/vec/into_iter.rs | 2 +- library/core/src/alloc/layout.rs | 2 +- library/core/src/fmt/mod.rs | 8 +++- library/core/src/hash/mod.rs | 4 +- library/core/src/intrinsics.rs | 6 +-- library/core/src/ptr/non_null.rs | 6 +-- library/core/src/ptr/unique.rs | 2 +- library/core/src/slice/ascii.rs | 6 +-- library/core/src/slice/iter/macros.rs | 4 +- library/core/src/slice/sort.rs | 2 +- library/std/src/backtrace.rs | 8 ++-- library/std/src/io/error/repr_bitpacked.rs | 10 ++-- library/std/src/lib.rs | 1 + library/std/src/os/windows/io/handle.rs | 3 +- library/std/src/os/windows/io/socket.rs | 1 + library/std/src/path.rs | 4 +- library/std/src/sync/once.rs | 49 +++++++++++--------- library/std/src/sys/windows/alloc.rs | 2 +- library/std/src/sys/windows/c.rs | 2 +- library/std/src/sys/windows/compat.rs | 2 +- library/std/src/sys/windows/fs.rs | 11 +++-- library/std/src/sys/windows/mod.rs | 4 +- library/std/src/sys/windows/os.rs | 2 +- library/std/src/sys/windows/thread_parker.rs | 12 ++--- library/std/src/sys_common/condvar/check.rs | 12 +++-- library/std/src/thread/local.rs | 6 +-- 30 files changed, 100 insertions(+), 81 deletions(-) diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 0a180b83355e0..7e90d77b8f289 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -158,6 +158,7 @@ #![feature(rustc_allow_const_fn_unstable)] #![feature(rustc_attrs)] #![feature(staged_api)] +#![feature(strict_provenance)] #![cfg_attr(test, feature(test))] #![feature(unboxed_closures)] #![feature(unsized_fn_params)] diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index 3ddc9acb2e7db..0b57c36247e43 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2115,13 +2115,12 @@ impl Weak { #[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")] #[must_use] pub const fn new() -> Weak { - Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut RcBox) } } + Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::>(usize::MAX)) } } } } pub(crate) fn is_dangling(ptr: *mut T) -> bool { - let address = ptr as *mut () as usize; - address == usize::MAX + (ptr as *mut ()).addr() == usize::MAX } /// Helper type to allow accessing the reference counts without diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs index f52871c73d9fc..7c892f03bfb78 100644 --- a/library/alloc/src/slice.rs +++ b/library/alloc/src/slice.rs @@ -1044,7 +1044,7 @@ where impl Drop for MergeHole { fn drop(&mut self) { // `T` is not a zero-sized type, so it's okay to divide by its size. 
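+            // Taking `addr()` of both ends keeps this length computation as plain
+            // integer arithmetic on addresses; no pointer is ever rebuilt from the
+            // result, so there is no provenance to restore here.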
- let len = (self.end as usize - self.start as usize) / mem::size_of::(); + let len = (self.end.addr() - self.start.addr()) / mem::size_of::(); unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); } diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index e2b9890850b3a..f8b4d46ac105d 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -1746,7 +1746,7 @@ impl Weak { #[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")] #[must_use] pub const fn new() -> Weak { - Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut ArcInner) } } + Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::>(usize::MAX)) } } } } diff --git a/library/alloc/src/vec/into_iter.rs b/library/alloc/src/vec/into_iter.rs index f17b8d71b3a1a..cc6dfb0e33017 100644 --- a/library/alloc/src/vec/into_iter.rs +++ b/library/alloc/src/vec/into_iter.rs @@ -159,7 +159,7 @@ impl Iterator for IntoIter { #[inline] fn size_hint(&self) -> (usize, Option) { let exact = if mem::size_of::() == 0 { - (self.end as usize).wrapping_sub(self.ptr as usize) + self.end.addr().wrapping_sub(self.ptr.addr()) } else { unsafe { self.end.offset_from(self.ptr) as usize } }; diff --git a/library/core/src/alloc/layout.rs b/library/core/src/alloc/layout.rs index ea639268652c3..0639d6eed62a5 100644 --- a/library/core/src/alloc/layout.rs +++ b/library/core/src/alloc/layout.rs @@ -194,7 +194,7 @@ impl Layout { #[inline] pub const fn dangling(&self) -> NonNull { // SAFETY: align is guaranteed to be non-zero - unsafe { NonNull::new_unchecked(self.align() as *mut u8) } + unsafe { NonNull::new_unchecked(crate::ptr::invalid_mut::(self.align())) } } /// Creates a layout describing the record that can hold a value diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 84cf1753f86ba..0e2e869a920ee 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -352,7 +352,11 @@ impl<'a> ArgumentV1<'a> { } fn as_usize(&self) -> Option { - if self.formatter as usize == USIZE_MARKER as usize { + // We are type punning a bit here: USIZE_MARKER only takes an &usize but + // formatter takes an &Opaque. Rust understandably doesn't think we should compare + // the function pointers if they don't have the same signature, so we cast to + // pointers to convince it that we know what we're doing. 
+ if self.formatter as *mut u8 == USIZE_MARKER as *mut u8 { // SAFETY: The `formatter` field is only set to USIZE_MARKER if // the value is a usize, so this is safe Some(unsafe { *(self.value as *const _ as *const usize) }) @@ -2246,7 +2250,7 @@ impl Pointer for *const T { } f.flags |= 1 << (FlagV1::Alternate as u32); - let ret = LowerHex::fmt(&(ptr as usize), f); + let ret = LowerHex::fmt(&(ptr.addr()), f); f.width = old_width; f.flags = old_flags; diff --git a/library/core/src/hash/mod.rs b/library/core/src/hash/mod.rs index 53de8b42c059f..45c9df0c930b9 100644 --- a/library/core/src/hash/mod.rs +++ b/library/core/src/hash/mod.rs @@ -793,7 +793,7 @@ mod impls { #[inline] fn hash(&self, state: &mut H) { let (address, metadata) = self.to_raw_parts(); - state.write_usize(address as usize); + state.write_usize(address.addr()); metadata.hash(state); } } @@ -803,7 +803,7 @@ mod impls { #[inline] fn hash(&self, state: &mut H) { let (address, metadata) = self.to_raw_parts(); - state.write_usize(address as usize); + state.write_usize(address.addr()); metadata.hash(state); } } diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index 129402ad23a56..8ad4317c145ac 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -1972,15 +1972,15 @@ extern "rust-intrinsic" { /// Checks whether `ptr` is properly aligned with respect to /// `align_of::()`. pub(crate) fn is_aligned_and_not_null(ptr: *const T) -> bool { - !ptr.is_null() && ptr as usize % mem::align_of::() == 0 + !ptr.is_null() && ptr.addr() % mem::align_of::() == 0 } /// Checks whether the regions of memory starting at `src` and `dst` of size /// `count * size_of::()` do *not* overlap. #[cfg(debug_assertions)] pub(crate) fn is_nonoverlapping(src: *const T, dst: *const T, count: usize) -> bool { - let src_usize = src as usize; - let dst_usize = dst as usize; + let src_usize = src.addr(); + let dst_usize = dst.addr(); let size = mem::size_of::().checked_mul(count).unwrap(); let diff = if src_usize > dst_usize { src_usize - dst_usize } else { dst_usize - src_usize }; // If the absolute distance between the ptrs is at least as big as the size of the buffer, diff --git a/library/core/src/ptr/non_null.rs b/library/core/src/ptr/non_null.rs index a698aec51ca71..c1b19895f006c 100644 --- a/library/core/src/ptr/non_null.rs +++ b/library/core/src/ptr/non_null.rs @@ -90,7 +90,7 @@ impl NonNull { // to a *mut T. Therefore, `ptr` is not null and the conditions for // calling new_unchecked() are respected. 
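+        // A dangling pointer only has to be non-null and well aligned and is never
+        // dereferenced, so a provenance-free address of `align_of::<T>()` (built with
+        // `invalid_mut`) is sufficient for it.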
unsafe { - let ptr = mem::align_of::() as *mut T; + let ptr = crate::ptr::invalid_mut::(mem::align_of::()); NonNull::new_unchecked(ptr) } } @@ -469,7 +469,7 @@ impl NonNull<[T]> { /// use std::ptr::NonNull; /// /// let slice: NonNull<[i8]> = NonNull::slice_from_raw_parts(NonNull::dangling(), 3); - /// assert_eq!(slice.as_non_null_ptr(), NonNull::new(1 as *mut i8).unwrap()); + /// assert_eq!(slice.as_non_null_ptr(), NonNull::::dangling()); /// ``` #[inline] #[must_use] @@ -489,7 +489,7 @@ impl NonNull<[T]> { /// use std::ptr::NonNull; /// /// let slice: NonNull<[i8]> = NonNull::slice_from_raw_parts(NonNull::dangling(), 3); - /// assert_eq!(slice.as_mut_ptr(), 1 as *mut i8); + /// assert_eq!(slice.as_mut_ptr(), NonNull::::dangling().as_ptr()); /// ``` #[inline] #[must_use] diff --git a/library/core/src/ptr/unique.rs b/library/core/src/ptr/unique.rs index cff68f64f78e0..29398cbeb238d 100644 --- a/library/core/src/ptr/unique.rs +++ b/library/core/src/ptr/unique.rs @@ -73,7 +73,7 @@ impl Unique { pub const fn dangling() -> Self { // SAFETY: mem::align_of() returns a valid, non-null pointer. The // conditions to call new_unchecked() are thus respected. - unsafe { Unique::new_unchecked(mem::align_of::() as *mut T) } + unsafe { Unique::new_unchecked(crate::ptr::invalid_mut::(mem::align_of::())) } } } diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index c02a6f2d78c64..63d761d3c02d6 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -294,7 +294,7 @@ fn is_ascii(s: &[u8]) -> bool { // Paranoia check about alignment, since we're about to do a bunch of // unaligned loads. In practice this should be impossible barring a bug in // `align_offset` though. - debug_assert_eq!((word_ptr as usize) % mem::align_of::(), 0); + debug_assert_eq!((word_ptr.addr()) % mem::align_of::(), 0); // Read subsequent words until the last aligned word, excluding the last // aligned word by itself to be done in tail check later, to ensure that @@ -302,9 +302,9 @@ fn is_ascii(s: &[u8]) -> bool { while byte_pos < len - USIZE_SIZE { debug_assert!( // Sanity check that the read is in bounds - (word_ptr as usize + USIZE_SIZE) <= (start.wrapping_add(len) as usize) && + (word_ptr.addr() + USIZE_SIZE) <= (start.wrapping_add(len).addr()) && // And that our assumptions about `byte_pos` hold. - (word_ptr as usize) - (start as usize) == byte_pos + (word_ptr.addr()) - (start.addr()) == byte_pos ); // SAFETY: We know `word_ptr` is properly aligned (because of diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index cf15756868e65..96ead49dd6aaf 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -20,13 +20,13 @@ macro_rules! len { if size == 0 { // This _cannot_ use `unchecked_sub` because we depend on wrapping // to represent the length of long ZST slice iterators. - ($self.end as usize).wrapping_sub(start.as_ptr() as usize) + ($self.end.addr()).wrapping_sub(start.as_ptr().addr()) } else { // We know that `start <= end`, so can do better than `offset_from`, // which needs to deal in signed. By setting appropriate flags here // we can tell LLVM this, which helps it remove bounds checks. 
// SAFETY: By the type invariant, `start <= end` - let diff = unsafe { unchecked_sub($self.end as usize, start.as_ptr() as usize) }; + let diff = unsafe { unchecked_sub($self.end.addr(), start.as_ptr().addr()) }; // By also telling LLVM that the pointers are apart by an exact // multiple of the type size, it can optimize `len() == 0` down to // `start == end` instead of `(end - start) < size`. diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs index 2ba0e5320d7b9..5cf08b5740e82 100644 --- a/library/core/src/slice/sort.rs +++ b/library/core/src/slice/sort.rs @@ -269,7 +269,7 @@ where // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive). fn width(l: *mut T, r: *mut T) -> usize { assert!(mem::size_of::() > 0); - (r as usize - l as usize) / mem::size_of::() + (r.addr() - l.addr()) / mem::size_of::() } loop { diff --git a/library/std/src/backtrace.rs b/library/std/src/backtrace.rs index 94e6070c0f794..fded482095a46 100644 --- a/library/std/src/backtrace.rs +++ b/library/std/src/backtrace.rs @@ -293,7 +293,7 @@ impl Backtrace { if !Backtrace::enabled() { return Backtrace { inner: Inner::Disabled }; } - Backtrace::create(Backtrace::capture as usize) + Backtrace::create((Backtrace::capture as *mut ()).addr()) } /// Forcibly captures a full backtrace, regardless of environment variable @@ -308,7 +308,7 @@ impl Backtrace { /// parts of code. #[inline(never)] // want to make sure there's a frame here to remove pub fn force_capture() -> Backtrace { - Backtrace::create(Backtrace::force_capture as usize) + Backtrace::create((Backtrace::force_capture as *mut ()).addr()) } /// Forcibly captures a disabled backtrace, regardless of environment @@ -330,7 +330,7 @@ impl Backtrace { frame: RawFrame::Actual(frame.clone()), symbols: Vec::new(), }); - if frame.symbol_address() as usize == ip && actual_start.is_none() { + if frame.symbol_address().addr() == ip && actual_start.is_none() { actual_start = Some(frames.len()); } true @@ -493,7 +493,7 @@ impl RawFrame { match self { RawFrame::Actual(frame) => frame.ip(), #[cfg(test)] - RawFrame::Fake => 1 as *mut c_void, + RawFrame::Fake => ptr::invalid_mut(1), } } } diff --git a/library/std/src/io/error/repr_bitpacked.rs b/library/std/src/io/error/repr_bitpacked.rs index 208d5a80c5a69..7cc1c701064c5 100644 --- a/library/std/src/io/error/repr_bitpacked.rs +++ b/library/std/src/io/error/repr_bitpacked.rs @@ -106,7 +106,7 @@ use super::{Custom, ErrorData, ErrorKind, SimpleMessage}; use alloc::boxed::Box; use core::marker::PhantomData; use core::mem::{align_of, size_of}; -use core::ptr::NonNull; +use core::ptr::{self, NonNull}; // The 2 least-significant bits are used as tag. const TAG_MASK: usize = 0b11; @@ -136,7 +136,7 @@ impl Repr { let p = Box::into_raw(b).cast::(); // Should only be possible if an allocator handed out a pointer with // wrong alignment. - debug_assert_eq!((p as usize & TAG_MASK), 0); + debug_assert_eq!((p.addr() & TAG_MASK), 0); // Note: We know `TAG_CUSTOM <= size_of::()` (static_assert at // end of file), and both the start and end of the expression must be // valid without address space wraparound due to `Box`'s semantics. @@ -166,7 +166,7 @@ impl Repr { pub(super) fn new_os(code: i32) -> Self { let utagged = ((code as usize) << 32) | TAG_OS; // Safety: `TAG_OS` is not zero, so the result of the `|` is not 0. 
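+        // The OS-error payload is a pure integer smuggled into a pointer-sized slot and
+        // is never dereferenced as a pointer, so building it with `ptr::invalid_mut`
+        // (rather than an `as` cast) makes the "no provenance here" intent explicit.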
- let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }, PhantomData); + let res = Self(unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) }, PhantomData); // quickly smoke-check we encoded the right thing (This generally will // only run in libstd's tests, unless the user uses -Zbuild-std) debug_assert!( @@ -180,7 +180,7 @@ impl Repr { pub(super) fn new_simple(kind: ErrorKind) -> Self { let utagged = ((kind as usize) << 32) | TAG_SIMPLE; // Safety: `TAG_SIMPLE` is not zero, so the result of the `|` is not 0. - let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }, PhantomData); + let res = Self(unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) }, PhantomData); // quickly smoke-check we encoded the right thing (This generally will // only run in libstd's tests, unless the user uses -Zbuild-std) debug_assert!( @@ -238,7 +238,7 @@ unsafe fn decode_repr(ptr: NonNull<()>, make_custom: F) -> ErrorData where F: FnOnce(*mut Custom) -> C, { - let bits = ptr.as_ptr() as usize; + let bits = ptr.as_ptr().addr(); match bits & TAG_MASK { TAG_OS => { let code = ((bits as i64) >> 32) as i32; diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index a464f2d4c7431..133ced5f26cfb 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -275,6 +275,7 @@ #![feature(extend_one)] #![feature(float_minimum_maximum)] #![feature(format_args_nl)] +#![feature(strict_provenance)] #![feature(get_mut_unchecked)] #![feature(hashmap_internals)] #![feature(int_error_internals)] diff --git a/library/std/src/os/windows/io/handle.rs b/library/std/src/os/windows/io/handle.rs index 120af9f99dd9b..ee30cc8be6b57 100644 --- a/library/std/src/os/windows/io/handle.rs +++ b/library/std/src/os/windows/io/handle.rs @@ -9,6 +9,7 @@ use crate::fs; use crate::io; use crate::marker::PhantomData; use crate::mem::forget; +use crate::ptr; use crate::sys::c; use crate::sys::cvt; use crate::sys_common::{AsInner, FromInner, IntoInner}; @@ -182,7 +183,7 @@ impl OwnedHandle { return unsafe { Ok(Self::from_raw_handle(handle)) }; } - let mut ret = 0 as c::HANDLE; + let mut ret = ptr::null_mut(); cvt(unsafe { let cur_proc = c::GetCurrentProcess(); c::DuplicateHandle( diff --git a/library/std/src/os/windows/io/socket.rs b/library/std/src/os/windows/io/socket.rs index a6b979cc22bd3..db93cd15d4ab7 100644 --- a/library/std/src/os/windows/io/socket.rs +++ b/library/std/src/os/windows/io/socket.rs @@ -129,6 +129,7 @@ impl OwnedSocket { } } + // FIXME(strict_provenance_magic): we defined RawSocket to be a u64 ;-; #[cfg(not(target_vendor = "uwp"))] pub(crate) fn set_no_inherit(&self) -> io::Result<()> { cvt(unsafe { diff --git a/library/std/src/path.rs b/library/std/src/path.rs index bcf5c9328b79c..8ecea8ce07f6b 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1449,8 +1449,8 @@ impl PathBuf { }; // truncate until right after the file stem - let end_file_stem = file_stem[file_stem.len()..].as_ptr() as usize; - let start = os_str_as_u8_slice(&self.inner).as_ptr() as usize; + let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); + let start = os_str_as_u8_slice(&self.inner).as_ptr().addr(); let v = self.as_mut_vec(); v.truncate(end_file_stem.wrapping_sub(start)); diff --git a/library/std/src/sync/once.rs b/library/std/src/sync/once.rs index 511de863dc51b..d2dd4c075d2a9 100644 --- a/library/std/src/sync/once.rs +++ b/library/std/src/sync/once.rs @@ -91,9 +91,12 @@ use crate::cell::Cell; use crate::fmt; use crate::marker; use crate::panic::{RefUnwindSafe, 
UnwindSafe}; -use crate::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use crate::ptr; +use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; use crate::thread::{self, Thread}; +type Masked = (); + /// A synchronization primitive which can be used to run a one-time global /// initialization. Useful for one-time initialization for FFI or related /// functionality. This type can only be constructed with [`Once::new()`]. @@ -113,7 +116,7 @@ use crate::thread::{self, Thread}; pub struct Once { // `state_and_queue` is actually a pointer to a `Waiter` with extra state // bits, so we add the `PhantomData` appropriately. - state_and_queue: AtomicUsize, + state_and_queue: AtomicPtr, _marker: marker::PhantomData<*const Waiter>, } @@ -136,7 +139,7 @@ impl RefUnwindSafe for Once {} #[derive(Debug)] pub struct OnceState { poisoned: bool, - set_state_on_drop_to: Cell, + set_state_on_drop_to: Cell<*mut Masked>, } /// Initialization value for static [`Once`] values. @@ -184,8 +187,8 @@ struct Waiter { // Every node is a struct on the stack of a waiting thread. // Will wake up the waiters when it gets dropped, i.e. also on panic. struct WaiterQueue<'a> { - state_and_queue: &'a AtomicUsize, - set_state_on_drop_to: usize, + state_and_queue: &'a AtomicPtr, + set_state_on_drop_to: *mut Masked, } impl Once { @@ -195,7 +198,10 @@ impl Once { #[rustc_const_stable(feature = "const_once_new", since = "1.32.0")] #[must_use] pub const fn new() -> Once { - Once { state_and_queue: AtomicUsize::new(INCOMPLETE), _marker: marker::PhantomData } + Once { + state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)), + _marker: marker::PhantomData, + } } /// Performs an initialization routine once and only once. The given closure @@ -376,7 +382,7 @@ impl Once { // operations visible to us, and, this being a fast path, weaker // ordering helps with performance. This `Acquire` synchronizes with // `Release` operations on the slow path. - self.state_and_queue.load(Ordering::Acquire) == COMPLETE + self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE } // This is a non-generic function to reduce the monomorphization cost of @@ -395,7 +401,7 @@ impl Once { fn call_inner(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&OnceState)) { let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire); loop { - match state_and_queue { + match state_and_queue.addr() { COMPLETE => break, POISONED if !ignore_poisoning => { // Panic to propagate the poison. @@ -405,7 +411,7 @@ impl Once { // Try to register this thread as the one RUNNING. let exchange_result = self.state_and_queue.compare_exchange( state_and_queue, - RUNNING, + ptr::invalid_mut(RUNNING), Ordering::Acquire, Ordering::Acquire, ); @@ -417,13 +423,13 @@ impl Once { // wake them up on drop. let mut waiter_queue = WaiterQueue { state_and_queue: &self.state_and_queue, - set_state_on_drop_to: POISONED, + set_state_on_drop_to: ptr::invalid_mut(POISONED), }; // Run the initialization function, letting it know if we're // poisoned or not. let init_state = OnceState { - poisoned: state_and_queue == POISONED, - set_state_on_drop_to: Cell::new(COMPLETE), + poisoned: state_and_queue.addr() == POISONED, + set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)), }; init(&init_state); waiter_queue.set_state_on_drop_to = init_state.set_state_on_drop_to.get(); @@ -432,7 +438,7 @@ impl Once { _ => { // All other values must be RUNNING with possibly a // pointer to the waiter queue in the more significant bits. 
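+                    // Illustrative layout of `state_and_queue` in this branch:
+                    //
+                    //     [ address of the head `Waiter` ][ 2 low bits: RUNNING ]
+                    //
+                    // Keeping it in an `AtomicPtr` preserves the queue head's provenance;
+                    // only the low tag bits are manipulated as integers via `addr()` and
+                    // `with_addr()`.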
- assert!(state_and_queue & STATE_MASK == RUNNING); + assert!(state_and_queue.addr() & STATE_MASK == RUNNING); wait(&self.state_and_queue, state_and_queue); state_and_queue = self.state_and_queue.load(Ordering::Acquire); } @@ -441,13 +447,13 @@ impl Once { } } -fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) { +fn wait(state_and_queue: &AtomicPtr, mut current_state: *mut Masked) { // Note: the following code was carefully written to avoid creating a // mutable reference to `node` that gets aliased. loop { // Don't queue this thread if the status is no longer running, // otherwise we will not be woken up. - if current_state & STATE_MASK != RUNNING { + if current_state.addr() & STATE_MASK != RUNNING { return; } @@ -455,15 +461,15 @@ fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) { let node = Waiter { thread: Cell::new(Some(thread::current())), signaled: AtomicBool::new(false), - next: (current_state & !STATE_MASK) as *const Waiter, + next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter, }; - let me = &node as *const Waiter as usize; + let me = &node as *const Waiter as *const Masked as *mut Masked; // Try to slide in the node at the head of the linked list, making sure // that another thread didn't just replace the head of the linked list. let exchange_result = state_and_queue.compare_exchange( current_state, - me | RUNNING, + me.with_addr(me.addr() | RUNNING), Ordering::Release, Ordering::Relaxed, ); @@ -502,7 +508,7 @@ impl Drop for WaiterQueue<'_> { self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel); // We should only ever see an old state which was RUNNING. - assert_eq!(state_and_queue & STATE_MASK, RUNNING); + assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING); // Walk the entire linked list of waiters and wake them up (in lifo // order, last to register is first to wake up). @@ -511,7 +517,8 @@ impl Drop for WaiterQueue<'_> { // free `node` if there happens to be has a spurious wakeup. // So we have to take out the `thread` field and copy the pointer to // `next` first. - let mut queue = (state_and_queue & !STATE_MASK) as *const Waiter; + let mut queue = + state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter; while !queue.is_null() { let next = (*queue).next; let thread = (*queue).thread.take().unwrap(); @@ -568,6 +575,6 @@ impl OnceState { /// Poison the associated [`Once`] without explicitly panicking. // NOTE: This is currently only exposed for the `lazy` module pub(crate) fn poison(&self) { - self.set_state_on_drop_to.set(POISONED); + self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED)); } } diff --git a/library/std/src/sys/windows/alloc.rs b/library/std/src/sys/windows/alloc.rs index 2fe71f9f28d5c..fdc81cdea7dec 100644 --- a/library/std/src/sys/windows/alloc.rs +++ b/library/std/src/sys/windows/alloc.rs @@ -159,7 +159,7 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 { // Create a correctly aligned pointer offset from the start of the allocated block, // and write a header before it. - let offset = layout.align() - (ptr as usize & (layout.align() - 1)); + let offset = layout.align() - (ptr.addr() & (layout.align() - 1)); // SAFETY: `MIN_ALIGN` <= `offset` <= `layout.align()` and the size of the allocated // block is `layout.align() + layout.size()`. 
`aligned` will thus be a correctly aligned // pointer inside the allocated block with at least `layout.size()` bytes after it and at diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs index 9b61b2476d5bb..0edf43e5d9dd5 100644 --- a/library/std/src/sys/windows/c.rs +++ b/library/std/src/sys/windows/c.rs @@ -173,7 +173,7 @@ pub const PROGRESS_CONTINUE: DWORD = 0; pub const E_NOTIMPL: HRESULT = 0x80004001u32 as HRESULT; -pub const INVALID_HANDLE_VALUE: HANDLE = !0 as HANDLE; +pub const INVALID_HANDLE_VALUE: HANDLE = ptr::invalid_mut(!0); pub const FACILITY_NT_BIT: DWORD = 0x1000_0000; diff --git a/library/std/src/sys/windows/compat.rs b/library/std/src/sys/windows/compat.rs index cbd3366b189ed..a914a3bcc120b 100644 --- a/library/std/src/sys/windows/compat.rs +++ b/library/std/src/sys/windows/compat.rs @@ -88,7 +88,7 @@ macro_rules! compat_fn { let symbol_name: *const u8 = concat!(stringify!($symbol), "\0").as_ptr(); let module_handle = $crate::sys::c::GetModuleHandleA(module_name as *const i8); if !module_handle.is_null() { - match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8) as usize { + match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8).addr() { 0 => {} n => { PTR = Some(mem::transmute::(n)); diff --git a/library/std/src/sys/windows/fs.rs b/library/std/src/sys/windows/fs.rs index d6c40a15329a9..95903899297b6 100644 --- a/library/std/src/sys/windows/fs.rs +++ b/library/std/src/sys/windows/fs.rs @@ -57,6 +57,9 @@ pub struct DirEntry { data: c::WIN32_FIND_DATAW, } +unsafe impl Send for OpenOptions {} +unsafe impl Sync for OpenOptions {} + #[derive(Clone, Debug)] pub struct OpenOptions { // generic @@ -72,7 +75,7 @@ pub struct OpenOptions { attributes: c::DWORD, share_mode: c::DWORD, security_qos_flags: c::DWORD, - security_attributes: usize, // FIXME: should be a reference + security_attributes: c::LPSECURITY_ATTRIBUTES, } #[derive(Clone, PartialEq, Eq, Debug)] @@ -187,7 +190,7 @@ impl OpenOptions { share_mode: c::FILE_SHARE_READ | c::FILE_SHARE_WRITE | c::FILE_SHARE_DELETE, attributes: 0, security_qos_flags: 0, - security_attributes: 0, + security_attributes: ptr::null_mut(), } } @@ -228,7 +231,7 @@ impl OpenOptions { self.security_qos_flags = flags | c::SECURITY_SQOS_PRESENT; } pub fn security_attributes(&mut self, attrs: c::LPSECURITY_ATTRIBUTES) { - self.security_attributes = attrs as usize; + self.security_attributes = attrs; } fn get_access_mode(&self) -> io::Result { @@ -289,7 +292,7 @@ impl File { path.as_ptr(), opts.get_access_mode()?, opts.share_mode, - opts.security_attributes as *mut _, + opts.security_attributes, opts.get_creation_mode()?, opts.get_flags_and_attributes(), ptr::null_mut(), diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs index 62814eaaa5667..87e3fec6353f9 100644 --- a/library/std/src/sys/windows/mod.rs +++ b/library/std/src/sys/windows/mod.rs @@ -136,7 +136,7 @@ pub fn unrolled_find_u16s(needle: u16, haystack: &[u16]) -> Option { ($($n:literal,)+) => { $( if start[$n] == needle { - return Some((&start[$n] as *const u16 as usize - ptr as usize) / 2); + return Some(((&start[$n] as *const u16).addr() - ptr.addr()) / 2); } )+ } @@ -149,7 +149,7 @@ pub fn unrolled_find_u16s(needle: u16, haystack: &[u16]) -> Option { for c in start { if *c == needle { - return Some((c as *const u16 as usize - ptr as usize) / 2); + return Some(((c as *const u16).addr() - ptr.addr()) / 2); } } None diff --git a/library/std/src/sys/windows/os.rs 
b/library/std/src/sys/windows/os.rs index 450bceae00081..bcac996c024ec 100644 --- a/library/std/src/sys/windows/os.rs +++ b/library/std/src/sys/windows/os.rs @@ -134,7 +134,7 @@ impl Drop for Env { pub fn env() -> Env { unsafe { let ch = c::GetEnvironmentStringsW(); - if ch as usize == 0 { + if ch.is_null() { panic!("failure getting env string from OS: {}", io::Error::last_os_error()); } Env { base: ch, cur: ch } diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs index 5888ee8e34bfb..3497da51deeda 100644 --- a/library/std/src/sys/windows/thread_parker.rs +++ b/library/std/src/sys/windows/thread_parker.rs @@ -60,7 +60,7 @@ use crate::convert::TryFrom; use crate::ptr; use crate::sync::atomic::{ - AtomicI8, AtomicUsize, + AtomicI8, AtomicPtr, Ordering::{Acquire, Relaxed, Release}, }; use crate::sys::{c, dur2timeout}; @@ -217,8 +217,8 @@ impl Parker { } fn keyed_event_handle() -> c::HANDLE { - const INVALID: usize = !0; - static HANDLE: AtomicUsize = AtomicUsize::new(INVALID); + const INVALID: c::HANDLE = ptr::invalid_mut(!0); + static HANDLE: AtomicPtr = AtomicPtr::new(INVALID); match HANDLE.load(Relaxed) { INVALID => { let mut handle = c::INVALID_HANDLE_VALUE; @@ -233,7 +233,7 @@ fn keyed_event_handle() -> c::HANDLE { r => panic!("Unable to create keyed event handle: error {r}"), } } - match HANDLE.compare_exchange(INVALID, handle as usize, Relaxed, Relaxed) { + match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) { Ok(_) => handle, Err(h) => { // Lost the race to another thread initializing HANDLE before we did. @@ -241,10 +241,10 @@ fn keyed_event_handle() -> c::HANDLE { unsafe { c::CloseHandle(handle); } - h as c::HANDLE + h } } } - handle => handle as c::HANDLE, + handle => handle, } } diff --git a/library/std/src/sys_common/condvar/check.rs b/library/std/src/sys_common/condvar/check.rs index 47aff060d6f79..7671850ac55b8 100644 --- a/library/std/src/sys_common/condvar/check.rs +++ b/library/std/src/sys_common/condvar/check.rs @@ -1,4 +1,5 @@ -use crate::sync::atomic::{AtomicUsize, Ordering}; +use crate::ptr; +use crate::sync::atomic::{AtomicPtr, Ordering}; use crate::sys::locks as imp; use crate::sys_common::mutex::MovableMutex; @@ -13,17 +14,18 @@ impl CondvarCheck for Box { } pub struct SameMutexCheck { - addr: AtomicUsize, + addr: AtomicPtr<()>, } #[allow(dead_code)] impl SameMutexCheck { pub const fn new() -> Self { - Self { addr: AtomicUsize::new(0) } + Self { addr: AtomicPtr::new(ptr::null_mut()) } } pub fn verify(&self, mutex: &MovableMutex) { - let addr = mutex.raw() as *const imp::Mutex as usize; - match self.addr.compare_exchange(0, addr, Ordering::SeqCst, Ordering::SeqCst) { + let addr = mutex.raw() as *const imp::Mutex as *const () as *mut _; + match self.addr.compare_exchange(ptr::null_mut(), addr, Ordering::SeqCst, Ordering::SeqCst) + { Ok(_) => {} // Stored the address Err(n) if n == addr => {} // Lost a race to store the same address _ => panic!("attempted to use a condition variable with two mutexes"), diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index a100444f04968..ca29261b1c98d 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -1071,7 +1071,7 @@ pub mod os { pub unsafe fn get(&'static self, init: impl FnOnce() -> T) -> Option<&'static T> { // SAFETY: See the documentation for this method. 
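+            // Sentinel scheme used below (values taken from this file): a null pointer
+            // means "not yet initialized" and address 1 means "destructor running", so
+            // `ptr.addr() > 1` is the "we hold a real, live pointer" check.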
let ptr = unsafe { self.os.get() as *mut Value }; - if ptr as usize > 1 { + if ptr.addr() > 1 { // SAFETY: the check ensured the pointer is safe (its destructor // is not running) + it is coming from a trusted source (self). if let Some(ref value) = unsafe { (*ptr).inner.get() } { @@ -1090,7 +1090,7 @@ pub mod os { // SAFETY: No mutable references are ever handed out meaning getting // the value is ok. let ptr = unsafe { self.os.get() as *mut Value }; - if ptr as usize == 1 { + if ptr.addr() == 1 { // destructor is running return None; } @@ -1130,7 +1130,7 @@ pub mod os { unsafe { let ptr = Box::from_raw(ptr as *mut Value); let key = ptr.key; - key.os.set(1 as *mut u8); + key.os.set(ptr::invalid_mut(1)); drop(ptr); key.os.set(ptr::null_mut()); } From 68643603ad00900d6a91a0dae90bb2ebb4f0db48 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 22 Mar 2022 16:21:33 -0400 Subject: [PATCH 03/15] Make some rustc code conform to strict provenance. There's some really bad stuff around `ty` and pointer tagging stuff that was too much work to handle here. --- compiler/rustc_arena/src/lib.rs | 20 +++++++++++--------- compiler/rustc_codegen_ssa/src/lib.rs | 1 + compiler/rustc_codegen_ssa/src/mono_item.rs | 2 +- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_arena/src/lib.rs b/compiler/rustc_arena/src/lib.rs index 3928d70c0ede2..de1d5c07f5028 100644 --- a/compiler/rustc_arena/src/lib.rs +++ b/compiler/rustc_arena/src/lib.rs @@ -18,6 +18,7 @@ #![feature(decl_macro)] #![feature(rustc_attrs)] #![cfg_attr(test, feature(test))] +#![feature(strict_provenance)] use smallvec::SmallVec; @@ -87,7 +88,7 @@ impl ArenaChunk { unsafe { if mem::size_of::() == 0 { // A pointer as large as possible for zero-sized elements. - !0 as *mut T + ptr::invalid_mut(!0) } else { self.start().add(self.storage.len()) } @@ -199,7 +200,7 @@ impl TypedArena { unsafe { if mem::size_of::() == 0 { self.ptr.set((self.ptr.get() as *mut u8).wrapping_offset(1) as *mut T); - let ptr = mem::align_of::() as *mut T; + let ptr = ptr::NonNull::::dangling().as_ptr(); // Don't drop the object. This `write` is equivalent to `forget`. ptr::write(ptr, object); &mut *ptr @@ -216,7 +217,7 @@ impl TypedArena { #[inline] fn can_allocate(&self, additional: usize) -> bool { - let available_bytes = self.end.get() as usize - self.ptr.get() as usize; + let available_bytes = self.end.get().addr() - self.ptr.get().addr(); let additional_bytes = additional.checked_mul(mem::size_of::()).unwrap(); available_bytes >= additional_bytes } @@ -262,7 +263,7 @@ impl TypedArena { // If a type is `!needs_drop`, we don't need to keep track of how many elements // the chunk stores - the field will be ignored anyway. if mem::needs_drop::() { - let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize; + let used_bytes = self.ptr.get().addr() - last_chunk.start().addr(); last_chunk.entries = used_bytes / mem::size_of::(); } @@ -288,9 +289,9 @@ impl TypedArena { // chunks. fn clear_last_chunk(&self, last_chunk: &mut ArenaChunk) { // Determine how much was filled. - let start = last_chunk.start() as usize; + let start = last_chunk.start().addr(); // We obtain the value of the pointer to the first uninitialized element. - let end = self.ptr.get() as usize; + let end = self.ptr.get().addr(); // We then calculate the number of elements to be dropped in the last chunk, // which is the filled area's length. let diff = if mem::size_of::() == 0 { @@ -395,15 +396,16 @@ impl DroplessArena { /// request. 
#[inline] fn alloc_raw_without_grow(&self, layout: Layout) -> Option<*mut u8> { - let start = self.start.get() as usize; - let end = self.end.get() as usize; + let start = self.start.get().addr(); + let old_end = self.end.get(); + let end = old_end.addr(); let align = layout.align(); let bytes = layout.size(); let new_end = end.checked_sub(bytes)? & !(align - 1); if start <= new_end { - let new_end = new_end as *mut u8; + let new_end = old_end.with_addr(new_end); self.end.set(new_end); Some(new_end) } else { diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 25e27f565eae2..6cf6be79a8628 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -6,6 +6,7 @@ #![feature(once_cell)] #![feature(nll)] #![feature(associated_type_bounds)] +#![feature(strict_provenance)] #![recursion_limit = "256"] #![allow(rustc::potential_query_instability)] diff --git a/compiler/rustc_codegen_ssa/src/mono_item.rs b/compiler/rustc_codegen_ssa/src/mono_item.rs index 5f0f50ae2df1d..5414c619dcbca 100644 --- a/compiler/rustc_codegen_ssa/src/mono_item.rs +++ b/compiler/rustc_codegen_ssa/src/mono_item.rs @@ -116,7 +116,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> { fn to_raw_string(&self) -> String { match *self { MonoItem::Fn(instance) => { - format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr() as usize) + format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr().addr()) } MonoItem::Static(id) => format!("Static({:?})", id), MonoItem::GlobalAsm(id) => format!("GlobalAsm({:?})", id), From 09395f626b2ff7378fb250300654b1817953a390 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 21:29:38 -0400 Subject: [PATCH 04/15] Make some linux/unix APIs better conform to strict provenance. This largely makes the stdlib conform to strict provenance on Ubuntu. Some hairier things have been left alone for now. 
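Most of the changes below follow the same mechanical pattern: integer arithmetic on addresses stays as-is, but the pointer is rebuilt with `with_addr`/`map_addr` instead of a bare `usize as *mut _` cast. A minimal sketch of that pattern (hypothetical helpers, not taken from any file below, assuming the `strict_provenance` APIs introduced earlier in this series):

```rust
#![feature(strict_provenance)]

// Round a pointer down to a page boundary (`page_size` assumed to be a power of two).

// Before: the round-trip through `usize` discards provenance.
fn page_align_via_usize(ptr: *mut u8, page_size: usize) -> *mut u8 {
    ((ptr as usize) & !(page_size - 1)) as *mut u8
}

// After: the address math is unchanged, but the result inherits the
// provenance of `ptr`, so tools like Miri and CHERI can keep tracking it.
fn page_align_strict(ptr: *mut u8, page_size: usize) -> *mut u8 {
    ptr.map_addr(|addr| addr & !(page_size - 1))
}
```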
--- library/std/src/backtrace.rs | 2 +- library/std/src/os/unix/net/addr.rs | 4 +-- library/std/src/sys/unix/memchr.rs | 4 +-- library/std/src/sys/unix/thread.rs | 48 +++++++++++++++-------------- library/std/src/sys/unix/weak.rs | 17 +++++----- 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/library/std/src/backtrace.rs b/library/std/src/backtrace.rs index fded482095a46..3b0922ad16f25 100644 --- a/library/std/src/backtrace.rs +++ b/library/std/src/backtrace.rs @@ -493,7 +493,7 @@ impl RawFrame { match self { RawFrame::Actual(frame) => frame.ip(), #[cfg(test)] - RawFrame::Fake => ptr::invalid_mut(1), + RawFrame::Fake => crate::ptr::invalid_mut(1), } } } diff --git a/library/std/src/os/unix/net/addr.rs b/library/std/src/os/unix/net/addr.rs index a3ef4b2d92cc4..c323161d76537 100644 --- a/library/std/src/os/unix/net/addr.rs +++ b/library/std/src/os/unix/net/addr.rs @@ -17,8 +17,8 @@ mod libc { fn sun_path_offset(addr: &libc::sockaddr_un) -> usize { // Work with an actual instance of the type since using a null pointer is UB - let base = addr as *const _ as usize; - let path = &addr.sun_path as *const _ as usize; + let base = (addr as *const libc::sockaddr_un).addr(); + let path = (&addr.sun_path as *const i8).addr(); path - base } diff --git a/library/std/src/sys/unix/memchr.rs b/library/std/src/sys/unix/memchr.rs index a9273ea676cb3..a3e4f8ff56aee 100644 --- a/library/std/src/sys/unix/memchr.rs +++ b/library/std/src/sys/unix/memchr.rs @@ -9,7 +9,7 @@ pub fn memchr(needle: u8, haystack: &[u8]) -> Option { haystack.len(), ) }; - if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) } + if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) } } pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { @@ -26,7 +26,7 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { haystack.len(), ) }; - if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) } + if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) } } #[cfg(not(target_os = "linux"))] diff --git a/library/std/src/sys/unix/thread.rs b/library/std/src/sys/unix/thread.rs index 2d5d306ed62bb..be70d00cb1ace 100644 --- a/library/std/src/sys/unix/thread.rs +++ b/library/std/src/sys/unix/thread.rs @@ -505,9 +505,8 @@ pub mod guard { #[cfg(target_os = "macos")] unsafe fn get_stack_start() -> Option<*mut libc::c_void> { let th = libc::pthread_self(); - let stackaddr = - libc::pthread_get_stackaddr_np(th) as usize - libc::pthread_get_stacksize_np(th); - Some(stackaddr as *mut libc::c_void) + let stackptr = libc::pthread_get_stackaddr_np(th); + Some(stackptr.map_addr(|addr| addr - libc::pthread_get_stacksize_np(th))) } #[cfg(target_os = "openbsd")] @@ -515,14 +514,15 @@ pub mod guard { let mut current_stack: libc::stack_t = crate::mem::zeroed(); assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), &mut current_stack), 0); + let stack_ptr = current_stack.ss_sp; let stackaddr = if libc::pthread_main_np() == 1 { // main thread - current_stack.ss_sp as usize - current_stack.ss_size + PAGE_SIZE.load(Ordering::Relaxed) + stack_ptr.addr() - current_stack.ss_size + PAGE_SIZE.load(Ordering::Relaxed) } else { // new thread - current_stack.ss_sp as usize - current_stack.ss_size + stack_ptr.addr() - current_stack.ss_size }; - Some(stackaddr as *mut libc::c_void) + Some(stack_ptr.with_addr(stack_addr)) } #[cfg(any( @@ -557,7 +557,8 @@ pub mod guard { unsafe fn get_stack_start_aligned() -> Option<*mut libc::c_void> { let page_size = 
PAGE_SIZE.load(Ordering::Relaxed); assert!(page_size != 0); - let stackaddr = get_stack_start()?; + let stackptr = get_stack_start()?; + let stackaddr = stackptr.addr(); // Ensure stackaddr is page aligned! A parent process might // have reset RLIMIT_STACK to be non-page aligned. The @@ -565,11 +566,11 @@ pub mod guard { // stackaddr < stackaddr + stacksize, so if stackaddr is not // page-aligned, calculate the fix such that stackaddr < // new_page_aligned_stackaddr < stackaddr + stacksize - let remainder = (stackaddr as usize) % page_size; + let remainder = stackaddr % page_size; Some(if remainder == 0 { - stackaddr + stackptr } else { - ((stackaddr as usize) + page_size - remainder) as *mut libc::c_void + stackptr.with_addr(stackaddr + page_size - remainder) }) } @@ -588,8 +589,8 @@ pub mod guard { // Instead, we'll just note where we expect rlimit to start // faulting, so our handler can report "stack overflow", and // trust that the kernel's own stack guard will work. - let stackaddr = get_stack_start_aligned()?; - let stackaddr = stackaddr as usize; + let stackptr = get_stack_start_aligned()?; + let stackaddr = stackptr.addr(); Some(stackaddr - page_size..stackaddr) } else if cfg!(all(target_os = "linux", target_env = "musl")) { // For the main thread, the musl's pthread_attr_getstack @@ -602,8 +603,8 @@ pub mod guard { // at the bottom. If we try to remap the bottom of the stack // ourselves, FreeBSD's guard page moves upwards. So we'll just use // the builtin guard page. - let stackaddr = get_stack_start_aligned()?; - let guardaddr = stackaddr as usize; + let stackptr = get_stack_start_aligned()?; + let guardaddr = stackptr.addr(); // Technically the number of guard pages is tunable and controlled // by the security.bsd.stack_guard_page sysctl, but there are // few reasons to change it from the default. The default value has @@ -620,25 +621,25 @@ pub mod guard { // than the initial mmap() used, so we mmap() here with // read/write permissions and only then mprotect() it to // no permissions at all. See issue #50313. - let stackaddr = get_stack_start_aligned()?; + let stackptr = get_stack_start_aligned()?; let result = mmap( - stackaddr, + stackptr, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0, ); - if result != stackaddr || result == MAP_FAILED { + if result != stackptr || result == MAP_FAILED { panic!("failed to allocate a guard page: {}", io::Error::last_os_error()); } - let result = mprotect(stackaddr, page_size, PROT_NONE); + let result = mprotect(stackptr, page_size, PROT_NONE); if result != 0 { panic!("failed to protect the guard page: {}", io::Error::last_os_error()); } - let guardaddr = stackaddr as usize; + let guardaddr = stackptr.addr(); Some(guardaddr..guardaddr + page_size) } @@ -646,7 +647,8 @@ pub mod guard { #[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "solaris"))] pub unsafe fn current() -> Option { - let stackaddr = get_stack_start()? 
as usize; + let stackptr = get_stack_start()?; + let stackaddr = stackptr.addr(); Some(stackaddr - PAGE_SIZE.load(Ordering::Relaxed)..stackaddr) } @@ -679,11 +681,11 @@ pub mod guard { panic!("there is no guard page"); } } - let mut stackaddr = crate::ptr::null_mut(); + let mut stackptr = crate::ptr::null_mut::(); let mut size = 0; - assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackaddr, &mut size), 0); + assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackptr, &mut size), 0); - let stackaddr = stackaddr as usize; + let stackaddr = stackptr.addr(); ret = if cfg!(any(target_os = "freebsd", target_os = "netbsd")) { Some(stackaddr - guardsize..stackaddr) } else if cfg!(all(target_os = "linux", target_env = "musl")) { diff --git a/library/std/src/sys/unix/weak.rs b/library/std/src/sys/unix/weak.rs index da63c068384a2..c9606aec3f6df 100644 --- a/library/std/src/sys/unix/weak.rs +++ b/library/std/src/sys/unix/weak.rs @@ -22,9 +22,10 @@ // that, we'll just allow that some unix targets don't use this module at all. #![allow(dead_code, unused_macros)] -use crate::ffi::CStr; +use crate::ffi::{c_void, CStr}; use crate::marker::PhantomData; use crate::mem; +use crate::ptr; use crate::sync::atomic::{self, AtomicUsize, Ordering}; // We can use true weak linkage on ELF targets. @@ -129,25 +130,25 @@ impl DlsymWeak { // Cold because it should only happen during first-time initialization. #[cold] unsafe fn initialize(&self) -> Option { - assert_eq!(mem::size_of::(), mem::size_of::()); + assert_eq!(mem::size_of::(), mem::size_of::<*mut ()>()); let val = fetch(self.name); // This synchronizes with the acquire fence in `get`. - self.addr.store(val, Ordering::Release); + self.addr.store(val.addr(), Ordering::Release); - match val { + match val.addr() { 0 => None, - addr => Some(mem::transmute_copy::(&addr)), + _ => Some(mem::transmute_copy::<*mut c_void, F>(&val)), } } } -unsafe fn fetch(name: &str) -> usize { +unsafe fn fetch(name: &str) -> *mut c_void { let name = match CStr::from_bytes_with_nul(name.as_bytes()) { Ok(cstr) => cstr, - Err(..) => return 0, + Err(..) => return ptr::null_mut(), }; - libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) as usize + libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) } #[cfg(not(any(target_os = "linux", target_os = "android")))] From b608df8277d0e21dd04b73691a56b124ef125d6a Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Wed, 23 Mar 2022 12:38:04 -0400 Subject: [PATCH 05/15] revert changes that cast functions to raw pointers, portability hazard --- library/core/src/fmt/mod.rs | 4 ++-- library/std/src/backtrace.rs | 4 ++-- library/std/src/sys/unix/weak.rs | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 0e2e869a920ee..6c1d20f36e2f6 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -355,8 +355,8 @@ impl<'a> ArgumentV1<'a> { // We are type punning a bit here: USIZE_MARKER only takes an &usize but // formatter takes an &Opaque. Rust understandably doesn't think we should compare // the function pointers if they don't have the same signature, so we cast to - // pointers to convince it that we know what we're doing. - if self.formatter as *mut u8 == USIZE_MARKER as *mut u8 { + // usizes to tell it that we just want to compare addresses. 
+ if self.formatter as usize == USIZE_MARKER as usize { // SAFETY: The `formatter` field is only set to USIZE_MARKER if // the value is a usize, so this is safe Some(unsafe { *(self.value as *const _ as *const usize) }) diff --git a/library/std/src/backtrace.rs b/library/std/src/backtrace.rs index 3b0922ad16f25..05e9b2eb6bc3c 100644 --- a/library/std/src/backtrace.rs +++ b/library/std/src/backtrace.rs @@ -293,7 +293,7 @@ impl Backtrace { if !Backtrace::enabled() { return Backtrace { inner: Inner::Disabled }; } - Backtrace::create((Backtrace::capture as *mut ()).addr()) + Backtrace::create(Backtrace::capture as usize) } /// Forcibly captures a full backtrace, regardless of environment variable @@ -308,7 +308,7 @@ impl Backtrace { /// parts of code. #[inline(never)] // want to make sure there's a frame here to remove pub fn force_capture() -> Backtrace { - Backtrace::create((Backtrace::force_capture as *mut ()).addr()) + Backtrace::create(Backtrace::force_capture as usize) } /// Forcibly captures a disabled backtrace, regardless of environment diff --git a/library/std/src/sys/unix/weak.rs b/library/std/src/sys/unix/weak.rs index c9606aec3f6df..da63c068384a2 100644 --- a/library/std/src/sys/unix/weak.rs +++ b/library/std/src/sys/unix/weak.rs @@ -22,10 +22,9 @@ // that, we'll just allow that some unix targets don't use this module at all. #![allow(dead_code, unused_macros)] -use crate::ffi::{c_void, CStr}; +use crate::ffi::CStr; use crate::marker::PhantomData; use crate::mem; -use crate::ptr; use crate::sync::atomic::{self, AtomicUsize, Ordering}; // We can use true weak linkage on ELF targets. @@ -130,25 +129,25 @@ impl DlsymWeak { // Cold because it should only happen during first-time initialization. #[cold] unsafe fn initialize(&self) -> Option { - assert_eq!(mem::size_of::(), mem::size_of::<*mut ()>()); + assert_eq!(mem::size_of::(), mem::size_of::()); let val = fetch(self.name); // This synchronizes with the acquire fence in `get`. - self.addr.store(val.addr(), Ordering::Release); + self.addr.store(val, Ordering::Release); - match val.addr() { + match val { 0 => None, - _ => Some(mem::transmute_copy::<*mut c_void, F>(&val)), + addr => Some(mem::transmute_copy::(&addr)), } } } -unsafe fn fetch(name: &str) -> *mut c_void { +unsafe fn fetch(name: &str) -> usize { let name = match CStr::from_bytes_with_nul(name.as_bytes()) { Ok(cstr) => cstr, - Err(..) => return ptr::null_mut(), + Err(..) => return 0, }; - libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) + libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) as usize } #[cfg(not(any(target_os = "linux", target_os = "android")))] From 31e1cde4b5bd70dfe892f5e7bc449ad3a6e1d773 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Sat, 26 Mar 2022 15:55:05 -0400 Subject: [PATCH 06/15] clean up pointer docs --- library/core/src/ptr/const_ptr.rs | 16 +++--- library/core/src/ptr/mod.rs | 89 ++++++++++++++++++++++--------- library/core/src/ptr/mut_ptr.rs | 16 +++--- 3 files changed, 84 insertions(+), 37 deletions(-) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index 617f92e135afb..20ee128026421 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -156,14 +156,16 @@ impl *const T { /// *provenance* and *address-space* information. To properly restore that information, /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr]. /// - /// On most platforms this information isn't represented at runtime, and so the loss - /// of information is "only" semantic. 
On more complicated platforms like miri, CHERI, - /// and segmented architectures, this may result in an actual change of representation - /// and the loss of information. + /// On most platforms this will produce a value with the same bytes as the original + /// pointer, because all the bytes are dedicated to describing the address. + /// Platforms which need to store additional information in the pointer may + /// perform a change of representation to produce a value containing only the address + /// portion of the pointer. What that means is up to the platform to define. /// /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[must_use] + #[inline] #[unstable(feature = "strict_provenance", issue = "95228")] pub fn addr(self) -> usize where @@ -180,12 +182,13 @@ impl *const T { /// This allows us to dynamically preserve and propagate this important /// information in a way that is otherwise impossible with a unary cast. /// - /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset `self` to the - /// given address, and therefore has all the same capabilities and restrictions. + /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset + /// `self` to the given address, and therefore has all the same capabilities and restrictions. /// /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[must_use] + #[inline] #[unstable(feature = "strict_provenance", issue = "95228")] pub fn with_addr(self, addr: usize) -> Self where @@ -211,6 +214,7 @@ impl *const T { /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[must_use] + #[inline] #[unstable(feature = "strict_provenance", issue = "95228")] pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self where diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 3e8efc96c88f4..6d04add67d176 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -66,22 +66,30 @@ //! //! # Strict Provenance //! -//! This section is *non-normative* and describes an experimental set of APIs that help tools -//! that validate the memory-safety of your program's execution. Notably this includes [miri][] +//! **The following text is non-normative, insufficiently formal, and is an extremely strict +//! interpretation of provenance. It's ok if your code doesn't strictly conform to it.** +//! +//! [Strict Provenance][] is an experimental set of APIs that help tools that try +//! to validate the memory-safety of your program's execution. Notably this includes [miri][] //! and [CHERI][], which can detect when you access out of bounds memory or otherwise violate //! Rust's memory model. //! -//! **The following text is overly strict and insufficiently formal, and is an extremely -//! strict interpretation of provenance.** Provenance must exist in some form for any language -//! higher-level than an assembler, but to our knowledge no one has ever been able to square -//! the notion of provenance with all the operations that programmers believe should be permitted. +//! Provenance must exist in some form for any programming +//! language compiled for modern computer architectures, but specifying a model for provenance +//! 
in a way that is useful to both compilers and programmers is an ongoing challenge. //! The [Strict Provenance][] experiment seeks to explore the question: *what if we just said you //! couldn't do all the nasty operations that make provenance so messy?* //! //! What APIs would have to be removed? What APIs would have to be added? How much would code //! have to change, and is it worse or better now? Would any patterns become truly inexpressible? -//! Could we carve out special exceptions for those patterns? +//! Could we carve out special exceptions for those patterns? Should we? //! +//! A secondary goal of this project is to see if we can disambiguate the many functions of +//! pointer<->integer casts enough for the definition of `usize` to be loosened so that it +//! isn't *pointer*-sized but address-space/offset/allocation-sized (we'll probably continue +//! to conflate these notions). This would potentially make it possible to more efficiently +//! target platforms where pointers are larger than offsets, such as CHERI and maybe some +//! segmented architectures. //! //! ## Provenance //! @@ -102,26 +110,37 @@ //! The Original Pointer for an allocation is guaranteed to have unique access to the entire //! allocation and *only* that allocation. In this sense, an allocation can be thought of //! as a "sandbox" that cannot be broken into or out of. *Provenance* is the permission -//! to access an allocation's sandbox and consists of: +//! to access an allocation's sandbox and has both a *spatial* and *temporal* component: +//! +//! * Spatial: A range of bytes in the allocation that the pointer is allowed to access. +//! * Temporal: Some kind of globally unique identifier tied to the allocation itself. //! -//! * Some kind of globally unique identifier tied to the allocation itself. -//! * A range of bytes in the allocation that the pointer is allowed to access. +//! Spatial provenance makes sure you don't go beyond your sandbox, while temporal provenance +//! makes sure that you can't "get lucky" after your permission to access some memory +//! has been revoked (either through deallocations or borrows expiring). //! //! Provenance is implicitly shared with all pointers transitively derived from //! The Original Pointer through operations like [`offset`], borrowing, and pointer casts. -//! Some operations may produce a pointer whose provenance has a smaller range -//! than the one it's derived from (i.e. borrowing a subfield and subslicing). +//! Some operations may *shrink* the derived provenance, limiting how much memory it can +//! access or how long it's valid for (i.e. borrowing a subfield and subslicing). //! //! Shrinking provenance cannot be undone: even if you "know" there is a larger allocation, you //! can't derive a pointer with a larger provenance. Similarly, you cannot "recombine" //! two contiguous provenances back into one (i.e. with a `fn merge(&[T], &[T]) -> &[T]`). //! //! A reference to a value always has provenance over exactly the memory that field occupies. -//! A reference slice always has provenance over exactly the range that slice describes. +//! A reference to a slice always has provenance over exactly the range that slice describes. //! //! If an allocation is deallocated, all pointers with provenance to that allocation become //! invalidated, and effectively lose their provenance. //! +//! The strict provenance experiment is mostly only interested in exploring stricter *spatial* +//! provenance. 
In this sense it can be thought of as a subset of the more ambitious and +//! formal [Stacked Borrows][] research project, which is what tools like [miri][] are based on. +//! In particular, Stacked Borrows is necessary to properly describe what borrows are allowed +//! to do and when they become invalidated. This necessarily involves much more complex +//! *temporal* reasoning than simply identifying allocations. +//! //! //! ## Pointer Vs Addresses //! @@ -241,13 +260,6 @@ //! be used for sentinel values like `null` *or* to represent a tagged pointer that will //! never be dereferencable. //! -//! * [`wrapping_offset`][] a pointer outside its provenance. This includes invalid pointers -//! which have no provenance. Unfortunately there may be practical limits on this for a -//! particular platform ([CHERI][] may mark your pointers as invalid, but it has a pretty -//! generour buffer that is *at least* 1KB on each side of the provenance). Note that -//! least-significant-bit tagging is generally pretty robust, and often doesn't even -//! go out of bounds because types have a size >= align. -//! //! * Forge an allocation of size zero at any sufficiently aligned non-null address. //! i.e. the usual "ZSTs are fake, do what you want" rules apply *but* this only applies //! for actual forgery (integers cast to pointers). If you borrow some structs subfield @@ -255,6 +267,26 @@ //! that allocation and it will still get invalidated if the allocation gets deallocated. //! In the future we may introduce an API to make such a forged allocation explicit. //! +//! * [`wrapping_offset`][] a pointer outside its provenance. This includes invalid pointers +//! which have "no" provenance. Unfortunately there may be practical limits on this for a +//! particular platform, and it's an open question as to how to specify this (if at all). +//! Notably, [CHERI][] relies on a compression scheme that can't handle a +//! pointer getting offset "too far" out of bounds. If this happens, the address +//! returned by `addr` will be the value you expect, but the provenance will get invalidated +//! and using it to read/write will fault. The details of this are architecture-specific +//! and based on alignment, but the buffer on either side of the pointer's range is pretty +//! generous (think kilobytes, not bytes). +//! +//! * Perform pointer tagging tricks. This falls out of [`wrapping_offset`] but is worth +//! mentioning in more detail because of the limitations of [CHERI][]. Low-bit tagging +//! is very robust, and often doesn't even go out of bounds because types have a +//! size >= align (and over-aligning actually gives CHERI more flexibility). Anything +//! more complex than this rapidly enters "extremely platform-specific" territory as +//! certain things may or may not be allowed based on specific supported operations. +//! For instance, ARM explicitly supports high-bit tagging, and so CHERI on ARM inherits +//! that and should support it. +//! +//! //! [aliasing]: ../../nomicon/aliasing.html //! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer //! [ub]: ../../reference/behavior-considered-undefined.html @@ -269,6 +301,7 @@ //! [miri]: https://github.com/rust-lang/miri //! [CHERI]: https://www.cl.cam.ac.uk/research/security/ctsrd/cheri/ //! [Strict Provenance]: https://github.com/rust-lang/rust/issues/95228 +//! 
[Stacked Borrows]: https://plv.mpi-sws.org/rustbelt/stacked-borrows/ #![stable(feature = "rust1", since = "1.0.0")] @@ -410,7 +443,7 @@ pub unsafe fn drop_in_place(to_drop: *mut T) { #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")] #[rustc_diagnostic_item = "ptr_null"] pub const fn null() -> *const T { - invalid::(0) + invalid(0) } /// Creates a null mutable raw pointer. @@ -430,7 +463,7 @@ pub const fn null() -> *const T { #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")] #[rustc_diagnostic_item = "ptr_null_mut"] pub const fn null_mut() -> *mut T { - invalid_mut::(0) + invalid_mut(0) } /// Creates an invalid pointer with the given address. @@ -438,7 +471,7 @@ pub const fn null_mut() -> *mut T { /// This is *currently* equivalent to `addr as *const T` but it expresses the intended semantic /// more clearly, and may become important under future memory models. /// -/// The module's to-level documentation discusses the precise meaning of an "invalid" +/// The module's top-level documentation discusses the precise meaning of an "invalid" /// pointer but essentially this expresses that the pointer is not associated /// with any actual allocation and is little more than a usize address in disguise. /// @@ -446,6 +479,9 @@ pub const fn null_mut() -> *mut T { /// UB to read/write/offset. This mostly exists to facilitate things /// like ptr::null and NonNull::dangling which make invalid pointers. /// +/// (Standard "Zero-Sized-Types get to cheat and lie" caveats apply, although it +/// may be desirable to give them their own API just to make that 100% clear.) +/// /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[inline(always)] @@ -459,10 +495,10 @@ pub const fn invalid(addr: usize) -> *const T { /// Creates an invalid mutable pointer with the given address. /// -/// This is *currently* equivalent to `addr as *const T` but it expresses the intended semantic +/// This is *currently* equivalent to `addr as *mut T` but it expresses the intended semantic /// more clearly, and may become important under future memory models. /// -/// The module's to-level documentation discusses the precise meaning of an "invalid" +/// The module's top-level documentation discusses the precise meaning of an "invalid" /// pointer but essentially this expresses that the pointer is not associated /// with any actual allocation and is little more than a usize address in disguise. /// @@ -470,6 +506,9 @@ pub const fn invalid(addr: usize) -> *const T { /// UB to read/write/offset. This mostly exists to facilitate things /// like ptr::null and NonNull::dangling which make invalid pointers. /// +/// (Standard "Zero-Sized-Types get to cheat and lie" caveats apply, although it +/// may be desirable to give them their own API just to make that 100% clear.) +/// /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[inline(always)] diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index ead96147b526b..547208025214a 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -160,14 +160,16 @@ impl *mut T { /// *provenance* and *address-space* information. To properly restore that information, /// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr]. 
/// - /// On most platforms this information isn't represented at runtime, and so the loss - /// of information is "only" semantic. On more complicated platforms like miri, CHERI, - /// and segmented architectures, this may result in an actual change of representation - /// and the loss of information. + /// On most platforms this will produce a value with the same bytes as the original + /// pointer, because all the bytes are dedicated to describing the address. + /// Platforms which need to store additional information in the pointer may + /// perform a change of representation to produce a value containing only the address + /// portion of the pointer. What that means is up to the platform to define. /// /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[must_use] + #[inline] #[unstable(feature = "strict_provenance", issue = "95228")] pub fn addr(self) -> usize where @@ -184,12 +186,13 @@ impl *mut T { /// This allows us to dynamically preserve and propagate this important /// information in a way that is otherwise impossible with a unary cast. /// - /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset `self` to the - /// given address, and therefore has all the same capabilities and restrictions. + /// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset + /// `self` to the given address, and therefore has all the same capabilities and restrictions. /// /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[must_use] + #[inline] #[unstable(feature = "strict_provenance", issue = "95228")] pub fn with_addr(self, addr: usize) -> Self where @@ -215,6 +218,7 @@ impl *mut T { /// This API and its claimed semantics are part of the Strict Provenance experiment, /// see the [module documentation][crate::ptr] for details. #[must_use] + #[inline] #[unstable(feature = "strict_provenance", issue = "95228")] pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self where From 7514d760b8fabd46e4874520b653ab8803c3517e Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Sat, 26 Mar 2022 17:03:40 -0400 Subject: [PATCH 07/15] cleanup some of the less terrifying library code --- library/alloc/src/slice.rs | 4 ++-- library/core/src/slice/ascii.rs | 6 +++--- library/core/src/slice/iter/macros.rs | 2 +- library/std/src/io/error/repr_bitpacked.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs index 7c892f03bfb78..31365562ddb50 100644 --- a/library/alloc/src/slice.rs +++ b/library/alloc/src/slice.rs @@ -1043,9 +1043,9 @@ where impl Drop for MergeHole { fn drop(&mut self) { - // `T` is not a zero-sized type, so it's okay to divide by its size. - let len = (self.end.addr() - self.start.addr()) / mem::size_of::(); + // `T` is not a zero-sized type, and these are pointers into a slice's elements. unsafe { + let len = self.end.offset_from(self.start) as usize; ptr::copy_nonoverlapping(self.start, self.dest, len); } } diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index 63d761d3c02d6..6c9107401fda6 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -294,7 +294,7 @@ fn is_ascii(s: &[u8]) -> bool { // Paranoia check about alignment, since we're about to do a bunch of // unaligned loads. 
In practice this should be impossible barring a bug in // `align_offset` though. - debug_assert_eq!((word_ptr.addr()) % mem::align_of::(), 0); + debug_assert_eq!(word_ptr.addr() % mem::align_of::(), 0); // Read subsequent words until the last aligned word, excluding the last // aligned word by itself to be done in tail check later, to ensure that @@ -302,9 +302,9 @@ fn is_ascii(s: &[u8]) -> bool { while byte_pos < len - USIZE_SIZE { debug_assert!( // Sanity check that the read is in bounds - (word_ptr.addr() + USIZE_SIZE) <= (start.wrapping_add(len).addr()) && + (word_ptr.addr() + USIZE_SIZE) <= start.addr().wrapping_add(len) && // And that our assumptions about `byte_pos` hold. - (word_ptr.addr()) - (start.addr()) == byte_pos + (word_ptr.addr() - start.addr()) == byte_pos ); // SAFETY: We know `word_ptr` is properly aligned (because of diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index 96ead49dd6aaf..b74ab28fc092a 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -20,7 +20,7 @@ macro_rules! len { if size == 0 { // This _cannot_ use `unchecked_sub` because we depend on wrapping // to represent the length of long ZST slice iterators. - ($self.end.addr()).wrapping_sub(start.as_ptr().addr()) + $self.end.addr().wrapping_sub(start.as_ptr().addr()) } else { // We know that `start <= end`, so can do better than `offset_from`, // which needs to deal in signed. By setting appropriate flags here diff --git a/library/std/src/io/error/repr_bitpacked.rs b/library/std/src/io/error/repr_bitpacked.rs index 7cc1c701064c5..e80068b46abb9 100644 --- a/library/std/src/io/error/repr_bitpacked.rs +++ b/library/std/src/io/error/repr_bitpacked.rs @@ -136,7 +136,7 @@ impl Repr { let p = Box::into_raw(b).cast::(); // Should only be possible if an allocator handed out a pointer with // wrong alignment. - debug_assert_eq!((p.addr() & TAG_MASK), 0); + debug_assert_eq!(p.addr() & TAG_MASK, 0); // Note: We know `TAG_CUSTOM <= size_of::()` (static_assert at // end of file), and both the start and end of the expression must be // valid without address space wraparound due to `Box`'s semantics. From 9efcd996d528b7da9beb0a9743b6659c667154d6 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Sun, 27 Mar 2022 15:06:06 -0400 Subject: [PATCH 08/15] Add even more details to top-level pointer docs --- library/core/src/ptr/mod.rs | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 6d04add67d176..d988edb213e58 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -153,8 +153,8 @@ //! integers is very useful, so what can we do? //! //! Strict Provenance attempts to square this circle by decoupling Rust's traditional conflation -//! of pointers and `usize` (and `isize`), defining a pointer to semantically contain the following -//! information: +//! of pointers and `usize` (and `isize`), and defining a pointer to semantically contain the +//! following information: //! //! * The **address-space** it is part of. //! * The **address** it points to, which can be represented by a `usize`. @@ -235,6 +235,10 @@ //! } //! ``` //! +//! (Yes, if you've been using AtomicUsize for pointers in concurrent datastructures, you should +//! be using AtomicPtr instead. If that messes up the way you atomically manipulate pointers, +//! we would like to know why, and what needs to be done to fix it.) +//! //! 
Something more complicated and just generally *evil* like a XOR-List requires more significant //! changes like allocating all nodes in a pre-allocated Vec or Arena and using a pointer //! to the whole allocation to reconstitute the XORed addresses. @@ -258,11 +262,13 @@ //! //! * Create an invalid pointer from just an address (see [`ptr::invalid`][]). This can //! be used for sentinel values like `null` *or* to represent a tagged pointer that will -//! never be dereferencable. +//! never be dereferencable. In general, it is always sound for an integer to pretend +//! to be a pointer "for fun" as long as you don't use operations on it which require +//! it to be valid (offset, read, write, etc). //! //! * Forge an allocation of size zero at any sufficiently aligned non-null address. //! i.e. the usual "ZSTs are fake, do what you want" rules apply *but* this only applies -//! for actual forgery (integers cast to pointers). If you borrow some structs subfield +//! for actual forgery (integers cast to pointers). If you borrow some struct's field //! that *happens* to be zero-sized, the resulting pointer will have provenance tied to //! that allocation and it will still get invalidated if the allocation gets deallocated. //! In the future we may introduce an API to make such a forged allocation explicit. @@ -277,6 +283,16 @@ //! and based on alignment, but the buffer on either side of the pointer's range is pretty //! generous (think kilobytes, not bytes). //! +//! * Compare arbitrary pointers by address. Addresses *are* just integers and so there is +//! always a coherent answer, even if the pointers are invalid or from different +//! address-spaces/provenances. Of course, comparing addresses from different address-spaces +//! is generally going to be *meaningless*, but so is comparing Kilograms to Meters, and Rust +//! doesn't prevent that either. Similarly, if you get "lucky" and notice that a pointer +//! one-past-the-end is the "same" address as the start of an unrelated allocation, anything +//! you do with that fact is *probably* going to be gibberish. The scope of that gibberish +//! is kept under control by the fact that the two pointers *still* aren't allowed to access +//! the other's allocation (bytes), because they still have different provenance. +//! //! * Perform pointer tagging tricks. This falls out of [`wrapping_offset`] but is worth //! mentioning in more detail because of the limitations of [CHERI][]. Low-bit tagging //! is very robust, and often doesn't even go out of bounds because types have a From 5f720fa55e3f9bc6a59ea8caf1fdff18405d3b65 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 28 Mar 2022 00:37:28 -0400 Subject: [PATCH 09/15] more review fixes to ptr docs --- library/core/src/ptr/mod.rs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index d988edb213e58..afa6225a9f753 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -103,17 +103,17 @@ //! they must have provenance. //! //! When an allocation is created, that allocation has a unique Original Pointer. For alloc -//! APIs this is literally the pointer the call returns, and for variables declarations this -//! is the name of the variable. This is mildly overloading the term "pointer" for the sake -//! of brevity/exposition. +//! APIs this is literally the pointer the call returns, and for local variables and statics, +//! this is the name of the variable/static. 
This is mildly overloading the term "pointer" +//! for the sake of brevity/exposition. //! //! The Original Pointer for an allocation is guaranteed to have unique access to the entire //! allocation and *only* that allocation. In this sense, an allocation can be thought of //! as a "sandbox" that cannot be broken into or out of. *Provenance* is the permission //! to access an allocation's sandbox and has both a *spatial* and *temporal* component: //! -//! * Spatial: A range of bytes in the allocation that the pointer is allowed to access. -//! * Temporal: Some kind of globally unique identifier tied to the allocation itself. +//! * Spatial: A range of bytes that the pointer is allowed to access. +//! * Temporal: The allocation that access to these bytes is tied to. //! //! Spatial provenance makes sure you don't go beyond your sandbox, while temporal provenance //! makes sure that you can't "get lucky" after your permission to access some memory @@ -139,7 +139,8 @@ //! formal [Stacked Borrows][] research project, which is what tools like [miri][] are based on. //! In particular, Stacked Borrows is necessary to properly describe what borrows are allowed //! to do and when they become invalidated. This necessarily involves much more complex -//! *temporal* reasoning than simply identifying allocations. +//! *temporal* reasoning than simply identifying allocations. Adjusting APIs and code +//! for the strict provenance experiment will also greatly help Stacked Borrows. //! //! //! ## Pointer Vs Addresses @@ -152,7 +153,13 @@ //! to very complex and unreliable heuristics. But of course, converting between pointers and //! integers is very useful, so what can we do? //! -//! Strict Provenance attempts to square this circle by decoupling Rust's traditional conflation +//! Also did you know WASM is actually a "Harvard Architecture"? As in function pointers are +//! handled completely differently from data pointers? And we kind of just shipped Rust on WASM +//! without really addressing the fact that we let you freely convert between function pointers +//! and data pointers, because it mostly Just Works? Let's just put that on the "pointer casts +//! are dubious" pile. +//! +//! Strict Provenance attempts to square these circles by decoupling Rust's traditional conflation //! of pointers and `usize` (and `isize`), and defining a pointer to semantically contain the //! following information: //! @@ -246,14 +253,15 @@ //! Situations where a valid pointer *must* be created from just an address, such as baremetal code //! accessing a memory-mapped interface at a fixed address, are an open question on how to support. //! These situations *will* still be allowed, but we might require some kind of "I know what I'm -//! doing" annotation to explain the situation to the compiler. Because those situations require -//! `volatile` accesses anyway, it should be possible to carve out exceptions for them. +//! doing" annotation to explain the situation to the compiler. It's also possible they need no +//! special attention at all, because they're generally accessing memory outside the scope of +//! "the abstract machine", or already using "I know what I'm doing" annotations like "volatile". //! //! Under [Strict Provenance] is is Undefined Behaviour to: //! //! * Access memory through a pointer that does not have provenance over that memory. //! -//! * [`offset`] a pointer to an address it doesn't have provenance over. +//! * [`offset`] a pointer to or from an address it doesn't have provenance over. //! 
This means it's always UB to offset a pointer derived from something deallocated, //! even if the offset is 0. Note that a pointer "one past the end" of its provenance //! is not actually outside its provenance, it just has 0 bytes it can load/store. @@ -295,7 +303,7 @@ //! //! * Perform pointer tagging tricks. This falls out of [`wrapping_offset`] but is worth //! mentioning in more detail because of the limitations of [CHERI][]. Low-bit tagging -//! is very robust, and often doesn't even go out of bounds because types have a +//! is very robust, and often doesn't even go out of bounds because types ensure //! size >= align (and over-aligning actually gives CHERI more flexibility). Anything //! more complex than this rapidly enters "extremely platform-specific" territory as //! certain things may or may not be allowed based on specific supported operations. From 28576e9c511219ba8bf79c241fc52f23eeceaa2b Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 28 Mar 2022 00:43:18 -0400 Subject: [PATCH 10/15] mark FIXMES for all the places found that are probably offset_from --- compiler/rustc_arena/src/lib.rs | 6 ++++++ library/core/src/slice/sort.rs | 2 ++ library/std/src/sys/unix/memchr.rs | 2 ++ 3 files changed, 10 insertions(+) diff --git a/compiler/rustc_arena/src/lib.rs b/compiler/rustc_arena/src/lib.rs index de1d5c07f5028..62995dfd2e2f0 100644 --- a/compiler/rustc_arena/src/lib.rs +++ b/compiler/rustc_arena/src/lib.rs @@ -217,6 +217,8 @@ impl TypedArena { #[inline] fn can_allocate(&self, additional: usize) -> bool { + // FIXME: this should *likely* use `offset_from`, but more + // investigation is needed (including running tests in miri). let available_bytes = self.end.get().addr() - self.ptr.get().addr(); let additional_bytes = additional.checked_mul(mem::size_of::()).unwrap(); available_bytes >= additional_bytes @@ -263,6 +265,8 @@ impl TypedArena { // If a type is `!needs_drop`, we don't need to keep track of how many elements // the chunk stores - the field will be ignored anyway. if mem::needs_drop::() { + // FIXME: this should *likely* use `offset_from`, but more + // investigation is needed (including running tests in miri). let used_bytes = self.ptr.get().addr() - last_chunk.start().addr(); last_chunk.entries = used_bytes / mem::size_of::(); } @@ -300,6 +304,8 @@ impl TypedArena { // Recall that `end` was incremented for each allocated value. end - start } else { + // FIXME: this should *likely* use `offset_from`, but more + // investigation is needed (including running tests in miri). (end - start) / mem::size_of::() }; // Pass that to the `destroy` method. diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs index 5cf08b5740e82..1f392a079718c 100644 --- a/library/core/src/slice/sort.rs +++ b/library/core/src/slice/sort.rs @@ -269,6 +269,8 @@ where // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive). fn width(l: *mut T, r: *mut T) -> usize { assert!(mem::size_of::() > 0); + // FIXME: this should *likely* use `offset_from`, but more + // investigation is needed (including running tests in miri). 
(r.addr() - l.addr()) / mem::size_of::() } diff --git a/library/std/src/sys/unix/memchr.rs b/library/std/src/sys/unix/memchr.rs index a3e4f8ff56aee..73ba604eccba2 100644 --- a/library/std/src/sys/unix/memchr.rs +++ b/library/std/src/sys/unix/memchr.rs @@ -26,6 +26,8 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { haystack.len(), ) }; + // FIXME: this should *likely* use `offset_from`, but more + // investigation is needed (including running tests in miri). if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) } } From 378ed259d99a557c1335b1287385b2f11d54b182 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 28 Mar 2022 13:16:04 -0400 Subject: [PATCH 11/15] refine the definition of temporal provenance --- library/core/src/ptr/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index afa6225a9f753..2c38c9de2a98c 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -113,7 +113,7 @@ //! to access an allocation's sandbox and has both a *spatial* and *temporal* component: //! //! * Spatial: A range of bytes that the pointer is allowed to access. -//! * Temporal: The allocation that access to these bytes is tied to. +//! * Temporal: The lifetime (of the allocation) that access to these bytes is tied to. //! //! Spatial provenance makes sure you don't go beyond your sandbox, while temporal provenance //! makes sure that you can't "get lucky" after your permission to access some memory From 075c576182fe1eb759f747135566eb39b7bc0899 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 28 Mar 2022 14:06:16 -0400 Subject: [PATCH 12/15] fix doc link --- library/core/src/ptr/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 2c38c9de2a98c..816a5bca00aa4 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -317,7 +317,7 @@ //! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts //! [atomic operations]: crate::sync::atomic //! [`offset`]: pointer::offset -//! [`wrapping_offset`]: pointer::offset +//! [`wrapping_offset`]: pointer::wrapping_offset //! [`with_addr`]: pointer::with_addr //! [`map_addr`]: pointer::map_addr //! [`addr`]: pointer::addr From a91a9eeffff78895f7879e196fa08ffdcfcda0fd Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 28 Mar 2022 14:26:24 -0400 Subject: [PATCH 13/15] clarify that WASM has address spaces --- library/core/src/ptr/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 816a5bca00aa4..6a7841d3de6af 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -163,7 +163,7 @@ //! of pointers and `usize` (and `isize`), and defining a pointer to semantically contain the //! following information: //! -//! * The **address-space** it is part of. +//! * The **address-space** it is part of (i.e. "data" vs "code" in WASM). //! * The **address** it points to, which can be represented by a `usize`. //! * The **provenance** it has, defining the memory it has permission to access. //! 
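Several of the library patches above (the Windows `thread_parker` and the condvar same-mutex check, for instance) follow another recurring pattern: a pointer that used to be smuggled through `AtomicUsize` is stored in `AtomicPtr` instead, so its provenance is never stripped. A stand-alone sketch of that pattern, with hypothetical names and only stable APIs:

```rust
use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering};

// A lazily initialized global pointer. Keeping it in an `AtomicPtr` (rather
// than an `AtomicUsize`) means the stored value still carries provenance.
static CACHE: AtomicPtr<u8> = AtomicPtr::new(ptr::null_mut());

fn get_or_init(init: fn() -> *mut u8) -> *mut u8 {
    let p = CACHE.load(Ordering::Acquire);
    if !p.is_null() {
        return p;
    }
    let new = init();
    match CACHE.compare_exchange(ptr::null_mut(), new, Ordering::AcqRel, Ordering::Acquire) {
        Ok(_) => new,
        // Another thread won the race; a real implementation would release `new` here.
        Err(existing) => existing,
    }
}
```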
From 37d4753776f21df54e027141e1ef8922345a9b35 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 29 Mar 2022 17:45:54 -0400 Subject: [PATCH 14/15] fixup feature position in liballoc --- library/alloc/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 7e90d77b8f289..065d071a2e360 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -127,6 +127,7 @@ #![feature(slice_ptr_len)] #![feature(slice_range)] #![feature(str_internals)] +#![feature(strict_provenance)] #![feature(trusted_len)] #![feature(trusted_random_access)] #![feature(try_trait_v2)] @@ -158,7 +159,6 @@ #![feature(rustc_allow_const_fn_unstable)] #![feature(rustc_attrs)] #![feature(staged_api)] -#![feature(strict_provenance)] #![cfg_attr(test, feature(test))] #![feature(unboxed_closures)] #![feature(unsized_fn_params)] From e3a3afe05099dc1f9078fa1f65ade467b92f42c3 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 29 Mar 2022 22:45:31 -0400 Subject: [PATCH 15/15] fix unix typedef --- library/std/src/os/unix/net/addr.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/os/unix/net/addr.rs b/library/std/src/os/unix/net/addr.rs index c323161d76537..9aeae4b2cae69 100644 --- a/library/std/src/os/unix/net/addr.rs +++ b/library/std/src/os/unix/net/addr.rs @@ -18,7 +18,7 @@ mod libc { fn sun_path_offset(addr: &libc::sockaddr_un) -> usize { // Work with an actual instance of the type since using a null pointer is UB let base = (addr as *const libc::sockaddr_un).addr(); - let path = (&addr.sun_path as *const i8).addr(); + let path = (&addr.sun_path as *const libc::c_char).addr(); path - base }
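As a closing illustration, the low-bit tagging trick discussed in the module docs above can be written entirely in terms of `addr`/`map_addr`. This is a minimal sketch with a hypothetical `Tagged` type (it relies on `*mut u64` being aligned to at least 2, so the low bit is otherwise zero), assuming the `strict_provenance` APIs from this series:

```rust
#![feature(strict_provenance)]

const TAG_MASK: usize = 0b1;

/// Hypothetical tagged pointer: a one-bit flag packed into the low bit of a
/// pointer to a `u64` (whose alignment guarantees that bit is free).
struct Tagged(*mut u64);

impl Tagged {
    fn new(ptr: *mut u64, flag: bool) -> Self {
        // `map_addr` sets the bit while keeping the original provenance.
        Tagged(ptr.map_addr(|addr| addr | flag as usize))
    }

    fn flag(&self) -> bool {
        // Only the address is inspected; no pointer is created from an integer.
        self.0.addr() & TAG_MASK != 0
    }

    fn untagged(&self) -> *mut u64 {
        // Clearing the bit also goes through `map_addr`, so the result still
        // carries the provenance of the pointer passed to `new`.
        self.0.map_addr(|addr| addr & !TAG_MASK)
    }
}
```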