From f6a57c195519d1e3e1f2f84b5793b2449f6c1625 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Wed, 1 Mar 2023 10:57:46 -0800 Subject: [PATCH 1/2] Move `Option::as_slice` to an always-sound implementation This approach depends on CSE to not have any branches or selects when the guessed offset is correct -- which it always will be right now -- but to also be *sound* (just less efficient) if the layout algorithms change such that the guess is incorrect. --- library/core/src/option.rs | 116 +++++++++++++++++++++---------- tests/codegen/option-as-slice.rs | 18 +++-- 2 files changed, 92 insertions(+), 42 deletions(-) diff --git a/library/core/src/option.rs b/library/core/src/option.rs index 994c08d1fb50d..c480b6ca4af90 100644 --- a/library/core/src/option.rs +++ b/library/core/src/option.rs @@ -735,22 +735,43 @@ impl Option { } } - const fn get_some_offset() -> isize { - if mem::size_of::>() == mem::size_of::() { - // niche optimization means the `T` is always stored at the same position as the Option. - 0 + /// This is a guess at how many bytes into the option the payload can be found. + /// + /// For niche-optimized types it's correct because it's pigeon-holed to only + /// one possible place. For other types, it's usually correct today, but + /// tweaks to the layout algorithm (particularly expansions of + /// `-Z randomize-layout`) might make it incorrect at any point. + /// + /// It's guaranteed to be a multiple of alignment (so will always give a + /// correctly-aligned location) and to be within the allocated object, so + /// is valid to use with `offset` and to use for a zero-sized read. + const SOME_BYTE_OFFSET_GUESS: isize = { + let some_uninit = Some(mem::MaybeUninit::::uninit()); + let payload_ref = some_uninit.as_ref().unwrap(); + // SAFETY: `as_ref` gives an address inside the existing `Option`, + // so both pointers are derived from the same thing and the result + // cannot overflow an `isize`. + let offset = unsafe { <*const _>::byte_offset_from(payload_ref, &some_uninit) }; + + // The offset is into the object, so it's guaranteed to be non-negative. + assert!(offset >= 0); + + // The payload and the overall option are aligned, + // so the offset will be a multiple of the alignment too. + assert!((offset as usize) % mem::align_of::() == 0); + + let max_offset = mem::size_of::() - mem::size_of::(); + if offset as usize <= max_offset { + // The offset is at least inside the object, so let's try it. + offset } else { - assert!(mem::size_of::>() == mem::size_of::>>()); - let some_uninit = Some(mem::MaybeUninit::::uninit()); - // SAFETY: This gets the byte offset of the `Some(_)` value following the fact that - // niche optimization is not active, and thus Option and Option> share - // the same layout. - unsafe { - (some_uninit.as_ref().unwrap() as *const mem::MaybeUninit) - .byte_offset_from(&some_uninit as *const Option>) - } + // The offset guess is definitely wrong, so use the address + // of the original option since we have it already. + // This also correctly handles the case of layout-optimized enums + // where `max_offset == 0` and thus this is the only possibility. + 0 } - } + }; /// Returns a slice of the contained value, if any. If this is `None`, an /// empty slice is returned. This can be useful to have a single type of @@ -784,18 +805,28 @@ impl Option { #[must_use] #[unstable(feature = "option_as_slice", issue = "108545")] pub fn as_slice(&self) -> &[T] { - // SAFETY: This is sound as long as `get_some_offset` returns the - // correct offset. Though in the `None` case, the slice may be located - // at a pointer pointing into padding, the fact that the slice is - // empty, and the padding is at a properly aligned position for a - // value of that type makes it sound. - unsafe { - slice::from_raw_parts( - (self as *const Option).wrapping_byte_offset(Self::get_some_offset()) - as *const T, - self.is_some() as usize, - ) - } + let payload_ptr: *const T = + // The goal here is that both arms here are calculating exactly + // the same pointer, and thus it'll be folded away when the guessed + // offset is correct, but if the guess is wrong for some reason + // it'll at least still be sound, just no longer optimal. + if let Some(payload) = self { + payload + } else { + let self_ptr: *const Self = self; + // SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is + // such that this will be in-bounds of the object. + unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() } + }; + let len = usize::from(self.is_some()); + + // SAFETY: When the `Option` is `Some`, we're using the actual pointer + // to the payload, with a length of 1, so this is equivalent to + // `slice::from_ref`, and thus is safe. + // When the `Option` is `None`, the length used is 0, so to be safe it + // just needs to be aligned, which it is because `&self` is aligned and + // the offset used is a multiple of alignment. + unsafe { slice::from_raw_parts(payload_ptr, len) } } /// Returns a mutable slice of the contained value, if any. If this is @@ -840,17 +871,28 @@ impl Option { #[must_use] #[unstable(feature = "option_as_slice", issue = "108545")] pub fn as_mut_slice(&mut self) -> &mut [T] { - // SAFETY: This is sound as long as `get_some_offset` returns the - // correct offset. Though in the `None` case, the slice may be located - // at a pointer pointing into padding, the fact that the slice is - // empty, and the padding is at a properly aligned position for a - // value of that type makes it sound. - unsafe { - slice::from_raw_parts_mut( - (self as *mut Option).wrapping_byte_offset(Self::get_some_offset()) as *mut T, - self.is_some() as usize, - ) - } + let payload_ptr: *mut T = + // The goal here is that both arms here are calculating exactly + // the same pointer, and thus it'll be folded away when the guessed + // offset is correct, but if the guess is wrong for some reason + // it'll at least still be sound, just no longer optimal. + if let Some(payload) = self { + payload + } else { + let self_ptr: *mut Self = self; + // SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is + // such that this will be in-bounds of the object. + unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() } + }; + let len = usize::from(self.is_some()); + + // SAFETY: When the `Option` is `Some`, we're using the actual pointer + // to the payload, with a length of 1, so this is equivalent to + // `slice::from_mut`, and thus is safe. + // When the `Option` is `None`, the length used is 0, so to be safe it + // just needs to be aligned, which it is because `&self` is aligned and + // the offset used is a multiple of alignment. + unsafe { slice::from_raw_parts_mut(payload_ptr, len) } } ///////////////////////////////////////////////////////////////////////// diff --git a/tests/codegen/option-as-slice.rs b/tests/codegen/option-as-slice.rs index d5077dbf6ccd9..d735d55837408 100644 --- a/tests/codegen/option-as-slice.rs +++ b/tests/codegen/option-as-slice.rs @@ -1,4 +1,4 @@ -// compile-flags: -O +// compile-flags: -O -Z randomize-layout=no // only-x86_64 #![crate_type = "lib"] @@ -12,17 +12,25 @@ use core::option::Option; // CHECK-LABEL: @u64_opt_as_slice #[no_mangle] pub fn u64_opt_as_slice(o: &Option) -> &[u64] { - // CHECK: start: // CHECK-NOT: select - // CHECK: ret + // CHECK-NOT: br + // CHECK-NOT: switch + // CHECK-NOT: icmp o.as_slice() } // CHECK-LABEL: @nonzero_u64_opt_as_slice #[no_mangle] pub fn nonzero_u64_opt_as_slice(o: &Option) -> &[NonZeroU64] { - // CHECK: start: // CHECK-NOT: select - // CHECK: ret + // CHECK-NOT: br + // CHECK-NOT: switch + // CHECK-NOT: icmp + // CHECK: %[[NZ:.+]] = icmp ne i64 %{{.+}}, 0 + // CHECK-NEXT: zext i1 %[[NZ]] to i64 + // CHECK-NOT: select + // CHECK-NOT: br + // CHECK-NOT: switch + // CHECK-NOT: icmp o.as_slice() } From e97505704e2043df46c99c3cbd0dd1ce3938c123 Mon Sep 17 00:00:00 2001 From: Scott McMurray Date: Sun, 12 Mar 2023 16:30:51 -0700 Subject: [PATCH 2/2] Clarify the text of some comments --- library/core/src/option.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/library/core/src/option.rs b/library/core/src/option.rs index c480b6ca4af90..0f2475a8bdea6 100644 --- a/library/core/src/option.rs +++ b/library/core/src/option.rs @@ -745,6 +745,9 @@ impl Option { /// It's guaranteed to be a multiple of alignment (so will always give a /// correctly-aligned location) and to be within the allocated object, so /// is valid to use with `offset` and to use for a zero-sized read. + /// + /// FIXME: This is a horrible hack, but allows a nice optimization. It should + /// be replaced with `offset_of!` once that works on enum variants. const SOME_BYTE_OFFSET_GUESS: isize = { let some_uninit = Some(mem::MaybeUninit::::uninit()); let payload_ref = some_uninit.as_ref().unwrap(); @@ -762,7 +765,8 @@ impl Option { let max_offset = mem::size_of::() - mem::size_of::(); if offset as usize <= max_offset { - // The offset is at least inside the object, so let's try it. + // There's enough space after this offset for a `T` to exist without + // overflowing the bounds of the object, so let's try it. offset } else { // The offset guess is definitely wrong, so use the address