diff --git a/compiler/rustc_hir/src/lang_items.rs b/compiler/rustc_hir/src/lang_items.rs index 72ff317d45d6e..0863d65d8f9c8 100644 --- a/compiler/rustc_hir/src/lang_items.rs +++ b/compiler/rustc_hir/src/lang_items.rs @@ -301,6 +301,7 @@ language_item_table! { Context, sym::Context, context, Target::Struct, GenericRequirement::None; FuturePoll, sym::poll, future_poll_fn, Target::Method(MethodKind::Trait { body: false }), GenericRequirement::None; + Option, sym::Option, option_type, Target::Enum, GenericRequirement::None; OptionSome, sym::Some, option_some_variant, Target::Variant, GenericRequirement::None; OptionNone, sym::None, option_none_variant, Target::Variant, GenericRequirement::None; diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index 8c364a4f3b2b8..2f9fc7adb5212 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -222,6 +222,21 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) { ], tcx.mk_ptr(ty::TypeAndMut { ty: param(0), mutbl: hir::Mutability::Not }), ), + sym::option_payload_ptr => { + let option_def_id = tcx.require_lang_item(hir::LangItem::Option, None); + let p0 = param(0); + ( + 1, + vec![tcx.mk_ptr(ty::TypeAndMut { + ty: tcx.mk_adt( + tcx.adt_def(option_def_id), + tcx.mk_substs_from_iter([ty::GenericArg::from(p0)].into_iter()), + ), + mutbl: hir::Mutability::Not, + })], + tcx.mk_ptr(ty::TypeAndMut { ty: p0, mutbl: hir::Mutability::Not }), + ) + } sym::ptr_mask => ( 1, vec![ diff --git a/compiler/rustc_mir_transform/src/lower_intrinsics.rs b/compiler/rustc_mir_transform/src/lower_intrinsics.rs index 5d7382305ae14..46eab1184bdad 100644 --- a/compiler/rustc_mir_transform/src/lower_intrinsics.rs +++ b/compiler/rustc_mir_transform/src/lower_intrinsics.rs @@ -6,6 +6,7 @@ use rustc_middle::ty::subst::SubstsRef; use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_span::symbol::{sym, Symbol}; use rustc_span::Span; +use rustc_target::abi::VariantIdx; pub struct LowerIntrinsics; @@ -191,6 +192,35 @@ impl<'tcx> MirPass<'tcx> for LowerIntrinsics { terminator.kind = TerminatorKind::Goto { target }; } } + sym::option_payload_ptr => { + if let (Some(target), Some(arg)) = (*target, args[0].place()) { + let ty::RawPtr(ty::TypeAndMut { ty: dest_ty, .. }) = + destination.ty(local_decls, tcx).ty.kind() + else { bug!(); }; + + block.statements.push(Statement { + source_info: terminator.source_info, + kind: StatementKind::Assign(Box::new(( + *destination, + Rvalue::AddressOf( + Mutability::Not, + arg.project_deeper( + &[ + PlaceElem::Deref, + PlaceElem::Downcast( + Some(sym::Some), + VariantIdx::from_u32(1), + ), + PlaceElem::Field(Field::from_u32(0), *dest_ty), + ], + tcx, + ), + ), + ))), + }); + terminator.kind = TerminatorKind::Goto { target }; + } + } _ if intrinsic_name.as_str().starts_with("simd_shuffle") => { validate_simd_shuffle(tcx, args, terminator.source_info.span); } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 0154c719ef608..139fb48e90e06 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1043,6 +1043,7 @@ symbols! { optin_builtin_traits, option, option_env, + option_payload_ptr, options, or, or_patterns, diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index ee8846675ce25..7482b8b0862e2 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -2214,6 +2214,12 @@ extern "rust-intrinsic" { where G: FnOnce, F: FnOnce; + + #[cfg(not(bootstrap))] + /// This method creates a pointer to any `Some` value. If the argument is + /// `None`, an invalid within-bounds pointer (that is still acceptable for + /// constructing an empty slice) is returned. + pub fn option_payload_ptr(arg: *const Option) -> *const T; } // Some functions are defined here because they accidentally got made diff --git a/library/core/src/option.rs b/library/core/src/option.rs index 0f2475a8bdea6..cba597e66aa13 100644 --- a/library/core/src/option.rs +++ b/library/core/src/option.rs @@ -559,6 +559,7 @@ use crate::{ /// The `Option` type. See [the module level documentation](self) for more. #[derive(Copy, PartialOrd, Eq, Ord, Debug, Hash)] #[rustc_diagnostic_item = "Option"] +#[cfg_attr(not(bootstrap), lang = "Option")] #[stable(feature = "rust1", since = "1.0.0")] pub enum Option { /// No value. @@ -735,48 +736,6 @@ impl Option { } } - /// This is a guess at how many bytes into the option the payload can be found. - /// - /// For niche-optimized types it's correct because it's pigeon-holed to only - /// one possible place. For other types, it's usually correct today, but - /// tweaks to the layout algorithm (particularly expansions of - /// `-Z randomize-layout`) might make it incorrect at any point. - /// - /// It's guaranteed to be a multiple of alignment (so will always give a - /// correctly-aligned location) and to be within the allocated object, so - /// is valid to use with `offset` and to use for a zero-sized read. - /// - /// FIXME: This is a horrible hack, but allows a nice optimization. It should - /// be replaced with `offset_of!` once that works on enum variants. - const SOME_BYTE_OFFSET_GUESS: isize = { - let some_uninit = Some(mem::MaybeUninit::::uninit()); - let payload_ref = some_uninit.as_ref().unwrap(); - // SAFETY: `as_ref` gives an address inside the existing `Option`, - // so both pointers are derived from the same thing and the result - // cannot overflow an `isize`. - let offset = unsafe { <*const _>::byte_offset_from(payload_ref, &some_uninit) }; - - // The offset is into the object, so it's guaranteed to be non-negative. - assert!(offset >= 0); - - // The payload and the overall option are aligned, - // so the offset will be a multiple of the alignment too. - assert!((offset as usize) % mem::align_of::() == 0); - - let max_offset = mem::size_of::() - mem::size_of::(); - if offset as usize <= max_offset { - // There's enough space after this offset for a `T` to exist without - // overflowing the bounds of the object, so let's try it. - offset - } else { - // The offset guess is definitely wrong, so use the address - // of the original option since we have it already. - // This also correctly handles the case of layout-optimized enums - // where `max_offset == 0` and thus this is the only possibility. - 0 - } - }; - /// Returns a slice of the contained value, if any. If this is `None`, an /// empty slice is returned. This can be useful to have a single type of /// iterator over an `Option` or slice. @@ -809,28 +768,29 @@ impl Option { #[must_use] #[unstable(feature = "option_as_slice", issue = "108545")] pub fn as_slice(&self) -> &[T] { - let payload_ptr: *const T = - // The goal here is that both arms here are calculating exactly - // the same pointer, and thus it'll be folded away when the guessed - // offset is correct, but if the guess is wrong for some reason - // it'll at least still be sound, just no longer optimal. - if let Some(payload) = self { - payload - } else { - let self_ptr: *const Self = self; - // SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is - // such that this will be in-bounds of the object. - unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() } - }; - let len = usize::from(self.is_some()); + #[cfg(bootstrap)] + match self { + Some(value) => slice::from_ref(value), + None => &[], + } + #[cfg(not(bootstrap))] // SAFETY: When the `Option` is `Some`, we're using the actual pointer // to the payload, with a length of 1, so this is equivalent to // `slice::from_ref`, and thus is safe. // When the `Option` is `None`, the length used is 0, so to be safe it // just needs to be aligned, which it is because `&self` is aligned and // the offset used is a multiple of alignment. - unsafe { slice::from_raw_parts(payload_ptr, len) } + // + // In the new version, the intrinsic always returns a pointer to an + // in-bounds and correctly aligned position for a `T` (even if in the + // `None` case it's just padding). + unsafe { + slice::from_raw_parts( + crate::intrinsics::option_payload_ptr(crate::ptr::from_ref(self)), + usize::from(self.is_some()), + ) + } } /// Returns a mutable slice of the contained value, if any. If this is @@ -875,28 +835,32 @@ impl Option { #[must_use] #[unstable(feature = "option_as_slice", issue = "108545")] pub fn as_mut_slice(&mut self) -> &mut [T] { - let payload_ptr: *mut T = - // The goal here is that both arms here are calculating exactly - // the same pointer, and thus it'll be folded away when the guessed - // offset is correct, but if the guess is wrong for some reason - // it'll at least still be sound, just no longer optimal. - if let Some(payload) = self { - payload - } else { - let self_ptr: *mut Self = self; - // SAFETY: `SOME_BYTE_OFFSET_GUESS` guarantees that its value is - // such that this will be in-bounds of the object. - unsafe { self_ptr.byte_offset(Self::SOME_BYTE_OFFSET_GUESS).cast() } - }; - let len = usize::from(self.is_some()); + #[cfg(bootstrap)] + match self { + Some(value) => slice::from_mut(value), + None => &mut [], + } + #[cfg(not(bootstrap))] // SAFETY: When the `Option` is `Some`, we're using the actual pointer // to the payload, with a length of 1, so this is equivalent to // `slice::from_mut`, and thus is safe. // When the `Option` is `None`, the length used is 0, so to be safe it // just needs to be aligned, which it is because `&self` is aligned and // the offset used is a multiple of alignment. - unsafe { slice::from_raw_parts_mut(payload_ptr, len) } + // + // In the new version, the intrinsic creates a `*const T` from a + // mutable reference so it is safe to cast back to a mutable pointer + // here. As with `as_slice`, the intrinsic always returns a pointer to + // an in-bounds and correctly aligned position for a `T` (even if in + // the `None` case it's just padding). + unsafe { + slice::from_raw_parts_mut( + crate::intrinsics::option_payload_ptr(crate::ptr::from_mut(self).cast_const()) + .cast_mut(), + usize::from(self.is_some()), + ) + } } ///////////////////////////////////////////////////////////////////////// diff --git a/tests/mir-opt/lower_intrinsics.option_payload.LowerIntrinsics.diff b/tests/mir-opt/lower_intrinsics.option_payload.LowerIntrinsics.diff new file mode 100644 index 0000000000000..e535141e772f8 --- /dev/null +++ b/tests/mir-opt/lower_intrinsics.option_payload.LowerIntrinsics.diff @@ -0,0 +1,54 @@ +- // MIR for `option_payload` before LowerIntrinsics ++ // MIR for `option_payload` after LowerIntrinsics + + fn option_payload(_1: &Option, _2: &Option) -> () { + debug o => _1; // in scope 0 at $DIR/lower_intrinsics.rs:+0:23: +0:24 + debug p => _2; // in scope 0 at $DIR/lower_intrinsics.rs:+0:42: +0:43 + let mut _0: (); // return place in scope 0 at $DIR/lower_intrinsics.rs:+0:62: +0:62 + let mut _4: *const std::option::Option; // in scope 0 at $DIR/lower_intrinsics.rs:+2:55: +2:56 + let mut _6: *const std::option::Option; // in scope 0 at $DIR/lower_intrinsics.rs:+3:55: +3:56 + scope 1 { + let _3: *const usize; // in scope 1 at $DIR/lower_intrinsics.rs:+2:13: +2:15 + scope 2 { + debug _x => _3; // in scope 2 at $DIR/lower_intrinsics.rs:+2:13: +2:15 + let _5: *const std::string::String; // in scope 2 at $DIR/lower_intrinsics.rs:+3:13: +3:15 + scope 3 { + debug _y => _5; // in scope 3 at $DIR/lower_intrinsics.rs:+3:13: +3:15 + } + } + } + + bb0: { + StorageLive(_3); // scope 1 at $DIR/lower_intrinsics.rs:+2:13: +2:15 + StorageLive(_4); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56 + _4 = &raw const (*_1); // scope 1 at $DIR/lower_intrinsics.rs:+2:55: +2:56 +- _3 = option_payload_ptr::(move _4) -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57 +- // mir::Constant +- // + span: $DIR/lower_intrinsics.rs:99:18: 99:54 +- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option) -> *const usize {option_payload_ptr::}, val: Value() } ++ _3 = &raw const (((*_4) as Some).0: usize); // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57 ++ goto -> bb1; // scope 1 at $DIR/lower_intrinsics.rs:+2:18: +2:57 + } + + bb1: { + StorageDead(_4); // scope 1 at $DIR/lower_intrinsics.rs:+2:56: +2:57 + StorageLive(_5); // scope 2 at $DIR/lower_intrinsics.rs:+3:13: +3:15 + StorageLive(_6); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56 + _6 = &raw const (*_2); // scope 2 at $DIR/lower_intrinsics.rs:+3:55: +3:56 +- _5 = option_payload_ptr::(move _6) -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57 +- // mir::Constant +- // + span: $DIR/lower_intrinsics.rs:100:18: 100:54 +- // + literal: Const { ty: unsafe extern "rust-intrinsic" fn(*const Option) -> *const String {option_payload_ptr::}, val: Value() } ++ _5 = &raw const (((*_6) as Some).0: std::string::String); // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57 ++ goto -> bb2; // scope 2 at $DIR/lower_intrinsics.rs:+3:18: +3:57 + } + + bb2: { + StorageDead(_6); // scope 2 at $DIR/lower_intrinsics.rs:+3:56: +3:57 + _0 = const (); // scope 1 at $DIR/lower_intrinsics.rs:+1:5: +4:6 + StorageDead(_5); // scope 2 at $DIR/lower_intrinsics.rs:+4:5: +4:6 + StorageDead(_3); // scope 1 at $DIR/lower_intrinsics.rs:+4:5: +4:6 + return; // scope 0 at $DIR/lower_intrinsics.rs:+5:2: +5:2 + } + } + diff --git a/tests/mir-opt/lower_intrinsics.rs b/tests/mir-opt/lower_intrinsics.rs index a0a1df4e5ca86..f07e2816f4f41 100644 --- a/tests/mir-opt/lower_intrinsics.rs +++ b/tests/mir-opt/lower_intrinsics.rs @@ -91,3 +91,12 @@ pub fn read_via_copy_uninhabited(r: &Never) -> Never { } pub enum Never {} + +// EMIT_MIR lower_intrinsics.option_payload.LowerIntrinsics.diff +#[cfg(not(bootstrap))] +pub fn option_payload(o: &Option, p: &Option) { + unsafe { + let _x = core::intrinsics::option_payload_ptr(o); + let _y = core::intrinsics::option_payload_ptr(p); + } +}