From 061c786c77f631c757f4c7e9edfaa66bcf45914d Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Wed, 1 May 2024 13:16:26 +0200 Subject: [PATCH 01/16] RFC: #[derive(SmartPointer)] Co-authored-by: Lukas Wirth --- text/3621-derive-smart-pointer.md | 676 ++++++++++++++++++++++++++++++ 1 file changed, 676 insertions(+) create mode 100644 text/3621-derive-smart-pointer.md diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md new file mode 100644 index 00000000000..7b14ac71d74 --- /dev/null +++ b/text/3621-derive-smart-pointer.md @@ -0,0 +1,676 @@ +- Feature Name: `derive_smart_pointer` +- Start Date: 2024-05-01 +- RFC PR: [rust-lang/rfcs#3621](https://github.com/rust-lang/rfcs/pull/3621) +- Rust Issue: [rust-lang/rust#123430](https://github.com/rust-lang/rust/issues/123430) + +# Summary +[summary]: #summary + +Make it possible to define custom smart pointers that work with trait objects. +For now, it will only be possible to do this using a derive macro, as we do not +stabilize the underlying traits. + +This RFC builds on top of the [arbitrary self types v2 RFC][ast]. All +references to the `Receiver` trait are references to the version defined by +that RFC, which is different from the `Receiver` trait in nightly at the time +of writing. + +# Motivation +[motivation]: #motivation + +Currently, the standard library types `Rc` and `Arc` are special. It's not +possible for third-party libraries to define custom smart pointers that work +with trait objects. + +It is generally desireable to make std less special, but this particular RFC is +motived by use-cases in the Linux Kernel. In the Linux Kernel, we need +reference counted objects often, but we are not able to use the standard +library `Arc`. There are several reasons for this: + +1. The standard Rust `Arc` will call `abort` on overflow. This is not + acceptable in the kernel; instead we want to saturate the count when it hits + `isize::MAX`. This effectively leaks the `Arc`. +2. Using Rust atomics raises various issues with the memory model. We are using + the LKMM (Linux Kernel Memory Model) rather than the usual C++ model. This + means that all atomic operations should be implemented with an `asm!` block + or similar that matches what kernel C does, rather than an LLVM intrinsic + like we do today. + +The Linux Kernel also needs another custom smart pointer called `ListArc`, +which is needed to provide a safe API for the linked list that the kernel uses. +The kernel needs these linked lists to avoid allocating memory during critical +regions on spinlocks. + +For more detailed explanations of these use-cases, please refer to: + +* [Arc in the Linux Kernel](https://rust-for-linux.com/arc-in-the-linux-kernel). + * This document was discussed during [the 2024-03-06 meeting with t-lang](https://hackmd.io/OCz8EfzrRXeogXEDcOrL2w). +* The kernel's custom linked list: [Mailing list](https://lore.kernel.org/all/20240402-linked-list-v1-0-b1c59ba7ae3b@google.com/), [GitHub](https://github.com/Darksonn/linux/commits/b4/linked-list/). +* [Discussion on the memory model issue with t-opsem](https://rust-lang.zulipchat.com/#narrow/stream/136281-t-opsem/topic/.E2.9C.94.20Rust.20and.20the.20Linux.20Kernel.20Memory.20Model/near/422047516) + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +The derive macro `SmartPointer` allows you to use custom smart pointers with +trait objects. This means that you will be able to coerce from +`SmartPointer` to `SmartPointer` when `MyStruct` +implements `MyTrait`. Additionally, the derive macro allows you to use `self: +SmartPointer` in traits without making them non-object-safe. + +It is not possible to use this feature without the derive macro, as we are not +stabilizing its expansion. + +## Coercions to trait objects + +By using the macro, the following example will compile: +```rust +#[derive(SmartPointer)] +struct MySmartPointer(Box); + +impl Deref for MySmartPointer { + type Target = T; + fn deref(&self) -> &T { + &self.0 + } +} + +trait MyTrait {} + +impl MyTrait for i32 {} + +fn main() { + let ptr: MySmartPointer = MySmartPointer(Box::new(4)); + + // This coercion would be an error without the derive. + let ptr: MySmartPointer = ptr; +} +``` +Without the `#[derive(SmartPointer)]` macro, this example would fail with the +following error: +``` +error[E0308]: mismatched types + --> src/main.rs:11:44 + | +11 | let ptr: MySmartPointer = ptr; + | --------------------------- ^^^ expected `MySmartPointer`, found `MySmartPointer` + | | + | expected due to this + | + = note: expected struct `MySmartPointer` + found struct `MySmartPointer` + = help: `i32` implements `MyTrait` so you could box the found value and coerce it to the trait object `Box`, you will have to change the expected type as well +``` + +## Object safety + +Consider the following trait: +```rust +trait MyTrait { + // Arbitrary self types is enough for this. + fn func(self: MySmartPointer); +} + +// But this requires #[derive(SmartPointer)]. +fn call_func(value: MySmartPointer) { + value.func(); +} +``` +You do not need `#[derive(SmartPointer)]` to declare this trait ([arbitrary +self types][ast] is enough), but the trait will not be object safe unless you +annotate `MySmartPointer` with `#[derive(SmartPointer)]`. If you don't, then +the use of `dyn MyTrait` triggers the following error: +``` +error[E0038]: the trait `MyTrait` cannot be made into an object + --> src/lib.rs:11:36 + | +8 | fn func(self: MySmartPointer); + | -------------------- help: consider changing method `func`'s `self` parameter to be `&self`: `&Self` +... +11 | fn call_func(value: MySmartPointer) { + | ^^^^^^^^^^^ `MyTrait` cannot be made into an object + | +note: for a trait to be "object safe" it needs to allow building a vtable to allow the call to be resolvable dynamically; for more information visit + --> src/lib.rs:8:19 + | +7 | trait MyTrait { + | ------- this trait cannot be made into an object... +8 | fn func(self: MySmartPointer); + | ^^^^^^^^^^^^^^^^^^^^ ...because method `func`'s `self` parameter cannot be dispatched on +``` +Note that using the `self: MySmartPointer` syntax requires that you +implement `Receiver` (or `Deref`), as the derive macro does not emit an +implementation of `Receiver`. + +## Requirements for using the macro + +Whenever a `self: MySmartPointer` method is called on a trait object, the +compiler will convert from `MySmartPointer` to +`MySmartPointer` using something similar to a transmute. Because of +this, there are strict requirements on the layout of `MySmartPointer`. It is +required that `MySmartPointer` is a struct, and that (other than one-aligned, +zero-sized fields) it must have exactly one field. The type must either be a +standard library pointer type (reference, raw pointer, NonNull, Box, Arc, etc.) +or another user-defined type also using this derive macro. +```rust +#[derive(SmartPointer)] +struct MySmartPointer { + ptr: Box, + _phantom: PhantomData, +} +``` + +### Multiple type parameters + +If the type has multiple type parameters, then you must explicitly specify +which one should be used for dynamic dispatch. For example: +```rust +#[derive(SmartPointer)] +struct MySmartPointer<#[pointee] T: ?Sized, U> { + ptr: Box, + _phantom: PhantomData, +} +``` +Specifying `#[pointee]` when the struct has only one type parameter is allowed, +but not required. + +## Example of a custom Rc +[custom-rc]: #example-of-a-custom-rc + +The macro makes it possible to implement custom smart pointers. For example, +you could implement your own `Rc` type like this: + +```rust +#[derive(SmartPointer)] +pub struct Rc { + inner: NonNull>, +} + +struct RcInner { + refcount: usize, + value: T, +} + +impl Deref for Rc { + type Target = T; + fn deref(&self) -> &T { + let ptr = self.inner.as_ptr(); + unsafe { &*ptr.value } + } +} + +impl Rc { + pub fn new(value: T) -> Self { + let inner = Box::new(ArcInner { + refcount: 1, + value, + }); + Self { + inner: NonNull::from(Box::leak(inner)), + } + } +} + +impl Clone for Rc { + fn clone(&self) -> Self { + unsafe { (*self.inner.as_ptr()).refcount += 1 }; + Self { inner: self.inner } + } +} + +impl Drop for Rc { + fn drop(&mut self) { + let ptr = self.inner.as_ptr(); + unsafe { (*ptr).refcount -= 1 }; + if unsafe { (*ptr).refcount } == 0 { + drop(unsafe { Box::from_raw(ptr) }); + } + } +} +``` +In this example, `#[derive(SmartPointer)]` makes it possible to use `Rc`. + + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +The derive macro will expand into two trait implementations, +[`core::ops::CoerceUnsized`] to enable unsizing coercions and +[`core::ops::DispatchFromDyn`] for dynamic dispatch. This expansion will be +adapted in the future if the underlying mechanisms for unsizing coercions and +dynamically dispatched receivers changes. + +As mentioned in the [rationale][why-only-macro] section, this RFC only proposes +to stabilize the derive macro. The underlying traits used by its expansion will +remain unstable for now. + +## Input Requirements +[input-requirements]: #input-requirements + +The macro sets the following requirements on its input: + +1. The definition must be a struct. +2. The struct must have at least one type parameter. If multiple type + parameters are present, exactly one of them has to be annotated with the + `#[pointee]` derive helper attribute. +3. The struct must not be `#[repr(packed)]` or `#[repr(C)]`. +4. Other than one-aligned, zero-sized fields, the struct must have exactly one + field and that field’s type must be must implement `DispatchFromDyn` + where `F` is the type of `T`’s field type. + +(Adapted from the docs for [`DispatchFromDyn`].) + +Point 1 and 2 are verified syntactically by the derive macro, whereas 3 and 4 +are verified semantically by the compiler when checking the generated +[`DispatchFromDyn`] implementation as it does today. + +## Expansion + +The macro will expand to two implementations, one for +[`core::ops::CoerceUnsized`] and one for [`core::ops::DispatchFromDyn`]. This +is enough for a type to participe in unsizing coercions and dynamic dispatch. + +The derive macro will implement the traits for the type according to the +following procedure: + +- Copy all generic parameters and their bounds from the struct definition into + the impl. +- Add an additional type parameter `U` and give it a `?Sized` bound. +- Add an additional `Unsize` bound to the `#[pointee]` type parameter. +- The generic parameter of the traits being implemented will be `Self`, except + that the `#[pointee]` type parameter is replaced with `U`. + +Given the following example code: +```rust +#[derive(SmartPointer)] +struct MySmartPointer<'a, #[pointee] T: ?Sized, A>{ + ptr: &'a T + phantom: PhantomData +} +``` + +we'll get the following expansion: + +```rust +#[automatically_derived] +impl<'a, T, A, U> ::core::ops::CoerceUnsized> for MySmartPointer<'a, T, A> +where + T: ?Sized + ::core::marker::Unsize, + U: ?::core::marker::Sized + +#[automatically_derived] +impl<'a, T, A, U> ::core::ops::DispatchFromDyn> for MySmartPointer<'a, T, A> +where + T: ?Sized + ::core::marker::Unsize, + U: ?::core::marker::Sized +{} +``` + +## `Receiver` and `Deref` implementations + +The macro does not emit a [`Receiver`][ast] implementation. Types that do not +implement `Receiver` can still use `#[derive(SmartPointer)]`, but they can't be +used with dynamic dispatch directly. + +The raw pointer type would be an example of a type that (behaves like it) is +annotated with `#[derive(SmartPointer)]` without an implementation of +`Receiver`. In the case of raw pointers, you can coerce from `*const MyStruct` +to `*const dyn MyTrait`, but you must first convert them to a reference before +you can use them for dynamic dispatch. + +## Vtable requirements + +As seen in the `Rc` example, the macro needs to be usable even if the pointer +is `NonNull>` (as opposed to `NonNull`). + +# Drawbacks +[drawbacks]: #drawbacks + +- Stabilizing this macro limits how the underlying traits can be changed in the + future, since we cannot change them in ways that make it impossible to + implement the macro as-is. + +- Stabilizing this macro reduces the incentive to stabilize the underlying + traits, meaning that it may take significantly longer before we do so. This + RFC does not include support for coercing transparent containers like + [`Cell`], so hopefully that will be enough incentive to continue work on the + underlying traits. + +# Rationale and alternatives +[rationale-and-alternatives]: #rationale-and-alternatives + +## Why only stabilize a macro? +[why-only-macro]: #why-only-stabilize-a-macro + +This RFC proposes to stabilize the `#[derive(SmartPointer)]` macro without +stabilizing what it expands to. This effectively means that the macro is the +only way to use these features for custom types. The rationale for this is that +we currently don't know how to stabilize the traits, and that this is a serious +blocker for making progress on this issue. Stabilizing the macro will unblock +projects that wish to define custom smart pointers, and does not prevent +evolution of the underlying traits. + +See also [the section on prior art][prior-art], which discusses a previous +attempt to stabilize the underlying traits. + +## Receiver and Deref traits + +The vast majority of custom smart pointers will implement `Receiver` (often via +`Deref`, which results in a `Receiver` impl due to the blanket impl). So why +not also emit a `Receiver`/`Deref` impl in the output of the macro. One +advantage of doing so is that this may sufficiently limit the macro so that we +do not need to solve the pin soundness issue discussed in [the unresolved +questions section][unresolved-questions]. + +However, it turns out that there are quite a few different ways we might +implement `Deref`. For example, consider [the custom `Rc` example][custom-rc]: +```rust +#[derive(SmartPointer)] +pub struct Rc { + inner: NonNull>, +} + +struct RcInner { + refcount: usize, + value: T, +} + +impl Deref for Rc { + type Target = T; + fn deref(&self) -> &T { + let ptr = self.inner.as_ptr(); + unsafe { &*ptr.value } + } +} +``` +Making the macro general enough to generate `Deref` impls that are _that_ +complex would not be feasible. And it doesn't make sense to stabilize the macro +without support for the custom `Rc` case, as implementing a custom `Arc` in the +Linux Kernel is the primary motivation for this RFC. + +Note that having the macro generate a `Receiver` impl instead doesn't work +either, because that prevents the user from implementing `Deref` at all. (There +is a blanket impl of `Receiver` for all `Deref` types.) + +## Why not two derive macros? + +The derive macro generates two different trait implementations: + +- [`CoerceUnsized`] that allows conversions from `SmartPtr` to + `SmartPtr`. +- [`DispatchFromDyn`] that allows conversions from `SmartPtr` to + `SmartPtr`. + +It could be argued that these should be split into two separate derive macros. +We are not proposing this for a few reasons: + +- If there are two derive macros, then we have to support the case where you + only use one of them. There are use-cases for implementing [`CoerceUnsized`] + without [`DispatchFromDyn`], but you do this for cases where your type is not + a smart pointer, but rather a transparent container like [`Cell`]. It makes + coercions like `Cell>` to `Cell>` possible. + Supporting that is a significantly increased scope of the RFC, and the + authors believe that supporting transparent containers should be a separate + follow-up RFC. + +- Right now there are use cases for `CoerceUnsized` (transparent containers) + and `CoerceUnsized+DispatchFromDyn` (smart pointers), but there aren't any + real use-cases for having `DispatchFromDyn` alone. Because of that, one + possible future design of the underlying traits could be to have one trait + for smart pointers, and another one for transparent containers. Adding two + derive macros prevents us from changing the underlying traits to that design + in the future. + +- The authors believe that a convenience `#[derive(SmartPointer)]` macro will + continue to make sense, even once the underlying traits are stabilized. It is + significantly easier to use than the expansion. + +- If we want the macro to correspond one-to-one to the underlying traits, then + we would want to use the same names as the underlying traits. However, we + don't know what the traits will be called when we finally figure out how to + stabilize them. (One of the traits have already been renamed once!) + +Even raw-pointer-like types that do not implement `Receiver` still want to +implement `DispatchFromDyn`, since this allows you to use them as the field +type in other structs that use `#[derive(SmartPointer)]`. For example, the +custom `Rc` has a field of type `NonNull`, and this works since `NonNull` is +`DispatchFromDyn`. + +[`Cell`]: https://doc.rust-lang.org/stable/core/cell/struct.Cell.html + +## What about `#[pointee]`? + +This RFC currently proposes to mark the generic parameter used for dynamic +dispatch with `#[pointee]`. For convenience, the RFC proposes that this is only +needed when there are multiple generic parameters. + +There are potential use-cases for smart pointers with additional generic +parameters. Specifically, the `ListArc` type used by the linked lists currently +has an additional const generic parameter to allow you to use the same +refcounted value with multiple lists. People have argued that it would be +better to change this to a generic type instead of a const generic, so it would +be useful to keep the option of having multiple generic types on the struct. + +# Prior art +[prior-art]: #prior-art + +## Stabilizing subsets of features + +There are several prior examples of unstable features that have been blocked +from stabilization for various reasons, where we have been able to make +progress by reducing the scope and stabilizing a subset. + +- The most recent example of this is [the arbitrary self types RFC][ast], where + [it was proposed to reduce the scope][ast-scope] so that we do not block + progress on the feature. +- Another example of this is [the async fn in traits feature][rpit]. This was + stabilized even though it is not yet advisable to use it for traits in the + public API of crates, due to missing parts of the feature. + +There have already been [previous attempts to stabilize the underlying +traits][pre-rfc], and they did not make much progress. Therefore, this RFC +proposes to reduce ths scope and instead stabilize a derive macro. + +[ast-scope]: https://github.com/rust-lang/rfcs/pull/3519#discussion_r1492385549 +[rpit]: https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html + +## Macros whose output is unstable + +The Rust testing framework is considered unstable, and the only stable way to +interact with it is via the `#[test]` attribute macro. The macro's output uses +the unstable internals of the testing framework. This allows the testing +framework to be changed in the future. + +Note also that the `pin!` macro expands to something that uses an unstable +feature, though it does so for a different reason than +`#[derive(SmartPointer)]` and `#[test]`. + +# Unresolved questions +[unresolved-questions]: #unresolved-questions + +Unfortunately, the API proposed by this RFC is unsound. :( + +Basically, the issue is that if `MyStruct` is `Unpin`, then you can create a +`Pin>` safely, even though you can coerce that to +`Pin>` (and `dyn MyTrait` may be `!Unpin`). If +`SmartPointer` has a malicious implementation of `Deref`, then this can lead to +unsoundness. Since `Deref` is a safe trait, we cannot outlaw malicious +implementations of `Deref`. + +Intuitively, the way that `Deref` can be malicious is by not always derefing to +the same value. + +Some solution idea is outlined below, but the authors need your input on what +to do about this problem. + +## Unsafe macro + +The easiest solution is probably to just make the macro unsafe. We could do this +in a similar vein to [the unsafe attributes RFC][unsafe-attribute]. +```rust +// SAFETY: The Deref impl always returns the same value. +#[unsafe(derive(SmartPointer))] +pub struct Rc { + inner: NonNull>, +} +``` +This way, if you coerce an `Pin>` to `Pin>` and +this is unsound due to a weird `Deref` impl, then it's your fault because you +unsafely asserted that you have a reasonable `Deref` implementation. + +That said, there are some possible forwards compatibility hazards with this +solution. We might start out with an unsafe derive macro, and then in the future +we might decide to instead use the `StableDeref` solution mentioned below. Then, +`#[unsafe(derive(SmartPointer))]` would have to generate an implementation of +the `StableDeref` trait too, because otherwise `Pin>` would lose +the ability to be unsize coerced, which would be a breaking change. This means +that `#[unsafe(derive(SmartPointer))]` and `#[derive(SmartPointer)]` could end +up expanding to _different_ things. + +[unsafe-attribute]: https://github.com/rust-lang/rfcs/pull/3325 + +## StableDeref + +We are quite limited in how we can work around this issue due to backwards +compatibility concerns with `Pin`. We cannot prevent you from using `Pin::new` +with structs that have malicious `Deref` implementations. However, one possible +place we can intervene is the coercion from `Pin>` to +`Pin>`. If you need to use unsafe before those +coerceions are possible, then the problem is solved. For example, we might +introduce a `StableDeref` trait: +```rs +/// # Safety +/// +/// Any two calls to `deref` must return the same value at the same address unless +/// `self` has been modified in the meantime. Moves and unsizing coercions of `self` +/// are not considered modifications. +/// +/// Here, "same value" means that if `deref` returns a trait object, then the actual +/// type behind that trait object must not change. Additionally, when you unsize +/// coerce from `Self` to `Unsized`, then if you call `deref` on `Unsized` and get a +/// trait object, then the underlying type of that trait object must be `::Target`. +/// +/// Analogous requirements apply to other unsized types. E.g., if `deref` returns +/// `[T]`, then the length must not change. (The underlying type must not change +/// from `[T; N]` to `[T; M]`.) +/// +/// If this type implements `DerefMut`, then the same restrictions apply to calls +/// to `deref_mut`. +unsafe trait StableDeref: Deref { } +``` +Then we make it so that you can only coerce pinned pointers when they implement +`StableDeref`. We can do that by modifying its implementation of +[`CoerceUnsized`] to this: +```rs +impl CoerceUnsized> for Pin +where + T: CoerceUnsized, + T: StableDeref, + U: StableDeref, +{} +``` +This way, the user must implement the unsafe trait before they can coerce +pinned versions of the pointer. Since the trait is unsafe, it is not our fault +if that leads to unsoundness. + +This should not be a breaking change as long as we implement `StableDeref` for +all standard library types that can be coerced when wrapped in `Pin`. + +The proposed trait is called `StableDeref` because the way that `Deref` +implementations can be malicious is essentially by having +`SmartPointer` and `SmartPointer` deref to two different +values. There may be some opportunity to reuse the `DerefPure` trait from [the +deref patterns feature][deref-patterns], and there is also some prior art with +the [`stable_deref_trait`](https://docs.rs/stable_deref_trait) crate, + +[deref-patterns]: https://github.com/rust-lang/rust/issues/87121/ + +## Don't allow Pin coercions with custom smart pointers + +This solution is essentially the `StableDeref` solution except that we don't +stabilize `StableDeref`. This way, there's no stable way to use +`#[derive(SmartPointer)]` types with `Pin` coercions. + +This solutions isn't a problem for the Linux Kernel right now because our custom +`Arc` happens to be implicitly pinned for convenience reasons, but if it wasn't, +then we would need coercions of `Pin`-wrapped values. + +## Negative trait bounds? + +There are also various solutions that involve negative trait bounds. For +example, you might instead modify `CoerceUnsized` like this: +```rust +// Permit going from `Pin` to` Pin` +impl CoerceUnsized> for Pin

+where + P: CoerceUnsized, + P: Deref, + U: Deref, +{ } + +// Permit going from `Pin` to `Pin` +impl CoerceUnsized> for Pin

+where + P: CoerceUnsized, + P: Deref, + U: Deref, +{ } + +// Permit going from `Pin` to `Pin` +impl CoerceUnsized> for Pin

+where + P: CoerceUnsized, + P: core::ops::Deref, + U: core::ops::Deref, +{ } +``` +This RFC does not propose it because it is a breaking change and the +`PinCoerceUnsized` or `DerefPure` solutions are simpler. This solution is +discussed in more details in [the pre-RFC for stabilizing the underlying +traits][pre-rfc]. + # Prior art [prior-art]: #prior-art @@ -574,156 +752,7 @@ feature, though it does so for a different reason than # Unresolved questions [unresolved-questions]: #unresolved-questions -Unfortunately, the API proposed by this RFC is unsound. :( - -Basically, the issue is that if `MyStruct` is `Unpin`, then you can create a -`Pin>` safely, even though you can coerce that to -`Pin>` (and `dyn MyTrait` may be `!Unpin`). If -`SmartPointer` has a malicious implementation of `Deref`, then this can lead to -unsoundness. Since `Deref` is a safe trait, we cannot outlaw malicious -implementations of `Deref`. - -Intuitively, the way that `Deref` can be malicious is by not always derefing to -the same value. - -Some solution idea is outlined below, but the authors need your input on what -to do about this problem. - -## Unsafe macro - -The easiest solution is probably to just make the macro unsafe. We could do this -in a similar vein to [the unsafe attributes RFC][unsafe-attribute]. -```rust -// SAFETY: The Deref impl always returns the same value. -#[unsafe(derive(SmartPointer))] -pub struct Rc { - inner: NonNull>, -} -``` -This way, if you coerce an `Pin>` to `Pin>` and -this is unsound due to a weird `Deref` impl, then it's your fault because you -unsafely asserted that you have a reasonable `Deref` implementation. - -That said, there are some possible forwards compatibility hazards with this -solution. We might start out with an unsafe derive macro, and then in the future -we might decide to instead use the `StableDeref` solution mentioned below. Then, -`#[unsafe(derive(SmartPointer))]` would have to generate an implementation of -the `StableDeref` trait too, because otherwise `Pin>` would lose -the ability to be unsize coerced, which would be a breaking change. This means -that `#[unsafe(derive(SmartPointer))]` and `#[derive(SmartPointer)]` could end -up expanding to _different_ things. - -[unsafe-attribute]: https://github.com/rust-lang/rfcs/pull/3325 - -## StableDeref - -We are quite limited in how we can work around this issue due to backwards -compatibility concerns with `Pin`. We cannot prevent you from using `Pin::new` -with structs that have malicious `Deref` implementations. However, one possible -place we can intervene is the coercion from `Pin>` to -`Pin>`. If you need to use unsafe before those -coercions are possible, then the problem is solved. For example, we might -introduce a `StableDeref` trait: -```rs -/// # Safety -/// -/// Any two calls to `deref` must return the same value at the same address unless -/// `self` has been modified in the meantime. Moves and unsizing coercions of `self` -/// are not considered modifications. -/// -/// Here, "same value" means that if `deref` returns a trait object, then the actual -/// type behind that trait object must not change. Additionally, when you unsize -/// coerce from `Self` to `Unsized`, then if you call `deref` on `Unsized` and get a -/// trait object, then the underlying type of that trait object must be `::Target`. -/// -/// Analogous requirements apply to other unsized types. E.g., if `deref` returns -/// `[T]`, then the length must not change. (The underlying type must not change -/// from `[T; N]` to `[T; M]`.) -/// -/// If this type implements `DerefMut`, then the same restrictions apply to calls -/// to `deref_mut`. -unsafe trait StableDeref: Deref { } -``` -Then we make it so that you can only coerce pinned pointers when they implement -`StableDeref`. We can do that by modifying its trait implementations to this: -```rs -impl CoerceUnsized> for Pin -where - T: CoerceUnsized, - T: StableDeref, - U: StableDeref, -{} - -impl DispatchFromDyn> for Pin -where - T: CoerceUnsized, - T: StableDeref, - U: StableDeref, -{} -``` -This way, the user must implement the unsafe trait before they can coerce -pinned versions of the pointer. Since the trait is unsafe, it is not our fault -if that leads to unsoundness. - -This should not be a breaking change as long as we implement `StableDeref` for -all standard library types that can be coerced when wrapped in `Pin`. - -The proposed trait is called `StableDeref` because the way that `Deref` -implementations can be malicious is essentially by having -`SmartPointer` and `SmartPointer` deref to two different -values. There may be some opportunity to reuse the `DerefPure` trait from [the -deref patterns feature][deref-patterns], and there is also some prior art with -the [`stable_deref_trait`](https://docs.rs/stable_deref_trait) crate, - -[deref-patterns]: https://github.com/rust-lang/rust/issues/87121/ - -## Don't allow Pin coercions with custom smart pointers - -This solution is essentially the `StableDeref` solution except that we don't -stabilize `StableDeref`. This way, there's no stable way to use -`#[derive(SmartPointer)]` types with `Pin` coercions. - -This solutions isn't a problem for the Linux Kernel right now because our custom -`Arc` happens to be implicitly pinned for convenience reasons, but if it wasn't, -then we would need coercions of `Pin`-wrapped values. - -## Negative trait bounds? - -There are also various solutions that involve negative trait bounds. For -example, you might instead modify `CoerceUnsized` like this: -```rust -// Permit going from `Pin` to` Pin` -impl CoerceUnsized> for Pin

-where - P: CoerceUnsized, - P: Deref, - U: Deref, -{ } - -// Permit going from `Pin` to `Pin` -impl CoerceUnsized> for Pin

-where - P: CoerceUnsized, - P: core::ops::Deref, - U: core::ops::Deref, -{ } -``` -This way, you can only coerce pinned pointers when this doesn't change whether -the target type is `Unpin`. - -It would solve the unsoundness, but it does have the disadvantage of having no -path to making these pinned coercions possible for smart pointers that don't -have malicious `Deref` implementations. Another downside of this solution is -that it's a breaking change, because it disallows these pinned coercions even -with the standard library pointers, which allows them today. - -There are also other variations on the negative trait bounds, which become -different implementations of the "Don't allow Pin coercions with custom smart -pointers" solution. - -This solution is discussed in more details in [the pre-RFC for stabilizing the -underlying traits][pre-rfc]. +No unresolved questions. # Future possibilities [future-possibilities]: #future-possibilities From afe5534cb2f8cedda638959f9affd346a33c431a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 24 May 2024 17:33:28 +0200 Subject: [PATCH 13/16] Add unresolved concern about naming --- text/3621-derive-smart-pointer.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index fdf35201774..f960b55986f 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -752,7 +752,15 @@ feature, though it does so for a different reason than # Unresolved questions [unresolved-questions]: #unresolved-questions -No unresolved questions. +Bikeshedding over the name remains. + +The name `#[derive(SmartPointer)]` leaves some things to be desired, as smart +pointers would generally want to implement some traits that this macro does +*not* expand to. Most prominently, any smart pointer should implement `Deref` or +`Receiver`. Really, the macro just says that this pointer works with unsizing +and dynamic dispatch. + +We will settle on the final name prior to stabilization. # Future possibilities [future-possibilities]: #future-possibilities From 5cfbde51824631231f331540eee853d70bd3cf5b Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 24 May 2024 17:48:35 +0200 Subject: [PATCH 14/16] Add missing link --- text/3621-derive-smart-pointer.md | 1 + 1 file changed, 1 insertion(+) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index f960b55986f..b8fb1a20f00 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -807,3 +807,4 @@ like `#[pointee]` that must be annotated on the field in question. [`core::ops::CoerceUnsized`]: https://doc.rust-lang.org/stable/core/ops/trait.CoerceUnsized.html [`DispatchFromDyn`]: https://doc.rust-lang.org/stable/core/ops/trait.DispatchFromDyn.html [`core::ops::DispatchFromDyn`]: https://doc.rust-lang.org/stable/core/ops/trait.DispatchFromDyn.html +[unsafe-attribute]: https://github.com/rust-lang/rfcs/pull/3325 From 4fcfcca48fffe991548ca1d8c8aa88ed799c7299 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 25 Jun 2024 16:21:27 +0200 Subject: [PATCH 15/16] Require `#[repr(transparent)]` --- text/3621-derive-smart-pointer.md | 48 +++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index b8fb1a20f00..2ebc6d86a45 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -265,9 +265,8 @@ The macro sets the following requirements on its input: 2. The struct must have at least one type parameter. If multiple type parameters are present, exactly one of them has to be annotated with the `#[pointee]` derive helper attribute. -3. The struct must not be `#[repr(packed)]` or `#[repr(C)]`. -4. Other than one-aligned, zero-sized fields, the struct must have exactly one - field. +3. The struct must be `#[repr(transparent)]`. +4. The struct must have at least one field. 5. Assume that `T` is a type that can be unsized to `U`, and let `FT` and `FU` be the type of the struct's field when the pointee is equal to `T` and `U` respectively. If the struct's trait bounds are satisfied for both `T` and @@ -276,9 +275,10 @@ The macro sets the following requirements on its input: (Adapted from the docs for [`DispatchFromDyn`].) -Point 1 and 2 are verified syntactically by the derive macro, whereas 3, 4 and 5 +Point 1 and 2 are verified syntactically by the derive macro. Points 4 and 5 are verified semantically by the compiler when checking the generated -[`DispatchFromDyn`] implementation as it does today. +[`DispatchFromDyn`] implementation as it does today. Point 3 is verified by +introducing a new unstable helper trait `AssertReprTransparent`. The `#[pointee]` attribute may also be written as `#[smart_pointer::pointee]`. @@ -331,6 +331,16 @@ where T: ::core::marker::Unsize, {} ``` +The macro will also generate an implementation of the new +`AssertReprTransparent` helper trait. The implementation will have the same +trait bounds as the struct definition. +```rust +#[automatically_derived] +impl<'a, T, A> ::core::ops::AssertReprTransparent for MySmartPointer<'a, T, A> +where + T: ?Sized + SomeTrait, +{} +``` ## `Receiver` and `Deref` implementations @@ -390,6 +400,13 @@ Although this RFC proposes to add the `PinCoerceUnsized` trait to ensure that unsizing coercions of pinned pointers cannot be used to cause unsoundness, the RFC does not propose to stabilize the trait. +## `AssertReprTransparent` + +To verify the requirement that the struct is `#[repr(transparent)]`, we +introduce a new unstable marker trait called `AssertReprTransparent`. This trait +will be a lang item, and the compiler will emit an error if the trait is used +with a type that is not `#[repr(transparent)]`. + # Drawbacks [drawbacks]: #drawbacks @@ -715,6 +732,27 @@ This RFC does not propose it because it is a breaking change and the discussed in more details in [the pre-RFC for stabilizing the underlying traits][pre-rfc]. +## `AssertReprTransparent` + +When you implement the [`DispatchFromDyn`] trait, the compiler enforces various +things about the type to verify that it makes sense to implement +`DispatchFromDyn`. One of the things that the compiler verifies is that the +struct must not be `#[repr(packed)]` or `#[repr(C)]`. + +However, because `#[derive(SmartPointer)]` has more narrow use-case than +`DispatchFromDyn`, we would like to restrict it further so that the macro only +works with `#[repr(transparent)]` types. To do this, we use a new trait called +`AssertReprTransparent` that verifies that the struct is `#[repr(transparent)]` +like how `DispatchFromDyn` verifies that the struct must not be +`#[repr(packed)]` or `#[repr(C)]`. + +We cannot change the logic in `DispatchFromDyn` because some existing standard +library types cannot be `#[repr(transparent)]`. For example, this includes +`Box` due to its allocator field. + +This requirement may be relaxed in the future, in which case +`AssertReprTransparent` can be removed again. + # Prior art [prior-art]: #prior-art From 85ef575ed226a54c49717d7eb932a6800b65100c Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Tue, 9 Jul 2024 14:01:19 +0200 Subject: [PATCH 16/16] Remove details about how repr(transparent) constraint is implemented And add #[repr(transparent)] to the examples. --- text/3621-derive-smart-pointer.md | 65 ++++++++----------------------- 1 file changed, 17 insertions(+), 48 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 2ebc6d86a45..d6a28ff1d63 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -65,6 +65,7 @@ stabilizing its expansion. By using the macro, the following example will compile: ```rust #[derive(SmartPointer)] +#[repr(transparent)] struct MySmartPointer(Box); impl Deref for MySmartPointer { @@ -147,12 +148,13 @@ Whenever a `self: MySmartPointer` method is called on a trait object, the compiler will convert from `MySmartPointer` to `MySmartPointer` using something similar to a transmute. Because of this, there are strict requirements on the layout of `MySmartPointer`. It is -required that `MySmartPointer` is a struct, and that (other than one-aligned, -zero-sized fields) it must have exactly one field. The type must either be a -standard library pointer type (reference, raw pointer, NonNull, Box, Arc, etc.) -or another user-defined type also using this derive macro. +required that `MySmartPointer` is a `#[repr(transparent)]` struct, and the type +of its non-zero-sized field must either be a standard library pointer type +(reference, raw pointer, NonNull, Box, Arc, etc.) or another user-defined type +also using this derive macro. ```rust #[derive(SmartPointer)] +#[repr(transparent)] struct MySmartPointer { ptr: Box, _phantom: PhantomData, @@ -165,6 +167,7 @@ If the type has multiple type parameters, then you must explicitly specify which one should be used for dynamic dispatch. For example: ```rust #[derive(SmartPointer)] +#[repr(transparent)] struct MySmartPointer<#[pointee] T: ?Sized, U> { ptr: Box, _phantom: PhantomData, @@ -194,6 +197,7 @@ you could implement your own `Rc` type like this: ```rust #[derive(SmartPointer)] +#[repr(transparent)] pub struct Rc { inner: NonNull>, } @@ -275,10 +279,9 @@ The macro sets the following requirements on its input: (Adapted from the docs for [`DispatchFromDyn`].) -Point 1 and 2 are verified syntactically by the derive macro. Points 4 and 5 +Points 1, 2 and 3 are verified syntactically by the derive macro. Points 4 and 5 are verified semantically by the compiler when checking the generated -[`DispatchFromDyn`] implementation as it does today. Point 3 is verified by -introducing a new unstable helper trait `AssertReprTransparent`. +[`DispatchFromDyn`] implementation as it does today. The `#[pointee]` attribute may also be written as `#[smart_pointer::pointee]`. @@ -303,6 +306,7 @@ following procedure: Given the following example code: ```rust #[derive(SmartPointer)] +#[repr(transparent)] struct MySmartPointer<'a, #[pointee] T, A> where T: ?Sized + SomeTrait, @@ -331,16 +335,6 @@ where T: ::core::marker::Unsize, {} ``` -The macro will also generate an implementation of the new -`AssertReprTransparent` helper trait. The implementation will have the same -trait bounds as the struct definition. -```rust -#[automatically_derived] -impl<'a, T, A> ::core::ops::AssertReprTransparent for MySmartPointer<'a, T, A> -where - T: ?Sized + SomeTrait, -{} -``` ## `Receiver` and `Deref` implementations @@ -400,13 +394,6 @@ Although this RFC proposes to add the `PinCoerceUnsized` trait to ensure that unsizing coercions of pinned pointers cannot be used to cause unsoundness, the RFC does not propose to stabilize the trait. -## `AssertReprTransparent` - -To verify the requirement that the struct is `#[repr(transparent)]`, we -introduce a new unstable marker trait called `AssertReprTransparent`. This trait -will be a lang item, and the compiler will emit an error if the trait is used -with a type that is not `#[repr(transparent)]`. - # Drawbacks [drawbacks]: #drawbacks @@ -455,6 +442,7 @@ However, it turns out that there are quite a few different ways we might implement `Deref`. For example, consider [the custom `Rc` example][custom-rc]: ```rust #[derive(SmartPointer)] +#[repr(transparent)] pub struct Rc { inner: NonNull>, } @@ -692,6 +680,7 @@ RFC][unsafe-attribute]. ```rust // SAFETY: The Deref impl is not malicious. #[unsafe(derive(SmartPointer))] +#[repr(transparent)] pub struct Rc { inner: NonNull>, } @@ -732,27 +721,6 @@ This RFC does not propose it because it is a breaking change and the discussed in more details in [the pre-RFC for stabilizing the underlying traits][pre-rfc]. -## `AssertReprTransparent` - -When you implement the [`DispatchFromDyn`] trait, the compiler enforces various -things about the type to verify that it makes sense to implement -`DispatchFromDyn`. One of the things that the compiler verifies is that the -struct must not be `#[repr(packed)]` or `#[repr(C)]`. - -However, because `#[derive(SmartPointer)]` has more narrow use-case than -`DispatchFromDyn`, we would like to restrict it further so that the macro only -works with `#[repr(transparent)]` types. To do this, we use a new trait called -`AssertReprTransparent` that verifies that the struct is `#[repr(transparent)]` -like how `DispatchFromDyn` verifies that the struct must not be -`#[repr(packed)]` or `#[repr(C)]`. - -We cannot change the logic in `DispatchFromDyn` because some existing standard -library types cannot be `#[repr(transparent)]`. For example, this includes -`Box` due to its allocator field. - -This requirement may be relaxed in the future, in which case -`AssertReprTransparent` can be removed again. - # Prior art [prior-art]: #prior-art @@ -820,9 +788,10 @@ proposals for relaxing them have been seen before (e.g., in the [pre-RFC][pre-rfc].) One example of a restriction that we could lift is the restriction that there is -only one non-zero-sized field. This would allow smart pointers to use custom -allocators. (Today, types like `Box` and `Rc` only work with trait objects when -using the default zero-sized allocator.) +only one non-zero-sized field (i.e., that it must be `#[repr(transparent)]`). +This would allow smart pointers to use custom allocators. (Today, types like +`Box` and `Rc` only work with trait objects when using the default zero-sized +allocator.) This could also allow implementations of `Rc` and `Arc` that store the value and refcount in two different allocations, like how the C++ `shared_ptr` works.

+where + P: CoerceUnsized, + P: core::ops::Deref, + U: core::ops::Deref, +{ } +``` +This way, you can only coerce pinned pointers when this doesn't change whether +the target type is `Unpin`. + +It would solve the unsoundness, but it does have the disadvantage of having no +path to making these pinned coercions possible for smart pointers that don't +have malicious `Deref` implementations. Another downside of this solution is +that it's a breaking change, because it disallows these pinned coercions even +with the standard library pointers, which allows them today. + +There are also other variations on the negative trait bounds, which become +different implementations of the "Don't allow Pin coercions with custom smart +pointers" solution. + +This solution is discussed in more details in [the pre-RFC for stabilizing the +underlying traits][pre-rfc]. + +# Future possibilities +[future-possibilities]: #future-possibilities + +One of the design goals of this RFC is that it should make this feature +available to crates without significantly limiting how the underlying traits +can evolve. The authors hope that we will find a way to stabilize the +underlying traits in the future. + +One of the things that is left out of scope of this RFC is coercions involving +custom transparent containers similar to [`Cell`]. They require an +implementation of [`CoerceUnsized`] without [`DispatchFromDyn`]. + +There is a reasonable change that we may be able to lift some of [the +restrictions][input-requirements] on the shape of the struct as well. The +current restrictions are just whatever [`DispatchFromDyn`] requires today, and +proposals for relaxing them have been seen before (e.g., in the +[pre-RFC][pre-rfc].) + +One example of a restriction that we could lift is the restrction that there is +only one non-zero-sized field. This could allow implementations of `Rc` and +`Arc` that store the value and refcount in two different allocations, like how +the C++ `shared_ptr` works. +```rust +#[derive(SmartPointer)] +pub struct Rc { + refcount: NonNull, + value: NonNull, +} +``` +Implementing this probably requires the `#[derive(SmartPointer)]` macro to know +syntactically which field holds the vtable. One simple way to do that could be +to say that it must be the last field, analogous to the unsized field in structs +that must also be the last field. Another option is to add another attribute +like `#[pointee]` that must be annotated on the field in question. + +[ast]: https://github.com/rust-lang/rfcs/pull/3519 +[pre-rfc]: https://internals.rust-lang.org/t/pre-rfc-flexible-unsize-and-coerceunsize-traits/18789 +[`CoerceUnsized`]: https://doc.rust-lang.org/stable/core/ops/trait.CoerceUnsized.html +[`core::ops::CoerceUnsized`]: https://doc.rust-lang.org/stable/core/ops/trait.CoerceUnsized.html +[`DispatchFromDyn`]: https://doc.rust-lang.org/stable/core/ops/trait.DispatchFromDyn.html +[`core::ops::DispatchFromDyn`]: https://doc.rust-lang.org/stable/core/ops/trait.DispatchFromDyn.html From 8a62c64af65cdbf8756a30264d9a72e6556446cf Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Thu, 2 May 2024 11:39:34 +0200 Subject: [PATCH 02/16] Apply suggestions by lqd --- text/3621-derive-smart-pointer.md | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 7b14ac71d74..5e4938a36f8 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -200,7 +200,7 @@ impl Deref for Rc { impl Rc { pub fn new(value: T) -> Self { - let inner = Box::new(ArcInner { + let inner = Box::new(RcInner { refcount: 1, value, }); @@ -284,8 +284,8 @@ Given the following example code: ```rust #[derive(SmartPointer)] struct MySmartPointer<'a, #[pointee] T: ?Sized, A>{ - ptr: &'a T - phantom: PhantomData + ptr: &'a T, + phantom: PhantomData, } ``` @@ -296,13 +296,14 @@ we'll get the following expansion: impl<'a, T, A, U> ::core::ops::CoerceUnsized> for MySmartPointer<'a, T, A> where T: ?Sized + ::core::marker::Unsize, - U: ?::core::marker::Sized + U: ?::core::marker::Sized, +{} #[automatically_derived] impl<'a, T, A, U> ::core::ops::DispatchFromDyn> for MySmartPointer<'a, T, A> where T: ?Sized + ::core::marker::Unsize, - U: ?::core::marker::Sized + U: ?::core::marker::Sized, {} ``` @@ -321,7 +322,7 @@ you can use them for dynamic dispatch. ## Vtable requirements As seen in the `Rc` example, the macro needs to be usable even if the pointer -is `NonNull>` (as opposed to `NonNull`). +is `NonNull>` (as opposed to `NonNull`). # Drawbacks [drawbacks]: #drawbacks @@ -469,7 +470,7 @@ progress by reducing the scope and stabilizing a subset. There have already been [previous attempts to stabilize the underlying traits][pre-rfc], and they did not make much progress. Therefore, this RFC -proposes to reduce ths scope and instead stabilize a derive macro. +proposes to reduce the scope and instead stabilize a derive macro. [ast-scope]: https://github.com/rust-lang/rfcs/pull/3519#discussion_r1492385549 [rpit]: https://blog.rust-lang.org/2023/12/21/async-fn-rpit-in-traits.html From f77582569b942595758311d2232aab3f0d8f3bd6 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 3 May 2024 10:41:26 +0200 Subject: [PATCH 03/16] Transparent containers implement both traits --- text/3621-derive-smart-pointer.md | 62 ++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 5e4938a36f8..12784be71b8 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -393,6 +393,38 @@ Note that having the macro generate a `Receiver` impl instead doesn't work either, because that prevents the user from implementing `Deref` at all. (There is a blanket impl of `Receiver` for all `Deref` types.) +## Transparent containers + +Smart pointers are not the only use case for implementing the [`CoerceUnsized`] +and [`DispatchFromDyn`] traits. They are also used for "transparent containers" +such as [`Cell`]. That use-case allows coercions such as `Cell>` +to `Cell>`. (Coercions where the `Cell` is inside the `Box` are +already supported on stable Rust.) + +It is not possible to use the derive macro proposed by this RFC for transparent +containers because they require a different set of where bounds when +implementing the traits. To compare: +```rust +// smart pointer example +impl DispatchFromDyn> for Box +where + T: Unsize + ?Sized, + U: ?Sized, +{} + +// transparent container example +impl DispatchFromDyn> for Cell +where + T: DispatchFromDyn, +{} +``` +Attempting to annotate `#[derive(SmartPointer)]` onto a transparent container +will fail to compile because [it violates the rules for implementing +`DispatchFromDyn`][tc-pg]. Supporting custom transparent containers is out of +scope for this RFC. + +[tc-pg]: https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=c3fe2a11822e4c5e2dae5bfec9d77b9e + ## Why not two derive macros? The derive macro generates two different trait implementations: @@ -406,21 +438,16 @@ It could be argued that these should be split into two separate derive macros. We are not proposing this for a few reasons: - If there are two derive macros, then we have to support the case where you - only use one of them. There are use-cases for implementing [`CoerceUnsized`] - without [`DispatchFromDyn`], but you do this for cases where your type is not - a smart pointer, but rather a transparent container like [`Cell`]. It makes - coercions like `Cell>` to `Cell>` possible. - Supporting that is a significantly increased scope of the RFC, and the - authors believe that supporting transparent containers should be a separate - follow-up RFC. - -- Right now there are use cases for `CoerceUnsized` (transparent containers) - and `CoerceUnsized+DispatchFromDyn` (smart pointers), but there aren't any - real use-cases for having `DispatchFromDyn` alone. Because of that, one - possible future design of the underlying traits could be to have one trait - for smart pointers, and another one for transparent containers. Adding two - derive macros prevents us from changing the underlying traits to that design - in the future. + only use one of them. There isn't much reason to do that, and the authors are + not aware of any examples where you would prefer to implement one of the + traits without implementing both. + +- Having two different macros means that we lock ourselves into solutions that + involve two traits that split the feature in the way that we split it today. + However, it is easy to imagine situations where we would want to split the + traits in a different way. For example, we might instead want one trait for + smart pointers, and another trait for transparent containers. Or maybe we just + want one trait that does both things. - The authors believe that a convenience `#[derive(SmartPointer)]` macro will continue to make sense, even once the underlying traits are stabilized. It is @@ -643,8 +670,9 @@ can evolve. The authors hope that we will find a way to stabilize the underlying traits in the future. One of the things that is left out of scope of this RFC is coercions involving -custom transparent containers similar to [`Cell`]. They require an -implementation of [`CoerceUnsized`] without [`DispatchFromDyn`]. +custom transparent containers similar to [`Cell`]. They require you to implement +the traits with different where bounds. Adding support for custom transparent +containers makes sense as a future expansion of the feature. There is a reasonable change that we may be able to lift some of [the restrictions][input-requirements] on the shape of the struct as well. The From 8f029b2fcc77aaf69efabfb6b1f566eb6f92d515 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 3 May 2024 16:07:42 +0200 Subject: [PATCH 04/16] Fix typos --- text/3621-derive-smart-pointer.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 12784be71b8..fb3471f4bd5 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -268,7 +268,7 @@ are verified semantically by the compiler when checking the generated The macro will expand to two implementations, one for [`core::ops::CoerceUnsized`] and one for [`core::ops::DispatchFromDyn`]. This -is enough for a type to participe in unsizing coercions and dynamic dispatch. +is enough for a type to participate in unsizing coercions and dynamic dispatch. The derive macro will implement the traits for the type according to the following procedure: @@ -564,7 +564,7 @@ compatibility concerns with `Pin`. We cannot prevent you from using `Pin::new` with structs that have malicious `Deref` implementations. However, one possible place we can intervene is the coercion from `Pin>` to `Pin>`. If you need to use unsafe before those -coerceions are possible, then the problem is solved. For example, we might +coercions are possible, then the problem is solved. For example, we might introduce a `StableDeref` trait: ```rs /// # Safety @@ -680,7 +680,7 @@ current restrictions are just whatever [`DispatchFromDyn`] requires today, and proposals for relaxing them have been seen before (e.g., in the [pre-RFC][pre-rfc].) -One example of a restriction that we could lift is the restrction that there is +One example of a restriction that we could lift is the restriction that there is only one non-zero-sized field. This could allow implementations of `Rc` and `Arc` that store the value and refcount in two different allocations, like how the C++ `shared_ptr` works. From 211abb3466e3f92257f955d4e28445baa8f955eb Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 09:28:59 +0200 Subject: [PATCH 05/16] Adjust how the macro expands trait bounds Without this adjustment, it would not be possible to use this macro with types such as the `ARef` type that was discussed on the RFC thread [1], since we need `AlwaysRefcounted` to be specified for both T and U. Link: https://www.github.com/rust-lang/rfcs/pull/3621#issuecomment-2094094231 [1] --- text/3621-derive-smart-pointer.md | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index fb3471f4bd5..4f85100f91b 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -270,20 +270,25 @@ The macro will expand to two implementations, one for [`core::ops::CoerceUnsized`] and one for [`core::ops::DispatchFromDyn`]. This is enough for a type to participate in unsizing coercions and dynamic dispatch. -The derive macro will implement the traits for the type according to the +The derive macro will implement both traits for the type according to the following procedure: -- Copy all generic parameters and their bounds from the struct definition into - the impl. -- Add an additional type parameter `U` and give it a `?Sized` bound. +- Copy all generic parameters from the struct definition into the impl. +- Add an additional type parameter `U`. +- For every trait bound declared on the trait, add it twice to the trait + implementation. Once exactly as written, and once with every instance of the + `#[pointee]` parameter replaced with `U`. - Add an additional `Unsize` bound to the `#[pointee]` type parameter. -- The generic parameter of the traits being implemented will be `Self`, except +- The generic parameter of the trait being implemented will be `Self`, except that the `#[pointee]` type parameter is replaced with `U`. Given the following example code: ```rust #[derive(SmartPointer)] -struct MySmartPointer<'a, #[pointee] T: ?Sized, A>{ +struct MySmartPointer<'a, #[pointee] T, A> +where + T: ?Sized + SomeTrait, +{ ptr: &'a T, phantom: PhantomData, } @@ -295,15 +300,17 @@ we'll get the following expansion: #[automatically_derived] impl<'a, T, A, U> ::core::ops::CoerceUnsized> for MySmartPointer<'a, T, A> where - T: ?Sized + ::core::marker::Unsize, - U: ?::core::marker::Sized, + T: ?Sized + SomeTrait, + U: ?Sized + SomeTrait, + T: ::core::marker::Unsize, {} #[automatically_derived] impl<'a, T, A, U> ::core::ops::DispatchFromDyn> for MySmartPointer<'a, T, A> where - T: ?Sized + ::core::marker::Unsize, - U: ?::core::marker::Sized, + T: ?Sized + SomeTrait, + U: ?Sized + SomeTrait, + T: ::core::marker::Unsize, {} ``` From 4cd0f76661f37a07f870a4d10a1f2c3ceacc006d Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 10:34:58 +0200 Subject: [PATCH 06/16] Add `#[smart_pointer::pointee]` --- text/3621-derive-smart-pointer.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 4f85100f91b..bacf7620a9f 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -264,6 +264,8 @@ Point 1 and 2 are verified syntactically by the derive macro, whereas 3 and 4 are verified semantically by the compiler when checking the generated [`DispatchFromDyn`] implementation as it does today. +The `#[pointee]` attribute may also be written as `#[smart_pointer::pointee]`. + ## Expansion The macro will expand to two implementations, one for @@ -486,6 +488,30 @@ refcounted value with multiple lists. People have argued that it would be better to change this to a generic type instead of a const generic, so it would be useful to keep the option of having multiple generic types on the struct. +### Conflicts with third-party derive macros + +The `#[pointee]` attribute could in principle conflict with other derive macros +that also wish to annotate one of the parameters with an attribute called +`#[pointee]`. To disambiguate such cases, we also allow the attribute to be +spelled `#[smart_pointer::pointee]`. + +It is an error to specify both `#[pointee]` and `#[smart_pointer::pointee]`, so +both macros must support this kind of disambiguation. + +Another way to avoid conflicts between `#[derive(SmartPointer)]` and third-party +macros is to always assume that the first generic parameter is the pointee. +This RFC does not propose that solution because: + +* It prevents the pointee from having a default unless it is the only parameter, + because parameters with a default must come last. +* If logic such as "the first parameter" becomes commonplace in macro design, + then it does not really solve the issue with conflicts: you could have two + macros that both assume that the first parameter is special. And this kind of + conflict will be more common than attribute conflicts, because the attribute + will only conflict if both macros use an attribute of the same name. + +The authors are not aware of any macros using a `#[pointee]` attribute today. + # Prior art [prior-art]: #prior-art From 44e8727c2cd3bab72e68965819921d4b60eeed66 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 10:49:13 +0200 Subject: [PATCH 07/16] Add drawback about trait name not matching --- text/3621-derive-smart-pointer.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index bacf7620a9f..e81a3beb561 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -346,6 +346,11 @@ is `NonNull>` (as opposed to `NonNull`). [`Cell`], so hopefully that will be enough incentive to continue work on the underlying traits. +- This would be the first example in the standard library of a derive macro that + does not implement a trait of the same name as the macro. (However, there are + examples of macros that implement multiple traits: `#[derive(PartialEq)]` + also implements `StructuralPartialEq`.) + # Rationale and alternatives [rationale-and-alternatives]: #rationale-and-alternatives From cd05ef26a9a63e41417a77f2f38c375fc0d25ed3 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 11:00:28 +0200 Subject: [PATCH 08/16] Explain what the macro requires without reference to `DispatchFromDyn` --- text/3621-derive-smart-pointer.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index e81a3beb561..b99e04bb54b 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -255,12 +255,16 @@ The macro sets the following requirements on its input: `#[pointee]` derive helper attribute. 3. The struct must not be `#[repr(packed)]` or `#[repr(C)]`. 4. Other than one-aligned, zero-sized fields, the struct must have exactly one - field and that field’s type must be must implement `DispatchFromDyn` - where `F` is the type of `T`’s field type. + field. +5. Assume that `T` is a type that can be unsized to `U`, and let `FT` and `FU` + be the type of the struct's field when the pointee is equal to `T` and `U` + respectively. If the struct's trait bounds are satisfied for both `T` and + `U`, then it must be possible to convert `FT` to `FU` using an unsizing + coercion. (Adapted from the docs for [`DispatchFromDyn`].) -Point 1 and 2 are verified syntactically by the derive macro, whereas 3 and 4 +Point 1 and 2 are verified syntactically by the derive macro, whereas 3, 4 and 5 are verified semantically by the compiler when checking the generated [`DispatchFromDyn`] implementation as it does today. From a9104e781433fd80fe76f65ad20445344652ef92 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 11:05:33 +0200 Subject: [PATCH 09/16] Mention custom allocators in future possibilities --- text/3621-derive-smart-pointer.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index b99e04bb54b..fa333e87a6d 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -723,9 +723,12 @@ proposals for relaxing them have been seen before (e.g., in the [pre-RFC][pre-rfc].) One example of a restriction that we could lift is the restriction that there is -only one non-zero-sized field. This could allow implementations of `Rc` and -`Arc` that store the value and refcount in two different allocations, like how -the C++ `shared_ptr` works. +only one non-zero-sized field. This would allow smart pointers to use custom +allocators. (Today, types like `Box` and `Rc` only work with trait objects when +using the default zero-sized allocator.) + +This could also allow implementations of `Rc` and `Arc` that store the value and +refcount in two different allocations, like how the C++ `shared_ptr` works. ```rust #[derive(SmartPointer)] pub struct Rc { From 7f4e36beed89fd3b5e2a80263e546b4db00bf51a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 11:15:22 +0200 Subject: [PATCH 10/16] Add section on non-derive macros --- text/3621-derive-smart-pointer.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index fa333e87a6d..7449ad58ca8 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -521,6 +521,22 @@ This RFC does not propose that solution because: The authors are not aware of any macros using a `#[pointee]` attribute today. +## Derive macro or not? + +Stabilizing this as a derive macro more or less locks us in with the decision +that the compiler will use traits to specify which types are compatible with +trait objects. However, one could imagine other mechanisms. For example, stable +Rust currently has logic saying that any struct where the last field is `?Sized` +will work with unsizing operations. (E.g., if `Wrapper` is such a struct, then +you can convert from `Box>` to `Box>`.) That +mechanism is not specified using a trait. + +However, using traits for this functionality seems to be the most flexible. To +solve the unresolved questions, we most likely need to constrain the +implementations of these traits for `Pin` with stricter trait bounds than what +is specified on the struct. That will get much more complicated if we use a +mechanism other than traits to specify this logic. + # Prior art [prior-art]: #prior-art From 1eec5b335187347a002e6257682eb19bb77c197a Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Sat, 11 May 2024 11:16:59 +0200 Subject: [PATCH 11/16] Also modify `DispatchFromDyn` for `Pin` --- text/3621-derive-smart-pointer.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 7449ad58ca8..552b1215638 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -646,8 +646,7 @@ introduce a `StableDeref` trait: unsafe trait StableDeref: Deref { } ``` Then we make it so that you can only coerce pinned pointers when they implement -`StableDeref`. We can do that by modifying its implementation of -[`CoerceUnsized`] to this: +`StableDeref`. We can do that by modifying its trait implementations to this: ```rs impl CoerceUnsized> for Pin where @@ -655,6 +654,13 @@ where T: StableDeref, U: StableDeref, {} + +impl DispatchFromDyn> for Pin +where + T: CoerceUnsized, + T: StableDeref, + U: StableDeref, +{} ``` This way, the user must implement the unsafe trait before they can coerce pinned versions of the pointer. Since the trait is unsafe, it is not our fault From 17b4d1810278b25f1439dea3e750da400c785568 Mon Sep 17 00:00:00 2001 From: Alice Ryhl Date: Fri, 24 May 2024 17:27:34 +0200 Subject: [PATCH 12/16] Propose specific solution for Pin unsoundness issue --- text/3621-derive-smart-pointer.md | 331 ++++++++++++++++-------------- 1 file changed, 180 insertions(+), 151 deletions(-) diff --git a/text/3621-derive-smart-pointer.md b/text/3621-derive-smart-pointer.md index 552b1215638..fdf35201774 100644 --- a/text/3621-derive-smart-pointer.md +++ b/text/3621-derive-smart-pointer.md @@ -173,6 +173,19 @@ struct MySmartPointer<#[pointee] T: ?Sized, U> { Specifying `#[pointee]` when the struct has only one type parameter is allowed, but not required. +## Pinned pointers + +The `#[derive(SmartPointer)]` macro is not sufficient to coerce the smart +pointer when it is wrapped in `Pin`. That is, even if `MySmartPointer` +coerces to `MySmartPointer`, you will not be able to coerce +`Pin>` to `Pin>`. +Similarly, traits with self types of `Pin>` are not object +safe. + +If you implement the unstable unsafe trait called `PinCoerceUnsized` for +`MySmartPointer`, then the smart pointer will gain the ability to be coerced +when wrapped in `Pin`. The trait is not being stabilized by this RFC. + ## Example of a custom Rc [custom-rc]: #example-of-a-custom-rc @@ -230,7 +243,6 @@ impl Drop for Rc { In this example, `#[derive(SmartPointer)]` makes it possible to use `Rc`. - # Reference-level explanation [reference-level-explanation]: #reference-level-explanation @@ -337,6 +349,47 @@ you can use them for dynamic dispatch. As seen in the `Rc` example, the macro needs to be usable even if the pointer is `NonNull>` (as opposed to `NonNull`). +## `PinCoerceUnsized` + +The standard library defines the following unstable trait: +```rust +/// Trait that indicates that this is a pointer or a wrapper for one, where +/// unsizing can be performed on the pointee when it is pinned. +/// +/// # Safety +/// +/// If this type implements `Deref`, then the concrete type returned by `deref` +/// and `deref_mut` must not change without a modification. The following +/// operations are not considered modifications: +/// +/// * Moving the pointer. +/// * Performing unsizing coercions on the pointer. +/// * Performing dynamic dispatch with the pointer. +/// * Calling `deref` or `deref_mut` on the pointer. +/// +/// The concrete type of a trait object is the type that the vtable corresponds +/// to. The concrete type of a slice is an array of the same element type and +/// the length specified in the metadata. The concrete type of a sized type +/// is the type itself. +pub unsafe trait PinCoerceUnsized: CoerceUnsized {} + +impl CoerceUnsized> for Pin +where + T: PinCoerceUnsized, +{} + +impl DispatchFromDyn> for Pin +where + T: PinCoerceUnsized + DispatchFromDyn, +{} +``` +The trait is implemented for all standard library types that implement +`CoerceUnsized`. + +Although this RFC proposes to add the `PinCoerceUnsized` trait to ensure that +unsizing coercions of pinned pointers cannot be used to cause unsoundness, the +RFC does not propose to stabilize the trait. + # Drawbacks [drawbacks]: #drawbacks @@ -537,6 +590,131 @@ implementations of these traits for `Pin` with stricter trait bounds than what is specified on the struct. That will get much more complicated if we use a mechanism other than traits to specify this logic. +## `PinCoerceUnsized` + +Beyond the addition of the `#[derive(SmartPointer)]` macro, this RFC also +proposes to add a new unstable trait called `PinCoerceUnsized`. This trait is +necessary because the API proposed by this RFC would otherwise by unsound: + +> You could use `Pin::new` to create a `Pin>` and coerce +> that to `Pin>`. Then, if `SmartPtr` has a malicious +> implementation of the `Deref` trait, then `deref` could return a `&mut dyn +> Future` whose concrete type is not `MyUnpinFuture`, but instead some other +> future type that *does* need to be pinned. Since no unsafe code is involved in +> any of these steps, this means that we are able to safely create a pinned +> pointer to a value that has not been pinned. + +Adding the unsafe `PinCoerceUnsized` trait ensures that the user cannot coerce +`Pin>` to `Pin>` without using +unsafe to promise that the concrete type returned when calling `deref` on the +resulting `Pin>` is `MyUnpinFuture`. + +This RFC does not propose to stabilize `PinCoerceUnsized` because of naming +issues. If we do not know whether `CoerceUnsized` will still use that name when +we stabilize it, then we can't stabilize a trait called `PinCoerceUnsized`. +Furthermore, the Linux kernel (which forms the motivation for this RFC) does not +currently need it to be stabilized. + +There are some alternatives to `PinCoerceUnsized`. The primary contender for an +alternative solution is `DerefPure`. However, that solution involves a minor +breaking change, and we can always decide to switch to `DerefPure` later even if +we adopt `PinCoerceUnsized` now. + +### `StableDeref` + +A previous version of this RFC proposed to instead add a trait called +`StableDeref` that pretty much had the same requirements as `PinCoerceUnsized`, +except that it also required the address returned by `deref` to be stable. + +The motivation behind adding a `StableDeref` trait instead of `PinCoerceUnsized` +is that `StableDeref` would also be useful for other things, and that both +traits essentially just say that the `Deref` implementation doesn't do anything +unreasonable. The requirement that the address is stable is not strictly +required to keep the API sound, but semantically it is incoherent to have a +pinned pointer whose address can change, so it is not overly burdensome to +require it. + +However, this suggestion was abandoned due to an inconsistency with the +`StableDeref` trait defined by the ecosystem. That trait requires that raw +pointers to the contents of the pointer stay valid even if the smart pointer is +moved, but this is not satisfied by `Box` or `&mut T` because moving these +pointers asserts that they are unique. This is a problem because whichever trait +we use for pinned unsizing coercions, it *must* be implemented by `Box` and +`&mut T`. + +### `DerefPure` + +In a similar manner to the `StableDeref` option, we can use the existing +`DerefPure` trait. This option is a reasonable way forward, but this RFC does +not propose it because it would be a breaking change. (Note that `StableDeref` +is also a breaking change for the same reason.) + +Basically, the problem is that `Deref` is a supertrait of `DerefPure`, but there +are a few types that can be coerced when pinned that do not implement `Deref`. +For example, this code compiles today: +```rust +trait MyTrait {} +impl MyTrait for String {} + +fn pin_cell_map(p: Pin>>) -> Pin>> { + p +} +``` +The `Cell` type does not implement `Deref`, but the above code still compiles. +Note that since all methods on `Pin` _do_ require `Deref`, such pinned pointers +are useless and impossible to construct. But it is a breaking change +nonetheless. + +If this breakage is considered acceptable, then using `DerefPure` instead of a +new `PinCoerceUnsized` would be a reasonable way forward. + +### Make the derive macro unsafe + +We could just make the macro unsafe in a similar vein to [the unsafe attributes +RFC][unsafe-attribute]. +```rust +// SAFETY: The Deref impl is not malicious. +#[unsafe(derive(SmartPointer))] +pub struct Rc { + inner: NonNull>, +} +``` +This would solve the unsoundness, but this RFC does not propose it because it +raises forwards compatibility hazards. We might start out with an unsafe derive +macro, and then in the future we might decide to instead use the +`PinCoerceUnsized` solution. Then, `#[unsafe(derive(SmartPointer))]` would have +to generate an implementation of `PinCoerceUnsized` trait too, because otherwise +`#[unsafe(derive(SmartPointer))] Pin>` would lose the ability to be +unsize coerced, which would be a breaking change. This means that +`#[unsafe(derive(SmartPointer))]` and `#[derive(SmartPointer)]` could end up +expanding to _different_ things. + +### Negative trait bounds + +There are also various solutions that involve negative trait bounds. For +example, you might instead modify `CoerceUnsized` like this: +```rust +// Permit going from `Pin` to` Pin` +impl CoerceUnsized> for Pin