From c6da3ab1cc68418c9f532273724c091453532c22 Mon Sep 17 00:00:00 2001 From: Joshua Liebow-Feeser Date: Fri, 1 Mar 2024 10:59:24 -0800 Subject: [PATCH] Split ByteSlice and SplitByteSlice Closes #1 --- src/lib.rs | 216 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 145 insertions(+), 71 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 9a986433e20..9ba01a8c53e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4785,13 +4785,21 @@ where // INVARIANTS: `try_cast_into_no_leftover` validates size and alignment. Some(Ref(bytes, PhantomData)) } +} +impl Ref +where + B: SplitByteSlice, + T: KnownLayout + NoCell + ?Sized, +{ fn bikeshed_new_from_prefix_known_layout(bytes: B) -> Option<(Ref, B)> { let (_, split_at) = Ptr::from_ref(bytes.deref()).try_cast_into::(CastType::Prefix)?; let (bytes, suffix) = bytes.split_at(split_at); // INVARIANTS: `try_cast_into` validates size and alignment, and returns // a `split_at` that indicates how many bytes of `bytes` correspond to a - // valid `T`. + // valid `T`. By safety invariant on `SplitByteSlice`, `split_at` is + // implemented correctly, and so we can rely on it to produce the + // correct `bytes` and `suffix`. Some((Ref(bytes, PhantomData), suffix)) } @@ -4800,7 +4808,9 @@ where let (prefix, bytes) = bytes.split_at(split_at); // INVARIANTS: `try_cast_into` validates size and alignment, and returns // a `split_at` that indicates how many bytes of `bytes` correspond to a - // valid `T`. + // valid `T`. By safety invariant on `SplitByteSlice`, `split_at` is + // implemented correctly, and so we can rely on it to produce the + // correct `prefix` and `bytes`. Some((prefix, Ref(bytes, PhantomData))) } } @@ -4816,7 +4826,12 @@ where // INVARIANTS: We just validated size and alignment. Some(Ref(bytes, PhantomData)) } +} +impl Ref +where + B: SplitByteSlice, +{ fn new_sized_from_prefix(bytes: B) -> Option<(Ref, B)> { if bytes.len() < mem::size_of::() || !util::aligned_to::<_, T>(bytes.deref()) { return None; @@ -4824,7 +4839,9 @@ where let (bytes, suffix) = bytes.split_at(mem::size_of::()); // INVARIANTS: We just validated alignment and that `bytes` is at least // as large as `T`. `bytes.split_at(mem::size_of::())` ensures that - // the new `bytes` is exactly the size of `T`. + // the new `bytes` is exactly the size of `T`. By safety invariant on + // `SplitByteSlice`, `split_at` is implemented correctly, and so we can + // rely on it to produce the correct `bytes` and `suffix`. Some((Ref(bytes, PhantomData), suffix)) } @@ -4839,6 +4856,9 @@ where // size_of::()`, the `bytes` which results from `let (prefix, bytes) // = bytes.split_at(split_at)` has length `size_of::()`. After // constructing `bytes`, we validate that it has the proper alignment. + // By safety invariant on `SplitByteSlice`, `split_at` is implemented + // correctly, and so we can rely on it to produce the correct `prefix` + // and `bytes`. Some((prefix, Ref(bytes, PhantomData))) } } @@ -4859,7 +4879,13 @@ where // INVARIANTS: `try_cast_into_no_leftover` validates size and alignment. Some(Ref(bytes, PhantomData)) } +} +impl Ref +where + B: SplitByteSlice, + T: KnownLayout + NoCell + ?Sized, +{ /// Constructs a new `Ref` from the prefix of a byte slice. /// /// `new_from_prefix` verifies that `bytes.len() >= size_of::()` and that @@ -4873,7 +4899,9 @@ where let (bytes, suffix) = bytes.split_at(split_at); // INVARIANTS: `try_cast_into` validates size and alignment, and returns // a `split_at` that indicates how many bytes of `bytes` correspond to a - // valid `T`. + // valid `T`. By safety invariant on `SplitByteSlice`, `split_at` is + // implemented correctly, and so we can rely on it to produce the + // correct `bytes` and `suffix`. Some((Ref(bytes, PhantomData), suffix)) } @@ -4891,7 +4919,9 @@ where let (prefix, bytes) = bytes.split_at(split_at); // INVARIANTS: `try_cast_into` validates size and alignment, and returns // a `split_at` that indicates how many bytes of `bytes` correspond to a - // valid `T`. + // valid `T`. By safety invariant on `SplitByteSlice`, `split_at` is + // implemented correctly, and so we can rely on it to produce the + // correct `prefix` and `bytes`. Some((prefix, Ref(bytes, PhantomData))) } } @@ -4907,7 +4937,13 @@ where pub fn new_slice(bytes: B) -> Option> { Self::new(bytes) } +} +impl Ref +where + B: SplitByteSlice, + T: NoCell, +{ /// Constructs a new `Ref` of a slice type from the prefix of a byte slice. /// /// `new_slice_from_prefix` verifies that `bytes.len() >= size_of::() * @@ -5003,7 +5039,13 @@ where pub fn new_zeroed(bytes: B) -> Option> { map_zeroed(Self::new(bytes)) } +} +impl Ref +where + B: SplitByteSliceMut, + T: KnownLayout + NoCell + ?Sized, +{ /// Constructs a new `Ref` from the prefix of a byte slice, zeroing the /// prefix. /// @@ -5063,7 +5105,13 @@ where pub fn new_slice_zeroed(bytes: B) -> Option> { map_zeroed(Self::new(bytes)) } +} +impl Ref +where + B: SplitByteSliceMut, + T: NoCell, +{ /// Constructs a new `Ref` of a slice type from the prefix of a byte slice, /// after zeroing the bytes. /// @@ -5122,7 +5170,13 @@ where pub fn new_unaligned(bytes: B) -> Option> { Ref::new(bytes) } +} +impl Ref +where + B: SplitByteSlice, + T: Unaligned + KnownLayout + NoCell + ?Sized, +{ /// Constructs a new `Ref` from the prefix of a byte slice for a type with /// no alignment requirement. /// @@ -5166,7 +5220,13 @@ where pub fn new_slice_unaligned(bytes: B) -> Option> { Ref::new(bytes) } +} +impl Ref +where + B: SplitByteSlice, + T: Unaligned + NoCell, +{ /// Constructs a new `Ref` of a slice type with no alignment requirement /// from the prefix of a byte slice. /// @@ -5221,7 +5281,13 @@ where pub fn new_unaligned_zeroed(bytes: B) -> Option> { map_zeroed(Self::new_unaligned(bytes)) } +} +impl Ref +where + B: SplitByteSliceMut, + T: Unaligned + KnownLayout + NoCell + ?Sized, +{ /// Constructs a new `Ref` from the prefix of a byte slice for a type with /// no alignment requirement, zeroing the prefix. /// @@ -5278,7 +5344,13 @@ where pub fn new_slice_unaligned_zeroed(bytes: B) -> Option> { map_zeroed(Self::new_slice_unaligned(bytes)) } +} +impl Ref +where + B: SplitByteSliceMut, + T: Unaligned + NoCell, +{ /// Constructs a new `Ref` of a slice type with no alignment requirement /// from the prefix of a byte slice, after zeroing the bytes. /// @@ -5405,7 +5477,7 @@ where /// Gets the underlying bytes. #[inline] pub fn bytes(&self) -> &[u8] { - &self.0 + self.0.deref() } } @@ -5417,7 +5489,7 @@ where /// Gets the underlying bytes mutably. #[inline] pub fn bytes_mut(&mut self) -> &mut [u8] { - &mut self.0 + self.0.deref_mut() } } @@ -5429,11 +5501,13 @@ where /// Reads a copy of `T`. #[inline] pub fn read(&self) -> T { - // SAFETY: Because of the invariants on `Ref`, we know that `self.0` is - // at least `size_of::()` bytes long, and that it is at least as - // aligned as `align_of::()`. Because `T: FromBytes`, it is sound to + // SAFETY: Because of the invariants on `Ref`, we know that `self.0` was + // at least `size_of::()` bytes long when it was validated, and that + // it was at least as aligned as `align_of::()`. Because of the + // safety invariant on `ByteSlice`, we know that these must still hold + // when we dereference here. Because `T: FromBytes`, it is sound to // interpret these bytes as a `T`. - unsafe { ptr::read(self.0.as_ptr().cast::()) } + unsafe { ptr::read(self.0.deref().as_ptr().cast::()) } } } @@ -5445,12 +5519,14 @@ where /// Writes the bytes of `t` and then forgets `t`. #[inline] pub fn write(&mut self, t: T) { - // SAFETY: Because of the invariants on `Ref`, we know that `self.0` is - // at least `size_of::()` bytes long, and that it is at least as - // aligned as `align_of::()`. Writing `t` to the buffer will allow - // all of the bytes of `t` to be accessed as a `[u8]`, but because `T: - // IntoBytes`, we know this is sound. - unsafe { ptr::write(self.0.as_mut_ptr().cast::(), t) } + // SAFETY: Because of the invariants on `Ref`, we know that `self.0` was + // at least `size_of::()` bytes long when it was validated, and that + // it was at least as aligned as `align_of::()`. Because of the + // safety invariant on `ByteSlice`, we know that these must still hold + // when we dereference here. Writing `t` to the buffer will allow all of + // the bytes of `t` to be accessed as a `[u8]`, but because `T: + // IntoBytes`, we know that this is sound. + unsafe { ptr::write(self.0.deref_mut().as_mut_ptr().cast::(), t) } } } @@ -5557,20 +5633,6 @@ where } } -mod sealed { - pub trait ByteSliceSealed {} -} - -// ByteSlice and ByteSliceMut abstract over [u8] references (&[u8], &mut [u8], -// Ref<[u8]>, RefMut<[u8]>, etc). We rely on various behaviors of these -// references such as that a given reference will never changes its length -// between calls to deref() or deref_mut(), and that split_at() works as -// expected. If ByteSlice or ByteSliceMut were not sealed, consumers could -// implement them in a way that violated these behaviors, and would break our -// unsafe code. Thus, we seal them and implement it only for known-good -// reference types. For the same reason, they're unsafe traits. - -#[allow(clippy::missing_safety_doc)] // TODO(fxbug.dev/99068) /// A mutable or immutable reference to a byte slice. /// /// `ByteSlice` abstracts over the mutability of a byte slice reference, and is @@ -5586,53 +5648,53 @@ mod sealed { /// /// # Safety /// -/// If `Self: Clone` or `Self: Copy`, these implementations must be "stable" in -/// the sense that, given `bytes: Self`, if `bytes.deref()` produces a byte -/// slice of a given address and length, any copy or clone of `bytes`, -/// `bytes_new`, must also produce a byte slice of the same address and length -/// from `bytes_new.deref()`. -/// -// It may seem overkill to go to this length to ensure that this doc link never -// breaks. We do this because it simplifies CI - it means that generating docs -// always succeeds, so we don't need special logic to only generate docs under -// certain features. +/// Implementations of `ByteSlice` must promise that their implementations of +/// [`Deref`] and [`DerefMut`] are "stable". In particular, given `B: ByteSlice` +/// and `b: B`, `b` must always dereference to a byte slice with the same +/// address and length. This is true for both `b.deref()` and `b.deref_mut()`. +/// If `B: Copy` or `B: Clone`, then the same is also true of copies or clones +/// of `b`. For example, `b.deref_mut()` must return a byte slice with the same +/// address and length as `b.clone().deref()`. #[cfg_attr(feature = "alloc", doc = "[`Vec`]: alloc::vec::Vec")] #[cfg_attr( not(feature = "alloc"), doc = "[`Vec`]: https://doc.rust-lang.org/std/vec/struct.Vec.html" )] -pub unsafe trait ByteSlice: - Deref + Sized + self::sealed::ByteSliceSealed -{ - /// Gets a raw pointer to the first byte in the slice. - #[inline] - fn as_ptr(&self) -> *const u8 { - <[u8]>::as_ptr(self) - } +pub unsafe trait ByteSlice: Deref + Sized {} + +#[allow(clippy::missing_safety_doc)] // TODO(fxbug.dev/99068) +/// A mutable reference to a byte slice. +/// +/// `ByteSliceMut` abstracts over various ways of storing a mutable reference to +/// a byte slice, and is implemented for various special reference types such as +/// `RefMut<[u8]>`. +pub unsafe trait ByteSliceMut: ByteSlice + DerefMut {} +/// A [`ByteSlice`] that can be split in two. +/// +/// # Safety +/// +/// Unsafe code may depend for its soundness on the assumption that `split_at` +/// is implemented correctly. In particular, given `B: SplitByteSlice` and `b: +/// B`, if `b.deref()` returns a byte slice with address `addr` and length +/// `len`, then if `split <= len`, `b.split_at(split)` will return `(first, +/// second)` such that: +/// - `first`'s address is `addr` and its length is `split` +/// - `second`'s address is `addr + split` and its length is `len - split` +pub unsafe trait SplitByteSlice: ByteSlice { /// Splits the slice at the midpoint. /// /// `x.split_at(mid)` returns `x[..mid]` and `x[mid..]`. /// /// # Panics /// - /// `x.split_at(mid)` panics if `mid > x.len()`. + /// `x.split_at(mid)` panics if `mid > x.deref().len()`. fn split_at(self, mid: usize) -> (Self, Self); } -#[allow(clippy::missing_safety_doc)] // TODO(fxbug.dev/99068) -/// A mutable reference to a byte slice. -/// -/// `ByteSliceMut` abstracts over various ways of storing a mutable reference to -/// a byte slice, and is implemented for various special reference types such as -/// `RefMut<[u8]>`. -pub unsafe trait ByteSliceMut: ByteSlice + DerefMut { - /// Gets a mutable raw pointer to the first byte in the slice. - #[inline] - fn as_mut_ptr(&mut self) -> *mut u8 { - <[u8]>::as_mut_ptr(self) - } -} +/// A shorthand for [`SplitByteSlice`] and [`ByteSliceMut`]. +pub trait SplitByteSliceMut: SplitByteSlice + ByteSliceMut {} +impl SplitByteSliceMut for B {} /// A [`ByteSlice`] that conveys no ownership, and so can be converted into a /// byte slice. @@ -5658,10 +5720,13 @@ pub trait IntoByteSlice<'a>: ByteSlice + Into<&'a [u8]> {} /// [`RefMut`]: core::cell::RefMut pub trait IntoByteSliceMut<'a>: ByteSliceMut + Into<&'a mut [u8]> {} -impl<'a> sealed::ByteSliceSealed for &'a [u8] {} // TODO(#429): Add a "SAFETY" comment and remove this `allow`. #[allow(clippy::undocumented_unsafe_blocks)] -unsafe impl<'a> ByteSlice for &'a [u8] { +unsafe impl<'a> ByteSlice for &'a [u8] {} + +// SAFETY: This delegates to the stdlib implementation, which is assumed to be +// correct. +unsafe impl<'a> SplitByteSlice for &'a [u8] { #[inline] fn split_at(self, mid: usize) -> (Self, Self) { <[u8]>::split_at(self, mid) @@ -5670,10 +5735,13 @@ unsafe impl<'a> ByteSlice for &'a [u8] { impl<'a> IntoByteSlice<'a> for &'a [u8] {} -impl<'a> sealed::ByteSliceSealed for &'a mut [u8] {} // TODO(#429): Add a "SAFETY" comment and remove this `allow`. #[allow(clippy::undocumented_unsafe_blocks)] -unsafe impl<'a> ByteSlice for &'a mut [u8] { +unsafe impl<'a> ByteSlice for &'a mut [u8] {} + +// SAFETY: This delegates to the stdlib implementation, which is assumed to be +// correct. +unsafe impl<'a> SplitByteSlice for &'a mut [u8] { #[inline] fn split_at(self, mid: usize) -> (Self, Self) { <[u8]>::split_at_mut(self, mid) @@ -5682,20 +5750,26 @@ unsafe impl<'a> ByteSlice for &'a mut [u8] { impl<'a> IntoByteSliceMut<'a> for &'a mut [u8] {} -impl<'a> sealed::ByteSliceSealed for cell::Ref<'a, [u8]> {} // TODO(#429): Add a "SAFETY" comment and remove this `allow`. #[allow(clippy::undocumented_unsafe_blocks)] -unsafe impl<'a> ByteSlice for cell::Ref<'a, [u8]> { +unsafe impl<'a> ByteSlice for cell::Ref<'a, [u8]> {} + +// SAFETY: This delegates to stdlib implementations, which are assumed to be +// correct. +unsafe impl<'a> SplitByteSlice for cell::Ref<'a, [u8]> { #[inline] fn split_at(self, mid: usize) -> (Self, Self) { cell::Ref::map_split(self, |slice| <[u8]>::split_at(slice, mid)) } } -impl<'a> sealed::ByteSliceSealed for RefMut<'a, [u8]> {} // TODO(#429): Add a "SAFETY" comment and remove this `allow`. #[allow(clippy::undocumented_unsafe_blocks)] -unsafe impl<'a> ByteSlice for RefMut<'a, [u8]> { +unsafe impl<'a> ByteSlice for RefMut<'a, [u8]> {} + +// SAFETY: This delegates to stdlib implementations, which are assumed to be +// correct. +unsafe impl<'a> SplitByteSlice for RefMut<'a, [u8]> { #[inline] fn split_at(self, mid: usize) -> (Self, Self) { RefMut::map_split(self, |slice| <[u8]>::split_at_mut(slice, mid))