From aba7a0757ed990c7a3c509acbd34b5f5bb85d405 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Sat, 12 Feb 2022 12:45:57 -0500 Subject: [PATCH 1/4] Fix provenance UB and alignment UB A &Header cannot be used to get a useful pointer to data beyond it, because the pointer from the as-cast of the &Header only has provenance over the Header. After a set_len call that decreases the length, it is invalid to create a slice then try to get_unchecked into the region between the old and new length, because the reference in the slice that the ThinVec now Derefs to does not have provenance over that region. Alternatively, this is UB because the docs stipulate that you're not allowed to use `get_unchecked` to index out of bounds. Misaligned data pointers were produced when the gecko-ffi feature was enabled and T has an alignment greater than 4. I think the use of align_offset in tests is subtly wrong, align_offset seems to be for optimizations only. The docs say that a valid implementation may always return usize::MAX. --- src/lib.rs | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 19e38b8..82760f4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -264,23 +264,6 @@ impl Header { fn set_len(&mut self, len: usize) { self._len = assert_size(len); } - - fn data(&self) -> *mut T { - let header_size = mem::size_of::
(); - let padding = padding::(); - - let ptr = self as *const Header as *mut Header as *mut u8; - - unsafe { - if padding > 0 && self.cap() == 0 { - // The empty header isn't well-aligned, just make an aligned one up - NonNull::dangling().as_ptr() - } else { - // This could technically result in overflow, but padding would have to be absurdly large for this to occur. - ptr.add(header_size + padding) as *mut T - } - } - } } #[cfg(feature = "gecko-ffi")] @@ -321,10 +304,10 @@ impl Header { /// optimize everything to not do that (basically, make ptr == len and branch /// on size == 0 in every method), but it's a bunch of work for something that /// doesn't matter much. -#[cfg(any(not(feature = "gecko-ffi"), test))] +#[cfg(any(not(feature = "gecko-ffi"), test, miri))] static EMPTY_HEADER: Header = Header { _len: 0, _cap: 0 }; -#[cfg(all(feature = "gecko-ffi", not(test)))] +#[cfg(all(feature = "gecko-ffi", not(test), not(miri)))] extern "C" { #[link_name = "sEmptyTArrayHeader"] static EMPTY_HEADER: Header; @@ -470,7 +453,18 @@ impl ThinVec { unsafe { self.ptr.as_ref() } } fn data_raw(&self) -> *mut T { - self.header().data() + unsafe { + if self.header().cap() == 0 { + // The empty header isn't well-aligned, just make an aligned one up + NonNull::dangling().as_ptr() + } else { + // This could technically result in overflow, but padding would have to be absurdly large for this to occur. + let padding = padding::(); + let header_size = mem::size_of::
(); + let ptr = self.ptr.as_ptr() as *mut u8; + ptr.add(header_size + padding) as *mut T + } + } } // This is unsafe when the header is EMPTY_HEADER. @@ -565,7 +559,7 @@ impl ThinVec { // doesn't re-drop the just-failed value. let new_len = self.len() - 1; self.set_len(new_len); - ptr::drop_in_place(self.get_unchecked_mut(new_len)); + ptr::drop_in_place(self.data_raw().add(new_len)); } } } @@ -915,7 +909,7 @@ impl ThinVec { (*ptr).set_cap(new_cap); self.ptr = NonNull::new_unchecked(ptr); } else { - let mut new_header = header_with_capacity::(new_cap); + let new_header = header_with_capacity::(new_cap); // If we get here and have a non-zero len, then we must be handling // a gecko auto array, and we have items in a stack buffer. We shouldn't @@ -931,8 +925,9 @@ impl ThinVec { let len = self.len(); if cfg!(feature = "gecko-ffi") && len > 0 { new_header - .as_mut() - .data::() + .as_ptr() + .add(1) + .cast::() .copy_from_nonoverlapping(self.data_raw(), len); self.set_len(0); } @@ -2654,9 +2649,9 @@ mod std_tests { macro_rules! assert_aligned_head_ptr { ($typename:ty) => {{ let v: ThinVec<$typename> = ThinVec::with_capacity(1 /* ensure allocation */); - let head_ptr: *mut $typename = v.header().data::<$typename>(); + let head_ptr: *mut $typename = v.data_raw(); assert_eq!( - head_ptr.align_offset(std::mem::align_of::<$typename>()), + head_ptr as usize % std::mem::align_of::<$typename>(), 0, "expected Header::data<{}> to be aligned", stringify!($typename) From 2ab260b1eb373ecabcf059b1e88e7051e8ff9e4a Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Sun, 13 Feb 2022 23:03:28 -0500 Subject: [PATCH 2/4] Fix (for real this time) issues with alignment And also explain the nuanced explanation in detail, and try to back up the implementation with tests that ensure that the data pointer for a zero-length ThinVec is correct, in and out of gecko-ffi mode. Co-authored-by: Aria Beingessner --- src/lib.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 82760f4..5d83e13 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -247,9 +247,22 @@ mod impl_details { } } -/// The header of a ThinVec. -/// -/// The _cap can be a bitfield, so use accessors to avoid trouble. +// The header of a ThinVec. +// +// The _cap can be a bitfield, so use accessors to avoid trouble. +// +// In "real" gecko-ffi mode, the empty singleton will be aligned +// to 8 by gecko. But in tests we have to provide the singleton +// ourselves, and Rust makes it hard to "just" align a static. +// To avoid messing around with a wrapper type around the +// singleton *just* for tests, we just force all headers to be +// aligned to 8 in this weird "zombie" gecko mode. +// +// This shouldn't affect runtime layout (padding), but it will +// result in us asking the allocator to needlessly overalign +// non-empty ThinVecs containing align < 8 types in +// zombie-mode, but not in "real" geck-ffi mode. Minor. +#[cfg_attr(all(feature = "gecko-ffi", any(test, miri)), repr(align(8)))] #[repr(C)] struct Header { _len: SizeType, @@ -453,13 +466,42 @@ impl ThinVec { unsafe { self.ptr.as_ref() } } fn data_raw(&self) -> *mut T { + // `padding` contains ~static assertions against types that are + // incompatible with the current feature flags. Even if we don't + // care about its result, we should always call it before getting + // a data pointer to guard against invalid types! + let padding = padding::(); + + // Although we ensure the data array is aligned when we allocate, + // we can't do that with the empty singleton. So when it might not + // be properly aligned, we substitute in the NonNull::dangling + // which *is* aligned. + // + // To minimize dynamic branches on `cap` for all accesses + // to the data, we include this guard which should only involve + // compile-time constants. Ideally this should result in the branch + // only be included for types with excessive alignment. + let empty_header_is_aligned = if cfg!(feature = "gecko-ffi") { + // in gecko-ffi mode `padding` will ensure this under + // the assumption that the header has size 8 and the + // static empty singleton is aligned to 8. + true + } else { + // In non-gecko-ffi mode, the empty singleton is just + // naturally aligned to the Header. If the Header is at + // least as aligned as T *and* the padding would have + // been 0, then one-past-the-end of the empty singleton + // *is* a valid data pointer and we can remove the + // `dangling` special case. + mem::align_of::
() >= mem::align_of::() && padding == 0 + }; + unsafe { - if self.header().cap() == 0 { - // The empty header isn't well-aligned, just make an aligned one up + if !empty_header_is_aligned && self.header().cap() == 0 { NonNull::dangling().as_ptr() } else { - // This could technically result in overflow, but padding would have to be absurdly large for this to occur. - let padding = padding::(); + // This could technically result in overflow, but padding + // would have to be absurdly large for this to occur. let header_size = mem::size_of::
(); let ptr = self.ptr.as_ptr() as *mut u8; ptr.add(header_size + padding) as *mut T @@ -1368,6 +1410,28 @@ mod tests { ThinVec::::new(); } + #[test] + fn test_data_ptr_alignment() { + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 2 == 0); + + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 4 == 0); + + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 8 == 0); + } + + #[test] + #[cfg_attr(feature = "gecko-ffi", should_panic)] + fn test_overaligned_type_is_rejected_for_gecko_ffi_mode() { + #[repr(align(16))] + struct Align16(u8); + + let v = ThinVec::::new(); + assert!(v.data_raw() as usize % 16 == 0); + } + #[test] fn test_partial_eq() { assert_eq!(thin_vec![0], thin_vec![0]); From a3df169c63d43f923c2de1cb0878b9370e44aa32 Mon Sep 17 00:00:00 2001 From: Ben Kimock Date: Mon, 14 Feb 2022 20:11:55 -0500 Subject: [PATCH 3/4] Duplicate the padding check in constructors Only asserting alignment in data_raw is a bit too late, because the check will occur for an always-empty ThinVec in the destructor, which will cause a double-panic and a SIGILL. Duplicating it in the constructors prevents creating an invalid ThinVec in the first place. The check in data_raw is retained as a prevention against using an invalid ThinVec through FFI. --- src/lib.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 5d83e13..ca85f3b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -438,17 +438,24 @@ macro_rules! thin_vec { impl ThinVec { pub fn new() -> ThinVec { - unsafe { - ThinVec { - ptr: NonNull::new_unchecked(&EMPTY_HEADER as *const Header as *mut Header), - boo: PhantomData, - } - } + ThinVec::with_capacity(0) } pub fn with_capacity(cap: usize) -> ThinVec { + // `padding` contains ~static assertions against types that are + // incompatible with the current feature flags. We also call it to + // invoke these assertions when getting a pointer to the `ThinVec` + // contents, but since we also get a pointer to the contents in the + // `Drop` impl, trippng an assertion along that code path causes a + // double panic. We duplicate the assertion here so that it is + // testable, + let _ = padding::(); + if cap == 0 { - ThinVec::new() + ThinVec { + ptr: NonNull::new_unchecked(&EMPTY_HEADER as *const Header as *mut Header), + boo: PhantomData, + } } else { ThinVec { ptr: header_with_capacity::(cap), From 3b4eb987bd2e9fb4ca8521e3ad3e42b3d3739c64 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 14 Feb 2022 21:03:37 -0500 Subject: [PATCH 4/4] Fix unsafe block --- src/lib.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ca85f3b..f1d10c6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -452,9 +452,11 @@ impl ThinVec { let _ = padding::(); if cap == 0 { - ThinVec { - ptr: NonNull::new_unchecked(&EMPTY_HEADER as *const Header as *mut Header), - boo: PhantomData, + unsafe { + ThinVec { + ptr: NonNull::new_unchecked(&EMPTY_HEADER as *const Header as *mut Header), + boo: PhantomData, + } } } else { ThinVec {