From 5b99ac91a762bb36c1e49f4448a7c64258c6d6b1 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 16 Sep 2021 17:24:29 -0700 Subject: [PATCH 01/18] Add SliceComponents::from_bytes_unchecked --- utils/zerovec/src/varzerovec/components.rs | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 94f0f01cacf..88b926c2e66 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -87,6 +87,39 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { Ok(components) } + /// Construct a [`SliceComponents`] from a byte slice that has previously + /// successfully returned a [`SliceComponents`] when passed to + /// [`SliceComponents::parse_byte_slice()`]. Will return the same + /// object as one would get from calling [`SliceComponents::parse_byte_slice()`]. + /// + /// # Safety + /// The bytes must have previously successfully run through + /// [`SliceComponents::parse_byte_slice()`] + pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self { + if slice.is_empty() { + return SliceComponents { + indices: &[], + things: &[], + entire_slice: slice, + marker: PhantomData, + }; + } + let len_bytes = slice.get_unchecked(0..4); + let len_ule = PlainOldULE::<4>::from_byte_slice_unchecked(len_bytes); + + let len = u32::from_unaligned(len_ule.get_unchecked(0)) as usize; + let indices_bytes = slice.get_unchecked(4..4 * len + 4); + let indices = PlainOldULE::<4>::from_byte_slice_unchecked(indices_bytes); + let things = slice.get_unchecked(4 * len + 4..); + + SliceComponents { + indices, + things, + entire_slice: slice, + marker: PhantomData, + } + } + #[inline] pub fn len(self) -> usize { self.indices.len() From b5ff3c9ebcd62d1c57130665dfc8fdbecf84097e Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 16 Sep 2021 17:47:23 -0700 Subject: [PATCH 02/18] Add VZVULE --- utils/zerovec/src/lib.rs | 2 +- utils/zerovec/src/varzerovec/mod.rs | 7 ++- utils/zerovec/src/varzerovec/ule.rs | 86 +++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 3 deletions(-) create mode 100644 utils/zerovec/src/varzerovec/ule.rs diff --git a/utils/zerovec/src/lib.rs b/utils/zerovec/src/lib.rs index baca535dfe4..53c0b3fc839 100644 --- a/utils/zerovec/src/lib.rs +++ b/utils/zerovec/src/lib.rs @@ -81,7 +81,7 @@ pub mod map; #[cfg(test)] pub mod samples; pub mod ule; -mod varzerovec; +pub mod varzerovec; mod zerovec; #[cfg(feature = "yoke")] diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 4679ae3e356..de6c910f80d 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -8,9 +8,12 @@ use either::Either; use std::fmt::{self, Display}; use std::ops::Index; -mod components; +pub(crate) mod components; #[cfg(feature = "serde")] mod serde; +mod ule; + +pub use ule::VarZeroVecULE; /// A zero-copy vector for variable-width types. /// @@ -156,7 +159,7 @@ where } } -type ParseErrorFor = VarZeroVecError<<::VarULE as VarULE>::Error>; +pub type ParseErrorFor = VarZeroVecError<<::VarULE as VarULE>::Error>; impl From for VarZeroVecError { fn from(e: E) -> Self { diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs new file mode 100644 index 00000000000..7a18fcd03d8 --- /dev/null +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -0,0 +1,86 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::components::SliceComponents; +use super::*; +use core::marker::PhantomData; +use core::mem; + +// safety invariant: The slice MUST be one which parses to +// a valid SliceComponents +#[repr(transparent)] +pub struct VarZeroVecULE { + marker: PhantomData<[T]>, + /// The original slice this was constructed from + entire_slice: [u8], +} + +impl VarZeroVecULE { + fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { + unsafe { + // safety: VarZeroVecULE is guaranteed to parse here + SliceComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Get the number of elements in this vector + pub fn len(&self) -> usize { + self.get_components().len() + } + + /// Returns `true` if the vector contains no elements. + pub fn is_empty(&self) -> bool { + self.get_components().is_empty() + } + + /// Obtain an iterator over VarZeroVecULE's elements + pub fn iter<'b>(&'b self) -> impl Iterator { + self.get_components().iter() + } + + /// Get one of VarZeroVecULE's elements, returning None if the index is out of bounds + pub fn get(&self, idx: usize) -> Option<&T::VarULE> { + self.get_components().get(idx) + } + + /// Get this [`VarZeroVecULE`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroVecULE`], + /// it is more efficient to perform this conversion first + pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T> { + VarZeroVec(VarZeroVecInner::Borrowed(self.get_components())) + } +} + +impl VarZeroVecULE +where + T: AsVarULE, + T::VarULE: Ord, +{ + /// Binary searches a sorted `VarZeroVecULE` for the given element. For more information, see + /// the primitive function [`binary_search`]. + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T::VarULE) -> Result { + self.get_components().binary_search(x) + } +} +unsafe impl VarULE for VarZeroVecULE { + type Error = ParseErrorFor; + + fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, Self::Error> { + let _: SliceComponents = SliceComponents::try_from_bytes(bytes)?; + unsafe { Ok(Self::from_byte_slice_unchecked(bytes)) } + } + + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + fn as_byte_slice(&self) -> &[u8] { + &self.entire_slice + } +} From 9d31cda8df4280744f42a147db8c4892bc96cdc3 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 16 Sep 2021 18:00:21 -0700 Subject: [PATCH 03/18] Impl AsVarULE for VarZeroVec --- utils/zerovec/src/varzerovec/components.rs | 1 - utils/zerovec/src/varzerovec/mod.rs | 1 - utils/zerovec/src/varzerovec/ule.rs | 22 ++++++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 88b926c2e66..ca28da811e4 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -131,7 +131,6 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { } #[inline] - #[cfg(feature = "serde")] pub fn entire_slice(self) -> &'a [u8] { self.entire_slice } diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index de6c910f80d..0696dc7bf85 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -412,7 +412,6 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { } /// If this is borrowed, get the borrowed slice - #[cfg(feature = "serde")] pub(crate) fn get_slice_for_borrowed(&self) -> Option<&'a [u8]> { match self.0 { VarZeroVecInner::Owned(..) => None, diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index 7a18fcd03d8..f2e9163e4a1 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -84,3 +84,25 @@ unsafe impl VarULE for VarZeroVecULE { &self.entire_slice } } + +impl AsVarULE for VarZeroVec<'static, T> +where + T: AsVarULE, + T: Clone, +{ + type VarULE = VarZeroVecULE; + #[inline] + fn as_unaligned(&self) -> &VarZeroVecULE { + let slice = self + .get_slice_for_borrowed() + .expect("It should not be possible for borrowed VarZeroVecs to contain owned VarZeroVecs"); + unsafe { + // safety: the slice is known to come from a valid parsed VZV + VarZeroVecULE::from_byte_slice_unchecked(slice) + } + } + #[inline] + fn from_unaligned(unaligned: &VarZeroVecULE) -> Self { + unaligned.as_varzerovec().into_owned() + } +} From 900157a1a7916fe82f2bb4e0abb26f71bf8ca853 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Fri, 17 Sep 2021 17:37:29 -0700 Subject: [PATCH 04/18] Add basic readonly VarZeroVecOwned --- utils/zerovec/src/varzerovec/components.rs | 7 ++ utils/zerovec/src/varzerovec/mod.rs | 3 +- utils/zerovec/src/varzerovec/owned.rs | 79 ++++++++++++++++++++++ utils/zerovec/src/varzerovec/ule.rs | 6 +- 4 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 utils/zerovec/src/varzerovec/owned.rs diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index ca28da811e4..b826d6b5e3d 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -224,6 +224,13 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { .chain(last) .map(|s| unsafe { T::VarULE::from_byte_slice_unchecked(s) }) } + + pub fn to_vec(&self) -> Vec + where + T: Clone, + { + self.iter().map(T::from_unaligned).collect() + } } impl<'a, T> SliceComponents<'a, T> diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 0696dc7bf85..73d98839697 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -9,6 +9,7 @@ use std::fmt::{self, Display}; use std::ops::Index; pub(crate) mod components; +mod owned; #[cfg(feature = "serde")] mod serde; mod ule; @@ -359,7 +360,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { match self.0 { VarZeroVecInner::Owned(ref mut vec) => vec, VarZeroVecInner::Borrowed(components) => { - let vec = components.iter().map(T::from_unaligned).collect(); + let vec = components.to_vec(); let new_self = VarZeroVecInner::Owned(vec).into(); *self = new_self; // recursion is limited since we are guaranteed to hit the Owned branch diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs new file mode 100644 index 00000000000..34036004878 --- /dev/null +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -0,0 +1,79 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use core::marker::PhantomData; + +pub struct VarZeroVecOwned { + marker: PhantomData<[T]>, + // safety invariant: must parse into a valid SliceComponents + entire_slice: Vec, +} + +impl VarZeroVecOwned { + /// Construct a VarZeroVecOwned from a list of elements + pub fn new(elements: &[T]) -> Option { + Some(Self { + marker: PhantomData, + entire_slice: components::get_serializable_bytes(elements)?, + }) + } + + fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { + unsafe { + // safety: VarZeroVecOwned is guaranteed to parse here + SliceComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Get the number of elements in this vector + pub fn len(&self) -> usize { + self.get_components().len() + } + + /// Returns `true` if the vector contains no elements. + pub fn is_empty(&self) -> bool { + self.get_components().is_empty() + } + + /// Obtain an iterator over VarZeroVecOwned's elements + pub fn iter<'b>(&'b self) -> impl Iterator { + self.get_components().iter() + } + + /// Get one of VarZeroVecOwned's elements, returning None if the index is out of bounds + pub fn get(&self, idx: usize) -> Option<&T::VarULE> { + self.get_components().get(idx) + } + + /// Get this [`VarZeroVecOwned`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroVecOwned`], + /// it is more efficient to perform this conversion first + pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T> { + VarZeroVec(VarZeroVecInner::Borrowed(self.get_components())) + } + + pub fn to_vec(&self) -> Vec + where + T: Clone, + { + self.get_components().to_vec() + } +} + +impl VarZeroVecOwned +where + T: AsVarULE, + T::VarULE: Ord, +{ + /// Binary searches a sorted `VarZeroVecOwned` for the given element. For more information, see + /// the primitive function [`binary_search`]. + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T::VarULE) -> Result { + self.get_components().binary_search(x) + } +} diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index f2e9163e4a1..64c8db8710a 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -93,9 +93,9 @@ where type VarULE = VarZeroVecULE; #[inline] fn as_unaligned(&self) -> &VarZeroVecULE { - let slice = self - .get_slice_for_borrowed() - .expect("It should not be possible for borrowed VarZeroVecs to contain owned VarZeroVecs"); + let slice = self.get_slice_for_borrowed().expect( + "It should not be possible for borrowed VarZeroVecs to contain owned VarZeroVecs", + ); unsafe { // safety: the slice is known to come from a valid parsed VZV VarZeroVecULE::from_byte_slice_unchecked(slice) From 4aadbf37735632baf39bde6d5dc5ecfabddb9e82 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 20 Sep 2021 17:00:55 -0700 Subject: [PATCH 05/18] Add mutation ops --- utils/zerovec/src/ule/plain.rs | 13 ++ utils/zerovec/src/varzerovec/components.rs | 10 + utils/zerovec/src/varzerovec/owned.rs | 227 ++++++++++++++++++++- 3 files changed, 245 insertions(+), 5 deletions(-) diff --git a/utils/zerovec/src/ule/plain.rs b/utils/zerovec/src/ule/plain.rs index 6414e3c1294..477c87ae3d9 100644 --- a/utils/zerovec/src/ule/plain.rs +++ b/utils/zerovec/src/ule/plain.rs @@ -50,6 +50,19 @@ macro_rules! impl_byte_slice_size { unsafe { std::slice::from_raw_parts(data as *const u8, len) } } } + + impl PlainOldULE<$size> { + #[inline] + /// This has the same invariants as from_byte_slice_unchecked: + /// this must be called on slices that would successfully work through + /// parse_byte_slice() + pub unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { + let data = bytes.as_mut_ptr(); + let len = bytes.len() / $size; + // Safe because Self is transparent over [u8; $size] + std::slice::from_raw_parts_mut(data as *mut Self, len) + } + } }; } diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index b826d6b5e3d..6e4d17a2eba 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -231,6 +231,16 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { { self.iter().map(T::from_unaligned).collect() } + + // Dump a debuggable representation of this type + pub(crate) fn dump(&self) -> String { + let indices = self + .indices + .iter() + .map(u32::from_unaligned) + .collect::>(); + format!("SliceComponents {{ indices: {:?} }}", indices) + } } impl<'a, T> SliceComponents<'a, T> diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 34036004878..1047c93135e 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -3,7 +3,11 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use super::*; +use core::fmt; use core::marker::PhantomData; +use core::ptr; +use core::slice; +use core::mem; pub struct VarZeroVecOwned { marker: PhantomData<[T]>, @@ -13,11 +17,21 @@ pub struct VarZeroVecOwned { impl VarZeroVecOwned { /// Construct a VarZeroVecOwned from a list of elements - pub fn new(elements: &[T]) -> Option { - Some(Self { + pub fn new(elements: &[T]) -> Self { + Self { marker: PhantomData, - entire_slice: components::get_serializable_bytes(elements)?, - }) + entire_slice: components::get_serializable_bytes(elements).unwrap(), + } + } + + /// Try to allocate a buffer with enough capacity for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub fn with_capacity(capacity: usize) -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::with_capacity(capacity * 8 + 1), + } } fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { @@ -55,12 +69,156 @@ impl VarZeroVecOwned { VarZeroVec(VarZeroVecInner::Borrowed(self.get_components())) } + /// Empty the vector + pub fn clear(&mut self) { + self.entire_slice.clear() + } + pub fn to_vec(&self) -> Vec where T: Clone, { self.get_components().to_vec() } + + /// Insert an element at index `idx` + pub fn insert(&mut self, index: usize, element: &T) { + let len = self.len(); + if index > len { + panic!( + "Called out-of-bounds insert() on VarZeroVec, index {} len {}", + index, len + ); + } + + if len == 0 { + // If there is no data, just construct it with the existing procedure + // for constructing an entire slice + self.entire_slice = + components::get_serializable_bytes(slice::from_ref(element)).unwrap(); + return; + } + + // The format of the encoded data is: + // - four bytes of "len" + // - len*4 bytes for an array of indices + // - the actual data to which the indices point + // + // When inserting an element, space must be made in the `data` + // segment for the element, and space must be made in the `indices` segment + // for the new index. Note that making space in the indices segment + // means that the data segment needs to be shifted by 4 bytes, on top + // of any reshuffling that needs to be done to make space + // + // We do this in the following steps: + // 1. Shift the data around + // 1a. We move any data *after* the inserted element by the length of the element + 4 + // to make space for the element and one extra index + // 1b. We insert the element at four bytes after the spot where the data we just moved + // started, again making space for that extra index + // 1c. We move the data from *before* the inserted element by 4 bytes to make space for the + // extra index + // 2. Update the indices: Shift indices after the inserted element by one slot, incrementing them + // by the length of the inserted element + // 3. Update the length + + let element = element.as_unaligned(); + let element_slice = element.as_byte_slice(); + // the amount the vector is growing by + let shift = 4 + element_slice.len(); + self.entire_slice.reserve(shift); + unsafe { + // Step 1: Shift the data around + { + let (indices, data) = self.entire_slice.split_at_mut(4 + 4 * len); + + let (_len, indices) = indices.split_at_mut(4); + // safety: the indices at [4...4 + len * 4] form a slice of PlainOldULE<4> + let indices = PlainOldULE::<4>::from_byte_slice_unchecked(indices); + + // Step 1a: Move the data after the inserted element by the length of + // the element + 4 to make space for the element and one extra index + + // calculate the data insertion point as an index into `data` + let insertion_point = if index != len { + // Assuming we're not pushing onto the end, we need to shift the tail-end elements + // by `shift` to make room for one extra index and `element_slice` data + let shift_start = u32::from_unaligned(&indices[index]) as usize; + let shift_start_p = data.as_mut_ptr().add(shift_start); + // shift elements from shift_start_p to the end of the slice by `shift` elements + ptr::copy( + shift_start_p, + shift_start_p.offset(shift as isize), + data.len() - shift_start, + ); + shift_start + 4 + } else { + // If we're inserting to the end of the vector, we just need to insert at + // the length + 4 (space for one index) + data.len() + 4 + }; + + let data_p = data.as_mut_ptr(); + + // Step 1b: insert the new element + ptr::copy( + element_slice.as_ptr(), + data_p.offset(insertion_point as isize), + element_slice.len(), + ); + + // Step 1c: shift the remaining elements to make room for the new index + if insertion_point != 0 { + ptr::copy(data_p, data_p.offset(4), insertion_point - 4); + } + } + self.entire_slice.set_len(self.entire_slice.len() + shift); + // Step 2: Shift indices after the inserted element by one slot, incrementing them + // by the length of the inserted element + { + let len = len + 1; + let (indices, data) = self.entire_slice.split_at_mut(4 + 4 * len); + let (len_ule, indices) = indices.split_at_mut(4); + // safety: the indices at [4...4 + len * 4] form a slice of PlainOldULE<4> + let indices = PlainOldULE::<4>::from_byte_slice_unchecked_mut(indices); + // safety: the starting index is a single PlainOldULE<4> + let len_ule = PlainOldULE::<4>::from_byte_slice_unchecked_mut(len_ule); + + let element_len = element_slice.len() as u32; + if index + 1 == len { + // appending to the end is weird, because there's no end-index for us to copy; + // the end-index is simply the old length + indices[index] = (data.len() as u32 - element_len).into(); + } else { + let mut next = u32::from_unaligned(&indices[index]); + for idx in &mut indices[index + 1..] { + let incremented: u32 = next + element_len; + next = u32::from_unaligned(idx); + *idx = incremented.into(); + } + } + + // Step 3: update the length + len_ule[0] = (u32::from_unaligned(&len_ule[0]) + 1).into() + } + } + } + + pub fn remove(&mut self, index: usize) -> T where T: Clone{ + // TODO (Manishearth) make these faster + let mut vec = self.to_vec(); + let ret = vec.remove(index); + *self = Self::new(&vec); + ret + } + + pub fn replace(&mut self, index: usize, value: T) -> T where T: Clone { + // TODO (Manishearth) make these faster + let mut vec = self.to_vec(); + let ret = mem::replace(&mut vec[index], value); + *self = Self::new(&vec); + ret + } } impl VarZeroVecOwned @@ -68,7 +226,7 @@ where T: AsVarULE, T::VarULE: Ord, { - /// Binary searches a sorted `VarZeroVecOwned` for the given element. For more information, see + /// Binary searches a sorted `VarZeroVecOwned` for the given element. FoGeneralr more information, see /// the primitive function [`binary_search`]. /// /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search @@ -77,3 +235,62 @@ where self.get_components().binary_search(x) } } + +impl PartialEq<&[T]> for VarZeroVecOwned +where + T: AsVarULE, + T::VarULE: PartialEq, +{ + #[inline] + fn eq(&self, other: &&[T]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_unaligned())) + } +} + +impl Index for VarZeroVecOwned { + type Output = T::VarULE; + fn index(&self, index: usize) -> &Self::Output { + self.get(index).expect("Indexing VarZeroVec out of bounds") + } +} + +impl fmt::Debug for VarZeroVecOwned +where + T::VarULE: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +#[cfg(test)] +mod test { + use super::VarZeroVecOwned; + #[test] + fn test_insert_integrity() { + let mut items: Vec = vec![ + "foo".into(), + "bar".into(), + "baz".into(), + "abcdefghijklmn".into(), + "five".into(), + ]; + let mut zerovec = VarZeroVecOwned::new(&items); + zerovec.insert(1, &"foo3".into()); + items.insert(1, "foo3".into()); + assert_eq!(zerovec, &*items); + + items.insert(0, "1234567890".into()); + zerovec.insert(0, &"1234567890".into()); + assert_eq!(zerovec, &*items); + + // make sure inserting at the end works + items.insert(items.len(), "qwertyuiop".into()); + zerovec.insert(zerovec.len(), &"qwertyuiop".into()); + assert_eq!(zerovec, &*items); + + items.insert(0, "asdfghjkl;".into()); + zerovec.insert(0, &"asdfghjkl;".into()); + assert_eq!(zerovec, &*items); + } +} From b78f3c5194dba690083a9d11b3c57e4690f7b6f7 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 16:37:29 -0700 Subject: [PATCH 06/18] Use VZVOwned in VZV --- Cargo.lock | 1 - utils/zerovec/Cargo.toml | 1 - utils/zerovec/src/lib.rs | 2 +- utils/zerovec/src/map/vecs.rs | 12 ++-- utils/zerovec/src/varzerovec/mod.rs | 92 ++++++++++++--------------- utils/zerovec/src/varzerovec/owned.rs | 49 +++++++++++--- utils/zerovec/src/varzerovec/serde.rs | 18 ++---- utils/zerovec/src/varzerovec/ule.rs | 4 +- 8 files changed, 95 insertions(+), 84 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 504f6f3e24b..9401c17158b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2925,7 +2925,6 @@ version = "0.3.0" dependencies = [ "bincode", "criterion", - "either", "getrandom 0.2.2", "iai", "icu_benchmark_macros", diff --git a/utils/zerovec/Cargo.toml b/utils/zerovec/Cargo.toml index d6969aeecbb..c9c7318ad8e 100644 --- a/utils/zerovec/Cargo.toml +++ b/utils/zerovec/Cargo.toml @@ -24,7 +24,6 @@ include = [ all-features = true [dependencies] -either = "1.6.1" serde = { version = "1.0", optional = true , default-features = false, features = ["alloc"] } yoke = { path = "../yoke", version = "0.2.0", optional = true } diff --git a/utils/zerovec/src/lib.rs b/utils/zerovec/src/lib.rs index 53c0b3fc839..2134b710696 100644 --- a/utils/zerovec/src/lib.rs +++ b/utils/zerovec/src/lib.rs @@ -63,7 +63,7 @@ //! //! let data = DataStruct { //! nums: ZeroVec::from_slice(&[211, 281, 421, 461]), -//! strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]), +//! strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]), //! }; //! let bincode_bytes = bincode::serialize(&data) //! .expect("Serialization should be successful"); diff --git a/utils/zerovec/src/map/vecs.rs b/utils/zerovec/src/map/vecs.rs index 922e7392686..c702022a46f 100644 --- a/utils/zerovec/src/map/vecs.rs +++ b/utils/zerovec/src/map/vecs.rs @@ -3,6 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::*; +use crate::varzerovec::owned::VarZeroVecOwned; use crate::VarZeroVec; use crate::ZeroVec; use std::cmp::Ordering; @@ -108,26 +109,27 @@ where self.get(index) } fn insert(&mut self, index: usize, value: T) { - self.make_mut().insert(index, value) + self.make_mut().insert(index, &value) } fn remove(&mut self, index: usize) -> T { self.make_mut().remove(index) } fn replace(&mut self, index: usize, value: T) -> T { let vec = self.make_mut(); - mem::replace(&mut vec[index], value) + vec.replace(index, value) } fn push(&mut self, value: T) { - self.make_mut().push(value) + let len = self.len(); + self.make_mut().insert(len, &value) } fn len(&self) -> usize { self.len() } fn new() -> Self { - Vec::new().into() + VarZeroVecOwned::new().into() } fn with_capacity(cap: usize) -> Self { - Vec::with_capacity(cap).into() + VarZeroVecOwned::with_capacity(cap).into() } fn clear(&mut self) { self.make_mut().clear() diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 73d98839697..151dfd010ae 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -4,16 +4,16 @@ use crate::ule::*; use components::SliceComponents; -use either::Either; use std::fmt::{self, Display}; use std::ops::Index; pub(crate) mod components; -mod owned; +pub(crate) mod owned; #[cfg(feature = "serde")] mod serde; mod ule; +use owned::VarZeroVecOwned; pub use ule::VarZeroVecULE; /// A zero-copy vector for variable-width types. @@ -128,7 +128,7 @@ pub struct VarZeroVec<'a, T>(VarZeroVecInner<'a, T>); /// The actual implementation details of this can be found in the `components` module #[derive(Clone)] enum VarZeroVecInner<'a, T> { - Owned(Vec), + Owned(VarZeroVecOwned), /// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into /// the buffer components. Logically this is capable of behaving as /// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually @@ -168,21 +168,27 @@ impl From for VarZeroVecError { } } -impl<'a, T> From> for VarZeroVec<'a, T> { +impl<'a, T> From> for VarZeroVec<'a, T> { #[inline] - fn from(other: Vec) -> Self { - Self(VarZeroVecInner::Owned(other)) + fn from(other: VarZeroVecInner<'a, T>) -> Self { + Self(other) } } -impl<'a, T> From> for VarZeroVec<'a, T> { +impl<'a, T> From> for VarZeroVec<'a, T> { #[inline] - fn from(other: VarZeroVecInner<'a, T>) -> Self { - Self(other) + fn from(other: VarZeroVecOwned) -> Self { + VarZeroVecInner::Owned(other).into() } } impl<'a, T: AsVarULE> VarZeroVec<'a, T> { + fn get_components<'b>(&'b self) -> SliceComponents<'b, T> { + match self.0 { + VarZeroVecInner::Owned(ref vec) => vec.get_components(), + VarZeroVecInner::Borrowed(components) => components, + } + } /// Get the number of elements in this vector /// /// # Example @@ -201,10 +207,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn len(&self) -> usize { - match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.len(), - VarZeroVecInner::Borrowed(components) => components.len(), - } + self.get_components().len() } /// Returns `true` if the vector contains no elements. @@ -224,10 +227,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn is_empty(&self) -> bool { - match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.is_empty(), - VarZeroVecInner::Borrowed(components) => components.is_empty(), - } + self.get_components().is_empty() } /// Parse a VarZeroVec from a slice of the appropriate format @@ -255,7 +255,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { pub fn try_from_bytes(slice: &'a [u8]) -> Result> { if slice.is_empty() { // does not allocate - return Ok(VarZeroVecInner::Owned(Vec::new()).into()); + return Ok(VarZeroVecInner::Owned(VarZeroVecOwned::new()).into()); } let components = SliceComponents::::try_from_bytes(slice)?; @@ -285,14 +285,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn iter<'b: 'a>(&'b self) -> impl Iterator { - // We use Either here so that we can use `impl Trait` with heterogeneous types - // - // An alternate design is to write explicit iterators. Once Rust has generators this will - // be unnecessary. - match self.0 { - VarZeroVecInner::Owned(ref vec) => Either::Left(vec.iter().map(|t| t.as_unaligned())), - VarZeroVecInner::Borrowed(components) => Either::Right(components.iter()), - } + self.get_components().iter() } /// Get one of VarZeroVec's elements, returning None if the index is out of bounds @@ -318,10 +311,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn get(&self, idx: usize) -> Option<&T::VarULE> { - match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.get(idx).map(|t| t.as_unaligned()), - VarZeroVecInner::Borrowed(components) => components.get(idx), - } + self.get_components().get(idx) } /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. @@ -353,15 +343,14 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { // // This function is crate-public for now since we don't yet want to stabilize // the internal implementation details - pub(crate) fn make_mut(&mut self) -> &mut Vec + pub(crate) fn make_mut(&mut self) -> &mut VarZeroVecOwned where T: Clone, { match self.0 { VarZeroVecInner::Owned(ref mut vec) => vec, VarZeroVecInner::Borrowed(components) => { - let vec = components.to_vec(); - let new_self = VarZeroVecInner::Owned(vec).into(); + let new_self = VarZeroVecOwned::from_components(components).into(); *self = new_self; // recursion is limited since we are guaranteed to hit the Owned branch self.make_mut() @@ -404,20 +393,12 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { where T: Clone, { - let mut new = self.clone(); - new.make_mut(); - match new.0 { - VarZeroVecInner::Owned(vec) => vec, - _ => unreachable!(), - } + self.get_components().to_vec() } /// If this is borrowed, get the borrowed slice - pub(crate) fn get_slice_for_borrowed(&self) -> Option<&'a [u8]> { - match self.0 { - VarZeroVecInner::Owned(..) => None, - VarZeroVecInner::Borrowed(b) => Some(b.entire_slice()), - } + pub(crate) fn get_encoded_slice(&self) -> &[u8] { + self.get_components().entire_slice() } /// For a slice of `T`, get a list of bytes that can be passed to @@ -445,6 +426,13 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { pub fn get_serializable_bytes(elements: &[T]) -> Option> { components::get_serializable_bytes(elements) } + + pub(crate) fn is_owned(&self) -> bool { + match self.0 { + VarZeroVecInner::Owned(..) => true, + VarZeroVecInner::Borrowed(..) => false, + } + } } impl<'a, T> VarZeroVec<'a, T> @@ -475,12 +463,7 @@ where /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search(&self, x: &T::VarULE) -> Result { - match self.0 { - VarZeroVecInner::Owned(ref vec) => { - vec.binary_search_by(|probe| probe.as_unaligned().cmp(x)) - } - VarZeroVecInner::Borrowed(components) => components.binary_search(x), - } + self.get_components().binary_search(x) } } @@ -491,6 +474,15 @@ impl<'a, T: AsVarULE> Index for VarZeroVec<'a, T> { } } +impl<'a, T> From<&'_ [T]> for VarZeroVec<'a, T> +where + T: AsVarULE, +{ + fn from(other: &'_ [T]) -> Self { + VarZeroVecOwned::from_elements(other).into() + } +} + impl<'a, 'b, T> PartialEq> for VarZeroVec<'a, T> where T: AsVarULE, diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 1047c93135e..2a3e9f00b93 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -5,10 +5,11 @@ use super::*; use core::fmt; use core::marker::PhantomData; +use core::mem; use core::ptr; use core::slice; -use core::mem; +#[derive(Clone)] pub struct VarZeroVecOwned { marker: PhantomData<[T]>, // safety invariant: must parse into a valid SliceComponents @@ -16,8 +17,23 @@ pub struct VarZeroVecOwned { } impl VarZeroVecOwned { + /// Construct an empty VarZeroVecOwned + pub fn new() -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::new(), + } + } + + pub fn from_components(components: SliceComponents) -> Self { + Self { + marker: PhantomData, + entire_slice: components.entire_slice().into(), + } + } + /// Construct a VarZeroVecOwned from a list of elements - pub fn new(elements: &[T]) -> Self { + pub fn from_elements(elements: &[T]) -> Self { Self { marker: PhantomData, entire_slice: components::get_serializable_bytes(elements).unwrap(), @@ -27,14 +43,21 @@ impl VarZeroVecOwned { /// Try to allocate a buffer with enough capacity for `capacity` /// elements. Since `T` can take up an arbitrary size this will /// just allocate enough space for 4-byte Ts - pub fn with_capacity(capacity: usize) -> Self { + pub(crate) fn with_capacity(capacity: usize) -> Self { Self { marker: PhantomData, - entire_slice: Vec::with_capacity(capacity * 8 + 1), + entire_slice: Vec::with_capacity(capacity * 8), } } - fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { + /// Try to reserve space for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn reserve(&mut self, capacity: usize) { + self.entire_slice.reserve(capacity * 8) + } + + pub(crate) fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { unsafe { // safety: VarZeroVecOwned is guaranteed to parse here SliceComponents::from_bytes_unchecked(&self.entire_slice) @@ -204,19 +227,25 @@ impl VarZeroVecOwned { } } - pub fn remove(&mut self, index: usize) -> T where T: Clone{ + pub fn remove(&mut self, index: usize) -> T + where + T: Clone, + { // TODO (Manishearth) make these faster let mut vec = self.to_vec(); let ret = vec.remove(index); - *self = Self::new(&vec); + *self = Self::from_elements(&vec); ret } - pub fn replace(&mut self, index: usize, value: T) -> T where T: Clone { + pub fn replace(&mut self, index: usize, value: T) -> T + where + T: Clone, + { // TODO (Manishearth) make these faster let mut vec = self.to_vec(); let ret = mem::replace(&mut vec[index], value); - *self = Self::new(&vec); + *self = Self::from_elements(&vec); ret } } @@ -275,7 +304,7 @@ mod test { "abcdefghijklmn".into(), "five".into(), ]; - let mut zerovec = VarZeroVecOwned::new(&items); + let mut zerovec = VarZeroVecOwned::from_elements(&items); zerovec.insert(1, &"foo3".into()); items.insert(1, "foo3".into()); assert_eq!(zerovec, &*items); diff --git a/utils/zerovec/src/varzerovec/serde.rs b/utils/zerovec/src/varzerovec/serde.rs index e9d7a8327e7..f82fc04b7c7 100644 --- a/utils/zerovec/src/varzerovec/serde.rs +++ b/utils/zerovec/src/varzerovec/serde.rs @@ -5,7 +5,7 @@ use super::VarZeroVec; use crate::ule::*; use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; -use serde::ser::{self, Serialize, SerializeSeq, Serializer}; +use serde::ser::{Serialize, SerializeSeq, Serializer}; use std::fmt; use std::marker::PhantomData; @@ -51,7 +51,7 @@ where while let Some(value) = seq.next_element::()? { vec.push(value); } - Ok(vec.into()) + Ok((&*vec).into()) } } @@ -95,16 +95,8 @@ where seq.serialize_element(&T::from_unaligned(value))?; } seq.end() - } else if let Some(slice) = self.get_slice_for_borrowed() { - serializer.serialize_bytes(slice) } else { - // This creates an additional Vec allocation to enable code reuse of - // VarZeroVec::to_vec()'s. The alternative is to write a different - // implementation of get_serializable_bytes() which enables us to pull - // out the byte buffer components bit by bit and use serialize_seq + serialize_element - let vec = VarZeroVec::get_serializable_bytes(&self.to_vec()) - .ok_or_else(|| ser::Error::custom("VarZeroVec too large to be serialized"))?; - serializer.serialize_bytes(&vec) + serializer.serialize_bytes(self.get_encoded_slice()) } } } @@ -144,7 +136,7 @@ mod test { let zerovec_new: VarZeroVec = serde_json::from_str(&json_str).expect("deserialize from buffer to VarZeroVec"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); - assert!(zerovec_new.get_slice_for_borrowed().is_none()); + assert!(zerovec_new.is_owned()); } #[test] @@ -155,7 +147,7 @@ mod test { let zerovec_new: VarZeroVec = bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroVec"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); - assert!(zerovec_new.get_slice_for_borrowed().is_some()); + assert!(!zerovec_new.is_owned()); } #[test] diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index 64c8db8710a..257870e5f86 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -93,9 +93,7 @@ where type VarULE = VarZeroVecULE; #[inline] fn as_unaligned(&self) -> &VarZeroVecULE { - let slice = self.get_slice_for_borrowed().expect( - "It should not be possible for borrowed VarZeroVecs to contain owned VarZeroVecs", - ); + let slice = self.get_encoded_slice(); unsafe { // safety: the slice is known to come from a valid parsed VZV VarZeroVecULE::from_byte_slice_unchecked(slice) From ab5e4f1028eae7702fc41078c96cd8af14f655be Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 17:13:13 -0700 Subject: [PATCH 07/18] Add test --- utils/zerovec/src/varzerovec/mod.rs | 13 +++++-- utils/zerovec/src/varzerovec/ule.rs | 55 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 151dfd010ae..9d5b68eae50 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -102,6 +102,10 @@ pub use ule::VarZeroVecULE; /// # Ok::<(), VarZeroVecError>(()) /// ``` /// +/// +/// [`VarZeroVec`]s can be nested infinitely, see the docs of [`VarZeroVecULE`] +/// for more information. +/// /// [`ule`]: crate::ule #[derive(Clone)] pub struct VarZeroVec<'a, T>(VarZeroVecInner<'a, T>); @@ -232,7 +236,8 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// Parse a VarZeroVec from a slice of the appropriate format /// - /// Slices of the right format can be obtained via VarZeroVec::get_serializable_bytes() + /// Slices of the right format can be obtained via [`VarZeroVec::get_serializable_bytes()`] + /// or [`VarZeroVec::get_encoded_slice()`] /// /// # Example /// @@ -396,8 +401,10 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { self.get_components().to_vec() } - /// If this is borrowed, get the borrowed slice - pub(crate) fn get_encoded_slice(&self) -> &[u8] { + /// Obtain the internal encoded slice + /// + /// This can be passed back to [`Self::try_from_bytes()`] + pub fn get_encoded_slice(&self) -> &[u8] { self.get_components().entire_slice() } diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index 257870e5f86..8f141144076 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -7,6 +7,40 @@ use super::*; use core::marker::PhantomData; use core::mem; +/// A VarULE version of VarZeroVec allowing for VarZeroVecs to be nested indefinitely +/// +/// ```rust +/// use zerovec::VarZeroVec; +/// use zerovec::ZeroVec; +/// use zerovec::varzerovec::VarZeroVecULE; +/// use zerovec::ule::*; +/// let strings_1: Vec = vec!["foo".into(), "bar".into(), "baz".into()]; +/// let strings_2: Vec = vec!["twelve".into(), "seventeen".into(), "forty two".into()]; +/// let strings_3: Vec = vec!["我".into(), "喜歡".into(), "烏龍茶".into()]; +/// let strings_4: Vec = vec!["w".into(), "ω".into(), "文".into(), "𑄃".into()]; +/// let strings_12 = vec![strings_1.clone(), strings_2.clone()]; +/// let strings_34 = vec![strings_3.clone(), strings_4.clone()]; +/// let all_strings = vec![strings_12, strings_34]; +/// +/// let vzv_1 = VarZeroVec::from(&*strings_1); +/// let vzv_2 = VarZeroVec::from(&*strings_2); +/// let vzv_3 = VarZeroVec::from(&*strings_3); +/// let vzv_4 = VarZeroVec::from(&*strings_4); +/// let vzv_12 = VarZeroVec::from(&[vzv_1, vzv_2] as &[_]); +/// let vzv_34 = VarZeroVec::from(&[vzv_3, vzv_4] as &[_]); +/// let vzv_all = VarZeroVec::from(&[vzv_12, vzv_34] as &[_]); +/// +/// let reconstructed = vzv_all.iter() +/// .map(|v: &VarZeroVecULE<_>| { +/// v.iter().map(|x: &VarZeroVecULE<_>| x.as_varzerovec().to_vec()).collect::>() +/// }).collect::>(); +/// assert_eq!(reconstructed, all_strings); +/// +/// let bytes = vzv_all.get_encoded_slice(); +/// let vzv_from_bytes: VarZeroVec>> = VarZeroVec::try_from_bytes(bytes).unwrap(); +/// assert_eq!(vzv_from_bytes, vzv_all); +/// ``` +// // safety invariant: The slice MUST be one which parses to // a valid SliceComponents #[repr(transparent)] @@ -104,3 +138,24 @@ where unaligned.as_varzerovec().into_owned() } } + +impl PartialEq> for VarZeroVecULE +where + T: AsVarULE, + T::VarULE: PartialEq, +{ + #[inline] + fn eq(&self, other: &VarZeroVecULE) -> bool { + // Note: T implements PartialEq but not T::ULE + self.iter().eq(other.iter()) + } +} + +impl fmt::Debug for VarZeroVecULE +where + T::VarULE: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} From a95f241895d8794bc7e9d3876805796cbcb25e3d Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 17:31:57 -0700 Subject: [PATCH 08/18] fix ci --- utils/zerovec/benches/vzv.rs | 2 +- utils/zerovec/src/varzerovec/components.rs | 1 + utils/zerovec/src/varzerovec/mod.rs | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/utils/zerovec/benches/vzv.rs b/utils/zerovec/benches/vzv.rs index bc828adb91b..29ba6aa7561 100644 --- a/utils/zerovec/benches/vzv.rs +++ b/utils/zerovec/benches/vzv.rs @@ -139,7 +139,7 @@ fn serde_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, _) = random_alphanums(2..=20, 100, seed); let bincode_vec = bincode::serialize(&string_vec).unwrap(); - let vzv = VarZeroVec::from(string_vec); + let vzv = VarZeroVec::from(&*string_vec); let bincode_vzv = bincode::serialize(&vzv).unwrap(); // *** Deserialize vec of 100 strings *** diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 6e4d17a2eba..45dc318faa6 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -233,6 +233,7 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { } // Dump a debuggable representation of this type + #[allow(unused)] // useful for debugging pub(crate) fn dump(&self) -> String { let indices = self .indices diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 9d5b68eae50..417e55daf8c 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -434,7 +434,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { components::get_serializable_bytes(elements) } - pub(crate) fn is_owned(&self) -> bool { + pub fn is_owned(&self) -> bool { match self.0 { VarZeroVecInner::Owned(..) => true, VarZeroVecInner::Borrowed(..) => false, From b106f089488689abebf15335dc30c39d92b233bc Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 22:56:14 -0700 Subject: [PATCH 09/18] no unsafe on from_byte_slice_unchecked_mut --- utils/zerovec/src/ule/plain.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/utils/zerovec/src/ule/plain.rs b/utils/zerovec/src/ule/plain.rs index 477c87ae3d9..ba68ac9d137 100644 --- a/utils/zerovec/src/ule/plain.rs +++ b/utils/zerovec/src/ule/plain.rs @@ -53,14 +53,13 @@ macro_rules! impl_byte_slice_size { impl PlainOldULE<$size> { #[inline] - /// This has the same invariants as from_byte_slice_unchecked: - /// this must be called on slices that would successfully work through - /// parse_byte_slice() - pub unsafe fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { + pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { let data = bytes.as_mut_ptr(); let len = bytes.len() / $size; // Safe because Self is transparent over [u8; $size] - std::slice::from_raw_parts_mut(data as *mut Self, len) + unsafe { + std::slice::from_raw_parts_mut(data as *mut Self, len) + } } } }; From 464c4087590e047b4a7896e8d371d20157845b97 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 22:56:41 -0700 Subject: [PATCH 10/18] try_from_bytes -> parse_byte_slice --- utils/zerovec/benches/vzv.rs | 6 +-- utils/zerovec/benches/zerovec.rs | 8 ++-- utils/zerovec/benches/zerovec_iai.rs | 4 +- utils/zerovec/benches/zerovec_serde.rs | 6 +-- utils/zerovec/src/varzerovec/components.rs | 4 +- utils/zerovec/src/varzerovec/mod.rs | 32 ++++++------- utils/zerovec/src/varzerovec/serde.rs | 8 ++-- utils/zerovec/src/varzerovec/ule.rs | 4 +- utils/zerovec/src/zerovec/mod.rs | 54 +++++++++++----------- utils/zerovec/src/zerovec/serde.rs | 2 +- 10 files changed, 64 insertions(+), 64 deletions(-) diff --git a/utils/zerovec/benches/vzv.rs b/utils/zerovec/benches/vzv.rs index 29ba6aa7561..fd5c80e88f7 100644 --- a/utils/zerovec/benches/vzv.rs +++ b/utils/zerovec/benches/vzv.rs @@ -46,7 +46,7 @@ fn overview_bench(c: &mut Criterion) { let seed = 42; let (string_vec, _) = random_alphanums(2..=10, 100, seed); let bytes: Vec = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap(); - let vzv = VarZeroVec::::try_from_bytes(black_box(bytes.as_slice())).unwrap(); + let vzv = VarZeroVec::::parse_byte_slice(black_box(bytes.as_slice())).unwrap(); c.bench_function("vzv/overview", |b| { b.iter(|| { @@ -73,7 +73,7 @@ fn char_count_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, _) = random_alphanums(2..=20, 100, seed); let bytes: Vec = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap(); - let vzv = VarZeroVec::::try_from_bytes(black_box(bytes.as_slice())).unwrap(); + let vzv = VarZeroVec::::parse_byte_slice(black_box(bytes.as_slice())).unwrap(); // *** Count chars in vec of 100 strings *** c.bench_function("vzv/char_count/slice", |b| { @@ -100,7 +100,7 @@ fn binary_search_benches(c: &mut Criterion) { let (string_vec, seed) = random_alphanums(2..=20, 500, seed); let (needles, _) = random_alphanums(2..=20, 10, seed); let bytes: Vec = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap(); - let vzv = VarZeroVec::::try_from_bytes(black_box(bytes.as_slice())).unwrap(); + let vzv = VarZeroVec::::parse_byte_slice(black_box(bytes.as_slice())).unwrap(); let single_needle = "lmnop".to_string(); // *** Binary search vec of 500 strings 10 times *** diff --git a/utils/zerovec/benches/zerovec.rs b/utils/zerovec/benches/zerovec.rs index cff2067053d..fdde6d8c477 100644 --- a/utils/zerovec/benches/zerovec.rs +++ b/utils/zerovec/benches/zerovec.rs @@ -55,13 +55,13 @@ where buffer .0 .extend(ZeroVec::from_slice(vec.as_slice()).as_bytes()); - ZeroVec::::try_from_bytes(&buffer.0[1..]).unwrap() + ZeroVec::::parse_byte_slice(&buffer.0[1..]).unwrap() } fn overview_bench(c: &mut Criterion) { c.bench_function("zerovec/overview", |b| { b.iter(|| { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() @@ -83,7 +83,7 @@ fn sum_benches(c: &mut Criterion) { c.bench_function("zerovec/sum/sample/zerovec", |b| { b.iter(|| { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() @@ -98,7 +98,7 @@ fn binary_search_benches(c: &mut Criterion) { }); c.bench_function("zerovec/binary_search/sample/zerovec", |b| { - let zerovec = ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(); b.iter(|| zerovec.binary_search(&0x0c0d0c)); }); diff --git a/utils/zerovec/benches/zerovec_iai.rs b/utils/zerovec/benches/zerovec_iai.rs index 711b4089058..455dd464dd4 100644 --- a/utils/zerovec/benches/zerovec_iai.rs +++ b/utils/zerovec/benches/zerovec_iai.rs @@ -15,7 +15,7 @@ fn sum_slice() -> u32 { } fn sum_zerovec() -> u32 { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() @@ -26,7 +26,7 @@ fn binarysearch_slice() -> Result { } fn binarysearch_zerovec() -> Result { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .binary_search(&0x0c0d0c) } diff --git a/utils/zerovec/benches/zerovec_serde.rs b/utils/zerovec/benches/zerovec_serde.rs index 9038bc12b25..a22b32a40e2 100644 --- a/utils/zerovec/benches/zerovec_serde.rs +++ b/utils/zerovec/benches/zerovec_serde.rs @@ -32,7 +32,7 @@ fn overview_bench(c: &mut Criterion) { c.bench_function("zerovec_serde/overview", |b| { // Same as "zerovec_serde/deserialize_sum/u32/zerovec" let buffer = - bincode::serialize(&ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap()) + bincode::serialize(&ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap()) .unwrap(); b.iter(|| { bincode::deserialize::>(&buffer) @@ -72,7 +72,7 @@ fn u32_benches(c: &mut Criterion) { c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| { let buffer = - bincode::serialize(&ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap()) + bincode::serialize(&ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap()) .unwrap(); b.iter(|| { bincode::deserialize::>(&buffer) @@ -131,7 +131,7 @@ fn stress_benches(c: &mut Criterion) { }); // *** Compute sum of vec of 100 `u32` *** - let zerovec = ZeroVec::::try_from_bytes(zerovec_aligned.as_bytes()).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(zerovec_aligned.as_bytes()).unwrap(); c.bench_function("zerovec_serde/sum/stress/zerovec", |b| { b.iter(|| black_box(&zerovec).iter().sum::()); }); diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 45dc318faa6..36872a4e8cb 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -15,7 +15,7 @@ fn usizeify(x: PlainOldULE<4>) -> usize { /// /// This is where the actual work involved in VarZeroVec happens /// -/// See [`SliceComponents::try_from_bytes()`] for information on the internal invariants involved +/// See [`SliceComponents::parse_byte_slice()`] for information on the internal invariants involved pub struct SliceComponents<'a, T> { /// The list of indices into the `things` slice indices: &'a [PlainOldULE<4>], @@ -50,7 +50,7 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { /// - `indices[len - 1]..things.len()` must index into a valid section of /// `things`, such that it parses to a `T::VarULE` #[inline] - pub fn try_from_bytes(slice: &'a [u8]) -> Result> { + pub fn parse_byte_slice(slice: &'a [u8]) -> Result> { if slice.is_empty() { return Ok(SliceComponents { indices: &[], diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 417e55daf8c..ed226267263 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -62,7 +62,7 @@ pub use ule::VarZeroVecULE; /// 6, 0, 0, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, /// ]; /// -/// let zerovec: VarZeroVec = VarZeroVec::try_from_bytes(bytes)?; +/// let zerovec: VarZeroVec = VarZeroVec::parse_byte_slice(bytes)?; /// /// assert_eq!(zerovec.get(2), Some("文")); /// assert_eq!(zerovec, &*strings); @@ -90,8 +90,8 @@ pub use ule::VarZeroVecULE; /// 0, 0, 42, 0, 0, 0, 3, 217, 0, 0, 57, 48, 0, 0, 49, 212, 0, 0, /// 9, 0, 0, 0]; /// -/// let zerovec: VarZeroVec>> = VarZeroVec::try_from_bytes(bytes)?; -/// let zerovec2: VarZeroVec> = VarZeroVec::try_from_bytes(bytes)?; +/// let zerovec: VarZeroVec>> = VarZeroVec::parse_byte_slice(bytes)?; +/// let zerovec2: VarZeroVec> = VarZeroVec::parse_byte_slice(bytes)?; /// /// assert_eq!(zerovec.get(2).and_then(|v| v.get(1)), Some(&55555.into())); /// assert_eq!(zerovec2.get(2).and_then(|v| v.get(1)), Some(&55555.into())); @@ -206,7 +206,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert_eq!(vec.len(), 4); /// # Ok::<(), VarZeroVecError>(()) /// ``` @@ -226,7 +226,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings: Vec = vec![]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert!(vec.is_empty()); /// # Ok::<(), VarZeroVecError>(()) /// ``` @@ -250,20 +250,20 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert_eq!(&vec[0], "foo"); /// assert_eq!(&vec[1], "bar"); /// assert_eq!(&vec[2], "baz"); /// assert_eq!(&vec[3], "quux"); /// # Ok::<(), VarZeroVecError>(()) /// ``` - pub fn try_from_bytes(slice: &'a [u8]) -> Result> { + pub fn parse_byte_slice(slice: &'a [u8]) -> Result> { if slice.is_empty() { // does not allocate return Ok(VarZeroVecInner::Owned(VarZeroVecOwned::new()).into()); } - let components = SliceComponents::::try_from_bytes(slice)?; + let components = SliceComponents::::parse_byte_slice(slice)?; Ok(VarZeroVecInner::Borrowed(components).into()) } @@ -280,7 +280,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(iter_results[0], "foo"); @@ -305,7 +305,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(vec.get(0), Some("foo")); @@ -332,7 +332,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// assert_eq!(vec.len(), 4); /// let mutvec = vec.make_mut(); @@ -375,7 +375,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// assert_eq!(vec.len(), 4); /// // has 'static lifetime @@ -403,13 +403,13 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// Obtain the internal encoded slice /// - /// This can be passed back to [`Self::try_from_bytes()`] + /// This can be passed back to [`Self::parse_byte_slice()`] pub fn get_encoded_slice(&self) -> &[u8] { self.get_components().entire_slice() } /// For a slice of `T`, get a list of bytes that can be passed to - /// `try_from_bytes` to recoup the same data. + /// `parse_byte_slice` to recoup the same data. /// /// Returns `None` if the slice is too large to be represented in a list of /// bytes whose length fits in a `u32`. @@ -424,7 +424,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), "baz".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut borrowed: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut borrowed: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert_eq!(borrowed, &*strings); /// /// # Ok::<(), VarZeroVecError>(()) @@ -460,7 +460,7 @@ where /// let strings = vec!["a".to_owned(), "b".to_owned(), /// "f".to_owned(), "g".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// assert_eq!(vec.binary_search("f"), Ok(2)); /// assert_eq!(vec.binary_search("e"), Err(2)); diff --git a/utils/zerovec/src/varzerovec/serde.rs b/utils/zerovec/src/varzerovec/serde.rs index f82fc04b7c7..5e7d5cbb0ec 100644 --- a/utils/zerovec/src/varzerovec/serde.rs +++ b/utils/zerovec/src/varzerovec/serde.rs @@ -36,7 +36,7 @@ where where E: de::Error, { - VarZeroVec::try_from_bytes(bytes).map_err(de::Error::custom) + VarZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) } fn visit_seq(self, mut seq: A) -> Result @@ -126,7 +126,7 @@ mod test { ]; #[test] fn test_serde_json() { - let zerovec_orig: VarZeroVec = VarZeroVec::try_from_bytes(BYTES).expect("parse"); + let zerovec_orig: VarZeroVec = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); assert_eq!(JSON_STR, json_str); // VarZeroVec should deserialize from JSON to either Vec or VarZeroVec @@ -141,7 +141,7 @@ mod test { #[test] fn test_serde_bincode() { - let zerovec_orig: VarZeroVec = VarZeroVec::try_from_bytes(BYTES).expect("parse"); + let zerovec_orig: VarZeroVec = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); assert_eq!(BINCODE_BUF, bincode_buf); let zerovec_new: VarZeroVec = @@ -158,7 +158,7 @@ mod test { .map(String::from) .collect::>(); let mut zerovec: VarZeroVec = - VarZeroVec::try_from_bytes(NONASCII_BYTES).expect("parse"); + VarZeroVec::parse_byte_slice(NONASCII_BYTES).expect("parse"); assert_eq!(zerovec.to_vec(), src_vec); let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); let zerovec_result = diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index 8f141144076..7210f45713f 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -37,7 +37,7 @@ use core::mem; /// assert_eq!(reconstructed, all_strings); /// /// let bytes = vzv_all.get_encoded_slice(); -/// let vzv_from_bytes: VarZeroVec>> = VarZeroVec::try_from_bytes(bytes).unwrap(); +/// let vzv_from_bytes: VarZeroVec>> = VarZeroVec::parse_byte_slice(bytes).unwrap(); /// assert_eq!(vzv_from_bytes, vzv_all); /// ``` // @@ -105,7 +105,7 @@ unsafe impl VarULE for VarZeroVecULE { type Error = ParseErrorFor; fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, Self::Error> { - let _: SliceComponents = SliceComponents::try_from_bytes(bytes)?; + let _: SliceComponents = SliceComponents::parse_byte_slice(bytes)?; unsafe { Ok(Self::from_byte_slice_unchecked(bytes)) } } diff --git a/utils/zerovec/src/zerovec/mod.rs b/utils/zerovec/src/zerovec/mod.rs index 9f18a791457..de6bca3b1d2 100644 --- a/utils/zerovec/src/zerovec/mod.rs +++ b/utils/zerovec/src/zerovec/mod.rs @@ -44,7 +44,7 @@ use std::fmt; /// let nums: &[u16] = &[211, 281, 421, 461]; /// /// // Conversion from &[u8] to &[u16::ULE] is infallible. -/// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); +/// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// assert_eq!(zerovec.get(2), Some(421)); @@ -117,12 +117,12 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// assert_eq!(zerovec.get(2), Some(421)); /// ``` - pub fn try_from_bytes(bytes: &'a [u8]) -> Result::ULE as ULE>::Error> { + pub fn parse_byte_slice(bytes: &'a [u8]) -> Result::ULE as ULE>::Error> { let slice: &'a [T::ULE] = T::ULE::parse_byte_slice(bytes)?; Ok(Self::Borrowed(slice)) } @@ -168,7 +168,7 @@ where /// use zerovec::ule::AsULE; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(4, zerovec.len()); /// assert_eq!( @@ -189,10 +189,10 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// assert!(!zerovec.is_empty()); /// - /// let emptyvec: ZeroVec = ZeroVec::try_from_bytes(&[]).expect("infallible"); + /// let emptyvec: ZeroVec = ZeroVec::parse_byte_slice(&[]).expect("infallible"); /// assert!(emptyvec.is_empty()); /// ``` #[inline] @@ -313,7 +313,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.get(2), Some(421)); /// assert_eq!(zerovec.get(4), None); @@ -337,7 +337,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.first(), Some(211)); /// ``` @@ -356,7 +356,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.last(), Some(461)); /// ``` @@ -375,7 +375,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// let mut it = zerovec.iter(); /// /// assert_eq!(it.next(), Some(211)); @@ -397,7 +397,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// /// let owned = zerovec.into_owned(); @@ -422,7 +422,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let mut zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let mut zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// /// zerovec.make_mut().push(12_u16.as_unaligned()); @@ -458,7 +458,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.binary_search(&281), Ok(1)); /// assert_eq!(zerovec.binary_search(&282), Err(2)); @@ -486,7 +486,7 @@ mod tests { assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); } { - let zerovec = ZeroVec::::try_from_bytes(TEST_BUFFER_LE).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(TEST_BUFFER_LE).unwrap(); assert_eq!(zerovec.get(0), Some(TEST_SLICE[0])); assert_eq!(zerovec.get(1), Some(TEST_SLICE[1])); assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); @@ -501,7 +501,7 @@ mod tests { assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); } { - let zerovec = ZeroVec::::try_from_bytes(TEST_BUFFER_LE).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(TEST_BUFFER_LE).unwrap(); assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c)); assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); } @@ -511,73 +511,73 @@ mod tests { fn test_odd_alignment() { assert_eq!( Some(0x020100), - ZeroVec::::try_from_bytes(TEST_BUFFER_LE) + ZeroVec::::parse_byte_slice(TEST_BUFFER_LE) .unwrap() .get(0) ); assert_eq!( Some(0x04000201), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[1..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[1..]) .unwrap() .get(0) ); assert_eq!( Some(0x05040002), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[2..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[2..]) .unwrap() .get(0) ); assert_eq!( Some(0x06050400), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[3..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[3..]) .unwrap() .get(0) ); assert_eq!( Some(0x060504), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[4..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[4..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c00), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[75..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[75..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c00), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[3..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[3..]) .unwrap() .get(18) ); assert_eq!( Some(0x4e4d4c), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[76..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[76..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c), - ZeroVec::::try_from_bytes(TEST_BUFFER_LE) + ZeroVec::::parse_byte_slice(TEST_BUFFER_LE) .unwrap() .get(19) ); assert_eq!( None, - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[77..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[77..]) .unwrap() .get(0) ); assert_eq!( None, - ZeroVec::::try_from_bytes(TEST_BUFFER_LE) + ZeroVec::::parse_byte_slice(TEST_BUFFER_LE) .unwrap() .get(20) ); assert_eq!( None, - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[3..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[3..]) .unwrap() .get(19) ); diff --git a/utils/zerovec/src/zerovec/serde.rs b/utils/zerovec/src/zerovec/serde.rs index 31634d37798..9b22a2c54d6 100644 --- a/utils/zerovec/src/zerovec/serde.rs +++ b/utils/zerovec/src/zerovec/serde.rs @@ -36,7 +36,7 @@ where where E: de::Error, { - ZeroVec::try_from_bytes(bytes).map_err(de::Error::custom) + ZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) } fn visit_seq(self, mut seq: A) -> Result From 8bc6d518cc31fce7c06e48d3131c4f1bff173701 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 23:08:04 -0700 Subject: [PATCH 11/18] Address some review comments --- utils/zerovec/benches/zerovec_serde.rs | 14 ++++++++------ utils/zerovec/src/ule/plain.rs | 4 +--- utils/zerovec/src/varzerovec/mod.rs | 5 ++++- utils/zerovec/src/varzerovec/owned.rs | 19 ++++++++++++++++--- utils/zerovec/src/varzerovec/ule.rs | 1 + 5 files changed, 30 insertions(+), 13 deletions(-) diff --git a/utils/zerovec/benches/zerovec_serde.rs b/utils/zerovec/benches/zerovec_serde.rs index a22b32a40e2..67e9fb0d62f 100644 --- a/utils/zerovec/benches/zerovec_serde.rs +++ b/utils/zerovec/benches/zerovec_serde.rs @@ -31,9 +31,10 @@ fn random_numbers(count: usize) -> Vec { fn overview_bench(c: &mut Criterion) { c.bench_function("zerovec_serde/overview", |b| { // Same as "zerovec_serde/deserialize_sum/u32/zerovec" - let buffer = - bincode::serialize(&ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap()) - .unwrap(); + let buffer = bincode::serialize( + &ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(), + ) + .unwrap(); b.iter(|| { bincode::deserialize::>(&buffer) .unwrap() @@ -71,9 +72,10 @@ fn u32_benches(c: &mut Criterion) { }); c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| { - let buffer = - bincode::serialize(&ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap()) - .unwrap(); + let buffer = bincode::serialize( + &ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(), + ) + .unwrap(); b.iter(|| { bincode::deserialize::>(&buffer) .unwrap() diff --git a/utils/zerovec/src/ule/plain.rs b/utils/zerovec/src/ule/plain.rs index ba68ac9d137..7e5e318ec57 100644 --- a/utils/zerovec/src/ule/plain.rs +++ b/utils/zerovec/src/ule/plain.rs @@ -57,9 +57,7 @@ macro_rules! impl_byte_slice_size { let data = bytes.as_mut_ptr(); let len = bytes.len() / $size; // Safe because Self is transparent over [u8; $size] - unsafe { - std::slice::from_raw_parts_mut(data as *mut Self, len) - } + unsafe { std::slice::from_raw_parts_mut(data as *mut Self, len) } } } }; diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index ed226267263..fd47184a89b 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -189,7 +189,7 @@ impl<'a, T> From> for VarZeroVec<'a, T> { impl<'a, T: AsVarULE> VarZeroVec<'a, T> { fn get_components<'b>(&'b self) -> SliceComponents<'b, T> { match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.get_components(), + VarZeroVecInner::Owned(ref owned) => owned.get_components(), VarZeroVecInner::Borrowed(components) => components, } } @@ -434,6 +434,9 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { components::get_serializable_bytes(elements) } + /// Return whether the [`VarZeroVec`] is operating on owned or borrowed + /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can + /// be used to force it into an owned type pub fn is_owned(&self) -> bool { match self.0 { VarZeroVecInner::Owned(..) => true, diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 2a3e9f00b93..7ef706fcda5 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -36,7 +36,10 @@ impl VarZeroVecOwned { pub fn from_elements(elements: &[T]) -> Self { Self { marker: PhantomData, - entire_slice: components::get_serializable_bytes(elements).unwrap(), + entire_slice: components::get_serializable_bytes(elements).expect( + "Attempted to build VarZeroVec out of elements that \ + cumulatively are larger than a u32 in size", + ), } } @@ -57,6 +60,7 @@ impl VarZeroVecOwned { self.entire_slice.reserve(capacity * 8) } + #[inline] pub(crate) fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { unsafe { // safety: VarZeroVecOwned is guaranteed to parse here @@ -117,8 +121,11 @@ impl VarZeroVecOwned { if len == 0 { // If there is no data, just construct it with the existing procedure // for constructing an entire slice - self.entire_slice = - components::get_serializable_bytes(slice::from_ref(element)).unwrap(); + self.entire_slice = components::get_serializable_bytes(slice::from_ref(element)) + .expect( + "attempted to insert an element too large to be encoded\ + in a VarZeroVec", + ); return; } @@ -150,6 +157,12 @@ impl VarZeroVecOwned { // the amount the vector is growing by let shift = 4 + element_slice.len(); self.entire_slice.reserve(shift); + if self.entire_slice.len() + shift > u32::MAX as usize { + // (on 32 bit platforms the `.reserve()` itself will panic) + panic!( + "Attempted to grow VarZeroVec to an encoded size that does not fit within a u32" + ); + } unsafe { // Step 1: Shift the data around { diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs index 7210f45713f..1fb9cb1d462 100644 --- a/utils/zerovec/src/varzerovec/ule.rs +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -51,6 +51,7 @@ pub struct VarZeroVecULE { } impl VarZeroVecULE { + #[inline] fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { unsafe { // safety: VarZeroVecULE is guaranteed to parse here From 9a2b2c71c41ba544d286b5ee0f960ec0363c47a4 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 23:37:09 -0700 Subject: [PATCH 12/18] get issue number --- utils/zerovec/src/varzerovec/owned.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 7ef706fcda5..5e8a2cf6629 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -181,6 +181,8 @@ impl VarZeroVecOwned { // by `shift` to make room for one extra index and `element_slice` data let shift_start = u32::from_unaligned(&indices[index]) as usize; let shift_start_p = data.as_mut_ptr().add(shift_start); + debug_assert!(shift_start <= data.len()); + debug_assert!(shift_start + shift <= data.capacity()); // shift elements from shift_start_p to the end of the slice by `shift` elements ptr::copy( shift_start_p, @@ -235,7 +237,8 @@ impl VarZeroVecOwned { } // Step 3: update the length - len_ule[0] = (u32::from_unaligned(&len_ule[0]) + 1).into() + debug_assert!(u32::from_unaligned(&len_ule[0]) + 1 == len); + len_ule[0] = len.into() } } } @@ -244,7 +247,7 @@ impl VarZeroVecOwned { where T: Clone, { - // TODO (Manishearth) make these faster + // TODO (#1080) make these faster let mut vec = self.to_vec(); let ret = vec.remove(index); *self = Self::from_elements(&vec); @@ -255,7 +258,7 @@ impl VarZeroVecOwned { where T: Clone, { - // TODO (Manishearth) make these faster + // TODO (#1080) make these faster let mut vec = self.to_vec(); let ret = mem::replace(&mut vec[index], value); *self = Self::from_elements(&vec); From 57984f7f38f57c7c1473509de23097f6502ca937 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 23:47:32 -0700 Subject: [PATCH 13/18] require no padding bytes in VarULE --- utils/zerovec/src/ule/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index c7369ff5b94..71799ee86e9 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -189,6 +189,9 @@ pub trait AsVarULE { /// /// # Safety /// +/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return +/// a slice of initialized bytes provided that `Self` is initialized. +/// /// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait. pub unsafe trait VarULE: 'static { /// The error type to used by [`VarULE::parse_byte_slice()`] From afff570720f25180def0249feed8807e2bd89f5b Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Tue, 21 Sep 2021 23:48:56 -0700 Subject: [PATCH 14/18] fix compile --- utils/zerovec/src/varzerovec/owned.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 5e8a2cf6629..2ce268bb46f 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -181,8 +181,6 @@ impl VarZeroVecOwned { // by `shift` to make room for one extra index and `element_slice` data let shift_start = u32::from_unaligned(&indices[index]) as usize; let shift_start_p = data.as_mut_ptr().add(shift_start); - debug_assert!(shift_start <= data.len()); - debug_assert!(shift_start + shift <= data.capacity()); // shift elements from shift_start_p to the end of the slice by `shift` elements ptr::copy( shift_start_p, @@ -237,8 +235,8 @@ impl VarZeroVecOwned { } // Step 3: update the length - debug_assert!(u32::from_unaligned(&len_ule[0]) + 1 == len); - len_ule[0] = len.into() + debug_assert!(u32::from_unaligned(&len_ule[0]) + 1 == len as u32); + len_ule[0] = (len as u32).into() } } } From d2bff2ecf868e10cf87a82579ad335c102b49377 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 22 Sep 2021 17:38:22 -0700 Subject: [PATCH 15/18] rename try_from_bytes --- experimental/codepointtrie/src/planes.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/experimental/codepointtrie/src/planes.rs b/experimental/codepointtrie/src/planes.rs index 83d72c21a9f..063727c8474 100644 --- a/experimental/codepointtrie/src/planes.rs +++ b/experimental/codepointtrie/src/planes.rs @@ -169,8 +169,8 @@ pub fn get_planes_trie() -> CodePointTrie<'static, u8, Small> { 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x10, 0x10, 0x10, 0, ]; - let index: ZeroVec = ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible"); - let data: ZeroVec = ZeroVec::try_from_bytes(data_8_array).expect("infallible"); + let index: ZeroVec = ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible"); + let data: ZeroVec = ZeroVec::parse_byte_slice(data_8_array).expect("infallible"); let index_length = 1168; let data_length = 372; let high_start = 0x100000; @@ -291,7 +291,7 @@ mod tests { fn test_index_byte_array_literal() { let index_array_as_bytes: &[u8] = super::INDEX_ARRAY_AS_BYTES; let index_zv_bytes: ZeroVec = - ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible"); + ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible"); let index_zv_aligned: ZeroVec = ZeroVec::from_slice(INDEX_ARRAY); assert_eq!(index_zv_bytes, index_zv_aligned); } From 4966e22906b669afd94315e7635535aad59e9f7e Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 22 Sep 2021 17:39:34 -0700 Subject: [PATCH 16/18] fix tidy --- utils/zerovec/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/zerovec/README.md b/utils/zerovec/README.md index 760afd2659a..2e37bd41610 100644 --- a/utils/zerovec/README.md +++ b/utils/zerovec/README.md @@ -60,7 +60,7 @@ pub struct DataStruct<'data> { let data = DataStruct { nums: ZeroVec::from_slice(&[211, 281, 421, 461]), - strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]), + strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]), }; let bincode_bytes = bincode::serialize(&data) .expect("Serialization should be successful"); From 29861afdb38d84d0c5f65c840df3d3242c4a175d Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 22 Sep 2021 17:42:41 -0700 Subject: [PATCH 17/18] satisfy clippy --- utils/zerovec/src/lib.rs | 4 ++++ utils/zerovec/src/varzerovec/components.rs | 2 +- utils/zerovec/src/varzerovec/owned.rs | 10 ++++++++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/utils/zerovec/src/lib.rs b/utils/zerovec/src/lib.rs index 2134b710696..faf2fa0a5e3 100644 --- a/utils/zerovec/src/lib.rs +++ b/utils/zerovec/src/lib.rs @@ -77,6 +77,10 @@ //! # } // feature = "serde" //! ``` +// this crate does a lot of nuanced lifetime manipulation, being explicit +// is better here. +#![allow(clippy::needless_lifetimes)] + pub mod map; #[cfg(test)] pub mod samples; diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 36872a4e8cb..2ea3d9d43d0 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -225,7 +225,7 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { .map(|s| unsafe { T::VarULE::from_byte_slice_unchecked(s) }) } - pub fn to_vec(&self) -> Vec + pub fn to_vec(self) -> Vec where T: Clone, { diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 2ce268bb46f..6bf19aa4774 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -184,7 +184,7 @@ impl VarZeroVecOwned { // shift elements from shift_start_p to the end of the slice by `shift` elements ptr::copy( shift_start_p, - shift_start_p.offset(shift as isize), + shift_start_p.add(shift), data.len() - shift_start, ); shift_start + 4 @@ -199,7 +199,7 @@ impl VarZeroVecOwned { // Step 1b: insert the new element ptr::copy( element_slice.as_ptr(), - data_p.offset(insertion_point as isize), + data_p.add(insertion_point), element_slice.len(), ); @@ -306,6 +306,12 @@ where } } +impl Default for VarZeroVecOwned { + fn default() -> Self { + Self::new() + } +} + #[cfg(test)] mod test { use super::VarZeroVecOwned; From 27c1b9cb0fa9b19b46f9db870fad3580db31199e Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 22 Sep 2021 18:23:39 -0700 Subject: [PATCH 18/18] safety comment --- utils/zerovec/src/ule/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index 71799ee86e9..4d5ca14a0a7 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -192,7 +192,8 @@ pub trait AsVarULE { /// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return /// a slice of initialized bytes provided that `Self` is initialized. /// -/// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait. +/// [`VarULE::from_byte_slice_unchecked()`] _must_ be implemented to return the same result +/// as [`VarULE::parse_byte_slice()`] provided both are passed the same validly parsing byte slices. pub unsafe trait VarULE: 'static { /// The error type to used by [`VarULE::parse_byte_slice()`] type Error;