diff --git a/Cargo.lock b/Cargo.lock index 9d0319aa02a..4bd7d9ade08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2913,7 +2913,6 @@ version = "0.3.0" dependencies = [ "bincode", "criterion", - "either", "getrandom 0.2.2", "iai", "icu_benchmark_macros", diff --git a/experimental/codepointtrie/src/planes.rs b/experimental/codepointtrie/src/planes.rs index 83d72c21a9f..063727c8474 100644 --- a/experimental/codepointtrie/src/planes.rs +++ b/experimental/codepointtrie/src/planes.rs @@ -169,8 +169,8 @@ pub fn get_planes_trie() -> CodePointTrie<'static, u8, Small> { 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x10, 0x10, 0x10, 0, ]; - let index: ZeroVec = ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible"); - let data: ZeroVec = ZeroVec::try_from_bytes(data_8_array).expect("infallible"); + let index: ZeroVec = ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible"); + let data: ZeroVec = ZeroVec::parse_byte_slice(data_8_array).expect("infallible"); let index_length = 1168; let data_length = 372; let high_start = 0x100000; @@ -291,7 +291,7 @@ mod tests { fn test_index_byte_array_literal() { let index_array_as_bytes: &[u8] = super::INDEX_ARRAY_AS_BYTES; let index_zv_bytes: ZeroVec = - ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible"); + ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible"); let index_zv_aligned: ZeroVec = ZeroVec::from_slice(INDEX_ARRAY); assert_eq!(index_zv_bytes, index_zv_aligned); } diff --git a/utils/zerovec/Cargo.toml b/utils/zerovec/Cargo.toml index d6969aeecbb..c9c7318ad8e 100644 --- a/utils/zerovec/Cargo.toml +++ b/utils/zerovec/Cargo.toml @@ -24,7 +24,6 @@ include = [ all-features = true [dependencies] -either = "1.6.1" serde = { version = "1.0", optional = true , default-features = false, features = ["alloc"] } yoke = { path = "../yoke", version = "0.2.0", optional = true } diff --git a/utils/zerovec/README.md b/utils/zerovec/README.md index 3e9f229277b..67729a9db18 100644 --- a/utils/zerovec/README.md +++ b/utils/zerovec/README.md @@ -62,7 +62,7 @@ pub struct DataStruct<'data> { let data = DataStruct { nums: ZeroVec::from_slice(&[211, 281, 421, 461]), - strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]), + strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]), }; let bincode_bytes = bincode::serialize(&data) .expect("Serialization should be successful"); diff --git a/utils/zerovec/benches/vzv.rs b/utils/zerovec/benches/vzv.rs index bc828adb91b..fd5c80e88f7 100644 --- a/utils/zerovec/benches/vzv.rs +++ b/utils/zerovec/benches/vzv.rs @@ -46,7 +46,7 @@ fn overview_bench(c: &mut Criterion) { let seed = 42; let (string_vec, _) = random_alphanums(2..=10, 100, seed); let bytes: Vec = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap(); - let vzv = VarZeroVec::::try_from_bytes(black_box(bytes.as_slice())).unwrap(); + let vzv = VarZeroVec::::parse_byte_slice(black_box(bytes.as_slice())).unwrap(); c.bench_function("vzv/overview", |b| { b.iter(|| { @@ -73,7 +73,7 @@ fn char_count_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, _) = random_alphanums(2..=20, 100, seed); let bytes: Vec = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap(); - let vzv = VarZeroVec::::try_from_bytes(black_box(bytes.as_slice())).unwrap(); + let vzv = VarZeroVec::::parse_byte_slice(black_box(bytes.as_slice())).unwrap(); // *** Count chars in vec of 100 strings *** c.bench_function("vzv/char_count/slice", |b| { @@ -100,7 +100,7 @@ fn binary_search_benches(c: &mut Criterion) { let (string_vec, seed) = random_alphanums(2..=20, 500, seed); let (needles, _) = random_alphanums(2..=20, 10, seed); let bytes: Vec = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap(); - let vzv = VarZeroVec::::try_from_bytes(black_box(bytes.as_slice())).unwrap(); + let vzv = VarZeroVec::::parse_byte_slice(black_box(bytes.as_slice())).unwrap(); let single_needle = "lmnop".to_string(); // *** Binary search vec of 500 strings 10 times *** @@ -139,7 +139,7 @@ fn serde_benches(c: &mut Criterion) { let seed = 2021; let (string_vec, _) = random_alphanums(2..=20, 100, seed); let bincode_vec = bincode::serialize(&string_vec).unwrap(); - let vzv = VarZeroVec::from(string_vec); + let vzv = VarZeroVec::from(&*string_vec); let bincode_vzv = bincode::serialize(&vzv).unwrap(); // *** Deserialize vec of 100 strings *** diff --git a/utils/zerovec/benches/zerovec.rs b/utils/zerovec/benches/zerovec.rs index cff2067053d..fdde6d8c477 100644 --- a/utils/zerovec/benches/zerovec.rs +++ b/utils/zerovec/benches/zerovec.rs @@ -55,13 +55,13 @@ where buffer .0 .extend(ZeroVec::from_slice(vec.as_slice()).as_bytes()); - ZeroVec::::try_from_bytes(&buffer.0[1..]).unwrap() + ZeroVec::::parse_byte_slice(&buffer.0[1..]).unwrap() } fn overview_bench(c: &mut Criterion) { c.bench_function("zerovec/overview", |b| { b.iter(|| { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() @@ -83,7 +83,7 @@ fn sum_benches(c: &mut Criterion) { c.bench_function("zerovec/sum/sample/zerovec", |b| { b.iter(|| { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() @@ -98,7 +98,7 @@ fn binary_search_benches(c: &mut Criterion) { }); c.bench_function("zerovec/binary_search/sample/zerovec", |b| { - let zerovec = ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(); b.iter(|| zerovec.binary_search(&0x0c0d0c)); }); diff --git a/utils/zerovec/benches/zerovec_iai.rs b/utils/zerovec/benches/zerovec_iai.rs index 711b4089058..455dd464dd4 100644 --- a/utils/zerovec/benches/zerovec_iai.rs +++ b/utils/zerovec/benches/zerovec_iai.rs @@ -15,7 +15,7 @@ fn sum_slice() -> u32 { } fn sum_zerovec() -> u32 { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .iter() .sum::() @@ -26,7 +26,7 @@ fn binarysearch_slice() -> Result { } fn binarysearch_zerovec() -> Result { - ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)) + ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)) .unwrap() .binary_search(&0x0c0d0c) } diff --git a/utils/zerovec/benches/zerovec_serde.rs b/utils/zerovec/benches/zerovec_serde.rs index 9038bc12b25..67e9fb0d62f 100644 --- a/utils/zerovec/benches/zerovec_serde.rs +++ b/utils/zerovec/benches/zerovec_serde.rs @@ -31,9 +31,10 @@ fn random_numbers(count: usize) -> Vec { fn overview_bench(c: &mut Criterion) { c.bench_function("zerovec_serde/overview", |b| { // Same as "zerovec_serde/deserialize_sum/u32/zerovec" - let buffer = - bincode::serialize(&ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap()) - .unwrap(); + let buffer = bincode::serialize( + &ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(), + ) + .unwrap(); b.iter(|| { bincode::deserialize::>(&buffer) .unwrap() @@ -71,9 +72,10 @@ fn u32_benches(c: &mut Criterion) { }); c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| { - let buffer = - bincode::serialize(&ZeroVec::::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap()) - .unwrap(); + let buffer = bincode::serialize( + &ZeroVec::::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(), + ) + .unwrap(); b.iter(|| { bincode::deserialize::>(&buffer) .unwrap() @@ -131,7 +133,7 @@ fn stress_benches(c: &mut Criterion) { }); // *** Compute sum of vec of 100 `u32` *** - let zerovec = ZeroVec::::try_from_bytes(zerovec_aligned.as_bytes()).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(zerovec_aligned.as_bytes()).unwrap(); c.bench_function("zerovec_serde/sum/stress/zerovec", |b| { b.iter(|| black_box(&zerovec).iter().sum::()); }); diff --git a/utils/zerovec/src/lib.rs b/utils/zerovec/src/lib.rs index 336fad54b38..e1a0320f7d5 100644 --- a/utils/zerovec/src/lib.rs +++ b/utils/zerovec/src/lib.rs @@ -65,7 +65,7 @@ //! //! let data = DataStruct { //! nums: ZeroVec::from_slice(&[211, 281, 421, 461]), -//! strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]), +//! strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]), //! }; //! let bincode_bytes = bincode::serialize(&data) //! .expect("Serialization should be successful"); @@ -79,11 +79,15 @@ //! # } // feature = "serde" //! ``` +// this crate does a lot of nuanced lifetime manipulation, being explicit +// is better here. +#![allow(clippy::needless_lifetimes)] + pub mod map; #[cfg(test)] pub mod samples; pub mod ule; -mod varzerovec; +pub mod varzerovec; mod zerovec; #[cfg(feature = "yoke")] diff --git a/utils/zerovec/src/map/vecs.rs b/utils/zerovec/src/map/vecs.rs index 922e7392686..c702022a46f 100644 --- a/utils/zerovec/src/map/vecs.rs +++ b/utils/zerovec/src/map/vecs.rs @@ -3,6 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::ule::*; +use crate::varzerovec::owned::VarZeroVecOwned; use crate::VarZeroVec; use crate::ZeroVec; use std::cmp::Ordering; @@ -108,26 +109,27 @@ where self.get(index) } fn insert(&mut self, index: usize, value: T) { - self.make_mut().insert(index, value) + self.make_mut().insert(index, &value) } fn remove(&mut self, index: usize) -> T { self.make_mut().remove(index) } fn replace(&mut self, index: usize, value: T) -> T { let vec = self.make_mut(); - mem::replace(&mut vec[index], value) + vec.replace(index, value) } fn push(&mut self, value: T) { - self.make_mut().push(value) + let len = self.len(); + self.make_mut().insert(len, &value) } fn len(&self) -> usize { self.len() } fn new() -> Self { - Vec::new().into() + VarZeroVecOwned::new().into() } fn with_capacity(cap: usize) -> Self { - Vec::with_capacity(cap).into() + VarZeroVecOwned::with_capacity(cap).into() } fn clear(&mut self) { self.make_mut().clear() diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index c7369ff5b94..4d5ca14a0a7 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -189,7 +189,11 @@ pub trait AsVarULE { /// /// # Safety /// -/// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait. +/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return +/// a slice of initialized bytes provided that `Self` is initialized. +/// +/// [`VarULE::from_byte_slice_unchecked()`] _must_ be implemented to return the same result +/// as [`VarULE::parse_byte_slice()`] provided both are passed the same validly parsing byte slices. pub unsafe trait VarULE: 'static { /// The error type to used by [`VarULE::parse_byte_slice()`] type Error; diff --git a/utils/zerovec/src/ule/plain.rs b/utils/zerovec/src/ule/plain.rs index e4f73e2960c..eab8b163676 100644 --- a/utils/zerovec/src/ule/plain.rs +++ b/utils/zerovec/src/ule/plain.rs @@ -50,6 +50,16 @@ macro_rules! impl_byte_slice_size { unsafe { std::slice::from_raw_parts(data as *const u8, len) } } } + + impl PlainOldULE<$size> { + #[inline] + pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] { + let data = bytes.as_mut_ptr(); + let len = bytes.len() / $size; + // Safe because Self is transparent over [u8; $size] + unsafe { std::slice::from_raw_parts_mut(data as *mut Self, len) } + } + } }; } diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 94f0f01cacf..2ea3d9d43d0 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -15,7 +15,7 @@ fn usizeify(x: PlainOldULE<4>) -> usize { /// /// This is where the actual work involved in VarZeroVec happens /// -/// See [`SliceComponents::try_from_bytes()`] for information on the internal invariants involved +/// See [`SliceComponents::parse_byte_slice()`] for information on the internal invariants involved pub struct SliceComponents<'a, T> { /// The list of indices into the `things` slice indices: &'a [PlainOldULE<4>], @@ -50,7 +50,7 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { /// - `indices[len - 1]..things.len()` must index into a valid section of /// `things`, such that it parses to a `T::VarULE` #[inline] - pub fn try_from_bytes(slice: &'a [u8]) -> Result> { + pub fn parse_byte_slice(slice: &'a [u8]) -> Result> { if slice.is_empty() { return Ok(SliceComponents { indices: &[], @@ -87,6 +87,39 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { Ok(components) } + /// Construct a [`SliceComponents`] from a byte slice that has previously + /// successfully returned a [`SliceComponents`] when passed to + /// [`SliceComponents::parse_byte_slice()`]. Will return the same + /// object as one would get from calling [`SliceComponents::parse_byte_slice()`]. + /// + /// # Safety + /// The bytes must have previously successfully run through + /// [`SliceComponents::parse_byte_slice()`] + pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self { + if slice.is_empty() { + return SliceComponents { + indices: &[], + things: &[], + entire_slice: slice, + marker: PhantomData, + }; + } + let len_bytes = slice.get_unchecked(0..4); + let len_ule = PlainOldULE::<4>::from_byte_slice_unchecked(len_bytes); + + let len = u32::from_unaligned(len_ule.get_unchecked(0)) as usize; + let indices_bytes = slice.get_unchecked(4..4 * len + 4); + let indices = PlainOldULE::<4>::from_byte_slice_unchecked(indices_bytes); + let things = slice.get_unchecked(4 * len + 4..); + + SliceComponents { + indices, + things, + entire_slice: slice, + marker: PhantomData, + } + } + #[inline] pub fn len(self) -> usize { self.indices.len() @@ -98,7 +131,6 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { } #[inline] - #[cfg(feature = "serde")] pub fn entire_slice(self) -> &'a [u8] { self.entire_slice } @@ -192,6 +224,24 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> { .chain(last) .map(|s| unsafe { T::VarULE::from_byte_slice_unchecked(s) }) } + + pub fn to_vec(self) -> Vec + where + T: Clone, + { + self.iter().map(T::from_unaligned).collect() + } + + // Dump a debuggable representation of this type + #[allow(unused)] // useful for debugging + pub(crate) fn dump(&self) -> String { + let indices = self + .indices + .iter() + .map(u32::from_unaligned) + .collect::>(); + format!("SliceComponents {{ indices: {:?} }}", indices) + } } impl<'a, T> SliceComponents<'a, T> diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs index 4679ae3e356..fd47184a89b 100644 --- a/utils/zerovec/src/varzerovec/mod.rs +++ b/utils/zerovec/src/varzerovec/mod.rs @@ -4,13 +4,17 @@ use crate::ule::*; use components::SliceComponents; -use either::Either; use std::fmt::{self, Display}; use std::ops::Index; -mod components; +pub(crate) mod components; +pub(crate) mod owned; #[cfg(feature = "serde")] mod serde; +mod ule; + +use owned::VarZeroVecOwned; +pub use ule::VarZeroVecULE; /// A zero-copy vector for variable-width types. /// @@ -58,7 +62,7 @@ mod serde; /// 6, 0, 0, 0, 119, 207, 137, 230, 150, 135, 240, 145, 132, 131, /// ]; /// -/// let zerovec: VarZeroVec = VarZeroVec::try_from_bytes(bytes)?; +/// let zerovec: VarZeroVec = VarZeroVec::parse_byte_slice(bytes)?; /// /// assert_eq!(zerovec.get(2), Some("文")); /// assert_eq!(zerovec, &*strings); @@ -86,8 +90,8 @@ mod serde; /// 0, 0, 42, 0, 0, 0, 3, 217, 0, 0, 57, 48, 0, 0, 49, 212, 0, 0, /// 9, 0, 0, 0]; /// -/// let zerovec: VarZeroVec>> = VarZeroVec::try_from_bytes(bytes)?; -/// let zerovec2: VarZeroVec> = VarZeroVec::try_from_bytes(bytes)?; +/// let zerovec: VarZeroVec>> = VarZeroVec::parse_byte_slice(bytes)?; +/// let zerovec2: VarZeroVec> = VarZeroVec::parse_byte_slice(bytes)?; /// /// assert_eq!(zerovec.get(2).and_then(|v| v.get(1)), Some(&55555.into())); /// assert_eq!(zerovec2.get(2).and_then(|v| v.get(1)), Some(&55555.into())); @@ -98,6 +102,10 @@ mod serde; /// # Ok::<(), VarZeroVecError>(()) /// ``` /// +/// +/// [`VarZeroVec`]s can be nested infinitely, see the docs of [`VarZeroVecULE`] +/// for more information. +/// /// [`ule`]: crate::ule #[derive(Clone)] pub struct VarZeroVec<'a, T>(VarZeroVecInner<'a, T>); @@ -124,7 +132,7 @@ pub struct VarZeroVec<'a, T>(VarZeroVecInner<'a, T>); /// The actual implementation details of this can be found in the `components` module #[derive(Clone)] enum VarZeroVecInner<'a, T> { - Owned(Vec), + Owned(VarZeroVecOwned), /// This is *basically* an `&'a [u8]` to a zero copy buffer, but split out into /// the buffer components. Logically this is capable of behaving as /// a `&'a [T::VarULE]`, but since `T::VarULE` is unsized that type does not actually @@ -156,7 +164,7 @@ where } } -type ParseErrorFor = VarZeroVecError<<::VarULE as VarULE>::Error>; +pub type ParseErrorFor = VarZeroVecError<<::VarULE as VarULE>::Error>; impl From for VarZeroVecError { fn from(e: E) -> Self { @@ -164,21 +172,27 @@ impl From for VarZeroVecError { } } -impl<'a, T> From> for VarZeroVec<'a, T> { +impl<'a, T> From> for VarZeroVec<'a, T> { #[inline] - fn from(other: Vec) -> Self { - Self(VarZeroVecInner::Owned(other)) + fn from(other: VarZeroVecInner<'a, T>) -> Self { + Self(other) } } -impl<'a, T> From> for VarZeroVec<'a, T> { +impl<'a, T> From> for VarZeroVec<'a, T> { #[inline] - fn from(other: VarZeroVecInner<'a, T>) -> Self { - Self(other) + fn from(other: VarZeroVecOwned) -> Self { + VarZeroVecInner::Owned(other).into() } } impl<'a, T: AsVarULE> VarZeroVec<'a, T> { + fn get_components<'b>(&'b self) -> SliceComponents<'b, T> { + match self.0 { + VarZeroVecInner::Owned(ref owned) => owned.get_components(), + VarZeroVecInner::Borrowed(components) => components, + } + } /// Get the number of elements in this vector /// /// # Example @@ -192,15 +206,12 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert_eq!(vec.len(), 4); /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn len(&self) -> usize { - match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.len(), - VarZeroVecInner::Borrowed(components) => components.len(), - } + self.get_components().len() } /// Returns `true` if the vector contains no elements. @@ -215,20 +226,18 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings: Vec = vec![]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert!(vec.is_empty()); /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn is_empty(&self) -> bool { - match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.is_empty(), - VarZeroVecInner::Borrowed(components) => components.is_empty(), - } + self.get_components().is_empty() } /// Parse a VarZeroVec from a slice of the appropriate format /// - /// Slices of the right format can be obtained via VarZeroVec::get_serializable_bytes() + /// Slices of the right format can be obtained via [`VarZeroVec::get_serializable_bytes()`] + /// or [`VarZeroVec::get_encoded_slice()`] /// /// # Example /// @@ -241,20 +250,20 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert_eq!(&vec[0], "foo"); /// assert_eq!(&vec[1], "bar"); /// assert_eq!(&vec[2], "baz"); /// assert_eq!(&vec[3], "quux"); /// # Ok::<(), VarZeroVecError>(()) /// ``` - pub fn try_from_bytes(slice: &'a [u8]) -> Result> { + pub fn parse_byte_slice(slice: &'a [u8]) -> Result> { if slice.is_empty() { // does not allocate - return Ok(VarZeroVecInner::Owned(Vec::new()).into()); + return Ok(VarZeroVecInner::Owned(VarZeroVecOwned::new()).into()); } - let components = SliceComponents::::try_from_bytes(slice)?; + let components = SliceComponents::::parse_byte_slice(slice)?; Ok(VarZeroVecInner::Borrowed(components).into()) } @@ -271,7 +280,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(iter_results[0], "foo"); @@ -281,14 +290,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn iter<'b: 'a>(&'b self) -> impl Iterator { - // We use Either here so that we can use `impl Trait` with heterogeneous types - // - // An alternate design is to write explicit iterators. Once Rust has generators this will - // be unnecessary. - match self.0 { - VarZeroVecInner::Owned(ref vec) => Either::Left(vec.iter().map(|t| t.as_unaligned())), - VarZeroVecInner::Borrowed(components) => Either::Right(components.iter()), - } + self.get_components().iter() } /// Get one of VarZeroVec's elements, returning None if the index is out of bounds @@ -303,7 +305,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// let mut iter_results: Vec<&str> = vec.iter().collect(); /// assert_eq!(vec.get(0), Some("foo")); @@ -314,10 +316,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// # Ok::<(), VarZeroVecError>(()) /// ``` pub fn get(&self, idx: usize) -> Option<&T::VarULE> { - match self.0 { - VarZeroVecInner::Owned(ref vec) => vec.get(idx).map(|t| t.as_unaligned()), - VarZeroVecInner::Borrowed(components) => components.get(idx), - } + self.get_components().get(idx) } /// Convert this into a mutable vector of the owned `T` type, cloning if necessary. @@ -333,7 +332,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// assert_eq!(vec.len(), 4); /// let mutvec = vec.make_mut(); @@ -349,15 +348,14 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { // // This function is crate-public for now since we don't yet want to stabilize // the internal implementation details - pub(crate) fn make_mut(&mut self) -> &mut Vec + pub(crate) fn make_mut(&mut self) -> &mut VarZeroVecOwned where T: Clone, { match self.0 { VarZeroVecInner::Owned(ref mut vec) => vec, VarZeroVecInner::Borrowed(components) => { - let vec = components.iter().map(T::from_unaligned).collect(); - let new_self = VarZeroVecInner::Owned(vec).into(); + let new_self = VarZeroVecOwned::from_components(components).into(); *self = new_self; // recursion is limited since we are guaranteed to hit the Owned branch self.make_mut() @@ -377,7 +375,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), /// "baz".to_owned(), "quux".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// assert_eq!(vec.len(), 4); /// // has 'static lifetime @@ -400,25 +398,18 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { where T: Clone, { - let mut new = self.clone(); - new.make_mut(); - match new.0 { - VarZeroVecInner::Owned(vec) => vec, - _ => unreachable!(), - } + self.get_components().to_vec() } - /// If this is borrowed, get the borrowed slice - #[cfg(feature = "serde")] - pub(crate) fn get_slice_for_borrowed(&self) -> Option<&'a [u8]> { - match self.0 { - VarZeroVecInner::Owned(..) => None, - VarZeroVecInner::Borrowed(b) => Some(b.entire_slice()), - } + /// Obtain the internal encoded slice + /// + /// This can be passed back to [`Self::parse_byte_slice()`] + pub fn get_encoded_slice(&self) -> &[u8] { + self.get_components().entire_slice() } /// For a slice of `T`, get a list of bytes that can be passed to - /// `try_from_bytes` to recoup the same data. + /// `parse_byte_slice` to recoup the same data. /// /// Returns `None` if the slice is too large to be represented in a list of /// bytes whose length fits in a `u32`. @@ -433,7 +424,7 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { /// let strings = vec!["foo".to_owned(), "bar".to_owned(), "baz".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); /// - /// let mut borrowed: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut borrowed: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// assert_eq!(borrowed, &*strings); /// /// # Ok::<(), VarZeroVecError>(()) @@ -442,6 +433,16 @@ impl<'a, T: AsVarULE> VarZeroVec<'a, T> { pub fn get_serializable_bytes(elements: &[T]) -> Option> { components::get_serializable_bytes(elements) } + + /// Return whether the [`VarZeroVec`] is operating on owned or borrowed + /// data. [`VarZeroVec::into_owned()`] and [`VarZeroVec::make_mut()`] can + /// be used to force it into an owned type + pub fn is_owned(&self) -> bool { + match self.0 { + VarZeroVecInner::Owned(..) => true, + VarZeroVecInner::Borrowed(..) => false, + } + } } impl<'a, T> VarZeroVec<'a, T> @@ -462,7 +463,7 @@ where /// let strings = vec!["a".to_owned(), "b".to_owned(), /// "f".to_owned(), "g".to_owned()]; /// let bytes = VarZeroVec::get_serializable_bytes(&strings).unwrap(); - /// let mut vec: VarZeroVec = VarZeroVec::try_from_bytes(&bytes)?; + /// let mut vec: VarZeroVec = VarZeroVec::parse_byte_slice(&bytes)?; /// /// assert_eq!(vec.binary_search("f"), Ok(2)); /// assert_eq!(vec.binary_search("e"), Err(2)); @@ -472,12 +473,7 @@ where /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search #[inline] pub fn binary_search(&self, x: &T::VarULE) -> Result { - match self.0 { - VarZeroVecInner::Owned(ref vec) => { - vec.binary_search_by(|probe| probe.as_unaligned().cmp(x)) - } - VarZeroVecInner::Borrowed(components) => components.binary_search(x), - } + self.get_components().binary_search(x) } } @@ -488,6 +484,15 @@ impl<'a, T: AsVarULE> Index for VarZeroVec<'a, T> { } } +impl<'a, T> From<&'_ [T]> for VarZeroVec<'a, T> +where + T: AsVarULE, +{ + fn from(other: &'_ [T]) -> Self { + VarZeroVecOwned::from_elements(other).into() + } +} + impl<'a, 'b, T> PartialEq> for VarZeroVec<'a, T> where T: AsVarULE, diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs new file mode 100644 index 00000000000..6bf19aa4774 --- /dev/null +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -0,0 +1,345 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::*; +use core::fmt; +use core::marker::PhantomData; +use core::mem; +use core::ptr; +use core::slice; + +#[derive(Clone)] +pub struct VarZeroVecOwned { + marker: PhantomData<[T]>, + // safety invariant: must parse into a valid SliceComponents + entire_slice: Vec, +} + +impl VarZeroVecOwned { + /// Construct an empty VarZeroVecOwned + pub fn new() -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::new(), + } + } + + pub fn from_components(components: SliceComponents) -> Self { + Self { + marker: PhantomData, + entire_slice: components.entire_slice().into(), + } + } + + /// Construct a VarZeroVecOwned from a list of elements + pub fn from_elements(elements: &[T]) -> Self { + Self { + marker: PhantomData, + entire_slice: components::get_serializable_bytes(elements).expect( + "Attempted to build VarZeroVec out of elements that \ + cumulatively are larger than a u32 in size", + ), + } + } + + /// Try to allocate a buffer with enough capacity for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn with_capacity(capacity: usize) -> Self { + Self { + marker: PhantomData, + entire_slice: Vec::with_capacity(capacity * 8), + } + } + + /// Try to reserve space for `capacity` + /// elements. Since `T` can take up an arbitrary size this will + /// just allocate enough space for 4-byte Ts + pub(crate) fn reserve(&mut self, capacity: usize) { + self.entire_slice.reserve(capacity * 8) + } + + #[inline] + pub(crate) fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { + unsafe { + // safety: VarZeroVecOwned is guaranteed to parse here + SliceComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Get the number of elements in this vector + pub fn len(&self) -> usize { + self.get_components().len() + } + + /// Returns `true` if the vector contains no elements. + pub fn is_empty(&self) -> bool { + self.get_components().is_empty() + } + + /// Obtain an iterator over VarZeroVecOwned's elements + pub fn iter<'b>(&'b self) -> impl Iterator { + self.get_components().iter() + } + + /// Get one of VarZeroVecOwned's elements, returning None if the index is out of bounds + pub fn get(&self, idx: usize) -> Option<&T::VarULE> { + self.get_components().get(idx) + } + + /// Get this [`VarZeroVecOwned`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroVecOwned`], + /// it is more efficient to perform this conversion first + pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T> { + VarZeroVec(VarZeroVecInner::Borrowed(self.get_components())) + } + + /// Empty the vector + pub fn clear(&mut self) { + self.entire_slice.clear() + } + + pub fn to_vec(&self) -> Vec + where + T: Clone, + { + self.get_components().to_vec() + } + + /// Insert an element at index `idx` + pub fn insert(&mut self, index: usize, element: &T) { + let len = self.len(); + if index > len { + panic!( + "Called out-of-bounds insert() on VarZeroVec, index {} len {}", + index, len + ); + } + + if len == 0 { + // If there is no data, just construct it with the existing procedure + // for constructing an entire slice + self.entire_slice = components::get_serializable_bytes(slice::from_ref(element)) + .expect( + "attempted to insert an element too large to be encoded\ + in a VarZeroVec", + ); + return; + } + + // The format of the encoded data is: + // - four bytes of "len" + // - len*4 bytes for an array of indices + // - the actual data to which the indices point + // + // When inserting an element, space must be made in the `data` + // segment for the element, and space must be made in the `indices` segment + // for the new index. Note that making space in the indices segment + // means that the data segment needs to be shifted by 4 bytes, on top + // of any reshuffling that needs to be done to make space + // + // We do this in the following steps: + // 1. Shift the data around + // 1a. We move any data *after* the inserted element by the length of the element + 4 + // to make space for the element and one extra index + // 1b. We insert the element at four bytes after the spot where the data we just moved + // started, again making space for that extra index + // 1c. We move the data from *before* the inserted element by 4 bytes to make space for the + // extra index + // 2. Update the indices: Shift indices after the inserted element by one slot, incrementing them + // by the length of the inserted element + // 3. Update the length + + let element = element.as_unaligned(); + let element_slice = element.as_byte_slice(); + // the amount the vector is growing by + let shift = 4 + element_slice.len(); + self.entire_slice.reserve(shift); + if self.entire_slice.len() + shift > u32::MAX as usize { + // (on 32 bit platforms the `.reserve()` itself will panic) + panic!( + "Attempted to grow VarZeroVec to an encoded size that does not fit within a u32" + ); + } + unsafe { + // Step 1: Shift the data around + { + let (indices, data) = self.entire_slice.split_at_mut(4 + 4 * len); + + let (_len, indices) = indices.split_at_mut(4); + // safety: the indices at [4...4 + len * 4] form a slice of PlainOldULE<4> + let indices = PlainOldULE::<4>::from_byte_slice_unchecked(indices); + + // Step 1a: Move the data after the inserted element by the length of + // the element + 4 to make space for the element and one extra index + + // calculate the data insertion point as an index into `data` + let insertion_point = if index != len { + // Assuming we're not pushing onto the end, we need to shift the tail-end elements + // by `shift` to make room for one extra index and `element_slice` data + let shift_start = u32::from_unaligned(&indices[index]) as usize; + let shift_start_p = data.as_mut_ptr().add(shift_start); + // shift elements from shift_start_p to the end of the slice by `shift` elements + ptr::copy( + shift_start_p, + shift_start_p.add(shift), + data.len() - shift_start, + ); + shift_start + 4 + } else { + // If we're inserting to the end of the vector, we just need to insert at + // the length + 4 (space for one index) + data.len() + 4 + }; + + let data_p = data.as_mut_ptr(); + + // Step 1b: insert the new element + ptr::copy( + element_slice.as_ptr(), + data_p.add(insertion_point), + element_slice.len(), + ); + + // Step 1c: shift the remaining elements to make room for the new index + if insertion_point != 0 { + ptr::copy(data_p, data_p.offset(4), insertion_point - 4); + } + } + self.entire_slice.set_len(self.entire_slice.len() + shift); + // Step 2: Shift indices after the inserted element by one slot, incrementing them + // by the length of the inserted element + { + let len = len + 1; + let (indices, data) = self.entire_slice.split_at_mut(4 + 4 * len); + let (len_ule, indices) = indices.split_at_mut(4); + // safety: the indices at [4...4 + len * 4] form a slice of PlainOldULE<4> + let indices = PlainOldULE::<4>::from_byte_slice_unchecked_mut(indices); + // safety: the starting index is a single PlainOldULE<4> + let len_ule = PlainOldULE::<4>::from_byte_slice_unchecked_mut(len_ule); + + let element_len = element_slice.len() as u32; + if index + 1 == len { + // appending to the end is weird, because there's no end-index for us to copy; + // the end-index is simply the old length + indices[index] = (data.len() as u32 - element_len).into(); + } else { + let mut next = u32::from_unaligned(&indices[index]); + for idx in &mut indices[index + 1..] { + let incremented: u32 = next + element_len; + next = u32::from_unaligned(idx); + *idx = incremented.into(); + } + } + + // Step 3: update the length + debug_assert!(u32::from_unaligned(&len_ule[0]) + 1 == len as u32); + len_ule[0] = (len as u32).into() + } + } + } + + pub fn remove(&mut self, index: usize) -> T + where + T: Clone, + { + // TODO (#1080) make these faster + let mut vec = self.to_vec(); + let ret = vec.remove(index); + *self = Self::from_elements(&vec); + ret + } + + pub fn replace(&mut self, index: usize, value: T) -> T + where + T: Clone, + { + // TODO (#1080) make these faster + let mut vec = self.to_vec(); + let ret = mem::replace(&mut vec[index], value); + *self = Self::from_elements(&vec); + ret + } +} + +impl VarZeroVecOwned +where + T: AsVarULE, + T::VarULE: Ord, +{ + /// Binary searches a sorted `VarZeroVecOwned` for the given element. FoGeneralr more information, see + /// the primitive function [`binary_search`]. + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T::VarULE) -> Result { + self.get_components().binary_search(x) + } +} + +impl PartialEq<&[T]> for VarZeroVecOwned +where + T: AsVarULE, + T::VarULE: PartialEq, +{ + #[inline] + fn eq(&self, other: &&[T]) -> bool { + self.iter().eq(other.iter().map(|t| t.as_unaligned())) + } +} + +impl Index for VarZeroVecOwned { + type Output = T::VarULE; + fn index(&self, index: usize) -> &Self::Output { + self.get(index).expect("Indexing VarZeroVec out of bounds") + } +} + +impl fmt::Debug for VarZeroVecOwned +where + T::VarULE: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +impl Default for VarZeroVecOwned { + fn default() -> Self { + Self::new() + } +} + +#[cfg(test)] +mod test { + use super::VarZeroVecOwned; + #[test] + fn test_insert_integrity() { + let mut items: Vec = vec![ + "foo".into(), + "bar".into(), + "baz".into(), + "abcdefghijklmn".into(), + "five".into(), + ]; + let mut zerovec = VarZeroVecOwned::from_elements(&items); + zerovec.insert(1, &"foo3".into()); + items.insert(1, "foo3".into()); + assert_eq!(zerovec, &*items); + + items.insert(0, "1234567890".into()); + zerovec.insert(0, &"1234567890".into()); + assert_eq!(zerovec, &*items); + + // make sure inserting at the end works + items.insert(items.len(), "qwertyuiop".into()); + zerovec.insert(zerovec.len(), &"qwertyuiop".into()); + assert_eq!(zerovec, &*items); + + items.insert(0, "asdfghjkl;".into()); + zerovec.insert(0, &"asdfghjkl;".into()); + assert_eq!(zerovec, &*items); + } +} diff --git a/utils/zerovec/src/varzerovec/serde.rs b/utils/zerovec/src/varzerovec/serde.rs index e9d7a8327e7..5e7d5cbb0ec 100644 --- a/utils/zerovec/src/varzerovec/serde.rs +++ b/utils/zerovec/src/varzerovec/serde.rs @@ -5,7 +5,7 @@ use super::VarZeroVec; use crate::ule::*; use serde::de::{self, Deserialize, Deserializer, SeqAccess, Visitor}; -use serde::ser::{self, Serialize, SerializeSeq, Serializer}; +use serde::ser::{Serialize, SerializeSeq, Serializer}; use std::fmt; use std::marker::PhantomData; @@ -36,7 +36,7 @@ where where E: de::Error, { - VarZeroVec::try_from_bytes(bytes).map_err(de::Error::custom) + VarZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) } fn visit_seq(self, mut seq: A) -> Result @@ -51,7 +51,7 @@ where while let Some(value) = seq.next_element::()? { vec.push(value); } - Ok(vec.into()) + Ok((&*vec).into()) } } @@ -95,16 +95,8 @@ where seq.serialize_element(&T::from_unaligned(value))?; } seq.end() - } else if let Some(slice) = self.get_slice_for_borrowed() { - serializer.serialize_bytes(slice) } else { - // This creates an additional Vec allocation to enable code reuse of - // VarZeroVec::to_vec()'s. The alternative is to write a different - // implementation of get_serializable_bytes() which enables us to pull - // out the byte buffer components bit by bit and use serialize_seq + serialize_element - let vec = VarZeroVec::get_serializable_bytes(&self.to_vec()) - .ok_or_else(|| ser::Error::custom("VarZeroVec too large to be serialized"))?; - serializer.serialize_bytes(&vec) + serializer.serialize_bytes(self.get_encoded_slice()) } } } @@ -134,7 +126,7 @@ mod test { ]; #[test] fn test_serde_json() { - let zerovec_orig: VarZeroVec = VarZeroVec::try_from_bytes(BYTES).expect("parse"); + let zerovec_orig: VarZeroVec = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); let json_str = serde_json::to_string(&zerovec_orig).expect("serialize"); assert_eq!(JSON_STR, json_str); // VarZeroVec should deserialize from JSON to either Vec or VarZeroVec @@ -144,18 +136,18 @@ mod test { let zerovec_new: VarZeroVec = serde_json::from_str(&json_str).expect("deserialize from buffer to VarZeroVec"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); - assert!(zerovec_new.get_slice_for_borrowed().is_none()); + assert!(zerovec_new.is_owned()); } #[test] fn test_serde_bincode() { - let zerovec_orig: VarZeroVec = VarZeroVec::try_from_bytes(BYTES).expect("parse"); + let zerovec_orig: VarZeroVec = VarZeroVec::parse_byte_slice(BYTES).expect("parse"); let bincode_buf = bincode::serialize(&zerovec_orig).expect("serialize"); assert_eq!(BINCODE_BUF, bincode_buf); let zerovec_new: VarZeroVec = bincode::deserialize(&bincode_buf).expect("deserialize from buffer to VarZeroVec"); assert_eq!(zerovec_orig.to_vec(), zerovec_new.to_vec()); - assert!(zerovec_new.get_slice_for_borrowed().is_some()); + assert!(!zerovec_new.is_owned()); } #[test] @@ -166,7 +158,7 @@ mod test { .map(String::from) .collect::>(); let mut zerovec: VarZeroVec = - VarZeroVec::try_from_bytes(NONASCII_BYTES).expect("parse"); + VarZeroVec::parse_byte_slice(NONASCII_BYTES).expect("parse"); assert_eq!(zerovec.to_vec(), src_vec); let bincode_buf = bincode::serialize(&zerovec).expect("serialize"); let zerovec_result = diff --git a/utils/zerovec/src/varzerovec/ule.rs b/utils/zerovec/src/varzerovec/ule.rs new file mode 100644 index 00000000000..1fb9cb1d462 --- /dev/null +++ b/utils/zerovec/src/varzerovec/ule.rs @@ -0,0 +1,162 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use super::components::SliceComponents; +use super::*; +use core::marker::PhantomData; +use core::mem; + +/// A VarULE version of VarZeroVec allowing for VarZeroVecs to be nested indefinitely +/// +/// ```rust +/// use zerovec::VarZeroVec; +/// use zerovec::ZeroVec; +/// use zerovec::varzerovec::VarZeroVecULE; +/// use zerovec::ule::*; +/// let strings_1: Vec = vec!["foo".into(), "bar".into(), "baz".into()]; +/// let strings_2: Vec = vec!["twelve".into(), "seventeen".into(), "forty two".into()]; +/// let strings_3: Vec = vec!["我".into(), "喜歡".into(), "烏龍茶".into()]; +/// let strings_4: Vec = vec!["w".into(), "ω".into(), "文".into(), "𑄃".into()]; +/// let strings_12 = vec![strings_1.clone(), strings_2.clone()]; +/// let strings_34 = vec![strings_3.clone(), strings_4.clone()]; +/// let all_strings = vec![strings_12, strings_34]; +/// +/// let vzv_1 = VarZeroVec::from(&*strings_1); +/// let vzv_2 = VarZeroVec::from(&*strings_2); +/// let vzv_3 = VarZeroVec::from(&*strings_3); +/// let vzv_4 = VarZeroVec::from(&*strings_4); +/// let vzv_12 = VarZeroVec::from(&[vzv_1, vzv_2] as &[_]); +/// let vzv_34 = VarZeroVec::from(&[vzv_3, vzv_4] as &[_]); +/// let vzv_all = VarZeroVec::from(&[vzv_12, vzv_34] as &[_]); +/// +/// let reconstructed = vzv_all.iter() +/// .map(|v: &VarZeroVecULE<_>| { +/// v.iter().map(|x: &VarZeroVecULE<_>| x.as_varzerovec().to_vec()).collect::>() +/// }).collect::>(); +/// assert_eq!(reconstructed, all_strings); +/// +/// let bytes = vzv_all.get_encoded_slice(); +/// let vzv_from_bytes: VarZeroVec>> = VarZeroVec::parse_byte_slice(bytes).unwrap(); +/// assert_eq!(vzv_from_bytes, vzv_all); +/// ``` +// +// safety invariant: The slice MUST be one which parses to +// a valid SliceComponents +#[repr(transparent)] +pub struct VarZeroVecULE { + marker: PhantomData<[T]>, + /// The original slice this was constructed from + entire_slice: [u8], +} + +impl VarZeroVecULE { + #[inline] + fn get_components<'a>(&'a self) -> SliceComponents<'a, T> { + unsafe { + // safety: VarZeroVecULE is guaranteed to parse here + SliceComponents::from_bytes_unchecked(&self.entire_slice) + } + } + + /// Get the number of elements in this vector + pub fn len(&self) -> usize { + self.get_components().len() + } + + /// Returns `true` if the vector contains no elements. + pub fn is_empty(&self) -> bool { + self.get_components().is_empty() + } + + /// Obtain an iterator over VarZeroVecULE's elements + pub fn iter<'b>(&'b self) -> impl Iterator { + self.get_components().iter() + } + + /// Get one of VarZeroVecULE's elements, returning None if the index is out of bounds + pub fn get(&self, idx: usize) -> Option<&T::VarULE> { + self.get_components().get(idx) + } + + /// Get this [`VarZeroVecULE`] as a borrowed [`VarZeroVec`] + /// + /// If you wish to repeatedly call methods on this [`VarZeroVecULE`], + /// it is more efficient to perform this conversion first + pub fn as_varzerovec<'a>(&'a self) -> VarZeroVec<'a, T> { + VarZeroVec(VarZeroVecInner::Borrowed(self.get_components())) + } +} + +impl VarZeroVecULE +where + T: AsVarULE, + T::VarULE: Ord, +{ + /// Binary searches a sorted `VarZeroVecULE` for the given element. For more information, see + /// the primitive function [`binary_search`]. + /// + /// [`binary_search`]: https://doc.rust-lang.org/std/primitive.slice.html#method.binary_search + #[inline] + pub fn binary_search(&self, x: &T::VarULE) -> Result { + self.get_components().binary_search(x) + } +} +unsafe impl VarULE for VarZeroVecULE { + type Error = ParseErrorFor; + + fn parse_byte_slice(bytes: &[u8]) -> Result<&Self, Self::Error> { + let _: SliceComponents = SliceComponents::parse_byte_slice(bytes)?; + unsafe { Ok(Self::from_byte_slice_unchecked(bytes)) } + } + + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + // self is really just a wrapper around a byte slice + mem::transmute(bytes) + } + + fn as_byte_slice(&self) -> &[u8] { + &self.entire_slice + } +} + +impl AsVarULE for VarZeroVec<'static, T> +where + T: AsVarULE, + T: Clone, +{ + type VarULE = VarZeroVecULE; + #[inline] + fn as_unaligned(&self) -> &VarZeroVecULE { + let slice = self.get_encoded_slice(); + unsafe { + // safety: the slice is known to come from a valid parsed VZV + VarZeroVecULE::from_byte_slice_unchecked(slice) + } + } + #[inline] + fn from_unaligned(unaligned: &VarZeroVecULE) -> Self { + unaligned.as_varzerovec().into_owned() + } +} + +impl PartialEq> for VarZeroVecULE +where + T: AsVarULE, + T::VarULE: PartialEq, +{ + #[inline] + fn eq(&self, other: &VarZeroVecULE) -> bool { + // Note: T implements PartialEq but not T::ULE + self.iter().eq(other.iter()) + } +} + +impl fmt::Debug for VarZeroVecULE +where + T::VarULE: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} diff --git a/utils/zerovec/src/zerovec/mod.rs b/utils/zerovec/src/zerovec/mod.rs index 9f18a791457..de6bca3b1d2 100644 --- a/utils/zerovec/src/zerovec/mod.rs +++ b/utils/zerovec/src/zerovec/mod.rs @@ -44,7 +44,7 @@ use std::fmt; /// let nums: &[u16] = &[211, 281, 421, 461]; /// /// // Conversion from &[u8] to &[u16::ULE] is infallible. -/// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); +/// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// assert_eq!(zerovec.get(2), Some(421)); @@ -117,12 +117,12 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// assert_eq!(zerovec.get(2), Some(421)); /// ``` - pub fn try_from_bytes(bytes: &'a [u8]) -> Result::ULE as ULE>::Error> { + pub fn parse_byte_slice(bytes: &'a [u8]) -> Result::ULE as ULE>::Error> { let slice: &'a [T::ULE] = T::ULE::parse_byte_slice(bytes)?; Ok(Self::Borrowed(slice)) } @@ -168,7 +168,7 @@ where /// use zerovec::ule::AsULE; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(4, zerovec.len()); /// assert_eq!( @@ -189,10 +189,10 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// assert!(!zerovec.is_empty()); /// - /// let emptyvec: ZeroVec = ZeroVec::try_from_bytes(&[]).expect("infallible"); + /// let emptyvec: ZeroVec = ZeroVec::parse_byte_slice(&[]).expect("infallible"); /// assert!(emptyvec.is_empty()); /// ``` #[inline] @@ -313,7 +313,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.get(2), Some(421)); /// assert_eq!(zerovec.get(4), None); @@ -337,7 +337,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.first(), Some(211)); /// ``` @@ -356,7 +356,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.last(), Some(461)); /// ``` @@ -375,7 +375,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// let mut it = zerovec.iter(); /// /// assert_eq!(it.next(), Some(211)); @@ -397,7 +397,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// /// let owned = zerovec.into_owned(); @@ -422,7 +422,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let mut zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let mut zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); /// /// zerovec.make_mut().push(12_u16.as_unaligned()); @@ -458,7 +458,7 @@ where /// use zerovec::ZeroVec; /// /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let zerovec: ZeroVec = ZeroVec::try_from_bytes(bytes).expect("infallible"); + /// let zerovec: ZeroVec = ZeroVec::parse_byte_slice(bytes).expect("infallible"); /// /// assert_eq!(zerovec.binary_search(&281), Ok(1)); /// assert_eq!(zerovec.binary_search(&282), Err(2)); @@ -486,7 +486,7 @@ mod tests { assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); } { - let zerovec = ZeroVec::::try_from_bytes(TEST_BUFFER_LE).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(TEST_BUFFER_LE).unwrap(); assert_eq!(zerovec.get(0), Some(TEST_SLICE[0])); assert_eq!(zerovec.get(1), Some(TEST_SLICE[1])); assert_eq!(zerovec.get(2), Some(TEST_SLICE[2])); @@ -501,7 +501,7 @@ mod tests { assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); } { - let zerovec = ZeroVec::::try_from_bytes(TEST_BUFFER_LE).unwrap(); + let zerovec = ZeroVec::::parse_byte_slice(TEST_BUFFER_LE).unwrap(); assert_eq!(Ok(3), zerovec.binary_search(&0x0e0d0c)); assert_eq!(Err(3), zerovec.binary_search(&0x0c0d0c)); } @@ -511,73 +511,73 @@ mod tests { fn test_odd_alignment() { assert_eq!( Some(0x020100), - ZeroVec::::try_from_bytes(TEST_BUFFER_LE) + ZeroVec::::parse_byte_slice(TEST_BUFFER_LE) .unwrap() .get(0) ); assert_eq!( Some(0x04000201), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[1..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[1..]) .unwrap() .get(0) ); assert_eq!( Some(0x05040002), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[2..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[2..]) .unwrap() .get(0) ); assert_eq!( Some(0x06050400), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[3..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[3..]) .unwrap() .get(0) ); assert_eq!( Some(0x060504), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[4..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[4..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c00), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[75..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[75..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c00), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[3..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[3..]) .unwrap() .get(18) ); assert_eq!( Some(0x4e4d4c), - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[76..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[76..]) .unwrap() .get(0) ); assert_eq!( Some(0x4e4d4c), - ZeroVec::::try_from_bytes(TEST_BUFFER_LE) + ZeroVec::::parse_byte_slice(TEST_BUFFER_LE) .unwrap() .get(19) ); assert_eq!( None, - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[77..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[77..]) .unwrap() .get(0) ); assert_eq!( None, - ZeroVec::::try_from_bytes(TEST_BUFFER_LE) + ZeroVec::::parse_byte_slice(TEST_BUFFER_LE) .unwrap() .get(20) ); assert_eq!( None, - ZeroVec::::try_from_bytes(&TEST_BUFFER_LE[3..]) + ZeroVec::::parse_byte_slice(&TEST_BUFFER_LE[3..]) .unwrap() .get(19) ); diff --git a/utils/zerovec/src/zerovec/serde.rs b/utils/zerovec/src/zerovec/serde.rs index 31634d37798..9b22a2c54d6 100644 --- a/utils/zerovec/src/zerovec/serde.rs +++ b/utils/zerovec/src/zerovec/serde.rs @@ -36,7 +36,7 @@ where where E: de::Error, { - ZeroVec::try_from_bytes(bytes).map_err(de::Error::custom) + ZeroVec::parse_byte_slice(bytes).map_err(de::Error::custom) } fn visit_seq(self, mut seq: A) -> Result