diff --git a/utils/zerovec/src/map/borrowed.rs b/utils/zerovec/src/map/borrowed.rs new file mode 100644 index 00000000000..1956f9a4f82 --- /dev/null +++ b/utils/zerovec/src/map/borrowed.rs @@ -0,0 +1,220 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::AsULE; +use crate::ZeroVec; + +pub use super::kv::ZeroMapKV; +pub use super::vecs::{BorrowedZeroVecLike, MutableZeroVecLike, ZeroVecLike}; + +/// A borrowed-only version of [`ZeroMap`](super::ZeroMap) +/// +/// This is useful for fully-zero-copy deserialization from non-human-readable +/// serialization formats. It also has the advantage that it can return references that live for +/// the lifetime of the backing buffer as opposed to that of the [`ZeroMapBorrowed`] instance. +/// +/// # Examples +/// +/// ``` +/// use zerovec::map::ZeroMapBorrowed; +/// +/// // Example byte buffer representing the map { 1: "one" } +/// let BINCODE_BYTES: &[u8; 31] = &[ +/// 4, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, +/// 1, 0, 0, 0, 0, 0, 0, 0, 111, 110, 101 +/// ]; +/// +/// // Deserializing to ZeroMapBorrowed requires no heap allocations. 
+/// let zero_map: ZeroMapBorrowed = bincode::deserialize(BINCODE_BYTES) +/// .expect("Should deserialize successfully"); +/// assert_eq!(zero_map.get(&1), Some("one")); +/// ``` +/// +/// This can be obtained from a [`ZeroMap`](super::ZeroMap) via [`ZeroMap::as_borrowed`](super::ZeroMap::as_borrowed). +pub struct ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + pub(crate) keys: + <>::Container as MutableZeroVecLike<'a, K>>::BorrowedVariant, + pub(crate) values: + <>::Container as MutableZeroVecLike<'a, V>>::BorrowedVariant, +} + +impl<'a, K, V> Copy for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ +} +impl<'a, K, V> Clone for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + fn clone(&self) -> Self { + ZeroMapBorrowed { + keys: self.keys, + values: self.values, + } + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + /// The number of elements in the [`ZeroMapBorrowed`] + pub fn len(&self) -> usize { + self.values.len() + } + + /// Whether the [`ZeroMapBorrowed`] is empty + pub fn is_empty(&self) -> bool { + self.values.len() == 0 + } + + /// Get the value associated with `key`, if it exists. + /// + /// This is able to return values that live longer than the map itself + /// since they borrow directly from the backing buffer. This is the + /// primary advantage of using [`ZeroMapBorrowed`] over [`ZeroMap`](super::ZeroMap). 
+ /// + /// ```rust + /// use zerovec::ZeroMap; + /// use zerovec::map::ZeroMapBorrowed; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// let borrowed = map.as_borrowed(); + /// assert_eq!(borrowed.get(&1), Some("one")); + /// assert_eq!(borrowed.get(&3), None); + /// + /// let borrow = borrowed.get(&1); + /// drop(borrowed); + /// // still exists after the ZeroMapBorrowed has been dropped + /// assert_eq!(borrow, Some("one")); + /// ``` + pub fn get(&self, key: &K::NeedleType) -> Option<&'a V::GetType> { + let index = self.keys.binary_search(key).ok()?; + self.values.get_borrowed(index) + } + + /// Returns whether `key` is contained in this map + /// + /// ```rust + /// use zerovec::ZeroMap; + /// use zerovec::map::ZeroMapBorrowed; + /// + /// let mut map = ZeroMap::new(); + /// map.insert(&1, "one"); + /// map.insert(&2, "two"); + /// let borrowed = map.as_borrowed(); + /// assert_eq!(borrowed.contains_key(&1), true); + /// assert_eq!(borrowed.contains_key(&3), false); + /// ``` + pub fn contains_key(&self, key: &K::NeedleType) -> bool { + self.keys.binary_search(key).is_ok() + } + + /// Produce an ordered iterator over key-value pairs + pub fn iter<'b>( + &'b self, + ) -> impl Iterator< + Item = ( + &'a >::GetType, + &'a >::GetType, + ), + > + 'b { + (0..self.keys.len()).map(move |idx| { + ( + self.keys.get_borrowed(idx).unwrap(), + self.values.get_borrowed(idx).unwrap(), + ) + }) + } + + /// Produce an ordered iterator over keys + pub fn iter_keys<'b>(&'b self) -> impl Iterator>::GetType> + 'b { + (0..self.keys.len()).map(move |idx| self.keys.get_borrowed(idx).unwrap()) + } + + /// Produce an iterator over values, ordered by keys + pub fn iter_values<'b>( + &'b self, + ) -> impl Iterator>::GetType> + 'b { + (0..self.values.len()).map(move |idx| self.values.get_borrowed(idx).unwrap()) + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a, Container = ZeroVec<'a, 
V>>, + V: AsULE + Ord + Copy, +{ + /// For cases when `V` is fixed-size, obtain a direct copy of `V` instead of `V::ULE` + pub fn get_copied(&self, key: &K::NeedleType) -> Option { + let index = self.keys.binary_search(key).ok()?; + <[V::ULE]>::get(self.values, index) + .copied() + .map(V::from_unaligned) + } + + /// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references + /// to `V::ULE`, in cases when `V` is fixed-size + pub fn iter_copied_values<'b>( + &'b self, + ) -> impl Iterator>::GetType, V)> { + (0..self.keys.len()).map(move |idx| { + ( + self.keys.get(idx).unwrap(), + <[V::ULE]>::get(self.values, idx) + .copied() + .map(V::from_unaligned) + .unwrap(), + ) + }) + } +} + +impl<'a, K, V> ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a, Container = ZeroVec<'a, K>>, + V: ZeroMapKV<'a, Container = ZeroVec<'a, V>>, + K: AsULE + Copy + Ord, + V: AsULE + Copy + Ord, +{ + /// Similar to [`Self::iter()`] except it returns a direct copy of the keys and values instead of references + /// to `K::ULE` and `V::ULE`, in cases when `K` and `V` are fixed-size + #[allow(clippy::needless_lifetimes)] // Lifetime is necessary in impl Trait + pub fn iter_copied<'b>(&'b self) -> impl Iterator + 'b { + let keys = &self.keys; + let values = &self.values; + let len = <[K::ULE]>::len(keys); + (0..len).map(move |idx| { + ( + <[K::ULE]>::get(keys, idx) + .copied() + .map(K::from_unaligned) + .unwrap(), + <[V::ULE]>::get(values, idx) + .copied() + .map(V::from_unaligned) + .unwrap(), + ) + }) + } +} diff --git a/utils/zerovec/src/map/kv.rs b/utils/zerovec/src/map/kv.rs index f214641bda3..4415fd69e25 100644 --- a/utils/zerovec/src/map/kv.rs +++ b/utils/zerovec/src/map/kv.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
-use super::vecs::ZeroVecLike; +use super::vecs::MutableZeroVecLike; use crate::ule::*; use crate::VarZeroVec; use crate::ZeroVec; @@ -18,7 +18,7 @@ use core::cmp::Ordering; #[allow(clippy::upper_case_acronyms)] // KV is not an acronym pub trait ZeroMapKV<'a> { /// The container that can be used with this type: [`ZeroVec`] or [`VarZeroVec`]. - type Container: ZeroVecLike< + type Container: MutableZeroVecLike< 'a, Self, NeedleType = Self::NeedleType, @@ -32,7 +32,7 @@ pub trait ZeroMapKV<'a> { /// The type produced by `Container::get()` /// /// This type will be predetermined by the choice of `Self::Container` - type GetType: ?Sized; + type GetType: ?Sized + 'static; /// The type to use whilst serializing. This may not necessarily be `Self`, however it /// must serialize to the exact same thing as `Self` type SerializeType: ?Sized; diff --git a/utils/zerovec/src/map/mod.rs b/utils/zerovec/src/map/mod.rs index f32b7ffffdd..3a3fe654ac4 100644 --- a/utils/zerovec/src/map/mod.rs +++ b/utils/zerovec/src/map/mod.rs @@ -8,13 +8,15 @@ use crate::ule::AsULE; use crate::ZeroVec; use core::cmp::Ordering; +mod borrowed; mod kv; #[cfg(feature = "serde")] mod serde; mod vecs; +pub use borrowed::ZeroMapBorrowed; pub use kv::ZeroMapKV; -pub use vecs::ZeroVecLike; +pub use vecs::{MutableZeroVecLike, ZeroVecLike}; /// A zero-copy map datastructure, built on sorted binary-searchable [`ZeroVec`] /// and [`VarZeroVec`]. 
@@ -91,6 +93,14 @@ where } } + /// Obtain a borrowed version of this map + pub fn as_borrowed(&'a self) -> ZeroMapBorrowed<'a, K, V> { + ZeroMapBorrowed { + keys: self.keys.as_borrowed(), + values: self.values.as_borrowed(), + } + } + /// The number of elements in the [`ZeroMap`] pub fn len(&self) -> usize { self.values.len() @@ -297,3 +307,18 @@ where }) } } + +impl<'a, K, V> From> for ZeroMap<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, +{ + fn from(other: ZeroMapBorrowed<'a, K, V>) -> Self { + Self { + keys: K::Container::from_borrowed(other.keys), + values: V::Container::from_borrowed(other.values), + } + } +} diff --git a/utils/zerovec/src/map/serde.rs b/utils/zerovec/src/map/serde.rs index 18ed4a6f2a1..21f3b4fd8dc 100644 --- a/utils/zerovec/src/map/serde.rs +++ b/utils/zerovec/src/map/serde.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use super::{ZeroMap, ZeroMapKV, ZeroVecLike}; +use super::{MutableZeroVecLike, ZeroMap, ZeroMapBorrowed, ZeroMapKV, ZeroVecLike}; use alloc::boxed::Box; use core::fmt; use core::marker::PhantomData; @@ -39,6 +39,26 @@ where } } +/// This impl can be made available by enabling the optional `serde` feature of the `zerovec` crate +impl<'a, K, V> Serialize for ZeroMapBorrowed<'a, K, V> +where + K: ZeroMapKV<'a>, + V: ZeroMapKV<'a>, + K: ?Sized, + V: ?Sized, + K::Container: Serialize, + V::Container: Serialize, + K::SerializeType: Serialize, + V::SerializeType: Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + ZeroMap::::from(*self).serialize(serializer) + } +} + /// Modified example from https://serde.rs/deserialize-map.html struct ZeroMapMapVisitor { #[allow(clippy::type_complexity)] // it's a marker type, complexity doesn't matter @@ -126,6 +146,46 @@ where } } +/// This impl can be made available by enabling the optional `serde` feature of 
the `zerovec` crate +impl<'de, K: ?Sized, V: ?Sized> Deserialize<'de> for ZeroMapBorrowed<'de, K, V> +where + K: Ord, + K::Container: Deserialize<'de>, + V::Container: Deserialize<'de>, + K: ZeroMapKV<'de>, + V: ZeroMapKV<'de>, + K::OwnedType: Deserialize<'de>, + V::OwnedType: Deserialize<'de>, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + Err(de::Error::custom( + "ZeroMapBorrowed cannot be deserialized from human-readable formats", + )) + } else { + let deserialized: ZeroMap<'de, K, V> = ZeroMap::deserialize(deserializer)?; + let keys = if let Some(keys) = deserialized.keys.as_borrowed_inner() { + keys + } else { + return Err(de::Error::custom( + "ZeroMapBorrowed can only deserialize in zero-copy ways", + )); + }; + let values = if let Some(values) = deserialized.values.as_borrowed_inner() { + values + } else { + return Err(de::Error::custom( + "ZeroMapBorrowed can only deserialize in zero-copy ways", + )); + }; + Ok(Self { keys, values }) + } + } +} + #[cfg(test)] mod test { use super::super::*; @@ -165,5 +225,12 @@ mod test { new_map.iter().collect::>(), map.iter().collect::>() ); + + let new_map: ZeroMapBorrowed = + bincode::deserialize(&bincode_bytes).expect("deserialize"); + assert_eq!( + new_map.iter().collect::>(), + map.iter().collect::>() + ); } } diff --git a/utils/zerovec/src/map/vecs.rs b/utils/zerovec/src/map/vecs.rs index 0706ac0ae5b..72bbcdf2fe6 100644 --- a/utils/zerovec/src/map/vecs.rs +++ b/utils/zerovec/src/map/vecs.rs @@ -4,6 +4,7 @@ use crate::ule::*; use crate::varzerovec::owned::VarZeroVecOwned; +use crate::varzerovec::VarZeroVecBorrowed; use crate::VarZeroVec; use crate::ZeroVec; use alloc::boxed::Box; @@ -17,15 +18,45 @@ pub trait ZeroVecLike<'a, T: ?Sized> { /// The type received by `Self::binary_search()` type NeedleType: ?Sized; /// The type returned by `Self::get()` - type GetType: ?Sized; - /// The type returned by `Self::remove()` and `Self::replace()` - 
type OwnedType; + type GetType: ?Sized + 'static; /// Search for a key in a sorted vector, returns `Ok(index)` if found, /// returns `Err(insert_index)` if not found, where `insert_index` is the /// index where it should be inserted to maintain sort order. fn binary_search(&self, k: &Self::NeedleType) -> Result; /// Get element at `index` fn get(&self, index: usize) -> Option<&Self::GetType>; + /// The length of this vector + fn len(&self) -> usize; + /// Check if this vector is in ascending order according to `T`s `Ord` impl + fn is_ascending(&self) -> bool; + /// Check if this vector is empty + fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). You +/// should not be implementing or calling this trait directly. +/// +/// This trait augments [`ZeroVecLike`] with methods allowing for taking +/// longer references to the underlying buffer, for borrowed-only vector types. +pub trait BorrowedZeroVecLike<'a, T: ?Sized>: ZeroVecLike<'a, T> { + /// Get element at `index`, with a longer lifetime + fn get_borrowed(&self, index: usize) -> Option<&'a Self::GetType>; +} + +/// Trait abstracting over [`ZeroVec`] and [`VarZeroVec`], for use in [`ZeroMap`](super::ZeroMap). You +/// should not be implementing or calling this trait directly. +/// +/// This trait augments [`ZeroVecLike`] with methods allowing for mutation of the underlying +/// vector for owned vector types. 
+pub trait MutableZeroVecLike<'a, T: ?Sized>: ZeroVecLike<'a, T> { + /// The type returned by `Self::remove()` and `Self::replace()` + type OwnedType; + /// A fully borrowed version of this + type BorrowedVariant: ZeroVecLike<'a, T, NeedleType = Self::NeedleType, GetType = Self::GetType> + + BorrowedZeroVecLike<'a, T> + + Copy; /// Insert an element at `index` fn insert(&mut self, index: usize, value: &T); /// Remove the element at `index` (panicking if nonexistant) @@ -34,8 +65,6 @@ pub trait ZeroVecLike<'a, T: ?Sized> { fn replace(&mut self, index: usize, value: &T) -> Self::OwnedType; /// Push an element to the end of this vector fn push(&mut self, value: &T); - /// The length of this vector - fn len(&self) -> usize; /// Create a new, empty vector fn new() -> Self; /// Create a new, empty vector, with given capacity @@ -44,12 +73,29 @@ pub trait ZeroVecLike<'a, T: ?Sized> { fn clear(&mut self); /// Reserve space for `addl` additional elements fn reserve(&mut self, addl: usize); - /// Check if this vector is in ascending order according to `T`s `Ord` impl - fn is_ascending(&self) -> bool; - /// Check if this vector is empty - fn is_empty(&self) -> bool { - self.len() == 0 - } + /// Construct a borrowed variant by borrowing from `&self`. + /// + /// This function behaves like `&'b self -> Self::BorrowedVariant<'b>`, + /// where `'b` is the lifetime of the reference to this object. + /// + /// Note: We rely on the compiler recognizing `'a` and `'b` as covariant and + /// casting `&'b Self<'a>` to `&'b Self<'b>` when this gets called, which works + /// out for `ZeroVec` and `VarZeroVec` containers just fine. + fn as_borrowed(&'a self) -> Self::BorrowedVariant; + + /// Extract the inner borrowed variant if possible. Returns `None` if the data is owned. + /// + /// This function behaves like `&'_ self -> Self::BorrowedVariant<'a>`, + /// where `'a` is the lifetime of this object's borrowed data. 
+ /// + /// This function is similar to matching the `Borrowed` variant of `ZeroVec` + /// or `VarZeroVec`, returning the inner borrowed type. + fn as_borrowed_inner(&self) -> Option; + + /// Construct from the borrowed version of the type + /// + /// These are useful to ensure serialization parity between borrowed and owned versions + fn from_borrowed(b: Self::BorrowedVariant) -> Self; } impl<'a, T> ZeroVecLike<'a, T> for ZeroVec<'a, T> @@ -58,13 +104,60 @@ where { type NeedleType = T; type GetType = T::ULE; - type OwnedType = T; fn binary_search(&self, k: &T) -> Result { self.binary_search(k) } fn get(&self, index: usize) -> Option<&T::ULE> { self.get_ule_ref(index) } + fn len(&self) -> usize { + self.len() + } + fn is_ascending(&self) -> bool { + self.as_slice() + .windows(2) + .all(|w| T::from_unaligned(w[1]).cmp(&T::from_unaligned(w[0])) == Ordering::Greater) + } +} + +impl<'a, T> ZeroVecLike<'a, T> for &'a [T::ULE] +where + T: AsULE + Ord + Copy, +{ + type NeedleType = T; + type GetType = T::ULE; + fn binary_search(&self, k: &T) -> Result { + ZeroVec::::Borrowed(self).binary_search(k) + } + fn get(&self, index: usize) -> Option<&T::ULE> { + <[T::ULE]>::get(self, index) + } + fn len(&self) -> usize { + <[T::ULE]>::len(self) + } + fn is_ascending(&self) -> bool { + ZeroVec::::Borrowed(self) + .as_slice() + .windows(2) + .all(|w| T::from_unaligned(w[1]).cmp(&T::from_unaligned(w[0])) == Ordering::Greater) + } +} + +impl<'a, T> BorrowedZeroVecLike<'a, T> for &'a [T::ULE] +where + T: AsULE + Ord + Copy, +{ + fn get_borrowed(&self, index: usize) -> Option<&'a T::ULE> { + <[T::ULE]>::get(self, index) + } +} + +impl<'a, T> MutableZeroVecLike<'a, T> for ZeroVec<'a, T> +where + T: AsULE + Ord + Copy, +{ + type OwnedType = T; + type BorrowedVariant = &'a [T::ULE]; fn insert(&mut self, index: usize, value: &T) { self.to_mut().insert(index, value.as_unaligned()) } @@ -78,9 +171,6 @@ where fn push(&mut self, value: &T) { self.to_mut().push(value.as_unaligned()) } - fn 
len(&self) -> usize { - self.len() - } fn new() -> Self { ZeroVec::Owned(Vec::new()) } @@ -93,10 +183,19 @@ where fn reserve(&mut self, addl: usize) { self.to_mut().reserve(addl) } - fn is_ascending(&self) -> bool { + + fn as_borrowed(&'a self) -> &'a [T::ULE] { self.as_slice() - .windows(2) - .all(|w| T::from_unaligned(w[1]).cmp(&T::from_unaligned(w[0])) == Ordering::Greater) + } + fn as_borrowed_inner(&self) -> Option<&'a [T::ULE]> { + if let ZeroVec::Borrowed(b) = *self { + Some(b) + } else { + None + } + } + fn from_borrowed(b: &'a [T::ULE]) -> Self { + ZeroVec::Borrowed(b) } } @@ -108,13 +207,81 @@ where { type NeedleType = T; type GetType = T; - type OwnedType = Box; fn binary_search(&self, k: &T) -> Result { self.binary_search(k) } fn get(&self, index: usize) -> Option<&T> { self.get(index) } + fn len(&self) -> usize { + self.len() + } + fn is_ascending(&self) -> bool { + if !self.is_empty() { + let mut prev = self.get(0).unwrap(); + for element in self.iter().skip(1) { + if element.cmp(prev) != Ordering::Greater { + return false; + } + prev = element; + } + } + true + } +} + +impl<'a, T> ZeroVecLike<'a, T> for VarZeroVecBorrowed<'a, T> +where + T: VarULE, + T: Ord, + T: ?Sized, +{ + type NeedleType = T; + type GetType = T; + fn binary_search(&self, k: &T) -> Result { + Self::binary_search(self, k) + } + fn get(&self, index: usize) -> Option<&T> { + // using UFCS to avoid accidental recursion + Self::get(*self, index) + } + fn len(&self) -> usize { + Self::len(*self) + } + fn is_ascending(&self) -> bool { + if !self.is_empty() { + let mut prev = self.get(0).unwrap(); + for element in self.iter().skip(1) { + if element.cmp(prev) != Ordering::Greater { + return false; + } + prev = element; + } + } + true + } +} + +impl<'a, T> BorrowedZeroVecLike<'a, T> for VarZeroVecBorrowed<'a, T> +where + T: VarULE, + T: Ord, + T: ?Sized, +{ + fn get_borrowed(&self, index: usize) -> Option<&'a T> { + // using UFCS to avoid accidental recursion + Self::get(*self, index) + } 
+} + +impl<'a, T> MutableZeroVecLike<'a, T> for VarZeroVec<'a, T> +where + T: VarULE, + T: Ord, + T: ?Sized, +{ + type OwnedType = Box; + type BorrowedVariant = VarZeroVecBorrowed<'a, T>; fn insert(&mut self, index: usize, value: &T) { self.make_mut().insert(index, value) } @@ -134,9 +301,6 @@ where let len = self.len(); self.make_mut().insert(len, value) } - fn len(&self) -> usize { - self.len() - } fn new() -> Self { VarZeroVecOwned::new().into() } @@ -149,16 +313,17 @@ where fn reserve(&mut self, addl: usize) { self.make_mut().reserve(addl) } - fn is_ascending(&self) -> bool { - if !self.is_empty() { - let mut prev = self.get(0).unwrap(); - for element in self.iter().skip(1) { - if element.cmp(prev) != Ordering::Greater { - return false; - } - prev = element; - } + fn as_borrowed(&'a self) -> VarZeroVecBorrowed<'a, T> { + self.as_borrowed() + } + fn as_borrowed_inner(&self) -> Option> { + if let VarZeroVec::Borrowed(b) = *self { + Some(b) + } else { + None } - true + } + fn from_borrowed(b: VarZeroVecBorrowed<'a, T>) -> Self { + VarZeroVec::Borrowed(b) } }