Skip to content

Commit

Permalink
Support for infinitely nesting VarZeroVecs (#1065)
Browse files Browse the repository at this point in the history
* Add SliceComponents::from_bytes_unchecked

* Add VZVULE

* Impl AsVarULE for VarZeroVec

* Add basic readonly VarZeroVecOwned

* Add mutation ops

* Use VZVOwned in VZV

* Add test

* fix ci

* no unsafe on from_byte_slice_unchecked_mut

* try_from_bytes -> parse_byte_slice

* Address some review comments

* get issue number

* require no padding bytes in VarULE

* fix compile

* rename try_from_bytes

* fix tidy

* satisfy clippy

* safety comment
  • Loading branch information
Manishearth authored Sep 23, 2021
1 parent c6ec54a commit 877707a
Show file tree
Hide file tree
Showing 19 changed files with 722 additions and 148 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions experimental/codepointtrie/src/planes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ pub fn get_planes_trie() -> CodePointTrie<'static, u8, Small> {
0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x10, 0x10, 0x10, 0,
];
let index: ZeroVec<u16> = ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible");
let data: ZeroVec<u8> = ZeroVec::try_from_bytes(data_8_array).expect("infallible");
let index: ZeroVec<u16> = ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible");
let data: ZeroVec<u8> = ZeroVec::parse_byte_slice(data_8_array).expect("infallible");
let index_length = 1168;
let data_length = 372;
let high_start = 0x100000;
Expand Down Expand Up @@ -291,7 +291,7 @@ mod tests {
fn test_index_byte_array_literal() {
let index_array_as_bytes: &[u8] = super::INDEX_ARRAY_AS_BYTES;
let index_zv_bytes: ZeroVec<u16> =
ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible");
ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible");
let index_zv_aligned: ZeroVec<u16> = ZeroVec::from_slice(INDEX_ARRAY);
assert_eq!(index_zv_bytes, index_zv_aligned);
}
Expand Down
1 change: 0 additions & 1 deletion utils/zerovec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ include = [
all-features = true

[dependencies]
either = "1.6.1"
serde = { version = "1.0", optional = true , default-features = false, features = ["alloc"] }
yoke = { path = "../yoke", version = "0.2.0", optional = true }

Expand Down
2 changes: 1 addition & 1 deletion utils/zerovec/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pub struct DataStruct<'data> {

let data = DataStruct {
nums: ZeroVec::from_slice(&[211, 281, 421, 461]),
strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]),
strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]),
};
let bincode_bytes = bincode::serialize(&data)
.expect("Serialization should be successful");
Expand Down
8 changes: 4 additions & 4 deletions utils/zerovec/benches/vzv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn overview_bench(c: &mut Criterion) {
let seed = 42;
let (string_vec, _) = random_alphanums(2..=10, 100, seed);
let bytes: Vec<u8> = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap();
let vzv = VarZeroVec::<String>::try_from_bytes(black_box(bytes.as_slice())).unwrap();
let vzv = VarZeroVec::<String>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();

c.bench_function("vzv/overview", |b| {
b.iter(|| {
Expand All @@ -73,7 +73,7 @@ fn char_count_benches(c: &mut Criterion) {
let seed = 2021;
let (string_vec, _) = random_alphanums(2..=20, 100, seed);
let bytes: Vec<u8> = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap();
let vzv = VarZeroVec::<String>::try_from_bytes(black_box(bytes.as_slice())).unwrap();
let vzv = VarZeroVec::<String>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();

// *** Count chars in vec of 100 strings ***
c.bench_function("vzv/char_count/slice", |b| {
Expand All @@ -100,7 +100,7 @@ fn binary_search_benches(c: &mut Criterion) {
let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
let (needles, _) = random_alphanums(2..=20, 10, seed);
let bytes: Vec<u8> = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap();
let vzv = VarZeroVec::<String>::try_from_bytes(black_box(bytes.as_slice())).unwrap();
let vzv = VarZeroVec::<String>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
let single_needle = "lmnop".to_string();

// *** Binary search vec of 500 strings 10 times ***
Expand Down Expand Up @@ -139,7 +139,7 @@ fn serde_benches(c: &mut Criterion) {
let seed = 2021;
let (string_vec, _) = random_alphanums(2..=20, 100, seed);
let bincode_vec = bincode::serialize(&string_vec).unwrap();
let vzv = VarZeroVec::from(string_vec);
let vzv = VarZeroVec::from(&*string_vec);
let bincode_vzv = bincode::serialize(&vzv).unwrap();

// *** Deserialize vec of 100 strings ***
Expand Down
8 changes: 4 additions & 4 deletions utils/zerovec/benches/zerovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ where
buffer
.0
.extend(ZeroVec::from_slice(vec.as_slice()).as_bytes());
ZeroVec::<T>::try_from_bytes(&buffer.0[1..]).unwrap()
ZeroVec::<T>::parse_byte_slice(&buffer.0[1..]).unwrap()
}

fn overview_bench(c: &mut Criterion) {
c.bench_function("zerovec/overview", |b| {
b.iter(|| {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.iter()
.sum::<u32>()
Expand All @@ -83,7 +83,7 @@ fn sum_benches(c: &mut Criterion) {

c.bench_function("zerovec/sum/sample/zerovec", |b| {
b.iter(|| {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.iter()
.sum::<u32>()
Expand All @@ -98,7 +98,7 @@ fn binary_search_benches(c: &mut Criterion) {
});

c.bench_function("zerovec/binary_search/sample/zerovec", |b| {
let zerovec = ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap();
let zerovec = ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap();
b.iter(|| zerovec.binary_search(&0x0c0d0c));
});

Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/benches/zerovec_iai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn sum_slice() -> u32 {
}

fn sum_zerovec() -> u32 {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.iter()
.sum::<u32>()
Expand All @@ -26,7 +26,7 @@ fn binarysearch_slice() -> Result<usize, usize> {
}

fn binarysearch_zerovec() -> Result<usize, usize> {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.binary_search(&0x0c0d0c)
}
Expand Down
16 changes: 9 additions & 7 deletions utils/zerovec/benches/zerovec_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ fn random_numbers(count: usize) -> Vec<u32> {
fn overview_bench(c: &mut Criterion) {
c.bench_function("zerovec_serde/overview", |b| {
// Same as "zerovec_serde/deserialize_sum/u32/zerovec"
let buffer =
bincode::serialize(&ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap())
.unwrap();
let buffer = bincode::serialize(
&ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
)
.unwrap();
b.iter(|| {
bincode::deserialize::<ZeroVec<u32>>(&buffer)
.unwrap()
Expand Down Expand Up @@ -71,9 +72,10 @@ fn u32_benches(c: &mut Criterion) {
});

c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| {
let buffer =
bincode::serialize(&ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap())
.unwrap();
let buffer = bincode::serialize(
&ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
)
.unwrap();
b.iter(|| {
bincode::deserialize::<ZeroVec<u32>>(&buffer)
.unwrap()
Expand Down Expand Up @@ -131,7 +133,7 @@ fn stress_benches(c: &mut Criterion) {
});

// *** Compute sum of vec of 100 `u32` ***
let zerovec = ZeroVec::<u32>::try_from_bytes(zerovec_aligned.as_bytes()).unwrap();
let zerovec = ZeroVec::<u32>::parse_byte_slice(zerovec_aligned.as_bytes()).unwrap();
c.bench_function("zerovec_serde/sum/stress/zerovec", |b| {
b.iter(|| black_box(&zerovec).iter().sum::<u32>());
});
Expand Down
8 changes: 6 additions & 2 deletions utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
//!
//! let data = DataStruct {
//! nums: ZeroVec::from_slice(&[211, 281, 421, 461]),
//! strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]),
//! strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]),
//! };
//! let bincode_bytes = bincode::serialize(&data)
//! .expect("Serialization should be successful");
Expand All @@ -79,11 +79,15 @@
//! # } // feature = "serde"
//! ```
// This crate does a lot of nuanced lifetime manipulation; being explicit
// is better here.
#![allow(clippy::needless_lifetimes)]

pub mod map;
#[cfg(test)]
pub mod samples;
pub mod ule;
mod varzerovec;
pub mod varzerovec;
mod zerovec;

#[cfg(feature = "yoke")]
Expand Down
12 changes: 7 additions & 5 deletions utils/zerovec/src/map/vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ule::*;
use crate::varzerovec::owned::VarZeroVecOwned;
use crate::VarZeroVec;
use crate::ZeroVec;
use std::cmp::Ordering;
Expand Down Expand Up @@ -108,26 +109,27 @@ where
self.get(index)
}
fn insert(&mut self, index: usize, value: T) {
self.make_mut().insert(index, value)
self.make_mut().insert(index, &value)
}
fn remove(&mut self, index: usize) -> T {
self.make_mut().remove(index)
}
fn replace(&mut self, index: usize, value: T) -> T {
let vec = self.make_mut();
mem::replace(&mut vec[index], value)
vec.replace(index, value)
}
fn push(&mut self, value: T) {
self.make_mut().push(value)
let len = self.len();
self.make_mut().insert(len, &value)
}
fn len(&self) -> usize {
self.len()
}
fn new() -> Self {
Vec::new().into()
VarZeroVecOwned::new().into()
}
fn with_capacity(cap: usize) -> Self {
Vec::with_capacity(cap).into()
VarZeroVecOwned::with_capacity(cap).into()
}
fn clear(&mut self) {
self.make_mut().clear()
Expand Down
6 changes: 5 additions & 1 deletion utils/zerovec/src/ule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ pub trait AsVarULE {
///
/// # Safety
///
/// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait.
/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return
/// a slice of initialized bytes provided that `Self` is initialized.
///
/// [`VarULE::from_byte_slice_unchecked()`] _must_ be implemented to return the same result
/// as [`VarULE::parse_byte_slice()`] whenever both are passed the same byte slice and that slice parses successfully.
pub unsafe trait VarULE: 'static {
/// The error type to be used by [`VarULE::parse_byte_slice()`]
type Error;
Expand Down
10 changes: 10 additions & 0 deletions utils/zerovec/src/ule/plain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ macro_rules! impl_byte_slice_size {
unsafe { std::slice::from_raw_parts(data as *const u8, len) }
}
}

impl PlainOldULE<$size> {
/// Reinterprets a mutable byte slice as a mutable slice of `Self`.
///
/// The returned slice has `bytes.len() / $size` elements. Because this is
/// an integer division, any trailing bytes that do not fill a whole
/// element are not part of the returned slice.
///
/// The returned slice mutably borrows `bytes` for its whole lifetime.
#[inline]
pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
let data = bytes.as_mut_ptr();
// Round down to a whole number of elements so that the resulting slice
// never extends past the end of `bytes`.
let len = bytes.len() / $size;
// SAFETY: Self is transparent over [u8; $size], and `len * $size` is at
// most `bytes.len()`, so the new slice covers only memory owned by `bytes`.
unsafe { std::slice::from_raw_parts_mut(data as *mut Self, len) }
}
}
};
}

Expand Down
56 changes: 53 additions & 3 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn usizeify(x: PlainOldULE<4>) -> usize {
///
/// This is where the actual work involved in VarZeroVec happens
///
/// See [`SliceComponents::try_from_bytes()`] for information on the internal invariants involved
/// See [`SliceComponents::parse_byte_slice()`] for information on the internal invariants involved
pub struct SliceComponents<'a, T> {
/// The list of indices into the `things` slice
indices: &'a [PlainOldULE<4>],
Expand Down Expand Up @@ -50,7 +50,7 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
/// - `indices[len - 1]..things.len()` must index into a valid section of
/// `things`, such that it parses to a `T::VarULE`
#[inline]
pub fn try_from_bytes(slice: &'a [u8]) -> Result<Self, ParseErrorFor<T>> {
pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ParseErrorFor<T>> {
if slice.is_empty() {
return Ok(SliceComponents {
indices: &[],
Expand Down Expand Up @@ -87,6 +87,39 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
Ok(components)
}

/// Construct a [`SliceComponents`] from a byte slice that has previously
/// successfully returned a [`SliceComponents`] when passed to
/// [`SliceComponents::parse_byte_slice()`]. Will return the same
/// object as one would get from calling [`SliceComponents::parse_byte_slice()`].
///
/// An empty slice produces a [`SliceComponents`] with empty `indices` and
/// empty `things`. Otherwise the slice is split, without any bounds checks
/// or validation, into three regions:
///
/// - bytes `0..4`: the element count `len`
/// - bytes `4..4 * len + 4`: the `indices` table, 4 bytes per element
/// - bytes `4 * len + 4..`: `things`, the remaining bytes
///
/// # Safety
/// The bytes must have previously successfully run through
/// [`SliceComponents::parse_byte_slice()`]
pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
// An empty buffer has no length prefix to read, so handle it up front.
if slice.is_empty() {
return SliceComponents {
indices: &[],
things: &[],
entire_slice: slice,
marker: PhantomData,
};
}
// Unchecked read of the 4-byte length prefix; the caller's contract is
// what guarantees these bytes exist.
let len_bytes = slice.get_unchecked(0..4);
let len_ule = PlainOldULE::<4>::from_byte_slice_unchecked(len_bytes);

// Decode the prefix as a `u32` and widen it to `usize` for indexing.
let len = u32::from_unaligned(len_ule.get_unchecked(0)) as usize;
// The index table directly follows the prefix and holds `len` 4-byte entries.
let indices_bytes = slice.get_unchecked(4..4 * len + 4);
let indices = PlainOldULE::<4>::from_byte_slice_unchecked(indices_bytes);
// Everything after the index table is element data. It is not inspected here.
let things = slice.get_unchecked(4 * len + 4..);

SliceComponents {
indices,
things,
entire_slice: slice,
marker: PhantomData,
}
}

#[inline]
pub fn len(self) -> usize {
self.indices.len()
Expand All @@ -98,7 +131,6 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
}

#[inline]
#[cfg(feature = "serde")]
pub fn entire_slice(self) -> &'a [u8] {
self.entire_slice
}
Expand Down Expand Up @@ -192,6 +224,24 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
.chain(last)
.map(|s| unsafe { T::VarULE::from_byte_slice_unchecked(s) })
}

/// Builds an owned `Vec<T>` by converting every element yielded by
/// [`Self::iter()`] with `T::from_unaligned`, preserving iteration order.
pub fn to_vec(self) -> Vec<T>
where
    T: Clone,
{
    let mut owned = Vec::new();
    owned.extend(self.iter().map(T::from_unaligned));
    owned
}

/// Renders this value as a human-readable string for debugging.
///
/// Only the `indices` table is included, with each entry converted via
/// `u32::from_unaligned`.
#[allow(unused)] // useful for debugging
pub(crate) fn dump(&self) -> String {
    let decoded: Vec<_> = self.indices.iter().map(u32::from_unaligned).collect();
    format!("SliceComponents {{ indices: {:?} }}", decoded)
}
}

impl<'a, T> SliceComponents<'a, T>
Expand Down
Loading

0 comments on commit 877707a

Please sign in to comment.