Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for infinitely nesting VarZeroVecs #1065

Merged
merged 18 commits into from
Sep 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions experimental/codepointtrie/src/planes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ pub fn get_planes_trie() -> CodePointTrie<'static, u8, Small> {
0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x10, 0x10, 0x10, 0,
];
let index: ZeroVec<u16> = ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible");
let data: ZeroVec<u8> = ZeroVec::try_from_bytes(data_8_array).expect("infallible");
let index: ZeroVec<u16> = ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible");
let data: ZeroVec<u8> = ZeroVec::parse_byte_slice(data_8_array).expect("infallible");
let index_length = 1168;
let data_length = 372;
let high_start = 0x100000;
Expand Down Expand Up @@ -291,7 +291,7 @@ mod tests {
fn test_index_byte_array_literal() {
let index_array_as_bytes: &[u8] = super::INDEX_ARRAY_AS_BYTES;
let index_zv_bytes: ZeroVec<u16> =
ZeroVec::try_from_bytes(index_array_as_bytes).expect("infallible");
ZeroVec::parse_byte_slice(index_array_as_bytes).expect("infallible");
let index_zv_aligned: ZeroVec<u16> = ZeroVec::from_slice(INDEX_ARRAY);
assert_eq!(index_zv_bytes, index_zv_aligned);
}
Expand Down
1 change: 0 additions & 1 deletion utils/zerovec/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ include = [
all-features = true

[dependencies]
either = "1.6.1"
serde = { version = "1.0", optional = true , default-features = false, features = ["alloc"] }
yoke = { path = "../yoke", version = "0.2.0", optional = true }

Expand Down
2 changes: 1 addition & 1 deletion utils/zerovec/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub struct DataStruct<'data> {

let data = DataStruct {
nums: ZeroVec::from_slice(&[211, 281, 421, 461]),
strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]),
strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]),
};
let bincode_bytes = bincode::serialize(&data)
.expect("Serialization should be successful");
Expand Down
8 changes: 4 additions & 4 deletions utils/zerovec/benches/vzv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ fn overview_bench(c: &mut Criterion) {
let seed = 42;
let (string_vec, _) = random_alphanums(2..=10, 100, seed);
let bytes: Vec<u8> = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap();
let vzv = VarZeroVec::<String>::try_from_bytes(black_box(bytes.as_slice())).unwrap();
let vzv = VarZeroVec::<String>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();

c.bench_function("vzv/overview", |b| {
b.iter(|| {
Expand All @@ -73,7 +73,7 @@ fn char_count_benches(c: &mut Criterion) {
let seed = 2021;
let (string_vec, _) = random_alphanums(2..=20, 100, seed);
let bytes: Vec<u8> = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap();
let vzv = VarZeroVec::<String>::try_from_bytes(black_box(bytes.as_slice())).unwrap();
let vzv = VarZeroVec::<String>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();

// *** Count chars in vec of 100 strings ***
c.bench_function("vzv/char_count/slice", |b| {
Expand All @@ -100,7 +100,7 @@ fn binary_search_benches(c: &mut Criterion) {
let (string_vec, seed) = random_alphanums(2..=20, 500, seed);
let (needles, _) = random_alphanums(2..=20, 10, seed);
let bytes: Vec<u8> = VarZeroVec::get_serializable_bytes(string_vec.as_slice()).unwrap();
let vzv = VarZeroVec::<String>::try_from_bytes(black_box(bytes.as_slice())).unwrap();
let vzv = VarZeroVec::<String>::parse_byte_slice(black_box(bytes.as_slice())).unwrap();
let single_needle = "lmnop".to_string();

// *** Binary search vec of 500 strings 10 times ***
Expand Down Expand Up @@ -139,7 +139,7 @@ fn serde_benches(c: &mut Criterion) {
let seed = 2021;
let (string_vec, _) = random_alphanums(2..=20, 100, seed);
let bincode_vec = bincode::serialize(&string_vec).unwrap();
let vzv = VarZeroVec::from(string_vec);
let vzv = VarZeroVec::from(&*string_vec);
let bincode_vzv = bincode::serialize(&vzv).unwrap();

// *** Deserialize vec of 100 strings ***
Expand Down
8 changes: 4 additions & 4 deletions utils/zerovec/benches/zerovec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@ where
buffer
.0
.extend(ZeroVec::from_slice(vec.as_slice()).as_bytes());
ZeroVec::<T>::try_from_bytes(&buffer.0[1..]).unwrap()
ZeroVec::<T>::parse_byte_slice(&buffer.0[1..]).unwrap()
}

fn overview_bench(c: &mut Criterion) {
c.bench_function("zerovec/overview", |b| {
b.iter(|| {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.iter()
.sum::<u32>()
Expand All @@ -83,7 +83,7 @@ fn sum_benches(c: &mut Criterion) {

c.bench_function("zerovec/sum/sample/zerovec", |b| {
b.iter(|| {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.iter()
.sum::<u32>()
Expand All @@ -98,7 +98,7 @@ fn binary_search_benches(c: &mut Criterion) {
});

c.bench_function("zerovec/binary_search/sample/zerovec", |b| {
let zerovec = ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap();
let zerovec = ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap();
b.iter(|| zerovec.binary_search(&0x0c0d0c));
});

Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/benches/zerovec_iai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn sum_slice() -> u32 {
}

fn sum_zerovec() -> u32 {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.iter()
.sum::<u32>()
Expand All @@ -26,7 +26,7 @@ fn binarysearch_slice() -> Result<usize, usize> {
}

fn binarysearch_zerovec() -> Result<usize, usize> {
ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE))
ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE))
.unwrap()
.binary_search(&0x0c0d0c)
}
Expand Down
16 changes: 9 additions & 7 deletions utils/zerovec/benches/zerovec_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ fn random_numbers(count: usize) -> Vec<u32> {
fn overview_bench(c: &mut Criterion) {
c.bench_function("zerovec_serde/overview", |b| {
// Same as "zerovec_serde/deserialize_sum/u32/zerovec"
let buffer =
bincode::serialize(&ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap())
.unwrap();
let buffer = bincode::serialize(
&ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
)
.unwrap();
b.iter(|| {
bincode::deserialize::<ZeroVec<u32>>(&buffer)
.unwrap()
Expand Down Expand Up @@ -71,9 +72,10 @@ fn u32_benches(c: &mut Criterion) {
});

c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| {
let buffer =
bincode::serialize(&ZeroVec::<u32>::try_from_bytes(black_box(TEST_BUFFER_LE)).unwrap())
.unwrap();
let buffer = bincode::serialize(
&ZeroVec::<u32>::parse_byte_slice(black_box(TEST_BUFFER_LE)).unwrap(),
)
.unwrap();
b.iter(|| {
bincode::deserialize::<ZeroVec<u32>>(&buffer)
.unwrap()
Expand Down Expand Up @@ -131,7 +133,7 @@ fn stress_benches(c: &mut Criterion) {
});

// *** Compute sum of vec of 100 `u32` ***
let zerovec = ZeroVec::<u32>::try_from_bytes(zerovec_aligned.as_bytes()).unwrap();
let zerovec = ZeroVec::<u32>::parse_byte_slice(zerovec_aligned.as_bytes()).unwrap();
c.bench_function("zerovec_serde/sum/stress/zerovec", |b| {
b.iter(|| black_box(&zerovec).iter().sum::<u32>());
});
Expand Down
8 changes: 6 additions & 2 deletions utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
//!
//! let data = DataStruct {
//! nums: ZeroVec::from_slice(&[211, 281, 421, 461]),
//! strs: VarZeroVec::from(vec!["hello".to_string(), "world".to_string()]),
//! strs: VarZeroVec::from(&["hello".to_string(), "world".to_string()] as &[_]),
//! };
//! let bincode_bytes = bincode::serialize(&data)
//! .expect("Serialization should be successful");
Expand All @@ -77,11 +77,15 @@
//! # } // feature = "serde"
//! ```

// This crate does a lot of nuanced lifetime manipulation, so spelling
// lifetimes out explicitly is clearer here than relying on elision.
#![allow(clippy::needless_lifetimes)]

pub mod map;
#[cfg(test)]
pub mod samples;
pub mod ule;
mod varzerovec;
pub mod varzerovec;
mod zerovec;

#[cfg(feature = "yoke")]
Expand Down
12 changes: 7 additions & 5 deletions utils/zerovec/src/map/vecs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::ule::*;
use crate::varzerovec::owned::VarZeroVecOwned;
use crate::VarZeroVec;
use crate::ZeroVec;
use std::cmp::Ordering;
Expand Down Expand Up @@ -108,26 +109,27 @@ where
self.get(index)
}
fn insert(&mut self, index: usize, value: T) {
self.make_mut().insert(index, value)
self.make_mut().insert(index, &value)
}
fn remove(&mut self, index: usize) -> T {
self.make_mut().remove(index)
}
fn replace(&mut self, index: usize, value: T) -> T {
let vec = self.make_mut();
mem::replace(&mut vec[index], value)
vec.replace(index, value)
}
fn push(&mut self, value: T) {
self.make_mut().push(value)
let len = self.len();
self.make_mut().insert(len, &value)
}
fn len(&self) -> usize {
self.len()
}
fn new() -> Self {
Vec::new().into()
VarZeroVecOwned::new().into()
}
fn with_capacity(cap: usize) -> Self {
Vec::with_capacity(cap).into()
VarZeroVecOwned::with_capacity(cap).into()
}
fn clear(&mut self) {
self.make_mut().clear()
Expand Down
6 changes: 5 additions & 1 deletion utils/zerovec/src/ule/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ pub trait AsVarULE {
///
/// # Safety
///
/// See the safety invariant documented on [`Self::from_byte_slice_unchecked()`] to implement this trait.
/// There must be no padding bytes involved in this type: [`Self::as_byte_slice()`] MUST return
/// a slice of initialized bytes provided that `Self` is initialized.
Comment on lines +192 to +193
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. The safety docs are partly here and partly on the from_byte_slice_unchecked function
  2. Can we / should we say something like, mem::size_of_val(bytes) must equal mem::size_of_val(self) ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Done. I wrote new docs instead of moving them, from_byte_slice_unchecked still has duplicated safety docs, but I think it's good to be redundant for unsafe stuff.
  2. I don't think we need to, everything is totally safe even if the bytes are a subset, it just becomes nearly impossible to implement parse_byte_slice correctly.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about equality when I said that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

///
/// [`VarULE::from_byte_slice_unchecked()`] _must_ be implemented to return the same result
/// as [`VarULE::parse_byte_slice()`] provided both are passed the same validly parsing byte slices.
pub unsafe trait VarULE: 'static {
/// The error type used by [`VarULE::parse_byte_slice()`]
type Error;
Expand Down
10 changes: 10 additions & 0 deletions utils/zerovec/src/ule/plain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@ macro_rules! impl_byte_slice_size {
unsafe { std::slice::from_raw_parts(data as *const u8, len) }
}
}

impl PlainOldULE<$size> {
/// Reinterprets a mutable byte slice as a mutable slice of `Self`, without copying.
///
/// The returned slice starts at the first byte of `bytes` and contains
/// `bytes.len()` divided by the element size (integer division) elements, so any
/// trailing bytes that do not fill a whole element are not part of the result.
#[inline]
pub fn from_byte_slice_unchecked_mut(bytes: &mut [u8]) -> &mut [Self] {
let data = bytes.as_mut_ptr();
// Integer division: a partial trailing element is left out of the result.
let len = bytes.len() / $size;
// Safe because Self is transparent over [u8; $size]
unsafe { std::slice::from_raw_parts_mut(data as *mut Self, len) }
}
}
};
}

Expand Down
56 changes: 53 additions & 3 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ fn usizeify(x: PlainOldULE<4>) -> usize {
///
/// This is where the actual work involved in VarZeroVec happens
///
/// See [`SliceComponents::try_from_bytes()`] for information on the internal invariants involved
/// See [`SliceComponents::parse_byte_slice()`] for information on the internal invariants involved
pub struct SliceComponents<'a, T> {
/// The list of indices into the `things` slice
indices: &'a [PlainOldULE<4>],
Expand Down Expand Up @@ -50,7 +50,7 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
/// - `indices[len - 1]..things.len()` must index into a valid section of
/// `things`, such that it parses to a `T::VarULE`
#[inline]
pub fn try_from_bytes(slice: &'a [u8]) -> Result<Self, ParseErrorFor<T>> {
pub fn parse_byte_slice(slice: &'a [u8]) -> Result<Self, ParseErrorFor<T>> {
if slice.is_empty() {
return Ok(SliceComponents {
indices: &[],
Expand Down Expand Up @@ -87,6 +87,39 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
Ok(components)
}

/// Construct a [`SliceComponents`] from a byte slice that has previously
/// successfully returned a [`SliceComponents`] when passed to
/// [`SliceComponents::parse_byte_slice()`]. Will return the same
/// object as one would get from calling [`SliceComponents::parse_byte_slice()`].
Manishearth marked this conversation as resolved.
Show resolved Hide resolved
///
/// # Safety
/// The bytes must have previously successfully run through
/// [`SliceComponents::parse_byte_slice()`]
pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self {
// An empty buffer is represented as a component set with no indices and no data.
if slice.is_empty() {
return SliceComponents {
indices: &[],
things: &[],
entire_slice: slice,
marker: PhantomData,
};
}
// The first four bytes hold the element count as an unaligned `u32`.
// SAFETY: none of the `get_unchecked` calls below perform a bounds check; they rely
// on the caller's guarantee that `slice` was previously accepted by
// `parse_byte_slice()`.
let len_bytes = slice.get_unchecked(0..4);
let len_ule = PlainOldULE::<4>::from_byte_slice_unchecked(len_bytes);

let len = u32::from_unaligned(len_ule.get_unchecked(0)) as usize;
// `len` four-byte index entries immediately follow the four-byte length prefix.
let indices_bytes = slice.get_unchecked(4..4 * len + 4);
let indices = PlainOldULE::<4>::from_byte_slice_unchecked(indices_bytes);
// Everything after the index table is the data region that the indices point into.
let things = slice.get_unchecked(4 * len + 4..);

SliceComponents {
indices,
things,
entire_slice: slice,
marker: PhantomData,
}
}

#[inline]
pub fn len(self) -> usize {
self.indices.len()
Expand All @@ -98,7 +131,6 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
}

#[inline]
#[cfg(feature = "serde")]
pub fn entire_slice(self) -> &'a [u8] {
self.entire_slice
}
Expand Down Expand Up @@ -192,6 +224,24 @@ impl<'a, T: AsVarULE> SliceComponents<'a, T> {
.chain(last)
.map(|s| unsafe { T::VarULE::from_byte_slice_unchecked(s) })
}

/// Collects every element of this view into a newly allocated `Vec<T>`,
/// converting each stored `T::VarULE` to `T` via `T::from_unaligned`.
pub fn to_vec(self) -> Vec<T>
where
    T: Clone,
{
    let elements = self.iter();
    elements.map(|ule| T::from_unaligned(ule)).collect()
}

// Render the index table of this type as a human-readable string
#[allow(unused)] // useful for debugging
pub(crate) fn dump(&self) -> String {
    // Decode each unaligned index entry so it prints as a plain integer.
    let decoded: Vec<_> = self.indices.iter().map(u32::from_unaligned).collect();
    format!("SliceComponents {{ indices: {:?} }}", decoded)
}
}

impl<'a, T> SliceComponents<'a, T>
Expand Down
Loading