From 622afe6d1033fe6abd175415ce1357a302754df5 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 17 Aug 2023 16:56:28 +0200 Subject: [PATCH 1/3] fix --- utils/zerovec/src/varzerovec/slice.rs | 2 +- utils/zerovec/src/varzerovec/vec.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/utils/zerovec/src/varzerovec/slice.rs b/utils/zerovec/src/varzerovec/slice.rs index afdbe80d920..65858ed6c8c 100644 --- a/utils/zerovec/src/varzerovec/slice.rs +++ b/utils/zerovec/src/varzerovec/slice.rs @@ -108,7 +108,7 @@ pub struct VarZeroSlice { impl VarZeroSlice { /// Construct a new empty VarZeroSlice pub const fn new_empty() -> &'static Self { - let arr: &[u8] = &[]; + let arr: &[u8] = &[0; components::LENGTH_WIDTH]; unsafe { mem::transmute(arr) } } diff --git a/utils/zerovec/src/varzerovec/vec.rs b/utils/zerovec/src/varzerovec/vec.rs index 1401a180a29..4617f86e962 100644 --- a/utils/zerovec/src/varzerovec/vec.rs +++ b/utils/zerovec/src/varzerovec/vec.rs @@ -503,3 +503,8 @@ impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T self.iter().cmp(other.iter()) } } + +#[test] +fn single_empty_representation() { + assert_eq!(VarZeroVec::::new().as_bytes(), VarZeroVec::::from(&[] as &[&str]).as_bytes()); +} From dcd27cd62f5c33949a63495b4a44f10e162fff25 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 17 Aug 2023 17:21:02 +0200 Subject: [PATCH 2/3] better --- utils/zerovec/src/varzerovec/components.rs | 5 +++- utils/zerovec/src/varzerovec/owned.rs | 17 +++++++---- utils/zerovec/src/varzerovec/slice.rs | 4 +-- utils/zerovec/src/varzerovec/vec.rs | 33 ++++++++++++++++++---- 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs index 5168fb7143b..795b67d5467 100644 --- a/utils/zerovec/src/varzerovec/components.rs +++ b/utils/zerovec/src/varzerovec/components.rs @@ -168,6 +168,7 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> /// `things`, such that it parses to a `T::VarULE` #[inline] pub fn parse_byte_slice(slice: &'a [u8]) -> Result { + // The empty VZV is special-cased to the empty slice if slice.is_empty() { return Ok(VarZeroVecComponents { len: 0, @@ -219,6 +220,7 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F> /// The bytes must have previously successfully run through /// [`VarZeroVecComponents::parse_byte_slice()`] pub unsafe fn from_bytes_unchecked(slice: &'a [u8]) -> Self { + // The empty VZV is special-cased to the empty slice if slice.is_empty() { return VarZeroVecComponents { len: 0, @@ -485,12 +487,13 @@ where } /// Collects the bytes for a VarZeroSlice into a Vec. -pub fn get_serializable_bytes(elements: &[A]) -> Option> +pub fn get_serializable_bytes_non_empty(elements: &[A]) -> Option> where T: VarULE + ?Sized, A: EncodeAsVarULE, F: VarZeroVecFormat, { + debug_assert!(!elements.is_empty()); let len = compute_serializable_len::(elements)?; debug_assert!(len >= LENGTH_WIDTH as u32); let mut output: Vec = alloc::vec![0; len as usize]; diff --git a/utils/zerovec/src/varzerovec/owned.rs b/utils/zerovec/src/varzerovec/owned.rs index 0990a5d91d5..c5556315fbd 100644 --- a/utils/zerovec/src/varzerovec/owned.rs +++ b/utils/zerovec/src/varzerovec/owned.rs @@ -84,13 +84,18 @@ impl VarZeroVecOwned { where A: EncodeAsVarULE, { - Ok(Self { - marker: PhantomData, - // TODO(#1410): Rethink length errors in VZV. - entire_slice: components::get_serializable_bytes::(elements).ok_or( - "Attempted to build VarZeroVec out of elements that \ + Ok(if elements.is_empty() { + Self::from_slice(VarZeroSlice::new_empty()) + } else { + Self { + marker: PhantomData, + // TODO(#1410): Rethink length errors in VZV. + entire_slice: components::get_serializable_bytes_non_empty::(elements) + .ok_or( + "Attempted to build VarZeroVec out of elements that \ cumulatively are larger than a u32 in size", - )?, + )?, + } }) } diff --git a/utils/zerovec/src/varzerovec/slice.rs b/utils/zerovec/src/varzerovec/slice.rs index 65858ed6c8c..190153fe017 100644 --- a/utils/zerovec/src/varzerovec/slice.rs +++ b/utils/zerovec/src/varzerovec/slice.rs @@ -108,8 +108,8 @@ pub struct VarZeroSlice { impl VarZeroSlice { /// Construct a new empty VarZeroSlice pub const fn new_empty() -> &'static Self { - let arr: &[u8] = &[0; components::LENGTH_WIDTH]; - unsafe { mem::transmute(arr) } + // The empty VZV is special-cased to the empty slice + unsafe { mem::transmute(&[] as &[u8]) } } /// Obtain a [`VarZeroVecComponents`] borrowing from the internal buffer diff --git a/utils/zerovec/src/varzerovec/vec.rs b/utils/zerovec/src/varzerovec/vec.rs index 4617f86e962..64928509f8e 100644 --- a/utils/zerovec/src/varzerovec/vec.rs +++ b/utils/zerovec/src/varzerovec/vec.rs @@ -425,8 +425,12 @@ where { #[inline] fn from(elements: &[A]) -> Self { - #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility - VarZeroVecOwned::try_from_elements(elements).unwrap().into() + if elements.is_empty() { + VarZeroSlice::new_empty().into() + } else { + #[allow(clippy::unwrap_used)] // TODO(#1410) Better story for fallibility + VarZeroVecOwned::try_from_elements(elements).unwrap().into() + } } } @@ -451,8 +455,14 @@ where { #[inline] fn eq(&self, other: &VarZeroVec<'b, T, F>) -> bool { - // VarULE has an API guarantee that this is equivalent - // to `T::VarULE::eq()` + // VZV::from_elements used to produce a non-canonical representation of the + // empty VZV, so we cannot use byte equality for empty vecs. + if self.is_empty() || other.is_empty() { + return self.is_empty() && other.is_empty(); + } + // VarULE has an API guarantee that byte equality is semantic equality. + // For non-empty VZVs, there's only a single metadata representation, + // so this guarantee extends to the whole VZV representation. self.as_bytes().eq(other.as_bytes()) } } @@ -505,6 +515,17 @@ impl<'a, T: VarULE + ?Sized + Ord, F: VarZeroVecFormat> Ord for VarZeroVec<'a, T } #[test] -fn single_empty_representation() { - assert_eq!(VarZeroVec::::new().as_bytes(), VarZeroVec::::from(&[] as &[&str]).as_bytes()); +fn assert_single_empty_representation() { + assert_eq!( + VarZeroVec::::new().as_bytes(), + VarZeroVec::::from(&[] as &[&str]).as_bytes() + ); +} + +#[test] +fn weird_empty_representation_equality() { + assert_eq!( + VarZeroVec::::parse_byte_slice(&[0, 0, 0, 0]).unwrap(), + VarZeroVec::::parse_byte_slice(&[]).unwrap() + ); } From 24c2dac4094066bd8b02c962a8fd0ed46c0dbf2f Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Thu, 17 Aug 2023 18:24:09 +0200 Subject: [PATCH 3/3] datagen --- .../tests/data/postcard/fingerprints.csv | 166 +++++++++--------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index f3acc057cb0..47bbb10dda9 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -1253,7 +1253,7 @@ list/unit@1, sr-Latn, 23B, 58b0dcc30cdabe1d list/unit@1, th, 30B, 3634af6f27539959 list/unit@1, tr, 16B, 375096409d6f0946 list/unit@1, und, 17B, 924e2d2be2336604 -locid_transform/aliases@1, und, 8108B, 910e28d57c7c9911 +locid_transform/aliases@1, und, 8104B, 3ad5e40d558cf536 locid_transform/likelysubtags@1, und, 4691B, d7ae441bbc9e1b75 locid_transform/likelysubtags_ext@1, und, 75692B, d502bcb0acd0e4e4 locid_transform/likelysubtags_l@1, und, 1810B, e172457d83d68447 @@ -1397,96 +1397,96 @@ props/casemap@1, und, 22431B, e5f6c1c194a5002f props/casemap_unfold@1, und, 932B, 85d63de2fdea5a3d props/ccc@1, und, 5270B, 85887761e8c0b554 props/ea@1, und, 4939B, de9bdf3107a1a963 -props/exemplarchars/auxiliary@1, ar, 103B, 81ba9ba73c59c4c4 -props/exemplarchars/auxiliary@1, ar-EG, 103B, 81ba9ba73c59c4c4 -props/exemplarchars/auxiliary@1, bn, 23B, 5030e834721a31d -props/exemplarchars/auxiliary@1, ccp, 7B, a7ee17bdde05404c -props/exemplarchars/auxiliary@1, en, 136B, ca9fa2472e43d8db -props/exemplarchars/auxiliary@1, en-001, 136B, ca9fa2472e43d8db -props/exemplarchars/auxiliary@1, en-ZA, 192B, ab071c63635d6ef6 -props/exemplarchars/auxiliary@1, es, 200B, 3e5ad25e4b57ebd1 -props/exemplarchars/auxiliary@1, es-AR, 200B, 3e5ad25e4b57ebd1 -props/exemplarchars/auxiliary@1, fil, 47B, 1e59c3e095a11ff4 -props/exemplarchars/auxiliary@1, fr, 144B, 8f01707e31ae9b77 -props/exemplarchars/auxiliary@1, ja, 608B, 5cda0ba025ae5157 +props/exemplarchars/auxiliary@1, ar, 99B, 6558940e4709505f +props/exemplarchars/auxiliary@1, ar-EG, 99B, 6558940e4709505f +props/exemplarchars/auxiliary@1, bn, 19B, c240e2a137c88e +props/exemplarchars/auxiliary@1, ccp, 3B, c76b19d07c9fe625 +props/exemplarchars/auxiliary@1, en, 132B, ca4c480e9d2cd1ec +props/exemplarchars/auxiliary@1, en-001, 132B, ca4c480e9d2cd1ec +props/exemplarchars/auxiliary@1, en-ZA, 188B, 60246eda9d6070c9 +props/exemplarchars/auxiliary@1, es, 196B, efe819d83875f9d6 +props/exemplarchars/auxiliary@1, es-AR, 196B, efe819d83875f9d6 +props/exemplarchars/auxiliary@1, fil, 43B, 683b36475b8b233c +props/exemplarchars/auxiliary@1, fr, 140B, 80654f8db9f31544 +props/exemplarchars/auxiliary@1, ja, 604B, de28efe64dd267ca props/exemplarchars/auxiliary@1, ru, 61B, 6a20fa4f2a8172f3 props/exemplarchars/auxiliary@1, sr, 37B, e291a541ad2ca8fd -props/exemplarchars/auxiliary@1, sr-Latn, 31B, 60f5752fc7094742 -props/exemplarchars/auxiliary@1, th, 15B, 5d3232640b4d876a -props/exemplarchars/auxiliary@1, tr, 152B, 54b1b627d8fe7736 -props/exemplarchars/auxiliary@1, und, 7B, a7ee17bdde05404c -props/exemplarchars/index@1, ar, 39B, dd0c9bdcb63dc97b -props/exemplarchars/index@1, ar-EG, 39B, dd0c9bdcb63dc97b +props/exemplarchars/auxiliary@1, sr-Latn, 27B, b96890025602e911 +props/exemplarchars/auxiliary@1, th, 11B, deddb822a77299fc +props/exemplarchars/auxiliary@1, tr, 148B, 51731dc6dd6ada64 +props/exemplarchars/auxiliary@1, und, 3B, c76b19d07c9fe625 +props/exemplarchars/index@1, ar, 35B, 1e36db71576dd57a +props/exemplarchars/index@1, ar-EG, 35B, 1e36db71576dd57a props/exemplarchars/index@1, bn, 66B, d11f0e0b22169f5b -props/exemplarchars/index@1, ccp, 15B, d350751af6bbc3d3 -props/exemplarchars/index@1, en, 15B, 757e29ef1029b9f2 -props/exemplarchars/index@1, en-001, 15B, 757e29ef1029b9f2 -props/exemplarchars/index@1, en-ZA, 15B, 757e29ef1029b9f2 -props/exemplarchars/index@1, es, 23B, d2c341d54a78da7 -props/exemplarchars/index@1, es-AR, 23B, d2c341d54a78da7 +props/exemplarchars/index@1, ccp, 11B, 3479553a04586974 +props/exemplarchars/index@1, en, 11B, c5ed740dfb27db94 +props/exemplarchars/index@1, en-001, 11B, c5ed740dfb27db94 +props/exemplarchars/index@1, en-ZA, 11B, c5ed740dfb27db94 +props/exemplarchars/index@1, es, 19B, d390b154318cf4c5 +props/exemplarchars/index@1, es-AR, 19B, d390b154318cf4c5 props/exemplarchars/index@1, fil, 27B, c6994dd550d5b4e2 -props/exemplarchars/index@1, fr, 15B, 757e29ef1029b9f2 -props/exemplarchars/index@1, ja, 87B, 1fe8d5166096a8f -props/exemplarchars/index@1, ru, 39B, b216a14c26e08b41 -props/exemplarchars/index@1, sr, 39B, 1f34a0b2c4e8c9cf +props/exemplarchars/index@1, fr, 11B, c5ed740dfb27db94 +props/exemplarchars/index@1, ja, 83B, 6c0d3e7fc7b1cdcc +props/exemplarchars/index@1, ru, 35B, 638c1c9980e0bd90 +props/exemplarchars/index@1, sr, 35B, 178072467638bd1b props/exemplarchars/index@1, sr-Latn, 60B, 6e724686cc05703e -props/exemplarchars/index@1, th, 15B, 9a73c145265a2ae5 -props/exemplarchars/index@1, tr, 55B, becd57ac0c1a82ed -props/exemplarchars/index@1, und, 7B, a7ee17bdde05404c -props/exemplarchars/main@1, ar, 31B, 4fa3539c523a0dc5 -props/exemplarchars/main@1, ar-EG, 31B, 4fa3539c523a0dc5 +props/exemplarchars/index@1, th, 11B, 146fc32e7f67f2c8 +props/exemplarchars/index@1, tr, 51B, 121515d5ff38f62b +props/exemplarchars/index@1, und, 3B, c76b19d07c9fe625 +props/exemplarchars/main@1, ar, 27B, c08afa9f1c17fc4c +props/exemplarchars/main@1, ar-EG, 27B, c08afa9f1c17fc4c props/exemplarchars/main@1, bn, 146B, 31d8ce207d73155 -props/exemplarchars/main@1, ccp, 15B, e191e70902e7f035 -props/exemplarchars/main@1, en, 15B, a7d71792c12bfffa -props/exemplarchars/main@1, en-001, 15B, a7d71792c12bfffa -props/exemplarchars/main@1, en-ZA, 15B, a7d71792c12bfffa -props/exemplarchars/main@1, es, 71B, fa77ca1cfdf9230c -props/exemplarchars/main@1, es-AR, 71B, fa77ca1cfdf9230c +props/exemplarchars/main@1, ccp, 11B, 4ca535c7c5505492 +props/exemplarchars/main@1, en, 11B, fdebd22f9ff8a46b +props/exemplarchars/main@1, en-001, 11B, fdebd22f9ff8a46b +props/exemplarchars/main@1, en-ZA, 11B, fdebd22f9ff8a46b +props/exemplarchars/main@1, es, 67B, 36546e1a555281da +props/exemplarchars/main@1, es-AR, 67B, 36546e1a555281da props/exemplarchars/main@1, fil, 27B, 5aa0ee9120937a2e -props/exemplarchars/main@1, fr, 87B, 978fe355ffd56d69 -props/exemplarchars/main@1, ja, 13960B, ecdb0541bf50467a -props/exemplarchars/main@1, ru, 23B, 776f46a709a45c84 -props/exemplarchars/main@1, sr, 47B, 6dd4749272778c73 +props/exemplarchars/main@1, fr, 83B, e8c49e64b7de061f +props/exemplarchars/main@1, ja, 13956B, 71baba690436f06b +props/exemplarchars/main@1, ru, 19B, 826b7a0baf4968c7 +props/exemplarchars/main@1, sr, 43B, 543c615d18604069 props/exemplarchars/main@1, sr-Latn, 84B, b3210a1fdbb25658 -props/exemplarchars/main@1, th, 23B, dc264c756b6c70c5 -props/exemplarchars/main@1, tr, 79B, 2a46aadb009b9040 -props/exemplarchars/main@1, und, 7B, a7ee17bdde05404c -props/exemplarchars/numbers@1, ar, 79B, 45e0d61dd613fbb9 -props/exemplarchars/numbers@1, ar-EG, 79B, 45e0d61dd613fbb9 -props/exemplarchars/numbers@1, bn, 55B, 5dbd24383df14b12 -props/exemplarchars/numbers@1, ccp, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, en, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, en-001, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, en-ZA, 47B, e8216b1455c6fe34 -props/exemplarchars/numbers@1, es, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, es-AR, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, fil, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, fr, 87B, 4665b771b63788a9 -props/exemplarchars/numbers@1, ja, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, ru, 47B, e8216b1455c6fe34 -props/exemplarchars/numbers@1, sr, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, sr-Latn, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, th, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, tr, 47B, e1b5384c071ba661 -props/exemplarchars/numbers@1, und, 47B, e1b5384c071ba661 -props/exemplarchars/punctuation@1, ar, 119B, ed76591cd7577ded -props/exemplarchars/punctuation@1, ar-EG, 119B, ed76591cd7577ded -props/exemplarchars/punctuation@1, bn, 127B, fcc476bc9071298f -props/exemplarchars/punctuation@1, ccp, 136B, c611097cf97ab4b0 -props/exemplarchars/punctuation@1, en, 127B, fcc476bc9071298f -props/exemplarchars/punctuation@1, en-001, 127B, fcc476bc9071298f -props/exemplarchars/punctuation@1, en-ZA, 127B, fcc476bc9071298f -props/exemplarchars/punctuation@1, es, 152B, 75f0f7cbd4257ed2 -props/exemplarchars/punctuation@1, es-AR, 152B, 75f0f7cbd4257ed2 -props/exemplarchars/punctuation@1, fil, 119B, cc9ce6072abce056 -props/exemplarchars/punctuation@1, fr, 144B, ec3639703b696076 -props/exemplarchars/punctuation@1, ja, 312B, c7c2f5c2e2a238e5 -props/exemplarchars/punctuation@1, ru, 160B, f08f7f99e3254699 -props/exemplarchars/punctuation@1, sr, 144B, 82109d7f0d039c80 -props/exemplarchars/punctuation@1, sr-Latn, 144B, 82109d7f0d039c80 -props/exemplarchars/punctuation@1, th, 111B, 3054746777f45c15 -props/exemplarchars/punctuation@1, tr, 127B, fcc476bc9071298f -props/exemplarchars/punctuation@1, und, 87B, 1eb15399ab2a5d3b +props/exemplarchars/main@1, th, 19B, fd3463d155825715 +props/exemplarchars/main@1, tr, 75B, f83818046ea9136c +props/exemplarchars/main@1, und, 3B, c76b19d07c9fe625 +props/exemplarchars/numbers@1, ar, 75B, 6f6ed88ed26911fe +props/exemplarchars/numbers@1, ar-EG, 75B, 6f6ed88ed26911fe +props/exemplarchars/numbers@1, bn, 51B, b1a24d63a91986e5 +props/exemplarchars/numbers@1, ccp, 43B, 6150146944dccad +props/exemplarchars/numbers@1, en, 43B, 6150146944dccad +props/exemplarchars/numbers@1, en-001, 43B, 6150146944dccad +props/exemplarchars/numbers@1, en-ZA, 43B, bdb5a5fd3e342c04 +props/exemplarchars/numbers@1, es, 43B, 6150146944dccad +props/exemplarchars/numbers@1, es-AR, 43B, 6150146944dccad +props/exemplarchars/numbers@1, fil, 43B, 6150146944dccad +props/exemplarchars/numbers@1, fr, 83B, 46c1863087d7f583 +props/exemplarchars/numbers@1, ja, 43B, 6150146944dccad +props/exemplarchars/numbers@1, ru, 43B, bdb5a5fd3e342c04 +props/exemplarchars/numbers@1, sr, 43B, 6150146944dccad +props/exemplarchars/numbers@1, sr-Latn, 43B, 6150146944dccad +props/exemplarchars/numbers@1, th, 43B, 6150146944dccad +props/exemplarchars/numbers@1, tr, 43B, 6150146944dccad +props/exemplarchars/numbers@1, und, 43B, 6150146944dccad +props/exemplarchars/punctuation@1, ar, 115B, 849a586a4478edb6 +props/exemplarchars/punctuation@1, ar-EG, 115B, 849a586a4478edb6 +props/exemplarchars/punctuation@1, bn, 123B, 2e7906785ab47589 +props/exemplarchars/punctuation@1, ccp, 132B, c3c0f02949da230d +props/exemplarchars/punctuation@1, en, 123B, 2e7906785ab47589 +props/exemplarchars/punctuation@1, en-001, 123B, 2e7906785ab47589 +props/exemplarchars/punctuation@1, en-ZA, 123B, 2e7906785ab47589 +props/exemplarchars/punctuation@1, es, 148B, 6fc36862866bc1 +props/exemplarchars/punctuation@1, es-AR, 148B, 6fc36862866bc1 +props/exemplarchars/punctuation@1, fil, 115B, bcd01ba9a6f9afd2 +props/exemplarchars/punctuation@1, fr, 140B, c93fbe47124d89f7 +props/exemplarchars/punctuation@1, ja, 308B, 26f8e2db987a5e5c +props/exemplarchars/punctuation@1, ru, 156B, 4053f13c5d2e0ce1 +props/exemplarchars/punctuation@1, sr, 140B, face2a479b6ba77f +props/exemplarchars/punctuation@1, sr-Latn, 140B, face2a479b6ba77f +props/exemplarchars/punctuation@1, th, 107B, 365776b3531212fd +props/exemplarchars/punctuation@1, tr, 123B, 2e7906785ab47589 +props/exemplarchars/punctuation@1, und, 83B, 65bb6a9f6c28188f props/gc@1, und, 16984B, 23f5131c2f0afb5d props/graph@1, und, 5699B, 42fbc9da34d13b06 props/lb@1, und, 14640B, a43615cc519e775