From 26de9b9cc07751adf3d5a792201c89c601839c25 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Wed, 13 Oct 2021 03:41:21 +0200 Subject: [PATCH 01/30] Separate runtime and reference rules in PluralRules. --- Cargo.lock | 1 + components/plurals/Cargo.toml | 3 +- components/plurals/src/data.rs | 8 +- components/plurals/src/error.rs | 2 +- components/plurals/src/rules/mod.rs | 19 +- .../plurals/src/rules/{ => reference}/ast.rs | 34 +-- .../src/rules/{ => reference}/lexer.rs | 4 +- components/plurals/src/rules/reference/mod.rs | 14 ++ .../src/rules/{ => reference}/parser.rs | 4 +- .../src/rules/{ => reference}/resolver.rs | 5 +- .../src/rules/{ => reference}/serializer.rs | 8 +- components/plurals/src/rules/runtime/ast.rs | 227 ++++++++++++++++++ components/plurals/src/rules/runtime/mod.rs | 9 + .../plurals/src/rules/runtime/resolver.rs | 16 ++ components/plurals/tests/rules.rs | 5 +- 15 files changed, 313 insertions(+), 46 deletions(-) rename components/plurals/src/rules/{ => reference}/ast.rs (93%) rename components/plurals/src/rules/{ => reference}/lexer.rs (98%) create mode 100644 components/plurals/src/rules/reference/mod.rs rename components/plurals/src/rules/{ => reference}/parser.rs (98%) rename components/plurals/src/rules/{ => reference}/resolver.rs (95%) rename components/plurals/src/rules/{ => reference}/serializer.rs (96%) create mode 100644 components/plurals/src/rules/runtime/ast.rs create mode 100644 components/plurals/src/rules/runtime/mod.rs create mode 100644 components/plurals/src/rules/runtime/resolver.rs diff --git a/Cargo.lock b/Cargo.lock index f3772c11875..87b230ea0b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1234,6 +1234,7 @@ dependencies = [ "icu_testdata", "serde", "serde_json", + "zerovec", ] [[package]] diff --git a/components/plurals/Cargo.toml b/components/plurals/Cargo.toml index 2bb033f2e7e..8491e1c17fa 100644 --- a/components/plurals/Cargo.toml +++ b/components/plurals/Cargo.toml @@ -37,6 +37,7 @@ icu_provider = { version = "0.4", path = "../../provider/core", features = ["mac icu_locid = { version = "0.4", path = "../locid" } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } displaydoc = { version = "0.2.3", default-features = false } +zerovec = { version = "0.3", path = "../../utils/zerovec" } [dev-dependencies] criterion = "0.3" @@ -57,7 +58,7 @@ bench = false # This option is required for Benchmark CI std = ["icu_locid/std", "icu_provider/std"] default = ["provider_serde"] bench = [] -provider_serde = ["serde"] +provider_serde = ["serde", "zerovec/serde"] [[bench]] name = "operands" diff --git a/components/plurals/src/data.rs b/components/plurals/src/data.rs index bd631b75b40..ca4f56f8447 100644 --- a/components/plurals/src/data.rs +++ b/components/plurals/src/data.rs @@ -5,7 +5,7 @@ use crate::operands::PluralOperands; use crate::provider::PluralRuleStringsV1; use crate::rules; -use crate::rules::ast; +use crate::rules::reference::ast; use crate::{PluralCategory, PluralRulesError}; use alloc::borrow::Cow; use core::convert::TryInto; @@ -53,7 +53,9 @@ impl PluralRuleList { fn parse_rule(input: &Option>) -> Result, PluralRulesError> { Ok(if let Some(input) = input { - Some(rules::parse_condition((input).as_bytes())?) + Some(rules::reference::parser::parse_condition( + (input).as_bytes(), + )?) } else { None }) @@ -97,7 +99,7 @@ impl RulesSelector { .find_map(|category| { conditions .get(*category) - .filter(|cond| rules::test_condition(cond, operands)) + .filter(|cond| rules::reference::resolver::test_condition(cond, operands)) .map(|_| *category) }) .unwrap_or(PluralCategory::Other), diff --git a/components/plurals/src/error.rs b/components/plurals/src/error.rs index cf0df419a0b..c9f9196bb78 100644 --- a/components/plurals/src/error.rs +++ b/components/plurals/src/error.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::rules::parser::ParserError; +use crate::rules::reference::parser::ParserError; use displaydoc::Display; use icu_provider::prelude::DataError; diff --git a/components/plurals/src/rules/mod.rs b/components/plurals/src/rules/mod.rs index 5c65f9e0e80..4a9049f66cd 100644 --- a/components/plurals/src/rules/mod.rs +++ b/components/plurals/src/rules/mod.rs @@ -55,8 +55,8 @@ //! When parsed, the resulting [`AST`] will look like this: //! //! ``` -//! use icu::plurals::rules::parse_condition; -//! use icu::plurals::rules::ast::*; +//! use icu::plurals::rules::reference::parse_condition; +//! use icu::plurals::rules::reference::ast::*; //! //! let input = "i = 1 and v = 0 @integer 1"; //! @@ -97,7 +97,8 @@ //! matches: //! //! ``` -//! use icu::plurals::rules::{test_condition, parse_condition}; +//! use icu::plurals::rules::reference::test_condition; +//! use icu::plurals::rules::reference::parse_condition; //! use icu::plurals::PluralOperands; //! //! let input = "i = 1 and v = 0 @integer 1"; @@ -143,13 +144,5 @@ //! [`Condition`]: super::rules::ast::Condition //! [`Sample`]: super::rules::ast::Samples //! [`AST`]: super::rules::ast -pub mod ast; -pub(crate) mod lexer; -pub(crate) mod parser; -pub(crate) mod resolver; -pub(crate) mod serializer; - -pub use lexer::Lexer; -pub use parser::{parse, parse_condition}; -pub use resolver::test_condition; -pub use serializer::serialize; +pub mod reference; +pub mod runtime; diff --git a/components/plurals/src/rules/ast.rs b/components/plurals/src/rules/reference/ast.rs similarity index 93% rename from components/plurals/src/rules/ast.rs rename to components/plurals/src/rules/reference/ast.rs index 468dbc196bd..f74d7daf069 100644 --- a/components/plurals/src/rules/ast.rs +++ b/components/plurals/src/rules/reference/ast.rs @@ -9,8 +9,8 @@ //! # Examples //! //! ``` -//! use icu::plurals::rules::parse_condition; -//! use icu::plurals::rules::ast::*; +//! use icu::plurals::rules::reference::parse_condition; +//! use icu::plurals::rules::reference::ast::*; //! //! let input = "i = 1"; //! @@ -48,8 +48,8 @@ use core::ops::RangeInclusive; /// # Examples /// /// ``` -/// use icu::plurals::rules::ast::*; -/// use icu::plurals::rules::{parse, parse_condition}; +/// use icu::plurals::rules::reference::ast::*; +/// use icu::plurals::rules::reference::{parse, parse_condition}; /// /// let condition = parse_condition(b"i = 5 or v = 2") /// .expect("Parsing failed."); @@ -96,8 +96,8 @@ pub struct Rule { /// # Examples /// /// ``` -/// use icu::plurals::rules::ast::*; -/// use icu::plurals::rules::parse_condition; +/// use icu::plurals::rules::reference::ast::*; +/// use icu::plurals::rules::reference::parse_condition; /// /// let condition = Condition(Box::new([ /// AndCondition(Box::new([Relation { @@ -143,7 +143,7 @@ pub struct Condition(pub Box<[AndCondition]>); /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// /// AndCondition(Box::new([ /// Relation { @@ -184,7 +184,7 @@ pub struct AndCondition(pub Box<[Relation]>); /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// /// Relation { /// expression: Expression { @@ -234,7 +234,7 @@ pub enum Operator { /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// /// Expression { /// operand: Operand::I, @@ -263,7 +263,7 @@ pub struct Expression { /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::Operand; +/// use icu::plurals::rules::reference::ast::Operand; /// /// Operand::I; /// ``` @@ -303,7 +303,7 @@ pub enum Operand { /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// /// RangeList(Box::new([ /// RangeListItem::Value(Value(5)), @@ -331,7 +331,7 @@ pub struct RangeList(pub Box<[RangeListItem]>); /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// /// let _ = RangeListItem::Value(Value(5)); /// let _ = RangeListItem::Range(Value(11)..=Value(15)); @@ -357,7 +357,7 @@ pub enum RangeListItem { /// Can be represented by the AST: /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// /// RangeListItem::Value(Value(99)); /// ``` @@ -373,7 +373,7 @@ pub struct Value(pub u64); /// ``` /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// Samples { /// integer: Some(SampleList { /// sample_ranges: Box::new([SampleRange { @@ -407,7 +407,7 @@ pub struct Samples { /// ``` /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// SampleList { /// sample_ranges: Box::new([ /// SampleRange { @@ -434,7 +434,7 @@ pub struct SampleList { /// ``` /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// SampleRange { /// lower_val: DecimalValue("0.0".to_string()), /// upper_val: Some(DecimalValue("1.5".to_string())), @@ -456,7 +456,7 @@ pub struct SampleRange { /// ``` /// /// ``` -/// use icu::plurals::rules::ast::*; +/// use icu::plurals::rules::reference::ast::*; /// DecimalValue("1.00".to_string()); /// ``` #[derive(Debug, Clone, PartialEq)] diff --git a/components/plurals/src/rules/lexer.rs b/components/plurals/src/rules/reference/lexer.rs similarity index 98% rename from components/plurals/src/rules/lexer.rs rename to components/plurals/src/rules/reference/lexer.rs index 4ab4f668bb7..159d60474fa 100644 --- a/components/plurals/src/rules/lexer.rs +++ b/components/plurals/src/rules/reference/lexer.rs @@ -43,7 +43,7 @@ impl std::error::Error for LexerError {} /// # Examples /// /// ``` -/// use icu::plurals::rules::Lexer; +/// use icu::plurals::rules::reference::Lexer; /// /// let input = b"i = 5"; /// let lexer = Lexer::new(input); @@ -60,7 +60,7 @@ impl<'l> Lexer<'l> { /// # Examples /// /// ``` - /// use icu::plurals::rules::Lexer; + /// use icu::plurals::rules::reference::Lexer; /// /// Lexer::new(b"n = 1"); /// ``` diff --git a/components/plurals/src/rules/reference/mod.rs b/components/plurals/src/rules/reference/mod.rs new file mode 100644 index 00000000000..3c56cba53a7 --- /dev/null +++ b/components/plurals/src/rules/reference/mod.rs @@ -0,0 +1,14 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +pub mod ast; +pub(crate) mod lexer; +pub(crate) mod parser; +pub(crate) mod resolver; +pub(crate) mod serializer; + +pub use lexer::Lexer; +pub use parser::{parse, parse_condition}; +pub use resolver::test_condition; +pub use serializer::serialize; diff --git a/components/plurals/src/rules/parser.rs b/components/plurals/src/rules/reference/parser.rs similarity index 98% rename from components/plurals/src/rules/parser.rs rename to components/plurals/src/rules/reference/parser.rs index a2129f0b6bd..9e8ae3ed473 100644 --- a/components/plurals/src/rules/parser.rs +++ b/components/plurals/src/rules/reference/parser.rs @@ -48,7 +48,7 @@ impl std::error::Error for ParserError {} /// # Examples /// /// ``` -/// use icu::plurals::rules::parse; +/// use icu::plurals::rules::reference::parse; /// /// let input = b"i = 0 or n = 1 @integer 0, 1 @decimal 0.0~1.0, 0.00~0.04"; /// assert_eq!(parse(input).is_ok(), true); @@ -76,7 +76,7 @@ pub fn parse(input: &[u8]) -> Result { /// # Examples /// /// ``` -/// use icu::plurals::rules::parse_condition; +/// use icu::plurals::rules::reference::parse_condition; /// /// let input = b"i = 0 or n = 1"; /// assert_eq!(parse_condition(input).is_ok(), true); diff --git a/components/plurals/src/rules/resolver.rs b/components/plurals/src/rules/reference/resolver.rs similarity index 95% rename from components/plurals/src/rules/resolver.rs rename to components/plurals/src/rules/reference/resolver.rs index b732d18de88..4c21daee787 100644 --- a/components/plurals/src/rules/resolver.rs +++ b/components/plurals/src/rules/reference/resolver.rs @@ -2,8 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use super::ast; use crate::operands::PluralOperands; +use crate::rules::reference::ast; /// Function used to test [`Condition`] against [`PluralOperands`] to identify /// the appropriate [`PluralCategory`]. @@ -12,7 +12,8 @@ use crate::operands::PluralOperands; /// /// ``` /// use icu::plurals::PluralOperands; -/// use icu::plurals::rules::{parse_condition, test_condition}; +/// use icu::plurals::rules::reference::parse_condition; +/// use icu::plurals::rules::reference::test_condition; /// /// let operands = PluralOperands::from(5_usize); /// let condition = parse_condition(b"i = 4..6") diff --git a/components/plurals/src/rules/serializer.rs b/components/plurals/src/rules/reference/serializer.rs similarity index 96% rename from components/plurals/src/rules/serializer.rs rename to components/plurals/src/rules/reference/serializer.rs index 9d6334f7cf4..86eb7592b1a 100644 --- a/components/plurals/src/rules/serializer.rs +++ b/components/plurals/src/rules/reference/serializer.rs @@ -2,7 +2,7 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use crate::rules::ast; +use crate::rules::reference::ast; use core::fmt; use core::ops::RangeInclusive; @@ -11,9 +11,9 @@ use core::ops::RangeInclusive; /// # Examples /// /// ``` -/// use icu::plurals::rules::parse; -/// use icu::plurals::rules::ast; -/// use icu::plurals::rules::serialize; +/// use icu::plurals::rules::reference::parse; +/// use icu::plurals::rules::reference::ast; +/// use icu::plurals::rules::reference::serialize; /// /// let input = "i = 0 or n = 1 @integer 0, 1 @decimal 0.0~1.0, 0.00~0.04"; /// diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs new file mode 100644 index 00000000000..8e9e77f1b9d --- /dev/null +++ b/components/plurals/src/rules/runtime/ast.rs @@ -0,0 +1,227 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(missing_docs)] + +use zerovec::ule::{AsULE, PlainOldULE, VarULE, ULE}; +use zerovec::varzerovec::encode::EncodeAsVarULE; +use zerovec::VarZeroVec; +use zerovec::ZeroVec; + +pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); + +#[derive(Clone, Debug, PartialEq)] +pub enum Operand { + N, + I, + V, + W, + F, + T, + C, + E, +} + +#[repr(packed)] +pub struct RelationULE { + conjunction: bool, + modulo: ::ULE, + range_list: [RangeOrValueULE], +} + +unsafe impl VarULE for RelationULE { + type Error = &'static str; + + #[inline] + unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { + let ptr = bytes as *const [u8] as *const u8; + let len = bytes.len(); + let len_new = len / 8; + let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const RangeOrValueULE, len_new); + &*(fake_slice as *const Self) + } + + #[inline] + fn as_byte_slice(&self) -> &[u8] { + unsafe { + core::slice::from_raw_parts( + self as *const Self as *const u8, + core::mem::size_of_val(self), + ) + } + } + +// what it should do is attempt to parse the first 4 bytes as a u32::ULE (POU<4>), and the remaining bytes as a ZV + fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { + as ULE>::validate_byte_slice(&bytes[..4]) + .map_err(|_| "foo")?; + let remaining = &bytes[4..]; + RangeOrValueULE::validate_byte_slice(remaining) + .map_err(|_| "foo")?; + Ok(()) + } +} + +pub struct Relation<'data> { + conjunction: bool, + // operand: Operand, + modulo: u32, + // equal: bool, + range_list: ZeroVec<'data, RangeOrValue>, +} + +unsafe impl EncodeAsVarULE for Relation<'_> { + type VarULE = RelationULE; + fn encode_var_ule(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + cb(&[ + PlainOldULE::<4>::as_byte_slice(&[self.modulo.as_unaligned()]), + self.range_list.as_bytes(), + ]) + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RangeOrValue { + Range(u32, u32), // XXX: Can we get away from smaller? + Value(u32), +} + +#[repr(transparent)] +pub struct RangeOrValueULE(PlainOldULE<8>); + +unsafe impl ULE for RangeOrValueULE { + type Error = zerovec::ule::ULEError; + + fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { + PlainOldULE::<8>::validate_byte_slice(bytes) + } +} + +impl AsULE for RangeOrValue { + type ULE = RangeOrValueULE; + + #[inline] + fn as_unaligned(&self) -> Self::ULE { + let mut result = [0; 8]; + match self { + Self::Range(start, end) => { + let start_bytes = start.to_be_bytes(); + let end_bytes = end.to_be_bytes(); + result[..4].copy_from_slice(&start_bytes); + result[4..].copy_from_slice(&end_bytes); + RangeOrValueULE(result.into()) + } + Self::Value(idx) => { + let bytes = idx.to_be_bytes(); + result[..4].copy_from_slice(&bytes); + result[4..].copy_from_slice(&bytes); + RangeOrValueULE(result.into()) + } + } + } + + #[inline] + fn from_unaligned(unaligned: &Self::ULE) -> Self { + let b = unaligned.0 .0; + let start = u32::from_be_bytes([b[0], b[1], b[2], b[3]]); + let end = u32::from_be_bytes([b[4], b[5], b[6], b[7]]); + if start == end { + Self::Value(start) + } else { + Self::Range(start, end) + } + } +} + +impl From<&crate::rules::reference::ast::Rule> for Rule<'_> { + fn from(input: &crate::rules::reference::ast::Rule) -> Self { + let mut relations: alloc::vec::Vec = alloc::vec![]; + + for and_condition in input.condition.0.iter() { + for relation in and_condition.0.iter() { + let range_list = alloc::vec![RangeOrValue::Value(1)]; + + relations.push(Relation { + conjunction: true, + // operand: Operand::N, + modulo: 1, + // equal: true, + range_list: ZeroVec::clone_from_slice(&range_list), + }) + } + } + + Self(VarZeroVec::from(relations.as_slice())) + } +} + +mod test { + use super::*; + use crate::rules::reference::ast; + use crate::rules::reference::parse; + use crate::rules::runtime::test_rule; + use crate::PluralOperands; + + #[test] + fn sanity_test() { + // let input = "n % 10 = 3..4,9 and n % 100 != 10..19,70..79,90..99 or n = 0"; + let input = "n = 1"; + let full_ast = parse(input.as_bytes()).unwrap(); + assert_eq!( + full_ast, + ast::Rule { + condition: ast::Condition(Box::new([ast::AndCondition(Box::new([ + ast::Relation { + expression: ast::Expression { + operand: ast::Operand::N, + modulus: None, + }, + operator: ast::Operator::Eq, + range_list: ast::RangeList(Box::new([ast::RangeListItem::Value( + ast::Value(1) + )])) + } + ]))])), + samples: None, + } + ); + + let rule = Rule::from(&full_ast); + + let fd = fixed_decimal::decimal::FixedDecimal::from(5); + let operands = PluralOperands::from(&fd); + assert!( + test_rule(&rule, &operands), + ); + } + + #[test] + fn range_or_value_ule_test() { + let rov = RangeOrValue::Value(1); + let ule = rov.as_unaligned().0; + let ref_bytes = &[0, 0, 0, 1, 0, 0, 0, 1]; + assert_eq!(ULE::as_byte_slice(&[ule]), *ref_bytes); + + let rov = RangeOrValue::Range(2, 4); + let ule = rov.as_unaligned().0; + let ref_bytes = &[0, 0, 0, 2, 0, 0, 0, 4]; + assert_eq!(ULE::as_byte_slice(&[ule]), *ref_bytes); + } + + #[test] + fn relation_ule_test() { + let rov = RangeOrValue::Value(1); + let relation = Relation { + conjunction: true, + modulo: 0, + range_list: ZeroVec::clone_from_slice(&[rov]), + }; + let mut relations = alloc::vec![relation]; + let vzv = VarZeroVec::from(relations.as_slice()); + assert_eq!( + vzv.get_encoded_slice(), + &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1] + ); + } +} diff --git a/components/plurals/src/rules/runtime/mod.rs b/components/plurals/src/rules/runtime/mod.rs new file mode 100644 index 00000000000..b1b6c7669b0 --- /dev/null +++ b/components/plurals/src/rules/runtime/mod.rs @@ -0,0 +1,9 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![allow(missing_docs)] +pub mod ast; +pub(crate) mod resolver; + +pub use resolver::test_rule; diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs new file mode 100644 index 00000000000..0fa34bac01b --- /dev/null +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -0,0 +1,16 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::operands::PluralOperands; +use crate::rules::runtime::ast; + +pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { + let mut left = true; + + for condition in rule.0.iter() { + if condition.conjunction { + } + } + true +} diff --git a/components/plurals/tests/rules.rs b/components/plurals/tests/rules.rs index e6fe81a8542..4a787ebc400 100644 --- a/components/plurals/tests/rules.rs +++ b/components/plurals/tests/rules.rs @@ -5,7 +5,10 @@ mod fixtures; mod helpers; -use icu_plurals::rules::{parse, parse_condition, serialize, test_condition, Lexer}; +use icu_plurals::rules::{ + reference::test_condition, + reference::{parse, parse_condition, serialize, Lexer}, +}; use icu_plurals::PluralOperands; #[test] From fb3034fafcf5452e1ae77c481f5e74cda2d5b8f1 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 18 Oct 2021 10:58:46 -0700 Subject: [PATCH 02/30] Add ZeroVec::borrowed_from_slice() --- utils/zerovec/src/zerovec/mod.rs | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/utils/zerovec/src/zerovec/mod.rs b/utils/zerovec/src/zerovec/mod.rs index b80c733cd34..bd7e9c14afe 100644 --- a/utils/zerovec/src/zerovec/mod.rs +++ b/utils/zerovec/src/zerovec/mod.rs @@ -418,7 +418,7 @@ impl<'a> ZeroVec<'a, u8> { } } -impl ZeroVec<'_, T> +impl<'a, T> ZeroVec<'a, T> where T: AsULE, { @@ -445,6 +445,31 @@ where Self::Owned(other.iter().copied().map(T::as_unaligned).collect()) } + /// Creates a `ZeroVec` from a `&[T::ULE]` by borrowing from it. + /// + /// This function results in a `Borrowed` instance of `ZeroVec`. + /// + /// # Example + /// + /// ``` + /// use zerovec::ZeroVec; + /// use zerovec::ule::*; + /// + /// // The little-endian bytes correspond to the numbers on the following line. + /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; + /// let nums: &[PlainOldULE<2>] = &[211_u16.as_unaligned(), 281_u16.as_unaligned(), + /// 421_u16.as_unaligned(), 461_u16.as_unaligned()]; + /// + /// let zerovec = ZeroVec::::borrowed_from_slice(nums); + /// + /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); + /// assert_eq!(bytes, zerovec.as_bytes()); + /// ``` + #[inline] + pub fn borrowed_from_slice(other: &'a [T::ULE]) -> Self { + Self::Borrowed(other) + } + /// Creates a `Vec` from a `ZeroVec`. /// /// # Example From 830549a2f64100b49c141574d61911cdb5d55db2 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 18 Oct 2021 11:03:47 -0700 Subject: [PATCH 03/30] Add RelationULE::as_relation() --- components/plurals/src/rules/runtime/ast.rs | 10 ++++++++++ components/plurals/src/rules/runtime/resolver.rs | 1 + 2 files changed, 11 insertions(+) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 8e9e77f1b9d..0f4ccbd3e9b 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -30,6 +30,16 @@ pub struct RelationULE { range_list: [RangeOrValueULE], } +impl RelationULE { + pub fn as_relation<'a>(&'a self) -> Relation<'a> { + Relation { + conjunction: self.conjunction, + modulo: u32::from_unaligned(&self.modulo), + range_list: ZeroVec::borrowed_from_slice(&self.range_list), + } + } +} + unsafe impl VarULE for RelationULE { type Error = &'static str; diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs index 0fa34bac01b..174f17f05d8 100644 --- a/components/plurals/src/rules/runtime/resolver.rs +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -9,6 +9,7 @@ pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { let mut left = true; for condition in rule.0.iter() { + let condition = condition.as_relation(); if condition.conjunction { } } From a1ed16e0a3bfba9ba5f8d84b10d3e68c6be20633 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Mon, 18 Oct 2021 12:32:51 -0700 Subject: [PATCH 04/30] Add encoding/decoding for andor/plurals/operand --- components/plurals/src/rules/runtime/ast.rs | 116 ++++++++++++++---- .../plurals/src/rules/runtime/resolver.rs | 3 +- 2 files changed, 90 insertions(+), 29 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 0f4ccbd3e9b..fcad7c21a8b 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -9,35 +9,85 @@ use zerovec::varzerovec::encode::EncodeAsVarULE; use zerovec::VarZeroVec; use zerovec::ZeroVec; +use core::mem; + pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); -#[derive(Clone, Debug, PartialEq)] +#[derive(Copy, Clone, Debug, PartialEq)] +#[repr(u8)] pub enum Operand { - N, - I, - V, - W, - F, - T, - C, - E, + N = 1, + I = 2, + V = 3, + W = 4, + F = 5, + T = 6, + C = 7, + E = 8, +} + +impl Operand { + fn encode(self) -> u8 { + self as u8 + } + + /// Safety invariant: must be a valid encoded operand + unsafe fn decode(encoded: u8) -> Self { + // safe due to the repr(u8) + unsafe { mem::transmute(encoded) } + } } #[repr(packed)] pub struct RelationULE { - conjunction: bool, + /// This maps to (AndOr, Polarity, Operand), + /// with the first bit mapping to AndOr (1 == And), the second bit + /// to Polarity (1 == Positive), and the remaining bits to Operand + /// encoded via Operand::encode. It is unsound for the Operand bits to + /// not be a valid encoded Operand. + andor_polarity_operand: u8, modulo: ::ULE, range_list: [RangeOrValueULE], } impl RelationULE { pub fn as_relation<'a>(&'a self) -> Relation<'a> { + // safe to call because we are operating on a valid RelationULE + let (and_or, polarity, operand) = + unsafe { Self::decode_andor_polarity_operand(self.andor_polarity_operand) }; Relation { - conjunction: self.conjunction, + and_or, + polarity, + operand, modulo: u32::from_unaligned(&self.modulo), range_list: ZeroVec::borrowed_from_slice(&self.range_list), } } + + fn encode_andor_polarity_operand(and_or: AndOr, polarity: Polarity, operand: Operand) -> u8 { + ((and_or == AndOr::And) as u8) + << 7 + ((polarity == Polarity::Positive) as u8) + << 6 + operand.encode() + } + + // Must be called on an `encoded` that is obtained from `encode_andor_polarity_operand`, i.e. + // the field on a valid RelationULE + unsafe fn decode_andor_polarity_operand(encoded: u8) -> (AndOr, Polarity, Operand) { + let and_or = if encoded & 0b1000_0000 != 0 { + AndOr::And + } else { + AndOr::Or + }; + + let polarity = if encoded & 0b0100_0000 != 0 { + Polarity::Positive + } else { + Polarity::Negative + }; + + let operand = Operand::decode(encoded & 0b0010_000); + (and_or, polarity, operand) + } } unsafe impl VarULE for RelationULE { @@ -62,29 +112,43 @@ unsafe impl VarULE for RelationULE { } } -// what it should do is attempt to parse the first 4 bytes as a u32::ULE (POU<4>), and the remaining bytes as a ZV + // what it should do is attempt to parse the first 4 bytes as a u32::ULE (POU<4>), and the remaining bytes as a ZV fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { - as ULE>::validate_byte_slice(&bytes[..4]) - .map_err(|_| "foo")?; + as ULE>::validate_byte_slice(&bytes[..4]).map_err(|_| "foo")?; let remaining = &bytes[4..]; - RangeOrValueULE::validate_byte_slice(remaining) - .map_err(|_| "foo")?; + RangeOrValueULE::validate_byte_slice(remaining).map_err(|_| "foo")?; Ok(()) } } +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum AndOr { + Or, + And, +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +pub enum Polarity { + Negative, + Positive, +} + pub struct Relation<'data> { - conjunction: bool, - // operand: Operand, - modulo: u32, - // equal: bool, - range_list: ZeroVec<'data, RangeOrValue>, + pub(crate) and_or: AndOr, + pub(crate) polarity: Polarity, + pub(crate) operand: Operand, + pub(crate) modulo: u32, + pub(crate) range_list: ZeroVec<'data, RangeOrValue>, } unsafe impl EncodeAsVarULE for Relation<'_> { type VarULE = RelationULE; fn encode_var_ule(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + let encoded = + RelationULE::encode_andor_polarity_operand(self.and_or, self.polarity, self.operand); + cb(&[ + &[encoded], PlainOldULE::<4>::as_byte_slice(&[self.modulo.as_unaligned()]), self.range_list.as_bytes(), ]) @@ -153,10 +217,10 @@ impl From<&crate::rules::reference::ast::Rule> for Rule<'_> { let range_list = alloc::vec![RangeOrValue::Value(1)]; relations.push(Relation { - conjunction: true, - // operand: Operand::N, + and_or: AndOr::And, + polarity: Polarity::Positive, + operand: Operand::N, modulo: 1, - // equal: true, range_list: ZeroVec::clone_from_slice(&range_list), }) } @@ -201,9 +265,7 @@ mod test { let fd = fixed_decimal::decimal::FixedDecimal::from(5); let operands = PluralOperands::from(&fd); - assert!( - test_rule(&rule, &operands), - ); + assert!(test_rule(&rule, &operands),); } #[test] diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs index 174f17f05d8..7939316a084 100644 --- a/components/plurals/src/rules/runtime/resolver.rs +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -10,8 +10,7 @@ pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { for condition in rule.0.iter() { let condition = condition.as_relation(); - if condition.conjunction { - } + if condition.and_or == ast::AndOr::And {} } true } From 92d4889946b58fc12a2f19f496a6c6d61cd25ab8 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 20 Oct 2021 07:19:41 -0700 Subject: [PATCH 05/30] fix encode --- components/plurals/src/rules/runtime/ast.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index fcad7c21a8b..7e140a48c99 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -5,7 +5,7 @@ #![allow(missing_docs)] use zerovec::ule::{AsULE, PlainOldULE, VarULE, ULE}; -use zerovec::varzerovec::encode::EncodeAsVarULE; +use zerovec::ule::custom::EncodeAsVarULE; use zerovec::VarZeroVec; use zerovec::ZeroVec; @@ -141,8 +141,7 @@ pub struct Relation<'data> { pub(crate) range_list: ZeroVec<'data, RangeOrValue>, } -unsafe impl EncodeAsVarULE for Relation<'_> { - type VarULE = RelationULE; +unsafe impl EncodeAsVarULE for Relation<'_> { fn encode_var_ule(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { let encoded = RelationULE::encode_andor_polarity_operand(self.and_or, self.polarity, self.operand); From ce1e773d0c74f25b7f344a503275dbf3e2230efb Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 20 Oct 2021 07:22:02 -0700 Subject: [PATCH 06/30] fix from_byte_slice_unchecked --- components/plurals/src/rules/runtime/ast.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 7e140a48c99..57934a3c79f 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -97,9 +97,11 @@ unsafe impl VarULE for RelationULE { unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { let ptr = bytes as *const [u8] as *const u8; let len = bytes.len(); - let len_new = len / 8; + let len_new = (len - 5) / 8; let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const RangeOrValueULE, len_new); - &*(fake_slice as *const Self) + let ret = &*(fake_slice as *const Self); + debug_assert_eq!(core::mem::size_of_val(ret), core::mem::size_of_val(bytes)); + ret } #[inline] From 66bf8cd88577a73c2fe5c9935021bd2c199515e2 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Wed, 20 Oct 2021 09:37:37 -0700 Subject: [PATCH 07/30] slightly better impl --- components/plurals/Cargo.toml | 2 +- components/plurals/src/data.rs | 15 +- components/plurals/src/provider.rs | 15 +- components/plurals/src/rules/reference/ast.rs | 10 +- .../plurals/src/rules/reference/parser.rs | 10 +- components/plurals/src/rules/runtime/ast.rs | 417 ++++++++++++++---- .../plurals/src/rules/runtime/resolver.rs | 57 ++- 7 files changed, 420 insertions(+), 106 deletions(-) diff --git a/components/plurals/Cargo.toml b/components/plurals/Cargo.toml index 8491e1c17fa..b4057e5b97f 100644 --- a/components/plurals/Cargo.toml +++ b/components/plurals/Cargo.toml @@ -37,7 +37,7 @@ icu_provider = { version = "0.4", path = "../../provider/core", features = ["mac icu_locid = { version = "0.4", path = "../locid" } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } displaydoc = { version = "0.2.3", default-features = false } -zerovec = { version = "0.3", path = "../../utils/zerovec" } +zerovec = { version = "0.4", path = "../../utils/zerovec" } [dev-dependencies] criterion = "0.3" diff --git a/components/plurals/src/data.rs b/components/plurals/src/data.rs index ca4f56f8447..81f1e02b670 100644 --- a/components/plurals/src/data.rs +++ b/components/plurals/src/data.rs @@ -64,13 +64,14 @@ fn parse_rule(input: &Option>) -> Result, Plural impl<'data> TryInto for &PluralRuleStringsV1<'data> { type Error = PluralRulesError; fn try_into(self) -> Result { - Ok(PluralRuleList { - zero: parse_rule(&self.zero)?, - one: parse_rule(&self.one)?, - two: parse_rule(&self.two)?, - few: parse_rule(&self.few)?, - many: parse_rule(&self.many)?, - }) + todo!() + // Ok(PluralRuleList { + // zero: parse_rule(&self.zero)?, + // one: parse_rule(&self.one)?, + // two: parse_rule(&self.two)?, + // few: parse_rule(&self.few)?, + // many: parse_rule(&self.many)?, + // }) } } diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index 64dd7f51798..63304171a06 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -6,6 +6,7 @@ //! //! Read more about data providers: [`icu_provider`] +use crate::rules::runtime::ast::Rule; use alloc::borrow::Cow; use icu_provider::yoke::{self, *}; @@ -23,16 +24,18 @@ pub mod resolver; /// /// More information: #[icu_provider::data_struct] -#[derive(Debug, PartialEq, Clone, Default)] +#[derive(Default)] #[cfg_attr( feature = "provider_serde", derive(serde::Serialize, serde::Deserialize) )] #[allow(missing_docs)] // TODO(#1029) - Add missing docs. pub struct PluralRuleStringsV1<'data> { - pub zero: Option>, - pub one: Option>, - pub two: Option>, - pub few: Option>, - pub many: Option>, + #[cfg_attr(feature = "provider_serde", serde(borrow))] + pub zero: Option>, + // pub zero: Option>, + // pub one: Option>, + // pub two: Option>, + // pub few: Option>, + // pub many: Option>, } diff --git a/components/plurals/src/rules/reference/ast.rs b/components/plurals/src/rules/reference/ast.rs index f74d7daf069..e67a3954743 100644 --- a/components/plurals/src/rules/reference/ast.rs +++ b/components/plurals/src/rules/reference/ast.rs @@ -38,8 +38,8 @@ //! [`PluralCategory`]: crate::PluralCategory //! [`parse`]: super::parse() //! [`test_condition`]: super::test_condition() -use alloc::boxed::Box; use alloc::string::String; +use alloc::vec::Vec; use core::ops::RangeInclusive; /// A complete AST representation of a plural rule. @@ -127,7 +127,7 @@ pub struct Rule { /// /// [`AndConditions`]: AndCondition #[derive(Debug, Clone, PartialEq)] -pub struct Condition(pub Box<[AndCondition]>); +pub struct Condition(pub Vec); /// An incomplete AST representation of a plural rule. Comprises a vector of [`Relations`]. /// @@ -168,7 +168,7 @@ pub struct Condition(pub Box<[AndCondition]>); /// /// [`Relations`]: Relation #[derive(Debug, Clone, PartialEq)] -pub struct AndCondition(pub Box<[Relation]>); +pub struct AndCondition(pub Vec); /// An incomplete AST representation of a plural rule. Comprises an [`Expression`], an [`Operator`], and a [`RangeList`]. /// @@ -314,7 +314,7 @@ pub enum Operand { /// /// [`RangeListItems`]: RangeListItem #[derive(Debug, Clone, PartialEq)] -pub struct RangeList(pub Box<[RangeListItem]>); +pub struct RangeList(pub Vec); /// An enum of items that appear in a [`RangeList`]: `Range` or a `Value`. /// @@ -421,7 +421,7 @@ pub struct Samples { #[derive(Debug, Clone, PartialEq)] #[allow(missing_docs)] // TODO(#1029) - Add missing docs. pub struct SampleList { - pub sample_ranges: Box<[SampleRange]>, + pub sample_ranges: Vec, pub ellipsis: bool, } diff --git a/components/plurals/src/rules/reference/parser.rs b/components/plurals/src/rules/reference/parser.rs index 9e8ae3ed473..2a112992d08 100644 --- a/components/plurals/src/rules/reference/parser.rs +++ b/components/plurals/src/rules/reference/parser.rs @@ -123,7 +123,7 @@ impl<'p> Parser<'p> { if let Some(cond) = self.get_and_condition()? { result.push(cond); } else { - return Ok(ast::Condition(result.into_boxed_slice())); + return Ok(ast::Condition(result)); } while self.take_if(Token::Or) { @@ -134,7 +134,7 @@ impl<'p> Parser<'p> { } } // If lexer is not done, error? - Ok(ast::Condition(result.into_boxed_slice())) + Ok(ast::Condition(result)) } fn get_and_condition(&mut self) -> Result, ParserError> { @@ -148,7 +148,7 @@ impl<'p> Parser<'p> { return Err(ParserError::ExpectedRelation); } } - Ok(Some(ast::AndCondition(rel.into_boxed_slice()))) + Ok(Some(ast::AndCondition(rel))) } else { Ok(None) } @@ -195,7 +195,7 @@ impl<'p> Parser<'p> { break; } } - Ok(ast::RangeList(range_list.into_boxed_slice())) + Ok(ast::RangeList(range_list)) } fn take_if(&mut self, token: Token) -> bool { @@ -255,7 +255,7 @@ impl<'p> Parser<'p> { ranges.push(self.get_sample_range()?); } Ok(ast::SampleList { - sample_ranges: ranges.into_boxed_slice(), + sample_ranges: ranges, ellipsis, }) } diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 57934a3c79f..1b57f5602c2 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -4,40 +4,261 @@ #![allow(missing_docs)] -use zerovec::ule::{AsULE, PlainOldULE, VarULE, ULE}; -use zerovec::ule::custom::EncodeAsVarULE; -use zerovec::VarZeroVec; -use zerovec::ZeroVec; +use crate::rules::reference; +use core::{convert::TryInto, fmt, mem, str::FromStr}; +use icu_provider::yoke::{self, *}; +use zerovec::{ + ule::{custom::EncodeAsVarULE, AsULE, PlainOldULE, VarULE, ULE}, + {VarZeroVec, ZeroVec}, +}; + +#[derive(Yokeable, ZeroCopyFrom)] +pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); -use core::mem; +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[repr(u8)] +pub enum AndOr { + Or, + And, +} -pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); +#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[repr(u8)] +pub enum Polarity { + Negative, + Positive, +} #[derive(Copy, Clone, Debug, PartialEq)] #[repr(u8)] pub enum Operand { - N = 1, - I = 2, - V = 3, - W = 4, - F = 5, - T = 6, - C = 7, - E = 8, + N, + I, + V, + W, + F, + T, + C, + E, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RangeOrValue { + Range(u32, u32), // XXX: Can we get away from smaller? + Value(u32), } +#[derive(Clone, Debug, PartialEq)] +pub struct Relation<'data> { + pub(crate) and_or: AndOr, + pub(crate) polarity: Polarity, + pub(crate) operand: Operand, + pub(crate) modulo: u32, + pub(crate) range_list: ZeroVec<'data, RangeOrValue>, +} + +///// + impl Operand { - fn encode(self) -> u8 { + fn idx(self) -> u8 { self as u8 } - /// Safety invariant: must be a valid encoded operand - unsafe fn decode(encoded: u8) -> Self { + fn from_idx(idx: u8) -> Self { + debug_assert!(idx < 8); // safe due to the repr(u8) - unsafe { mem::transmute(encoded) } + unsafe { mem::transmute(idx) } + } +} + +impl From<&reference::ast::Rule> for Rule<'_> { + fn from(input: &reference::ast::Rule) -> Self { + let mut relations: alloc::vec::Vec = alloc::vec![]; + + for (i_or, and_condition) in input.condition.0.iter().enumerate() { + for (i_and, relation) in and_condition.0.iter().enumerate() { + let range_list = relation + .range_list + .0 + .iter() + .map(Into::into) + .collect::>(); + + let and_or = if i_or > 0 && i_and == 0 { + AndOr::Or + } else { + AndOr::And + }; + + relations.push(Relation { + and_or, + polarity: relation.operator.into(), + operand: relation.expression.operand.into(), + modulo: get_modulo(&relation.expression.modulus), + range_list: ZeroVec::clone_from_slice(&range_list), + }) + } + } + + Self(VarZeroVec::from(relations.as_slice())) + } +} + +impl From<&Rule<'_>> for reference::ast::Rule { + fn from(input: &Rule<'_>) -> Self { + let mut or_conditions: alloc::vec::Vec = alloc::vec![]; + let mut and_conditions: alloc::vec::Vec = alloc::vec![]; + for rel in input.0.iter() { + let rel = rel.as_relation(); + let list = rel.range_list.iter().map(Into::into).collect(); + let relation = reference::ast::Relation { + expression: (rel.operand, rel.modulo).into(), + operator: rel.polarity.into(), + range_list: reference::ast::RangeList(list), + }; + + if rel.and_or == AndOr::And { + and_conditions.push(relation); + } else { + or_conditions.push(reference::ast::AndCondition(and_conditions)); + and_conditions = alloc::vec![relation]; + } + } + + if !and_conditions.is_empty() { + or_conditions.push(reference::ast::AndCondition(and_conditions)); + } + + Self { + condition: reference::ast::Condition(or_conditions), + samples: None, + } + } +} + +impl From for Polarity { + fn from(op: reference::ast::Operator) -> Self { + match op { + reference::ast::Operator::Eq => Polarity::Positive, + reference::ast::Operator::NotEq => Polarity::Negative, + } + } +} + +impl From for reference::ast::Operator { + fn from(pol: Polarity) -> Self { + match pol { + Polarity::Negative => reference::ast::Operator::NotEq, + Polarity::Positive => reference::ast::Operator::Eq, + } + } +} + +impl From for Operand { + fn from(op: reference::ast::Operand) -> Self { + match op { + reference::ast::Operand::N => Self::N, + reference::ast::Operand::I => Self::I, + reference::ast::Operand::V => Self::V, + reference::ast::Operand::W => Self::W, + reference::ast::Operand::F => Self::F, + reference::ast::Operand::T => Self::T, + reference::ast::Operand::C => Self::C, + reference::ast::Operand::E => Self::E, + } + } +} + +impl From for reference::ast::Operand { + fn from(op: Operand) -> Self { + match op { + Operand::N => Self::N, + Operand::I => Self::I, + Operand::V => Self::V, + Operand::W => Self::W, + Operand::F => Self::F, + Operand::T => Self::T, + Operand::C => Self::C, + Operand::E => Self::E, + } + } +} + +impl From<(Operand, u32)> for reference::ast::Expression { + fn from(input: (Operand, u32)) -> Self { + Self { + operand: input.0.into(), + modulus: get_modulus(input.1), + } + } +} + +fn get_modulo(op: &Option) -> u32 { + if let Some(op) = op { + u32::from(op) + } else { + 0 + } +} + +fn get_modulus(input: u32) -> Option { + if input == 0 { + None + } else { + Some(input.into()) } } +impl From<&reference::ast::Value> for u32 { + fn from(v: &reference::ast::Value) -> Self { + v.0.try_into().expect("Failed to convert u64 into u32") + } +} + +impl From for reference::ast::Value { + fn from(input: u32) -> Self { + Self(input.into()) + } +} + +impl From<&reference::ast::RangeListItem> for RangeOrValue { + fn from(item: &reference::ast::RangeListItem) -> Self { + match item { + reference::ast::RangeListItem::Range(range) => { + RangeOrValue::Range(range.start().into(), range.end().into()) + } + reference::ast::RangeListItem::Value(value) => RangeOrValue::Value(value.into()), + } + } +} + +impl From for reference::ast::RangeListItem { + fn from(item: RangeOrValue) -> Self { + match item { + RangeOrValue::Range(min, max) => Self::Range(min.into()..=max.into()), + RangeOrValue::Value(value) => Self::Value(value.into()), + } + } +} + +impl FromStr for Rule<'_> { + type Err = reference::parser::ParserError; + + fn from_str(s: &str) -> Result { + let rule = reference::parser::parse(s.as_bytes())?; + Ok((&rule).into()) + } +} + +impl fmt::Display for Rule<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let reference = reference::ast::Rule::from(self); + reference::serialize(&reference, f) + } +} + +///////////////// + #[repr(packed)] pub struct RelationULE { /// This maps to (AndOr, Polarity, Operand), @@ -51,7 +272,7 @@ pub struct RelationULE { } impl RelationULE { - pub fn as_relation<'a>(&'a self) -> Relation<'a> { + pub fn as_relation(&self) -> Relation { // safe to call because we are operating on a valid RelationULE let (and_or, polarity, operand) = unsafe { Self::decode_andor_polarity_operand(self.andor_polarity_operand) }; @@ -59,15 +280,15 @@ impl RelationULE { and_or, polarity, operand, - modulo: u32::from_unaligned(&self.modulo), + modulo: u32::from_unaligned(self.modulo), range_list: ZeroVec::borrowed_from_slice(&self.range_list), } } fn encode_andor_polarity_operand(and_or: AndOr, polarity: Polarity, operand: Operand) -> u8 { - ((and_or == AndOr::And) as u8) - << 7 + ((polarity == Polarity::Positive) as u8) - << 6 + operand.encode() + (((and_or == AndOr::And) as u8) << 7) + + (((polarity == Polarity::Positive) as u8) << 6) + + operand.idx() } // Must be called on an `encoded` that is obtained from `encode_andor_polarity_operand`, i.e. @@ -85,7 +306,7 @@ impl RelationULE { Polarity::Negative }; - let operand = Operand::decode(encoded & 0b0010_000); + let operand = Operand::from_idx(encoded & 0b0010_0000); (and_or, polarity, operand) } } @@ -95,9 +316,12 @@ unsafe impl VarULE for RelationULE { #[inline] unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { - let ptr = bytes as *const [u8] as *const u8; + let ptr = bytes.as_ptr(); let len = bytes.len(); + // subtract length of andor_polarity_operand and modulo and then convert between a slice of bytes and PlainOldULE<8> let len_new = (len - 5) / 8; + // it's hard constructing custom DSTs, we fake a pointer/length construction + // eventually we can use the Pointer::Metadata APIs when they stabilize let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const RangeOrValueULE, len_new); let ret = &*(fake_slice as *const Self); debug_assert_eq!(core::mem::size_of_val(ret), core::mem::size_of_val(bytes)); @@ -116,38 +340,18 @@ unsafe impl VarULE for RelationULE { // what it should do is attempt to parse the first 4 bytes as a u32::ULE (POU<4>), and the remaining bytes as a ZV fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { - as ULE>::validate_byte_slice(&bytes[..4]).map_err(|_| "foo")?; - let remaining = &bytes[4..]; + let bits = bytes[0]; + as ULE>::validate_byte_slice(&bytes[1..5]).map_err(|_| "foo")?; + let remaining = &bytes[5..]; RangeOrValueULE::validate_byte_slice(remaining).map_err(|_| "foo")?; Ok(()) } } -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum AndOr { - Or, - And, -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug)] -pub enum Polarity { - Negative, - Positive, -} - -pub struct Relation<'data> { - pub(crate) and_or: AndOr, - pub(crate) polarity: Polarity, - pub(crate) operand: Operand, - pub(crate) modulo: u32, - pub(crate) range_list: ZeroVec<'data, RangeOrValue>, -} - unsafe impl EncodeAsVarULE for Relation<'_> { - fn encode_var_ule(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { + fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { let encoded = RelationULE::encode_andor_polarity_operand(self.and_or, self.polarity, self.operand); - cb(&[ &[encoded], PlainOldULE::<4>::as_byte_slice(&[self.modulo.as_unaligned()]), @@ -156,12 +360,7 @@ unsafe impl EncodeAsVarULE for Relation<'_> { } } -#[derive(Clone, Copy, Debug, PartialEq)] -pub enum RangeOrValue { - Range(u32, u32), // XXX: Can we get away from smaller? - Value(u32), -} - +#[derive(Clone, Copy)] #[repr(transparent)] pub struct RangeOrValueULE(PlainOldULE<8>); @@ -177,7 +376,7 @@ impl AsULE for RangeOrValue { type ULE = RangeOrValueULE; #[inline] - fn as_unaligned(&self) -> Self::ULE { + fn as_unaligned(self) -> Self::ULE { let mut result = [0; 8]; match self { Self::Range(start, end) => { @@ -197,7 +396,7 @@ impl AsULE for RangeOrValue { } #[inline] - fn from_unaligned(unaligned: &Self::ULE) -> Self { + fn from_unaligned(unaligned: Self::ULE) -> Self { let b = unaligned.0 .0; let start = u32::from_be_bytes([b[0], b[1], b[2], b[3]]); let end = u32::from_be_bytes([b[4], b[5], b[6], b[7]]); @@ -209,28 +408,79 @@ impl AsULE for RangeOrValue { } } -impl From<&crate::rules::reference::ast::Rule> for Rule<'_> { - fn from(input: &crate::rules::reference::ast::Rule) -> Self { - let mut relations: alloc::vec::Vec = alloc::vec![]; +#[cfg(feature = "provider_serde")] +mod serde { + use super::*; + use ::serde::{de, ser, Deserialize, Deserializer, Serialize}; + use alloc::{ + format, + string::{String, ToString}, + }; + + impl Serialize for Rule<'_> { + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + if serializer.is_human_readable() { + let string: String = self.to_string(); + serializer.serialize_str(&string) + } else { + serializer.serialize_bytes(self.0.get_encoded_slice()) + } + } + } - for and_condition in input.condition.0.iter() { - for relation in and_condition.0.iter() { - let range_list = alloc::vec![RangeOrValue::Value(1)]; + struct DeserializeRule; - relations.push(Relation { - and_or: AndOr::And, - polarity: Polarity::Positive, - operand: Operand::N, - modulo: 1, - range_list: ZeroVec::clone_from_slice(&range_list), - }) - } + impl<'de> de::Visitor<'de> for DeserializeRule { + type Value = Rule<'de>; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "a valid rule.") } - Self(VarZeroVec::from(relations.as_slice())) + fn visit_borrowed_str(self, rule_string: &'de str) -> Result + where + E: de::Error, + { + Rule::from_str(rule_string).map_err(|err| { + de::Error::invalid_value( + de::Unexpected::Other(&format!("{}", err)), + &"a valid UTS 35 rule string", + ) + }) + } + + fn visit_borrowed_bytes(self, rule_bytes: &'de [u8]) -> Result + where + E: de::Error, + { + let rule = VarZeroVec::parse_byte_slice(rule_bytes).map_err(|err| { + de::Error::invalid_value( + de::Unexpected::Other(&format!("{}", err)), + &"a valid UTS 35 rule byte slice", + ) + })?; + Ok(Rule(rule)) + } + } + + impl<'de: 'data, 'data> Deserialize<'de> for Rule<'data> { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + deserializer.deserialize_str(DeserializeRule) + } else { + deserializer.deserialize_bytes(DeserializeRule) + } + } } } +#[cfg(test)] mod test { use super::*; use crate::rules::reference::ast; @@ -239,7 +489,7 @@ mod test { use crate::PluralOperands; #[test] - fn sanity_test() { + fn simple_rule_test() { // let input = "n % 10 = 3..4,9 and n % 100 != 10..19,70..79,90..99 or n = 0"; let input = "n = 1"; let full_ast = parse(input.as_bytes()).unwrap(); @@ -264,7 +514,18 @@ mod test { let rule = Rule::from(&full_ast); - let fd = fixed_decimal::decimal::FixedDecimal::from(5); + let fd = fixed_decimal::decimal::FixedDecimal::from(1); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); + } + + #[test] + fn complex_rule_test() { + let input = "n % 10 = 3..4,9 and n % 100 != 10..19,70..79,90..99 or n = 0"; + let full_ast = parse(input.as_bytes()).unwrap(); + let rule = Rule::from(&full_ast); + + let fd = fixed_decimal::decimal::FixedDecimal::from(110); let operands = PluralOperands::from(&fd); assert!(test_rule(&rule, &operands),); } @@ -286,15 +547,17 @@ mod test { fn relation_ule_test() { let rov = RangeOrValue::Value(1); let relation = Relation { - conjunction: true, + and_or: AndOr::And, + polarity: Polarity::Positive, + operand: Operand::N, modulo: 0, range_list: ZeroVec::clone_from_slice(&[rov]), }; - let mut relations = alloc::vec![relation]; + let relations = alloc::vec![relation]; let vzv = VarZeroVec::from(relations.as_slice()); assert_eq!( vzv.get_encoded_slice(), - &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1] + &[1, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1] ); } } diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs index 7939316a084..119a0d49e4b 100644 --- a/components/plurals/src/rules/runtime/resolver.rs +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -4,13 +4,60 @@ use crate::operands::PluralOperands; use crate::rules::runtime::ast; +use zerovec::ZeroVec; pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { - let mut left = true; + let mut left_and = true; - for condition in rule.0.iter() { - let condition = condition.as_relation(); - if condition.and_or == ast::AndOr::And {} + for relation in rule.0.iter() { + let relation = relation.as_relation(); + if relation.and_or == ast::AndOr::Or { + if left_and { + return true; + } else { + left_and = test_value(get_value(&relation, operands), &relation.range_list); + } + } else if left_and && !test_value(get_value(&relation, operands), &relation.range_list) { + left_and = false; + } + } + left_and +} + +fn test_value(value: Option, range_list: &ZeroVec) -> bool { + range_list.iter().any(|range| { + if let Some(value) = value { + match range { + ast::RangeOrValue::Range(min, max) => { + value >= u64::from(min) && value <= u64::from(max) + } + ast::RangeOrValue::Value(v) => u64::from(v) == value, + } + } else { + false + } + }) +} + +fn get_value(relation: &ast::Relation, operands: &PluralOperands) -> Option { + let value = match relation.operand { + ast::Operand::N => { + if operands.w == 0 { + operands.i + } else { + return None; + } + } + ast::Operand::I => operands.i, + ast::Operand::F => operands.f, + ast::Operand::V => operands.v as u64, + ast::Operand::W => operands.w as u64, + ast::Operand::T => operands.t, + ast::Operand::C | ast::Operand::E => operands.c as u64, + }; + if relation.modulo > 0 { + value.checked_rem_euclid(relation.modulo.into()) + } else { + Some(value) } - true } From 978c6e3f7fe363c028b6dc6c6501cabf82d66779 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 28 Oct 2021 13:28:00 -0500 Subject: [PATCH 08/30] Plug it all together --- components/datetime/src/datetime.rs | 10 +- components/datetime/src/format/datetime.rs | 2 +- components/datetime/src/zoned_datetime.rs | 6 +- components/plurals/Cargo.toml | 2 +- components/plurals/benches/parser.rs | 6 +- components/plurals/src/data.rs | 126 ------------------ components/plurals/src/lib.rs | 110 ++++++++++----- components/plurals/src/provider.rs | 18 +-- components/plurals/src/provider/resolver.rs | 33 ----- components/plurals/src/rules/mod.rs | 16 +-- components/plurals/src/rules/reference/ast.rs | 62 ++++----- components/plurals/src/rules/runtime/ast.rs | 69 +++++++--- .../plurals/src/rules/runtime/resolver.rs | 52 ++++---- components/plurals/tests/plurals.rs | 42 +++--- provider/cldr/src/transform/plurals.rs | 21 ++- 15 files changed, 243 insertions(+), 332 deletions(-) delete mode 100644 components/plurals/src/data.rs delete mode 100644 components/plurals/src/provider/resolver.rs diff --git a/components/datetime/src/datetime.rs b/components/datetime/src/datetime.rs index 71dd41e50f7..783ff7e40c6 100644 --- a/components/datetime/src/datetime.rs +++ b/components/datetime/src/datetime.rs @@ -13,7 +13,7 @@ use crate::{ }; use alloc::string::String; use icu_locid::Locale; -use icu_plurals::{provider::PluralRuleStringsV1Marker, PluralRuleType, PluralRules}; +use icu_plurals::{provider::PluralRulesV1Marker, PluralRuleType, PluralRules}; use icu_provider::prelude::*; use crate::{ @@ -65,7 +65,7 @@ pub struct DateTimeFormat<'data> { pub(super) locale: Locale, pub(super) patterns: DataPayload<'data, PatternPluralsFromPatternsV1Marker>, pub(super) symbols: Option>, - pub(super) ordinal_rules: Option, + pub(super) ordinal_rules: Option>, } impl<'data> DateTimeFormat<'data> { @@ -99,7 +99,7 @@ impl<'data> DateTimeFormat<'data> { D: DataProvider<'data, DateSymbolsV1Marker> + DataProvider<'data, DatePatternsV1Marker> + DataProvider<'data, DateSkeletonPatternsV1Marker> - + DataProvider<'data, PluralRuleStringsV1Marker>, + + DataProvider<'data, PluralRulesV1Marker>, { let locale = locale.into(); @@ -113,7 +113,7 @@ impl<'data> DateTimeFormat<'data> { let ordinal_rules = if let PatternPlurals::MultipleVariants(_) = &patterns.get().0 { Some(PluralRules::try_new( - langid.clone(), + locale.clone(), data_provider, PluralRuleType::Ordinal, )?) @@ -159,7 +159,7 @@ impl<'data> DateTimeFormat<'data> { locale: T, patterns: DataPayload<'data, PatternPluralsFromPatternsV1Marker>, symbols: Option>, - ordinal_rules: Option, + ordinal_rules: Option>, ) -> Self { let locale = locale.into(); diff --git a/components/datetime/src/format/datetime.rs b/components/datetime/src/format/datetime.rs index 09d221527f3..3e797480871 100644 --- a/components/datetime/src/format/datetime.rs +++ b/components/datetime/src/format/datetime.rs @@ -56,7 +56,7 @@ where pub(crate) symbols: Option<&'l provider::gregory::DateSymbolsV1>, pub(crate) datetime: &'l T, pub(crate) locale: &'l Locale, - pub(crate) ordinal_rules: Option<&'l PluralRules>, + pub(crate) ordinal_rules: Option<&'l PluralRules<'data>>, } impl<'l, 'data, T> Writeable for FormattedDateTime<'l, 'data, T> diff --git a/components/datetime/src/zoned_datetime.rs b/components/datetime/src/zoned_datetime.rs index bc60a90d4ba..aeabc4e4943 100644 --- a/components/datetime/src/zoned_datetime.rs +++ b/components/datetime/src/zoned_datetime.rs @@ -4,7 +4,7 @@ use alloc::string::String; use icu_locid::{LanguageIdentifier, Locale}; -use icu_plurals::{provider::PluralRuleStringsV1Marker, PluralRuleType, PluralRules}; +use icu_plurals::{provider::PluralRulesV1Marker, PluralRuleType, PluralRules}; use icu_provider::{DataProvider, DataRequest, ResourceOptions, ResourcePath}; use crate::{ @@ -118,7 +118,7 @@ impl<'data> ZonedDateTimeFormat<'data> { + DataProvider<'data, provider::time_zones::MetaZoneSpecificNamesLongV1Marker> + DataProvider<'data, provider::time_zones::MetaZoneSpecificNamesShortV1Marker> + ?Sized, - PP: DataProvider<'data, PluralRuleStringsV1Marker> + ?Sized, + PP: DataProvider<'data, PluralRulesV1Marker>, { let locale = locale.into(); let langid: LanguageIdentifier = locale.clone().into(); @@ -131,7 +131,7 @@ impl<'data> ZonedDateTimeFormat<'data> { let ordinal_rules = if let PatternPlurals::MultipleVariants(_) = &patterns.get().0 { Some(PluralRules::try_new( - langid.clone(), + locale.clone(), plural_provider, PluralRuleType::Ordinal, )?) diff --git a/components/plurals/Cargo.toml b/components/plurals/Cargo.toml index b4057e5b97f..378d0131c3d 100644 --- a/components/plurals/Cargo.toml +++ b/components/plurals/Cargo.toml @@ -37,7 +37,7 @@ icu_provider = { version = "0.4", path = "../../provider/core", features = ["mac icu_locid = { version = "0.4", path = "../locid" } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } displaydoc = { version = "0.2.3", default-features = false } -zerovec = { version = "0.4", path = "../../utils/zerovec" } +zerovec = { version = "0.4", path = "../../utils/zerovec", features = ["yoke"] } [dev-dependencies] criterion = "0.3" diff --git a/components/plurals/benches/parser.rs b/components/plurals/benches/parser.rs index 99c26c8068c..804eeed92e3 100644 --- a/components/plurals/benches/parser.rs +++ b/components/plurals/benches/parser.rs @@ -10,7 +10,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use icu_provider::prelude::*; fn parser(c: &mut Criterion) { - use icu_plurals::rules::parse_condition; + use icu_plurals::rules::reference::parse_condition; let fixture_data = helpers::get_plurals_data(); @@ -19,7 +19,7 @@ fn parser(c: &mut Criterion) { let mut rules = vec![]; for langid in &fixture_data.langs { - let data_payload: DataPayload = provider + let data_payload: DataPayload = provider .load_payload(&DataRequest { resource_path: ResourcePath { key: icu_plurals::provider::key::CARDINAL_V1, @@ -57,7 +57,7 @@ fn parser(c: &mut Criterion) { #[cfg(feature = "bench")] c.bench_function("plurals/parser/lex", |b| { - use icu_plurals::rules::Lexer; + use icu_plurals::rules::reference::Lexer; b.iter(|| { for rule in &rules { let _ = Lexer::new(black_box(rule.as_bytes())).count(); diff --git a/components/plurals/src/data.rs b/components/plurals/src/data.rs deleted file mode 100644 index 81f1e02b670..00000000000 --- a/components/plurals/src/data.rs +++ /dev/null @@ -1,126 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -use crate::operands::PluralOperands; -use crate::provider::PluralRuleStringsV1; -use crate::rules; -use crate::rules::reference::ast; -use crate::{PluralCategory, PluralRulesError}; -use alloc::borrow::Cow; -use core::convert::TryInto; - -/// A raw function pointer to a `PluralRulesFn` -// pub type PluralRulesFn = fn(&PluralOperands) -> PluralCategory; - -/// A structure holding a list of [`ast::Condition`] for a given locale and type. -/// -/// [`PluralCategory`]: super::PluralCategory -/// [`ast::Condition`]: super::rules::ast::Condition -#[derive(Default, Debug)] -pub struct PluralRuleList { - zero: Option, - one: Option, - two: Option, - few: Option, - many: Option, -} - -impl PluralRuleList { - fn has_rules_for(&self, category: PluralCategory) -> bool { - // There is implicitly always a rule for "Other" as the fallback. - match category { - PluralCategory::Zero => self.zero.is_some(), - PluralCategory::One => self.one.is_some(), - PluralCategory::Two => self.two.is_some(), - PluralCategory::Few => self.few.is_some(), - PluralCategory::Many => self.many.is_some(), - PluralCategory::Other => true, - } - } - - fn get(&self, category: PluralCategory) -> Option<&ast::Condition> { - match category { - PluralCategory::Zero => self.zero.as_ref(), - PluralCategory::One => self.one.as_ref(), - PluralCategory::Two => self.two.as_ref(), - PluralCategory::Few => self.few.as_ref(), - PluralCategory::Many => self.many.as_ref(), - PluralCategory::Other => None, - } - } -} - -fn parse_rule(input: &Option>) -> Result, PluralRulesError> { - Ok(if let Some(input) = input { - Some(rules::reference::parser::parse_condition( - (input).as_bytes(), - )?) - } else { - None - }) -} - -impl<'data> TryInto for &PluralRuleStringsV1<'data> { - type Error = PluralRulesError; - fn try_into(self) -> Result { - todo!() - // Ok(PluralRuleList { - // zero: parse_rule(&self.zero)?, - // one: parse_rule(&self.one)?, - // two: parse_rule(&self.two)?, - // few: parse_rule(&self.few)?, - // many: parse_rule(&self.many)?, - // }) - } -} - -/// An enum storing models of -/// handling plural rules selection. -pub enum RulesSelector { - /// A raw function pointer to a `PluralRulesFn` - /// - /// This variant is used by providers which store rules as native Rust functions. - // Function(PluralRulesFn), - /// A list of tuples of ([`PluralCategory`]-[`ast::Condition`]) pairs. - /// - /// This variant is used by providers which parse the list of conditions out - /// of source strings. - /// - /// [`PluralCategory`]: super::PluralCategory - /// [`ast::Condition`]: super::rules::ast::Condition - Conditions(PluralRuleList), -} - -impl RulesSelector { - pub fn select(&self, operands: &PluralOperands) -> PluralCategory { - match self { - // Self::Function(ptr) => ptr(operands), - Self::Conditions(conditions) => PluralCategory::all() - .find_map(|category| { - conditions - .get(*category) - .filter(|cond| rules::reference::resolver::test_condition(cond, operands)) - .map(|_| *category) - }) - .unwrap_or(PluralCategory::Other), - } - } - - /// Returns an iterator over each [`PluralCategory`] for which this [`RulesSelector`] has rules. - /// - /// The category [`PluralCategory::Other`] is always included. - pub fn categories(&self) -> impl Iterator + '_ { - match self { - Self::Conditions(conditions) => { - PluralCategory::all().filter(move |&category| conditions.has_rules_for(*category)) - } - } - } -} - -impl From for RulesSelector { - fn from(input: PluralRuleList) -> Self { - Self::Conditions(input) - } -} diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index 8a80f7f30f1..99a6e95ac7b 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -70,19 +70,19 @@ extern crate alloc; -mod data; +// mod data; mod error; mod operands; pub mod provider; pub mod rules; use core::cmp::{Ord, PartialOrd}; -use core::convert::TryInto; pub use error::PluralRulesError; -use icu_locid::LanguageIdentifier; +use icu_locid::Locale; use icu_provider::prelude::*; pub use operands::PluralOperands; -use provider::{resolver, PluralRuleStringsV1, PluralRuleStringsV1Marker}; +use provider::PluralRulesV1Marker; +use rules::runtime::test_rule; /// A type of a plural rule which can be associated with the [`PluralRules`] struct. /// @@ -202,7 +202,7 @@ pub enum PluralCategory { impl PluralCategory { /// Returns an ordered iterator over variants of [`Plural Categories`]. /// - /// Categories are returned in alphabetical order. + /// Categories are returned in UTS 35 order. /// /// # Examples /// @@ -211,26 +211,27 @@ impl PluralCategory { /// /// let mut categories = PluralCategory::all(); /// + /// assert_eq!(categories.next(), Some(&PluralCategory::Zero)); + /// assert_eq!(categories.next(), Some(&PluralCategory::One)); + /// assert_eq!(categories.next(), Some(&PluralCategory::Two)); /// assert_eq!(categories.next(), Some(&PluralCategory::Few)); /// assert_eq!(categories.next(), Some(&PluralCategory::Many)); - /// assert_eq!(categories.next(), Some(&PluralCategory::One)); /// assert_eq!(categories.next(), Some(&PluralCategory::Other)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Two)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Zero)); /// assert_eq!(categories.next(), None); /// ``` /// /// [`Plural Categories`]: PluralCategory - pub fn all() -> impl ExactSizeIterator { + pub fn all() -> impl ExactSizeIterator { [ + Self::Zero, + Self::One, + Self::Two, Self::Few, Self::Many, - Self::One, Self::Other, - Self::Two, - Self::Zero, ] .iter() + .copied() } /// Returns the PluralCategory coresponding to given TR35 string. @@ -270,12 +271,12 @@ impl PluralCategory { /// [`ICU4X`]: ../icu/index.html /// [`Plural Type`]: PluralRuleType /// [`Plural Category`]: PluralCategory -pub struct PluralRules { - _langid: LanguageIdentifier, - selector: data::RulesSelector, +pub struct PluralRules<'data> { + _locale: Locale, + rules: DataPayload<'data, PluralRulesV1Marker>, } -impl PluralRules { +impl<'data> PluralRules<'data> { /// Constructs a new `PluralRules` for a given locale, [`type`] and [`data provider`]. /// /// This constructor will fail if the [`Data Provider`] does not have the data. @@ -296,13 +297,31 @@ impl PluralRules { /// /// [`type`]: PluralRuleType /// [`data provider`]: icu_provider::DataProvider - pub fn try_new<'data, D: DataProvider<'data, PluralRuleStringsV1Marker> + ?Sized>( - langid: LanguageIdentifier, + pub fn try_new, D>( + locale: T, data_provider: &D, - type_: PluralRuleType, - ) -> Result { - let data = resolver::resolve_plural_data(langid.clone(), data_provider, type_)?; - Self::new_from_data(langid, data.get()) + rule_type: PluralRuleType, + ) -> Result + where + D: DataProvider<'data, PluralRulesV1Marker>, + { + let locale = locale.into(); + let key = match rule_type { + PluralRuleType::Cardinal => provider::key::CARDINAL_V1, + PluralRuleType::Ordinal => provider::key::ORDINAL_V1, + }; + let rules = data_provider + .load_payload(&DataRequest { + resource_path: ResourcePath { + key, + options: ResourceOptions { + variant: None, + langid: Some(locale.clone().into()), + }, + }, + })? + .take_payload()?; + Self::new(locale, rules) } /// Returns the [`Plural Category`] appropriate for the given number. @@ -364,7 +383,28 @@ impl PluralRules { /// [`Plural Category`]: PluralCategory /// [`Plural Operands`]: operands::PluralOperands pub fn select>(&self, input: I) -> PluralCategory { - self.selector.select(&input.into()) + let rules = self.rules.get(); + let input = input.into(); + PluralCategory::all() + .find(|cat| { + let rule = match cat { + PluralCategory::Zero => &rules.zero, + PluralCategory::One => &rules.one, + PluralCategory::Two => &rules.two, + PluralCategory::Few => &rules.few, + PluralCategory::Many => &rules.many, + PluralCategory::Other => { + return true; + } + }; + if let Some(rule) = rule { + if test_rule(rule, &input) { + return true; + } + } + false + }) + .unwrap_or(PluralCategory::Other) } /// Returns all [`Plural Categories`] appropriate for a [`PluralRules`] object @@ -396,20 +436,28 @@ impl PluralRules { /// ``` /// /// [`Plural Categories`]: PluralCategory - pub fn categories(&self) -> impl Iterator + '_ { - self.selector.categories() + pub fn categories(&self) -> impl Iterator + '_ { + let rules = self.rules.get(); + PluralCategory::all().filter(move |cat| match cat { + PluralCategory::Zero => rules.zero.is_some(), + PluralCategory::One => rules.one.is_some(), + PluralCategory::Two => rules.two.is_some(), + PluralCategory::Few => rules.few.is_some(), + PluralCategory::Many => rules.many.is_some(), + PluralCategory::Other => true, + }) } /// Lower-level constructor that allows constructing a [`PluralRules`] directly from /// data obtained from a provider. - pub fn new_from_data( - langid: LanguageIdentifier, - data: &PluralRuleStringsV1, + pub fn new>( + locale: T, + rules: DataPayload<'data, PluralRulesV1Marker>, ) -> Result { - let data: data::PluralRuleList = data.try_into()?; + let locale = locale.into(); Ok(Self { - _langid: langid, - selector: data.into(), + _locale: locale, + rules, }) } } diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index 63304171a06..d016cd054c0 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -7,7 +7,6 @@ //! Read more about data providers: [`icu_provider`] use crate::rules::runtime::ast::Rule; -use alloc::borrow::Cow; use icu_provider::yoke::{self, *}; #[allow(missing_docs)] // TODO(#1029) - Add missing docs. @@ -17,8 +16,6 @@ pub mod key { pub const ORDINAL_V1: ResourceKey = resource_key!(Plurals, "ordinal", 1); } -pub mod resolver; - /// Plural rule strings conforming to UTS 35 syntax. Includes separate fields for five of the six /// standard plural forms. If none of the rules match, the "other" category is assumed. /// @@ -30,12 +27,15 @@ pub mod resolver; derive(serde::Serialize, serde::Deserialize) )] #[allow(missing_docs)] // TODO(#1029) - Add missing docs. -pub struct PluralRuleStringsV1<'data> { +pub struct PluralRulesV1<'data> { #[cfg_attr(feature = "provider_serde", serde(borrow))] pub zero: Option>, - // pub zero: Option>, - // pub one: Option>, - // pub two: Option>, - // pub few: Option>, - // pub many: Option>, + #[cfg_attr(feature = "provider_serde", serde(borrow))] + pub one: Option>, + #[cfg_attr(feature = "provider_serde", serde(borrow))] + pub two: Option>, + #[cfg_attr(feature = "provider_serde", serde(borrow))] + pub few: Option>, + #[cfg_attr(feature = "provider_serde", serde(borrow))] + pub many: Option>, } diff --git a/components/plurals/src/provider/resolver.rs b/components/plurals/src/provider/resolver.rs deleted file mode 100644 index c30a75f5ee3..00000000000 --- a/components/plurals/src/provider/resolver.rs +++ /dev/null @@ -1,33 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Code for resolving plural data. - -use super::PluralRuleStringsV1Marker; -use crate::{PluralRuleType, PluralRulesError}; -use icu_locid::LanguageIdentifier; -use icu_provider::prelude::*; - -#[allow(missing_docs)] // TODO(#1029) - Add missing docs. -pub fn resolve_plural_data<'data, D: DataProvider<'data, PluralRuleStringsV1Marker> + ?Sized>( - langid: LanguageIdentifier, - data_provider: &D, - type_: PluralRuleType, -) -> Result, PluralRulesError> { - let key = match type_ { - PluralRuleType::Cardinal => super::key::CARDINAL_V1, - PluralRuleType::Ordinal => super::key::ORDINAL_V1, - }; - Ok(data_provider - .load_payload(&DataRequest { - resource_path: ResourcePath { - key, - options: ResourceOptions { - variant: None, - langid: Some(langid), - }, - }, - })? - .take_payload()?) -} diff --git a/components/plurals/src/rules/mod.rs b/components/plurals/src/rules/mod.rs index 4a9049f66cd..0b1e4833d78 100644 --- a/components/plurals/src/rules/mod.rs +++ b/components/plurals/src/rules/mod.rs @@ -62,19 +62,19 @@ //! //! let ast = parse_condition(input.as_bytes()) //! .expect("Parsing failed."); -//! assert_eq!(ast, Condition(Box::new([ -//! AndCondition(Box::new([ +//! assert_eq!(ast, Condition(vec![ +//! AndCondition(vec![ //! Relation { //! expression: Expression { //! operand: Operand::I, //! modulus: None, //! }, //! operator: Operator::Eq, -//! range_list: RangeList(Box::new([ +//! range_list: RangeList(vec![ //! RangeListItem::Value( //! Value(1) //! ) -//! ])) +//! ]) //! }, //! Relation { //! expression: Expression { @@ -82,14 +82,14 @@ //! modulus: None, //! }, //! operator: Operator::Eq, -//! range_list: RangeList(Box::new([ +//! range_list: RangeList(vec![ //! RangeListItem::Value( //! Value(0) //! ) -//! ])) +//! ]) //! }, -//! ])), -//! ]))); +//! ]), +//! ])); //! ``` //! //! Finally, we can pass this [`AST`] (in fact, just the [`Condition`] node), diff --git a/components/plurals/src/rules/reference/ast.rs b/components/plurals/src/rules/reference/ast.rs index e67a3954743..871f164a88b 100644 --- a/components/plurals/src/rules/reference/ast.rs +++ b/components/plurals/src/rules/reference/ast.rs @@ -17,22 +17,22 @@ //! let ast = parse_condition(input.as_bytes()) //! .expect("Parsing failed."); //! -//! assert_eq!(ast, Condition(Box::new([ -//! AndCondition(Box::new([ +//! assert_eq!(ast, Condition(vec![ +//! AndCondition(vec![ //! Relation { //! expression: Expression { //! operand: Operand::I, //! modulus: None, //! }, //! operator: Operator::Eq, -//! range_list: RangeList(Box::new([ +//! range_list: RangeList(vec![ //! RangeListItem::Value( //! Value(1) //! ) -//! ])) +//! ]) //! } -//! ])) -//! ]))); +//! ]) +//! ])); //! ``` //! //! [`PluralCategory`]: crate::PluralCategory @@ -56,17 +56,17 @@ use core::ops::RangeInclusive; /// /// let samples = Samples { /// integer: Some(SampleList { -/// sample_ranges: Box::new([SampleRange { +/// sample_ranges: vec![SampleRange { /// lower_val: DecimalValue("2".to_string()), /// upper_val: None, -/// }]), +/// }], /// ellipsis: true /// }), /// decimal: Some(SampleList { -/// sample_ranges: Box::new([SampleRange { +/// sample_ranges: vec![SampleRange { /// lower_val: DecimalValue("2.5".to_string()), /// upper_val: None, -/// }]), +/// }], /// ellipsis: false /// }), /// }; @@ -99,24 +99,24 @@ pub struct Rule { /// use icu::plurals::rules::reference::ast::*; /// use icu::plurals::rules::reference::parse_condition; /// -/// let condition = Condition(Box::new([ -/// AndCondition(Box::new([Relation { +/// let condition = Condition(vec![ +/// AndCondition(vec![Relation { /// expression: Expression { /// operand: Operand::I, /// modulus: None, /// }, /// operator: Operator::Eq, -/// range_list: RangeList(Box::new([RangeListItem::Value(Value(5))])), -/// }])), -/// AndCondition(Box::new([Relation { +/// range_list: RangeList(vec![RangeListItem::Value(Value(5))]), +/// }]), +/// AndCondition(vec![Relation { /// expression: Expression { /// operand: Operand::V, /// modulus: None, /// }, /// operator: Operator::Eq, -/// range_list: RangeList(Box::new([RangeListItem::Value(Value(2))])), -/// }])), -/// ])); +/// range_list: RangeList(vec![RangeListItem::Value(Value(2))]), +/// }]), +/// ]); /// /// assert_eq!( /// condition, @@ -145,14 +145,14 @@ pub struct Condition(pub Vec); /// ``` /// use icu::plurals::rules::reference::ast::*; /// -/// AndCondition(Box::new([ +/// AndCondition(vec![ /// Relation { /// expression: Expression { /// operand: Operand::I, /// modulus: None, /// }, /// operator: Operator::Eq, -/// range_list: RangeList(Box::new([RangeListItem::Value(Value(5))])), +/// range_list: RangeList(vec![RangeListItem::Value(Value(5))]), /// }, /// Relation { /// expression: Expression { @@ -160,9 +160,9 @@ pub struct Condition(pub Vec); /// modulus: None, /// }, /// operator: Operator::NotEq, -/// range_list: RangeList(Box::new([RangeListItem::Value(Value(2))])), +/// range_list: RangeList(vec![RangeListItem::Value(Value(2))]), /// }, -/// ])); +/// ]); /// /// ``` /// @@ -192,7 +192,7 @@ pub struct AndCondition(pub Vec); /// modulus: None, /// }, /// operator: Operator::Eq, -/// range_list: RangeList(Box::new([RangeListItem::Value(Value(3))])), +/// range_list: RangeList(vec![RangeListItem::Value(Value(3))]), /// }; /// /// ``` @@ -305,11 +305,11 @@ pub enum Operand { /// ``` /// use icu::plurals::rules::reference::ast::*; /// -/// RangeList(Box::new([ +/// RangeList(vec![ /// RangeListItem::Value(Value(5)), /// RangeListItem::Value(Value(7)), /// RangeListItem::Value(Value(9)), -/// ])); +/// ]); /// ``` /// /// [`RangeListItems`]: RangeListItem @@ -376,17 +376,17 @@ pub struct Value(pub u64); /// use icu::plurals::rules::reference::ast::*; /// Samples { /// integer: Some(SampleList { -/// sample_ranges: Box::new([SampleRange { +/// sample_ranges: vec![SampleRange { /// lower_val: DecimalValue("2".to_string()), /// upper_val: None, -/// }]), +/// }], /// ellipsis: true /// }), /// decimal: Some(SampleList { -/// sample_ranges: Box::new([SampleRange { +/// sample_ranges: vec![SampleRange { /// lower_val: DecimalValue("2.5".to_string()), /// upper_val: None, -/// }]), +/// }], /// ellipsis: false /// }), /// }; @@ -409,12 +409,12 @@ pub struct Samples { /// ``` /// use icu::plurals::rules::reference::ast::*; /// SampleList { -/// sample_ranges: Box::new([ +/// sample_ranges: vec![ /// SampleRange { /// lower_val: DecimalValue("0.0".to_string()), /// upper_val: Some(DecimalValue("1.5".to_string())), /// } -/// ]), +/// ], /// ellipsis: true /// }; /// ``` diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 1b57f5602c2..44a6c763ab9 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -1,4 +1,4 @@ -// This file is part of ICU4X. For terms of use, please see the file +// This file is part of ICU4 and_or: todo!(), polarity: todo!(), operand: todo!(), modulo: todo!(), range_list: todo!() and_or: todo!(), polarity: todo!(), operand: todo!(), modulo: todo!(), range_list: todo!() X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). @@ -12,7 +12,7 @@ use zerovec::{ {VarZeroVec, ZeroVec}, }; -#[derive(Yokeable, ZeroCopyFrom)] +#[derive(Debug, Yokeable, ZeroCopyFrom)] pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); #[derive(Copy, Clone, Eq, PartialEq, Debug)] @@ -259,6 +259,7 @@ impl fmt::Display for Rule<'_> { ///////////////// +#[derive(Debug)] #[repr(packed)] pub struct RelationULE { /// This maps to (AndOr, Polarity, Operand), @@ -286,6 +287,7 @@ impl RelationULE { } fn encode_andor_polarity_operand(and_or: AndOr, polarity: Polarity, operand: Operand) -> u8 { + // XXX: Ensure and_or is one bit, polarity is one bit, and operand is max 6 bits (((and_or == AndOr::And) as u8) << 7) + (((polarity == Polarity::Positive) as u8) << 6) + operand.idx() @@ -306,7 +308,7 @@ impl RelationULE { Polarity::Negative }; - let operand = Operand::from_idx(encoded & 0b0010_0000); + let operand = Operand::from_idx(encoded & 0b0011_1111); (and_or, polarity, operand) } } @@ -340,7 +342,7 @@ unsafe impl VarULE for RelationULE { // what it should do is attempt to parse the first 4 bytes as a u32::ULE (POU<4>), and the remaining bytes as a ZV fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { - let bits = bytes[0]; + let bits = bytes[0]; // XXX: Validate those bits as ULE>::validate_byte_slice(&bytes[1..5]).map_err(|_| "foo")?; let remaining = &bytes[5..]; RangeOrValueULE::validate_byte_slice(remaining).map_err(|_| "foo")?; @@ -360,7 +362,7 @@ unsafe impl EncodeAsVarULE for Relation<'_> { } } -#[derive(Clone, Copy)] +#[derive(Debug, Clone, Copy)] #[repr(transparent)] pub struct RangeOrValueULE(PlainOldULE<8>); @@ -490,29 +492,40 @@ mod test { #[test] fn simple_rule_test() { - // let input = "n % 10 = 3..4,9 and n % 100 != 10..19,70..79,90..99 or n = 0"; - let input = "n = 1"; + let input = "i = 1"; let full_ast = parse(input.as_bytes()).unwrap(); assert_eq!( full_ast, ast::Rule { - condition: ast::Condition(Box::new([ast::AndCondition(Box::new([ - ast::Relation { - expression: ast::Expression { - operand: ast::Operand::N, - modulus: None, - }, - operator: ast::Operator::Eq, - range_list: ast::RangeList(Box::new([ast::RangeListItem::Value( - ast::Value(1) - )])) - } - ]))])), + condition: ast::Condition(vec![ast::AndCondition(vec![ast::Relation { + expression: ast::Expression { + operand: ast::Operand::I, + modulus: None, + }, + operator: ast::Operator::Eq, + range_list: ast::RangeList(vec![ast::RangeListItem::Value(ast::Value(1))]) + }])]), samples: None, } ); let rule = Rule::from(&full_ast); + let relation = rule + .0 + .iter() + .next() + .expect("Should have a relation") + .as_relation(); + assert_eq!( + relation, + Relation { + and_or: AndOr::And, + polarity: Polarity::Positive, + operand: Operand::I, + modulo: 0, + range_list: ZeroVec::Borrowed(&[RangeOrValue::Value(1).as_unaligned()]) + } + ); let fd = fixed_decimal::decimal::FixedDecimal::from(1); let operands = PluralOperands::from(&fd); @@ -525,7 +538,23 @@ mod test { let full_ast = parse(input.as_bytes()).unwrap(); let rule = Rule::from(&full_ast); - let fd = fixed_decimal::decimal::FixedDecimal::from(110); + let fd = fixed_decimal::decimal::FixedDecimal::from(0); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); + + let fd = fixed_decimal::decimal::FixedDecimal::from(13); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); + + let fd = fixed_decimal::decimal::FixedDecimal::from(103); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); + + let fd = fixed_decimal::decimal::FixedDecimal::from(178); + let operands = PluralOperands::from(&fd); + assert!(!test_rule(&rule, &operands),); + + let fd = fixed_decimal::decimal::FixedDecimal::from(0); let operands = PluralOperands::from(&fd); assert!(test_rule(&rule, &operands),); } diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs index 119a0d49e4b..a678a7827c1 100644 --- a/components/plurals/src/rules/runtime/resolver.rs +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -4,50 +4,48 @@ use crate::operands::PluralOperands; use crate::rules::runtime::ast; -use zerovec::ZeroVec; pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { - let mut left_and = true; + let mut left = true; for relation in rule.0.iter() { let relation = relation.as_relation(); - if relation.and_or == ast::AndOr::Or { - if left_and { + if left { + if relation.and_or == ast::AndOr::Or { return true; } else { - left_and = test_value(get_value(&relation, operands), &relation.range_list); + left = test_relation(&relation, operands); } - } else if left_and && !test_value(get_value(&relation, operands), &relation.range_list) { - left_and = false; + } else if relation.and_or == ast::AndOr::Or { + left = test_relation(&relation, operands); } } - left_and + left } -fn test_value(value: Option, range_list: &ZeroVec) -> bool { - range_list.iter().any(|range| { - if let Some(value) = value { - match range { - ast::RangeOrValue::Range(min, max) => { - value >= u64::from(min) && value <= u64::from(max) - } - ast::RangeOrValue::Value(v) => u64::from(v) == value, +#[inline] +fn test_relation(relation: &ast::Relation, operands: &PluralOperands) -> bool { + let result = if let Some(value) = get_value(relation, operands) { + relation.range_list.iter().any(|range| match range { + ast::RangeOrValue::Value(v) => u64::from(v) == value, + ast::RangeOrValue::Range(min, max) => { + value >= u64::from(min) && value <= u64::from(max) } - } else { - false - } - }) + }) + } else { + false + }; + match relation.polarity { + ast::Polarity::Negative => !result, + ast::Polarity::Positive => result, + } } +#[inline] fn get_value(relation: &ast::Relation, operands: &PluralOperands) -> Option { let value = match relation.operand { - ast::Operand::N => { - if operands.w == 0 { - operands.i - } else { - return None; - } - } + ast::Operand::N if operands.w == 0 => operands.i, + ast::Operand::N => return None, ast::Operand::I => operands.i, ast::Operand::F => operands.f, ast::Operand::V => operands.v as u64, diff --git a/components/plurals/tests/plurals.rs b/components/plurals/tests/plurals.rs index 7dc7f9396ab..3b1a99522bc 100644 --- a/components/plurals/tests/plurals.rs +++ b/components/plurals/tests/plurals.rs @@ -3,7 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use icu_locid_macros::langid; -use icu_plurals::provider::{self, PluralRuleStringsV1}; +use icu_plurals::provider::{self, PluralRulesV1}; use icu_plurals::{PluralCategory, PluralRuleType, PluralRules}; use icu_provider::prelude::*; use icu_provider::struct_provider::StructProvider; @@ -46,25 +46,25 @@ fn test_plural_category_all() { assert_eq!(categories[5], &PluralCategory::Zero); } -#[test] -fn test_plural_rules_non_static_lifetime() { - let local_string = "v = 0 and i % 10 = 1".to_string(); - let local_data = PluralRuleStringsV1 { - zero: None, - one: Some(Cow::Borrowed(&local_string)), - two: None, - few: None, - many: None, - }; - let provider = StructProvider { - key: provider::key::CARDINAL_V1, - data: DataPayload::from_partial_owned(Rc::from(local_data)), - }; +// #[test] +// fn test_plural_rules_non_static_lifetime() { +// let local_string = "v = 0 and i % 10 = 1".to_string(); +// let local_data = PluralRulesV1 { +// zero: None, +// one: Some(local_string.parse().expect("Failed to parse plural rule")), +// two: None, +// few: None, +// many: None, +// }; +// let provider = StructProvider { +// key: provider::key::CARDINAL_V1, +// data: DataPayload::from_partial_owned(Rc::from(local_data)), +// }; - let lid = langid!("und"); - let pr = PluralRules::try_new(lid, &provider, PluralRuleType::Cardinal).unwrap(); +// let lid = langid!("und"); +// let pr = PluralRules::try_new(lid, &provider, PluralRuleType::Cardinal).unwrap(); - assert_eq!(pr.select(1_usize), PluralCategory::One); - assert_eq!(pr.select(5_usize), PluralCategory::Other); - assert_eq!(pr.select(11_usize), PluralCategory::One); -} +// assert_eq!(pr.select(1_usize), PluralCategory::One); +// assert_eq!(pr.select(5_usize), PluralCategory::Other); +// assert_eq!(pr.select(11_usize), PluralCategory::One); +// } diff --git a/provider/cldr/src/transform/plurals.rs b/provider/cldr/src/transform/plurals.rs index f17edacbeac..1c37e46d85e 100644 --- a/provider/cldr/src/transform/plurals.rs +++ b/provider/cldr/src/transform/plurals.rs @@ -6,10 +6,9 @@ use crate::error::Error; use crate::reader::open_reader; use crate::CldrPaths; use icu_plurals::provider::*; -use icu_plurals::rules::{parse, serialize}; +use icu_plurals::rules::runtime::ast::Rule; use icu_provider::iter::{IterableDataProviderCore, KeyedDataProvider}; use icu_provider::prelude::*; -use std::borrow::Cow; use std::convert::TryFrom; use std::marker::PhantomData; @@ -85,11 +84,11 @@ impl<'data> PluralsProvider<'data> { } } -impl<'data> DataProvider<'data, PluralRuleStringsV1Marker> for PluralsProvider<'data> { +impl<'data> DataProvider<'data, PluralRulesV1Marker> for PluralsProvider<'data> { fn load_payload( &self, req: &DataRequest, - ) -> Result, DataError> { + ) -> Result, DataError> { let cldr_rules = self.get_rules_for(&req.resource_path.key)?; // TODO: Implement language fallback? let langid = req.try_langid()?; @@ -101,13 +100,13 @@ impl<'data> DataProvider<'data, PluralRuleStringsV1Marker> for PluralsProvider<' metadata: DataResponseMetadata { data_langid: req.resource_path.options.langid.clone(), }, - payload: Some(DataPayload::from_owned(PluralRuleStringsV1::from(r))), + payload: Some(DataPayload::from_owned(PluralRulesV1::from(r))), }) } } icu_provider::impl_dyn_provider!(PluralsProvider<'data>, { - _ => PluralRuleStringsV1Marker, + _ => PluralRulesV1Marker, }, SERDE_SE, 'data); impl<'data> IterableDataProviderCore for PluralsProvider<'data> { @@ -130,17 +129,13 @@ impl<'data> IterableDataProviderCore for PluralsProvider<'data> { } } -impl From<&cldr_json::LocalePluralRules> for PluralRuleStringsV1<'static> { +impl From<&cldr_json::LocalePluralRules> for PluralRulesV1<'static> { fn from(other: &cldr_json::LocalePluralRules) -> Self { /// Removes samples from plural rule strings. Takes an owned [`String`] reference and /// returns a new [`String`] in a [`Cow::Owned`]. #[allow(clippy::ptr_arg)] - fn convert(s: &String) -> Cow<'static, str> { - let mut ast = parse(s.as_bytes()).expect("Rule parsing failed."); - ast.samples = None; - let mut result = String::with_capacity(s.len()); - serialize(&ast, &mut result).expect("Serialization failed."); - Cow::Owned(result) + fn convert(s: &String) -> Rule<'static> { + s.parse().expect("Rule parsing failed.") } Self { zero: other.zero.as_ref().map(convert), From 5fd0dbb81cdf60c4d3a9665fefc97c9a2f39cc70 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 28 Oct 2021 19:18:12 -0500 Subject: [PATCH 09/30] Fix tests --- components/plurals/src/lib.rs | 18 +++++++++--------- components/plurals/src/rules/runtime/ast.rs | 6 +++++- components/plurals/tests/categories.rs | 4 ++-- .../plurals/tests/fixtures/categories.json | 16 ++++++++-------- components/plurals/tests/fixtures/mod.rs | 2 +- components/plurals/tests/plurals.rs | 14 +++++++------- 6 files changed, 32 insertions(+), 28 deletions(-) diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index 99a6e95ac7b..0539436ad0b 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -211,12 +211,12 @@ impl PluralCategory { /// /// let mut categories = PluralCategory::all(); /// - /// assert_eq!(categories.next(), Some(&PluralCategory::Zero)); - /// assert_eq!(categories.next(), Some(&PluralCategory::One)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Two)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Few)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Many)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Other)); + /// assert_eq!(categories.next(), Some(PluralCategory::Zero)); + /// assert_eq!(categories.next(), Some(PluralCategory::One)); + /// assert_eq!(categories.next(), Some(PluralCategory::Two)); + /// assert_eq!(categories.next(), Some(PluralCategory::Few)); + /// assert_eq!(categories.next(), Some(PluralCategory::Many)); + /// assert_eq!(categories.next(), Some(PluralCategory::Other)); /// assert_eq!(categories.next(), None); /// ``` /// @@ -429,9 +429,9 @@ impl<'data> PluralRules<'data> { /// .expect("Failed to construct a PluralRules struct."); /// /// let mut categories = pr.categories(); - /// assert_eq!(categories.next(), Some(&PluralCategory::Many)); - /// assert_eq!(categories.next(), Some(&PluralCategory::One)); - /// assert_eq!(categories.next(), Some(&PluralCategory::Other)); + /// assert_eq!(categories.next(), Some(PluralCategory::One)); + /// assert_eq!(categories.next(), Some(PluralCategory::Many)); + /// assert_eq!(categories.next(), Some(PluralCategory::Other)); /// assert_eq!(categories.next(), None); /// ``` /// diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 44a6c763ab9..a1034f7059c 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -544,12 +544,16 @@ mod test { let fd = fixed_decimal::decimal::FixedDecimal::from(13); let operands = PluralOperands::from(&fd); - assert!(test_rule(&rule, &operands),); + assert!(!test_rule(&rule, &operands),); let fd = fixed_decimal::decimal::FixedDecimal::from(103); let operands = PluralOperands::from(&fd); assert!(test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(113); + let operands = PluralOperands::from(&fd); + assert!(!test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(178); let operands = PluralOperands::from(&fd); assert!(!test_rule(&rule, &operands),); diff --git a/components/plurals/tests/categories.rs b/components/plurals/tests/categories.rs index 8ac65f6a751..7bc1d882f4e 100644 --- a/components/plurals/tests/categories.rs +++ b/components/plurals/tests/categories.rs @@ -36,7 +36,7 @@ fn test_categories() { test.langid, test.plural_type, test.categories, - pr.categories().collect::>(), + pr.categories().collect::>(), ); for (expected, actual) in test.categories.iter().zip(pr.categories()) { @@ -51,7 +51,7 @@ fn test_categories() { test.langid, test.plural_type, test.categories, - pr.categories().collect::>(), + pr.categories().collect::>(), ); } } diff --git a/components/plurals/tests/fixtures/categories.json b/components/plurals/tests/fixtures/categories.json index 563d920dfa5..0934c9c10e5 100644 --- a/components/plurals/tests/fixtures/categories.json +++ b/components/plurals/tests/fixtures/categories.json @@ -3,12 +3,12 @@ "langid": "ar", "plural_type": "Cardinal", "categories": [ - "few", - "many", + "zero", "one", - "other", "two", - "zero" + "few", + "many", + "other" ] }, { @@ -30,18 +30,18 @@ "langid": "en", "plural_type": "Ordinal", "categories": [ - "few", "one", - "other", - "two" + "two", + "few", + "other" ] }, { "langid": "fr", "plural_type": "Cardinal", "categories": [ - "many", "one", + "many", "other" ] }, diff --git a/components/plurals/tests/fixtures/mod.rs b/components/plurals/tests/fixtures/mod.rs index f4dbb61c01c..537af1bb125 100644 --- a/components/plurals/tests/fixtures/mod.rs +++ b/components/plurals/tests/fixtures/mod.rs @@ -148,7 +148,7 @@ pub enum PluralCategoryInput { Other, } -impl PartialEq for PluralCategoryInput { +impl PartialEq for &PluralCategoryInput { fn eq(&self, other: &PluralCategory) -> bool { matches!( (self, other), diff --git a/components/plurals/tests/plurals.rs b/components/plurals/tests/plurals.rs index 3b1a99522bc..5508cb9cf2b 100644 --- a/components/plurals/tests/plurals.rs +++ b/components/plurals/tests/plurals.rs @@ -34,16 +34,16 @@ fn test_plural_rules_missing() { #[test] fn test_plural_category_all() { - let categories: Vec<&PluralCategory> = PluralCategory::all().collect(); + let categories: Vec = PluralCategory::all().collect(); assert_eq!(categories.len(), 6); - assert_eq!(categories[0], &PluralCategory::Few); - assert_eq!(categories[1], &PluralCategory::Many); - assert_eq!(categories[2], &PluralCategory::One); - assert_eq!(categories[3], &PluralCategory::Other); - assert_eq!(categories[4], &PluralCategory::Two); - assert_eq!(categories[5], &PluralCategory::Zero); + assert_eq!(categories[0], PluralCategory::Zero); + assert_eq!(categories[1], PluralCategory::One); + assert_eq!(categories[2], PluralCategory::Two); + assert_eq!(categories[3], PluralCategory::Few); + assert_eq!(categories[4], PluralCategory::Many); + assert_eq!(categories[5], PluralCategory::Other); } // #[test] From bad24b0cae3eecf9da872b9b8564a1eb11b6ca21 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Wed, 3 Nov 2021 15:57:05 -0600 Subject: [PATCH 10/30] Apply feedback --- components/plurals/src/lib.rs | 59 +++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index 0539436ad0b..8639d5b956e 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -385,25 +385,21 @@ impl<'data> PluralRules<'data> { pub fn select>(&self, input: I) -> PluralCategory { let rules = self.rules.get(); let input = input.into(); - PluralCategory::all() - .find(|cat| { - let rule = match cat { - PluralCategory::Zero => &rules.zero, - PluralCategory::One => &rules.one, - PluralCategory::Two => &rules.two, - PluralCategory::Few => &rules.few, - PluralCategory::Many => &rules.many, - PluralCategory::Other => { - return true; - } - }; - if let Some(rule) = rule { - if test_rule(rule, &input) { - return true; - } - } - false - }) + + macro_rules! test_rule { + ($rule:ident, $cat:ident) => { + rules + .$rule + .as_ref() + .and_then(|r| test_rule(r, &input).then(|| PluralCategory::$cat)) + }; + } + + test_rule!(zero, Zero) + .or_else(|| test_rule!(one, One)) + .or_else(|| test_rule!(two, Two)) + .or_else(|| test_rule!(few, Few)) + .or_else(|| test_rule!(many, Many)) .unwrap_or(PluralCategory::Other) } @@ -438,14 +434,23 @@ impl<'data> PluralRules<'data> { /// [`Plural Categories`]: PluralCategory pub fn categories(&self) -> impl Iterator + '_ { let rules = self.rules.get(); - PluralCategory::all().filter(move |cat| match cat { - PluralCategory::Zero => rules.zero.is_some(), - PluralCategory::One => rules.one.is_some(), - PluralCategory::Two => rules.two.is_some(), - PluralCategory::Few => rules.few.is_some(), - PluralCategory::Many => rules.many.is_some(), - PluralCategory::Other => true, - }) + + macro_rules! test_rule { + ($rule:ident, $cat:ident) => { + rules + .$rule + .as_ref() + .map(|_| PluralCategory::$cat) + .into_iter() + }; + } + + test_rule!(zero, Zero) + .chain(test_rule!(one, One)) + .chain(test_rule!(two, Two)) + .chain(test_rule!(few, Few)) + .chain(test_rule!(many, Many)) + .chain(Some(PluralCategory::Other).into_iter()) } /// Lower-level constructor that allows constructing a [`PluralRules`] directly from From 562602bb2a894a156381f0403bc9996111448a1b Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 4 Nov 2021 16:19:19 -0600 Subject: [PATCH 11/30] Add RelationULE doc --- components/plurals/src/rules/runtime/ast.rs | 46 +++++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index a1034f7059c..8162956e930 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -1,4 +1,4 @@ -// This file is part of ICU4 and_or: todo!(), polarity: todo!(), operand: todo!(), modulo: todo!(), range_list: todo!() and_or: todo!(), polarity: todo!(), operand: todo!(), modulo: todo!(), range_list: todo!() X. For terms of use, please see the file +// This file is part of ICU4X. For terms of use, please see the file // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). @@ -44,7 +44,7 @@ pub enum Operand { #[derive(Clone, Copy, Debug, PartialEq)] pub enum RangeOrValue { - Range(u32, u32), // XXX: Can we get away from smaller? + Range(u32, u32), Value(u32), } @@ -257,8 +257,46 @@ impl fmt::Display for Rule<'_> { } } -///////////////// - +/// `RelationULE` is a type optimized for efficent storing and +/// deserialization of `rule::runtime::ast::Rule` `ZeroVec` model. +/// +/// The serialization model packages the rule into 4 bytes plus +/// variable length of the list of ranges. +/// +/// The first byte is used to store: +/// * One bit to store the state of AndOr. +/// * One bit to store the state of Polarity. +/// * Six bits to store the Operand. +/// +/// The following four bytes store the value of the modulo as `u32`. +/// +/// Finally, the remaining variable length is used to store a list +/// of `RangeOrValue` elements which have `[u32;2]` layout. +/// +/// # Diagram +/// +/// ```text +/// ┌───────────────┬───────────────┬───────────────┐ +/// │ u8 │ [u8;4] │ [u8] │ +/// ├─┬─┬─┬─┬─┬─┬─┬─┼───┬───┬───┬───┼───────────────┤ +/// ├─┼─┼─┴─┴─┴─┴─┴─┼───┴───┴───┴───┼───────────────┤ +/// │ │ │ Operand │ Modulo │ RangeOrValue │ +/// └─┴─┴───────────┴───────────────┴───────────────┘ +/// ▲ ▲ +/// │ │ +/// │ Polarity +/// │ +/// AndOr +/// ``` +/// +/// # Optimization +/// +/// This model is optimized for efficient packaging of the `Relation` elements +/// and performant deserialization from the `RelationULE` to `Relation` type. +/// +/// # Constraints +/// +/// The model constraints the `Operand` to 64 variants, and `Modulo` to `u32::MAX`. #[derive(Debug)] #[repr(packed)] pub struct RelationULE { From 1f5c9c327e88b09243beaad68cde8d261ebb3731 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Fri, 5 Nov 2021 00:42:13 -0600 Subject: [PATCH 12/30] Apply feedback --- Cargo.lock | 1 + components/plurals/Cargo.toml | 1 + components/plurals/src/lib.rs | 12 +- components/plurals/src/provider.rs | 2 +- components/plurals/src/rules/runtime/ast.rs | 141 ++++++++------------ components/plurals/tests/plurals.rs | 52 ++++---- 6 files changed, 88 insertions(+), 121 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 87b230ea0b2..32be453572c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1232,6 +1232,7 @@ dependencies = [ "icu_locid_macros", "icu_provider", "icu_testdata", + "num_enum", "serde", "serde_json", "zerovec", diff --git a/components/plurals/Cargo.toml b/components/plurals/Cargo.toml index 378d0131c3d..0a338d16319 100644 --- a/components/plurals/Cargo.toml +++ b/components/plurals/Cargo.toml @@ -38,6 +38,7 @@ icu_locid = { version = "0.4", path = "../locid" } serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true } displaydoc = { version = "0.2.3", default-features = false } zerovec = { version = "0.4", path = "../../utils/zerovec", features = ["yoke"] } +num_enum = { version = "0.5", default_features = false } [dev-dependencies] criterion = "0.3" diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index 8639d5b956e..0dbdae05f07 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -211,24 +211,24 @@ impl PluralCategory { /// /// let mut categories = PluralCategory::all(); /// - /// assert_eq!(categories.next(), Some(PluralCategory::Zero)); - /// assert_eq!(categories.next(), Some(PluralCategory::One)); - /// assert_eq!(categories.next(), Some(PluralCategory::Two)); /// assert_eq!(categories.next(), Some(PluralCategory::Few)); /// assert_eq!(categories.next(), Some(PluralCategory::Many)); + /// assert_eq!(categories.next(), Some(PluralCategory::One)); /// assert_eq!(categories.next(), Some(PluralCategory::Other)); + /// assert_eq!(categories.next(), Some(PluralCategory::Two)); + /// assert_eq!(categories.next(), Some(PluralCategory::Zero)); /// assert_eq!(categories.next(), None); /// ``` /// /// [`Plural Categories`]: PluralCategory pub fn all() -> impl ExactSizeIterator { [ - Self::Zero, - Self::One, - Self::Two, Self::Few, Self::Many, + Self::One, Self::Other, + Self::Two, + Self::Zero, ] .iter() .copied() diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index d016cd054c0..e528ad853fc 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -21,7 +21,7 @@ pub mod key { /// /// More information: #[icu_provider::data_struct] -#[derive(Default)] +#[derive(Default, Clone)] #[cfg_attr( feature = "provider_serde", derive(serde::Serialize, serde::Deserialize) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 8162956e930..b044a8ddc62 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -5,14 +5,19 @@ #![allow(missing_docs)] use crate::rules::reference; -use core::{convert::TryInto, fmt, mem, str::FromStr}; +use core::{ + convert::{TryFrom, TryInto}, + fmt, + str::FromStr, +}; use icu_provider::yoke::{self, *}; +use num_enum::{IntoPrimitive, TryFromPrimitive, UnsafeFromPrimitive}; use zerovec::{ - ule::{custom::EncodeAsVarULE, AsULE, PlainOldULE, VarULE, ULE}, + ule::{custom::EncodeAsVarULE, AsULE, PairULE, PlainOldULE, VarULE, ULE}, {VarZeroVec, ZeroVec}, }; -#[derive(Debug, Yokeable, ZeroCopyFrom)] +#[derive(Yokeable, ZeroCopyFrom, Clone)] pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); #[derive(Copy, Clone, Eq, PartialEq, Debug)] @@ -29,7 +34,7 @@ pub enum Polarity { Positive, } -#[derive(Copy, Clone, Debug, PartialEq)] +#[derive(IntoPrimitive, UnsafeFromPrimitive, TryFromPrimitive, Copy, Clone, Debug, PartialEq)] #[repr(u8)] pub enum Operand { N, @@ -59,18 +64,6 @@ pub struct Relation<'data> { ///// -impl Operand { - fn idx(self) -> u8 { - self as u8 - } - - fn from_idx(idx: u8) -> Self { - debug_assert!(idx < 8); - // safe due to the repr(u8) - unsafe { mem::transmute(idx) } - } -} - impl From<&reference::ast::Rule> for Rule<'_> { fn from(input: &reference::ast::Rule) -> Self { let mut relations: alloc::vec::Vec = alloc::vec![]; @@ -297,7 +290,6 @@ impl fmt::Display for Rule<'_> { /// # Constraints /// /// The model constraints the `Operand` to 64 variants, and `Modulo` to `u32::MAX`. -#[derive(Debug)] #[repr(packed)] pub struct RelationULE { /// This maps to (AndOr, Polarity, Operand), @@ -312,7 +304,6 @@ pub struct RelationULE { impl RelationULE { pub fn as_relation(&self) -> Relation { - // safe to call because we are operating on a valid RelationULE let (and_or, polarity, operand) = unsafe { Self::decode_andor_polarity_operand(self.andor_polarity_operand) }; Relation { @@ -325,14 +316,18 @@ impl RelationULE { } fn encode_andor_polarity_operand(and_or: AndOr, polarity: Polarity, operand: Operand) -> u8 { - // XXX: Ensure and_or is one bit, polarity is one bit, and operand is max 6 bits + let encoded_operand = u8::from(operand); + debug_assert!(encoded_operand <= 0b0011_1111); (((and_or == AndOr::And) as u8) << 7) + (((polarity == Polarity::Positive) as u8) << 6) - + operand.idx() + + encoded_operand + } + + fn validate_andor_polarity_operand(encoded: u8) -> Result<(), &'static str> { + Operand::try_from(encoded & 0b0011_1111).map_err(|_| "Failed to decode operand.")?; + Ok(()) } - // Must be called on an `encoded` that is obtained from `encode_andor_polarity_operand`, i.e. - // the field on a valid RelationULE unsafe fn decode_andor_polarity_operand(encoded: u8) -> (AndOr, Polarity, Operand) { let and_or = if encoded & 0b1000_0000 != 0 { AndOr::And @@ -346,11 +341,16 @@ impl RelationULE { Polarity::Negative }; - let operand = Operand::from_idx(encoded & 0b0011_1111); + let operand = Operand::from_unchecked(encoded & 0b0011_1111); (and_or, polarity, operand) } } +// Safety (based on the safety checklist on the ULE trait): +// 1. RelationULE does not include any uninitialized or padding bytes. +// 2. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 3. The other ULE methods use the default impl. +// 4. FieldULE byte equality is semantic equality. unsafe impl VarULE for RelationULE { type Error = &'static str; @@ -358,7 +358,7 @@ unsafe impl VarULE for RelationULE { unsafe fn from_byte_slice_unchecked(bytes: &[u8]) -> &Self { let ptr = bytes.as_ptr(); let len = bytes.len(); - // subtract length of andor_polarity_operand and modulo and then convert between a slice of bytes and PlainOldULE<8> + // subtract length of andor_polarity_operand and modulo and then convert between a slice of bytes and RangeOrValueULE let len_new = (len - 5) / 8; // it's hard constructing custom DSTs, we fake a pointer/length construction // eventually we can use the Pointer::Metadata APIs when they stabilize @@ -368,20 +368,8 @@ unsafe impl VarULE for RelationULE { ret } - #[inline] - fn as_byte_slice(&self) -> &[u8] { - unsafe { - core::slice::from_raw_parts( - self as *const Self as *const u8, - core::mem::size_of_val(self), - ) - } - } - - // what it should do is attempt to parse the first 4 bytes as a u32::ULE (POU<4>), and the remaining bytes as a ZV fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { - let bits = bytes[0]; // XXX: Validate those bits - as ULE>::validate_byte_slice(&bytes[1..5]).map_err(|_| "foo")?; + RelationULE::validate_andor_polarity_operand(bytes[0])?; let remaining = &bytes[5..]; RangeOrValueULE::validate_byte_slice(remaining).map_err(|_| "foo")?; Ok(()) @@ -400,46 +388,23 @@ unsafe impl EncodeAsVarULE for Relation<'_> { } } -#[derive(Debug, Clone, Copy)] -#[repr(transparent)] -pub struct RangeOrValueULE(PlainOldULE<8>); - -unsafe impl ULE for RangeOrValueULE { - type Error = zerovec::ule::ULEError; - - fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { - PlainOldULE::<8>::validate_byte_slice(bytes) - } -} +type RangeOrValueULE = PairULE<::ULE, ::ULE>; impl AsULE for RangeOrValue { type ULE = RangeOrValueULE; #[inline] fn as_unaligned(self) -> Self::ULE { - let mut result = [0; 8]; match self { - Self::Range(start, end) => { - let start_bytes = start.to_be_bytes(); - let end_bytes = end.to_be_bytes(); - result[..4].copy_from_slice(&start_bytes); - result[4..].copy_from_slice(&end_bytes); - RangeOrValueULE(result.into()) - } - Self::Value(idx) => { - let bytes = idx.to_be_bytes(); - result[..4].copy_from_slice(&bytes); - result[4..].copy_from_slice(&bytes); - RangeOrValueULE(result.into()) - } + Self::Range(start, end) => PairULE(start.as_unaligned(), end.as_unaligned()), + Self::Value(idx) => PairULE(idx.as_unaligned(), idx.as_unaligned()), } } #[inline] fn from_unaligned(unaligned: Self::ULE) -> Self { - let b = unaligned.0 .0; - let start = u32::from_be_bytes([b[0], b[1], b[2], b[3]]); - let end = u32::from_be_bytes([b[4], b[5], b[6], b[7]]); + let start = u32::from_unaligned(unaligned.0); + let end = u32::from_unaligned(unaligned.1); if start == end { Self::Value(start) } else { @@ -576,41 +541,41 @@ mod test { let full_ast = parse(input.as_bytes()).unwrap(); let rule = Rule::from(&full_ast); - let fd = fixed_decimal::decimal::FixedDecimal::from(0); - let operands = PluralOperands::from(&fd); - assert!(test_rule(&rule, &operands),); + // let fd = fixed_decimal::decimal::FixedDecimal::from(0); + // let operands = PluralOperands::from(&fd); + // assert!(test_rule(&rule, &operands),); - let fd = fixed_decimal::decimal::FixedDecimal::from(13); - let operands = PluralOperands::from(&fd); - assert!(!test_rule(&rule, &operands),); + // let fd = fixed_decimal::decimal::FixedDecimal::from(13); + // let operands = PluralOperands::from(&fd); + // assert!(!test_rule(&rule, &operands),); - let fd = fixed_decimal::decimal::FixedDecimal::from(103); - let operands = PluralOperands::from(&fd); - assert!(test_rule(&rule, &operands),); + // let fd = fixed_decimal::decimal::FixedDecimal::from(103); + // let operands = PluralOperands::from(&fd); + // assert!(test_rule(&rule, &operands),); - let fd = fixed_decimal::decimal::FixedDecimal::from(113); - let operands = PluralOperands::from(&fd); - assert!(!test_rule(&rule, &operands),); + // let fd = fixed_decimal::decimal::FixedDecimal::from(113); + // let operands = PluralOperands::from(&fd); + // assert!(!test_rule(&rule, &operands),); - let fd = fixed_decimal::decimal::FixedDecimal::from(178); - let operands = PluralOperands::from(&fd); - assert!(!test_rule(&rule, &operands),); + // let fd = fixed_decimal::decimal::FixedDecimal::from(178); + // let operands = PluralOperands::from(&fd); + // assert!(!test_rule(&rule, &operands),); - let fd = fixed_decimal::decimal::FixedDecimal::from(0); - let operands = PluralOperands::from(&fd); - assert!(test_rule(&rule, &operands),); + // let fd = fixed_decimal::decimal::FixedDecimal::from(0); + // let operands = PluralOperands::from(&fd); + // assert!(test_rule(&rule, &operands),); } #[test] fn range_or_value_ule_test() { let rov = RangeOrValue::Value(1); - let ule = rov.as_unaligned().0; - let ref_bytes = &[0, 0, 0, 1, 0, 0, 0, 1]; + let ule = rov.as_unaligned(); + let ref_bytes = &[1, 0, 0, 0, 1, 0, 0, 0]; assert_eq!(ULE::as_byte_slice(&[ule]), *ref_bytes); let rov = RangeOrValue::Range(2, 4); - let ule = rov.as_unaligned().0; - let ref_bytes = &[0, 0, 0, 2, 0, 0, 0, 4]; + let ule = rov.as_unaligned(); + let ref_bytes = &[2, 0, 0, 0, 4, 0, 0, 0]; assert_eq!(ULE::as_byte_slice(&[ule]), *ref_bytes); } @@ -628,7 +593,7 @@ mod test { let vzv = VarZeroVec::from(relations.as_slice()); assert_eq!( vzv.get_encoded_slice(), - &[1, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1] + &[1, 0, 0, 0, 0, 0, 0, 0, 192, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0] ); } } diff --git a/components/plurals/tests/plurals.rs b/components/plurals/tests/plurals.rs index 5508cb9cf2b..216cfd9ed6d 100644 --- a/components/plurals/tests/plurals.rs +++ b/components/plurals/tests/plurals.rs @@ -38,33 +38,33 @@ fn test_plural_category_all() { assert_eq!(categories.len(), 6); - assert_eq!(categories[0], PluralCategory::Zero); - assert_eq!(categories[1], PluralCategory::One); - assert_eq!(categories[2], PluralCategory::Two); - assert_eq!(categories[3], PluralCategory::Few); - assert_eq!(categories[4], PluralCategory::Many); - assert_eq!(categories[5], PluralCategory::Other); + assert_eq!(categories[0], PluralCategory::Few); + assert_eq!(categories[1], PluralCategory::Many); + assert_eq!(categories[2], PluralCategory::One); + assert_eq!(categories[3], PluralCategory::Other); + assert_eq!(categories[4], PluralCategory::Two); + assert_eq!(categories[5], PluralCategory::Zero); } -// #[test] -// fn test_plural_rules_non_static_lifetime() { -// let local_string = "v = 0 and i % 10 = 1".to_string(); -// let local_data = PluralRulesV1 { -// zero: None, -// one: Some(local_string.parse().expect("Failed to parse plural rule")), -// two: None, -// few: None, -// many: None, -// }; -// let provider = StructProvider { -// key: provider::key::CARDINAL_V1, -// data: DataPayload::from_partial_owned(Rc::from(local_data)), -// }; +#[test] +fn test_plural_rules_non_static_lifetime() { + let local_string = "v = 0 and i % 10 = 1".to_string(); + let local_data = PluralRulesV1 { + zero: None, + one: Some(local_string.parse().expect("Failed to parse plural rule")), + two: None, + few: None, + many: None, + }; + let provider = StructProvider { + key: provider::key::CARDINAL_V1, + data: DataPayload::from_partial_owned(Rc::from(local_data)), + }; -// let lid = langid!("und"); -// let pr = PluralRules::try_new(lid, &provider, PluralRuleType::Cardinal).unwrap(); + let lid = langid!("und"); + let pr = PluralRules::try_new(lid, &provider, PluralRuleType::Cardinal).unwrap(); -// assert_eq!(pr.select(1_usize), PluralCategory::One); -// assert_eq!(pr.select(5_usize), PluralCategory::Other); -// assert_eq!(pr.select(11_usize), PluralCategory::One); -// } + assert_eq!(pr.select(1_usize), PluralCategory::One); + assert_eq!(pr.select(5_usize), PluralCategory::Other); + assert_eq!(pr.select(11_usize), PluralCategory::One); +} From 85bef5f9636706d05206f369fdf0aa728502dd52 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Fri, 5 Nov 2021 10:27:38 -0600 Subject: [PATCH 13/30] Add inlines and docs --- components/plurals/src/rules/runtime/ast.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index b044a8ddc62..7de4102d696 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -303,6 +303,7 @@ pub struct RelationULE { } impl RelationULE { + #[inline] pub fn as_relation(&self) -> Relation { let (and_or, polarity, operand) = unsafe { Self::decode_andor_polarity_operand(self.andor_polarity_operand) }; @@ -323,11 +324,13 @@ impl RelationULE { + encoded_operand } + #[inline] fn validate_andor_polarity_operand(encoded: u8) -> Result<(), &'static str> { Operand::try_from(encoded & 0b0011_1111).map_err(|_| "Failed to decode operand.")?; Ok(()) } + #[inline] unsafe fn decode_andor_polarity_operand(encoded: u8) -> (AndOr, Polarity, Operand) { let and_or = if encoded & 0b1000_0000 != 0 { AndOr::And @@ -350,7 +353,7 @@ impl RelationULE { // 1. RelationULE does not include any uninitialized or padding bytes. // 2. The impl of validate_byte_slice() returns an error if any byte is not valid. // 3. The other ULE methods use the default impl. -// 4. FieldULE byte equality is semantic equality. +// 4. RelationULE byte equality is semantic equality. unsafe impl VarULE for RelationULE { type Error = &'static str; @@ -368,10 +371,13 @@ unsafe impl VarULE for RelationULE { ret } + #[inline] fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { RelationULE::validate_andor_polarity_operand(bytes[0])?; + // Skip bytes 1-4 as they're always valid `u32` for `Modulo`. let remaining = &bytes[5..]; - RangeOrValueULE::validate_byte_slice(remaining).map_err(|_| "foo")?; + RangeOrValueULE::validate_byte_slice(remaining) + .map_err(|_| "Invalid list of RangeOrValueULE")?; Ok(()) } } From 2aca97a3fb17f9473ab81d21a3e51c5793635d2f Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Fri, 5 Nov 2021 16:20:58 -0600 Subject: [PATCH 14/30] Uncomment tests --- components/plurals/src/rules/runtime/ast.rs | 36 ++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 7de4102d696..2a164f1ff14 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -547,29 +547,29 @@ mod test { let full_ast = parse(input.as_bytes()).unwrap(); let rule = Rule::from(&full_ast); - // let fd = fixed_decimal::decimal::FixedDecimal::from(0); - // let operands = PluralOperands::from(&fd); - // assert!(test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(0); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); - // let fd = fixed_decimal::decimal::FixedDecimal::from(13); - // let operands = PluralOperands::from(&fd); - // assert!(!test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(13); + let operands = PluralOperands::from(&fd); + assert!(!test_rule(&rule, &operands),); - // let fd = fixed_decimal::decimal::FixedDecimal::from(103); - // let operands = PluralOperands::from(&fd); - // assert!(test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(103); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); - // let fd = fixed_decimal::decimal::FixedDecimal::from(113); - // let operands = PluralOperands::from(&fd); - // assert!(!test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(113); + let operands = PluralOperands::from(&fd); + assert!(!test_rule(&rule, &operands),); - // let fd = fixed_decimal::decimal::FixedDecimal::from(178); - // let operands = PluralOperands::from(&fd); - // assert!(!test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(178); + let operands = PluralOperands::from(&fd); + assert!(!test_rule(&rule, &operands),); - // let fd = fixed_decimal::decimal::FixedDecimal::from(0); - // let operands = PluralOperands::from(&fd); - // assert!(test_rule(&rule, &operands),); + let fd = fixed_decimal::decimal::FixedDecimal::from(0); + let operands = PluralOperands::from(&fd); + assert!(test_rule(&rule, &operands),); } #[test] From 02eba1ca57d0a2b4dfb3382a79c5468ec727e82a Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Mon, 8 Nov 2021 17:02:24 -0600 Subject: [PATCH 15/30] Apply reviewers feedback --- components/plurals/src/rules/runtime/ast.rs | 2 +- .../plurals/src/rules/runtime/resolver.rs | 12 ++++----- utils/zerovec/src/zerovec/mod.rs | 27 +------------------ 3 files changed, 7 insertions(+), 34 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 2a164f1ff14..bc81678aef1 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -312,7 +312,7 @@ impl RelationULE { polarity, operand, modulo: u32::from_unaligned(self.modulo), - range_list: ZeroVec::borrowed_from_slice(&self.range_list), + range_list: ZeroVec::Borrowed(&self.range_list), } } diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs index a678a7827c1..c7dae24444d 100644 --- a/components/plurals/src/rules/runtime/resolver.rs +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -5,18 +5,16 @@ use crate::operands::PluralOperands; use crate::rules::runtime::ast; +#[inline] pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { let mut left = true; for relation in rule.0.iter() { let relation = relation.as_relation(); - if left { - if relation.and_or == ast::AndOr::Or { - return true; - } else { - left = test_relation(&relation, operands); - } - } else if relation.and_or == ast::AndOr::Or { + if left && relation.and_or == ast::AndOr::Or { + return true; + } + if left || relation.and_or == ast::AndOr::Or { left = test_relation(&relation, operands); } } diff --git a/utils/zerovec/src/zerovec/mod.rs b/utils/zerovec/src/zerovec/mod.rs index bd7e9c14afe..b80c733cd34 100644 --- a/utils/zerovec/src/zerovec/mod.rs +++ b/utils/zerovec/src/zerovec/mod.rs @@ -418,7 +418,7 @@ impl<'a> ZeroVec<'a, u8> { } } -impl<'a, T> ZeroVec<'a, T> +impl ZeroVec<'_, T> where T: AsULE, { @@ -445,31 +445,6 @@ where Self::Owned(other.iter().copied().map(T::as_unaligned).collect()) } - /// Creates a `ZeroVec` from a `&[T::ULE]` by borrowing from it. - /// - /// This function results in a `Borrowed` instance of `ZeroVec`. - /// - /// # Example - /// - /// ``` - /// use zerovec::ZeroVec; - /// use zerovec::ule::*; - /// - /// // The little-endian bytes correspond to the numbers on the following line. - /// let bytes: &[u8] = &[0xD3, 0x00, 0x19, 0x01, 0xA5, 0x01, 0xCD, 0x01]; - /// let nums: &[PlainOldULE<2>] = &[211_u16.as_unaligned(), 281_u16.as_unaligned(), - /// 421_u16.as_unaligned(), 461_u16.as_unaligned()]; - /// - /// let zerovec = ZeroVec::::borrowed_from_slice(nums); - /// - /// assert!(matches!(zerovec, ZeroVec::Borrowed(_))); - /// assert_eq!(bytes, zerovec.as_bytes()); - /// ``` - #[inline] - pub fn borrowed_from_slice(other: &'a [T::ULE]) -> Self { - Self::Borrowed(other) - } - /// Creates a `Vec` from a `ZeroVec`. /// /// # Example From 55f1f30e918cc6c0063078d94964a78af8512fa7 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Tue, 9 Nov 2021 18:38:24 -0600 Subject: [PATCH 16/30] Fix tests and apply feedback --- components/plurals/src/lib.rs | 4 +- components/plurals/src/rules/mod.rs | 13 +++++- components/plurals/src/rules/reference/mod.rs | 2 + components/plurals/src/rules/runtime/ast.rs | 3 ++ components/plurals/src/rules/runtime/mod.rs | 1 - .../plurals/src/rules/runtime/resolver.rs | 21 ++++++++++ components/plurals/tests/plurals.rs | 1 - ffi/diplomat/src/pluralrules.rs | 40 +++++++++++++++---- ffi/ecma402/src/pluralrules.rs | 14 +++---- provider/fs/benches/provider_fs.rs | 2 +- 10 files changed, 79 insertions(+), 22 deletions(-) diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index 0dbdae05f07..cf5d11330e4 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -65,12 +65,10 @@ //! [`Language Plural Rules`]: https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules //! [`CLDR`]: http://cldr.unicode.org/ -#![warn(missing_docs)] #![cfg_attr(not(any(test, feature = "std")), no_std)] extern crate alloc; -// mod data; mod error; mod operands; pub mod provider; @@ -303,7 +301,7 @@ impl<'data> PluralRules<'data> { rule_type: PluralRuleType, ) -> Result where - D: DataProvider<'data, PluralRulesV1Marker>, + D: DataProvider<'data, PluralRulesV1Marker> + ?Sized, { let locale = locale.into(); let key = match rule_type { diff --git a/components/plurals/src/rules/mod.rs b/components/plurals/src/rules/mod.rs index 0b1e4833d78..92a69ee68ba 100644 --- a/components/plurals/src/rules/mod.rs +++ b/components/plurals/src/rules/mod.rs @@ -131,6 +131,17 @@ //! //! For example, in Russian [`PluralCategory::One`] matches numbers such as `11`, `21`, `121` etc. //! +//! # Runtime vs. Resolver Rules +//! +//! `ICU4X` provides two sets of rules AST and APIs to manage it: +//! * `reference` is the canonical implementation of the specification intended for +//! tooling and testing to use. +//! This module provides APIs for parsing, editing and serialization +//! of the rules. +//! * `runtime` is a non-public, non-mutable runtime version optimized for +//! performance and low memory overhead. This version provides +//! runtime resolver used by the `PluralRules` itself. +//! //! [`PluralCategory`]: super::PluralCategory //! [`PluralCategories`]: super::PluralCategory //! [`PluralCategory::One`]: super::PluralCategory::One @@ -145,4 +156,4 @@ //! [`Sample`]: super::rules::ast::Samples //! [`AST`]: super::rules::ast pub mod reference; -pub mod runtime; +pub(crate) mod runtime; diff --git a/components/plurals/src/rules/reference/mod.rs b/components/plurals/src/rules/reference/mod.rs index 3c56cba53a7..ce71c385b31 100644 --- a/components/plurals/src/rules/reference/mod.rs +++ b/components/plurals/src/rules/reference/mod.rs @@ -2,6 +2,8 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). +//! Reference version of the Plural Rules parser, AST and serializer. + pub mod ast; pub(crate) mod lexer; pub(crate) mod parser; diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index bc81678aef1..ff9bcfa6880 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -375,6 +375,9 @@ unsafe impl VarULE for RelationULE { fn validate_byte_slice(bytes: &[u8]) -> Result<(), Self::Error> { RelationULE::validate_andor_polarity_operand(bytes[0])?; // Skip bytes 1-4 as they're always valid `u32` for `Modulo`. + if bytes.len() < 5 { + return Err("byte slice is too short"); + } let remaining = &bytes[5..]; RangeOrValueULE::validate_byte_slice(remaining) .map_err(|_| "Invalid list of RangeOrValueULE")?; diff --git a/components/plurals/src/rules/runtime/mod.rs b/components/plurals/src/rules/runtime/mod.rs index b1b6c7669b0..10500ceb1bc 100644 --- a/components/plurals/src/rules/runtime/mod.rs +++ b/components/plurals/src/rules/runtime/mod.rs @@ -2,7 +2,6 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -#![allow(missing_docs)] pub mod ast; pub(crate) mod resolver; diff --git a/components/plurals/src/rules/runtime/resolver.rs b/components/plurals/src/rules/runtime/resolver.rs index c7dae24444d..50e89d3bd69 100644 --- a/components/plurals/src/rules/runtime/resolver.rs +++ b/components/plurals/src/rules/runtime/resolver.rs @@ -7,6 +7,27 @@ use crate::rules::runtime::ast; #[inline] pub fn test_rule(rule: &ast::Rule, operands: &PluralOperands) -> bool { + // This algorithm is a simple non-recursive interpreter of the + // [`UTS #35: Language Plural Rules`]. + // + // The algorithm exploits the fact that plural rules syntax is a simple + // logical operator expression composition with maximum depth of one + // level of `OR` expression. + // + // That means that any `AND` expression accumulates to a single boolean + // result which either results in a test passing, or the next set + // of `AND` relations is evaluated after the `OR`. + // + // To achieve that, the algorithm traverses the relations from left to right + // collecting all matching relations into a temporary `left` variable for + // as long as they are followed by the `AND` operator. + // + // If any relation fails to match, the `left` variable becomes `false` and the + // interpreter skips to the first `OR` operator, rejects the left side, and + // evaluates the right as a candidate and so on. + // + // [`UTS #35: Language Plural Rules`]: https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules + let mut left = true; for relation in rule.0.iter() { diff --git a/components/plurals/tests/plurals.rs b/components/plurals/tests/plurals.rs index 216cfd9ed6d..e6714d8ddd1 100644 --- a/components/plurals/tests/plurals.rs +++ b/components/plurals/tests/plurals.rs @@ -7,7 +7,6 @@ use icu_plurals::provider::{self, PluralRulesV1}; use icu_plurals::{PluralCategory, PluralRuleType, PluralRules}; use icu_provider::prelude::*; use icu_provider::struct_provider::StructProvider; -use std::borrow::Cow; use std::rc::Rc; #[test] diff --git a/ffi/diplomat/src/pluralrules.rs b/ffi/diplomat/src/pluralrules.rs index f6acdf9c9a9..8c86195c34f 100644 --- a/ffi/diplomat/src/pluralrules.rs +++ b/ffi/diplomat/src/pluralrules.rs @@ -8,9 +8,15 @@ pub mod ffi { use alloc::boxed::Box; - use icu_plurals::{PluralCategory, PluralOperands, PluralRuleType, PluralRules}; + use icu_plurals::{ + provider::PluralRulesV1Marker, PluralCategory, PluralOperands, PluralRuleType, PluralRules, + }; + use icu_provider::prelude::DataProvider; - use crate::{locale::ffi::ICU4XLocale, provider::ffi::ICU4XDataProvider}; + use crate::{ + locale::ffi::ICU4XLocale, + provider::ffi::{ICU4XDataProvider, ICU4XStaticDataProvider}, + }; pub struct ICU4XCreatePluralRulesResult { pub rules: Option>, @@ -38,22 +44,40 @@ pub mod ffi { /// FFI version of `PluralRules`. /// See [the Rust docs](https://unicode-org.github.io/icu4x-docs/doc/icu_plurals/struct.PluralRules.html) for more details. #[diplomat::opaque] - pub struct ICU4XPluralRules(PluralRules); + pub struct ICU4XPluralRules(PluralRules<'static>); impl ICU4XPluralRules { /// FFI version of `PluralRules::try_new()`. /// See [the Rust docs](https://unicode-org.github.io/icu4x-docs/doc/icu_plurals/struct.PluralRules.html#method.try_new) for more details. - pub fn create( + pub fn try_new( locale: &ICU4XLocale, provider: &ICU4XDataProvider, ty: ICU4XPluralRuleType, ) -> ICU4XCreatePluralRulesResult { - let langid = locale.0.as_ref().clone(); - let provider = - <&dyn icu_provider::serde::SerdeDeDataProvider>::clone(&provider.0.as_ref()); + let provider = provider.0.as_ref(); + Self::try_new_impl(locale, provider, ty) + } + + /// Creates a new [`ICU4XPluralRules`] from a [`ICU4XStaticDataProvider`]. + pub fn try_new_from_static( + locale: &ICU4XLocale, + provider: &ICU4XStaticDataProvider, + ty: ICU4XPluralRuleType, + ) -> ICU4XCreatePluralRulesResult { + let provider = provider.0.as_ref(); + Self::try_new_impl(locale, provider, ty) + } + fn try_new_impl( + locale: &ICU4XLocale, + provider: &D, + ty: ICU4XPluralRuleType, + ) -> ICU4XCreatePluralRulesResult + where + D: DataProvider<'static, PluralRulesV1Marker> + ?Sized, + { PluralRules::try_new( - langid, + locale.0.as_ref().clone(), provider, match ty { ICU4XPluralRuleType::Cardinal => PluralRuleType::Cardinal, diff --git a/ffi/ecma402/src/pluralrules.rs b/ffi/ecma402/src/pluralrules.rs index 93c54783488..d39e58eba0e 100644 --- a/ffi/ecma402/src/pluralrules.rs +++ b/ffi/ecma402/src/pluralrules.rs @@ -225,12 +225,12 @@ pub(crate) mod internal { } } -pub struct PluralRules { +pub struct PluralRules<'data> { opts: ecma402_traits::pluralrules::Options, - rep: ipr::PluralRules, + rep: ipr::PluralRules<'data>, } -impl ecma402_traits::pluralrules::PluralRules for PluralRules { +impl<'data> ecma402_traits::pluralrules::PluralRules for PluralRules<'data> { type Error = PluralRulesError; fn try_new(l: L, opts: ecma402_traits::pluralrules::Options) -> Result @@ -253,16 +253,16 @@ impl ecma402_traits::pluralrules::PluralRules for PluralRules { } } -impl PluralRules { +impl<'data> PluralRules<'data> { /// Creates a new [`PluralRules`], using the specified data provider. - pub fn try_new_with_provider<'data, L, P>( + pub fn try_new_with_provider( l: L, opts: ecma402_traits::pluralrules::Options, provider: &P, ) -> Result where L: ecma402_traits::Locale, - P: icu_provider::DataProvider<'data, ipr::provider::PluralRuleStringsV1Marker>, + P: icu_provider::DataProvider<'data, ipr::provider::PluralRulesV1Marker>, Self: Sized, { let locale: String = format!("{}", l); @@ -272,7 +272,7 @@ impl PluralRules { let rule_type = internal::to_icu4x_type(&opts.in_type); // Oops, there is no slot in the ECMA 402 APIs to add the data provider. What to do? - let rep = ipr::PluralRules::try_new(locale.into(), provider, rule_type)?; + let rep = ipr::PluralRules::try_new(locale, provider, rule_type)?; Ok(Self { opts, rep }) } } diff --git a/provider/fs/benches/provider_fs.rs b/provider/fs/benches/provider_fs.rs index fae8f263485..4e24266e7f7 100644 --- a/provider/fs/benches/provider_fs.rs +++ b/provider/fs/benches/provider_fs.rs @@ -17,7 +17,7 @@ fn overview_bench(c: &mut Criterion) { b.iter(|| { let provider = FsDataProvider::try_new("./tests/testdata/json") .expect("Loading file from testdata directory"); - let _: DataPayload = black_box(&provider) + let _: DataPayload = black_box(&provider) .load_payload(&DataRequest { resource_path: ResourcePath { key: key::CARDINAL_V1, From c6925e86f88e957d3f14fb89ba1e350ff92486b0 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Tue, 9 Nov 2021 18:43:34 -0600 Subject: [PATCH 17/30] Revise postcard file --- components/plurals/src/rules/mod.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/components/plurals/src/rules/mod.rs b/components/plurals/src/rules/mod.rs index 92a69ee68ba..cbf08e9ca57 100644 --- a/components/plurals/src/rules/mod.rs +++ b/components/plurals/src/rules/mod.rs @@ -156,4 +156,8 @@ //! [`Sample`]: super::rules::ast::Samples //! [`AST`]: super::rules::ast pub mod reference; -pub(crate) mod runtime; + +// Need to expose it for `icu::provider_cldr` use, but we don't +// have a reason to make it fully public, so hiding docs for now. +#[doc(hidden)] +pub mod runtime; From af0a5bf58982db9c09310a9ea2c357c5849e4ae1 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Tue, 9 Nov 2021 19:00:56 -0600 Subject: [PATCH 18/30] Fix provider benchmarks --- provider/fs/benches/provider_fs.rs | 8 ++++---- .../bincode/plurals/cardinal@1/sr.bincode | Bin 175 -> 199 bytes 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/provider/fs/benches/provider_fs.rs b/provider/fs/benches/provider_fs.rs index 4e24266e7f7..ab5d8fb6a1c 100644 --- a/provider/fs/benches/provider_fs.rs +++ b/provider/fs/benches/provider_fs.rs @@ -50,7 +50,7 @@ fn json_bench(c: &mut Criterion) { c.bench_function("json/generic", |b| { b.iter(|| { - let _: DataPayload = black_box(&provider) + let _: DataPayload = black_box(&provider) .load_payload(&DataRequest { resource_path: ResourcePath { key: key::CARDINAL_V1, @@ -68,7 +68,7 @@ fn json_bench(c: &mut Criterion) { c.bench_function("json/erased_serde", |b| { b.iter(|| { - let _: DataPayload = + let _: DataPayload = black_box(&provider as &dyn SerdeDeDataProvider) .load_payload(&DataRequest { resource_path: ResourcePath { @@ -93,7 +93,7 @@ fn bincode_bench(c: &mut Criterion) { c.bench_function("bincode/generic", |b| { b.iter(|| { - let _: DataPayload = black_box(&provider) + let _: DataPayload = black_box(&provider) .load_payload(&DataRequest { resource_path: ResourcePath { key: key::CARDINAL_V1, @@ -111,7 +111,7 @@ fn bincode_bench(c: &mut Criterion) { c.bench_function("bincode/erased_serde", |b| { b.iter(|| { - let _: DataPayload = + let _: DataPayload = black_box(&provider as &dyn SerdeDeDataProvider) .load_payload(&DataRequest { resource_path: ResourcePath { diff --git a/provider/fs/tests/testdata/bincode/plurals/cardinal@1/sr.bincode b/provider/fs/tests/testdata/bincode/plurals/cardinal@1/sr.bincode index 8544889282b82fc23829a77e2fe424d06bf03dd7..428f98e9e69600dc9a4bce0478022f200a42222f 100644 GIT binary patch literal 199 zcmZQzjAVcSRw&I2q@{pZ9f(bU_z)1o!9gw{lM#v=Q-BO^D0V@XX+f4@Aj1kKps6fC V47P#?$lwEFuoW;FuoW;F1^}5?34;It literal 175 zcmZQzbYp;kG6h=&1BJxA6opI$RRu!>AlDFyYoMSA6fjiCFH%TDm4|Sl@(he2xXm-t W(=)*^5-4bBf?_OO8M4V>Wefl=Fd<<8 From 53bdb26fb3a6131b5e6afeeac91649acecd0003c Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Tue, 9 Nov 2021 19:40:09 -0600 Subject: [PATCH 19/30] Fix more tests --- components/plurals/src/provider.rs | 2 +- components/plurals/src/rules/runtime/ast.rs | 3 +- provider/fs/tests/test_file_io.rs | 40 ++++++++++----------- 3 files changed, 21 insertions(+), 24 deletions(-) diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index e528ad853fc..861ede46c87 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -21,7 +21,7 @@ pub mod key { /// /// More information: #[icu_provider::data_struct] -#[derive(Default, Clone)] +#[derive(Default, Clone, PartialEq)] #[cfg_attr( feature = "provider_serde", derive(serde::Serialize, serde::Deserialize) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index ff9bcfa6880..143dde63531 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -17,7 +17,7 @@ use zerovec::{ {VarZeroVec, ZeroVec}, }; -#[derive(Yokeable, ZeroCopyFrom, Clone)] +#[derive(Yokeable, ZeroCopyFrom, Clone, PartialEq, Debug)] pub struct Rule<'data>(pub VarZeroVec<'data, RelationULE>); #[derive(Copy, Clone, Eq, PartialEq, Debug)] @@ -290,6 +290,7 @@ impl fmt::Display for Rule<'_> { /// # Constraints /// /// The model constraints the `Operand` to 64 variants, and `Modulo` to `u32::MAX`. +#[derive(Debug, PartialEq)] #[repr(packed)] pub struct RelationULE { /// This maps to (AndOr, Polarity, Operand), diff --git a/provider/fs/tests/test_file_io.rs b/provider/fs/tests/test_file_io.rs index 27e5dd8b9dd..6353aa39650 100644 --- a/provider/fs/tests/test_file_io.rs +++ b/provider/fs/tests/test_file_io.rs @@ -13,28 +13,26 @@ use icu_provider_fs::FsDataProvider; use std::borrow::Cow; #[cfg(feature = "provider_json")] -const EXPECTED_RU_DATA: PluralRuleStringsV1 = PluralRuleStringsV1 { +const EXPECTED_RU_DATA: PluralRulesV1 = PluralRulesV1 { zero: None, - one: Some(Cow::Borrowed("v = 0 and i % 10 = 1 and i % 100 != 11")), + one: "v = 0 and i % 10 = 1 and i % 100 != 11".parse().ok(), two: None, - few: Some(Cow::Borrowed( - "v = 0 and i % 10 = 2..4 and i % 100 != 12..14", - )), - many: Some(Cow::Borrowed( - "v = 0 and i % 10 = 0 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 11..14", - )), + few: "v = 0 and i % 10 = 2..4 and i % 100 != 12..14".parse().ok(), + many: "v = 0 and i % 10 = 0 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 11..14" + .parse() + .ok(), }; #[cfg(feature = "provider_bincode")] -const EXPECTED_SR_DATA: PluralRuleStringsV1 = PluralRuleStringsV1 { +const EXPECTED_SR_DATA: PluralRulesV1 = PluralRulesV1 { zero: None, - one: Some(Cow::Borrowed( - "v = 0 and i % 10 = 1 and i % 100 != 11 or f % 10 = 1 and f % 100 != 11", - )), + one: "v = 0 and i % 10 = 1 and i % 100 != 11 or f % 10 = 1 and f % 100 != 11" + .parse() + .ok(), two: None, - few: Some(Cow::Borrowed( - "v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14", - )), + few: "v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14" + .parse() + .ok(), many: None, }; @@ -63,7 +61,7 @@ fn test_json() { let provider = FsDataProvider::try_new("./tests/testdata/json") .expect("Loading file from testdata directory"); - let plurals_data: DataPayload = provider + let plurals_data: DataPayload = provider .load_payload(&get_request(langid!("ru"))) .expect("The data should be valid") .take_payload() @@ -77,8 +75,7 @@ fn test_json_dyn_erased_serde() { let provider = FsDataProvider::try_new("./tests/testdata/json") .expect("Loading file from testdata directory"); - let plurals_data: DataPayload = (&provider - as &dyn SerdeDeDataProvider) + let plurals_data: DataPayload = (&provider as &dyn SerdeDeDataProvider) .load_payload(&get_request(langid!("ru"))) .expect("The data should be valid") .take_payload() @@ -92,7 +89,7 @@ fn test_json_errors() { let provider = FsDataProvider::try_new("./tests/testdata/json") .expect("Loading file from testdata directory"); - type Provider<'data> = dyn DataProvider<'data, PluralRuleStringsV1Marker>; + type Provider<'data> = dyn DataProvider<'data, PluralRulesV1Marker>; assert!(matches!( Provider::load_payload( @@ -165,7 +162,7 @@ fn test_bincode() { let provider = FsDataProvider::try_new("./tests/testdata/bincode") .expect("Loading file from testdata directory"); - let plurals_data: DataPayload = provider + let plurals_data: DataPayload = provider .load_payload(&get_request(langid!("sr"))) .expect("The data should be valid") .take_payload() @@ -179,8 +176,7 @@ fn test_bincode_dyn_erased_serde() { let provider = FsDataProvider::try_new("./tests/testdata/bincode") .expect("Loading file from testdata directory"); - let plurals_data: DataPayload = (&provider - as &dyn SerdeDeDataProvider) + let plurals_data: DataPayload = (&provider as &dyn SerdeDeDataProvider) .load_payload(&get_request(langid!("sr"))) .expect("The data should be valid") .take_payload() From ac11dae5238dde6b516fa61d9ef5135a8639701d Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 09:40:41 -0600 Subject: [PATCH 20/30] Add rountrip test --- components/plurals/src/rules/runtime/ast.rs | 29 ++++++++++++++++----- 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index 143dde63531..c875e4170d4 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -498,15 +498,16 @@ mod serde { #[cfg(test)] mod test { use super::*; - use crate::rules::reference::ast; - use crate::rules::reference::parse; + use crate::rules::reference; use crate::rules::runtime::test_rule; use crate::PluralOperands; #[test] fn simple_rule_test() { + use reference::ast; + let input = "i = 1"; - let full_ast = parse(input.as_bytes()).unwrap(); + let full_ast = reference::parse(input.as_bytes()).unwrap(); assert_eq!( full_ast, ast::Rule { @@ -547,9 +548,9 @@ mod test { #[test] fn complex_rule_test() { - let input = "n % 10 = 3..4,9 and n % 100 != 10..19,70..79,90..99 or n = 0"; - let full_ast = parse(input.as_bytes()).unwrap(); - let rule = Rule::from(&full_ast); + let input = "n % 10 = 3..4, 9 and n % 100 != 10..19, 70..79, 90..99 or n = 0"; + let ref_rule = reference::parse(input.as_bytes()).unwrap(); + let rule = Rule::from(&ref_rule); let fd = fixed_decimal::decimal::FixedDecimal::from(0); let operands = PluralOperands::from(&fd); @@ -576,6 +577,22 @@ mod test { assert!(test_rule(&rule, &operands),); } + #[test] + fn complex_rule_ule_roundtrip_test() { + let input = "n % 10 = 3..4, 9 and n % 100 != 10..19, 70..79, 90..99 or n = 0"; + + let ref_rule = reference::parse(input.as_bytes()).unwrap(); + + // Create a ZVZ backed Rule from the reference one. + let rule = Rule::from(&ref_rule); + + // Convert it back to reference Rule and compare. + assert_eq!(ref_rule, reference::ast::Rule::from(&rule)); + + // Verify that the stringified output matches the input. + assert_eq!(input, rule.to_string(),); + } + #[test] fn range_or_value_ule_test() { let rov = RangeOrValue::Value(1); From 4344425445d14c405e45f5d84b8d2e4546d56d1a Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 09:55:17 -0600 Subject: [PATCH 21/30] Fix another test --- components/plurals/src/provider.rs | 2 +- provider/fs/tests/test_file_io.rs | 56 ++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index 861ede46c87..60dfea887e4 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -21,7 +21,7 @@ pub mod key { /// /// More information: #[icu_provider::data_struct] -#[derive(Default, Clone, PartialEq)] +#[derive(Default, Clone, PartialEq, Debug)] #[cfg_attr( feature = "provider_serde", derive(serde::Serialize, serde::Deserialize) diff --git a/provider/fs/tests/test_file_io.rs b/provider/fs/tests/test_file_io.rs index 6353aa39650..3f2aeaa035e 100644 --- a/provider/fs/tests/test_file_io.rs +++ b/provider/fs/tests/test_file_io.rs @@ -6,33 +6,53 @@ use icu_locid::LanguageIdentifier; use icu_locid_macros::langid; -use icu_plurals::provider::*; +use icu_plurals::{provider::*, rules::runtime::ast::Rule}; use icu_provider::prelude::*; use icu_provider::serde::*; use icu_provider_fs::FsDataProvider; use std::borrow::Cow; +#[derive(Debug, PartialEq)] +struct PluralRulesTestData { + zero: Option<&'static str>, + one: Option<&'static str>, + two: Option<&'static str>, + few: Option<&'static str>, + many: Option<&'static str>, +} + +impl From<&PluralRulesTestData> for PluralRulesV1<'_> { + fn from(i: &PluralRulesTestData) -> Self { + fn parse(i: &'static str) -> Rule { + i.parse().expect("Failed to parse rule") + } + Self { + zero: i.zero.map(parse), + one: i.one.map(parse), + two: i.two.map(parse), + few: i.few.map(parse), + many: i.many.map(parse), + } + } +} + #[cfg(feature = "provider_json")] -const EXPECTED_RU_DATA: PluralRulesV1 = PluralRulesV1 { +const EXPECTED_RU_DATA: PluralRulesTestData = PluralRulesTestData { zero: None, - one: "v = 0 and i % 10 = 1 and i % 100 != 11".parse().ok(), + one: Some("v = 0 and i % 10 = 1 and i % 100 != 11"), two: None, - few: "v = 0 and i % 10 = 2..4 and i % 100 != 12..14".parse().ok(), - many: "v = 0 and i % 10 = 0 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 11..14" - .parse() - .ok(), + few: Some("v = 0 and i % 10 = 2..4 and i % 100 != 12..14"), + many: Some("v = 0 and i % 10 = 0 or v = 0 and i % 10 = 5..9 or v = 0 and i % 100 = 11..14"), }; #[cfg(feature = "provider_bincode")] -const EXPECTED_SR_DATA: PluralRulesV1 = PluralRulesV1 { +const EXPECTED_SR_DATA: PluralRulesTestData = PluralRulesTestData { zero: None, - one: "v = 0 and i % 10 = 1 and i % 100 != 11 or f % 10 = 1 and f % 100 != 11" - .parse() - .ok(), + one: Some("v = 0 and i % 10 = 1 and i % 100 != 11 or f % 10 = 1 and f % 100 != 11"), two: None, - few: "v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14" - .parse() - .ok(), + few: Some( + "v = 0 and i % 10 = 2..4 and i % 100 != 12..14 or f % 10 = 2..4 and f % 100 != 12..14", + ), many: None, }; @@ -66,7 +86,7 @@ fn test_json() { .expect("The data should be valid") .take_payload() .expect("The data should be present"); - assert_eq!(plurals_data.get(), &EXPECTED_RU_DATA); + assert_eq!(plurals_data.get(), &PluralRulesV1::from(&EXPECTED_RU_DATA)); } #[cfg(feature = "provider_json")] @@ -80,7 +100,7 @@ fn test_json_dyn_erased_serde() { .expect("The data should be valid") .take_payload() .expect("The data should be present"); - assert_eq!(plurals_data.get(), &EXPECTED_RU_DATA); + assert_eq!(plurals_data.get(), &PluralRulesV1::from(&EXPECTED_RU_DATA)); } #[cfg(feature = "provider_json")] @@ -167,7 +187,7 @@ fn test_bincode() { .expect("The data should be valid") .take_payload() .expect("The data should be present"); - assert_eq!(plurals_data.get(), &EXPECTED_SR_DATA); + assert_eq!(plurals_data.get(), &PluralRulesV1::from(&EXPECTED_SR_DATA)); } #[test] @@ -181,5 +201,5 @@ fn test_bincode_dyn_erased_serde() { .expect("The data should be valid") .take_payload() .expect("The data should be present"); - assert_eq!(plurals_data.get(), &EXPECTED_SR_DATA); + assert_eq!(plurals_data.get(), &PluralRulesV1::from(&EXPECTED_SR_DATA)); } From d3ac05c75bf33ce4b92c17e6d4faf7b73323b650 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 10:05:46 -0600 Subject: [PATCH 22/30] Fix more tests --- components/datetime/tests/datetime.rs | 6 +++--- provider/cldr/src/download/cldr_allinone.rs | 6 ++++-- provider/cldr/src/transform/plurals.rs | 15 +++++++-------- provider/testdata/README.md | 2 +- provider/testdata/src/lib.rs | 6 ++++-- 5 files changed, 19 insertions(+), 16 deletions(-) diff --git a/components/datetime/tests/datetime.rs b/components/datetime/tests/datetime.rs index cd5100f4ad9..4b00182d8b7 100644 --- a/components/datetime/tests/datetime.rs +++ b/components/datetime/tests/datetime.rs @@ -19,7 +19,7 @@ use icu_datetime::{ DateTimeFormat, DateTimeFormatOptions, ZonedDateTimeFormat, }; use icu_locid::{LanguageIdentifier, Locale}; -use icu_plurals::provider::PluralRuleStringsV1Marker; +use icu_plurals::provider::PluralRulesV1Marker; use icu_provider::prelude::*; use icu_provider::struct_provider::StructProvider; use patterns::{ @@ -65,11 +65,11 @@ impl<'data> DataProvider<'data, DatePatternsV1Marker> for MultiKeyStructProvider } } -impl<'data> DataProvider<'data, PluralRuleStringsV1Marker> for MultiKeyStructProvider<'data> { +impl<'data> DataProvider<'data, PluralRulesV1Marker> for MultiKeyStructProvider<'data> { fn load_payload( &self, _req: &DataRequest, - ) -> Result, icu_provider::DataError> { + ) -> Result, icu_provider::DataError> { Err(icu_provider::DataError::MissingPayload) } } diff --git a/provider/cldr/src/download/cldr_allinone.rs b/provider/cldr/src/download/cldr_allinone.rs index 5c7bd3de3ce..793dae9e121 100644 --- a/provider/cldr/src/download/cldr_allinone.rs +++ b/provider/cldr/src/download/cldr_allinone.rs @@ -40,7 +40,7 @@ use std::path::PathBuf; /// let data_provider = PluralsProvider::try_from(&paths as &dyn CldrPaths) /// .expect("The data should be well-formed after downloading"); /// -/// let data: DataPayload = data_provider +/// let data: DataPayload = data_provider /// .load_payload(&DataRequest { /// resource_path: ResourcePath { /// key: icu_plurals::provider::key::ORDINAL_V1, @@ -53,7 +53,9 @@ use std::path::PathBuf; /// .unwrap() /// .take_payload() /// .unwrap(); -/// assert_eq!(data.get().few, Some(Cow::Borrowed("n % 10 = 3 and n % 100 != 13"))); +/// let rule = "n % 10 = 3 and n % 100 != 13".parse() +/// .expect("Failed to parse plural rule"); +/// assert_eq!(data.get().few, Some(rule)); /// } /// /// // Calling demo(downloader) will cause the data to actually get downloaded. diff --git a/provider/cldr/src/transform/plurals.rs b/provider/cldr/src/transform/plurals.rs index 1c37e46d85e..b2a63c3e354 100644 --- a/provider/cldr/src/transform/plurals.rs +++ b/provider/cldr/src/transform/plurals.rs @@ -191,13 +191,12 @@ pub(self) mod cldr_json { #[test] fn test_basic() { use icu_locid_macros::langid; - use std::borrow::Borrow; let cldr_paths = crate::cldr_paths::for_test(); let provider = PluralsProvider::try_from(&cldr_paths as &dyn CldrPaths).unwrap(); // Spot-check locale 'cs' since it has some interesting entries - let cs_rules: DataPayload = provider + let cs_rules: DataPayload = provider .load_payload(&DataRequest { resource_path: ResourcePath { key: key::CARDINAL_V1, @@ -213,16 +212,16 @@ fn test_basic() { assert_eq!(None, cs_rules.get().zero); assert_eq!( - Some("i = 1 and v = 0"), - cs_rules.get().one.as_ref().map(|v| v.borrow()) + Some("i = 1 and v = 0".parse().expect("Failed to parse rule")), + cs_rules.get().one ); assert_eq!(None, cs_rules.get().two); assert_eq!( - Some("i = 2..4 and v = 0"), - cs_rules.get().few.as_ref().map(|v| v.borrow()) + Some("i = 2..4 and v = 0".parse().expect("Failed to parse rule")), + cs_rules.get().few ); assert_eq!( - Some("v != 0"), - cs_rules.get().many.as_ref().map(|v| v.borrow()) + Some("v != 0".parse().expect("Failed to parse rule")), + cs_rules.get().many ); } diff --git a/provider/testdata/README.md b/provider/testdata/README.md index 5ba846d59b5..3a575cc80a3 100644 --- a/provider/testdata/README.md +++ b/provider/testdata/README.md @@ -44,7 +44,7 @@ use icu_locid_macros::langid; let data_provider = icu_testdata::get_provider(); -let data: DataPayload = data_provider +let data: DataPayload = data_provider .load_payload(&DataRequest { resource_path: ResourcePath { key: icu_plurals::provider::key::CARDINAL_V1, diff --git a/provider/testdata/src/lib.rs b/provider/testdata/src/lib.rs index 4cf03e67099..4bceae337ec 100644 --- a/provider/testdata/src/lib.rs +++ b/provider/testdata/src/lib.rs @@ -46,7 +46,7 @@ //! //! let data_provider = icu_testdata::get_provider(); //! -//! let data: DataPayload = data_provider +//! let data: DataPayload = data_provider //! .load_payload(&DataRequest { //! resource_path: ResourcePath { //! key: icu_plurals::provider::key::CARDINAL_V1, @@ -59,7 +59,9 @@ //! .unwrap() //! .take_payload() //! .unwrap(); -//! assert_eq!(data.get().few, Some(Cow::Borrowed("v = 0 and i % 10 = 2..4 and i % 100 != 12..14"))); +//! let rule = "v = 0 and i % 10 = 2..4 and i % 100 != 12..14".parse() +//! .expect("Failed to parse plural rule"); +//! assert_eq!(data.get().few, Some(rule)); //! ``` //! //! [`ICU4X`]: ../icu/index.html From e64d626bac15a5551814603142d6fa90470614e0 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 10:22:14 -0600 Subject: [PATCH 23/30] Update postcard --- provider/testdata/data/testdata.postcard | Bin 846762 -> 847289 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/provider/testdata/data/testdata.postcard b/provider/testdata/data/testdata.postcard index 11617751ad582f4fc5f19fefd422422bbc8824ad..4b25cf0478434785adb83b26ba3aba63f97152b9 100644 GIT binary patch delta 1543 zcmcJP&r2IY6vyX96E*%Q2pZBN-V{nP2P;96fQJ49rM>jvL2Uw-ASJ&=%s%|Z$jz!`*vo#>zdF@2R=J*XWq=b_uV&p@qPLB$MWH|{3?4L znGqAV!1mfb8m5nu^v+!JmcDlxEboSz*CbyLjS?kOIo`Xg38r*}Zbt4N# zZ>|0;dQTiV;)c0tAxzjmt;m$u+-aSQku%9z7asQ#l)-zj2@-qtFc9TWl?XK^br7{K zRUfu+e==s_Dq|M(Y0S|eS1YpDY#$hzqo!;cM`-?y2iAWRPQRM}ZB^#IpOCD~eA(Ow zx(i>!XHP2*HpA_*3|VjFpH@;GDyJ(aU`%o0C;O4MqBC7ltYbQTJ_fbboxxrsV;%5I zBxhWhI4msDPWu*__Ej`}BC}0p{3fo&kvk;RV&L92>=rf+Is-c;if-;^baN*cQ&=g! z=y%8buN8UceL;RTM$8uQ;ZmSPgm;IGT3i7$DNQV@$N(dH8yJqe5wV{Urx`!7!>pYE z=+(%b)TH1wcK09seuB7uhjIOqzbWfG9;y>pmwy0ahl1e% delta 1025 zcmbtTJ#Q015ZwU+jst{9J{eF%28qycoNs;S#Kv(<1rjJ770?*vES+SXqqCqj9pV>k zoi3t)1_@D8Q&FWz1N;IaQ4r`5uFVd#4uV$8{Aq$$d6iWZTuoU4hdHv8Hb2{N-Agi0!7?)% zd)Y3&;TWNwCZoq{l20jUCvux(QoPMV_OIj*=~vJq!D$kS3xY<~95_B@QD)3}keQfx+>0^_2fvgRo#K*iic0*(P*zjt5gV%X)Fscpn-8AbB9_m_> zy;QJjKMbCe7%}}$#59MkyX+n7lS4-?Eo@{~y5O1j+pxA+v|+w&!wu*4R|l%^%3UCw cBb+B(AS@6r5-t%+ghfJ`u=J(ez5MCpDKzQh&;S4c From aa05c794f51ea75cfc083e5432ba7602853614f3 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 10:32:08 -0600 Subject: [PATCH 24/30] Fix readme --- provider/testdata/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/provider/testdata/README.md b/provider/testdata/README.md index 3a575cc80a3..8efc5ab5dc0 100644 --- a/provider/testdata/README.md +++ b/provider/testdata/README.md @@ -57,7 +57,9 @@ let data: DataPayload = data_provide .unwrap() .take_payload() .unwrap(); -assert_eq!(data.get().few, Some(Cow::Borrowed("v = 0 and i % 10 = 2..4 and i % 100 != 12..14"))); +let rule = "v = 0 and i % 10 = 2..4 and i % 100 != 12..14".parse() + .expect("Failed to parse plural rule"); +assert_eq!(data.get().few, Some(rule)); ``` [`ICU4X`]: ../icu/index.html From 30595429aca54315f3ef96d3df95a59b75dfc18d Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 11:57:20 -0600 Subject: [PATCH 25/30] Regenerate diplomat ffi --- ffi/diplomat/c/include/ICU4XPluralRules.h | 5 +++- .../cpp/docs/source/pluralrules_ffi.rst | 6 ++++- ffi/diplomat/cpp/include/ICU4XPluralRules.h | 5 +++- ffi/diplomat/cpp/include/ICU4XPluralRules.hpp | 24 ++++++++++++++++--- ffi/diplomat/wasm/docs/pluralrules_ffi.rst | 6 ++++- ffi/diplomat/wasm/lib/api.mjs | 22 +++++++++++++++-- 6 files changed, 59 insertions(+), 9 deletions(-) diff --git a/ffi/diplomat/c/include/ICU4XPluralRules.h b/ffi/diplomat/c/include/ICU4XPluralRules.h index de3d40eed14..c75d36e8f45 100644 --- a/ffi/diplomat/c/include/ICU4XPluralRules.h +++ b/ffi/diplomat/c/include/ICU4XPluralRules.h @@ -15,11 +15,14 @@ typedef struct ICU4XPluralRules ICU4XPluralRules; #include "ICU4XDataProvider.h" #include "ICU4XPluralRuleType.h" #include "ICU4XCreatePluralRulesResult.h" +#include "ICU4XStaticDataProvider.h" #include "ICU4XPluralOperands.h" #include "ICU4XPluralCategory.h" #include "ICU4XPluralCategories.h" -ICU4XCreatePluralRulesResult ICU4XPluralRules_create(const ICU4XLocale* locale, const ICU4XDataProvider* provider, ICU4XPluralRuleType ty); +ICU4XCreatePluralRulesResult ICU4XPluralRules_try_new(const ICU4XLocale* locale, const ICU4XDataProvider* provider, ICU4XPluralRuleType ty); + +ICU4XCreatePluralRulesResult ICU4XPluralRules_try_new_from_static(const ICU4XLocale* locale, const ICU4XStaticDataProvider* provider, ICU4XPluralRuleType ty); ICU4XPluralCategory ICU4XPluralRules_select(const ICU4XPluralRules* self, const ICU4XPluralOperands* op); diff --git a/ffi/diplomat/cpp/docs/source/pluralrules_ffi.rst b/ffi/diplomat/cpp/docs/source/pluralrules_ffi.rst index 4fb834f912a..808590ae5b3 100644 --- a/ffi/diplomat/cpp/docs/source/pluralrules_ffi.rst +++ b/ffi/diplomat/cpp/docs/source/pluralrules_ffi.rst @@ -79,10 +79,14 @@ FFI version of ``PluralRules``. See `the Rust docs `__ for more details. - .. cpp:function:: static ICU4XCreatePluralRulesResult create(const ICU4XLocale& locale, const ICU4XDataProvider& provider, ICU4XPluralRuleType ty) + .. cpp:function:: static ICU4XCreatePluralRulesResult try_new(const ICU4XLocale& locale, const ICU4XDataProvider& provider, ICU4XPluralRuleType ty) FFI version of ``PluralRules::try_new()``. See `the Rust docs `__ for more details. + .. cpp:function:: static ICU4XCreatePluralRulesResult try_new_from_static(const ICU4XLocale& locale, const ICU4XStaticDataProvider& provider, ICU4XPluralRuleType ty) + + Creates a new :cpp:class:`ICU4XPluralRules` from a :cpp:class:`ICU4XStaticDataProvider`. + .. cpp:function:: ICU4XPluralCategory select(const ICU4XPluralOperands& op) const FFI version of ``PluralRules::select()``. See `the Rust docs `__ for more details. diff --git a/ffi/diplomat/cpp/include/ICU4XPluralRules.h b/ffi/diplomat/cpp/include/ICU4XPluralRules.h index de3d40eed14..c75d36e8f45 100644 --- a/ffi/diplomat/cpp/include/ICU4XPluralRules.h +++ b/ffi/diplomat/cpp/include/ICU4XPluralRules.h @@ -15,11 +15,14 @@ typedef struct ICU4XPluralRules ICU4XPluralRules; #include "ICU4XDataProvider.h" #include "ICU4XPluralRuleType.h" #include "ICU4XCreatePluralRulesResult.h" +#include "ICU4XStaticDataProvider.h" #include "ICU4XPluralOperands.h" #include "ICU4XPluralCategory.h" #include "ICU4XPluralCategories.h" -ICU4XCreatePluralRulesResult ICU4XPluralRules_create(const ICU4XLocale* locale, const ICU4XDataProvider* provider, ICU4XPluralRuleType ty); +ICU4XCreatePluralRulesResult ICU4XPluralRules_try_new(const ICU4XLocale* locale, const ICU4XDataProvider* provider, ICU4XPluralRuleType ty); + +ICU4XCreatePluralRulesResult ICU4XPluralRules_try_new_from_static(const ICU4XLocale* locale, const ICU4XStaticDataProvider* provider, ICU4XPluralRuleType ty); ICU4XPluralCategory ICU4XPluralRules_select(const ICU4XPluralRules* self, const ICU4XPluralOperands* op); diff --git a/ffi/diplomat/cpp/include/ICU4XPluralRules.hpp b/ffi/diplomat/cpp/include/ICU4XPluralRules.hpp index d6db936dcfc..9495359c6dc 100644 --- a/ffi/diplomat/cpp/include/ICU4XPluralRules.hpp +++ b/ffi/diplomat/cpp/include/ICU4XPluralRules.hpp @@ -18,6 +18,7 @@ class ICU4XLocale; class ICU4XDataProvider; #include "ICU4XPluralRuleType.hpp" struct ICU4XCreatePluralRulesResult; +class ICU4XStaticDataProvider; struct ICU4XPluralOperands; #include "ICU4XPluralCategory.hpp" struct ICU4XPluralCategories; @@ -37,7 +38,12 @@ class ICU4XPluralRules { * FFI version of `PluralRules::try_new()`. * See [the Rust docs](https://unicode-org.github.io/icu4x-docs/doc/icu_plurals/struct.PluralRules.html#method.try_new) for more details. */ - static ICU4XCreatePluralRulesResult create(const ICU4XLocale& locale, const ICU4XDataProvider& provider, ICU4XPluralRuleType ty); + static ICU4XCreatePluralRulesResult try_new(const ICU4XLocale& locale, const ICU4XDataProvider& provider, ICU4XPluralRuleType ty); + + /** + * Creates a new [`ICU4XPluralRules`] from a [`ICU4XStaticDataProvider`]. + */ + static ICU4XCreatePluralRulesResult try_new_from_static(const ICU4XLocale& locale, const ICU4XStaticDataProvider& provider, ICU4XPluralRuleType ty); /** * FFI version of `PluralRules::select()`. @@ -60,11 +66,23 @@ class ICU4XPluralRules { #include "ICU4XLocale.hpp" #include "ICU4XDataProvider.hpp" #include "ICU4XCreatePluralRulesResult.hpp" +#include "ICU4XStaticDataProvider.hpp" #include "ICU4XPluralOperands.hpp" #include "ICU4XPluralCategories.hpp" -inline ICU4XCreatePluralRulesResult ICU4XPluralRules::create(const ICU4XLocale& locale, const ICU4XDataProvider& provider, ICU4XPluralRuleType ty) { - capi::ICU4XCreatePluralRulesResult diplomat_raw_struct_out_value = capi::ICU4XPluralRules_create(locale.AsFFI(), provider.AsFFI(), static_cast(ty)); +inline ICU4XCreatePluralRulesResult ICU4XPluralRules::try_new(const ICU4XLocale& locale, const ICU4XDataProvider& provider, ICU4XPluralRuleType ty) { + capi::ICU4XCreatePluralRulesResult diplomat_raw_struct_out_value = capi::ICU4XPluralRules_try_new(locale.AsFFI(), provider.AsFFI(), static_cast(ty)); + auto diplomat_optional_raw_out_value_rules = diplomat_raw_struct_out_value.rules; + std::optional diplomat_optional_out_value_rules; + if (diplomat_optional_raw_out_value_rules != nullptr) { + diplomat_optional_out_value_rules = ICU4XPluralRules(diplomat_optional_raw_out_value_rules); + } else { + diplomat_optional_out_value_rules = std::nullopt; + } + return ICU4XCreatePluralRulesResult{ .rules = std::move(diplomat_optional_out_value_rules), .success = std::move(diplomat_raw_struct_out_value.success) }; +} +inline ICU4XCreatePluralRulesResult ICU4XPluralRules::try_new_from_static(const ICU4XLocale& locale, const ICU4XStaticDataProvider& provider, ICU4XPluralRuleType ty) { + capi::ICU4XCreatePluralRulesResult diplomat_raw_struct_out_value = capi::ICU4XPluralRules_try_new_from_static(locale.AsFFI(), provider.AsFFI(), static_cast(ty)); auto diplomat_optional_raw_out_value_rules = diplomat_raw_struct_out_value.rules; std::optional diplomat_optional_out_value_rules; if (diplomat_optional_raw_out_value_rules != nullptr) { diff --git a/ffi/diplomat/wasm/docs/pluralrules_ffi.rst b/ffi/diplomat/wasm/docs/pluralrules_ffi.rst index 91afd4ca2da..59d2f4cf850 100644 --- a/ffi/diplomat/wasm/docs/pluralrules_ffi.rst +++ b/ffi/diplomat/wasm/docs/pluralrules_ffi.rst @@ -63,10 +63,14 @@ FFI version of ``PluralRules``. See `the Rust docs `__ for more details. - .. js:staticfunction:: create(locale, provider, ty) + .. js:staticfunction:: try_new(locale, provider, ty) FFI version of ``PluralRules::try_new()``. See `the Rust docs `__ for more details. + .. js:staticfunction:: try_new_from_static(locale, provider, ty) + + Creates a new :js:class:`ICU4XPluralRules` from a :js:class:`ICU4XStaticDataProvider`. + .. js:function:: select(op) FFI version of ``PluralRules::select()``. See `the Rust docs `__ for more details. diff --git a/ffi/diplomat/wasm/lib/api.mjs b/ffi/diplomat/wasm/lib/api.mjs index 036ddd3a7b7..c697737536d 100644 --- a/ffi/diplomat/wasm/lib/api.mjs +++ b/ffi/diplomat/wasm/lib/api.mjs @@ -884,10 +884,28 @@ export class ICU4XPluralRules { this.underlying = underlying; } - static create(locale, provider, ty) { + static try_new(locale, provider, ty) { const diplomat_out = (() => { const diplomat_receive_buffer = wasm.diplomat_alloc(5, 4); - wasm.ICU4XPluralRules_create(diplomat_receive_buffer, locale.underlying, provider.underlying, ICU4XPluralRuleType_js_to_rust[ty]); + wasm.ICU4XPluralRules_try_new(diplomat_receive_buffer, locale.underlying, provider.underlying, ICU4XPluralRuleType_js_to_rust[ty]); + const out = new ICU4XCreatePluralRulesResult(diplomat_receive_buffer); + const out_rules_value = out.rules; + ICU4XPluralRules_box_destroy_registry.register(out_rules_value, out_rules_value.underlying); + Object.defineProperty(out, "rules", { value: out_rules_value }); + diplomat_alloc_destroy_registry.register(out, { + ptr: out.underlying, + size: 5, + align: 4, + }); + return out; + })(); + return diplomat_out; + } + + static try_new_from_static(locale, provider, ty) { + const diplomat_out = (() => { + const diplomat_receive_buffer = wasm.diplomat_alloc(5, 4); + wasm.ICU4XPluralRules_try_new_from_static(diplomat_receive_buffer, locale.underlying, provider.underlying, ICU4XPluralRuleType_js_to_rust[ty]); const out = new ICU4XCreatePluralRulesResult(diplomat_receive_buffer); const out_rules_value = out.rules; ICU4XPluralRules_box_destroy_registry.register(out_rules_value, out_rules_value.underlying); From 8e64819f91582f1265d8fd11642f660c09e7b68a Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 12:23:50 -0600 Subject: [PATCH 26/30] Fix diplomat example --- ffi/diplomat/c/examples/pluralrules/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/diplomat/c/examples/pluralrules/test.c b/ffi/diplomat/c/examples/pluralrules/test.c index 3aa63a44b95..f63c567f7e1 100644 --- a/ffi/diplomat/c/examples/pluralrules/test.c +++ b/ffi/diplomat/c/examples/pluralrules/test.c @@ -15,7 +15,7 @@ int main() { return 1; } ICU4XDataProvider* provider = result.provider; - ICU4XCreatePluralRulesResult plural_result = ICU4XPluralRules_create(locale, provider, ICU4XPluralRuleType_Cardinal); + ICU4XCreatePluralRulesResult plural_result = ICU4XPluralRules_try_new(locale, provider, ICU4XPluralRuleType_Cardinal); if (!plural_result.success) { printf("Failed to create PluralRules\n"); return 1; From 4d8696f2129802101bc87e1f66368af8ee4dad33 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 12:35:55 -0600 Subject: [PATCH 27/30] Fix test-cpp --- ffi/diplomat/cpp/examples/pluralrules/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ffi/diplomat/cpp/examples/pluralrules/test.cpp b/ffi/diplomat/cpp/examples/pluralrules/test.cpp index 2413100a53a..67346dde532 100644 --- a/ffi/diplomat/cpp/examples/pluralrules/test.cpp +++ b/ffi/diplomat/cpp/examples/pluralrules/test.cpp @@ -12,7 +12,7 @@ int main() { ICU4XLocale locale = ICU4XLocale::create("ar").value(); std::cout << "Running test for locale " << locale.tostring().ok().value() << std::endl; ICU4XDataProvider dp = ICU4XDataProvider::create_fs(path).provider.value(); - ICU4XPluralRules pr = ICU4XPluralRules::create(locale, dp, ICU4XPluralRuleType::Cardinal).rules.value(); + ICU4XPluralRules pr = ICU4XPluralRules::try_new(locale, dp, ICU4XPluralRuleType::Cardinal).rules.value(); ICU4XPluralOperands op = { .i = 3 }; ICU4XPluralCategory cat = pr.select(op); From 365f8bd751ecbc48e68dc4c74c18b16f21bcbeac Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Thu, 11 Nov 2021 21:05:22 -0600 Subject: [PATCH 28/30] Update safety comment --- components/plurals/src/rules/runtime/ast.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index c875e4170d4..cec1bd104bd 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -352,9 +352,13 @@ impl RelationULE { // Safety (based on the safety checklist on the ULE trait): // 1. RelationULE does not include any uninitialized or padding bytes. -// 2. The impl of validate_byte_slice() returns an error if any byte is not valid. -// 3. The other ULE methods use the default impl. -// 4. RelationULE byte equality is semantic equality. +// (achieved by `#[repr(packed)]` on a type that satisfies this invariant) +// 2. RelationULE is aligned to 1 byte +// (achieved by `#[repr(packed)]` on a type that satisfies this invariant) +// 3. The impl of validate_byte_slice() returns an error if any byte is not valid. +// 4. The impl of validate_byte_slice() returns an error if there are extra bytes. +// 5 The other ULE methods use the default impl. +// 6. RelationULE byte equality is semantic equality. unsafe impl VarULE for RelationULE { type Error = &'static str; From 460b5b63a3821b067ebb91e3c4c736a2e4ab1978 Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Mon, 15 Nov 2021 10:23:53 -0600 Subject: [PATCH 29/30] Apply feedback --- components/plurals/src/lib.rs | 4 +-- components/plurals/src/rules/runtime/ast.rs | 19 ++++++++++-- components/plurals/tests/plurals.rs | 34 ++++++++++++++++++--- 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/components/plurals/src/lib.rs b/components/plurals/src/lib.rs index cf5d11330e4..4792fee2988 100644 --- a/components/plurals/src/lib.rs +++ b/components/plurals/src/lib.rs @@ -200,7 +200,7 @@ pub enum PluralCategory { impl PluralCategory { /// Returns an ordered iterator over variants of [`Plural Categories`]. /// - /// Categories are returned in UTS 35 order. + /// Categories are returned in alphabetical order. /// /// # Examples /// @@ -404,7 +404,7 @@ impl<'data> PluralRules<'data> { /// Returns all [`Plural Categories`] appropriate for a [`PluralRules`] object /// based on the [`LanguageIdentifier`] and [`PluralRuleType`]. /// - /// The [`Plural Categories`] are returned in alphabetically sorted order. + /// The [`Plural Categories`] are returned in UTS 35 sorted order. /// /// The category [`PluralCategory::Other`] is always included. /// diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index cec1bd104bd..ff4fadea40e 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -357,8 +357,9 @@ impl RelationULE { // (achieved by `#[repr(packed)]` on a type that satisfies this invariant) // 3. The impl of validate_byte_slice() returns an error if any byte is not valid. // 4. The impl of validate_byte_slice() returns an error if there are extra bytes. -// 5 The other ULE methods use the default impl. -// 6. RelationULE byte equality is semantic equality. +// 5. The impl of `from_byte_slice_unchecked()` returns a reference to the same data. +// 6. The other VarULE methods use the default impl. +// 7. RelationULE byte equality is semantic equality. unsafe impl VarULE for RelationULE { type Error = &'static str; @@ -368,6 +369,16 @@ unsafe impl VarULE for RelationULE { let len = bytes.len(); // subtract length of andor_polarity_operand and modulo and then convert between a slice of bytes and RangeOrValueULE let len_new = (len - 5) / 8; + + // Keep this in sync with `RelationULE` + #[cfg(debug_assertions)] + struct RelationULESized { + _andor_polarity_operand: u8, + _modulo: ::ULE, + } + debug_assert_eq!(core::mem::size_of::(), 5); + debug_assert_eq!(core::mem::size_of::(), 8); + // it's hard constructing custom DSTs, we fake a pointer/length construction // eventually we can use the Pointer::Metadata APIs when they stabilize let fake_slice = core::ptr::slice_from_raw_parts(ptr as *const RangeOrValueULE, len_new); @@ -390,6 +401,10 @@ unsafe impl VarULE for RelationULE { } } +// Safety +// +// the slices to the callback must, when concatenated, +// are a valid instance of the RelationULE type. unsafe impl EncodeAsVarULE for Relation<'_> { fn encode_var_ule_as_slices(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R { let encoded = diff --git a/components/plurals/tests/plurals.rs b/components/plurals/tests/plurals.rs index e6714d8ddd1..d281b00bddf 100644 --- a/components/plurals/tests/plurals.rs +++ b/components/plurals/tests/plurals.rs @@ -3,11 +3,14 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use icu_locid_macros::langid; -use icu_plurals::provider::{self, PluralRulesV1}; -use icu_plurals::{PluralCategory, PluralRuleType, PluralRules}; -use icu_provider::prelude::*; -use icu_provider::struct_provider::StructProvider; +use icu_plurals::{ + provider::{self, PluralRulesV1, PluralRulesV1Marker}, + rules::runtime::ast::Rule, + PluralCategory, PluralRuleType, PluralRules, +}; +use icu_provider::{prelude::*, struct_provider::StructProvider}; use std::rc::Rc; +use zerovec::VarZeroVec; #[test] fn test_plural_rules() { @@ -20,6 +23,29 @@ fn test_plural_rules() { assert_eq!(pr.select(5_usize), PluralCategory::Other); } +#[test] +fn test_static_provider_borrowed_rules() { + let provider = icu_testdata::get_static_provider(); + + let lid = langid!("en"); + + let rules: DataPayload<'_, PluralRulesV1Marker> = provider + .load_payload(&DataRequest { + resource_path: ResourcePath { + key: provider::key::CARDINAL_V1, + options: ResourceOptions { + variant: None, + langid: Some(lid), + }, + }, + }) + .expect("Failed to load payload") + .take_payload() + .expect("Failed to retrieve payload"); + let rules = rules.get(); + assert!(matches!(rules.one, Some(Rule(VarZeroVec::Borrowed(_))))); +} + #[test] fn test_plural_rules_missing() { let provider = icu_testdata::get_provider(); From 8db9c46900dfa6154ff4c521207a424731ca53ed Mon Sep 17 00:00:00 2001 From: Zibi Braniecki Date: Mon, 15 Nov 2021 10:30:36 -0600 Subject: [PATCH 30/30] No need to guard local type used for debug test. DCE will take care of it. --- components/plurals/src/rules/runtime/ast.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/components/plurals/src/rules/runtime/ast.rs b/components/plurals/src/rules/runtime/ast.rs index ff4fadea40e..e5d336096de 100644 --- a/components/plurals/src/rules/runtime/ast.rs +++ b/components/plurals/src/rules/runtime/ast.rs @@ -371,7 +371,6 @@ unsafe impl VarULE for RelationULE { let len_new = (len - 5) / 8; // Keep this in sync with `RelationULE` - #[cfg(debug_assertions)] struct RelationULESized { _andor_polarity_operand: u8, _modulo: ::ULE,