Implements From<FixedDecimal> for PluralOperands (#278)

* Implements From<FixedDecimal> for PluralOperands This will allow us to convert FixedDecimal representation without loss of precision into PluralOperands, for correct plural selection. For example, a number `25` may sometimes be pluralized differently from `25.0`. Added the benchmarks for the conversion, though without a baseline it is not just as useful yet. (Until we try to reimplement.) See issue #190. * Rewrite eq() to branchless * Adds a test for eq() We needed to add a custom implementation for eq() to account for loss of precision in PluralOperands. * fixup:first * Clean up the code for From * Adds more illustrative naming in From * Makes clippy happy * Pulls num_fractional_digits out of the loop It is possible to compute it based on the low end of the magnitude range. No performance change per benchmark. * fixup: benchmark was wrong * fixup: moves new operand tests to json Defines new tests for the data model for conversion, and places them into the JSON files instead of inline with the tests. * fixup: moves the benchmark loop in This allows criterion to run the benchmark loop with a specific time limit. * fixup: formatting * fixup: adds individual sample measurements Helps smoke out specific performance regressions.
unicode-org · Oct 1, 2020 · bd4d2a5 · bd4d2a5
1 parent e6579f8
commit bd4d2a5
Show file tree

Hide file tree

Showing 8 changed files with 304 additions and 1 deletion.
diff --git a/components/pluralrules/Cargo.toml b/components/pluralrules/Cargo.toml
@@ -17,6 +17,7 @@ include = [
 [dependencies]
 icu-locale = { path = "../locale" }
 icu-data-provider = { path = "../data-provider" }
+icu-num-util = { path = "../num-util" }
 
 [dev-dependencies]
 criterion = "0.3"

diff --git a/components/pluralrules/benches/fixtures/mod.rs b/components/pluralrules/benches/fixtures/mod.rs
@@ -9,6 +9,7 @@ pub(crate) struct NumbersFixture {
     pub isize: Vec<i64>,
     pub usize: Vec<u64>,
     pub string: Vec<String>,
+    pub fixed_decimals: Vec<FromFixedDecimals>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -17,6 +18,14 @@ pub(crate) struct PluralsFixture {
     pub langs: Vec<LanguageIdentifier>,
 }
 
+/// Describes a number to construct from plural operands, as `value * 10^(exponent)`.  Construction
+/// from value and exponent is because sometimes we want to preserve trailing zeros.
+#[derive(Deserialize)]
+pub(crate) struct FromFixedDecimals {
+    pub value: i64,
+    pub exponent: i16,
+}
+
 #[derive(Debug, Deserialize)]
 pub(crate) struct LocalePluralRulesFixture {
     #[serde(rename = "pluralRule-count-zero")]

diff --git a/components/pluralrules/benches/fixtures/numbers.json b/components/pluralrules/benches/fixtures/numbers.json
@@ -10,5 +10,23 @@
   "string": [
     "1", "2", "3", "4", "5", "25", "134", "910293019", "-12", "1412", "12", "15", "2931", "31231", "3123", "13231", "91", "0", "231", "-2",
     "0.0", "1.0", "1.5", "2.3", "-3.30", "-0.2", "2.25", "230.12", "12.254", "2.1", "4.44", "99.99", "100.222", "31.32", "100.00", "0.12", "-2.00"
+  ],
+  "fixed_decimals": [
+    {
+      "value": 1,
+      "exponent": 0
+    },
+    {
+      "value": 123450,
+      "exponent": -4
+    },
+    {
+      "value": -123450,
+      "exponent": -4
+    },
+    {
+      "value": 2500,
+      "exponent": -2
+    }
   ]
 }
diff --git a/components/pluralrules/benches/operands.rs b/components/pluralrules/benches/operands.rs
@@ -1,9 +1,10 @@
 mod fixtures;
 mod helpers;
 
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
 use std::convert::TryInto;
 
+use icu_num_util::FixedDecimal;
 use icu_pluralrules::PluralOperands;
 
 const DATA_PATH: &str = "./benches/fixtures/numbers.json";
@@ -39,6 +40,81 @@ fn operands(c: &mut Criterion) {
             }
         })
     });
+
+    {
+        let samples = [
+            "0",
+            "10",
+            "200",
+            "3000",
+            "40000",
+            "500000",
+            "6000000",
+            "70000000",
+            "0012.3400",
+            "00.0012216734340",
+        ];
+        let mut group = c.benchmark_group("operands/create/string/samples");
+        for s in samples.iter() {
+            group.bench_with_input(BenchmarkId::from_parameter(s), s, |b, s| {
+                b.iter(|| {
+                    let _: PluralOperands = black_box(s).parse().unwrap();
+                })
+            });
+        }
+    }
+    c.bench_function("operands/eq/mostly_unequal", |b| {
+        let p: PluralOperands = "1".parse().expect("Parse successful");
+        b.iter(|| {
+            for s in &data.isize {
+                let q: PluralOperands = black_box(*s)
+                    .try_into()
+                    .expect("Failed to parse a number into operands.");
+                let _ = black_box(black_box(p) == black_box(q));
+            }
+        })
+    });
+
+    c.bench_function("operands/eq/mostly_equal", |b| {
+        b.iter(|| {
+            for s in &data.isize {
+                let p: PluralOperands = black_box(*s)
+                    .try_into()
+                    .expect("Failed to parse a number into operands.");
+                let q: PluralOperands = black_box(*s)
+                    .try_into()
+                    .expect("Failed to parse a number into operands.");
+                let _ = black_box(black_box(p) == black_box(q));
+            }
+        })
+    });
+
+    c.bench_function("operands/create/from_fixed_decimal", |b| {
+        b.iter(|| {
+            for s in &data.fixed_decimals {
+                let f: FixedDecimal = FixedDecimal::from(s.value)
+                    .multiplied_pow10(s.exponent)
+                    .unwrap();
+                let _: PluralOperands = PluralOperands::from(black_box(&f));
+            }
+        });
+    });
+
+    {
+        let samples = [
+            FixedDecimal::from(1).multiplied_pow10(0).unwrap(),
+            FixedDecimal::from(123450).multiplied_pow10(-4).unwrap(),
+            FixedDecimal::from(2500).multiplied_pow10(-2).unwrap(),
+        ];
+        let mut group = c.benchmark_group("operands/create/from_fixed_decimal/samples");
+        for s in samples.iter() {
+            group.bench_with_input(
+                BenchmarkId::from_parameter(format!("{:?}", &s)),
+                s,
+                |b, f| b.iter(|| PluralOperands::from(black_box(f))),
+            );
+        }
+    }
 }
 
 criterion_group!(benches, operands,);

diff --git a/components/pluralrules/src/operands.rs b/components/pluralrules/src/operands.rs
@@ -1,3 +1,4 @@
+use icu_num_util::FixedDecimal;
 use std::convert::TryFrom;
 use std::io::Error as IOError;
 use std::isize;
@@ -198,3 +199,51 @@ macro_rules! impl_signed_integer_type {
 
 impl_integer_type!(u8 u16 u32 u64 u128 usize);
 impl_signed_integer_type!(i8 i16 i32 i64 i128 isize);
+
+impl From<&FixedDecimal> for PluralOperands {
+    /// Converts a [icu_num_util::FixedDecimal] to [PluralOperands]
+    fn from(f: &FixedDecimal) -> Self {
+        let mag_range = f.magnitude_range();
+        let mag_high = *mag_range.end();
+        let mag_low = *mag_range.start();
+
+        let mut integer_part: u64 = 0;
+        let integer_range = 0..=mag_high;
+        for magnitude in integer_range.rev() {
+            integer_part *= 10;
+            integer_part += f.digit_at(magnitude) as u64;
+        }
+
+        // The fractional part of the number, expressed as an integer to preserve trailing zeroes.
+        // For example, could be "100" if the stored number is "0.100".
+        let mut fraction_part_full: u64 = 0;
+        // A running total of the number of trailing zeros seen in the fractional part of the
+        // number.
+        let mut num_trailing_zeros = 0;
+        // 10^num_trailing_zeros.
+        let mut weight_trailing_zeros = 1;
+        let fractional_magnitude_range = mag_low..=-1;
+        for magnitude in fractional_magnitude_range.rev() {
+            let digit = f.digit_at(magnitude);
+            if digit == 0 {
+                num_trailing_zeros += 1;
+                weight_trailing_zeros *= 10;
+            } else {
+                num_trailing_zeros = 0;
+                weight_trailing_zeros = 1;
+            }
+            fraction_part_full *= 10;
+            fraction_part_full += digit as u64;
+        }
+        let fraction_part_nozeros = fraction_part_full / weight_trailing_zeros;
+        let num_fractional_digits = (-std::cmp::min(0, mag_low)) as usize;
+
+        PluralOperands {
+            i: integer_part,
+            v: num_fractional_digits,
+            w: num_fractional_digits - num_trailing_zeros,
+            f: fraction_part_full,
+            t: fraction_part_nozeros,
+        }
+    }
+}
diff --git a/components/pluralrules/tests/fixtures/mod.rs b/components/pluralrules/tests/fixtures/mod.rs
@@ -1,12 +1,44 @@
+use icu_num_util::FixedDecimal;
 use icu_pluralrules::PluralOperands;
 use serde::Deserialize;
 use std::convert::TryInto;
 
+/// Defines the data-driven test sets for the operands.
 #[derive(Deserialize)]
 pub struct OperandsTestSet {
     pub string: Vec<OperandsTest<String>>,
     pub int: Vec<OperandsTest<isize>>,
     pub floats: Vec<OperandsTest<f64>>,
+    pub from_test: Vec<FromTestCase>,
+}
+
+/// A single test case verifying the conversion from [FixedDecimal] into
+/// [PluralOperands].
+#[derive(Debug, Deserialize)]
+pub struct FromTestCase {
+    /// The [FixedDecimal] input
+    pub input: FixedDecimalInput,
+    /// The expected value after conversion.
+    pub expected: PluralOperandsInput,
+}
+
+/// A serialized representation of [FixedDecimal] in the data driven tests.
+///
+/// Use the `From` trait to convert into [FixedDecimal] in tests.
+#[derive(Debug, Deserialize)]
+pub struct FixedDecimalInput {
+    /// Value supplied to [FixedDecimal::from] when constructing.
+    from: i64,
+    /// Value supplied to [FixedDecimal::multiplied_pow10] when constructing.
+    pow10: i16,
+}
+
+impl From<&FixedDecimalInput> for FixedDecimal {
+    fn from(f: &FixedDecimalInput) -> Self {
+        FixedDecimal::from(f.from)
+            .multiplied_pow10(f.pow10)
+            .unwrap()
+    }
 }
 
 #[derive(Debug, Clone, Deserialize)]

diff --git a/components/pluralrules/tests/fixtures/operands.json b/components/pluralrules/tests/fixtures/operands.json
@@ -116,5 +116,105 @@
       "input": -1000000,
       "output": [1000000, 1000000, 0, 0, 0, 0]
     }
+  ],
+  "from_test": [
+    {
+      "input": {
+        "from": 2500,
+        "pow10": -2
+      },
+      "expected": {
+        "n": 25.0,
+        "i": 25,
+        "v": 2,
+        "w": 0,
+        "f": 0,
+        "t": 0
+      }
+    },
+    {
+      "input": {
+        "from": 2500,
+        "pow10": 0
+      },
+      "expected": {
+        "n": 2500.0,
+        "i": 2500,
+        "v": 0,
+        "w": 0,
+        "f": 0,
+        "t": 0
+      }
+    },
+    {
+      "input": {
+        "from": -123450,
+        "pow10": -4
+      },
+      "expected": {
+        "n": 12.345,
+        "i": 12,
+        "v": 4,
+        "w": 3,
+        "f": 3450,
+        "t": 345
+      }
+    },
+    {
+      "input": {
+        "from": 123450,
+        "pow10": -4
+      },
+      "expected": {
+        "n": 12.345,
+        "i": 12,
+        "v": 4,
+        "w": 3,
+        "f": 3450,
+        "t": 345
+      }
+    },
+    {
+      "input": {
+        "from": 1,
+        "pow10": -2
+      },
+      "expected": {
+        "n": 0.01,
+        "i": 0,
+        "v": 2,
+        "w": 2,
+        "f": 1,
+        "t": 1
+      }
+    },
+    {
+      "input": {
+        "from": 123450,
+        "pow10": -7
+      },
+      "expected": {
+        "n": 0.012345,
+        "i": 0,
+        "v": 7,
+        "w": 6,
+        "f": 123450,
+        "t": 12345
+      }
+    },
+    {
+      "input": {
+        "from": 0,
+        "pow10": 0
+      },
+      "expected": {
+        "n": 0,
+        "i": 0,
+        "v": 0,
+        "w": 0,
+        "f": 0,
+        "t": 0
+      }
+    }
   ]
 }
diff --git a/components/pluralrules/tests/operands.rs b/components/pluralrules/tests/operands.rs
@@ -3,6 +3,7 @@ mod helpers;
 
 use std::convert::TryInto;
 
+use icu_num_util::FixedDecimal;
 use icu_pluralrules::PluralOperands;
 
 #[test]
@@ -46,3 +47,20 @@ fn test_parsing_operand_errors() {
     let operands: Result<PluralOperands, _> = "".parse();
     assert!(operands.is_err());
 }
+
+#[test]
+fn test_from_fixed_decimals() {
+    let path = "./tests/fixtures/operands.json";
+    let test_set: fixtures::OperandsTestSet =
+        helpers::read_fixture(path).expect("Failed to read a fixture");
+    for test in test_set.from_test {
+        let input: FixedDecimal = FixedDecimal::from(&test.input);
+        let actual: PluralOperands = PluralOperands::from(&input);
+        let expected: PluralOperands = PluralOperands::from(test.expected.clone());
+        assert_eq!(
+            expected, actual,
+            "\n\t(expected==left; actual==right)\n\t\t{:?}",
+            &test
+        );
+    }
+}