unicode-org · sffc · Dec 12, 2023 · Dec 2, 2023 · Dec 2, 2023 · Dec 2, 2023
@@ -6,10 +6,12 @@ use core::borrow::Borrow;
 use core::cmp::Ordering;
 use core::iter::FromIterator;
 use litemap::LiteMap;
+use writeable::Writeable;
 
 use super::Key;
 use super::Value;
 use crate::helpers::ShortSlice;
+#[allow(deprecated)]
 use crate::ordering::SubtagOrderingResult;
 
 /// A list of [`Key`]-[`Value`] pairs representing functional information
@@ -303,7 +305,7 @@ impl Keywords {
     /// }
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+        self.write_cmp_bytes(other)
     }
 
     /// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
@@ -340,6 +342,8 @@ impl Keywords {
     ///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
     /// );
     /// ```
+    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
+    #[allow(deprecated)]
     pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
     where
         I: Iterator<Item = &'l [u8]>,

@@ -5,6 +5,7 @@
 use core::cmp::Ordering;
 use core::str::FromStr;
 
+#[allow(deprecated)]
 use crate::ordering::SubtagOrderingResult;
 use crate::parser::{
     parse_language_identifier, parse_language_identifier_with_single_variant, ParserError,
@@ -199,7 +200,7 @@ impl LanguageIdentifier {
     /// }
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+        self.write_cmp_bytes(other)
     }
 
     /// Compare this [`LanguageIdentifier`] with an iterator of BCP-47 subtags.
@@ -235,6 +236,8 @@ impl LanguageIdentifier {
     ///     loc.strict_cmp_iter(subtags.iter().copied()).end()
     /// );
     /// ```
+    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
+    #[allow(deprecated)]
     pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
     where
         I: Iterator<Item = &'l [u8]>,

@@ -75,6 +75,7 @@ mod parser;
 
 pub use langid::LanguageIdentifier;
 pub use locale::Locale;
+#[allow(deprecated)]
 pub use ordering::SubtagOrderingResult;
 pub use parser::errors::ParserError;
 

@@ -2,6 +2,7 @@
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
+#[allow(deprecated)]
 use crate::ordering::SubtagOrderingResult;
 use crate::parser::{
     parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
@@ -192,7 +193,7 @@ impl Locale {
     /// }
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
+        self.write_cmp_bytes(other)
     }
 
     /// Compare this [`Locale`] with an iterator of BCP-47 subtags.
@@ -229,6 +230,8 @@ impl Locale {
     ///     loc.strict_cmp_iter(subtags.iter().copied()).end()
     /// );
     /// ```
+    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
+    #[allow(deprecated)]
     pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
     where
         I: Iterator<Item = &'l [u8]>,

@@ -36,13 +36,17 @@ use core::cmp::Ordering;
 /// [`Locale::strict_cmp_iter`]: crate::Locale::strict_cmp_iter
 #[allow(clippy::exhaustive_enums)] // well-defined exhaustive enum semantics
 #[derive(Debug)]
+#[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
 pub enum SubtagOrderingResult<I> {
     /// Potentially remaining subtags after the comparison operation.
+    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
     Subtags(I),
     /// Resolved ordering between the locale object and the subtags.
+    #[deprecated(since = "1.5.0", note = "if you need this, please file an issue")]
     Ordering(Ordering),
 }
 
+#[allow(deprecated)]
 impl<I> SubtagOrderingResult<I>
 where
     I: Iterator,

@@ -50,7 +50,11 @@ serde_json = "1.0"
 icu_provider_adapters = { path = "../../provider/adapters" }
 icu_locid_transform = { path = "../../components/locid_transform" }
 
+[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
+criterion = "0.4"
+
 [features]
+bench = []
 std = ["icu_locid/std"]
 sync = []
 experimental = []
@@ -75,3 +79,10 @@ datagen = ["serde", "dep:erased-serde", "dep:databake", "std", "sync"]
 denylist = ["macros"]
 # We have tons of features here, limit the amount of tests we run
 max_combination_size = 3
+
+[lib]
+bench = false  # This option is required for Benchmark CI
+
+[[bench]]
+name = "data_locale_bench"
+harness = false
@@ -0,0 +1,86 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+extern crate alloc;
+
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use icu_provider::prelude::*;
+use std::str::FromStr;
+use writeable::Writeable;
+
+static BCP47_STRINGS: &[&str] = &[
+    "ca",
+    "ca-ES",
+    "ca-ES-u-ca-buddhist",
+    "ca-ES-valencia",
+    "ca-ES-x-gbp",
+    "ca-ES-x-gbp-short",
+    "ca-ES-x-usd",
+    "ca-ES-xyzabc",
+    "ca-x-eur",
+    "cat",
+    "pl-Latn-PL",
+    "und",
+    "und-fonipa",
+    "und-u-ca-hebrew",
+    "und-u-ca-japanese",
+    "und-x-mxn",
+    "zh",
+];
+
+fn overview_bench(c: &mut Criterion) {
+    c.bench_function("data_locale/overview", |b| {
+        b.iter(|| {
+            for s in black_box(BCP47_STRINGS).iter() {
+                let loc = DataLocale::from_str(s).unwrap();
+                let loc = loc.clone();
+                let s = loc.write_to_string();
+                loc.strict_cmp(s.as_bytes());
+            }
+        });
+    });
+
+    #[cfg(feature = "bench")]
+    data_locale_bench(c);
+}
+
+#[cfg(feature = "bench")]
+fn data_locale_bench(c: &mut Criterion) {
+    c.bench_function("data_locale/parse", |b| {
+        b.iter(|| {
+            for s in black_box(BCP47_STRINGS).iter() {
+                DataLocale::from_str(s).unwrap();
+            }
+        });
+    });
+
+    let data_locales: Vec<DataLocale> = BCP47_STRINGS.iter().map(|s| s.parse().unwrap()).collect();
+
+    c.bench_function("data_locale/write_to_string", |b| {
+        b.iter(|| {
+            for loc in black_box(&data_locales).iter() {
+                loc.write_to_string();
+            }
+        });
+    });
+    c.bench_function("data_locale/clone", |b| {
+        b.iter(|| {
+            for loc in black_box(&data_locales).iter() {
+                let _ = loc.clone();
+            }
+        });
+    });
+    c.bench_function("data_locale/strict_cmp", |b| {
+        b.iter(|| {
+            for loc in black_box(&data_locales).iter() {
+                for s in black_box(BCP47_STRINGS).iter() {
+                    loc.strict_cmp(s.as_bytes());
+                }
+            }
+        });
+    });
+}
+
+criterion_group!(benches, overview_bench,);
+criterion_main!(benches);
@@ -11,7 +11,7 @@ use core::hash::Hash;
 use core::str::FromStr;
 use icu_locid::extensions::unicode as unicode_ext;
 use icu_locid::subtags::{Language, Region, Script, Variants};
-use icu_locid::{LanguageIdentifier, Locale, SubtagOrderingResult};
+use icu_locid::{LanguageIdentifier, Locale};
 use writeable::{LengthHint, Writeable};
 
 #[cfg(feature = "experimental")]
@@ -342,34 +342,7 @@ impl DataLocale {
     /// }
     /// ```
     pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
-        let subtags = other.split(|b| *b == b'-');
-        let mut subtag_result = self.langid.strict_cmp_iter(subtags);
-        if self.has_unicode_ext() {
-            let mut subtags = match subtag_result {
-                SubtagOrderingResult::Subtags(s) => s,
-                SubtagOrderingResult::Ordering(o) => return o,
-            };
-            match subtags.next() {
-                Some(b"u") => (),
-                Some(s) => return s.cmp(b"u").reverse(),
-                None => return Ordering::Greater,
-            }
-            subtag_result = self.keywords.strict_cmp_iter(subtags);
-        }
-        #[cfg(feature = "experimental")]
-        if let Some(aux) = self.get_aux() {
-            let mut subtags = match subtag_result {
-                SubtagOrderingResult::Subtags(s) => s,
-                SubtagOrderingResult::Ordering(o) => return o,
-            };
-            match subtags.next() {
-                Some(b"x") => (),
-                Some(s) => return s.cmp(b"x").reverse(),
-                None => return Ordering::Greater,
-            }
-            subtag_result = aux.strict_cmp_iter(subtags);
-        }
-        subtag_result.end()
+        self.write_cmp_bytes(other)
     }
 }
 
@@ -987,23 +960,6 @@ impl AuxiliaryKeys {
             })
     }
 
-    pub(crate) fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
-    where
-        I: Iterator<Item = &'l [u8]>,
-    {
-        for subtag in self.value.split(Self::separator()) {
-            if let Some(other) = subtags.next() {
-                match subtag.as_bytes().cmp(other) {
-                    Ordering::Equal => (),
-                    not_equal => return SubtagOrderingResult::Ordering(not_equal),
-                }
-            } else {
-                return SubtagOrderingResult::Ordering(Ordering::Greater);
-            }
-        }
-        SubtagOrderingResult::Subtags(subtags)
-    }
-
     /// Returns the internal separator byte used for auxiliary keys in data locales.
     ///
     /// This is, according to BCP-47, an ASCII hyphen.

@@ -0,0 +1,99 @@
+// This file is part of ICU4X. For terms of use, please see the file
+// called LICENSE at the top level of the ICU4X source tree
+// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
+
+use core::cmp::Ordering;
+use core::fmt;
+
+pub(crate) struct WriteComparator<'a> {
+    string: &'a [u8],
+    result: Ordering,
+}
+
+/// This is an infallible impl. Functions always return Ok, not Err.
+impl<'a> fmt::Write for WriteComparator<'a> {
+    #[inline]
+    fn write_str(&mut self, other: &str) -> fmt::Result {
+        if self.result != Ordering::Equal {
+            return Ok(());
+        }
+        let cmp_len = core::cmp::min(other.len(), self.string.len());
+        let (this, remainder) = self.string.split_at(cmp_len);
+        self.string = remainder;
+        self.result = this.cmp(other.as_bytes());
+        Ok(())
+    }
+}
+
+impl<'a> WriteComparator<'a> {
+    #[inline]
+    pub fn new(string: &'a (impl AsRef<[u8]> + ?Sized)) -> Self {
+        Self {
+            string: string.as_ref(),
+            result: Ordering::Equal,
+        }
+    }
+
+    #[inline]
+    pub fn finish(self) -> Ordering {
+        if matches!(self.result, Ordering::Equal) && !self.string.is_empty() {
+            // Self is longer than Other
+            Ordering::Greater
+        } else {
+            self.result
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use core::fmt::Write;
+
+    mod data {
+        include!("../tests/data/data.rs");
+    }
+
+    #[test]
+    fn test_write_char() {
+        for a in data::KEBAB_CASE_STRINGS {
+            for b in data::KEBAB_CASE_STRINGS {
+                let mut wc = WriteComparator::new(a);
+                for ch in b.chars() {
+                    wc.write_char(ch).unwrap();
+                }
+                assert_eq!(a.cmp(b), wc.finish(), "{a} <=> {b}");
+            }
+        }
+    }
+
+    #[test]
+    fn test_write_str() {
+        for a in data::KEBAB_CASE_STRINGS {
+            for b in data::KEBAB_CASE_STRINGS {
+                let mut wc = WriteComparator::new(a);
+                wc.write_str(b).unwrap();
+                assert_eq!(a.cmp(b), wc.finish(), "{a} <=> {b}");
+            }
+        }
+    }
+
+    #[test]
+    fn test_mixed() {
+        for a in data::KEBAB_CASE_STRINGS {
+            for b in data::KEBAB_CASE_STRINGS {
+                let mut wc = WriteComparator::new(a);
+                let mut first = true;
+                for substr in b.split('-') {
+                    if first {
+                        first = false;
+                    } else {
+                        wc.write_char('-').unwrap();
+                    }
+                    wc.write_str(substr).unwrap();
+                }
+                assert_eq!(a.cmp(b), wc.finish(), "{a} <=> {b}");
+            }
+        }
+    }
+}