Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide a getter for the resolved options for the collator #4047

Merged
merged 17 commits into from
Jan 16, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion components/collator/src/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::provider::CollationJamoV1Marker;
use crate::provider::CollationMetadataV1Marker;
use crate::provider::CollationReorderingV1Marker;
use crate::provider::CollationSpecialPrimariesV1Marker;
use crate::{AlternateHandling, CollatorOptions, MaxVariable, Strength};
use crate::{AlternateHandling, CollatorOptions, MaxVariable, ResolvedCollatorOptions, Strength};
use core::cmp::Ordering;
use core::convert::TryFrom;
use icu_normalizer::provider::CanonicalDecompositionDataV1Marker;
Expand Down Expand Up @@ -247,6 +247,12 @@ impl Collator {
})
}

/// The resolved options showing how the default options, the requested options,
/// and the options from locale data were combined.
pub fn resolved_options(&self) -> ResolvedCollatorOptions {
self.options.into()
}

/// Compare potentially ill-formed UTF-16 slices. Unpaired surrogates
/// are compared as if each one was a REPLACEMENT CHARACTER.
pub fn compare_utf16(&self, left: &[u16], right: &[u16]) -> Ordering {
Expand Down
1 change: 1 addition & 0 deletions components/collator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ pub use options::CaseLevel;
pub use options::CollatorOptions;
pub use options::MaxVariable;
pub use options::Numeric;
pub use options::ResolvedCollatorOptions;
pub use options::Strength;

#[doc(no_inline)]
Expand Down
79 changes: 79 additions & 0 deletions components/collator/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,73 @@ impl CollatorOptions {
}
}

// Make it possible to easily copy the resolved options of
// one collator into another collator.
impl From<ResolvedCollatorOptions> for CollatorOptions {
/// Convenience conversion for copying the options from an
/// existing collator into a new one (overriding any locale-provided
/// defaults of the new one!).
fn from(options: ResolvedCollatorOptions) -> CollatorOptions {
Self {
strength: Some(options.strength),
alternate_handling: Some(options.alternate_handling),
case_first: Some(options.case_first),
max_variable: Some(options.max_variable),
case_level: Some(options.case_level),
numeric: Some(options.numeric),
backward_second_level: Some(options.backward_second_level),
}
}
}

/// The resolved (actually used) options used by the collator.
///
/// Unlike [`CollatorOptions`], where each field is optional, every field
/// here holds a concrete value. Two values can be compared with `==` to
/// check whether two collators ended up with the same configuration.
///
/// See the documentation of [`CollatorOptions`] for the meaning of the
/// individual options.
#[non_exhaustive]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct ResolvedCollatorOptions {
    /// Resolved strength collation option.
    pub strength: Strength,
    /// Resolved alternate handling collation option.
    pub alternate_handling: AlternateHandling,
    /// Resolved case first collation option.
    pub case_first: CaseFirst,
    /// Resolved max variable collation option.
    pub max_variable: MaxVariable,
    /// Resolved case level collation option.
    pub case_level: CaseLevel,
    /// Resolved numeric collation option.
    pub numeric: Numeric,
    /// Resolved backward second level collation option.
    pub backward_second_level: BackwardSecondLevel,
}

impl From<CollatorOptionsBitField> for ResolvedCollatorOptions {
fn from(options: CollatorOptionsBitField) -> ResolvedCollatorOptions {
Self {
strength: options.strength(),
alternate_handling: options.alternate_handling(),
case_first: options.case_first(),
max_variable: options.max_variable(),
case_level: if options.case_level() {
CaseLevel::On
} else {
CaseLevel::Off
},
numeric: if options.numeric() {
Numeric::On
} else {
Numeric::Off
},
backward_second_level: if options.backward_second_level() {
BackwardSecondLevel::On
} else {
BackwardSecondLevel::Off
},
}
}
}

// Crate-internal representation of the collator options, stored in a
// single `u32`; accessor methods read individual options out of it by
// masking bits.
#[derive(Copy, Clone, Debug)]
pub(crate) struct CollatorOptionsBitField(u32);

Expand Down Expand Up @@ -513,6 +580,18 @@ impl CollatorOptionsBitField {
}
}

/// Decodes the case-first setting from the bit field.
///
/// Returns `CaseFirst::Off` when no `CASE_FIRST_MASK` bit is set;
/// otherwise the `UPPER_FIRST_MASK` bits select between
/// `CaseFirst::UpperFirst` and `CaseFirst::LowerFirst`.
fn case_first(&self) -> CaseFirst {
    let case_first_set = (self.0 & CollatorOptionsBitField::CASE_FIRST_MASK) != 0;
    let upper_first_set = (self.0 & CollatorOptionsBitField::UPPER_FIRST_MASK) != 0;
    match (case_first_set, upper_first_set) {
        (false, _) => CaseFirst::Off,
        (true, true) => CaseFirst::UpperFirst,
        (true, false) => CaseFirst::LowerFirst,
    }
}

/// Whether case is the most significant part of the tertiary
/// level.
///
Expand Down
119 changes: 116 additions & 3 deletions components/collator/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1453,6 +1453,122 @@ fn test_case_level() {
);
}

/// Default locale, no explicit options: checks every resolved option and
/// two comparisons that depend on them.
#[test]
fn test_default_resolved_options() {
    use core::cmp::Ordering;

    let root = Collator::try_new(&Default::default(), CollatorOptions::new()).unwrap();
    let opts = root.resolved_options();

    assert_eq!(opts.strength, Strength::Tertiary);
    assert_eq!(opts.alternate_handling, AlternateHandling::NonIgnorable);
    assert_eq!(opts.case_first, CaseFirst::Off);
    assert_eq!(opts.max_variable, MaxVariable::Punctuation);
    assert_eq!(opts.case_level, CaseLevel::Off);
    assert_eq!(opts.numeric, Numeric::Off);
    assert_eq!(opts.backward_second_level, BackwardSecondLevel::Off);

    assert_eq!(root.compare("𝕒", "A"), Ordering::Less);
    assert_eq!(root.compare("coté", "côte"), Ordering::Less);
}

/// `th` with no explicit options: the resolved alternate handling is
/// expected to be `Shifted`; everything else matches the defaults.
#[test]
fn test_data_resolved_options_th() {
    use core::cmp::Ordering;

    let th: DataLocale = langid!("th").into();
    let th_collator = Collator::try_new(&th, CollatorOptions::new()).unwrap();
    let opts = th_collator.resolved_options();

    assert_eq!(opts.strength, Strength::Tertiary);
    assert_eq!(opts.alternate_handling, AlternateHandling::Shifted);
    assert_eq!(opts.case_first, CaseFirst::Off);
    assert_eq!(opts.max_variable, MaxVariable::Punctuation);
    assert_eq!(opts.case_level, CaseLevel::Off);
    assert_eq!(opts.numeric, Numeric::Off);
    assert_eq!(opts.backward_second_level, BackwardSecondLevel::Off);

    // The shifted behavior itself is covered more thoroughly by a
    // separate test; only basic comparisons are checked here.
    assert_eq!(th_collator.compare("𝕒", "A"), Ordering::Less);
    assert_eq!(th_collator.compare("coté", "côte"), Ordering::Less);
}

/// `da` with no explicit options: the resolved case-first option is
/// expected to be `UpperFirst`, which flips the "𝕒" vs "A" comparison.
#[test]
fn test_data_resolved_options_da() {
    use core::cmp::Ordering;

    let da: DataLocale = langid!("da").into();
    let da_collator = Collator::try_new(&da, CollatorOptions::new()).unwrap();
    let opts = da_collator.resolved_options();

    assert_eq!(opts.strength, Strength::Tertiary);
    assert_eq!(opts.alternate_handling, AlternateHandling::NonIgnorable);
    assert_eq!(opts.case_first, CaseFirst::UpperFirst);
    assert_eq!(opts.max_variable, MaxVariable::Punctuation);
    assert_eq!(opts.case_level, CaseLevel::Off);
    assert_eq!(opts.numeric, Numeric::Off);
    assert_eq!(opts.backward_second_level, BackwardSecondLevel::Off);

    assert_eq!(da_collator.compare("𝕒", "A"), Ordering::Greater);
    assert_eq!(da_collator.compare("coté", "côte"), Ordering::Less);
}

/// `fr-CA` with no explicit options: backward second level is expected
/// to resolve to `On`, which flips the "coté" vs "côte" comparison.
#[test]
fn test_data_resolved_options_fr_ca() {
    use core::cmp::Ordering;

    let fr_ca: DataLocale = langid!("fr-CA").into();
    let fr_ca_collator = Collator::try_new(&fr_ca, CollatorOptions::new()).unwrap();
    let opts = fr_ca_collator.resolved_options();

    assert_eq!(opts.strength, Strength::Tertiary);
    assert_eq!(opts.alternate_handling, AlternateHandling::NonIgnorable);
    assert_eq!(opts.case_first, CaseFirst::Off);
    assert_eq!(opts.max_variable, MaxVariable::Punctuation);
    assert_eq!(opts.case_level, CaseLevel::Off);
    assert_eq!(opts.numeric, Numeric::Off);
    assert_eq!(opts.backward_second_level, BackwardSecondLevel::On);

    assert_eq!(fr_ca_collator.compare("𝕒", "A"), Ordering::Less);
    assert_eq!(fr_ca_collator.compare("coté", "côte"), Ordering::Greater);
}

/// `fr-CA` with case-first explicitly set to `UpperFirst`: the requested
/// option and the locale-provided backward second level must both show
/// up in the resolved options.
#[test]
fn test_manual_and_data_resolved_options_fr_ca() {
    use core::cmp::Ordering;

    let fr_ca: DataLocale = langid!("fr-CA").into();

    let mut requested = CollatorOptions::new();
    requested.case_first = Some(CaseFirst::UpperFirst);

    let fr_ca_collator = Collator::try_new(&fr_ca, requested).unwrap();
    let opts = fr_ca_collator.resolved_options();

    assert_eq!(opts.strength, Strength::Tertiary);
    assert_eq!(opts.alternate_handling, AlternateHandling::NonIgnorable);
    assert_eq!(opts.case_first, CaseFirst::UpperFirst);
    assert_eq!(opts.max_variable, MaxVariable::Punctuation);
    assert_eq!(opts.case_level, CaseLevel::Off);
    assert_eq!(opts.numeric, Numeric::Off);
    assert_eq!(opts.backward_second_level, BackwardSecondLevel::On);

    assert_eq!(fr_ca_collator.compare("𝕒", "A"), Ordering::Greater);
    assert_eq!(fr_ca_collator.compare("coté", "côte"), Ordering::Greater);
}

/// `da` with case-first explicitly set to `Off`: the explicit request
/// must win, so the resolved case-first option is `Off`.
#[test]
fn test_manual_resolved_options_da() {
    use core::cmp::Ordering;

    let da: DataLocale = langid!("da").into();

    let mut requested = CollatorOptions::new();
    requested.case_first = Some(CaseFirst::Off);

    let da_collator = Collator::try_new(&da, requested).unwrap();
    let opts = da_collator.resolved_options();

    assert_eq!(opts.strength, Strength::Tertiary);
    assert_eq!(opts.alternate_handling, AlternateHandling::NonIgnorable);
    assert_eq!(opts.case_first, CaseFirst::Off);
    assert_eq!(opts.max_variable, MaxVariable::Punctuation);
    assert_eq!(opts.case_level, CaseLevel::Off);
    assert_eq!(opts.numeric, Numeric::Off);
    assert_eq!(opts.backward_second_level, BackwardSecondLevel::Off);

    assert_eq!(da_collator.compare("𝕒", "A"), Ordering::Less);
    assert_eq!(da_collator.compare("coté", "côte"), Ordering::Less);
}

// TODO: Test languages that map to the root.
// The languages that map to root without script reordering are:
// ca (at least for now)
Expand Down Expand Up @@ -1499,6 +1615,3 @@ fn test_case_level() {
// TODO: Test Tibetan

// TODO: Test de-AT-u-co-phonebk vs de-DE-u-co-phonebk

// TODO: Test da defaulting to [caseFirst upper]
// TODO: Test fr-CA defaulting to backward second level
3 changes: 3 additions & 0 deletions ffi/capi/bindings/c/ICU4XCollator.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 50 additions & 0 deletions ffi/capi/bindings/c/ICU4XCollatorResolvedOptionsV1.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions ffi/capi/bindings/cpp/ICU4XCollator.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions ffi/capi/bindings/cpp/ICU4XCollator.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading