Skip to content

Commit

Permalink
Map -u-co- to provider variant
Browse files Browse the repository at this point in the history
  • Loading branch information
hsivonen committed Mar 31, 2022
1 parent cb9b12a commit c90040c
Showing 1 changed file with 78 additions and 7 deletions.
85 changes: 78 additions & 7 deletions experimental/collator/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ pub mod error;
pub mod provider;

extern crate alloc;
use crate::alloc::string::ToString;
use crate::provider::CollationDataV1Marker;
use crate::provider::CollationDiacriticsV1Marker;
use crate::provider::CollationJamoV1Marker;
Expand Down Expand Up @@ -2217,14 +2218,63 @@ impl<'data> Collator {
> + ?Sized,
{
let locale = locale.into();

// TODO: Alias mapping here.

let langid: icu_locid::LanguageIdentifier = locale.clone().into();

// TODO: Is there a compile-time macro for this?
let key = "co".parse::<icu_locid::extensions::unicode::Key>().unwrap();

// In the provider, the variant called "standard" in CLDR is represented
// as `variant: None`. It is the default for all but two languages:
// sv and zh. Since there are only two special cases, hard-coding them
// here for now instead of making the defaulting fancy and data driven.
// The Swedish naming seems ad hoc from
// https://unicode-org.atlassian.net/browse/CLDR-679 .
let variant = if let Some(extension) = locale.get_unicode_extension(&key) {
// Surely there should be a non-allocating way to get a
// &str out of a single-component extension.
let extension_string = extension.to_string();
match extension_string.as_str() {
"trad" => Some("traditional".into()),
"phonebk" => Some("phonebook".into()),
"dict" => Some("dictionary".into()),
"gb2312" => Some("gb2312han".into()),
"standard" => None,
_ => {
// XXX Test that the provider ignores non-matching
// variants.

// `Locale` should enforce that it's impossible
// to get these values that are longer than
// 8 ASCII characters:
debug_assert_ne!(extension_string, "traditional");
debug_assert_ne!(extension_string, "phonebook");
debug_assert_ne!(extension_string, "dictionary");
debug_assert_ne!(extension_string, "gb2312han");
Some(extension_string.into())
}
}
} else {
if &langid.language == &"zh" {
Some("pinyin".into())
} else if &langid.language == &"sv" {
Some("reformed".into())
} else {
None
}
};

// TODO: Handle POSIX, which is part of the language instead
// of the -u-co- extension.

let metadata_payload: DataPayload<crate::provider::CollationMetadataV1Marker> =
data_provider
.load_resource(&DataRequest {
options: ResourceOptions {
langid: Some(langid.clone()),
variant: None,
variant: variant.clone(),
},
metadata: Default::default(),
})?
Expand All @@ -2239,7 +2289,7 @@ impl<'data> Collator {
.load_resource(&DataRequest {
options: ResourceOptions {
langid: Some(langid.clone()),
variant: None,
variant: variant,
},
metadata: Default::default(),
})?
Expand Down Expand Up @@ -2267,6 +2317,8 @@ impl<'data> Collator {
} else {
Some(LanguageIdentifier::und())
},
// XXX: Check if this should go inside the condition
// above.
variant: None,
},
metadata: Default::default(),
Expand All @@ -2280,11 +2332,13 @@ impl<'data> Collator {
let jamo: DataPayload<CollationJamoV1Marker> = data_provider
.load_resource(&DataRequest {
options: ResourceOptions {
langid: if metadata.tailored_jamo() {
Some(langid.clone())
} else {
Some(LanguageIdentifier::und())
},
// TODO: load other jamo tables.
// langid: if metadata.tailored_jamo() {
// Some(langid.clone())
// } else {
// Some(LanguageIdentifier::und())
// },
langid: Some(LanguageIdentifier::und()),
variant: None,
},
metadata: Default::default(),
Expand Down Expand Up @@ -3580,6 +3634,23 @@ mod tests {
}
}

// TODO: This test should eventually test fallback
// TODO: Test Swedish and Chinese, also, since they have unusual
// variant defaults. (But are currently not part of the test data.)
/// Builds a collator for Finnish with an explicit `-u-co-standard`
/// extension and checks that "ä" compares greater than "z".
///
/// Despite the name, region fallback is not exercised yet: the locale
/// used below carries no region subtag.
#[test]
fn test_region_fallback() {
    // There's no explicit fi-FI data, so for now the test requests plain
    // `fi` with the standard collation type spelled out. The intended
    // future input is `langid!("fi-FI").into()`.
    let finnish: Locale = "fi-u-co-standard".parse().unwrap();

    let provider = icu_testdata::get_provider();
    let options = CollatorOptions::new();

    let finnish_collator: Collator = Collator::try_new(finnish, &provider, options).unwrap();

    let observed = finnish_collator.compare("ä", "z");
    assert_eq!(observed, Ordering::Greater);
}

// TODO: frcoll requires support for fr-CA

// TODO: Write a test for Bangla
Expand Down

0 comments on commit c90040c

Please sign in to comment.