diff --git a/CHANGELOG.md b/CHANGELOG.md index e6875ad8976..bc7fe82c6ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Datagen shows elapsed time for keys that are slow to generate (https://github.com/unicode-org/icu4x/pull/4469) - Datagen performance improvement by caching supported locales (https://github.com/unicode-org/icu4x/pull/4470) - Never use fallback for baked segmentation data (https://github.com/unicode-org/icu4x/pull/4510) + - Propagate extension keywords and auxiliary keys to explicit locales (https://github.com/unicode-org/icu4x/pull/4533) - `icu_provider` - (Small breakage) `DataPayload::new_owned()` is no longer `const`, this was a mistake (https://github.com/unicode-org/icu4x/pull/4456) - `icu_provider_blob` diff --git a/provider/datagen/src/driver.rs b/provider/datagen/src/driver.rs index 3c0b5323d8e..568597d1098 100644 --- a/provider/datagen/src/driver.rs +++ b/provider/datagen/src/driver.rs @@ -468,13 +468,68 @@ impl DatagenDriver { } } +struct ExplicitImplicitLocaleSets { + explicit: HashSet, + implicit: HashSet, +} + +/// Resolves the set of explicit langids and the supported locales into two sets of locales: +/// +/// - `explicit` contains the explicit langids but with aux keys and extension keywords included. +/// For example, if `ar-SA` is requested (explicit langid), and `ar` and `ar-u-nu-latn` are supported, +/// then `ar-SA` and `ar-SA-u-nu-latn` will be returned as `explicit`. +/// - `implicit` contains all supported locales reachable by fallback from an `explicit` locale. +/// These locales can be included without increasing data payload size. 
+fn make_explicit_implicit_sets( + key: DataKey, + explicit_langids: &HashSet, + supported_map: &HashMap>, + fallbacker: &Lazy< + Result, + impl FnOnce() -> Result, + >, +) -> Result { + let mut implicit = HashSet::new(); + // TODO: Make including the default locale configurable + implicit.insert(DataLocale::default()); + + let mut explicit: HashSet = Default::default(); + for explicit_langid in explicit_langids.iter() { + explicit.insert(explicit_langid.into()); + if let Some(locales) = supported_map.get(explicit_langid) { + explicit.extend(locales.iter().cloned()); // adds ar-EG-u-nu-latn + } + if explicit_langid == &LanguageIdentifier::UND { + continue; + } + let fallbacker = fallbacker.as_ref().map_err(|e| *e)?; + let fallbacker_with_config = fallbacker.for_config(key.fallback_config()); + let mut iter = fallbacker_with_config.fallback_for(explicit_langid.into()); + while !iter.get().is_und() { + implicit.insert(iter.get().clone()); + // Inherit aux keys and extension keywords from parent locales + let iter_langid = iter.get().get_langid(); + if let Some(locales) = supported_map.get(&iter_langid) { + implicit.extend(locales.iter().cloned()); // adds ar-u-nu-latn + for locale in locales { + let mut morphed_locale = locale.clone(); + morphed_locale.set_langid(explicit_langid.clone()); + explicit.insert(morphed_locale); // adds ar-SA-u-nu-latn + } + } + iter.step(); + } + } + Ok(ExplicitImplicitLocaleSets { explicit, implicit }) +} + /// Selects the maximal set of locales to export based on a [`DataKey`] and this datagen /// provider's options bag. The locales may be later optionally deduplicated for fallback. 
fn select_locales_for_key( provider: &dyn ExportableProvider, key: DataKey, fallback: FallbackMode, - locales: Option<&HashSet>, + explicit_langids: Option<&HashSet>, additional_collations: &HashSet, segmenter_models: &[String], fallbacker: &Lazy< @@ -482,28 +537,43 @@ fn select_locales_for_key( impl FnOnce() -> Result, >, ) -> Result, DataError> { - let mut result = provider + // A map from langid to data locales. Keys that have aux keys or extension keywords + // may have multiple data locales per langid. + let mut supported_map: HashMap> = Default::default(); + for locale in provider .supported_locales_for_key(key) .map_err(|e| e.with_key(key))? - .into_iter() - .collect::>(); + { + use std::collections::hash_map::Entry; + match supported_map.entry(locale.get_langid()) { + Entry::Occupied(mut entry) => entry.get_mut().insert(locale), + Entry::Vacant(entry) => entry.insert(Default::default()).insert(locale), + }; + } if key == icu_segmenter::provider::DictionaryForWordOnlyAutoV1Marker::KEY || key == icu_segmenter::provider::DictionaryForWordLineExtendedV1Marker::KEY { - result.retain(|locale| { - let model = crate::transform::segmenter::dictionary::data_locale_to_model_name(locale); - segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + supported_map.retain(|_, locales| { + locales.retain(|locale| { + let model = + crate::transform::segmenter::dictionary::data_locale_to_model_name(locale); + segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + }); + !locales.is_empty() }); // Don't perform additional locale filtering - return Ok(result); + return Ok(supported_map.into_values().flatten().collect()); } else if key == icu_segmenter::provider::LstmForWordLineAutoV1Marker::KEY { - result.retain(|locale| { - let model = crate::transform::segmenter::lstm::data_locale_to_model_name(locale); - segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + supported_map.retain(|_, locales| { + locales.retain(|locale| { + let model = 
crate::transform::segmenter::lstm::data_locale_to_model_name(locale); + segmenter_models.iter().any(|m| Some(m.as_ref()) == model) + }); + !locales.is_empty() }); // Don't perform additional locale filtering - return Ok(result); + return Ok(supported_map.into_values().flatten().collect()); } else if key == icu_collator::provider::CollationDataV1Marker::KEY || key == icu_collator::provider::CollationDiacriticsV1Marker::KEY || key == icu_collator::provider::CollationJamoV1Marker::KEY @@ -511,54 +581,50 @@ fn select_locales_for_key( || key == icu_collator::provider::CollationReorderingV1Marker::KEY || key == icu_collator::provider::CollationSpecialPrimariesV1Marker::KEY { - result.retain(|locale| { - let Some(collation) = locale - .get_unicode_ext(&key!("co")) - .and_then(|co| co.as_single_subtag().copied()) - else { - return true; - }; - additional_collations.contains(collation.as_str()) - || if collation.starts_with("search") { - additional_collations.contains("search*") - } else { - !["big5han", "gb2312"].contains(&collation.as_str()) - } + supported_map.retain(|_, locales| { + locales.retain(|locale| { + let Some(collation) = locale + .get_unicode_ext(&key!("co")) + .and_then(|co| co.as_single_subtag().copied()) + else { + return true; + }; + additional_collations.contains(collation.as_str()) + || if collation.starts_with("search") { + additional_collations.contains("search*") + } else { + !["big5han", "gb2312"].contains(&collation.as_str()) + } + }); + !locales.is_empty() }); } - result = match (locales, fallback) { + let result = match (explicit_langids, fallback) { // Case 1: `None` simply exports all supported locales for this key. - (None, _) => result, + (None, _) => supported_map.into_values().flatten().collect(), // Case 2: `FallbackMode::Preresolved` exports all supported locales whose langid matches // one of the explicit locales. This ensures extensions are included. 
In addition, any // explicit locales are added to the list, even if they themselves don't contain data; // fallback should be performed upon exporting. - (Some(explicit), FallbackMode::Preresolved) => result - .into_iter() - .chain(explicit.iter().map(|langid| langid.into())) - .filter(|locale| explicit.contains(&locale.get_langid())) - .collect(), + (Some(explicit_langids), FallbackMode::Preresolved) => { + let ExplicitImplicitLocaleSets { explicit, .. } = + make_explicit_implicit_sets(key, explicit_langids, &supported_map, fallbacker)?; + explicit + } // Case 3: All other modes resolve to the "ancestors and descendants" strategy. - (Some(explicit), _) => { - let include_und = explicit.contains(&LanguageIdentifier::UND); - let explicit: HashSet = explicit.iter().map(DataLocale::from).collect(); - let mut implicit = HashSet::new(); - // TODO: Make including the default locale configurable - implicit.insert(DataLocale::default()); + (Some(explicit_langids), _) => { + let include_und = explicit_langids.contains(&LanguageIdentifier::UND); + + let ExplicitImplicitLocaleSets { explicit, implicit } = + make_explicit_implicit_sets(key, explicit_langids, &supported_map, fallbacker)?; + let fallbacker = fallbacker.as_ref().map_err(|e| *e)?; let fallbacker_with_config = fallbacker.for_config(key.fallback_config()); - for locale in explicit.iter() { - let mut iter = fallbacker_with_config.fallback_for(locale.clone()); - while !iter.get().is_und() { - implicit.insert(iter.get().clone()); - iter.step(); - } - } - - result - .into_iter() + supported_map + .into_values() + .flatten() .chain(explicit.iter().cloned()) .filter(|locale_orig| { let mut locale = locale_orig.clone(); @@ -705,7 +771,7 @@ fn test_collation_filtering() { Some(&HashSet::from_iter([cas.language.clone()])), &HashSet::from_iter(cas.include_collations.iter().copied().map(String::from)), &[], - &once_cell::sync::Lazy::new(|| unreachable!()), + &once_cell::sync::Lazy::new(|| 
Ok(LocaleFallbacker::new_without_data())), ) .unwrap() .into_iter() diff --git a/provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json b/provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json new file mode 100644 index 00000000000..96cac81e831 --- /dev/null +++ b/provider/datagen/tests/data/json/collator/data@1/ar-EG-u-co-compat.json @@ -0,0 +1,653 @@ +{ + "trie": { + "header": { + "high_start": 65536, + "shifted12_high_start": 16, + "index3_null_offset": 26, + "data_null_offset": 0, + "null_value": 192, + "trie_type": "Small" + }, + "index": [ + 0, + 64, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 87, + 142, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 259, + 267, + 277, + 299, + 0, + 16, + 32, + 48, + 64, + 80, + 96, + 112, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 87, + 103, + 119, + 135, + 142, + 158, + 174, + 190, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 206, + 218, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 226, + 0, + 0, + 0, + 0, + 0, + 236, + 0, + 0, + 0, + 252, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 265, + 0, + 0, + 0, + 0, + 270, + 286, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 
0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 68, + 76, + 76, + 108, + 76, + 76, + 76, + 76, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 140, + 146, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 26, + 173, + 204, + 227, + 65518 + ], + "data": [ + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1714046469, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022597, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 460, + 460, + 460, + 460, + 
460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022598, + 1727022599, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022603, + 192, + 192, + 1727022602, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1714046471, + 1714046470, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 1727022601, + 1727022600, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192 + ] + }, + "ces": [], + "ce32s": [], + "contexts": [] +} diff --git a/provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json b/provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json new file mode 100644 index 00000000000..bb0010ef083 --- /dev/null +++ b/provider/datagen/tests/data/json/collator/data@1/es-AR-u-co-trad.json @@ -0,0 +1,421 @@ +{ + "trie": { + "header": { + "high_start": 55296, + "shifted12_high_start": 14, + "index3_null_offset": 2, + "data_null_offset": 0, + "null_value": 192, + "trie_type": "Small" + }, + "index": [ + 0, + 64, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 146, + 154, + 164, + 186, + 0, + 16, + 32, + 48, + 64, + 80, + 96, + 112, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 0, + 16, + 32, + 48, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, 
+ 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 128, + 140, + 0, + 0, + 0, + 0, + 0, + 68, + 76, + 76, + 76, + 76, + 76, + 76, + 76, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 108, + 114 + ], + "data": [ + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 2249, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 75977, + 192, + 165577, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 223433, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 272585, + 192, + 329417, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 460, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192, + 192 + ] + }, + "ces": [], + "ce32s": [], + "contexts": [ + 11776, + 1436, + 1, + 72, + 61186, + 1415, + 104, + 61186, + 1350, + 16384, + 1436, + 1, + 76, + 65535, + 16642, + 1415, + 108, + 65535, + 16642, + 1350, + 17408, + 1436, + 48, + 771, + 65535, + 17666, + 1414, + 11776, + 1285, + 48, + 104, + 61186, + 1285, + 16384, + 1285, + 48, + 108, + 65535, + 16642, + 1285, + 17408, + 1285, + 48, + 771, + 65535, + 17666, + 1285 + ] +} diff --git 
a/provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json b/provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json new file mode 100644 index 00000000000..5929352390a --- /dev/null +++ b/provider/datagen/tests/data/json/collator/meta@1/ar-EG-u-co-compat.json @@ -0,0 +1,3 @@ +{ + "bits": 41 +} diff --git a/provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json b/provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json new file mode 100644 index 00000000000..ebe110aba66 --- /dev/null +++ b/provider/datagen/tests/data/json/collator/meta@1/es-AR-u-co-trad.json @@ -0,0 +1,3 @@ +{ + "bits": 9 +} diff --git a/provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json b/provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json new file mode 100644 index 00000000000..7ae79ec8d6c --- /dev/null +++ b/provider/datagen/tests/data/json/collator/reord@1/ar-EG-u-co-compat.json @@ -0,0 +1,262 @@ +{ + "min_high_no_reorder": 1728053248, + "reorder_table": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 39, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 
152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255 + ], + "reorder_ranges": [] +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json new file mode 100644 index 00000000000..f937308e275 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-f24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm:ss v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json new file mode 100644 index 00000000000..f937308e275 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-l24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm:ss v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json new file mode 100644 index 00000000000..9788145c306 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-m24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm:ss" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json 
new file mode 100644 index 00000000000..f360b85fc30 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/en-ZA-x-s24.json @@ -0,0 +1,3 @@ +{ + "pattern": "HH:mm" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json new file mode 100644 index 00000000000..bb7442a1859 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-f12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm:ss a v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json new file mode 100644 index 00000000000..bb7442a1859 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-l12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm:ss a v" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json new file mode 100644 index 00000000000..a874c4da57e --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-m12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm:ss a" +} diff --git a/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json new file mode 100644 index 00000000000..7331777bec2 --- /dev/null +++ b/provider/datagen/tests/data/json/datetime/patterns/time@1/es-AR-x-s12.json @@ -0,0 +1,3 @@ +{ + "pattern": "h:mm a" +} diff --git a/provider/datagen/tests/data/postcard/fingerprints.csv b/provider/datagen/tests/data/postcard/fingerprints.csv index 3fc601c0e9b..d7396a17f94 100644 --- a/provider/datagen/tests/data/postcard/fingerprints.csv +++ b/provider/datagen/tests/data/postcard/fingerprints.csv @@ -4,6 +4,7 @@ calendar/japanese@1, und, 111B, b31e52deaf52706f calendar/japanext@1, und, 
5216B, 6c20e216c8cd6e41 collator/data@1, ar, 8267B, fce742b37324adbe collator/data@1, ar-EG, 8267B, fce742b37324adbe +collator/data@1, ar-EG-u-co-compat, 1888B, e7b7e3fda37b8565 collator/data@1, ar-u-co-compat, 1888B, e7b7e3fda37b8565 collator/data@1, bn, 1304B, 4b0a44d6a365bcd1 collator/data@1, bn-u-co-trad, 11083B, 1d78781818d5ec49 @@ -13,6 +14,7 @@ collator/data@1, en-001, 126799B, a0ea0e0eecc34e27 collator/data@1, en-ZA, 126799B, a0ea0e0eecc34e27 collator/data@1, es, 1064B, 53d5d15868ead10a collator/data@1, es-AR, 1064B, 53d5d15868ead10a +collator/data@1, es-AR-u-co-trad, 1130B, 7f0603bad1cbb60d collator/data@1, es-u-co-trad, 1130B, 7f0603bad1cbb60d collator/data@1, fil, 1088B, f4b69509fc410230 collator/data@1, fr, 126799B, a0ea0e0eecc34e27 @@ -47,6 +49,7 @@ collator/dia@1, und, 160B, 8ace760351a33687 collator/jamo@1, und, 1026B, 8554e65df2b9cfbb collator/meta@1, ar, 1B, 9208c26164ee7a99 collator/meta@1, ar-EG, 1B, 9208c26164ee7a99 +collator/meta@1, ar-EG-u-co-compat, 1B, 9208c26164ee7a99 collator/meta@1, ar-u-co-compat, 1B, 9208c26164ee7a99 collator/meta@1, bn, 1B, 9208c26164ee7a99 collator/meta@1, bn-u-co-trad, 1B, 9208c26164ee7a99 @@ -56,6 +59,7 @@ collator/meta@1, en-001, 1B, 667dd5401e6fd800 collator/meta@1, en-ZA, 1B, 667dd5401e6fd800 collator/meta@1, es, 1B, 9aab82d56f3b362e collator/meta@1, es-AR, 1B, 9aab82d56f3b362e +collator/meta@1, es-AR-u-co-trad, 1B, 9aab82d56f3b362e collator/meta@1, es-u-co-trad, 1B, 9aab82d56f3b362e collator/meta@1, fil, 1B, 9aab82d56f3b362e collator/meta@1, fr, 1B, 667dd5401e6fd800 @@ -72,6 +76,7 @@ collator/meta@1, und-u-co-eor, 1B, 667dd5401e6fd800 collator/prim@1, und, 10B, 792009c72825eaba collator/reord@1, ar, 264B, 556a25539c4da116 collator/reord@1, ar-EG, 264B, 556a25539c4da116 +collator/reord@1, ar-EG-u-co-compat, 264B, 556a25539c4da116 collator/reord@1, ar-u-co-compat, 264B, 556a25539c4da116 collator/reord@1, bn, 268B, 99752b8b1cb4c37b collator/reord@1, bn-u-co-trad, 268B, 99752b8b1cb4c37b @@ -1591,12 +1596,16 @@ 
datetime/patterns/time@1, en-001-x-s, 17B, dd691ff921592b5 datetime/patterns/time@1, en-001-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, en-ZA-x-f, 23B, 6a41da8c43bf6f45 datetime/patterns/time@1, en-ZA-x-f12, 29B, 7cab8ab71d571c6 +datetime/patterns/time@1, en-ZA-x-f24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, en-ZA-x-l, 23B, 2244959498606494 datetime/patterns/time@1, en-ZA-x-l12, 29B, 7cab8ab71d571c6 +datetime/patterns/time@1, en-ZA-x-l24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, en-ZA-x-m, 17B, d037d51a86bbe2 datetime/patterns/time@1, en-ZA-x-m12, 23B, 72dd914cf7818843 +datetime/patterns/time@1, en-ZA-x-m24, 17B, d037d51a86bbe2 datetime/patterns/time@1, en-ZA-x-s, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, en-ZA-x-s12, 17B, dd691ff921592b5 +datetime/patterns/time@1, en-ZA-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, en-x-f, 29B, eab82b7bd4db3d8d datetime/patterns/time@1, en-x-f24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, en-x-l, 29B, 513d9b72aeff08ed @@ -1606,12 +1615,16 @@ datetime/patterns/time@1, en-x-m24, 17B, d037d51a86bbe2 datetime/patterns/time@1, en-x-s, 17B, dd691ff921592b5 datetime/patterns/time@1, en-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, es-AR-x-f, 29B, eab82b7bd4db3d8d +datetime/patterns/time@1, es-AR-x-f12, 29B, 7cab8ab71d571c6 datetime/patterns/time@1, es-AR-x-f24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, es-AR-x-l, 29B, 513d9b72aeff08ed +datetime/patterns/time@1, es-AR-x-l12, 29B, 7cab8ab71d571c6 datetime/patterns/time@1, es-AR-x-l24, 23B, 1c9cddd11d68c33a datetime/patterns/time@1, es-AR-x-m, 23B, 72dd914cf7818843 +datetime/patterns/time@1, es-AR-x-m12, 23B, 72dd914cf7818843 datetime/patterns/time@1, es-AR-x-m24, 17B, d037d51a86bbe2 datetime/patterns/time@1, es-AR-x-s, 17B, dd691ff921592b5 +datetime/patterns/time@1, es-AR-x-s12, 17B, dd691ff921592b5 datetime/patterns/time@1, es-AR-x-s24, 11B, 7b06e5a4993a4e9a datetime/patterns/time@1, es-x-f, 29B, c1bdced58fc9f02f 
datetime/patterns/time@1, es-x-f12, 29B, 7cab8ab71d571c6 diff --git a/provider/datagen/tests/test-options.rs b/provider/datagen/tests/test-options.rs index 976fe0a4671..e4790df18af 100644 --- a/provider/datagen/tests/test-options.rs +++ b/provider/datagen/tests/test-options.rs @@ -290,6 +290,7 @@ fn explicit_hybrid() { .with_locales([ langid!("arc"), // Aramaic, not in supported list langid!("ar-EG"), + langid!("ar-SA"), langid!("en-GB"), langid!("es"), langid!("sr-ME"), @@ -323,16 +324,18 @@ fn explicit_hybrid() { ]), ); - // Explicit locales are "arc", "ar-EG", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" + // Explicit locales are "arc", "ar-EG", "ar-SA", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" let locales = [ "ar", // ancestor of ar-EG "ar-EG", // explicit locale "ar-EG-u-nu-latn", // descendant of ar-EG - // "ar-u-nu-latn", // ??? should this be included? - "arc", // Aramaic, inheriting from und - "en", // ancestor of en-GB - "en-001", // ancestor of en-GB - "en-GB", // explicit locale not in supported locales + "ar-SA", // explicit locale, inheriting from ar + "ar-SA-u-nu-latn", // extensions should be included (#4533) + "ar-u-nu-latn", // extensions should be included (#4533) + "arc", // Aramaic, inheriting from und + "en", // ancestor of en-GB + "en-001", // ancestor of en-GB + "en-GB", // explicit locale not in supported locales // "en-ZA", // not reachable "es", // explicit and supported "es-AR", // descendant of es @@ -356,6 +359,7 @@ fn explicit_runtime() { .with_locales([ langid!("arc"), // Aramaic, not in supported list langid!("ar-EG"), + langid!("ar-SA"), langid!("en-GB"), langid!("es"), langid!("sr-ME"), @@ -389,12 +393,15 @@ fn explicit_runtime() { ]), ); - // Explicit locales are "arc", "ar-EG", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" + // Explicit locales are "arc", "ar-EG", "ar-SA", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" let locales = [ "ar", // "ar-Arab-EG", (same as 'ar') // "ar-EG", (same as 'ar') "ar-EG-u-nu-latn", + // "ar-SA", (same as 'ar') + // 
"ar-SA-u-nu-latn", (same as 'ar-u-nu-latn') + "ar-u-nu-latn", // "arc", (same as 'und') // "en", (same as 'und') // "en-001", (same as 'und') @@ -420,6 +427,7 @@ fn explicit_preresolved() { .with_locales([ langid!("arc"), // Aramaic, not in supported list langid!("ar-EG"), + langid!("ar-SA"), langid!("en-GB"), langid!("es"), langid!("sr-ME"), @@ -453,10 +461,12 @@ fn explicit_preresolved() { ]), ); - // Explicit locales are "arc", "ar-EG", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" + // Explicit locales are "arc", "ar-EG", "ar-SA", "en-GB", "es", "sr-ME", "ru-Cyrl-RU" let locales = [ "ar-EG", "ar-EG-u-nu-latn", // extensions included even in preresolved mode + "ar-SA", + "ar-SA-u-nu-latn", // extensions included even in preresolved mode "arc", "en-GB", "es",