From 24761aaed62d0ec06eb369517e0aedc0a02900df Mon Sep 17 00:00:00 2001 From: Robert Bastian <4706271+robertbastian@users.noreply.github.com> Date: Tue, 4 Jun 2024 00:35:01 +0200 Subject: [PATCH] Follow-up #4985 (#4995) --- provider/datagen/src/provider.rs | 55 +++++++++++++++---- .../src/transform/cldr/characters/mod.rs | 2 +- .../src/transform/cldr/currency/essentials.rs | 2 +- .../src/transform/cldr/datetime/mod.rs | 2 +- .../src/transform/cldr/datetime/neo.rs | 35 ++++++------ .../transform/cldr/datetime/neo_skeleton.rs | 4 +- .../src/transform/cldr/datetime/week_data.rs | 6 +- .../src/transform/cldr/decimal/compact.rs | 8 +-- .../datagen/src/transform/cldr/decimal/mod.rs | 2 +- .../src/transform/cldr/decimal/symbols.rs | 4 +- .../transform/cldr/displaynames/language.rs | 4 +- .../src/transform/cldr/displaynames/region.rs | 2 +- .../src/transform/cldr/displaynames/script.rs | 2 +- .../transform/cldr/displaynames/variant.rs | 2 +- .../datagen/src/transform/cldr/list/mod.rs | 2 +- .../datagen/src/transform/cldr/percent/mod.rs | 2 +- .../person_names_format_data_providers.rs | 8 ++- .../datagen/src/transform/cldr/plurals/mod.rs | 4 +- .../src/transform/cldr/relativetime/mod.rs | 2 +- .../src/transform/cldr/time_zones/mod.rs | 2 +- .../src/transform/icuexport/collator/mod.rs | 9 ++- .../src/transform/segmenter/dictionary.rs | 2 +- .../datagen/src/transform/segmenter/lstm.rs | 8 ++- 23 files changed, 102 insertions(+), 67 deletions(-) diff --git a/provider/datagen/src/provider.rs b/provider/datagen/src/provider.rs index 7ef99dcc1a2..ab3aa736f9d 100644 --- a/provider/datagen/src/provider.rs +++ b/provider/datagen/src/provider.rs @@ -8,6 +8,7 @@ use elsa::sync::FrozenMap; use icu_provider::datagen::IterableDataProvider; use icu_provider::prelude::*; use source::{AbstractFs, SerdeCache}; +use std::borrow::Cow; use std::collections::HashSet; use std::fmt::Debug; use std::path::PathBuf; @@ -41,8 +42,17 @@ pub struct DatagenProvider { trie_type: TrieType, collation_han_database: CollationHanDatabase, #[allow(clippy::type_complexity)] // not as complex as it appears - supported_locales_cache: - Arc, DataError>>>>, + supported_requests_cache: Arc< + FrozenMap< + DataKey, + Box< + Result< + HashSet<(Cow<'static, DataLocale>, Cow<'static, DataKeyAttributes>)>, + DataError, + >, + >, + >, + >, } macro_rules! cb { @@ -109,7 +119,7 @@ impl DatagenProvider { segmenter_lstm_paths: None, trie_type: Default::default(), collation_han_database: Default::default(), - supported_locales_cache: Default::default(), + supported_requests_cache: Default::default(), } } @@ -348,19 +358,42 @@ impl std::fmt::Display for TrieType { } trait IterableDataProviderCached: DataProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError>; } +impl DatagenProvider { + #[allow(clippy::type_complexity)] // not as complex as it appears + fn populate_supported_requests_cache( + &self, + ) -> Result<&HashSet<(Cow<'static, DataLocale>, Cow<'static, DataKeyAttributes>)>, DataError> + where + DatagenProvider: IterableDataProviderCached, + { + self.supported_requests_cache + .insert_with(M::KEY, || { + Box::new(self.supported_requests_cached().map(|m| { + m.into_iter() + .map(|(k, v)| (Cow::Owned(k), Cow::Owned(v))) + .collect() + })) + }) + .as_ref() + .map_err(|&e| e) + } +} + impl IterableDataProvider for DatagenProvider where DatagenProvider: IterableDataProviderCached, { fn supported_requests(&self) -> Result, DataError> { - self.supported_locales_cache - .insert_with(M::KEY, || Box::new(self.supported_locales_cached())) - .clone() + Ok(self + .populate_supported_requests_cache()? + .iter() + .map(|(k, v)| (k.clone().into_owned(), v.clone().into_owned())) + .collect()) } fn supports_request( @@ -368,10 +401,8 @@ where locale: &DataLocale, key_attributes: &DataKeyAttributes, ) -> Result { - self.supported_locales_cache - .insert_with(M::KEY, || Box::new(self.supported_locales_cached())) - .as_ref() - .map_err(|e| *e) - .map(|v| v.contains(&(locale.clone(), key_attributes.clone()))) + Ok(self + .populate_supported_requests_cache()? + .contains(&(Cow::Borrowed(locale), Cow::Borrowed(key_attributes)))) } } diff --git a/provider/datagen/src/transform/cldr/characters/mod.rs b/provider/datagen/src/transform/cldr/characters/mod.rs index ed203f81dac..d4395fc18e5 100644 --- a/provider/datagen/src/transform/cldr/characters/mod.rs +++ b/provider/datagen/src/transform/cldr/characters/mod.rs @@ -48,7 +48,7 @@ macro_rules! exemplar_chars_impls { } impl IterableDataProviderCached<$data_marker_name> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/currency/essentials.rs b/provider/datagen/src/transform/cldr/currency/essentials.rs index 812e80c6c69..554ecdf5edb 100644 --- a/provider/datagen/src/transform/cldr/currency/essentials.rs +++ b/provider/datagen/src/transform/cldr/currency/essentials.rs @@ -107,7 +107,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/datetime/mod.rs b/provider/datagen/src/transform/cldr/datetime/mod.rs index eedcb94879b..ae60d880390 100644 --- a/provider/datagen/src/transform/cldr/datetime/mod.rs +++ b/provider/datagen/src/transform/cldr/datetime/mod.rs @@ -228,7 +228,7 @@ macro_rules! impl_data_provider { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { let mut r = HashSet::new(); diff --git a/provider/datagen/src/transform/cldr/datetime/neo.rs b/provider/datagen/src/transform/cldr/datetime/neo.rs index fd5b49d966f..1ad96aa2b2f 100644 --- a/provider/datagen/src/transform/cldr/datetime/neo.rs +++ b/provider/datagen/src/transform/cldr/datetime/neo.rs @@ -168,26 +168,27 @@ impl DatagenProvider { }) } - fn supported_locales_neo( + fn supported_requests_neo( &self, calendar: Value, keylengths: &'static [TinyAsciiStr<8>], ) -> Result, DataError> { - let mut r = HashSet::new(); - let cldr_cal = supported_cals() .get(&calendar) .ok_or_else(|| DataErrorKind::MissingLocale.into_error())?; - r.extend(self.cldr()?.dates(cldr_cal).list_langs()?.flat_map(|lid| { - keylengths.iter().map(move |&length| { - ( - DataLocale::from(lid.clone()), - DataKeyAttributes::from_tinystr(length), - ) + Ok(self + .cldr()? + .dates(cldr_cal) + .list_langs()? + .flat_map(|lid| { + keylengths.iter().map(move |&length| { + ( + DataLocale::from(lid.clone()), + DataKeyAttributes::from_tinystr(length), + ) + }) }) - })); - - Ok(r) + .collect()) } } @@ -671,7 +672,7 @@ impl DataProvider for DatagenProvider { // and we can use a union of the H12/H24 key lengths arrays, instead checking for preferred hc // in timepattern_convert impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { let calendar = value!("gregory"); @@ -736,10 +737,10 @@ macro_rules! impl_symbols_datagen { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { - self.supported_locales_neo(value!($calendar), $lengths) + self.supported_requests_neo(value!($calendar), $lengths) } } }; @@ -754,10 +755,10 @@ macro_rules! impl_pattern_datagen { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { - self.supported_locales_neo(value!($calendar), $lengths) + self.supported_requests_neo(value!($calendar), $lengths) } } }; diff --git a/provider/datagen/src/transform/cldr/datetime/neo_skeleton.rs b/provider/datagen/src/transform/cldr/datetime/neo_skeleton.rs index 03f011c1a41..6e6212b34c6 100644 --- a/provider/datagen/src/transform/cldr/datetime/neo_skeleton.rs +++ b/provider/datagen/src/transform/cldr/datetime/neo_skeleton.rs @@ -206,7 +206,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { self.neo_time_skeleton_supported_locales() @@ -230,7 +230,7 @@ macro_rules! impl_neo_skeleton_datagen { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { self.neo_date_skeleton_supported_locales(&value!($calendar)) diff --git a/provider/datagen/src/transform/cldr/datetime/week_data.rs b/provider/datagen/src/transform/cldr/datetime/week_data.rs index 8cb0260e7e0..9b92e64908a 100644 --- a/provider/datagen/src/transform/cldr/datetime/week_data.rs +++ b/provider/datagen/src/transform/cldr/datetime/week_data.rs @@ -16,7 +16,7 @@ use icu_provider::prelude::*; use std::collections::HashSet; impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { let week_data: &cldr_serde::week_data::Resource = self @@ -200,10 +200,10 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { - IterableDataProviderCached::::supported_locales_cached(self) + IterableDataProviderCached::::supported_requests_cached(self) } } diff --git a/provider/datagen/src/transform/cldr/decimal/compact.rs b/provider/datagen/src/transform/cldr/decimal/compact.rs index 4b1b77f4021..1b7a463c717 100644 --- a/provider/datagen/src/transform/cldr/decimal/compact.rs +++ b/provider/datagen/src/transform/cldr/decimal/compact.rs @@ -108,18 +108,18 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { - self.supported_locales_for_numbers() + self.supported_requests_for_numbers() } } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { - self.supported_locales_for_numbers() + self.supported_requests_for_numbers() } } diff --git a/provider/datagen/src/transform/cldr/decimal/mod.rs b/provider/datagen/src/transform/cldr/decimal/mod.rs index 4a10c9bad58..f153a3ea3c1 100644 --- a/provider/datagen/src/transform/cldr/decimal/mod.rs +++ b/provider/datagen/src/transform/cldr/decimal/mod.rs @@ -77,7 +77,7 @@ impl DatagenProvider { .collect()) } - fn supported_locales_for_numbers( + fn supported_requests_for_numbers( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/decimal/symbols.rs b/provider/datagen/src/transform/cldr/decimal/symbols.rs index 572198f771c..f80c9718d8d 100644 --- a/provider/datagen/src/transform/cldr/decimal/symbols.rs +++ b/provider/datagen/src/transform/cldr/decimal/symbols.rs @@ -49,10 +49,10 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { - self.supported_locales_for_numbers() + self.supported_requests_for_numbers() } } diff --git a/provider/datagen/src/transform/cldr/displaynames/language.rs b/provider/datagen/src/transform/cldr/displaynames/language.rs index 3001827bdfc..241ce404eb1 100644 --- a/provider/datagen/src/transform/cldr/displaynames/language.rs +++ b/provider/datagen/src/transform/cldr/displaynames/language.rs @@ -52,7 +52,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self @@ -73,7 +73,7 @@ impl IterableDataProviderCached for DatagenProvide } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/displaynames/region.rs b/provider/datagen/src/transform/cldr/displaynames/region.rs index d9ffd103ca9..cc0b4ba5e2b 100644 --- a/provider/datagen/src/transform/cldr/displaynames/region.rs +++ b/provider/datagen/src/transform/cldr/displaynames/region.rs @@ -37,7 +37,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/displaynames/script.rs b/provider/datagen/src/transform/cldr/displaynames/script.rs index 4e0f2d4a9e0..349f3407638 100644 --- a/provider/datagen/src/transform/cldr/displaynames/script.rs +++ b/provider/datagen/src/transform/cldr/displaynames/script.rs @@ -37,7 +37,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/displaynames/variant.rs b/provider/datagen/src/transform/cldr/displaynames/variant.rs index 398a0d7cd29..6ff786afbaa 100644 --- a/provider/datagen/src/transform/cldr/displaynames/variant.rs +++ b/provider/datagen/src/transform/cldr/displaynames/variant.rs @@ -37,7 +37,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/list/mod.rs b/provider/datagen/src/transform/cldr/list/mod.rs index e376a3c9e54..81fd4845c22 100644 --- a/provider/datagen/src/transform/cldr/list/mod.rs +++ b/provider/datagen/src/transform/cldr/list/mod.rs @@ -128,7 +128,7 @@ macro_rules! implement { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/percent/mod.rs b/provider/datagen/src/transform/cldr/percent/mod.rs index b8a096d01d1..05e0ac0d21a 100644 --- a/provider/datagen/src/transform/cldr/percent/mod.rs +++ b/provider/datagen/src/transform/cldr/percent/mod.rs @@ -33,7 +33,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/personnames/person_names_format_data_providers.rs b/provider/datagen/src/transform/cldr/personnames/person_names_format_data_providers.rs index dce17524aab..1b5146f5342 100644 --- a/provider/datagen/src/transform/cldr/personnames/person_names_format_data_providers.rs +++ b/provider/datagen/src/transform/cldr/personnames/person_names_format_data_providers.rs @@ -7,11 +7,11 @@ use std::borrow::Cow; use std::collections::HashSet; use icu_experimental::personnames::provider::*; -use icu_provider::datagen::IterableDataProvider; use icu_provider::prelude::*; use zerovec::VarZeroVec; use crate::provider::transform::cldr::cldr_serde::personnames::person_name_format_json_struct::Resource; +use crate::provider::IterableDataProviderCached; impl DataProvider for crate::DatagenProvider { fn load(&self, req: DataRequest) -> Result, DataError> { @@ -34,8 +34,10 @@ impl DataProvider for crate::DatagenProvider { } } -impl IterableDataProvider for crate::DatagenProvider { - fn supported_requests(&self) -> Result, DataError> { +impl IterableDataProviderCached for crate::DatagenProvider { + fn supported_requests_cached( + &self, + ) -> Result, DataError> { Ok(self .cldr()? .personnames() diff --git a/provider/datagen/src/transform/cldr/plurals/mod.rs b/provider/datagen/src/transform/cldr/plurals/mod.rs index b6676f146cf..06663d60768 100644 --- a/provider/datagen/src/transform/cldr/plurals/mod.rs +++ b/provider/datagen/src/transform/cldr/plurals/mod.rs @@ -65,7 +65,7 @@ macro_rules! implement { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self @@ -124,7 +124,7 @@ impl DataProvider for DatagenProvider { } impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok(self diff --git a/provider/datagen/src/transform/cldr/relativetime/mod.rs b/provider/datagen/src/transform/cldr/relativetime/mod.rs index eadb280e3be..01e3b65dd20 100644 --- a/provider/datagen/src/transform/cldr/relativetime/mod.rs +++ b/provider/datagen/src/transform/cldr/relativetime/mod.rs @@ -98,7 +98,7 @@ macro_rules! make_data_provider { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached(&self) -> Result, DataError> { + fn supported_requests_cached(&self) -> Result, DataError> { Ok(self .cldr()? .dates("gregorian") diff --git a/provider/datagen/src/transform/cldr/time_zones/mod.rs b/provider/datagen/src/transform/cldr/time_zones/mod.rs index f72e80b8e66..c1d248f2feb 100644 --- a/provider/datagen/src/transform/cldr/time_zones/mod.rs +++ b/provider/datagen/src/transform/cldr/time_zones/mod.rs @@ -72,7 +72,7 @@ macro_rules! impl_data_provider { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached(&self) -> Result, DataError> { + fn supported_requests_cached(&self) -> Result, DataError> { if <$marker>::KEY == MetazonePeriodV1Marker::KEY { // MetazonePeriodV1 does not require localized time zone data Ok([Default::default()].into_iter().collect()) diff --git a/provider/datagen/src/transform/icuexport/collator/mod.rs b/provider/datagen/src/transform/icuexport/collator/mod.rs index 25c1bddf532..d0b1fd0a7a8 100644 --- a/provider/datagen/src/transform/icuexport/collator/mod.rs +++ b/provider/datagen/src/transform/icuexport/collator/mod.rs @@ -17,6 +17,7 @@ use icu_locale_core::subtags::Language; use icu_locale_core::subtags::Region; use icu_locale_core::subtags::Script; use icu_locale_core::LanguageIdentifier; +use icu_provider::datagen::IterableDataProvider; use icu_provider::prelude::*; use std::collections::HashSet; use std::convert::TryFrom; @@ -111,10 +112,8 @@ impl DataProvider for DatagenProvider { } } -impl IterableDataProviderCached for DatagenProvider { - fn supported_locales_cached( - &self, - ) -> Result, DataError> { +impl IterableDataProvider for DatagenProvider { + fn supported_requests(&self) -> Result, DataError> { Ok(HashSet::from_iter([Default::default()])) } } @@ -235,7 +234,7 @@ macro_rules! collation_provider { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached(&self) -> Result, DataError> { + fn supported_requests_cached(&self) -> Result, DataError> { Ok(self .icuexport()? .list(&format!( diff --git a/provider/datagen/src/transform/segmenter/dictionary.rs b/provider/datagen/src/transform/segmenter/dictionary.rs index e4045bab04e..cdff711ff18 100644 --- a/provider/datagen/src/transform/segmenter/dictionary.rs +++ b/provider/datagen/src/transform/segmenter/dictionary.rs @@ -48,7 +48,7 @@ macro_rules! implement { } impl IterableDataProviderCached<$marker> for DatagenProvider { - fn supported_locales_cached( + fn supported_requests_cached( &self, ) -> Result, DataError> { Ok($supported diff --git a/provider/datagen/src/transform/segmenter/lstm.rs b/provider/datagen/src/transform/segmenter/lstm.rs index c1a1b201e5f..081c8d0b530 100644 --- a/provider/datagen/src/transform/segmenter/lstm.rs +++ b/provider/datagen/src/transform/segmenter/lstm.rs @@ -4,7 +4,7 @@ //! This module contains provider implementations backed by LSTM segmentation data. -use crate::provider::DatagenProvider; +use crate::provider::{DatagenProvider, IterableDataProviderCached}; use icu_locale_core::langid; use icu_provider::datagen::IterableDataProvider; use icu_provider::prelude::*; @@ -206,8 +206,10 @@ impl DataProvider for DatagenProvider { } } -impl IterableDataProvider for DatagenProvider { - fn supported_requests(&self) -> Result, DataError> { +impl IterableDataProviderCached for DatagenProvider { + fn supported_requests_cached( + &self, + ) -> Result, DataError> { Ok([ "Burmese_codepoints_exclusive_model4_heavy", "Khmer_codepoints_exclusive_model4_heavy",