From 5458916b2707aa11ff33688b3b5e70708724fb2c Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Fri, 30 Jun 2023 14:11:45 +0200 Subject: [PATCH] Compiled data cleanups (#3597) --- Cargo.lock | 5 +- Makefile.toml | 1 - components/collections/Cargo.toml | 2 +- components/collections/src/iterator_utils.rs | 208 +++++------- components/icu/Cargo.toml | 20 +- components/icu/README.md | 7 +- components/icu/examples/tui.rs | 10 +- components/icu/src/lib.rs | 7 +- components/list/src/provider/mod.rs | 4 +- components/locid_transform/README.md | 9 +- .../benches/locale_canonicalizer.rs | 6 +- .../locid_transform/src/canonicalizer.rs | 6 +- .../locid_transform/src/directionality.rs | 43 +-- components/locid_transform/src/expander.rs | 16 +- .../src/fallback/algorithms.rs | 9 +- .../locid_transform/src/fallback/mod.rs | 42 +-- components/locid_transform/src/lib.rs | 9 +- .../tests/locale_canonicalizer.rs | 6 +- components/normalizer/fuzz/Cargo.lock | 209 ++---------- components/normalizer/fuzz/Cargo.toml | 3 +- .../fuzz/fuzz_targets/compare_self.rs | 12 +- .../fuzz/fuzz_targets/compare_utf16.rs | 2 +- components/plurals/Cargo.toml | 1 - components/plurals/benches/parser.rs | 2 +- components/plurals/benches/pluralrules.rs | 26 +- .../plurals/examples/elevator_floors.rs | 5 +- components/plurals/examples/unread_emails.rs | 7 +- components/plurals/src/provider.rs | 4 +- components/plurals/tests/categories.rs | 3 +- components/plurals/tests/plurals.rs | 22 +- components/timezone/Cargo.toml | 1 - components/timezone/README.md | 3 +- components/timezone/src/lib.rs | 3 +- components/timezone/src/provider.rs | 4 +- components/timezone/src/time_zone.rs | 3 +- ffi/diplomat/tests/missing_apis.txt | 1 + provider/core/src/hello_world.rs | 31 +- provider/core/src/helpers.rs | 309 ------------------ provider/core/src/key.rs | 103 +++++- provider/core/src/lib.rs | 4 - tools/make/data.toml | 10 - 41 files changed, 332 insertions(+), 846 deletions(-) delete mode 100644 
provider/core/src/helpers.rs diff --git a/Cargo.lock b/Cargo.lock index e479c385675..e7a46674b19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1321,7 +1321,6 @@ dependencies = [ "icu_provider", "icu_relativetime", "icu_segmenter", - "icu_testdata", "icu_timezone", "writeable", ] @@ -1507,7 +1506,7 @@ dependencies = [ "iai", "icu", "icu_benchmark_macros", - "icu_testdata", + "icu_properties", "postcard", "serde", "serde_json", @@ -1844,7 +1843,6 @@ dependencies = [ "icu_locid_transform", "icu_plurals_data", "icu_provider", - "icu_testdata", "serde", "serde_json", "zerovec", @@ -2070,7 +2068,6 @@ dependencies = [ "icu_calendar", "icu_locid", "icu_provider", - "icu_testdata", "icu_timezone_data", "serde", "tinystr", diff --git a/Makefile.toml b/Makefile.toml index 97e1d1cb138..0bd146552cd 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -98,7 +98,6 @@ description = "Run full data generation on latest CLDR and ICU" category = "CI" dependencies = [ "bakeddata-check", - "full-data", ] [tasks.ci-job-ffi] diff --git a/components/collections/Cargo.toml b/components/collections/Cargo.toml index e663629a3a3..291b070a652 100644 --- a/components/collections/Cargo.toml +++ b/components/collections/Cargo.toml @@ -46,7 +46,7 @@ criterion = "0.4" icu_benchmark_macros = { path = "../../tools/benchmark/macros" } iai = "0.1.1" icu = { path = "../icu", default-features = false } -icu_testdata = { path = "../../provider/testdata", default-features = false, features = ["icu_properties", "icu_locid_transform"] } +icu_properties = { path = "../properties", features = ["data"] } [features] std = [] diff --git a/components/collections/src/iterator_utils.rs b/components/collections/src/iterator_utils.rs index cfc4b7d5fe0..937c133351c 100644 --- a/components/collections/src/iterator_utils.rs +++ b/components/collections/src/iterator_utils.rs @@ -63,12 +63,11 @@ mod tests { use core::fmt::Debug; use icu::collections::codepointinvlist::CodePointInversionListBuilder; use 
icu::collections::codepointtrie::TrieValue; - use icu::properties::maps::{self, CodePointMapData}; - use icu::properties::sets::{self, CodePointSetData}; + use icu::properties::maps::{self, CodePointMapDataBorrowed}; + use icu::properties::sets::{self, CodePointSetDataBorrowed}; use icu::properties::{GeneralCategory, Script}; - fn test_set(d: CodePointSetData, name: &str) { - let data = d.as_borrowed(); + fn test_set(data: CodePointSetDataBorrowed<'static>, name: &str) { let mut builder = CodePointInversionListBuilder::new(); let mut builder_complement = CodePointInversionListBuilder::new(); @@ -86,8 +85,11 @@ mod tests { assert_eq!(set1, set2, "Set {name} failed to complement correctly"); } - fn test_map(d: &CodePointMapData, value: T, name: &str) { - let data = d.as_borrowed(); + fn test_map( + data: &CodePointMapDataBorrowed<'static, T>, + value: T, + name: &str, + ) { let mut builder = CodePointInversionListBuilder::new(); let mut builder_complement = CodePointInversionListBuilder::new(); @@ -112,145 +114,89 @@ mod tests { fn test_complement_sets() { // Stress test the RangeListIteratorComplementer logic by ensuring it works for // a whole bunch of binary properties - let dp = icu_testdata::unstable(); - test_set(sets::load_ascii_hex_digit(&dp).unwrap(), "ASCII_Hex_Digit"); - test_set(sets::load_alnum(&dp).unwrap(), "Alnum"); - test_set(sets::load_alphabetic(&dp).unwrap(), "Alphabetic"); - test_set(sets::load_bidi_control(&dp).unwrap(), "Bidi_Control"); - test_set(sets::load_bidi_mirrored(&dp).unwrap(), "Bidi_Mirrored"); - test_set(sets::load_blank(&dp).unwrap(), "Blank"); - test_set(sets::load_cased(&dp).unwrap(), "Cased"); - test_set(sets::load_case_ignorable(&dp).unwrap(), "Case_Ignorable"); - test_set( - sets::load_full_composition_exclusion(&dp).unwrap(), + test_set(sets::ascii_hex_digit(), "ASCII_Hex_Digit"); + test_set(sets::alnum(), "Alnum"); + test_set(sets::alphabetic(), "Alphabetic"); + test_set(sets::bidi_control(), "Bidi_Control"); + 
test_set(sets::bidi_mirrored(), "Bidi_Mirrored"); + test_set(sets::blank(), "Blank"); + test_set(sets::cased(), "Cased"); + test_set(sets::case_ignorable(), "Case_Ignorable"); + test_set( + sets::full_composition_exclusion(), "Full_Composition_Exclusion", ); + test_set(sets::changes_when_casefolded(), "Changes_When_Casefolded"); + test_set(sets::changes_when_casemapped(), "Changes_When_Casemapped"); test_set( - sets::load_changes_when_casefolded(&dp).unwrap(), - "Changes_When_Casefolded", - ); - test_set( - sets::load_changes_when_casemapped(&dp).unwrap(), - "Changes_When_Casemapped", - ); - test_set( - sets::load_changes_when_nfkc_casefolded(&dp).unwrap(), + sets::changes_when_nfkc_casefolded(), "Changes_When_NFKC_Casefolded", ); + test_set(sets::changes_when_lowercased(), "Changes_When_Lowercased"); + test_set(sets::changes_when_titlecased(), "Changes_When_Titlecased"); + test_set(sets::changes_when_uppercased(), "Changes_When_Uppercased"); + test_set(sets::dash(), "Dash"); + test_set(sets::deprecated(), "Deprecated"); test_set( - sets::load_changes_when_lowercased(&dp).unwrap(), - "Changes_When_Lowercased", - ); - test_set( - sets::load_changes_when_titlecased(&dp).unwrap(), - "Changes_When_Titlecased", - ); - test_set( - sets::load_changes_when_uppercased(&dp).unwrap(), - "Changes_When_Uppercased", - ); - test_set(sets::load_dash(&dp).unwrap(), "Dash"); - test_set(sets::load_deprecated(&dp).unwrap(), "Deprecated"); - test_set( - sets::load_default_ignorable_code_point(&dp).unwrap(), + sets::default_ignorable_code_point(), "Default_Ignorable_Code_Point", ); - test_set(sets::load_diacritic(&dp).unwrap(), "Diacritic"); - test_set( - sets::load_emoji_modifier_base(&dp).unwrap(), - "Emoji_Modifier_Base", - ); - test_set(sets::load_emoji_component(&dp).unwrap(), "Emoji_Component"); - test_set(sets::load_emoji_modifier(&dp).unwrap(), "Emoji_Modifier"); - test_set(sets::load_emoji(&dp).unwrap(), "Emoji"); - test_set( - sets::load_emoji_presentation(&dp).unwrap(), - 
"Emoji_Presentation", - ); - test_set(sets::load_extender(&dp).unwrap(), "Extender"); - test_set( - sets::load_extended_pictographic(&dp).unwrap(), - "Extended_Pictographic", - ); - test_set(sets::load_graph(&dp).unwrap(), "Graph"); - test_set(sets::load_grapheme_base(&dp).unwrap(), "Grapheme_Base"); - test_set(sets::load_grapheme_extend(&dp).unwrap(), "Grapheme_Extend"); - test_set(sets::load_grapheme_link(&dp).unwrap(), "Grapheme_Link"); - test_set(sets::load_hex_digit(&dp).unwrap(), "Hex_Digit"); - test_set(sets::load_hyphen(&dp).unwrap(), "Hyphen"); - test_set(sets::load_id_continue(&dp).unwrap(), "Id_Continue"); - test_set(sets::load_ideographic(&dp).unwrap(), "Ideographic"); - test_set(sets::load_id_start(&dp).unwrap(), "Id_Start"); - test_set( - sets::load_ids_binary_operator(&dp).unwrap(), - "Ids_Binary_Operator", - ); - test_set( - sets::load_ids_trinary_operator(&dp).unwrap(), - "Ids_Trinary_Operator", - ); - test_set(sets::load_join_control(&dp).unwrap(), "Join_Control"); - test_set( - sets::load_logical_order_exception(&dp).unwrap(), - "Logical_Order_Exception", - ); - test_set(sets::load_lowercase(&dp).unwrap(), "Lowercase"); - test_set(sets::load_math(&dp).unwrap(), "Math"); - test_set( - sets::load_noncharacter_code_point(&dp).unwrap(), - "Noncharacter_Code_Point", - ); - test_set(sets::load_nfc_inert(&dp).unwrap(), "NFC_Inert"); - test_set(sets::load_nfd_inert(&dp).unwrap(), "NFD_Inert"); - test_set(sets::load_nfkc_inert(&dp).unwrap(), "NFKC_Inert"); - test_set(sets::load_nfkd_inert(&dp).unwrap(), "NFKD_Inert"); - test_set(sets::load_pattern_syntax(&dp).unwrap(), "Pattern_Syntax"); - test_set( - sets::load_pattern_white_space(&dp).unwrap(), - "Pattern_White_Space", - ); - test_set( - sets::load_prepended_concatenation_mark(&dp).unwrap(), + test_set(sets::diacritic(), "Diacritic"); + test_set(sets::emoji_modifier_base(), "Emoji_Modifier_Base"); + test_set(sets::emoji_component(), "Emoji_Component"); + test_set(sets::emoji_modifier(), 
"Emoji_Modifier"); + test_set(sets::emoji(), "Emoji"); + test_set(sets::emoji_presentation(), "Emoji_Presentation"); + test_set(sets::extender(), "Extender"); + test_set(sets::extended_pictographic(), "Extended_Pictographic"); + test_set(sets::graph(), "Graph"); + test_set(sets::grapheme_base(), "Grapheme_Base"); + test_set(sets::grapheme_extend(), "Grapheme_Extend"); + test_set(sets::grapheme_link(), "Grapheme_Link"); + test_set(sets::hex_digit(), "Hex_Digit"); + test_set(sets::hyphen(), "Hyphen"); + test_set(sets::id_continue(), "Id_Continue"); + test_set(sets::ideographic(), "Ideographic"); + test_set(sets::id_start(), "Id_Start"); + test_set(sets::ids_binary_operator(), "Ids_Binary_Operator"); + test_set(sets::ids_trinary_operator(), "Ids_Trinary_Operator"); + test_set(sets::join_control(), "Join_Control"); + test_set(sets::logical_order_exception(), "Logical_Order_Exception"); + test_set(sets::lowercase(), "Lowercase"); + test_set(sets::math(), "Math"); + test_set(sets::noncharacter_code_point(), "Noncharacter_Code_Point"); + test_set(sets::nfc_inert(), "NFC_Inert"); + test_set(sets::nfd_inert(), "NFD_Inert"); + test_set(sets::nfkc_inert(), "NFKC_Inert"); + test_set(sets::nfkd_inert(), "NFKD_Inert"); + test_set(sets::pattern_syntax(), "Pattern_Syntax"); + test_set(sets::pattern_white_space(), "Pattern_White_Space"); + test_set( + sets::prepended_concatenation_mark(), "Prepended_Concatenation_Mark", ); - test_set(sets::load_print(&dp).unwrap(), "Print"); - test_set(sets::load_quotation_mark(&dp).unwrap(), "Quotation_Mark"); - test_set(sets::load_radical(&dp).unwrap(), "Radical"); - test_set( - sets::load_regional_indicator(&dp).unwrap(), - "Regional_Indicator", - ); - test_set(sets::load_soft_dotted(&dp).unwrap(), "Soft_Dotted"); - test_set(sets::load_segment_starter(&dp).unwrap(), "Segment_Starter"); - test_set(sets::load_case_sensitive(&dp).unwrap(), "Case_Sensitive"); - test_set( - sets::load_sentence_terminal(&dp).unwrap(), - "Sentence_Terminal", - ); - 
test_set( - sets::load_terminal_punctuation(&dp).unwrap(), - "Terminal_Punctuation", - ); - test_set( - sets::load_unified_ideograph(&dp).unwrap(), - "Unified_Ideograph", - ); - test_set(sets::load_uppercase(&dp).unwrap(), "Uppercase"); - test_set( - sets::load_variation_selector(&dp).unwrap(), - "Variation_Selector", - ); - test_set(sets::load_white_space(&dp).unwrap(), "White_Space"); - test_set(sets::load_xdigit(&dp).unwrap(), "Xdigit"); - test_set(sets::load_xid_continue(&dp).unwrap(), "XID_Continue"); - test_set(sets::load_xid_start(&dp).unwrap(), "XID_Start"); + test_set(sets::print(), "Print"); + test_set(sets::quotation_mark(), "Quotation_Mark"); + test_set(sets::radical(), "Radical"); + test_set(sets::regional_indicator(), "Regional_Indicator"); + test_set(sets::soft_dotted(), "Soft_Dotted"); + test_set(sets::segment_starter(), "Segment_Starter"); + test_set(sets::case_sensitive(), "Case_Sensitive"); + test_set(sets::sentence_terminal(), "Sentence_Terminal"); + test_set(sets::terminal_punctuation(), "Terminal_Punctuation"); + test_set(sets::unified_ideograph(), "Unified_Ideograph"); + test_set(sets::uppercase(), "Uppercase"); + test_set(sets::variation_selector(), "Variation_Selector"); + test_set(sets::white_space(), "White_Space"); + test_set(sets::xdigit(), "Xdigit"); + test_set(sets::xid_continue(), "XID_Continue"); + test_set(sets::xid_start(), "XID_Start"); } #[test] fn test_complement_maps() { - let dp = icu_testdata::unstable(); - let gc = maps::load_general_category(&dp).unwrap(); - let script = maps::load_script(&dp).unwrap(); + let gc = maps::general_category(); + let script = maps::script(); test_map(&gc, GeneralCategory::UppercaseLetter, "gc"); test_map(&gc, GeneralCategory::OtherPunctuation, "gc"); test_map(&script, Script::Devanagari, "script"); diff --git a/components/icu/Cargo.toml b/components/icu/Cargo.toml index 8090e241938..060f531296f 100644 --- a/components/icu/Cargo.toml +++ b/components/icu/Cargo.toml @@ -45,15 +45,14 @@ 
icu_timezone = { version = "1.2.0", path = "../timezone", default-features = fal # Experimental components icu_casemapping = { version = "0.7.1", path = "../../experimental/casemapping", default-features = false, optional = true } +icu_compactdecimal = { version = "0.2.0", path = "../../experimental/compactdecimal", default-features = false, optional = true } icu_displaynames = { version = "0.10.0", path = "../../experimental/displaynames", default-features = false, optional = true } icu_relativetime = { version = "0.1.0", path = "../../experimental/relativetime", default-features = false, optional = true } -icu_compactdecimal = { version = "0.2.0", path = "../../experimental/compactdecimal", default-features = false, optional = true } # For docs links icu_provider = { version = "1.2.0", path = "../../provider/core", default-features = false } [dev-dependencies] -icu_testdata = { path = "../../provider/testdata" } writeable = { path = "../../utils/writeable" } [features] @@ -73,8 +72,8 @@ std = [ "icu_locid/std", "icu_normalizer/std", "icu_plurals/std", - "icu_relativetime?/std", "icu_properties/std", + "icu_relativetime?/std", "icu_segmenter/std", "icu_timezone/std", ] @@ -91,14 +90,27 @@ serde = [ "icu_locid/serde", "icu_normalizer/serde", "icu_plurals/serde", - "icu_relativetime?/serde", "icu_properties/serde", + "icu_relativetime?/serde", "icu_segmenter/serde", "icu_timezone/serde", ] data = [ + "icu_calendar/data", + "icu_casemapping?/data", + "icu_collator/data", + "icu_compactdecimal?/data", + "icu_datetime/data", + "icu_decimal/data", + "icu_displaynames?/data", "icu_list/data", + "icu_locid_transform/data", + "icu_normalizer/data", "icu_plurals/data", + "icu_properties/data", + "icu_relativetime?/data", + "icu_segmenter/data", + "icu_timezone/data", ] serde_human = [ "icu_list/serde_human" diff --git a/components/icu/README.md b/components/icu/README.md index c00cd839bf2..a8d9324717c 100644 --- a/components/icu/README.md +++ b/components/icu/README.md @@ 
-33,9 +33,6 @@ by `ICU4X` in separate crates: The data that is required by these providers (in `BakedDataProvider`'s case, the provider itself) can be generated and customized using the [`icu_datagen`] crate. -The following example uses the [`icu_testdata`] crate, which contains prepackaged data providers -for a small set of locales. - ## Example ```rust @@ -50,8 +47,7 @@ let options = length::Bag::from_date_time_style( ) .into(); -let dtf = DateTimeFormatter::try_new_unstable( - &icu_testdata::unstable(), +let dtf = DateTimeFormatter::try_new( &locale!("es").into(), options, ) @@ -101,7 +97,6 @@ There are additional features that, when enabled on specific crates, enable func [`DataPayload`]: icu_provider::DataPayload [`FsDataProvider`]: https://docs.rs/icu_provider_fs/latest/icu_provider_fs/struct.FsDataProvider.html [`BlobDataProvider`]: https://docs.rs/icu_provider_blob/latest/icu_provider_blob/struct.BlobDataProvider.html -[`icu_testdata`]: https://docs.rs/icu_testdata/latest/icu_testdata/ [`icu_provider_adapters`]: https://docs.rs/icu_provider_adapters/latest/icu_provider_adapters/ [`icu_datagen`]: https://docs.rs/icu_datagen/latest/icu_datagen/ [`Locale`]: crate::locid::Locale diff --git a/components/icu/examples/tui.rs b/components/icu/examples/tui.rs index 33b7d7d8f2a..e03e6e40df1 100644 --- a/components/icu/examples/tui.rs +++ b/components/icu/examples/tui.rs @@ -46,8 +46,7 @@ fn main(_argc: isize, _argv: *const *const u8) -> isize { print(format!("User: {user_name}")); { - let dtf = TypedZonedDateTimeFormatter::::try_new_unstable( - &icu_testdata::unstable(), + let dtf = TypedZonedDateTimeFormatter::::try_new( &locale.into(), DateTimeFormatterOptions::default(), TimeZoneFormatterOptions::default(), @@ -77,11 +76,8 @@ fn main(_argc: isize, _argv: *const *const u8) -> isize { } { - let pr = PluralRules::try_new_cardinal_unstable( - &icu_testdata::unstable(), - &locale!("en").into(), - ) - .expect("Failed to create PluralRules."); + let pr = 
PluralRules::try_new_cardinal(&locale!("en").into()) + .expect("Failed to create PluralRules."); match pr.category_for(email_count) { PluralCategory::One => print("Note: You have one unread email."), diff --git a/components/icu/src/lib.rs b/components/icu/src/lib.rs index e375c0662ae..b404998fd2b 100644 --- a/components/icu/src/lib.rs +++ b/components/icu/src/lib.rs @@ -35,9 +35,6 @@ //! The data that is required by these providers (in `BakedDataProvider`'s case, the provider itself) can be //! generated and customized using the [`icu_datagen`] crate. //! -//! The following example uses the [`icu_testdata`] crate, which contains prepackaged data providers -//! for a small set of locales. -//! //! # Example //! //! ``` @@ -52,8 +49,7 @@ //! ) //! .into(); //! -//! let dtf = DateTimeFormatter::try_new_unstable( -//! &icu_testdata::unstable(), +//! let dtf = DateTimeFormatter::try_new( //! &locale!("es").into(), //! options, //! ) @@ -103,7 +99,6 @@ //! [`DataPayload`]: icu_provider::DataPayload //! [`FsDataProvider`]: https://docs.rs/icu_provider_fs/latest/icu_provider_fs/struct.FsDataProvider.html //! [`BlobDataProvider`]: https://docs.rs/icu_provider_blob/latest/icu_provider_blob/struct.BlobDataProvider.html -//! [`icu_testdata`]: https://docs.rs/icu_testdata/latest/icu_testdata/ //! [`icu_provider_adapters`]: https://docs.rs/icu_provider_adapters/latest/icu_provider_adapters/ //! [`icu_datagen`]: https://docs.rs/icu_datagen/latest/icu_datagen/ //! 
[`Locale`]: crate::locid::Locale diff --git a/components/list/src/provider/mod.rs b/components/list/src/provider/mod.rs index 501b23f7598..074e84cea43 100644 --- a/components/list/src/provider/mod.rs +++ b/components/list/src/provider/mod.rs @@ -24,7 +24,9 @@ mod serde_dfa; pub use serde_dfa::SerdeDFA; #[cfg(feature = "data")] -pub(crate) struct Baked; +#[derive(Debug)] +/// Baked data +pub struct Baked; #[cfg(feature = "data")] const _: () = { diff --git a/components/locid_transform/README.md b/components/locid_transform/README.md index 1ded3ee55a1..250833e7925 100644 --- a/components/locid_transform/README.md +++ b/components/locid_transform/README.md @@ -23,8 +23,7 @@ This minimize method returns a new Locale that is the result of running the use icu::locid::Locale; use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; -let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()) - .expect("create failed"); +let lc = LocaleCanonicalizer::new(); let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" .parse() @@ -37,8 +36,7 @@ assert_eq!(locale, "ja-Latn-alalc97-fonipa".parse::().unwrap()); use icu::locid::locale; use icu::locid_transform::{LocaleExpander, TransformResult}; -let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) - .expect("create failed"); +let lc = LocaleExpander::new(); let mut locale = locale!("zh-CN"); assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); @@ -54,8 +52,7 @@ use icu::locid::locale; use icu::locid_transform::{LocaleExpander, TransformResult}; use writeable::assert_writeable_eq; -let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) - .expect("create failed"); +let lc = LocaleExpander::new(); let mut locale = locale!("zh-Hans-CN"); assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); diff --git a/components/locid_transform/benches/locale_canonicalizer.rs b/components/locid_transform/benches/locale_canonicalizer.rs index 97e8f65469e..1ea8df6b39f 100644 
--- a/components/locid_transform/benches/locale_canonicalizer.rs +++ b/components/locid_transform/benches/locale_canonicalizer.rs @@ -10,7 +10,7 @@ use icu_locid_transform::LocaleCanonicalizer; use icu_locid_transform::LocaleExpander; fn canonicalize_bench(c: &mut Criterion) { - let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()).unwrap(); + let lc = LocaleCanonicalizer::new(); let mut group = c.benchmark_group("uncanonicalized"); @@ -39,7 +39,7 @@ fn canonicalize_bench(c: &mut Criterion) { } fn canonicalize_noop_bench(c: &mut Criterion) { - let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()).unwrap(); + let lc = LocaleCanonicalizer::new(); let mut group = c.benchmark_group("canonicalized"); @@ -70,7 +70,7 @@ fn canonicalize_noop_bench(c: &mut Criterion) { } fn maximize_bench(c: &mut Criterion) { - let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()).unwrap(); + let lc = LocaleExpander::new(); let mut group = c.benchmark_group("likelysubtags"); diff --git a/components/locid_transform/src/canonicalizer.rs b/components/locid_transform/src/canonicalizer.rs index 9b21f152793..a47d071c8ee 100644 --- a/components/locid_transform/src/canonicalizer.rs +++ b/components/locid_transform/src/canonicalizer.rs @@ -32,8 +32,7 @@ use tinystr::TinyAsciiStr; /// use icu_locid::Locale; /// use icu_locid_transform::{LocaleCanonicalizer, TransformResult}; /// -/// let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()) -/// .expect("create failed"); +/// let lc = LocaleCanonicalizer::new(); /// /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); @@ -303,8 +302,7 @@ impl LocaleCanonicalizer { /// use icu_locid::Locale; /// use icu_locid_transform::{LocaleCanonicalizer, TransformResult}; /// - /// let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()) - /// .expect("create failed"); + /// let 
lc = LocaleCanonicalizer::new(); /// /// let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc".parse().unwrap(); /// assert_eq!(lc.canonicalize(&mut locale), TransformResult::Modified); diff --git a/components/locid_transform/src/directionality.rs b/components/locid_transform/src/directionality.rs index 62785010e81..36a3b9a8760 100644 --- a/components/locid_transform/src/directionality.rs +++ b/components/locid_transform/src/directionality.rs @@ -29,8 +29,7 @@ pub enum Direction { /// use icu_locid::locale; /// use icu_locid_transform::{Direction, LocaleDirectionality}; /// -/// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable()) -/// .expect("create failed"); +/// let ld = LocaleDirectionality::new(); /// /// assert_eq!(ld.get(&locale!("en")), Some(Direction::LeftToRight)); /// ``` @@ -46,12 +45,7 @@ impl LocaleDirectionality { /// A constructor which creates a [`LocaleDirectionality`]. #[cfg(feature = "data")] pub const fn new() -> Self { - Self { - script_direction: DataPayload::from_static_ref( - crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1, - ), - expander: LocaleExpander::new(), - } + Self::new_with_expander(LocaleExpander::new()) } // Note: This is a custom impl because the bounds on `try_new_unstable` don't suffice @@ -89,31 +83,32 @@ impl LocaleDirectionality { /// /// For example, use this constructor if you wish to support all languages. /// - /// [📚 Help choosing a constructor](icu_provider::constructors) - ///
- /// ⚠️ The bounds on this function may change over time, including in SemVer minor releases. - ///
- /// /// # Examples /// /// ``` /// use icu_locid::locale; /// use icu_locid_transform::{Direction, LocaleDirectionality, LocaleExpander}; /// - /// let ld_default = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable()) - /// .expect("create failed"); + /// let ld_default = LocaleDirectionality::new(); /// /// assert_eq!(ld_default.get(&locale!("jbn")), None); /// - /// let expander = LocaleExpander::try_new_extended_unstable(&icu_testdata::unstable()) - /// .expect("create failed"); - /// let ld_extended = LocaleDirectionality::try_new_with_expander_unstable( - /// &icu_testdata::unstable(), - /// expander, - /// ).expect("create failed"); + /// let expander = LocaleExpander::new_extended(); + /// let ld_extended = LocaleDirectionality::new_with_expander(expander); /// /// assert_eq!(ld_extended.get(&locale!("jbn")), Some(Direction::RightToLeft)); /// ``` + #[cfg(feature = "data")] + pub const fn new_with_expander(expander: LocaleExpander) -> Self { + LocaleDirectionality { + script_direction: DataPayload::from_static_ref( + crate::provider::Baked::SINGLETON_LOCID_TRANSFORM_SCRIPT_DIR_V1, + ), + expander, + } + } + + #[doc = icu_provider::gen_any_buffer_unstable_docs!(UNSTABLE, Self::new_with_expander)] pub fn try_new_with_expander_unstable

( provider: &P, expander: LocaleExpander, @@ -146,8 +141,7 @@ impl LocaleDirectionality { /// use icu_locid::locale; /// use icu_locid_transform::{Direction, LocaleDirectionality}; /// - /// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable()) - /// .expect("create failed"); + /// let ld = LocaleDirectionality::new(); /// /// assert_eq!(ld.get(&locale!("en-US")), Some(Direction::LeftToRight)); /// @@ -163,8 +157,7 @@ impl LocaleDirectionality { /// use icu_locid::Locale; /// use icu_locid_transform::{Direction, LocaleDirectionality}; /// - /// let ld = LocaleDirectionality::try_new_unstable(&icu_testdata::unstable()) - /// .expect("create failed"); + /// let ld = LocaleDirectionality::new(); /// /// assert_eq!(ld.get(&Locale::from(Some(script!("Latn")))), Some(Direction::LeftToRight)); /// ``` diff --git a/components/locid_transform/src/expander.rs b/components/locid_transform/src/expander.rs index ef72035deac..cc31e70e5c3 100644 --- a/components/locid_transform/src/expander.rs +++ b/components/locid_transform/src/expander.rs @@ -29,8 +29,7 @@ use crate::TransformResult; /// use icu_locid::locale; /// use icu_locid_transform::{LocaleExpander, TransformResult}; /// -/// let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) -/// .expect("create failed"); +/// let lc = LocaleExpander::new(); /// /// let mut locale = locale!("zh-CN"); /// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); @@ -47,8 +46,7 @@ use crate::TransformResult; /// use icu_locid::locale; /// use icu_locid_transform::{LocaleExpander, TransformResult}; /// -/// let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) -/// .expect("create failed"); +/// let lc = LocaleExpander::new(); /// /// let mut locale = locale!("zh-Hans-CN"); /// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); @@ -66,9 +64,7 @@ use crate::TransformResult; /// use icu_locid::locale; /// use icu_locid_transform::{LocaleExpander, TransformResult}; /// 
-/// let lc = -/// LocaleExpander::try_new_extended_unstable(&icu_testdata::unstable()) -/// .expect("create failed"); +/// let lc = LocaleExpander::new_extended(); /// /// let mut locale = locale!("atj"); /// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); @@ -366,8 +362,7 @@ impl LocaleExpander { /// use icu_locid::locale; /// use icu_locid_transform::{LocaleExpander, TransformResult}; /// - /// let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) - /// .expect("create failed"); + /// let lc = LocaleExpander::new(); /// /// let mut locale = locale!("zh-CN"); /// assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); @@ -440,8 +435,7 @@ impl LocaleExpander { /// use icu_locid::locale; /// use icu_locid_transform::{LocaleExpander, TransformResult}; /// - /// let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) - /// .expect("creation failed"); + /// let lc = LocaleExpander::new(); /// /// let mut locale = locale!("zh-Hans-CN"); /// assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); diff --git a/components/locid_transform/src/fallback/algorithms.rs b/components/locid_transform/src/fallback/algorithms.rs index 2baffa94610..6a52023ba6d 100644 --- a/components/locid_transform/src/fallback/algorithms.rs +++ b/components/locid_transform/src/fallback/algorithms.rs @@ -408,11 +408,10 @@ mod tests { ]; #[test] - #[cfg(feature = "serde")] fn test_fallback() { let fallbacker_no_data = LocaleFallbacker::new_without_data(); - let fallbacker_with_data = - LocaleFallbacker::try_new_with_buffer_provider(&icu_testdata::buffer()).unwrap(); + let fallbacker_no_data = fallbacker_no_data.as_borrowed(); + let fallbacker_with_data = LocaleFallbacker::new(); for cas in TEST_CASES { for (priority, expected_chain) in [ (FallbackPriority::Language, cas.expected_language_chain), @@ -424,9 +423,9 @@ mod tests { fallback_supplement: cas.fallback_supplement, }; let fallbacker = if cas.requires_data { - &fallbacker_with_data 
+ fallbacker_with_data } else { - &fallbacker_no_data + fallbacker_no_data }; let mut it = fallbacker.fallback_for(config, Locale::from_str(cas.input).unwrap()); for &expected in expected_chain { diff --git a/components/locid_transform/src/fallback/mod.rs b/components/locid_transform/src/fallback/mod.rs index 1338205178b..bc2f6a3cfc3 100644 --- a/components/locid_transform/src/fallback/mod.rs +++ b/components/locid_transform/src/fallback/mod.rs @@ -17,7 +17,7 @@ //! use icu_locid_transform::fallback::LocaleFallbacker; //! //! // Set up a LocaleFallbacker with data. -//! let fallbacker = LocaleFallbacker::try_new_unstable(&icu_testdata::unstable()).expect("data"); +//! let fallbacker = LocaleFallbacker::new(); //! //! // Create a LocaleFallbackerIterator with a default configuration. //! // By default, uses language priority with no additional extension keywords. @@ -64,8 +64,7 @@ pub struct LocaleFallbackConfig { /// /// // Set up the fallback iterator. /// let fallbacker = - /// LocaleFallbacker::try_new_unstable(&icu_testdata::unstable()) - /// .expect("data"); + /// LocaleFallbacker::new(); /// let mut config = LocaleFallbackConfig::default(); /// config.priority = FallbackPriority::Language; /// let mut fallback_iterator = fallbacker.fallback_for(config, locale!("ca-ES-valencia")); @@ -95,8 +94,7 @@ pub struct LocaleFallbackConfig { /// /// // Set up the fallback iterator. /// let fallbacker = - /// LocaleFallbacker::try_new_unstable(&icu_testdata::unstable()) - /// .expect("data"); + /// LocaleFallbacker::new(); /// let mut config = LocaleFallbackConfig::default(); /// config.priority = FallbackPriority::Region; /// let mut fallback_iterator = fallbacker.fallback_for(config, locale!("ca-ES-valencia")); @@ -133,8 +131,7 @@ pub struct LocaleFallbackConfig { /// /// // Set up the fallback iterator. 
/// let fallbacker = - /// LocaleFallbacker::try_new_unstable(&icu_testdata::unstable()) - /// .expect("data"); + /// LocaleFallbacker::new(); /// let mut config = LocaleFallbackConfig::default(); /// config.extension_key = Some(icu_locid::extensions::unicode::key!("nu")); /// let mut fallback_iterator = fallbacker @@ -175,8 +172,7 @@ pub struct LocaleFallbackConfig { /// /// // Set up the fallback iterator. /// let fallbacker = - /// LocaleFallbacker::try_new_unstable(&icu_testdata::unstable()) - /// .expect("data"); + /// LocaleFallbacker::new(); /// let mut config = LocaleFallbackConfig::default(); /// config.priority = FallbackPriority::Collation; /// config.fallback_supplement = Some(FallbackSupplement::Collation); @@ -295,12 +291,14 @@ impl LocaleFallbacker { /// [📚 Help choosing a constructor](icu_provider::constructors) #[cfg(feature = "data")] #[allow(clippy::new_ret_no_self)] // keeping constructors together - pub const fn new() -> LocaleFallbackerBorrowed<'static> { - LocaleFallbackerBorrowed { + pub const fn new<'a>() -> LocaleFallbackerBorrowed<'a> { + let tickstatic = LocaleFallbackerBorrowed { likely_subtags: crate::provider::Baked::SINGLETON_FALLBACK_LIKELYSUBTAGS_V1, parents: crate::provider::Baked::SINGLETON_FALLBACK_PARENTS_V1, collation_supplement: Some(crate::provider::Baked::SINGLETON_FALLBACK_SUPPLEMENT_CO_V1), - } + }; + // Shitty covariance because the zeromaps confuse the compiler + unsafe { core::mem::transmute(tickstatic) } } icu_provider::gen_any_buffer_data_constructors!(locale: skip, options: skip, error: DataError, @@ -369,18 +367,22 @@ impl LocaleFallbacker { #[doc(hidden)] pub fn for_config(&self, config: LocaleFallbackConfig) -> LocaleFallbackerWithConfig { - LocaleFallbackerBorrowed { - likely_subtags: self.likely_subtags.get(), - parents: self.parents.get(), - collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()), - } - .for_config(config) + self.as_borrowed().for_config(config) } #[doc(hidden)] pub fn 
for_key(&self, data_key: DataKey) -> LocaleFallbackerWithConfig { self.for_config(data_key.into()) } + + #[doc(hidden)] + pub fn as_borrowed(&self) -> LocaleFallbackerBorrowed { + LocaleFallbackerBorrowed { + likely_subtags: self.likely_subtags.get(), + parents: self.parents.get(), + collation_supplement: self.collation_supplement.as_ref().map(|p| p.get()), + } + } } impl<'a> LocaleFallbackerBorrowed<'a> { @@ -395,7 +397,7 @@ impl<'a> LocaleFallbackerBorrowed<'a> { self, config: LocaleFallbackConfig, locale: impl Into, - ) -> LocaleFallbackIterator<'a, 'a> { + ) -> LocaleFallbackIterator<'a, 'static> { self.for_config(config).fallback_for(locale.into()) } @@ -413,7 +415,7 @@ impl<'a> LocaleFallbackerBorrowed<'a> { } impl<'a> LocaleFallbackerWithConfig<'a> { - pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'a> { + pub fn fallback_for(&self, mut locale: DataLocale) -> LocaleFallbackIterator<'a, 'static> { self.normalize(&mut locale); LocaleFallbackIterator { current: locale, diff --git a/components/locid_transform/src/lib.rs b/components/locid_transform/src/lib.rs index d5146e92d71..1fd40722248 100644 --- a/components/locid_transform/src/lib.rs +++ b/components/locid_transform/src/lib.rs @@ -25,8 +25,7 @@ //! use icu::locid::Locale; //! use icu::locid_transform::{LocaleCanonicalizer, TransformResult}; //! -//! let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()) -//! .expect("create failed"); +//! let lc = LocaleCanonicalizer::new(); //! //! let mut locale: Locale = "ja-Latn-fonipa-hepburn-heploc" //! .parse() @@ -39,8 +38,7 @@ //! use icu::locid::locale; //! use icu::locid_transform::{LocaleExpander, TransformResult}; //! -//! let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) -//! .expect("create failed"); +//! let lc = LocaleExpander::new(); //! //! let mut locale = locale!("zh-CN"); //! assert_eq!(lc.maximize(&mut locale), TransformResult::Modified); @@ -56,8 +54,7 @@ //! 
use icu::locid_transform::{LocaleExpander, TransformResult}; //! use writeable::assert_writeable_eq; //! -//! let lc = LocaleExpander::try_new_unstable(&icu_testdata::unstable()) -//! .expect("create failed"); +//! let lc = LocaleExpander::new(); //! //! let mut locale = locale!("zh-Hans-CN"); //! assert_eq!(lc.minimize(&mut locale), TransformResult::Modified); diff --git a/components/locid_transform/tests/locale_canonicalizer.rs b/components/locid_transform/tests/locale_canonicalizer.rs index 6ba06e295a5..06e360f53ca 100644 --- a/components/locid_transform/tests/locale_canonicalizer.rs +++ b/components/locid_transform/tests/locale_canonicalizer.rs @@ -11,7 +11,7 @@ use writeable::assert_writeable_eq; #[test] fn test_maximize() { - let lc = LocaleExpander::try_new_extended_unstable(&icu_testdata::unstable()).unwrap(); + let lc = LocaleExpander::new_extended(); let path = "./tests/fixtures/maximize.json"; let testcases: Vec = @@ -35,7 +35,7 @@ fn test_maximize() { #[test] fn test_minimize() { - let lc = LocaleExpander::try_new_extended_unstable(&icu_testdata::unstable()).unwrap(); + let lc = LocaleExpander::new_extended(); let path = "./tests/fixtures/minimize.json"; let testcases: Vec = @@ -59,7 +59,7 @@ fn test_minimize() { #[test] fn test_canonicalize() { - let lc = LocaleCanonicalizer::try_new_unstable(&icu_testdata::unstable()).unwrap(); + let lc = LocaleCanonicalizer::new(); let path = "./tests/fixtures/canonicalize.json"; let testcases: Vec = diff --git a/components/normalizer/fuzz/Cargo.lock b/components/normalizer/fuzz/Cargo.lock index ddefed64959..ff71ab04b09 100644 --- a/components/normalizer/fuzz/Cargo.lock +++ b/components/normalizer/fuzz/Cargo.lock @@ -140,12 +140,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "either" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" - [[package]] name = "encoding_rs" version = "0.8.32" @@ 
-168,15 +162,6 @@ dependencies = [ "termcolor", ] -[[package]] -name = "fixed_decimal" -version = "0.5.3" -dependencies = [ - "displaydoc", - "smallvec", - "writeable", -] - [[package]] name = "glob" version = "0.3.1" @@ -201,85 +186,16 @@ dependencies = [ "quick-error", ] -[[package]] -name = "icu_calendar" -version = "1.2.0" -dependencies = [ - "displaydoc", - "icu_locid", - "icu_provider", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_collator" -version = "1.2.0" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid", - "icu_normalizer", - "icu_properties", - "icu_provider", - "smallvec", - "utf16_iter", - "utf8_iter", - "zerovec", -] - [[package]] name = "icu_collections" version = "1.2.0" dependencies = [ "displaydoc", - "serde", "yoke", "zerofrom", "zerovec", ] -[[package]] -name = "icu_datetime" -version = "1.2.0" -dependencies = [ - "displaydoc", - "either", - "fixed_decimal", - "icu_calendar", - "icu_decimal", - "icu_locid", - "icu_plurals", - "icu_provider", - "icu_timezone", - "smallvec", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_decimal" -version = "1.2.0" -dependencies = [ - "displaydoc", - "fixed_decimal", - "icu_locid", - "icu_provider", - "writeable", -] - -[[package]] -name = "icu_list" -version = "1.2.0" -dependencies = [ - "displaydoc", - "icu_provider", - "regex-automata", - "writeable", -] - [[package]] name = "icu_locid" version = "1.2.0" @@ -293,24 +209,29 @@ dependencies = [ [[package]] name = "icu_locid_transform" -version = "1.2.0" +version = "1.2.1" dependencies = [ "displaydoc", "icu_locid", + "icu_locid_transform_data", "icu_provider", "tinystr", "zerovec", ] +[[package]] +name = "icu_locid_transform_data" +version = "0.0.0" + [[package]] name = "icu_normalizer" version = "1.2.0" dependencies = [ "displaydoc", "icu_collections", + "icu_normalizer_data", "icu_properties", "icu_provider", - "serde", "smallvec", "utf16_iter", "utf8_iter", @@ -324,7 +245,6 @@ version = 
"0.0.0" dependencies = [ "encoding_rs", "icu_normalizer", - "icu_testdata", "libfuzzer-sys", "rust_icu_sys", "rust_icu_unorm2", @@ -334,15 +254,8 @@ dependencies = [ ] [[package]] -name = "icu_plurals" -version = "1.2.0" -dependencies = [ - "displaydoc", - "fixed_decimal", - "icu_locid", - "icu_provider", - "zerovec", -] +name = "icu_normalizer_data" +version = "0.0.0" [[package]] name = "icu_properties" @@ -350,12 +263,17 @@ version = "1.2.0" dependencies = [ "displaydoc", "icu_collections", + "icu_locid_transform", + "icu_properties_data", "icu_provider", - "serde", "tinystr", "zerovec", ] +[[package]] +name = "icu_properties_data" +version = "0.0.0" + [[package]] name = "icu_provider" version = "1.2.0" @@ -363,7 +281,6 @@ dependencies = [ "displaydoc", "icu_locid", "icu_provider_macros", - "serde", "stable_deref_trait", "writeable", "yoke", @@ -371,70 +288,13 @@ dependencies = [ "zerovec", ] -[[package]] -name = "icu_provider_adapters" -version = "1.2.0" -dependencies = [ - "icu_locid", - "icu_provider", - "tinystr", - "yoke", - "zerovec", -] - [[package]] name = "icu_provider_macros" version = "1.2.0" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", -] - -[[package]] -name = "icu_segmenter" -version = "1.2.0" -dependencies = [ - "displaydoc", - "icu_collections", - "icu_locid", - "icu_provider", - "utf8_iter", - "zerovec", -] - -[[package]] -name = "icu_testdata" -version = "1.2.0" -dependencies = [ - "icu_calendar", - "icu_collator", - "icu_collections", - "icu_datetime", - "icu_decimal", - "icu_list", - "icu_locid", - "icu_locid_transform", - "icu_normalizer", - "icu_plurals", - "icu_properties", - "icu_provider", - "icu_provider_adapters", - "icu_segmenter", - "icu_timezone", - "zerovec", -] - -[[package]] -name = "icu_timezone" -version = "1.2.0" -dependencies = [ - "displaydoc", - "icu_calendar", - "icu_locid", - "icu_provider", - "tinystr", - "zerovec", + "syn 2.0.15", ] [[package]] @@ -567,15 +427,6 @@ dependencies = [ "regex-syntax", ] 
-[[package]] -name = "regex-automata" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9368763f5a9b804326f3af749e16f9abf378d227bcdee7634b13d8f17793782" -dependencies = [ - "memchr", -] - [[package]] name = "regex-syntax" version = "0.6.29" @@ -686,20 +537,6 @@ name = "serde" version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.160" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.15", -] [[package]] name = "shlex" @@ -749,13 +586,13 @@ dependencies = [ [[package]] name = "synstructure" -version = "0.12.6" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" +checksum = "285ba80e733fac80aa4270fbcdf83772a79b80aa35c97075320abfee4a915b06" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.15", "unicode-xid", ] @@ -802,7 +639,6 @@ name = "tinystr" version = "0.7.1" dependencies = [ "displaydoc", - "serde", "zerovec", ] @@ -914,7 +750,7 @@ version = "0.7.1" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.15", "synstructure", ] @@ -931,7 +767,7 @@ version = "0.1.2" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.15", "synstructure", ] @@ -939,7 +775,6 @@ dependencies = [ name = "zerovec" version = "0.9.4" dependencies = [ - "serde", "yoke", "zerofrom", "zerovec-derive", @@ -951,6 +786,6 @@ version = "0.9.4" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.15", "synstructure", ] diff --git a/components/normalizer/fuzz/Cargo.toml b/components/normalizer/fuzz/Cargo.toml index 
fce1e235ca0..ba696df628e 100644 --- a/components/normalizer/fuzz/Cargo.toml +++ b/components/normalizer/fuzz/Cargo.toml @@ -17,8 +17,7 @@ rust_icu_unorm2 = { version = "3", features = ["use-bindgen", "icu_config"] } rust_icu_sys = { version = "3", features = ["use-bindgen", "icu_config"] } rust_icu_ustring = { version = "3", features = ["use-bindgen", "icu_config"] } encoding_rs = "0.8.31" -icu_testdata = { path = "../../../provider/testdata" } -icu_normalizer = { path = "..", features = ["serde"] } +icu_normalizer = { path = "..", features = ["data"] } utf8_iter = "1.0.1" utf16_iter = "1.0.3" diff --git a/components/normalizer/fuzz/fuzz_targets/compare_self.rs b/components/normalizer/fuzz/fuzz_targets/compare_self.rs index fb859b0bd9c..3dbc6355cf6 100644 --- a/components/normalizer/fuzz/fuzz_targets/compare_self.rs +++ b/components/normalizer/fuzz/fuzz_targets/compare_self.rs @@ -13,14 +13,10 @@ fuzz_target!(|data: &[u8]| { let well_formed = String::from_utf8_lossy(data); let utf16: Vec = well_formed.encode_utf16().collect(); - let nfd = - DecomposingNormalizer::try_new_nfd_unstable(&icu_testdata::unstable()).unwrap(); - let nfkd = - DecomposingNormalizer::try_new_nfkd_unstable(&icu_testdata::unstable()).unwrap(); - let nfc = - ComposingNormalizer::try_new_nfc_unstable(&icu_testdata::unstable()).unwrap(); - let nfkc = - ComposingNormalizer::try_new_nfkc_unstable(&icu_testdata::unstable()).unwrap(); + let nfd = DecomposingNormalizer::new_nfd(); + let nfkd = DecomposingNormalizer::new_nfkd(); + let nfc = ComposingNormalizer::new_nfc(); + let nfkc = ComposingNormalizer::new_nfkc(); // Not macroizing these to get nice line numbers by default. 
diff --git a/components/normalizer/fuzz/fuzz_targets/compare_utf16.rs b/components/normalizer/fuzz/fuzz_targets/compare_utf16.rs index 36c0c004b68..583d4e481f5 100644 --- a/components/normalizer/fuzz/fuzz_targets/compare_utf16.rs +++ b/components/normalizer/fuzz/fuzz_targets/compare_utf16.rs @@ -23,7 +23,7 @@ fn slice_from_icu4c(string: &UChar) -> &[u16] { } fn normalize_icu4x(buffer: &[u16]) -> Vec { - let normalizer = DecomposingNormalizer::try_new(&icu_testdata::unstable()).unwrap(); + let normalizer = DecomposingNormalizer::new_nfd(); normalizer.normalize_utf16(buffer) } diff --git a/components/plurals/Cargo.toml b/components/plurals/Cargo.toml index 39d04454cc9..7338bbd4417 100644 --- a/components/plurals/Cargo.toml +++ b/components/plurals/Cargo.toml @@ -46,7 +46,6 @@ criterion = "0.4" icu = { path = "../icu" } icu_benchmark_macros = { path = "../../tools/benchmark/macros" } icu_provider = { path = "../../provider/core" } -icu_testdata = { path = "../../provider/testdata", default-features = false, features = ["icu_plurals", "icu_locid_transform"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/components/plurals/benches/parser.rs b/components/plurals/benches/parser.rs index 79c4bb80e87..7d6c2b8e465 100644 --- a/components/plurals/benches/parser.rs +++ b/components/plurals/benches/parser.rs @@ -18,7 +18,7 @@ fn parser(c: &mut Criterion) { for langid in fixture_data.langs { let data_payload: DataPayload = - icu_testdata::unstable() + icu_plurals::provider::Baked .load(DataRequest { locale: &langid.into(), metadata: Default::default(), diff --git a/components/plurals/benches/pluralrules.rs b/components/plurals/benches/pluralrules.rs index 667cf989b23..cb2b16e306e 100644 --- a/components/plurals/benches/pluralrules.rs +++ b/components/plurals/benches/pluralrules.rs @@ -13,17 +13,10 @@ fn pluralrules(c: &mut Criterion) { let plurals_data = helpers::get_plurals_data(); let numbers_data = helpers::get_numbers_data(); - let 
provider = icu_testdata::unstable(); - c.bench_function("plurals/pluralrules/overview", |b| { b.iter(|| { for lang in &plurals_data.langs { - let pr = PluralRules::try_new_unstable( - &provider, - &lang.into(), - PluralRuleType::Cardinal, - ) - .unwrap(); + let pr = PluralRules::try_new(&lang.into(), PluralRuleType::Cardinal).unwrap(); for s in &numbers_data.usize { let _ = pr.category_for(*s); } @@ -39,24 +32,13 @@ fn pluralrules(c: &mut Criterion) { c.bench_function("plurals/pluralrules/construct/fs", |b| { b.iter(|| { for lang in &plurals_data.langs { - PluralRules::try_new_unstable(&provider, &lang.into(), PluralRuleType::Ordinal) - .unwrap(); - PluralRules::try_new_unstable( - &provider, - &lang.into(), - PluralRuleType::Cardinal, - ) - .unwrap(); + PluralRules::try_new(&lang.into(), PluralRuleType::Ordinal).unwrap(); + PluralRules::try_new(&lang.into(), PluralRuleType::Cardinal).unwrap(); } }); }); - let pr = PluralRules::try_new_unstable( - &provider, - &locale!("ru").into(), - PluralRuleType::Cardinal, - ) - .unwrap(); + let pr = PluralRules::try_new(&locale!("ru").into(), PluralRuleType::Cardinal).unwrap(); c.bench_function("plurals/pluralrules/select/fs", |b| { b.iter(|| { for s in &numbers_data.usize { diff --git a/components/plurals/examples/elevator_floors.rs b/components/plurals/examples/elevator_floors.rs index 84ae99205a3..a59c6e6f8d5 100644 --- a/components/plurals/examples/elevator_floors.rs +++ b/components/plurals/examples/elevator_floors.rs @@ -29,9 +29,8 @@ fn main(_argc: isize, _argv: *const *const u8) -> isize { { print("\n====== Elevator Floor (en) example ============", None); - let pr = - PluralRules::try_new_ordinal_unstable(&icu_testdata::unstable(), &locale!("en").into()) - .expect("Failed to create a PluralRules instance."); + let pr = PluralRules::try_new_ordinal(&locale!("en").into()) + .expect("Failed to create a PluralRules instance."); for value in VALUES { match pr.category_for(*value) { diff --git 
a/components/plurals/examples/unread_emails.rs b/components/plurals/examples/unread_emails.rs index 386b4a789e9..61e6c01c631 100644 --- a/components/plurals/examples/unread_emails.rs +++ b/components/plurals/examples/unread_emails.rs @@ -29,11 +29,8 @@ fn main(_argc: isize, _argv: *const *const u8) -> isize { { print("\n====== Unread Emails (en) example ============", None); - let pr = PluralRules::try_new_cardinal_unstable( - &icu_testdata::unstable(), - &locale!("en").into(), - ) - .expect("Failed to create a PluralRules instance."); + let pr = PluralRules::try_new_cardinal(&locale!("en").into()) + .expect("Failed to create a PluralRules instance."); for value in VALUES { match pr.category_for(*value) { diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index b5efcc5be11..910703ec243 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -20,7 +20,9 @@ use icu_provider::prelude::*; use icu_provider::DataMarker; #[cfg(feature = "data")] -pub(crate) struct Baked; +#[derive(Debug)] +/// Baked data +pub struct Baked; #[cfg(feature = "data")] const _: () = { diff --git a/components/plurals/tests/categories.rs b/components/plurals/tests/categories.rs index 8b77e333e0a..071cfc456a0 100644 --- a/components/plurals/tests/categories.rs +++ b/components/plurals/tests/categories.rs @@ -16,8 +16,7 @@ fn test_categories() { helpers::read_fixture(path).expect("Failed to read a fixture"); for test in test_set { - let pr = PluralRules::try_new_unstable( - &icu_testdata::unstable(), + let pr = PluralRules::try_new( &LanguageIdentifier::from_str(&test.langid).unwrap().into(), test.plural_type.into(), ) diff --git a/components/plurals/tests/plurals.rs b/components/plurals/tests/plurals.rs index 1923d4b5546..f1dd7a2bd3e 100644 --- a/components/plurals/tests/plurals.rs +++ b/components/plurals/tests/plurals.rs @@ -9,13 +9,9 @@ use icu_provider::prelude::*; #[test] fn test_plural_rules() { assert_eq!( - 
PluralRules::try_new_unstable( - &icu_testdata::unstable(), - &locale!("en").into(), - PluralRuleType::Cardinal - ) - .unwrap() - .category_for(5_usize), + PluralRules::try_new(&locale!("en").into(), PluralRuleType::Cardinal) + .unwrap() + .category_for(5_usize), PluralCategory::Other ); } @@ -23,7 +19,7 @@ fn test_plural_rules() { #[test] fn test_static_load_works() { DataProvider::::load( - &icu_testdata::unstable(), + &icu_plurals::provider::Baked, DataRequest { locale: &locale!("en").into(), metadata: Default::default(), @@ -34,16 +30,6 @@ fn test_static_load_works() { .expect("Failed to retrieve payload"); } -#[test] -fn test_plural_rules_missing() { - assert!(PluralRules::try_new_unstable( - &icu_testdata::unstable_no_fallback(), - &locale!("xx").into(), - PluralRuleType::Cardinal - ) - .is_err()); -} - #[test] fn test_plural_category_all() { let categories: Vec = PluralCategory::all().collect(); diff --git a/components/timezone/Cargo.toml b/components/timezone/Cargo.toml index a69266eb771..4dc724d5b5d 100644 --- a/components/timezone/Cargo.toml +++ b/components/timezone/Cargo.toml @@ -43,7 +43,6 @@ icu_timezone_data = { path = "data", optional = true } [dev-dependencies] icu = { path = "../../components/icu", default-features = false } -icu_testdata = { path = "../../provider/testdata", default-features = false, features = ["icu_timezone", "icu_locid_transform"] } [features] std = ["icu_calendar/std", "icu_locid/std", "icu_provider/std"] diff --git a/components/timezone/README.md b/components/timezone/README.md index b7fe62d57d7..4f0e3dfe892 100644 --- a/components/timezone/README.md +++ b/components/timezone/README.md @@ -92,8 +92,7 @@ time_zone.time_zone_id = "uschi".parse::>().ok().map(Into::into); // Compute the metazone at January 1, 2022: -let mzc = MetazoneCalculator::try_new_unstable(&icu_testdata::unstable()) - .unwrap(); +let mzc = MetazoneCalculator::new(); let datetime = DateTime::try_new_iso_datetime(2022, 1, 1, 0, 0, 0).unwrap(); 
time_zone.maybe_calculate_metazone(&mzc, &datetime); diff --git a/components/timezone/src/lib.rs b/components/timezone/src/lib.rs index d5c02298e6d..ca8e16545f9 100644 --- a/components/timezone/src/lib.rs +++ b/components/timezone/src/lib.rs @@ -94,8 +94,7 @@ //! "uschi".parse::>().ok().map(Into::into); //! //! // Compute the metazone at January 1, 2022: -//! let mzc = MetazoneCalculator::try_new_unstable(&icu_testdata::unstable()) -//! .unwrap(); +//! let mzc = MetazoneCalculator::new(); //! let datetime = DateTime::try_new_iso_datetime(2022, 1, 1, 0, 0, 0).unwrap(); //! time_zone.maybe_calculate_metazone(&mzc, &datetime); //! diff --git a/components/timezone/src/provider.rs b/components/timezone/src/provider.rs index 42561eb78f9..6556fb54c9d 100644 --- a/components/timezone/src/provider.rs +++ b/components/timezone/src/provider.rs @@ -22,7 +22,9 @@ use zerovec::ule::{AsULE, ULE}; use zerovec::{ZeroMap2d, ZeroSlice, ZeroVec}; #[cfg(feature = "data")] -pub(crate) struct Baked; +#[derive(Debug)] +/// Baked data +pub struct Baked; #[cfg(feature = "data")] const _: () = { diff --git a/components/timezone/src/time_zone.rs b/components/timezone/src/time_zone.rs index 38d153fcaf6..ea6cdb44bce 100644 --- a/components/timezone/src/time_zone.rs +++ b/components/timezone/src/time_zone.rs @@ -127,8 +127,7 @@ impl CustomTimeZone { /// use icu_locid::locale; /// use tinystr::tinystr; /// - /// let mzc = MetazoneCalculator::try_new_unstable(&icu_testdata::unstable()) - /// .expect("data exists"); + /// let mzc = MetazoneCalculator::new(); /// let mut tz = CustomTimeZone { /// gmt_offset: Some("+11".parse().expect("Failed to parse a GMT offset.")), /// time_zone_id: Some(TimeZoneBcp47Id(tinystr!(8, "gugum"))), diff --git a/ffi/diplomat/tests/missing_apis.txt b/ffi/diplomat/tests/missing_apis.txt index f65ae4091d4..ed4652b4523 100644 --- a/ffi/diplomat/tests/missing_apis.txt +++ b/ffi/diplomat/tests/missing_apis.txt @@ -49,6 +49,7 @@ 
icu::locid_transform::LocaleDirectionality::get#FnInStruct icu::locid_transform::LocaleDirectionality::is_left_to_right#FnInStruct icu::locid_transform::LocaleDirectionality::is_right_to_left#FnInStruct icu::locid_transform::LocaleDirectionality::new#FnInStruct +icu::locid_transform::LocaleDirectionality::new_with_expander#FnInStruct icu::locid_transform::LocaleDirectionality::try_new_unstable#FnInStruct icu::locid_transform::LocaleDirectionality::try_new_with_expander_unstable#FnInStruct icu::locid_transform::LocaleExpander::new#FnInStruct diff --git a/provider/core/src/hello_world.rs b/provider/core/src/hello_world.rs index ec508ac48a1..edc465b0365 100644 --- a/provider/core/src/hello_world.rs +++ b/provider/core/src/hello_world.rs @@ -6,8 +6,6 @@ #![allow(clippy::exhaustive_structs)] // data struct module -#[cfg(feature = "datagen")] -use crate::datagen::IterableDataProvider; use crate::prelude::*; use alloc::borrow::Cow; use alloc::string::String; @@ -133,7 +131,7 @@ impl DataPayload { // AnyProvider support. #[cfg(not(feature = "datagen"))] -impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); +crate::impl_dynamic_data_provider!(HelloWorldProvider, [HelloWorldV1Marker,], AnyMarker); #[cfg(feature = "deserialize_json")] /// A data provider returning Hello World strings in different languages as JSON blobs. @@ -187,7 +185,7 @@ impl BufferProvider for HelloWorldJsonProvider { } #[cfg(feature = "datagen")] -impl IterableDataProvider for HelloWorldProvider { +impl crate::datagen::IterableDataProvider for HelloWorldProvider { fn supported_locales(&self) -> Result, DataError> { #[allow(clippy::unwrap_used)] // datagen Ok(Self::DATA @@ -199,7 +197,7 @@ impl IterableDataProvider for HelloWorldProvider { } #[cfg(feature = "datagen")] -make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); +crate::make_exportable_provider!(HelloWorldProvider, [HelloWorldV1Marker,]); /// A type that formats localized "hello world" strings. 
/// @@ -236,12 +234,22 @@ pub struct FormattedHelloWorld<'l> { impl HelloWorldFormatter { /// Creates a new [`HelloWorldFormatter`] for the specified locale. /// - /// See [`HelloWorldFormatter`] for an example. - /// /// [📚 Help choosing a constructor](crate::constructors) - ///

- /// ⚠️ The bounds on this function may change over time, including in SemVer minor releases. - ///
+ pub fn try_new(locale: &DataLocale) -> Result { + Self::try_new_unstable(&HelloWorldProvider, locale) + } + + crate::gen_any_buffer_data_constructors!(locale: include, options: skip, error: DataError, + #[cfg(skip_new)] + functions: [ + try_new, + try_new_with_any_provider, + try_new_with_buffer_provider, + try_new_unstable, + Self, + ]); + + #[doc = crate::gen_any_buffer_unstable_docs!(UNSTABLE, Self::try_new)] pub fn try_new_unstable

(provider: &P, locale: &DataLocale) -> Result where P: DataProvider, @@ -255,8 +263,6 @@ impl HelloWorldFormatter { Ok(Self { data }) } - crate::gen_any_buffer_constructors!(locale: include, options: skip, error: DataError); - /// Formats a hello world message, returning a [`FormattedHelloWorld`]. #[allow(clippy::needless_lifetimes)] // documentary example pub fn format<'l>(&'l self) -> FormattedHelloWorld<'l> { @@ -290,6 +296,7 @@ writeable::impl_display_with_writeable!(FormattedHelloWorld<'_>); #[cfg(feature = "datagen")] #[test] fn test_iter() { + use crate::datagen::IterableDataProvider; use icu_locid::locale; assert_eq!( diff --git a/provider/core/src/helpers.rs b/provider/core/src/helpers.rs deleted file mode 100644 index 998e656da8a..00000000000 --- a/provider/core/src/helpers.rs +++ /dev/null @@ -1,309 +0,0 @@ -// This file is part of ICU4X. For terms of use, please see the file -// called LICENSE at the top level of the ICU4X source tree -// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). - -//! Internal helper functions. - -/// Const function to compute the FxHash of a byte array with little-endian byte order. -/// -/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our -/// use case since the strings being hashed originate from a trusted source (the ICU4X -/// components), and the hashes are computed at compile time, so we can check for collisions. -/// -/// We could have considered a SHA or other cryptographic hash function. However, we are using -/// FxHash because: -/// -/// 1. There is precedent for this algorithm in Rust -/// 2. The algorithm is easy to implement as a const function -/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree -/// 4. 
FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, -/// such that truncation would be required in order to fit into a u32, partially reducing -/// the benefit of a cryptographically secure algorithm -// The indexing operations in this function have been reviewed in detail and won't panic. -#[allow(clippy::indexing_slicing)] -pub const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 { - // This code is adapted from https://github.com/rust-lang/rustc-hash, - // whose license text is reproduced below. - // - // Copyright 2015 The Rust Project Developers. See the COPYRIGHT - // file at the top-level directory of this distribution and at - // http://rust-lang.org/COPYRIGHT. - // - // Licensed under the Apache License, Version 2.0 or the MIT license - // , at your - // option. This file may not be copied, modified, or distributed - // except according to those terms. - - if ignore_leading + ignore_trailing >= bytes.len() { - return 0; - } - - #[inline] - const fn hash_word_32(mut hash: u32, word: u32) -> u32 { - const ROTATE: u32 = 5; - const SEED32: u32 = 0x9e_37_79_b9; - hash = hash.rotate_left(ROTATE); - hash ^= word; - hash = hash.wrapping_mul(SEED32); - hash - } - - let mut cursor = ignore_leading; - let end = bytes.len() - ignore_trailing; - let mut hash = 0; - - while end - cursor >= 4 { - let word = u32::from_le_bytes([ - bytes[cursor], - bytes[cursor + 1], - bytes[cursor + 2], - bytes[cursor + 3], - ]); - hash = hash_word_32(hash, word); - cursor += 4; - } - - if end - cursor >= 2 { - let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]); - hash = hash_word_32(hash, word as u32); - cursor += 2; - } - - if end - cursor >= 1 { - hash = hash_word_32(hash, bytes[cursor] as u32); - } - - hash -} - -#[test] -fn test_hash_word_32() { - assert_eq!(0, fxhash_32(b"", 0, 0)); - assert_eq!(0, fxhash_32(b"a", 1, 0)); - assert_eq!(0, fxhash_32(b"a", 0, 1)); - assert_eq!(0, fxhash_32(b"a", 0, 10)); 
- assert_eq!(0, fxhash_32(b"a", 10, 0)); - assert_eq!(0, fxhash_32(b"a", 1, 1)); - assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); - assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); - assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); - assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); - assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); - assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); - assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); - assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); - assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); - - assert_eq!( - fxhash_32(crate::tagged!("props/sc=Khmr@1").as_bytes(), 0, 0), - fxhash_32(crate::tagged!("props/sc=Samr@1").as_bytes(), 0, 0) - ); - - assert_ne!( - fxhash_32( - crate::tagged!("props/sc=Khmr@1").as_bytes(), - crate::leading_tag!().len(), - crate::trailing_tag!().len() - ), - fxhash_32( - crate::tagged!("props/sc=Samr@1").as_bytes(), - crate::leading_tag!().len(), - crate::trailing_tag!().len() - ) - ); -} - -#[doc(hidden)] -#[macro_export] -macro_rules! gen_any_buffer_docs { - (ANY, $krate:path, $see_also:path) => { - concat!( - "Creates a new instance using an [`AnyProvider`](", - stringify!($krate), - "::AnyProvider).\n\n", - "For details on the behavior of this function, see: [`", - stringify!($see_also), - "`]\n\n", - "[📚 Help choosing a constructor](", - stringify!($krate), - "::constructors)", - ) - }; - (BUFFER, $krate:path, $see_also:path) => { - concat!( - "✨ **Enabled with the `\"serde\"` feature.**\n\n", - "Creates a new instance using a [`BufferProvider`](", - stringify!($krate), - "::BufferProvider).\n\n", - "For details on the behavior of this function, see: [`", - stringify!($see_also), - "`]\n\n", - "[📚 Help choosing a constructor](", - stringify!($krate), - "::constructors)", - ) - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! 
gen_any_buffer_constructors { - (locale: skip, options: skip, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: skip, - options: skip, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: skip, options: skip, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized)) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting()) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized)) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing()) - } - }; - - - (locale: skip, options: skip, result: $result_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized)) -> $result_ty { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting()) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized)) -> $result_ty { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing()) - } - }; - - (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: skip, - $options_arg: $options_ty, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: skip, $options_arg:ident: $options_ty:ty, result: $result_ty:ty, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), 
$options_arg: $options_ty) -> $result_ty { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> $result_ty { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), $options_arg) - } - }; - (locale: skip, $options_arg:ident: $options_ty:ty, error: $error_ty:ty, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), $options_arg: $options_ty) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), $options_arg: $options_ty) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), $options_arg) - } - }; - (locale: include, options: skip, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: include, - options: skip, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: include, options: skip, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), locale) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale) -> Result { - use $crate::AsDeserializingBufferProvider; - 
$f1(&provider.as_deserializing(), locale) - } - }; - - (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: include, - $config_arg: $config_ty, - $options_arg: $options_ty, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: include, $config_arg:ident: $config_ty:path, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), locale, $config_arg, $options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $config_arg: $config_ty, $options_arg: $options_ty) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), locale, $config_arg, $options_arg) - } - }; - - (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path) => { - $crate::gen_any_buffer_constructors!( - locale: include, - $options_arg: $options_ty, - error: $error_ty, - functions: [ - Self::try_new_unstable, - try_new_with_any_provider, - try_new_with_buffer_provider - ] - ); - }; - (locale: include, $options_arg:ident: $options_ty:path, error: $error_ty:path, functions: [$f1:path, $f2:ident, $f3:ident]) => { - #[doc = $crate::gen_any_buffer_docs!(ANY, $crate, $f1)] - pub fn $f2(provider: &(impl $crate::AnyProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { - use $crate::AsDowncastingAnyProvider; - $f1(&provider.as_downcasting(), locale, 
$options_arg) - } - #[cfg(feature = "serde")] - #[doc = $crate::gen_any_buffer_docs!(BUFFER, $crate, $f1)] - pub fn $f3(provider: &(impl $crate::BufferProvider + ?Sized), locale: &$crate::DataLocale, $options_arg: $options_ty) -> Result { - use $crate::AsDeserializingBufferProvider; - $f1(&provider.as_deserializing(), locale, $options_arg) - } - }; -} diff --git a/provider/core/src/key.rs b/provider/core/src/key.rs index f1f19bac261..5a825beadf7 100644 --- a/provider/core/src/key.rs +++ b/provider/core/src/key.rs @@ -3,7 +3,6 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::error::{DataError, DataErrorKind}; -use crate::helpers; use alloc::borrow::Cow; use core::fmt; @@ -50,7 +49,7 @@ pub struct DataKeyHash([u8; 4]); impl DataKeyHash { const fn compute_from_path(path: DataKeyPath) -> Self { - let hash = helpers::fxhash_32( + let hash = fxhash_32( path.tagged.as_bytes(), leading_tag!().len(), trailing_tag!().len(), @@ -64,6 +63,79 @@ impl DataKeyHash { } } +/// Const function to compute the FxHash of a byte array. +/// +/// FxHash is a speedy hash algorithm used within rustc. The algorithm is satisfactory for our +/// use case since the strings being hashed originate from a trusted source (the ICU4X +/// components), and the hashes are computed at compile time, so we can check for collisions. +/// +/// We could have considered a SHA or other cryptographic hash function. However, we are using +/// FxHash because: +/// +/// 1. There is precedent for this algorithm in Rust +/// 2. The algorithm is easy to implement as a const function +/// 3. The amount of code is small enough that we can reasonably keep the algorithm in-tree +/// 4. 
FxHash is designed to output 32-bit or 64-bit values, whereas SHA outputs more bits, +/// such that truncation would be required in order to fit into a u32, partially reducing +/// the benefit of a cryptographically secure algorithm +// The indexing operations in this function have been reviewed in detail and won't panic. +#[allow(clippy::indexing_slicing)] +const fn fxhash_32(bytes: &[u8], ignore_leading: usize, ignore_trailing: usize) -> u32 { + // This code is adapted from https://github.com/rust-lang/rustc-hash, + // whose license text is reproduced below. + // + // Copyright 2015 The Rust Project Developers. See the COPYRIGHT + // file at the top-level directory of this distribution and at + // http://rust-lang.org/COPYRIGHT. + // + // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or + // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license + // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your + // option. This file may not be copied, modified, or distributed + // except according to those terms. + + if ignore_leading + ignore_trailing >= bytes.len() { + return 0; + } + + #[inline] + const fn hash_word_32(mut hash: u32, word: u32) -> u32 { + const ROTATE: u32 = 5; + const SEED32: u32 = 0x9e_37_79_b9; + hash = hash.rotate_left(ROTATE); + hash ^= word; + hash = hash.wrapping_mul(SEED32); + hash + } + + let mut cursor = ignore_leading; + let end = bytes.len() - ignore_trailing; + let mut hash = 0; + + while end - cursor >= 4 { + let word = u32::from_le_bytes([ + bytes[cursor], + bytes[cursor + 1], + bytes[cursor + 2], + bytes[cursor + 3], + ]); + hash = hash_word_32(hash, word); + cursor += 4; + } + + if end - cursor >= 2 { + let word = u16::from_le_bytes([bytes[cursor], bytes[cursor + 1]]); + hash = hash_word_32(hash, word as u32); + cursor += 2; + } + + if end - cursor >= 1 { + hash = hash_word_32(hash, bytes[cursor] as u32); + } + + hash +} + impl<'a> zerovec::maps::ZeroMapKV<'a> for DataKeyHash { type Container = zerovec::ZeroVec<'a, DataKeyHash>; type Slice = zerovec::ZeroSlice; @@ -627,35 +699,50 @@ fn test_key_to_string() { }, ] {
writeable::assert_writeable_eq!(&cas.key, cas.expected); + assert_eq!(cas.expected, &*cas.key.path()); } } +#[test] +fn test_hash_word_32() { + assert_eq!(0, fxhash_32(b"", 0, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 0)); + assert_eq!(0, fxhash_32(b"a", 0, 1)); + assert_eq!(0, fxhash_32(b"a", 0, 10)); + assert_eq!(0, fxhash_32(b"a", 10, 0)); + assert_eq!(0, fxhash_32(b"a", 1, 1)); + assert_eq!(0xF3051F19, fxhash_32(b"a", 0, 0)); + assert_eq!(0x2F9DF119, fxhash_32(b"ab", 0, 0)); + assert_eq!(0xCB1D9396, fxhash_32(b"abc", 0, 0)); + assert_eq!(0x8628F119, fxhash_32(b"abcd", 0, 0)); + assert_eq!(0xBEBDB56D, fxhash_32(b"abcde", 0, 0)); + assert_eq!(0x1CE8476D, fxhash_32(b"abcdef", 0, 0)); + assert_eq!(0xC0F176A4, fxhash_32(b"abcdefg", 0, 0)); + assert_eq!(0x09AB476D, fxhash_32(b"abcdefgh", 0, 0)); + assert_eq!(0xB72F5D88, fxhash_32(b"abcdefghi", 0, 0)); +} + #[test] fn test_key_hash() { struct KeyTestCase { pub key: DataKey, pub hash: DataKeyHash, - pub path: &'static str, } for cas in [ KeyTestCase { key: data_key!("core/cardinal@1"), hash: DataKeyHash([172, 207, 42, 236]), - path: "core/cardinal@1", }, KeyTestCase { key: data_key!("core/maxlengthsubcatg@1"), hash: DataKeyHash([193, 6, 79, 61]), - path: "core/maxlengthsubcatg@1", }, KeyTestCase { key: data_key!("core/cardinal@65535"), hash: DataKeyHash([176, 131, 182, 223]), - path: "core/cardinal@65535", }, ] { - assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.path); - assert_eq!(cas.path, &*cas.key.path(), "{}", cas.path); + assert_eq!(cas.hash, cas.key.hashed(), "{}", cas.key); } } diff --git a/provider/core/src/lib.rs b/provider/core/src/lib.rs index 6d3afc4e179..ce3263688c2 100644 --- a/provider/core/src/lib.rs +++ b/provider/core/src/lib.rs @@ -139,7 +139,6 @@ extern crate alloc; mod data_provider; mod error; -mod helpers; mod key; mod request; mod response; @@ -148,12 +147,9 @@ pub mod any; pub mod buf; pub mod constructors; #[cfg(feature = "datagen")] -#[macro_use] pub mod datagen; -#[macro_use] pub mod 
dynutil; pub mod hello_world; -#[macro_use] pub mod marker; #[cfg(feature = "serde")] pub mod serde; diff --git a/tools/make/data.toml b/tools/make/data.toml index 229c4773ba3..e01c2c5a27f 100644 --- a/tools/make/data.toml +++ b/tools/make/data.toml @@ -154,16 +154,6 @@ if greater_than ${output_length} 0 end ''' -[tasks.full-data] -description = "Builds a complete postcard file from latest data." -category = "ICU4X Data" -dependencies = ["bakeddata"] -script_runner = "@duckscript" -script = ''' -exec --fail-on-error cargo run -p icu_datagen --no-default-features --features rayon,provider_baked,bin,use_wasm,networking --release -- --locales basic moderate --keys experimental-all --format mod --out nonbake -rm -r nonbake -''' - [tasks.download-repo-sources] description = "Download fresh CLDR JSON and icuexportdata, overwriting the existing CLDR JSON/icuexportdata." category = "ICU4X Data"