Skip to content

Commit

Permalink
Adding datagen feature to meta crate (#5047)
Browse files Browse the repository at this point in the history
This has two advantages:
* The rustdoc for `icu::datetime` will match that of `icu_datetime`.
* It improves the feature matrix of `icu_datagen` and the future
`icu_datagen_provider`, in particular because the `icu/experimental`
feature can be used in conjunction with the `icu/datagen` feature.
  • Loading branch information
robertbastian authored Jun 13, 2024
1 parent 5322942 commit 275f3f7
Show file tree
Hide file tree
Showing 74 changed files with 280 additions and 312 deletions.
14 changes: 0 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions components/icu/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,21 @@ compiled_data = [
"icu_timezone/compiled_data",
"icu_experimental?/compiled_data",
]
# Meta-crate `datagen` feature: turns on the `datagen` feature of each
# component crate listed below, so downstream crates can request `icu/datagen`
# instead of naming every component individually.
datagen = [
"icu_calendar/datagen",
"icu_casemap/datagen",
"icu_collator/datagen",
"icu_datetime/datagen",
"icu_decimal/datagen",
"icu_list/datagen",
"icu_locale/datagen",
"icu_normalizer/datagen",
"icu_plurals/datagen",
"icu_properties/datagen",
"icu_segmenter/datagen",
"icu_timezone/datagen",
# The `?` form only forwards the feature if `icu_experimental` is already
# enabled by something else; it does not pull the dependency in by itself.
"icu_experimental?/datagen",
]
serde_human = [
"icu_list/serde_human"
]
Expand Down
39 changes: 2 additions & 37 deletions provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ all-features = true

# DatagenDriver
displaydoc = { workspace = true }
icu_locale_core = { workspace = true, features = ["std"] }
icu = { workspace = true }
icu_provider = { workspace = true, features = ["std", "logging", "datagen"]}
log = { workspace = true }
memchr = { workspace = true }
Expand All @@ -47,22 +47,6 @@ icu_provider_blob = { workspace = true, features = ["export"], optional = true }
icu_provider_fs = { workspace = true, features = ["export"], optional = true }
icu_provider_baked = { workspace = true, features = ["export"], optional = true }

# The components are needed for the registry, provider, baked exporter, with different features
icu = { workspace = true }
icu_calendar = { workspace = true }
icu_casemap = { workspace = true }
icu_collator = { workspace = true }
icu_datetime = { workspace = true }
icu_decimal = { workspace = true }
icu_list = { workspace = true }
icu_locale = { workspace = true }
icu_normalizer = { workspace = true }
icu_plurals = { workspace = true }
icu_properties = { workspace = true }
icu_segmenter = { workspace = true }
icu_timezone = { workspace = true }
icu_experimental = { workspace = true, optional = true }

# DatagenProvider

## ICU infrastructure
Expand Down Expand Up @@ -101,21 +85,7 @@ simple_logger = { workspace = true }
[features]
default = ["use_wasm", "networking", "rayon", "fs_exporter", "blob_exporter", "baked_exporter", "provider"]
provider = [
"icu_calendar/datagen",
"icu_casemap/datagen",
"icu_collator/datagen",
"icu_datetime/datagen",
"icu_decimal/datagen",
"icu_experimental?/datagen",
"icu_list/datagen",
"icu_locale/datagen",
"icu_locale_core/serde",
"icu_normalizer/datagen",
"icu_plurals/datagen",
"icu_properties/datagen",
"icu_segmenter/datagen",
"icu_segmenter/lstm",
"icu_timezone/datagen",
"icu/datagen",
"dep:calendrical_calculations",
"dep:icu_codepointtrie_builder",
"dep:icu_collections",
Expand Down Expand Up @@ -149,11 +119,6 @@ use_wasm = ["icu_codepointtrie_builder?/wasm"]
use_icu4c = ["icu_codepointtrie_builder?/icu4c"]
networking = ["dep:ureq"]
experimental_components = [
"dep:icu_experimental",
# For registry
"icu_datetime/experimental",
# For registry
"icu_plurals/experimental",
# Only required if both provider and experimental are enabled, but that's not expressible with features
"dep:num-bigint",
"dep:num-rational",
Expand Down
14 changes: 7 additions & 7 deletions provider/datagen/src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

use crate::rayon_prelude::*;
use displaydoc::Display;
use icu_locale::fallback::LocaleFallbackIterator;
use icu_locale::LocaleFallbacker;
use icu_locale_core::extensions::unicode::key;
use icu_locale_core::LanguageIdentifier;
use icu_locale_core::ParseError;
use icu::locale::extensions::unicode::key;
use icu::locale::fallback::LocaleFallbackIterator;
use icu::locale::LanguageIdentifier;
use icu::locale::LocaleFallbacker;
use icu::locale::ParseError;
use icu_provider::datagen::*;
use icu_provider::prelude::*;
use std::collections::HashMap;
Expand Down Expand Up @@ -1094,7 +1094,7 @@ impl fmt::Display for DisplayDuration {

#[test]
fn test_collation_filtering() {
use icu_locale_core::langid;
use icu::locale::langid;
use std::collections::BTreeSet;

#[derive(Debug)]
Expand Down Expand Up @@ -1177,7 +1177,7 @@ fn test_collation_filtering() {
for cas in cases {
let resolved_locales = select_locales_for_marker(
&crate::provider::DatagenProvider::new_testing(),
icu_collator::provider::CollationDataV1Marker::INFO,
icu::collator::provider::CollationDataV1Marker::INFO,
&LocalesWithOrWithoutFallback::WithoutFallback {
langids: [cas.language.clone()].into_iter().collect(),
},
Expand Down
24 changes: 12 additions & 12 deletions provider/datagen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ pub mod prelude {
DatagenDriver, DeduplicationStrategy, FallbackOptions, LocaleFamily, NoFallbackOptions,
};
#[doc(no_inline)]
pub use icu_locale_core::{langid, LanguageIdentifier};
pub use icu::locale::{langid, LanguageIdentifier};
#[doc(no_inline)]
pub use icu_provider::{datagen::DataExporter, DataMarker, DataMarkerInfo};
}
Expand Down Expand Up @@ -150,7 +150,7 @@ macro_rules! cb {
/// # use icu_provider::DataMarker;
/// assert_eq!(
/// icu_datagen::marker("list/and@1"),
/// Some(icu_list::provider::AndListV1Marker::INFO),
/// Some(icu::list::provider::AndListV1Marker::INFO),
/// );
/// ```
pub fn marker<S: AsRef<str>>(string: S) -> Option<DataMarkerInfo> {
Expand Down Expand Up @@ -278,9 +278,9 @@ fn test_markers() {
"trash",
]),
vec![
icu_list::provider::AndListV1Marker::INFO,
icu_datetime::provider::calendar::GregorianDateLengthsV1Marker::INFO,
icu_decimal::provider::DecimalSymbolsV1Marker::INFO,
icu::list::provider::AndListV1Marker::INFO,
icu::datetime::provider::calendar::GregorianDateLengthsV1Marker::INFO,
icu::decimal::provider::DecimalSymbolsV1Marker::INFO,
]
);
}
Expand All @@ -290,13 +290,13 @@ fn test_markers_from_bin() {
assert_eq!(
markers_from_bin_inner(include_bytes!("../tests/data/tutorial_buffer.wasm")),
vec![
icu_datetime::provider::calendar::GregorianDateLengthsV1Marker::INFO,
icu_datetime::provider::calendar::GregorianDateSymbolsV1Marker::INFO,
icu_datetime::provider::calendar::TimeLengthsV1Marker::INFO,
icu_datetime::provider::calendar::TimeSymbolsV1Marker::INFO,
icu_calendar::provider::WeekDataV1Marker::INFO,
icu_decimal::provider::DecimalSymbolsV1Marker::INFO,
icu_plurals::provider::OrdinalV1Marker::INFO,
icu::datetime::provider::calendar::GregorianDateLengthsV1Marker::INFO,
icu::datetime::provider::calendar::GregorianDateSymbolsV1Marker::INFO,
icu::datetime::provider::calendar::TimeLengthsV1Marker::INFO,
icu::datetime::provider::calendar::TimeSymbolsV1Marker::INFO,
icu::calendar::provider::WeekDataV1Marker::INFO,
icu::decimal::provider::DecimalSymbolsV1Marker::INFO,
icu::plurals::provider::OrdinalV1Marker::INFO,
]
);
}
2 changes: 1 addition & 1 deletion provider/datagen/src/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ impl DatagenProvider {
pub fn locales_for_coverage_levels(
&self,
levels: impl IntoIterator<Item = CoverageLevel>,
) -> Result<impl IntoIterator<Item = icu_locale_core::LanguageIdentifier>, DataError> {
) -> Result<impl IntoIterator<Item = icu::locale::LanguageIdentifier>, DataError> {
self.cldr()?.locales(levels)
}
}
Expand Down
8 changes: 4 additions & 4 deletions provider/datagen/src/provider/tests/make_testdata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ struct ZeroCopyCheckExporter {
// Every entry in this list is a bug that needs to be addressed before stabilization.
const EXPECTED_VIOLATIONS: &[DataMarkerInfo] = &[
// https://github.com/unicode-org/icu4x/issues/1678
icu_datetime::provider::calendar::DateSkeletonPatternsV1Marker::INFO,
icu::datetime::provider::calendar::DateSkeletonPatternsV1Marker::INFO,
];

// Types in this list can be zero-copy deserialized (and do not contain allocated data),
Expand All @@ -133,9 +133,9 @@ const EXPECTED_TRANSIENT_VIOLATIONS: &[DataMarkerInfo] = &[
// Regex DFAs need to be validated, which involves creating a BTreeMap.
// If required we could avoid this using one of the approaches in
// https://github.com/unicode-org/icu4x/pulls/3697.
icu_list::provider::AndListV1Marker::INFO,
icu_list::provider::OrListV1Marker::INFO,
icu_list::provider::UnitListV1Marker::INFO,
icu::list::provider::AndListV1Marker::INFO,
icu::list::provider::OrListV1Marker::INFO,
icu::list::provider::UnitListV1Marker::INFO,
];

impl DataExporter for ZeroCopyCheckExporter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::collections::HashSet;

use crate::provider::DatagenProvider;
use calendrical_calculations::chinese_based::{Chinese, ChineseBased, Dangi};
use icu_calendar::provider::chinese_based::*;
use icu::calendar::provider::chinese_based::*;
use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;

Expand Down
2 changes: 1 addition & 1 deletion provider/datagen/src/transform/cldr/calendar/islamic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use calendrical_calculations::islamic::{
IslamicBasedMarker, ObservationalIslamicMarker, SaudiIslamicMarker,
};
use calendrical_calculations::iso;
use icu_calendar::provider::islamic::*;
use icu::calendar::provider::islamic::*;
use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;

Expand Down
4 changes: 2 additions & 2 deletions provider/datagen/src/transform/cldr/calendar/japanese.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

use crate::provider::transform::cldr::cldr_serde;
use crate::provider::DatagenProvider;
use icu_calendar::provider::*;
use icu_locale_core::langid;
use icu::calendar::provider::*;
use icu::locale::langid;
use icu_provider::datagen::IterableDataProvider;
use icu_provider::prelude::*;
use std::collections::BTreeMap;
Expand Down
8 changes: 4 additions & 4 deletions provider/datagen/src/transform/cldr/characters/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ use std::marker::PhantomData;
use crate::provider::transform::cldr::cldr_serde;
use crate::provider::DatagenProvider;
use crate::provider::IterableDataProviderCached;
use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
use icu_properties::provider::*;
use icu::collections::codepointinvliststringlist::CodePointInversionListAndStringList;
use icu::properties::provider::*;
use icu_provider::prelude::*;
use itertools::Itertools;

Expand Down Expand Up @@ -322,8 +322,8 @@ fn string_to_prop_unicodeset(s: &str) -> PropertyUnicodeSetV1<'static> {
#[cfg(test)]
mod tests {
use super::*;
use icu_locale_core::langid;
use icu_properties::sets::UnicodeSetData;
use icu::locale::langid;
use icu::properties::sets::UnicodeSetData;

#[test]
fn test_parse_exemplar_chars() {
Expand Down
2 changes: 1 addition & 1 deletion provider/datagen/src/transform/cldr/cldr_serde/ca.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-dates-full/main/en/ca-gregorian.json>

use icu_datetime::provider::neo::marker_attrs::{Context, Length, PatternLength};
use icu::datetime::provider::neo::marker_attrs::{Context, Length, PatternLength};
use serde::Deserialize;
use std::borrow::Cow;
use std::collections::BTreeMap;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ use std::collections::HashMap;
pub(in crate::provider) struct Resource {
#[serde(rename = "coverageLevels")]
pub(in crate::provider) coverage_levels:
HashMap<icu_locale_core::LanguageIdentifier, crate::CoverageLevel>,
HashMap<icu::locale::LanguageIdentifier, crate::CoverageLevel>,
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/likelySubtags.json>

use icu_locale_core::LanguageIdentifier;
use icu::locale::LanguageIdentifier;
use serde::Deserialize;
use std::collections::HashMap;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::marker::PhantomData;

use icu_locale_core::LanguageIdentifier;
use icu::locale::LanguageIdentifier;
use serde::de::Error;
use serde::de::MapAccess;
use serde::de::Visitor;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/parentLocales.json>

use icu_locale_core::LanguageIdentifier;
use icu::locale::LanguageIdentifier;
use serde::Deserialize;
use std::collections::{BTreeMap, HashMap};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/pluralRanges.json>

use icu_locale_core::LanguageIdentifier;
use icu::locale::LanguageIdentifier;
use serde::{de::Visitor, Deserialize};
use std::collections::HashMap;

Expand Down
2 changes: 1 addition & 1 deletion provider/datagen/src/transform/cldr/cldr_serde/plurals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/plurals.json>

use icu_locale_core::LanguageIdentifier;
use icu::locale::LanguageIdentifier;
use serde::Deserialize;
use std::collections::HashMap;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-bcp47/bcp47/timezone.json>

use icu_datetime::provider::time_zones::TimeZoneBcp47Id;
use icu::datetime::provider::time_zones::TimeZoneBcp47Id;
use serde::Deserialize;
use std::collections::BTreeMap;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
//! Sample file:
//! <https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/metaZones.json>

use icu_datetime::provider::time_zones::MetazoneId;
use icu::datetime::provider::time_zones::MetazoneId;
use serde::Deserialize;
use std::collections::BTreeMap;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

use std::fmt::Display;

use icu_locale_core::Locale;
use icu::locale::Locale;
use serde::{Deserialize, Deserializer};

#[derive(PartialEq, Debug, Deserialize)]
Expand Down
Loading

0 comments on commit 275f3f7

Please sign in to comment.