Skip to content

Commit

Permalink
Move collator and normalizer from experimental to components (#2058)
Browse files Browse the repository at this point in the history
Closes #971. Closes #972.
  • Loading branch information
hsivonen authored Jul 15, 2022
1 parent 01599b8 commit 5fa8537
Show file tree
Hide file tree
Showing 43 changed files with 39 additions and 28 deletions.
4 changes: 2 additions & 2 deletions CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@

# List of components with at least two owners as specified in docs/crate-ownership.

components/collator/ @hsivonen @echeran
components/datetime/ @zbraniecki @gregtatum @nordzilla
components/decimal/ @sffc
components/icu/ @unicode-org/icu4x-owners
components/icu4x/ @unicode-org/icu4x-owners
components/locale_canonicalizer/ @dminor @zbraniecki
components/locid/ @zbraniecki @nciric
components/normalizer/ @hsivonen @echeran
components/plurals/ @zbraniecki @sffc
components/uniset/ @echeran @iainireland
experimental/bies/ @sffc
experimental/calendar/ @Manishearth @sffc
experimental/codepointtrie/ @echeran
experimental/collator/ @hsivonen @echeran
experimental/normalizer/ @hsivonen @echeran
experimental/provider_ppucd/ @echeran
experimental/segmenter/ @aethanyc @makotokato @sffc
ffi/capi/ @Manishearth
Expand Down
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,22 @@
resolver = "2"
members = [
"components/calendar",
"components/collator",
"components/datetime",
"components/decimal",
"components/icu",
"components/icu4x",
"components/list",
"components/locale_canonicalizer",
"components/locid",
"components/normalizer",
"components/plurals",
"components/properties",
"experimental/bies",
"experimental/casemapping",
"experimental/char16trie",
"utils/databake",
"utils/databake/derive",
"experimental/collator",
"experimental/normalizer",
"experimental/segmenter",
"ffi/capi_cdylib",
"ffi/diplomat",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ icu_codepointtrie = { version = "0.4", path = "../../utils/codepointtrie" }
icu_char16trie = { version = "0.1", path = "../../experimental/char16trie" }
icu_provider = { version = "0.6", path = "../../provider/core", features = ["macros"] }
icu_locid = { version = "0.6", path = "../../components/locid" }
icu_normalizer = { version = "0.6", path = "../../experimental/normalizer" }
icu_normalizer = { version = "0.6", path = "../../components/normalizer" }
icu_properties = { version = "0.6", path = "../../components/properties" }
icu_uniset = { version = "0.5", path = "../../utils/uniset" }
serde = { version = "1.0", default-features = false, features = ["derive", "alloc"], optional = true }
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ Clone `rust_icu` from <https://github.com/google/rust_icu> to `$PROJECTS/rust_ic

In `$PROJECTS/icu-build` run `make install`.

`cd $PROJECTS/icu4x/experimental/collator`
`cd $PROJECTS/icu4x/components/collator`

Run the fuzzer until a panic:

Expand All @@ -52,7 +52,7 @@ Once there is a panic, recompile with debug symbols by adding `--dev`:

Record with

`LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/experimental/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`
`LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/components/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`

## Design notes

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
//!
//! In `$PROJECTS/icu-build` run `make install`.
//!
//! `cd $PROJECTS/icu4x/experimental/collator`
//! `cd $PROJECTS/icu4x/components/collator`
//!
//! Run the fuzzer until a panic:
//!
Expand All @@ -71,7 +71,7 @@
//!
//! Record with
//!
//! `LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/experimental/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`
//! `LD_LIBRARY_PATH="$PROJECTS/localicu/lib" rr fuzz/target/x86_64-unknown-linux-gnu/debug/compare_utf16 -artifact_prefix=$PROJECTS/icu4x/components/collator/fuzz/artifacts/compare_utf16/ fuzz/artifacts/compare_utf16/crash-$ARTIFACTHASH`
//!
//! # Design notes
//!
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ bench = false # This option is required for Benchmark CI
default = []
serde = ["dep:serde", "icu_codepointtrie/serde", "zerovec/serde", "icu_uniset/serde", "icu_properties/serde", "icu_char16trie/serde"]
datagen = ["serde", "databake", "icu_codepointtrie/databake", "zerovec/databake", "icu_uniset/databake", "icu_properties/databake", "icu_char16trie/databake"]
experimental = []
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ use crate::error::NormalizerError;
use crate::provider::CanonicalDecompositionDataV1Marker;
use crate::provider::CompatibilityDecompositionSupplementV1Marker;
use crate::provider::DecompositionDataV1;
#[cfg(any(test, feature = "experimental"))]
use crate::provider::Uts46DecompositionSupplementV1Marker;
use alloc::string::String;
use alloc::vec::Vec;
Expand All @@ -93,6 +94,7 @@ use provider::CompatibilityDecompositionTablesV1Marker;
use provider::CompositionPassthroughV1;
use provider::DecompositionSupplementV1;
use provider::DecompositionTablesV1;
#[cfg(any(test, feature = "experimental"))]
use provider::Uts46CompositionPassthroughV1Marker;
use smallvec::SmallVec;
use u24::EMPTY_U24;
Expand All @@ -105,13 +107,15 @@ use zerovec::ZeroSlice;

/// Wraps a `DataPayload` for one of the decomposition supplement markers.
///
/// The `Uts46` variant is compiled only in test builds or when the
/// `experimental` Cargo feature is enabled.
enum SupplementPayloadHolder {
Compatibility(DataPayload<CompatibilityDecompositionSupplementV1Marker>),
#[cfg(any(test, feature = "experimental"))]
Uts46(DataPayload<Uts46DecompositionSupplementV1Marker>),
}

impl SupplementPayloadHolder {
/// Borrows the `DecompositionSupplementV1` from whichever variant `self` is,
/// by delegating to the wrapped payload's `get()`.
fn get(&self) -> &DecompositionSupplementV1 {
match self {
SupplementPayloadHolder::Compatibility(d) => d.get(),
// Compiled only in test builds or with the `experimental` feature.
#[cfg(any(test, feature = "experimental"))]
SupplementPayloadHolder::Uts46(d) => d.get(),
}
}
Expand All @@ -120,6 +124,7 @@ impl SupplementPayloadHolder {
/// Wraps a `DataPayload` for one of the composition passthrough markers.
///
/// The `Uts46` variant is compiled only in test builds or when the
/// `experimental` Cargo feature is enabled.
enum PassthroughPayloadHolder {
Canonical(DataPayload<CanonicalCompositionPassthroughV1Marker>),
Compatibility(DataPayload<CompatibilityCompositionPassthroughV1Marker>),
#[cfg(any(test, feature = "experimental"))]
Uts46(DataPayload<Uts46CompositionPassthroughV1Marker>),
}

Expand All @@ -128,6 +133,7 @@ impl PassthroughPayloadHolder {
match self {
PassthroughPayloadHolder::Canonical(d) => d.get(),
PassthroughPayloadHolder::Compatibility(d) => d.get(),
#[cfg(any(test, feature = "experimental"))]
PassthroughPayloadHolder::Uts46(d) => d.get(),
}
}
Expand Down Expand Up @@ -1159,6 +1165,7 @@ macro_rules! normalizer_methods {
}

/// Normalize a string slice into a `Write` sink.
#[cfg(feature = "experimental")]
pub fn normalize_to<W: core::fmt::Write + ?Sized>(
&self,
text: &str,
Expand Down Expand Up @@ -1193,6 +1200,7 @@ macro_rules! normalizer_methods {
///
/// Unpaired surrogates are mapped to the REPLACEMENT CHARACTER
/// before normalizing.
#[cfg(feature = "experimental")]
pub fn normalize_utf16_to<W: core::fmt::Write + ?Sized>(
&self,
text: &[u16],
Expand Down Expand Up @@ -1223,6 +1231,7 @@ macro_rules! normalizer_methods {
///
/// Errors are mapped to the REPLACEMENT CHARACTER according
/// to the WHATWG Encoding Standard.
#[cfg(feature = "experimental")]
pub fn normalize_utf8_to<W: core::fmt::Write + ?Sized>(
&self,
text: &[u8],
Expand Down Expand Up @@ -1363,6 +1372,7 @@ impl DecomposingNormalizer {
/// to other reorderable characters.
///
/// Deliberately private and not available outside the crate.
#[cfg(any(test, feature = "experimental"))]
fn try_new_uts46_decomposed_without_ignored_and_disallowed<D>(
data_provider: &D,
) -> Result<Self, NormalizerError>
Expand Down Expand Up @@ -1527,9 +1537,9 @@ impl ComposingNormalizer {
/// canonically equivalent with each other if they differ by how U+0345 is ordered relative
/// to other reorderable characters.
///
/// NOTE: This method should probably remain experimental when this crate moves to
/// `components` until suitability of this feature as part of IDNA processing has
/// been demonstrated.
/// NOTE: This method remains experimental until suitability of this feature as part of
/// IDNA processing has been demonstrated.
#[cfg(any(test, feature = "experimental"))]
pub fn try_new_uts46_without_ignored_and_disallowed<D>(
data_provider: &D,
) -> Result<Self, NormalizerError>
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 3 additions & 3 deletions provider/datagen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,17 @@ all-features = true

# ICU components
icu_calendar = { version = "0.6", path = "../../components/calendar", features = ["datagen"] }
icu_collator = { version = "0.6", path = "../../components/collator", features = ["datagen"] }
icu_datetime = { version = "0.6", path = "../../components/datetime", features = ["datagen"] }
icu_decimal = { version = "0.6", path = "../../components/decimal", features = ["datagen"] }
icu_list = { version = "0.6", path = "../../components/list", features = ["datagen"]}
icu_locale_canonicalizer = { version = "0.6", path = "../../components/locale_canonicalizer", features = ["datagen"] }
icu_normalizer = { version = "0.6", path = "../../components/normalizer", features = ["datagen"] }
icu_plurals = { version = "0.6", path = "../../components/plurals", features = ["datagen"] }
icu_properties = { version = "0.6", path = "../../components/properties", features = ["datagen"]}
# (experimental)
icu_casemapping = { version = "0.1", path = "../../experimental/casemapping", features = ["datagen"], optional = true }
icu_segmenter = { version = "0.6", path = "../../experimental/segmenter", features = ["datagen"], optional = true }
icu_collator = { version = "0.6", path = "../../experimental/collator", features = ["datagen"], optional = true }
icu_normalizer = { version = "0.6", path = "../../experimental/normalizer", features = ["datagen"], optional = true }

# ICU provider infrastructure
icu_provider = { version = "0.6", path = "../core", features = ["std", "log_error_context", "datagen"]}
Expand Down Expand Up @@ -92,7 +92,7 @@ icu_testdata = { path = "../testdata", features = ["metadata"] }

[features]
default = []
experimental = ["icu_casemapping", "icu_segmenter", "icu_collator", "icu_normalizer"]
experimental = ["icu_casemapping", "icu_segmenter"]
bin = ["clap", "eyre", "simple_logger"]

[[bin]]
Expand Down
18 changes: 10 additions & 8 deletions provider/datagen/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,29 +33,21 @@ pub fn all_keys() -> Vec<ResourceKey> {
icu_provider_adapters::fallback::provider::LocaleFallbackParentsV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_casemapping::provider::CaseMappingV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalDecompositionDataV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CompatibilityDecompositionSupplementV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::Uts46DecompositionSupplementV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalDecompositionTablesV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CompatibilityDecompositionTablesV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalCompositionsV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CanonicalCompositionPassthroughV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::CompatibilityCompositionPassthroughV1Marker::KEY,
#[cfg(feature = "experimental")]
icu_normalizer::provider::Uts46CompositionPassthroughV1Marker::KEY,
];
v.extend(icu_properties::provider::ALL_KEYS);
#[cfg(feature = "experimental")]
v.extend(icu_segmenter::ALL_KEYS);
#[cfg(feature = "experimental")]
v.extend(crate::transform::icuexport::collator::ALL_KEYS);
v
}
Expand Down Expand Up @@ -110,6 +102,16 @@ macro_rules! create_datagen_provider {
$crate::transform::cldr::TimeZonesProvider,
$crate::transform::cldr::WeekDataProvider,
$crate::transform::cldr::ListProvider,
$crate::transform::icuexport::collator::CollationProvider,
$crate::transform::icuexport::normalizer::CanonicalDecompositionDataProvider,
$crate::transform::icuexport::normalizer::CompatibilityDecompositionSupplementProvider,
$crate::transform::icuexport::normalizer::Uts46DecompositionSupplementProvider,
$crate::transform::icuexport::normalizer::CanonicalDecompositionTablesProvider,
$crate::transform::icuexport::normalizer::CompatibilityDecompositionTablesProvider,
$crate::transform::icuexport::normalizer::CanonicalCompositionsProvider,
$crate::transform::icuexport::normalizer::CanonicalCompositionPassthroughProvider,
$crate::transform::icuexport::normalizer::CompatibilityCompositionPassthroughProvider,
$crate::transform::icuexport::normalizer::Uts46CompositionPassthroughProvider,
$crate::transform::icuexport::uprops::EnumeratedPropertyCodePointTrieProvider,
$crate::transform::icuexport::uprops::ScriptWithExtensionsPropertyProvider,
$crate::transform::icuexport::uprops::BinaryPropertyUnicodeSetDataProvider,
Expand Down
2 changes: 0 additions & 2 deletions provider/datagen/src/transform/icuexport/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@
//! This module contains provider implementations backed by TOML files
//! exported from ICU.

#[cfg(feature = "experimental")]
pub mod collator;
#[cfg(feature = "experimental")]
pub mod normalizer;
#[cfg(feature = "experimental")]
pub mod ucase;
Expand Down
4 changes: 2 additions & 2 deletions provider/testdata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -171,19 +171,19 @@ writeable = { version = "0.4", path = "../../utils/writeable", optional = true }
# databake deps
icu_calendar = { version = "0.6", path = "../../components/calendar", optional = true }
icu_casemapping = { version = "0.1", path = "../../experimental/casemapping", optional = true }
icu_collator = { version = "0.6", path = "../../components/collator", optional = true }
icu_datetime = { version = "0.6", path = "../../components/datetime", optional = true }
icu_decimal = { version = "0.6", path = "../../components/decimal", optional = true }
icu_list = { version = "0.6", path = "../../components/list", optional = true }
icu_locale_canonicalizer = { version = "0.6", path = "../../components/locale_canonicalizer", optional = true }
icu_normalizer = { version = "0.6", path = "../../components/normalizer", optional = true }
icu_plurals = { version = "0.6", path = "../../components/plurals", optional = true }
icu_properties = { version = "0.6", path = "../../components/properties", optional = true }
icu_provider_adapters = { path = "../adapters" }
icu_segmenter = { version = "0.6", path = "../../experimental/segmenter", optional = true }
icu_char16trie = { version = "0.1", path = "../../experimental/char16trie", optional = true }
icu_codepointtrie = { version = "0.4", path = "../../utils/codepointtrie", optional = true }
icu_uniset = { version = "0.5", path = "../../utils/uniset", optional = true }
icu_normalizer = { version = "0.6", path = "../../experimental/normalizer", optional = true }
icu_collator = { version = "0.6", path = "../../experimental/collator", optional = true }
tinystr = { version = "0.6", path = "../../utils/tinystr", optional = true }
zerovec = { version = "0.7", path = "../../utils/zerovec", optional = true }

Expand Down
2 changes: 1 addition & 1 deletion tools/scripts/tidy.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ script = '''
exit_on_error true
glob_pattern = set "./**/Cargo.toml"
skip_paths = set_new "experimental/collator/fuzz/Cargo.toml" "experimental/normalizer/fuzz/Cargo.toml"
skip_paths = set_new "components/collator/fuzz/Cargo.toml" "components/normalizer/fuzz/Cargo.toml"
template = canonicalize README.tpl
Expand Down

0 comments on commit 5fa8537

Please sign in to comment.