diff --git a/boa_ast/src/lib.rs b/boa_ast/src/lib.rs index b0a12de497a..49ad17f7016 100644 --- a/boa_ast/src/lib.rs +++ b/boa_ast/src/lib.rs @@ -1,7 +1,7 @@ //! Boa's **`boa_ast`** crate implements an ECMAScript abstract syntax tree. //! //! # Crate Overview -//! **boa_ast** contains representations of [**Parse Nodes**][grammar] as defined by the ECMAScript +//! **`boa_ast`** contains representations of [**Parse Nodes**][grammar] as defined by the ECMAScript //! spec. Some `Parse Node`s are not represented by Boa's AST, because a lot of grammar productions //! are only used to throw [**Early Errors**][early], and don't influence the evaluation of the AST //! itself. @@ -17,14 +17,14 @@ //! Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! - **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [grammar]: https://tc39.es/ecma262/#sec-syntactic-grammar //! 
[early]: https://tc39.es/ecma262/#sec-static-semantic-rules diff --git a/boa_engine/src/builtins/intl/collator/mod.rs b/boa_engine/src/builtins/intl/collator/mod.rs index b0aae75f2e8..fe60adbe556 100644 --- a/boa_engine/src/builtins/intl/collator/mod.rs +++ b/boa_engine/src/builtins/intl/collator/mod.rs @@ -94,8 +94,16 @@ where .collation .take() .filter(|co| validate_extension(locale.id.clone(), key!("co"), co, provider)) - .or_else(|| locale.extensions.unicode.keywords.get(&key!("co")).cloned()) - .filter(|co| validate_extension(locale.id.clone(), key!("co"), co, provider)); + .or_else(|| { + locale + .extensions + .unicode + .keywords + .get(&key!("co")) + .cloned() + .filter(|co| validate_extension(locale.id.clone(), key!("co"), co, provider)) + }) + .filter(|co| co != &value!("search")); let numeric = options.numeric.or_else( @@ -251,9 +259,22 @@ impl Collator { // 18. Let relevantExtensionKeys be %Collator%.[[RelevantExtensionKeys]]. // 19. Let r be ResolveLocale(%Collator%.[[AvailableLocales]], requestedLocales, opt, relevantExtensionKeys, localeData). - let locale = + let mut locale = resolve_locale::(&requested_locales, &mut intl_options, context.icu()); + let collator_locale = { + // `collator_locale` needs to be different from the resolved locale because ECMA402 doesn't + // define `search` as a resolvable extension of a locale, so we need to add that extension + // only to the locale passed to the collator. + let mut col_loc = DataLocale::from(&locale); + if usage == Usage::Search { + intl_options.service_options.collation = None; + locale.extensions.unicode.keywords.remove(key!("co")); + col_loc.set_unicode_ext(key!("co"), value!("search")); + } + col_loc + }; + // 20. Set collator.[[Locale]] to r.[[locale]]. // 21. Let collation be r.[[co]]. @@ -282,32 +303,25 @@ impl Collator { // i. Let dataLocale be r.[[dataLocale]]. // ii. Let dataLocaleData be localeData.[[]]. // iii. Let sensitivity be dataLocaleData.[[sensitivity]]. 
- // For now, `Collator` always uses `Strength::Tertiary` as the default. - .unwrap_or(Sensitivity::Variant); + .or_else(|| (usage == Usage::Sort).then_some(Sensitivity::Variant)); // 29. Let ignorePunctuation be ? GetOption(options, "ignorePunctuation", boolean, empty, false). // 30. Set collator.[[IgnorePunctuation]] to ignorePunctuation. let ignore_punctuation = get_option::<bool>(&options, "ignorePunctuation", false, context)?.unwrap_or_default(); - let (strength, case_level) = sensitivity.to_collator_options(); + let (strength, case_level) = sensitivity.map(Sensitivity::to_collator_options).unzip(); - // TODO: change to use `unzip` when 1.66 releases. - let (alternate_handling, max_variable) = if ignore_punctuation { - ( - Some(AlternateHandling::Shifted), - Some(MaxVariable::Punctuation), - ) - } else { - (None, None) - }; + let (alternate_handling, max_variable) = ignore_punctuation + .then_some((AlternateHandling::Shifted, MaxVariable::Punctuation)) + .unzip(); let collator = context .icu() .provider() - .try_new_collator(&DataLocale::from(&locale), { + .try_new_collator(&collator_locale, { let mut options = icu_collator::CollatorOptions::new(); - options.strength = Some(strength); + options.strength = strength; options.case_level = case_level; options.case_first = case_first; options.numeric = Some(if numeric { Numeric::On } else { Numeric::Off }); @@ -327,7 +341,7 @@ impl Collator { numeric, case_first, usage, - sensitivity, + sensitivity: sensitivity.unwrap_or(Sensitivity::Variant), ignore_punctuation, collator, bound_compare: None, diff --git a/boa_engine/src/builtins/intl/collator/options.rs b/boa_engine/src/builtins/intl/collator/options.rs index e819a1d38ae..928c038b0ac 100644 --- a/boa_engine/src/builtins/intl/collator/options.rs +++ b/boa_engine/src/builtins/intl/collator/options.rs @@ -14,12 +14,12 @@ pub(crate) enum Sensitivity { impl Sensitivity { /// Converts the sensitivity option to the equivalent ICU4X collator options. 
- pub(crate) const fn to_collator_options(self) -> (Strength, Option<CaseLevel>) { + pub(crate) const fn to_collator_options(self) -> (Strength, CaseLevel) { match self { - Sensitivity::Base => (Strength::Primary, None), - Sensitivity::Accent => (Strength::Secondary, None), - Sensitivity::Case => (Strength::Primary, Some(CaseLevel::On)), - Sensitivity::Variant => (Strength::Tertiary, None), + Sensitivity::Base => (Strength::Primary, CaseLevel::Off), + Sensitivity::Accent => (Strength::Secondary, CaseLevel::Off), + Sensitivity::Case => (Strength::Primary, CaseLevel::On), + Sensitivity::Variant => (Strength::Tertiary, CaseLevel::On), } } } diff --git a/boa_engine/src/builtins/intl/locale/utils.rs b/boa_engine/src/builtins/intl/locale/utils.rs index 3e6cc6f0de4..981a5cfc00d 100644 --- a/boa_engine/src/builtins/intl/locale/utils.rs +++ b/boa_engine/src/builtins/intl/locale/utils.rs @@ -11,6 +11,7 @@ use crate::{ Context, JsNativeError, JsResult, JsValue, }; +use icu_collator::provider::CollationMetadataV1Marker; use icu_locid::{ extensions::unicode::{Key, Value}, subtags::Variants, @@ -178,7 +179,13 @@ pub(crate) fn best_available_locale( // the fallback algorithm, even if the used locale is exactly the same as the required // locale. 
match req.metadata.locale { - Some(loc) if loc == candidate => return Some(candidate.into_locale().id), + Some(loc) + if loc == candidate + // TODO: ugly hack to accept locales that fallback to "und" in the collator service + || (loc.is_empty() && M::KEY.path() == CollationMetadataV1Marker::KEY.path()) => + { + return Some(candidate.into_locale().id) + } None => return Some(candidate.into_locale().id), _ => {} } @@ -233,7 +240,14 @@ pub(crate) fn best_locale_for_provider( response .metadata .locale - .map(|dl| dl.into_locale().id) + .map(|dl| { + // TODO: ugly hack to accept locales that fallback to "und" in the collator service + if M::KEY.path() == CollationMetadataV1Marker::KEY.path() && dl.is_empty() { + candidate.clone() + } else { + dl.into_locale().id + } + }) .or(Some(candidate)) .filter(|loc| loc != &LanguageIdentifier::UND) } diff --git a/boa_engine/src/lib.rs b/boa_engine/src/lib.rs index c56607096b8..c3c8a0da550 100644 --- a/boa_engine/src/lib.rs +++ b/boa_engine/src/lib.rs @@ -14,14 +14,14 @@ //! Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! 
- **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [whatwg]: https://console.spec.whatwg.org //! [ecma-402]: https://tc39.es/ecma402 diff --git a/boa_gc/src/lib.rs b/boa_gc/src/lib.rs index 6985a707ded..dcc132ee4c8 100644 --- a/boa_gc/src/lib.rs +++ b/boa_gc/src/lib.rs @@ -1,7 +1,7 @@ //! Boa's **`boa_gc`** crate implements a garbage collector. //! //! # Crate Overview -//! **boa_gc** is a mark-sweep garbage collector that implements a Trace and Finalize trait +//! **`boa_gc`** is a mark-sweep garbage collector that implements a Trace and Finalize trait //! for garbage collected values. //! //! # About Boa @@ -11,14 +11,14 @@ //! Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! - **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [boa-conformance]: https://boa-dev.github.io/boa/test262/ //! [boa-web]: https://boa-dev.github.io/ diff --git a/boa_interner/src/lib.rs b/boa_interner/src/lib.rs index 888c30df14c..c14bac8ca75 100644 --- a/boa_interner/src/lib.rs +++ b/boa_interner/src/lib.rs @@ -16,14 +16,14 @@ //! 
Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! - **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [boa-conformance]: https://boa-dev.github.io/boa/test262/ //! [boa-web]: https://boa-dev.github.io/ diff --git a/boa_parser/src/lib.rs b/boa_parser/src/lib.rs index b75cdbe5d73..9c3043a33d7 100644 --- a/boa_parser/src/lib.rs +++ b/boa_parser/src/lib.rs @@ -12,14 +12,14 @@ //! Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! 
- **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! - **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [spec]: https://tc39.es/ecma262 //! [lex]: https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar diff --git a/boa_profiler/src/lib.rs b/boa_profiler/src/lib.rs index e91f9246b06..1eecc051bae 100644 --- a/boa_profiler/src/lib.rs +++ b/boa_profiler/src/lib.rs @@ -11,14 +11,14 @@ //! Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! - **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [profiler-md]: https://github.com/boa-dev/boa/blob/main/docs/profiling.md //! 
[boa-conformance]: https://boa-dev.github.io/boa/test262/ diff --git a/boa_unicode/src/lib.rs b/boa_unicode/src/lib.rs index feb1b24fae1..7ea372b4f5b 100644 --- a/boa_unicode/src/lib.rs +++ b/boa_unicode/src/lib.rs @@ -16,14 +16,14 @@ //! Try out the most recent release with Boa's live demo [playground][boa-playground]. //! //! # Boa Crates -//! - **boa_ast** - Boa's ECMAScript Abstract Syntax Tree. -//! - **boa_engine** - Boa's implementation of ECMAScript builtin objects and execution. -//! - **boa_gc** - Boa's garbage collector. -//! - **boa_interner** - Boa's string interner. -//! - **boa_parser** - Boa's lexer and parser. -//! - **boa_profiler** - Boa's code profiler. -//! - **boa_unicode** - Boa's Unicode identifier. -//! - **boa_icu_provider** - Boa's ICU4X data provider. +//! - **`boa_ast`** - Boa's ECMAScript Abstract Syntax Tree. +//! - **`boa_engine`** - Boa's implementation of ECMAScript builtin objects and execution. +//! - **`boa_gc`** - Boa's garbage collector. +//! - **`boa_interner`** - Boa's string interner. +//! - **`boa_parser`** - Boa's lexer and parser. +//! - **`boa_profiler`** - Boa's code profiler. +//! - **`boa_unicode`** - Boa's Unicode identifier. +//! - **`boa_icu_provider`** - Boa's ICU4X data provider. //! //! [uax31]: http://unicode.org/reports/tr31 //! [boa-conformance]: https://boa-dev.github.io/boa/test262/