diff --git a/components/locale_core/src/extensions/mod.rs b/components/locale_core/src/extensions/mod.rs index 2de8909a0ef..d35bbe9bdda 100644 --- a/components/locale_core/src/extensions/mod.rs +++ b/components/locale_core/src/extensions/mod.rs @@ -52,9 +52,9 @@ pub mod unicode; use core::cmp::Ordering; use other::Other; -use private::Private; -use transform::Transform; -use unicode::Unicode; +use private::{Private, PRIVATE_EXT_CHAR}; +use transform::{Transform, TRANSFORM_EXT_CHAR}; +use unicode::{Unicode, UNICODE_EXT_CHAR}; use alloc::vec::Vec; @@ -77,13 +77,21 @@ pub enum ExtensionType { } impl ExtensionType { + pub(crate) const fn try_from_byte_slice(key: &[u8]) -> Result { + if let [b] = key { + Self::try_from_byte(*b) + } else { + Err(ParserError::InvalidExtension) + } + } + pub(crate) const fn try_from_byte(key: u8) -> Result { let key = key.to_ascii_lowercase(); - match key { - b'u' => Ok(Self::Unicode), - b't' => Ok(Self::Transform), - b'x' => Ok(Self::Private), - b'a'..=b'z' => Ok(Self::Other(key)), + match key as char { + UNICODE_EXT_CHAR => Ok(Self::Unicode), + TRANSFORM_EXT_CHAR => Ok(Self::Transform), + PRIVATE_EXT_CHAR => Ok(Self::Private), + 'a'..='z' => Ok(Self::Other(key)), _ => Err(ParserError::InvalidExtension), } } @@ -301,27 +309,27 @@ impl Extensions { let mut wrote_tu = false; // Alphabetic by singleton self.other.iter().try_for_each(|other| { - if other.get_ext() > 't' && !wrote_tu { + if other.get_ext() > TRANSFORM_EXT_CHAR && !wrote_tu { // Since 't' and 'u' are next to each other in alphabetical // order, write both now. - self.transform.for_each_subtag_str(f)?; - self.unicode.for_each_subtag_str(f)?; + self.transform.for_each_subtag_str(f, true)?; + self.unicode.for_each_subtag_str(f, true)?; wrote_tu = true; } - other.for_each_subtag_str(f)?; + other.for_each_subtag_str(f, true)?; Ok(()) })?; if !wrote_tu { - self.transform.for_each_subtag_str(f)?; - self.unicode.for_each_subtag_str(f)?; + self.transform.for_each_subtag_str(f, true)?; + self.unicode.for_each_subtag_str(f, true)?; } // Private must be written last, since it allows single character // keys. Extensions must also be written in alphabetical order, // which would seem to imply that other extensions `y` and `z` are // invalid, but this is not specified. - self.private.for_each_subtag_str(f)?; + self.private.for_each_subtag_str(f, true)?; Ok(()) } } diff --git a/components/locale_core/src/extensions/other/mod.rs b/components/locale_core/src/extensions/other/mod.rs index 24a0dcf7ac6..6e7596fcc47 100644 --- a/components/locale_core/src/extensions/other/mod.rs +++ b/components/locale_core/src/extensions/other/mod.rs @@ -19,6 +19,9 @@ //! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed."); //! ``` +use core::str::FromStr; + +use super::ExtensionType; use crate::parser::ParserError; use crate::parser::SubtagIterator; use crate::shortvec::ShortBoxSlice; @@ -80,8 +83,22 @@ impl Other { Self { ext, keys } } + pub(crate) fn try_from_bytes(t: &[u8]) -> Result { + let mut iter = SubtagIterator::new(t); + + let ext = iter.next().ok_or(ParserError::InvalidExtension)?; + if let ExtensionType::Other(b) = ExtensionType::try_from_byte_slice(ext)? { + return Self::try_from_iter(b, &mut iter); + } + + Err(ParserError::InvalidExtension) + } + pub(crate) fn try_from_iter(ext: u8, iter: &mut SubtagIterator) -> Result { - debug_assert!(ext.is_ascii_alphabetic()); + debug_assert!(matches!( + ExtensionType::try_from_byte(ext), + Ok(ExtensionType::Other(_)), + )); let mut keys = ShortBoxSlice::new(); while let Some(subtag) = iter.peek() { @@ -143,15 +160,29 @@ impl Other { self.ext } - pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + pub(crate) fn for_each_subtag_str(&self, f: &mut F, with_ext: bool) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, { - f(self.get_ext_str())?; + if self.keys.is_empty() { + return Ok(()); + } + + if with_ext { + f(self.get_ext_str())?; + } self.keys.iter().map(|t| t.as_str()).try_for_each(f) } } +impl FromStr for Other { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + writeable::impl_display_with_writeable!(Other); impl writeable::Writeable for Other { @@ -183,3 +214,14 @@ impl writeable::Writeable for Other { alloc::borrow::Cow::Owned(string) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_other_extension_fromstr() { + let pe: Other = "o-foo-bar".parse().expect("Failed to parse Other"); + assert_eq!(pe.to_string(), "o-foo-bar"); + } +} diff --git a/components/locale_core/src/extensions/private/mod.rs b/components/locale_core/src/extensions/private/mod.rs index 9f3bf577561..4bde5748d9b 100644 --- a/components/locale_core/src/extensions/private/mod.rs +++ b/components/locale_core/src/extensions/private/mod.rs @@ -31,14 +31,19 @@ mod other; use alloc::vec::Vec; use core::ops::Deref; +use core::str::FromStr; #[doc(inline)] pub use other::{subtag, Subtag}; +use super::ExtensionType; use crate::parser::ParserError; use crate::parser::SubtagIterator; use crate::shortvec::ShortBoxSlice; +pub(crate) const PRIVATE_EXT_CHAR: char = 'x'; +pub(crate) const PRIVATE_EXT_STR: &str = "x"; + /// A list of [`Private Use Extensions`] as defined in [`Unicode Locale /// Identifier`] specification. /// @@ -110,6 +115,17 @@ impl Private { Self(ShortBoxSlice::new_single(input)) } + pub(crate) fn try_from_bytes(t: &[u8]) -> Result { + let mut iter = SubtagIterator::new(t); + + let ext = iter.next().ok_or(ParserError::InvalidExtension)?; + if let ExtensionType::Private = ExtensionType::try_from_byte_slice(ext)? { + return Self::try_from_iter(&mut iter); + } + + Err(ParserError::InvalidExtension) + } + /// Empties the [`Private`] list. /// /// # Examples @@ -139,18 +155,28 @@ impl Private { Ok(Self(keys)) } - pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + pub(crate) fn for_each_subtag_str(&self, f: &mut F, with_ext: bool) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, { if self.is_empty() { return Ok(()); } - f("x")?; + if with_ext { + f(PRIVATE_EXT_STR)?; + } self.deref().iter().map(|t| t.as_str()).try_for_each(f) } } +impl FromStr for Private { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + writeable::impl_display_with_writeable!(Private); impl writeable::Writeable for Private { @@ -158,7 +184,7 @@ impl writeable::Writeable for Private { if self.is_empty() { return Ok(()); } - sink.write_str("x")?; + sink.write_char(PRIVATE_EXT_CHAR)?; for key in self.iter() { sink.write_char('-')?; writeable::Writeable::write_to(key, sink)?; @@ -185,3 +211,14 @@ impl Deref for Private { self.0.deref() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_private_extension_fromstr() { + let pe: Private = "x-foo-bar-l-baz".parse().expect("Failed to parse Private"); + assert_eq!(pe.to_string(), "x-foo-bar-l-baz"); + } +} diff --git a/components/locale_core/src/extensions/transform/mod.rs b/components/locale_core/src/extensions/transform/mod.rs index 728e04ed965..21a92082b96 100644 --- a/components/locale_core/src/extensions/transform/mod.rs +++ b/components/locale_core/src/extensions/transform/mod.rs @@ -35,12 +35,14 @@ mod key; mod value; use core::cmp::Ordering; +use core::str::FromStr; pub use fields::Fields; #[doc(inline)] pub use key::{key, Key}; pub use value::Value; +use super::ExtensionType; use crate::parser::SubtagIterator; use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode}; use crate::shortvec::ShortBoxSlice; @@ -48,6 +50,9 @@ use crate::subtags::{self, Language}; use crate::LanguageIdentifier; use litemap::LiteMap; +pub(crate) const TRANSFORM_EXT_CHAR: char = 't'; +pub(crate) const TRANSFORM_EXT_STR: &str = "t"; + /// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale /// Identifier`] specification. /// @@ -117,6 +122,17 @@ impl Transform { self.lang.is_none() && self.fields.is_empty() } + pub(crate) fn try_from_bytes(t: &[u8]) -> Result { + let mut iter = SubtagIterator::new(t); + + let ext = iter.next().ok_or(ParserError::InvalidExtension)?; + if let ExtensionType::Transform = ExtensionType::try_from_byte_slice(ext)? { + return Self::try_from_iter(&mut iter); + } + + Err(ParserError::InvalidExtension) + } + /// Clears the transform extension, effectively removing it from the locale. /// /// # Examples @@ -214,14 +230,16 @@ impl Transform { }) } - pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + pub(crate) fn for_each_subtag_str(&self, f: &mut F, with_ext: bool) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, { if self.is_empty() { return Ok(()); } - f("t")?; + if with_ext { + f(TRANSFORM_EXT_STR)?; + } if let Some(lang) = &self.lang { lang.for_each_subtag_str_lowercased(f)?; } @@ -229,6 +247,14 @@ impl Transform { } } +impl FromStr for Transform { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + writeable::impl_display_with_writeable!(Transform); impl writeable::Writeable for Transform { @@ -236,7 +262,7 @@ impl writeable::Writeable for Transform { if self.is_empty() { return Ok(()); } - sink.write_str("t")?; + sink.write_char(TRANSFORM_EXT_CHAR)?; if let Some(lang) = &self.lang { sink.write_char('-')?; lang.write_lowercased_to(sink)?; @@ -262,3 +288,16 @@ impl writeable::Writeable for Transform { result } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_transform_extension_fromstr() { + let te: Transform = "t-en-us-h0-hybrid" + .parse() + .expect("Failed to parse Transform"); + assert_eq!(te.to_string(), "t-en-us-h0-hybrid"); + } +} diff --git a/components/locale_core/src/extensions/unicode/attributes.rs b/components/locale_core/src/extensions/unicode/attributes.rs index 37087ae61b5..9f6bfd0ff1f 100644 --- a/components/locale_core/src/extensions/unicode/attributes.rs +++ b/components/locale_core/src/extensions/unicode/attributes.rs @@ -4,9 +4,12 @@ use super::Attribute; +use crate::parser::SubtagIterator; use crate::shortvec::ShortBoxSlice; +use crate::ParserError; use alloc::vec::Vec; use core::ops::Deref; +use core::str::FromStr; /// A set of [`Attribute`] elements as defined in [`Unicode Extension Attributes`]. /// @@ -72,8 +75,9 @@ impl Attributes { Self(input.into()) } - pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice) -> Self { - Self(input) + pub(crate) fn try_from_bytes(t: &[u8]) -> Result { + let mut iter = SubtagIterator::new(t); + Self::try_from_iter(&mut iter) } /// Empties the [`Attributes`] list. @@ -101,6 +105,22 @@ impl Attributes { core::mem::take(self) } + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { + let mut attributes = ShortBoxSlice::new(); + + while let Some(subtag) = iter.peek() { + if let Ok(attr) = Attribute::try_from_bytes(subtag) { + if let Err(idx) = attributes.binary_search(&attr) { + attributes.insert(idx, attr); + } + } else { + break; + } + iter.next(); + } + Ok(Self(attributes)) + } + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, @@ -109,6 +129,14 @@ impl Attributes { } } +impl FromStr for Attributes { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + impl_writeable_for_subtag_list!(Attributes, "foobar", "testing"); impl Deref for Attributes { @@ -118,3 +146,14 @@ impl Deref for Attributes { self.0.deref() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_attributes_fromstr() { + let attrs: Attributes = "foo-bar".parse().expect("Failed to parse Attributes"); + assert_eq!(attrs.to_string(), "bar-foo"); + } +} diff --git a/components/locale_core/src/extensions/unicode/keywords.rs b/components/locale_core/src/extensions/unicode/keywords.rs index 8a0c3fc201f..d886bab661b 100644 --- a/components/locale_core/src/extensions/unicode/keywords.rs +++ b/components/locale_core/src/extensions/unicode/keywords.rs @@ -5,6 +5,7 @@ use core::borrow::Borrow; use core::cmp::Ordering; use core::iter::FromIterator; +use core::str::FromStr; use litemap::LiteMap; use writeable::Writeable; @@ -12,6 +13,8 @@ use super::Key; use super::Value; #[allow(deprecated)] use crate::ordering::SubtagOrderingResult; +use crate::parser::ParserError; +use crate::parser::SubtagIterator; use crate::shortvec::ShortBoxSlice; /// A list of [`Key`]-[`Value`] pairs representing functional information @@ -90,6 +93,11 @@ impl Keywords { )) } + pub(crate) fn try_from_bytes(t: &[u8]) -> Result { + let mut iter = SubtagIterator::new(t); + Self::try_from_iter(&mut iter) + } + /// Returns `true` if there are no keywords. /// /// # Examples @@ -356,6 +364,44 @@ impl Keywords { } } + pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { + let mut keywords = LiteMap::new(); + + let mut current_keyword = None; + let mut current_value = ShortBoxSlice::new(); + + while let Some(subtag) = iter.peek() { + let slen = subtag.len(); + if slen == 2 { + if let Some(kw) = current_keyword.take() { + keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); + current_value = ShortBoxSlice::new(); + } + current_keyword = Some(Key::try_from_bytes(subtag)?); + } else if current_keyword.is_some() { + match Value::parse_subtag(subtag) { + Ok(Some(t)) => current_value.push(t), + Ok(None) => {} + Err(_) => break, + } + } else { + break; + } + iter.next(); + } + + if let Some(kw) = current_keyword.take() { + keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); + } + + Ok(keywords.into()) + } + + /// Produce an ordered iterator over key-value pairs + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } + pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, @@ -386,4 +432,23 @@ impl FromIterator<(Key, Value)> for Keywords { } } +impl FromStr for Keywords { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm"); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_keywords_fromstr() { + let kw: Keywords = "hc-h12".parse().expect("Failed to parse Keywords"); + assert_eq!(kw.to_string(), "hc-h12"); + } +} diff --git a/components/locale_core/src/extensions/unicode/mod.rs b/components/locale_core/src/extensions/unicode/mod.rs index a315e067a9f..882afd5a4d1 100644 --- a/components/locale_core/src/extensions/unicode/mod.rs +++ b/components/locale_core/src/extensions/unicode/mod.rs @@ -33,6 +33,7 @@ mod keywords; mod value; use core::cmp::Ordering; +use core::str::FromStr; #[doc(inline)] pub use attribute::{attribute, Attribute}; @@ -43,10 +44,12 @@ pub use keywords::Keywords; #[doc(inline)] pub use value::{value, Value}; +use super::ExtensionType; use crate::parser::ParserError; use crate::parser::SubtagIterator; -use crate::shortvec::ShortBoxSlice; -use litemap::LiteMap; + +pub(crate) const UNICODE_EXT_CHAR: char = 'u'; +pub(crate) const UNICODE_EXT_STR: &str = "u"; /// Unicode Extensions provide information about user preferences in a given locale. /// @@ -118,6 +121,17 @@ impl Unicode { self.keywords.is_empty() && self.attributes.is_empty() } + pub(crate) fn try_from_bytes(t: &[u8]) -> Result { + let mut iter = SubtagIterator::new(t); + + let ext = iter.next().ok_or(ParserError::InvalidExtension)?; + if let ExtensionType::Unicode = ExtensionType::try_from_byte_slice(ext)? { + return Self::try_from_iter(&mut iter); + } + + Err(ParserError::InvalidExtension) + } + /// Clears all Unicode extension keywords and attributes, effectively removing /// the Unicode extension. /// @@ -151,47 +165,8 @@ impl Unicode { } pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result { - let mut attributes = ShortBoxSlice::new(); - - while let Some(subtag) = iter.peek() { - if let Ok(attr) = Attribute::try_from_bytes(subtag) { - if let Err(idx) = attributes.binary_search(&attr) { - attributes.insert(idx, attr); - } - } else { - break; - } - iter.next(); - } - - let mut keywords = LiteMap::new(); - - let mut current_keyword = None; - let mut current_value = ShortBoxSlice::new(); - - while let Some(subtag) = iter.peek() { - let slen = subtag.len(); - if slen == 2 { - if let Some(kw) = current_keyword.take() { - keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); - current_value = ShortBoxSlice::new(); - } - current_keyword = Some(Key::try_from_bytes(subtag)?); - } else if current_keyword.is_some() { - match Value::parse_subtag(subtag) { - Ok(Some(t)) => current_value.push(t), - Ok(None) => {} - Err(_) => break, - } - } else { - break; - } - iter.next(); - } - - if let Some(kw) = current_keyword.take() { - keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value)); - } + let attributes = Attributes::try_from_iter(iter)?; + let keywords = Keywords::try_from_iter(iter)?; // Ensure we've defined at least one attribute or keyword if attributes.is_empty() && keywords.is_empty() { @@ -199,33 +174,40 @@ impl Unicode { } Ok(Self { - keywords: keywords.into(), - attributes: Attributes::from_short_slice_unchecked(attributes), + keywords, + attributes, }) } - pub(crate) fn for_each_subtag_str(&self, f: &mut F) -> Result<(), E> + pub(crate) fn for_each_subtag_str(&self, f: &mut F, with_ext: bool) -> Result<(), E> where F: FnMut(&str) -> Result<(), E>, { - if self.is_empty() { - return Ok(()); + if !self.is_empty() { + if with_ext { + f(UNICODE_EXT_STR)?; + } + self.attributes.for_each_subtag_str(f)?; + self.keywords.for_each_subtag_str(f)?; } - f("u")?; - self.attributes.for_each_subtag_str(f)?; - self.keywords.for_each_subtag_str(f)?; Ok(()) } } +impl FromStr for Unicode { + type Err = ParserError; + + fn from_str(source: &str) -> Result { + Self::try_from_bytes(source.as_bytes()) + } +} + writeable::impl_display_with_writeable!(Unicode); impl writeable::Writeable for Unicode { fn write_to(&self, sink: &mut W) -> core::fmt::Result { - if self.is_empty() { - return Ok(()); - } - sink.write_str("u")?; + sink.write_char(UNICODE_EXT_CHAR)?; + if !self.attributes.is_empty() { sink.write_char('-')?; writeable::Writeable::write_to(&self.attributes, sink)?; @@ -251,3 +233,14 @@ impl writeable::Writeable for Unicode { result } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_unicode_extension_fromstr() { + let ue: Unicode = "u-foo-hc-h12".parse().expect("Failed to parse Unicode"); + assert_eq!(ue.to_string(), "u-foo-hc-h12"); + } +} diff --git a/components/locale_core/src/extensions/unicode/value.rs b/components/locale_core/src/extensions/unicode/value.rs index 5c93512ebfc..8aa80c3fadb 100644 --- a/components/locale_core/src/extensions/unicode/value.rs +++ b/components/locale_core/src/extensions/unicode/value.rs @@ -4,6 +4,7 @@ use crate::parser::{ParserError, SubtagIterator}; use crate::shortvec::ShortBoxSlice; +use alloc::vec::Vec; use core::ops::RangeInclusive; use core::str::FromStr; use tinystr::TinyAsciiStr; @@ -102,6 +103,31 @@ impl Value { } } + /// A constructor which takes a pre-sorted list of [`Value`] elements. + /// + /// + /// # Examples + /// + /// ``` + /// use icu::locale::extensions::unicode::Value; + /// use tinystr::{TinyAsciiStr, tinystr}; + /// + /// let tinystr1: TinyAsciiStr<8> = tinystr!(8, "foobar"); + /// let tinystr2: TinyAsciiStr<8> = tinystr!(8, "testing"); + /// let mut v = vec![tinystr1, tinystr2]; + /// v.sort(); + /// v.dedup(); + /// + /// let value = Value::from_vec_unchecked(v); + /// ``` + /// + /// Notice: For performance- and memory-constrained environments, it is recommended + /// for the caller to use [`binary_search`](slice::binary_search) instead of [`sort`](slice::sort) + /// and [`dedup`](Vec::dedup()). + pub fn from_vec_unchecked(input: Vec>) -> Self { + Self(input.into()) + } + pub(crate) fn from_short_slice_unchecked(input: ShortBoxSlice>) -> Self { Self(input) } diff --git a/components/locale_core/src/helpers.rs b/components/locale_core/src/helpers.rs index 7a20d5092be..00a82c1e387 100644 --- a/components/locale_core/src/helpers.rs +++ b/components/locale_core/src/helpers.rs @@ -377,14 +377,14 @@ macro_rules! impl_writeable_for_subtag_list { fn test_writeable() { writeable::assert_writeable_eq!(&$type::default(), ""); writeable::assert_writeable_eq!( - &$type::from_short_slice_unchecked(alloc::vec![$sample1.parse().unwrap()].into()), + &$type::from_vec_unchecked(alloc::vec![$sample1.parse().unwrap()]), $sample1, ); writeable::assert_writeable_eq!( - &$type::from_short_slice_unchecked(vec![ + &$type::from_vec_unchecked(vec![ $sample1.parse().unwrap(), $sample2.parse().unwrap() - ].into()), + ]), core::concat!($sample1, "-", $sample2), ); }