Skip to content

Commit

Permalink
Locid extensions tofromstr (unicode-org#4934)
Browse files Browse the repository at this point in the history
Second part of unicode-org#1833.

This one cleans up the from/to-string conversions for the locid extensions.
It prepares the ground for reusing `Subtag` in the `Value` API, which will be
introduced in the next PR.
  • Loading branch information
zbraniecki authored Jun 1, 2024
1 parent 0ab2630 commit 77da96b
Show file tree
Hide file tree
Showing 9 changed files with 333 additions and 84 deletions.
38 changes: 23 additions & 15 deletions components/locale_core/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ pub mod unicode;
use core::cmp::Ordering;

use other::Other;
use private::Private;
use transform::Transform;
use unicode::Unicode;
use private::{Private, PRIVATE_EXT_CHAR};
use transform::{Transform, TRANSFORM_EXT_CHAR};
use unicode::{Unicode, UNICODE_EXT_CHAR};

use alloc::vec::Vec;

Expand All @@ -77,13 +77,21 @@ pub enum ExtensionType {
}

impl ExtensionType {
/// Classifies an extension singleton given as a byte slice.
///
/// The slice must contain exactly one byte, which is forwarded to
/// [`Self::try_from_byte`]. A slice of any other length is rejected with
/// [`ParserError::InvalidExtension`].
pub(crate) const fn try_from_byte_slice(key: &[u8]) -> Result<Self, ParserError> {
    match key {
        [single] => Self::try_from_byte(*single),
        _ => Err(ParserError::InvalidExtension),
    }
}

/// Classifies a single extension singleton byte.
///
/// The byte is ASCII-lowercased before matching, so the check is
/// case-insensitive. The Unicode, Transform and Private singletons map to
/// their dedicated variants; any other ASCII letter becomes
/// `Self::Other` carrying the lowercased byte. Everything else is rejected
/// with `ParserError::InvalidExtension`.
///
/// NOTE(review): this span shows both the literal-based match arms and the
/// constant-based match arms of the change side by side.
pub(crate) const fn try_from_byte(key: u8) -> Result<Self, ParserError> {
let key = key.to_ascii_lowercase();
match key {
b'u' => Ok(Self::Unicode),
b't' => Ok(Self::Transform),
b'x' => Ok(Self::Private),
b'a'..=b'z' => Ok(Self::Other(key)),
match key as char {
UNICODE_EXT_CHAR => Ok(Self::Unicode),
TRANSFORM_EXT_CHAR => Ok(Self::Transform),
PRIVATE_EXT_CHAR => Ok(Self::Private),
'a'..='z' => Ok(Self::Other(key)),
_ => Err(ParserError::InvalidExtension),
}
}
Expand Down Expand Up @@ -301,27 +309,27 @@ impl Extensions {
let mut wrote_tu = false;
// Alphabetic by singleton
self.other.iter().try_for_each(|other| {
if other.get_ext() > 't' && !wrote_tu {
if other.get_ext() > TRANSFORM_EXT_CHAR && !wrote_tu {
// Since 't' and 'u' are next to each other in alphabetical
// order, write both now.
self.transform.for_each_subtag_str(f)?;
self.unicode.for_each_subtag_str(f)?;
self.transform.for_each_subtag_str(f, true)?;
self.unicode.for_each_subtag_str(f, true)?;
wrote_tu = true;
}
other.for_each_subtag_str(f)?;
other.for_each_subtag_str(f, true)?;
Ok(())
})?;

if !wrote_tu {
self.transform.for_each_subtag_str(f)?;
self.unicode.for_each_subtag_str(f)?;
self.transform.for_each_subtag_str(f, true)?;
self.unicode.for_each_subtag_str(f, true)?;
}

// Private must be written last, since it allows single character
// keys. Extensions must also be written in alphabetical order,
// which would seem to imply that other extensions `y` and `z` are
// invalid, but this is not specified.
self.private.for_each_subtag_str(f)?;
self.private.for_each_subtag_str(f, true)?;
Ok(())
}
}
Expand Down
48 changes: 45 additions & 3 deletions components/locale_core/src/extensions/other/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
//! let mut loc: Locale = "en-US-a-foo-faa".parse().expect("Parsing failed.");
//! ```

use core::str::FromStr;

use super::ExtensionType;
use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;
Expand Down Expand Up @@ -80,8 +83,22 @@ impl Other {
Self { ext, keys }
}

/// Parses an [`Other`] extension from raw bytes, leading singleton included.
///
/// The first subtag is classified with `ExtensionType::try_from_byte_slice`;
/// only when it is an `ExtensionType::Other` singleton are the remaining
/// subtags handed to [`Self::try_from_iter`].
///
/// # Errors
///
/// Returns `ParserError::InvalidExtension` when the iterator yields no first
/// subtag or when the singleton belongs to a different extension type.
/// Errors from classifying the singleton or from parsing the remaining
/// subtags are propagated unchanged.
pub(crate) fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    let mut subtags = SubtagIterator::new(t);

    let singleton = subtags.next().ok_or(ParserError::InvalidExtension)?;
    match ExtensionType::try_from_byte_slice(singleton)? {
        ExtensionType::Other(ext) => Self::try_from_iter(ext, &mut subtags),
        _ => Err(ParserError::InvalidExtension),
    }
}

pub(crate) fn try_from_iter(ext: u8, iter: &mut SubtagIterator) -> Result<Self, ParserError> {
debug_assert!(ext.is_ascii_alphabetic());
debug_assert!(matches!(
ExtensionType::try_from_byte(ext),
Ok(ExtensionType::Other(_)),
));

let mut keys = ShortBoxSlice::new();
while let Some(subtag) = iter.peek() {
Expand Down Expand Up @@ -143,15 +160,29 @@ impl Other {
self.ext
}

/// Feeds the subtags of this extension to `f`, one `&str` at a time,
/// stopping at the first error returned by `f`.
///
/// In the `with_ext` form, nothing is emitted when the extension has no
/// keys; otherwise the singleton string is emitted first only if `with_ext`
/// is `true`, followed by every key in stored order.
///
/// NOTE(review): this span shows both the signature without `with_ext`
/// (which always emits the singleton) and the signature with it.
pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
{
f(self.get_ext_str())?;
// An extension without keys contributes no subtags at all.
if self.keys.is_empty() {
return Ok(());
}

// The singleton is optional so callers can emit only the keys.
if with_ext {
f(self.get_ext_str())?;
}
self.keys.iter().map(|t| t.as_str()).try_for_each(f)
}
}

/// Parses an [`Other`] extension, singleton included, from its string form.
impl FromStr for Other {
    type Err = ParserError;

    /// Delegates to [`Other::try_from_bytes`] on the UTF-8 bytes of `source`.
    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

writeable::impl_display_with_writeable!(Other);

impl writeable::Writeable for Other {
Expand Down Expand Up @@ -183,3 +214,14 @@ impl writeable::Writeable for Other {
alloc::borrow::Cow::Owned(string)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing an `Other` extension and formatting it again must yield the
    /// original string.
    #[test]
    fn test_other_extension_fromstr() {
        let input = "o-foo-bar";
        let parsed = input.parse::<Other>().expect("Failed to parse Other");
        assert_eq!(parsed.to_string(), input);
    }
}
43 changes: 40 additions & 3 deletions components/locale_core/src/extensions/private/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,19 @@ mod other;

use alloc::vec::Vec;
use core::ops::Deref;
use core::str::FromStr;

#[doc(inline)]
pub use other::{subtag, Subtag};

use super::ExtensionType;
use crate::parser::ParserError;
use crate::parser::SubtagIterator;
use crate::shortvec::ShortBoxSlice;

/// Singleton character that introduces the private-use extension.
pub(crate) const PRIVATE_EXT_CHAR: char = 'x';
/// String form of the private-use singleton, for APIs that emit `&str` subtags.
pub(crate) const PRIVATE_EXT_STR: &str = "x";

/// A list of [`Private Use Extensions`] as defined in [`Unicode Locale
/// Identifier`] specification.
///
Expand Down Expand Up @@ -110,6 +115,17 @@ impl Private {
Self(ShortBoxSlice::new_single(input))
}

/// Parses a [`Private`] extension from raw bytes, leading singleton included.
///
/// The first subtag is classified with `ExtensionType::try_from_byte_slice`;
/// only when it is the `ExtensionType::Private` singleton are the remaining
/// subtags handed to [`Self::try_from_iter`].
///
/// # Errors
///
/// Returns `ParserError::InvalidExtension` when the iterator yields no first
/// subtag or when the singleton belongs to a different extension type.
/// Errors from classifying the singleton or from parsing the remaining
/// subtags are propagated unchanged.
pub(crate) fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    let mut subtags = SubtagIterator::new(t);

    let singleton = subtags.next().ok_or(ParserError::InvalidExtension)?;
    match ExtensionType::try_from_byte_slice(singleton)? {
        ExtensionType::Private => Self::try_from_iter(&mut subtags),
        _ => Err(ParserError::InvalidExtension),
    }
}

/// Empties the [`Private`] list.
///
/// # Examples
Expand Down Expand Up @@ -139,26 +155,36 @@ impl Private {
Ok(Self(keys))
}

/// Feeds the subtags of this extension to `f`, one `&str` at a time,
/// stopping at the first error returned by `f`.
///
/// Nothing is emitted when the extension is empty. In the `with_ext` form
/// the singleton is emitted first only if `with_ext` is `true`, followed by
/// every private-use subtag in stored order.
///
/// NOTE(review): this span shows both the signature without `with_ext`
/// (which always emits `"x"`) and the signature with it.
pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
{
// An empty extension contributes no subtags, not even the singleton.
if self.is_empty() {
return Ok(());
}
f("x")?;
// The singleton is optional so callers can emit only the subtags.
if with_ext {
f(PRIVATE_EXT_STR)?;
}
self.deref().iter().map(|t| t.as_str()).try_for_each(f)
}
}

/// Parses a [`Private`] extension, singleton included, from its string form.
impl FromStr for Private {
    type Err = ParserError;

    /// Delegates to [`Private::try_from_bytes`] on the UTF-8 bytes of `source`.
    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

writeable::impl_display_with_writeable!(Private);

impl writeable::Writeable for Private {
fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
if self.is_empty() {
return Ok(());
}
sink.write_str("x")?;
sink.write_char(PRIVATE_EXT_CHAR)?;
for key in self.iter() {
sink.write_char('-')?;
writeable::Writeable::write_to(key, sink)?;
Expand All @@ -185,3 +211,14 @@ impl Deref for Private {
self.0.deref()
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing a `Private` extension and formatting it again must yield the
    /// original string, single-character subtags included.
    #[test]
    fn test_private_extension_fromstr() {
        let input = "x-foo-bar-l-baz";
        let parsed = input.parse::<Private>().expect("Failed to parse Private");
        assert_eq!(parsed.to_string(), input);
    }
}
45 changes: 42 additions & 3 deletions components/locale_core/src/extensions/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,24 @@ mod key;
mod value;

use core::cmp::Ordering;
use core::str::FromStr;

pub use fields::Fields;
#[doc(inline)]
pub use key::{key, Key};
pub use value::Value;

use super::ExtensionType;
use crate::parser::SubtagIterator;
use crate::parser::{parse_language_identifier_from_iter, ParserError, ParserMode};
use crate::shortvec::ShortBoxSlice;
use crate::subtags::{self, Language};
use crate::LanguageIdentifier;
use litemap::LiteMap;

/// Singleton character that introduces the transform extension.
pub(crate) const TRANSFORM_EXT_CHAR: char = 't';
/// String form of the transform singleton, for APIs that emit `&str` subtags.
pub(crate) const TRANSFORM_EXT_STR: &str = "t";

/// A list of [`Unicode BCP47 T Extensions`] as defined in [`Unicode Locale
/// Identifier`] specification.
///
Expand Down Expand Up @@ -117,6 +122,17 @@ impl Transform {
self.lang.is_none() && self.fields.is_empty()
}

/// Parses a [`Transform`] extension from raw bytes, leading singleton included.
///
/// The first subtag is classified with `ExtensionType::try_from_byte_slice`;
/// only when it is the `ExtensionType::Transform` singleton are the remaining
/// subtags handed to [`Self::try_from_iter`].
///
/// # Errors
///
/// Returns `ParserError::InvalidExtension` when the iterator yields no first
/// subtag or when the singleton belongs to a different extension type.
/// Errors from classifying the singleton or from parsing the remaining
/// subtags are propagated unchanged.
pub(crate) fn try_from_bytes(t: &[u8]) -> Result<Self, ParserError> {
    let mut subtags = SubtagIterator::new(t);

    let singleton = subtags.next().ok_or(ParserError::InvalidExtension)?;
    match ExtensionType::try_from_byte_slice(singleton)? {
        ExtensionType::Transform => Self::try_from_iter(&mut subtags),
        _ => Err(ParserError::InvalidExtension),
    }
}

/// Clears the transform extension, effectively removing it from the locale.
///
/// # Examples
Expand Down Expand Up @@ -214,29 +230,39 @@ impl Transform {
})
}

/// Feeds the subtags of this extension to `f`, one `&str` at a time,
/// stopping at the first error returned by `f`.
///
/// Nothing is emitted when the extension is empty. In the `with_ext` form
/// the singleton is emitted first only if `with_ext` is `true`; then come
/// the lowercased subtags of the language identifier, if one is set,
/// followed by the subtags of the fields.
///
/// NOTE(review): this span shows both the signature without `with_ext`
/// (which always emits `"t"`) and the signature with it.
pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F, with_ext: bool) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
{
// An empty extension contributes no subtags, not even the singleton.
if self.is_empty() {
return Ok(());
}
f("t")?;
// The singleton is optional so callers can emit only the content subtags.
if with_ext {
f(TRANSFORM_EXT_STR)?;
}
if let Some(lang) = &self.lang {
lang.for_each_subtag_str_lowercased(f)?;
}
self.fields.for_each_subtag_str(f)
}
}

/// Parses a [`Transform`] extension, singleton included, from its string form.
impl FromStr for Transform {
    type Err = ParserError;

    /// Delegates to [`Transform::try_from_bytes`] on the UTF-8 bytes of `source`.
    fn from_str(source: &str) -> Result<Self, Self::Err> {
        let bytes = source.as_bytes();
        Self::try_from_bytes(bytes)
    }
}

writeable::impl_display_with_writeable!(Transform);

impl writeable::Writeable for Transform {
fn write_to<W: core::fmt::Write + ?Sized>(&self, sink: &mut W) -> core::fmt::Result {
if self.is_empty() {
return Ok(());
}
sink.write_str("t")?;
sink.write_char(TRANSFORM_EXT_CHAR)?;
if let Some(lang) = &self.lang {
sink.write_char('-')?;
lang.write_lowercased_to(sink)?;
Expand All @@ -262,3 +288,16 @@ impl writeable::Writeable for Transform {
result
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing a `Transform` extension and formatting it again must yield
    /// the original string.
    #[test]
    fn test_transform_extension_fromstr() {
        let input = "t-en-us-h0-hybrid";
        let parsed = input
            .parse::<Transform>()
            .expect("Failed to parse Transform");
        assert_eq!(parsed.to_string(), input);
    }
}
Loading

0 comments on commit 77da96b

Please sign in to comment.