diff --git a/components/locale_core/src/extensions/mod.rs b/components/locale_core/src/extensions/mod.rs index 57221d86efe..ef99e1365db 100644 --- a/components/locale_core/src/extensions/mod.rs +++ b/components/locale_core/src/extensions/mod.rs @@ -96,16 +96,12 @@ impl ExtensionType { } } - pub(crate) const fn try_from_utf8_manual_slice( - code_units: &[u8], - start: usize, - end: usize, - ) -> Result { - if end - start != 1 { + pub(crate) const fn try_from_utf8(code_units: &[u8]) -> Result { + let &[first] = code_units else { return Err(ParseError::InvalidExtension); - } - #[allow(clippy::indexing_slicing)] - Self::try_from_byte(code_units[start]) + }; + + Self::try_from_byte(first) } } diff --git a/components/locale_core/src/extensions/unicode/keywords.rs b/components/locale_core/src/extensions/unicode/keywords.rs index fde29580667..3fec4990b63 100644 --- a/components/locale_core/src/extensions/unicode/keywords.rs +++ b/components/locale_core/src/extensions/unicode/keywords.rs @@ -330,7 +330,7 @@ impl Keywords { } current_keyword = Some(Key::try_from_utf8(subtag)?); } else if current_keyword.is_some() { - match Value::parse_subtag(subtag) { + match Value::parse_subtag_from_utf8(subtag) { Ok(Some(t)) => current_value.push(t), Ok(None) => {} Err(_) => break, diff --git a/components/locale_core/src/extensions/unicode/value.rs b/components/locale_core/src/extensions/unicode/value.rs index 1ac15fb2607..6640fa6a821 100644 --- a/components/locale_core/src/extensions/unicode/value.rs +++ b/components/locale_core/src/extensions/unicode/value.rs @@ -255,16 +255,8 @@ impl Value { Self(input) } - pub(crate) fn parse_subtag(t: &[u8]) -> Result, ParseError> { - Self::parse_subtag_from_utf8_manual_slice(t, 0, t.len()) - } - - pub(crate) const fn parse_subtag_from_utf8_manual_slice( - code_units: &[u8], - start: usize, - end: usize, - ) -> Result, ParseError> { - match Subtag::try_from_utf8_manual_slice(code_units, start, end) { + pub(crate) const fn parse_subtag_from_utf8(t: &[u8]) -> Result, ParseError> { + match Subtag::try_from_utf8(t) { Ok(TRUE_VALUE) => Ok(None), Ok(s) => Ok(Some(s)), Err(_) => Err(ParseError::InvalidSubtag), diff --git a/components/locale_core/src/helpers.rs b/components/locale_core/src/helpers.rs index 689f1936ad7..1e246430060 100644 --- a/components/locale_core/src/helpers.rs +++ b/components/locale_core/src/helpers.rs @@ -42,26 +42,15 @@ macro_rules! impl_tinystr_subtag { } /// See [`Self::try_from_str`] - #[inline] - pub const fn try_from_utf8(code_units: &[u8]) -> Result { - Self::try_from_utf8_manual_slice(code_units, 0, code_units.len()) - } - - /// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8), - /// but callable in a `const` context (which range indexing is not). - pub const fn try_from_utf8_manual_slice( + pub const fn try_from_utf8( code_units: &[u8], - start: usize, - end: usize, ) -> Result { - let slen = end - start; - - #[allow(clippy::double_comparisons)] // if len_start == len_end - if slen < $len_start || slen > $len_end { + #[allow(clippy::double_comparisons)] // if code_units.len() === 0 + if code_units.len() < $len_start || code_units.len() > $len_end { return Err(crate::parser::errors::ParseError::$error); } - match tinystr::TinyAsciiStr::try_from_utf8_manual_slice(code_units, start, end) { + match tinystr::TinyAsciiStr::try_from_utf8(code_units) { Ok($tinystr_ident) if $validate => Ok(Self($normalize)), _ => Err(crate::parser::errors::ParseError::$error), } diff --git a/components/locale_core/src/parser/langid.rs b/components/locale_core/src/parser/langid.rs index 7a4bce53f84..92ce6f9159b 100644 --- a/components/locale_core/src/parser/langid.rs +++ b/components/locale_core/src/parser/langid.rs @@ -127,9 +127,9 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f let mut variant = None; let mut keyword = None; - if let (i, Some((start, end))) = iter.next_manual() { + if let (i, Some(subtag)) = iter.next_const() { iter = i; - match subtags::Language::try_from_utf8_manual_slice(iter.slice, start, end) { + match subtags::Language::try_from_utf8(subtag) { Ok(l) => language = l, Err(e) => return Err(e), } @@ -139,23 +139,19 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f let mut position = ParserPosition::Script; - while let Some((start, end)) = iter.peek_manual() { - if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 { + while let Some(subtag) = iter.peek() { + if !matches!(mode, ParserMode::LanguageIdentifier) && subtag.len() == 1 { break; } if matches!(position, ParserPosition::Script) { - if let Ok(s) = subtags::Script::try_from_utf8_manual_slice(iter.slice, start, end) { + if let Ok(s) = subtags::Script::try_from_utf8(subtag) { script = Some(s); position = ParserPosition::Region; - } else if let Ok(r) = - subtags::Region::try_from_utf8_manual_slice(iter.slice, start, end) - { + } else if let Ok(r) = subtags::Region::try_from_utf8(subtag) { region = Some(r); position = ParserPosition::Variant; - } else if let Ok(v) = - subtags::Variant::try_from_utf8_manual_slice(iter.slice, start, end) - { + } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) { // We cannot handle multiple variants in a const context debug_assert!(variant.is_none()); variant = Some(v); @@ -166,12 +162,10 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f return Err(ParseError::InvalidSubtag); } } else if matches!(position, ParserPosition::Region) { - if let Ok(s) = subtags::Region::try_from_utf8_manual_slice(iter.slice, start, end) { + if let Ok(s) = subtags::Region::try_from_utf8(subtag) { region = Some(s); position = ParserPosition::Variant; - } else if let Ok(v) = - subtags::Variant::try_from_utf8_manual_slice(iter.slice, start, end) - { + } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) { // We cannot handle multiple variants in a const context debug_assert!(variant.is_none()); variant = Some(v); @@ -181,7 +175,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f } else { return Err(ParseError::InvalidSubtag); } - } else if let Ok(v) = subtags::Variant::try_from_utf8_manual_slice(iter.slice, start, end) { + } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) { debug_assert!(matches!(position, ParserPosition::Variant)); if variant.is_some() { // We cannot handle multiple variants in a const context @@ -194,16 +188,16 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f return Err(ParseError::InvalidSubtag); } - iter = iter.next_manual().0; + iter = iter.next_const().0; } if matches!(mode, ParserMode::Locale) { - if let Some((start, end)) = iter.peek_manual() { - match ExtensionType::try_from_utf8_manual_slice(iter.slice, start, end) { + if let Some(subtag) = iter.peek() { + match ExtensionType::try_from_utf8(subtag) { Ok(ExtensionType::Unicode) => { - iter = iter.next_manual().0; - if let Some((start, end)) = iter.peek_manual() { - if Attribute::try_from_utf8_manual_slice(iter.slice, start, end).is_ok() { + iter = iter.next_const().0; + if let Some(peek) = iter.peek() { + if Attribute::try_from_utf8(peek).is_ok() { // We cannot handle Attributes in a const context return Err(ParseError::InvalidSubtag); } @@ -212,20 +206,18 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f let mut key = None; let mut current_type = None; - while let Some((start, end)) = iter.peek_manual() { - let slen = end - start; - if slen == 2 { + while let Some(peek) = iter.peek() { + if peek.len() == 2 { if key.is_some() { // We cannot handle more than one Key in a const context return Err(ParseError::InvalidSubtag); } - match Key::try_from_utf8_manual_slice(iter.slice, start, end) { + match Key::try_from_utf8(peek) { Ok(k) => key = Some(k), Err(e) => return Err(e), }; } else if key.is_some() { - match Value::parse_subtag_from_utf8_manual_slice(iter.slice, start, end) - { + match Value::parse_subtag_from_utf8(peek) { Ok(Some(t)) => { if current_type.is_some() { // We cannot handle more than one type in a const context @@ -239,7 +231,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f } else { break; } - iter = iter.next_manual().0 + iter = iter.next_const().0 } if let Some(k) = key { keyword = Some((k, current_type)); diff --git a/components/locale_core/src/parser/mod.rs b/components/locale_core/src/parser/mod.rs index ba7ee1c0c80..367ac057917 100644 --- a/components/locale_core/src/parser/mod.rs +++ b/components/locale_core/src/parser/mod.rs @@ -32,7 +32,7 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) { // If it's a separator, set the start to idx+1 and advance the idx to the next char. (idx + 1, idx + 1) } else { - // If it's idx=0, start is 0 and end is set to 1 + // If it's idx=0, end is set to 1 debug_assert!(idx == 0); (0, 1) }; @@ -45,6 +45,14 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) { (start, end) } +pub const fn split_out_range(slice: &[u8], start: usize, end: usize) -> &[u8] { + assert!(start <= slice.len()); + assert!(end <= slice.len()); + assert!(start <= end); + // SAFETY: assertions and align = size = 1. + unsafe { core::slice::from_raw_parts(slice.as_ptr().add(start), end - start) } +} + // `SubtagIterator` is a helper iterator for [`LanguageIdentifier`] and [`Locale`] parsing. // // It is quite extraordinary due to focus on performance and Rust limitations for `const` @@ -81,7 +89,7 @@ impl<'a> SubtagIterator<'a> { } } - pub const fn next_manual(mut self) -> (Self, Option<(usize, usize)>) { + pub const fn next_const(mut self) -> (Self, Option<&'a [u8]>) { if self.done { return (self, None); } @@ -91,19 +99,14 @@ impl<'a> SubtagIterator<'a> { } else { self.done = true; } - (self, Some(result)) + (self, Some(split_out_range(self.slice, result.0, result.1))) } - pub const fn peek_manual(&self) -> Option<(usize, usize)> { + pub const fn peek(&self) -> Option<&'a [u8]> { if self.done { return None; } - Some(self.subtag) - } - - pub fn peek(&self) -> Option<&'a [u8]> { - #[allow(clippy::indexing_slicing)] // peek_manual returns valid indices - self.peek_manual().map(|(s, e)| &self.slice[s..e]) + Some(split_out_range(self.slice, self.subtag.0, self.subtag.1)) } } @@ -111,10 +114,9 @@ impl<'a> Iterator for SubtagIterator<'a> { type Item = &'a [u8]; fn next(&mut self) -> Option { - let (s, res) = self.next_manual(); + let (s, res) = self.next_const(); *self = s; - #[allow(clippy::indexing_slicing)] // next_manual returns valid indices - res.map(|(s, e)| &self.slice[s..e]) + res } } diff --git a/utils/tinystr/src/ascii.rs b/utils/tinystr/src/ascii.rs index a54f5d81d98..0daf216ca5a 100644 --- a/utils/tinystr/src/ascii.rs +++ b/utils/tinystr/src/ascii.rs @@ -25,7 +25,7 @@ impl TinyAsciiStr { /// `code_units` may contain at most `N` non-null ASCII code points. #[inline] pub const fn try_from_utf8(code_units: &[u8]) -> Result { - Self::try_from_utf8_inner(code_units, 0, code_units.len(), false) + Self::try_from_utf8_inner(code_units, false) } /// Creates a `TinyAsciiStr` from the given UTF-16 slice. @@ -126,40 +126,18 @@ impl TinyAsciiStr { /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_))); /// ``` pub const fn try_from_raw(raw: [u8; N]) -> Result { - Self::try_from_utf8_inner(&raw, 0, N, true) - } - - /// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8), - /// but callable in a `const` context (which range indexing is not). - #[inline] - pub const fn try_from_utf8_manual_slice( - code_units: &[u8], - start: usize, - end: usize, - ) -> Result { - Self::try_from_utf8_inner(code_units, start, end, false) - } - - /// Equivalent to [`try_from_utf16(bytes[start..end])`](Self::try_from_utf16), - /// but callable in a `const` context (which range indexing is not). - #[inline] - pub const fn try_from_utf16_manual_slice( - code_units: &[u16], - start: usize, - end: usize, - ) -> Result { - Self::try_from_utf16_inner(code_units, start, end, false) + Self::try_from_utf8_inner(&raw, true) } pub(crate) const fn try_from_utf8_inner( code_units: &[u8], - start: usize, - end: usize, allow_trailing_null: bool, ) -> Result { - let len = end - start; - if len > N { - return Err(TinyStrError::TooLarge { max: N, len }); + if code_units.len() > N { + return Err(TinyStrError::TooLarge { + max: N, + len: code_units.len(), + }); } let mut out = [0; N]; @@ -167,8 +145,8 @@ impl TinyAsciiStr { let mut found_null = false; // Indexing is protected by TinyStrError::TooLarge #[allow(clippy::indexing_slicing)] - while i < len { - let b = code_units[start + i]; + while i < code_units.len() { + let b = code_units[i]; if b == 0 { found_null = true; diff --git a/utils/tinystr/src/ule.rs b/utils/tinystr/src/ule.rs index 66bbbaef0d0..2ceaccbcff8 100644 --- a/utils/tinystr/src/ule.rs +++ b/utils/tinystr/src/ule.rs @@ -24,7 +24,7 @@ unsafe impl ULE for TinyAsciiStr { } // Validate the bytes for chunk in bytes.chunks_exact(N) { - let _ = TinyAsciiStr::::try_from_utf8_inner(chunk, 0, N, true) + let _ = TinyAsciiStr::::try_from_utf8_inner(chunk, true) .map_err(|_| ZeroVecError::parse::())?; } Ok(())