From 882aafcb76d3611791f7ce0f6b20fd42f951df66 Mon Sep 17 00:00:00 2001 From: Robert Bastian Date: Fri, 26 Jul 2024 10:22:54 +0200 Subject: [PATCH] -manual --- components/locale_core/src/extensions/mod.rs | 14 +++---- .../src/extensions/unicode/keywords.rs | 2 +- .../src/extensions/unicode/value.rs | 12 +----- components/locale_core/src/helpers.rs | 19 ++------- components/locale_core/src/parser/langid.rs | 29 +++++--------- components/locale_core/src/parser/mod.rs | 2 +- provider/core/src/marker.rs | 20 ++++------ utils/tinystr/src/ascii.rs | 40 +++++-------------- utils/tinystr/src/ule.rs | 2 +- utils/zerotrie/src/builder/konst/store.rs | 18 ++++----- 10 files changed, 49 insertions(+), 109 deletions(-) diff --git a/components/locale_core/src/extensions/mod.rs b/components/locale_core/src/extensions/mod.rs index 57221d86efe..ef99e1365db 100644 --- a/components/locale_core/src/extensions/mod.rs +++ b/components/locale_core/src/extensions/mod.rs @@ -96,16 +96,12 @@ impl ExtensionType { } } - pub(crate) const fn try_from_utf8_manual_slice( - code_units: &[u8], - start: usize, - end: usize, - ) -> Result { - if end - start != 1 { + pub(crate) const fn try_from_utf8(code_units: &[u8]) -> Result { + let &[first] = code_units else { return Err(ParseError::InvalidExtension); - } - #[allow(clippy::indexing_slicing)] - Self::try_from_byte(code_units[start]) + }; + + Self::try_from_byte(first) } } diff --git a/components/locale_core/src/extensions/unicode/keywords.rs b/components/locale_core/src/extensions/unicode/keywords.rs index fde29580667..3fec4990b63 100644 --- a/components/locale_core/src/extensions/unicode/keywords.rs +++ b/components/locale_core/src/extensions/unicode/keywords.rs @@ -330,7 +330,7 @@ impl Keywords { } current_keyword = Some(Key::try_from_utf8(subtag)?); } else if current_keyword.is_some() { - match Value::parse_subtag(subtag) { + match Value::parse_subtag_from_utf8(subtag) { Ok(Some(t)) => current_value.push(t), Ok(None) => {} Err(_) => break, diff 
--git a/components/locale_core/src/extensions/unicode/value.rs b/components/locale_core/src/extensions/unicode/value.rs index 1ac15fb2607..6640fa6a821 100644 --- a/components/locale_core/src/extensions/unicode/value.rs +++ b/components/locale_core/src/extensions/unicode/value.rs @@ -255,16 +255,8 @@ impl Value { Self(input) } - pub(crate) fn parse_subtag(t: &[u8]) -> Result, ParseError> { - Self::parse_subtag_from_utf8_manual_slice(t, 0, t.len()) - } - - pub(crate) const fn parse_subtag_from_utf8_manual_slice( - code_units: &[u8], - start: usize, - end: usize, - ) -> Result, ParseError> { - match Subtag::try_from_utf8_manual_slice(code_units, start, end) { + pub(crate) const fn parse_subtag_from_utf8(t: &[u8]) -> Result, ParseError> { + match Subtag::try_from_utf8(t) { Ok(TRUE_VALUE) => Ok(None), Ok(s) => Ok(Some(s)), Err(_) => Err(ParseError::InvalidSubtag), diff --git a/components/locale_core/src/helpers.rs b/components/locale_core/src/helpers.rs index 689f1936ad7..1e246430060 100644 --- a/components/locale_core/src/helpers.rs +++ b/components/locale_core/src/helpers.rs @@ -42,26 +42,15 @@ macro_rules! impl_tinystr_subtag { } /// See [`Self::try_from_str`] - #[inline] - pub const fn try_from_utf8(code_units: &[u8]) -> Result { - Self::try_from_utf8_manual_slice(code_units, 0, code_units.len()) - } - - /// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8), - /// but callable in a `const` context (which range indexing is not). 
- pub const fn try_from_utf8_manual_slice( + pub const fn try_from_utf8( code_units: &[u8], - start: usize, - end: usize, ) -> Result { - let slen = end - start; - - #[allow(clippy::double_comparisons)] // if len_start == len_end - if slen < $len_start || slen > $len_end { + #[allow(clippy::double_comparisons)] // if len_start == len_end + if code_units.len() < $len_start || code_units.len() > $len_end { return Err(crate::parser::errors::ParseError::$error); } - match tinystr::TinyAsciiStr::try_from_utf8_manual_slice(code_units, start, end) { + match tinystr::TinyAsciiStr::try_from_utf8(code_units) { Ok($tinystr_ident) if $validate => Ok(Self($normalize)), _ => Err(crate::parser::errors::ParseError::$error), } diff --git a/components/locale_core/src/parser/langid.rs b/components/locale_core/src/parser/langid.rs index 63524b358f1..92ce6f9159b 100644 --- a/components/locale_core/src/parser/langid.rs +++ b/components/locale_core/src/parser/langid.rs @@ -129,7 +129,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f if let (i, Some(subtag)) = iter.next_const() { iter = i; - match subtags::Language::try_from_utf8_manual_slice(iter.slice, 0, subtag.len()) { + match subtags::Language::try_from_utf8(subtag) { Ok(l) => language = l, Err(e) => return Err(e), } @@ -145,17 +145,13 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f } if matches!(position, ParserPosition::Script) { - if let Ok(s) = subtags::Script::try_from_utf8_manual_slice(subtag, 0, subtag.len()) { + if let Ok(s) = subtags::Script::try_from_utf8(subtag) { script = Some(s); position = ParserPosition::Region; - } else if let Ok(r) = - subtags::Region::try_from_utf8_manual_slice(subtag, 0, subtag.len()) - { + } else if let Ok(r) = subtags::Region::try_from_utf8(subtag) { region = Some(r); position = ParserPosition::Variant; - } else if let Ok(v) = - subtags::Variant::try_from_utf8_manual_slice(subtag, 0, subtag.len()) - { + } else if let Ok(v) 
= subtags::Variant::try_from_utf8(subtag) { // We cannot handle multiple variants in a const context debug_assert!(variant.is_none()); variant = Some(v); @@ -166,12 +162,10 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f return Err(ParseError::InvalidSubtag); } } else if matches!(position, ParserPosition::Region) { - if let Ok(s) = subtags::Region::try_from_utf8_manual_slice(subtag, 0, subtag.len()) { + if let Ok(s) = subtags::Region::try_from_utf8(subtag) { region = Some(s); position = ParserPosition::Variant; - } else if let Ok(v) = - subtags::Variant::try_from_utf8_manual_slice(subtag, 0, subtag.len()) - { + } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) { // We cannot handle multiple variants in a const context debug_assert!(variant.is_none()); variant = Some(v); @@ -181,8 +175,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f } else { return Err(ParseError::InvalidSubtag); } - } else if let Ok(v) = subtags::Variant::try_from_utf8_manual_slice(subtag, 0, subtag.len()) - { + } else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) { debug_assert!(matches!(position, ParserPosition::Variant)); if variant.is_some() { // We cannot handle multiple variants in a const context @@ -200,11 +193,11 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f if matches!(mode, ParserMode::Locale) { if let Some(subtag) = iter.peek() { - match ExtensionType::try_from_utf8_manual_slice(subtag, 0, subtag.len()) { + match ExtensionType::try_from_utf8(subtag) { Ok(ExtensionType::Unicode) => { iter = iter.next_const().0; if let Some(peek) = iter.peek() { - if Attribute::try_from_utf8_manual_slice(peek, 0, peek.len()).is_ok() { + if Attribute::try_from_utf8(peek).is_ok() { // We cannot handle Attributes in a const context return Err(ParseError::InvalidSubtag); } @@ -219,12 +212,12 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f // 
We cannot handle more than one Key in a const context return Err(ParseError::InvalidSubtag); } - match Key::try_from_utf8_manual_slice(peek, 0, peek.len()) { + match Key::try_from_utf8(peek) { Ok(k) => key = Some(k), Err(e) => return Err(e), }; } else if key.is_some() { - match Value::parse_subtag_from_utf8_manual_slice(peek, 0, peek.len()) { + match Value::parse_subtag_from_utf8(peek) { Ok(Some(t)) => { if current_type.is_some() { // We cannot handle more than one type in a const context diff --git a/components/locale_core/src/parser/mod.rs b/components/locale_core/src/parser/mod.rs index a5a8ae24328..a0463e6d397 100644 --- a/components/locale_core/src/parser/mod.rs +++ b/components/locale_core/src/parser/mod.rs @@ -32,7 +32,7 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) { // If it's a separator, set the start to idx+1 and advance the idx to the next char. (idx + 1, idx + 1) } else { - // If it's idx=0, start is 0 and end is set to 1 + // If it's idx=0, end is set to 1 debug_assert!(idx == 0); (0, 1) }; diff --git a/provider/core/src/marker.rs b/provider/core/src/marker.rs index 5d05800ec0d..7938cfcfb24 100644 --- a/provider/core/src/marker.rs +++ b/provider/core/src/marker.rs @@ -423,9 +423,9 @@ impl DataMarkerPath { i += 1; } - match Self::validate_path_manual_slice(tagged, start, end) { + match Self::validate_path(tagged.as_bytes().split_at(end).0.split_at(start).1) { Ok(()) => (), - Err(e) => return Err(e), + Err((expected, index)) => return Err((expected, start + index)), }; let hash = DataMarkerPathHash( @@ -440,13 +440,7 @@ impl DataMarkerPath { Ok(Self { tagged, hash }) } - const fn validate_path_manual_slice( - path: &'static str, - start: usize, - end: usize, - ) -> Result<(), (&'static str, usize)> { - debug_assert!(start <= end); - debug_assert!(end <= path.len()); + const fn validate_path(path: &'static [u8]) -> Result<(), (&'static str, usize)> { // Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+ enum State { Empty, @@ -455,12 
+449,12 @@ impl DataMarkerPath { Version, } use State::*; - let mut i = start; + let mut i = 0; let mut state = Empty; loop { - let byte = if i < end { - #[allow(clippy::indexing_slicing)] // protected by debug assertion - Some(path.as_bytes()[i]) + let byte = if i < path.len() { + #[allow(clippy::indexing_slicing)] // iterator not const + Some(path[i]) } else { None }; diff --git a/utils/tinystr/src/ascii.rs b/utils/tinystr/src/ascii.rs index a54f5d81d98..0daf216ca5a 100644 --- a/utils/tinystr/src/ascii.rs +++ b/utils/tinystr/src/ascii.rs @@ -25,7 +25,7 @@ impl TinyAsciiStr { /// `code_units` may contain at most `N` non-null ASCII code points. #[inline] pub const fn try_from_utf8(code_units: &[u8]) -> Result { - Self::try_from_utf8_inner(code_units, 0, code_units.len(), false) + Self::try_from_utf8_inner(code_units, false) } /// Creates a `TinyAsciiStr` from the given UTF-16 slice. @@ -126,40 +126,18 @@ impl TinyAsciiStr { /// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_))); /// ``` pub const fn try_from_raw(raw: [u8; N]) -> Result { - Self::try_from_utf8_inner(&raw, 0, N, true) - } - - /// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8), - /// but callable in a `const` context (which range indexing is not). - #[inline] - pub const fn try_from_utf8_manual_slice( - code_units: &[u8], - start: usize, - end: usize, - ) -> Result { - Self::try_from_utf8_inner(code_units, start, end, false) - } - - /// Equivalent to [`try_from_utf16(bytes[start..end])`](Self::try_from_utf16), - /// but callable in a `const` context (which range indexing is not). 
- #[inline] - pub const fn try_from_utf16_manual_slice( - code_units: &[u16], - start: usize, - end: usize, - ) -> Result { - Self::try_from_utf16_inner(code_units, start, end, false) + Self::try_from_utf8_inner(&raw, true) } pub(crate) const fn try_from_utf8_inner( code_units: &[u8], - start: usize, - end: usize, allow_trailing_null: bool, ) -> Result { - let len = end - start; - if len > N { - return Err(TinyStrError::TooLarge { max: N, len }); + if code_units.len() > N { + return Err(TinyStrError::TooLarge { + max: N, + len: code_units.len(), + }); } let mut out = [0; N]; @@ -167,8 +145,8 @@ impl TinyAsciiStr { let mut found_null = false; // Indexing is protected by TinyStrError::TooLarge #[allow(clippy::indexing_slicing)] - while i < len { - let b = code_units[start + i]; + while i < code_units.len() { + let b = code_units[i]; if b == 0 { found_null = true; diff --git a/utils/tinystr/src/ule.rs b/utils/tinystr/src/ule.rs index 66bbbaef0d0..2ceaccbcff8 100644 --- a/utils/tinystr/src/ule.rs +++ b/utils/tinystr/src/ule.rs @@ -24,7 +24,7 @@ unsafe impl ULE for TinyAsciiStr { } // Validate the bytes for chunk in bytes.chunks_exact(N) { - let _ = TinyAsciiStr::::try_from_utf8_inner(chunk, 0, N, true) + let _ = TinyAsciiStr::::try_from_utf8_inner(chunk, true) .map_err(|_| ZeroVecError::parse::())?; } Ok(()) diff --git a/utils/zerotrie/src/builder/konst/store.rs b/utils/zerotrie/src/builder/konst/store.rs index 73c72290b38..c51c8acfc94 100644 --- a/utils/zerotrie/src/builder/konst/store.rs +++ b/utils/zerotrie/src/builder/konst/store.rs @@ -29,15 +29,6 @@ impl<'a, T> ConstSlice<'a, T> { } } - /// Creates a [`ConstSlice`] with the given start and limit. - pub const fn from_manual_slice(full_slice: &'a [T], start: usize, limit: usize) -> Self { - ConstSlice { - full_slice, - start, - limit, - } - } - /// Returns the length of the [`ConstSlice`]. 
pub const fn len(&self) -> usize { self.limit - self.start @@ -147,7 +138,14 @@ impl ConstArrayBuilder { /// Returns the initialized elements as a [`ConstSlice`]. pub const fn as_const_slice(&self) -> ConstSlice { - ConstSlice::from_manual_slice(&self.full_array, self.start, self.limit) + ConstSlice::from_slice( + &self + .full_array + .split_at(self.limit) + .0 + .split_at(self.start) + .1, + ) } /// Non-const function that returns a slice of the initialized elements.