Skip to content

Commit

Permalink
Remove _manual_slice constructors (#5312)
Browse files Browse the repository at this point in the history
We added these because range slicing `&foo[x..y]` is not available in
const. However, it can be worked around with
`slice::split_at`/`slice::split_at_checked`/`slice::split_at_unchecked`.
Removing this simplifies our API surface as well as the implementation.
  • Loading branch information
robertbastian authored Jul 30, 2024
1 parent 6d39119 commit aff4632
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 109 deletions.
14 changes: 5 additions & 9 deletions components/locale_core/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,12 @@ impl ExtensionType {
}
}

pub(crate) const fn try_from_utf8_manual_slice(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Self, ParseError> {
if end - start != 1 {
pub(crate) const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
let &[first] = code_units else {
return Err(ParseError::InvalidExtension);
}
#[allow(clippy::indexing_slicing)]
Self::try_from_byte(code_units[start])
};

Self::try_from_byte(first)
}
}

Expand Down
2 changes: 1 addition & 1 deletion components/locale_core/src/extensions/unicode/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ impl Keywords {
}
current_keyword = Some(Key::try_from_utf8(subtag)?);
} else if current_keyword.is_some() {
match Value::parse_subtag(subtag) {
match Value::parse_subtag_from_utf8(subtag) {
Ok(Some(t)) => current_value.push(t),
Ok(None) => {}
Err(_) => break,
Expand Down
12 changes: 2 additions & 10 deletions components/locale_core/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,16 +255,8 @@ impl Value {
Self(input)
}

pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<Subtag>, ParseError> {
Self::parse_subtag_from_utf8_manual_slice(t, 0, t.len())
}

pub(crate) const fn parse_subtag_from_utf8_manual_slice(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Option<Subtag>, ParseError> {
match Subtag::try_from_utf8_manual_slice(code_units, start, end) {
pub(crate) const fn parse_subtag_from_utf8(t: &[u8]) -> Result<Option<Subtag>, ParseError> {
match Subtag::try_from_utf8(t) {
Ok(TRUE_VALUE) => Ok(None),
Ok(s) => Ok(Some(s)),
Err(_) => Err(ParseError::InvalidSubtag),
Expand Down
19 changes: 4 additions & 15 deletions components/locale_core/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,15 @@ macro_rules! impl_tinystr_subtag {
}

/// See [`Self::try_from_str`]
#[inline]
pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, crate::parser::errors::ParseError> {
Self::try_from_utf8_manual_slice(code_units, 0, code_units.len())
}

/// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8),
/// but callable in a `const` context (which range indexing is not).
pub const fn try_from_utf8_manual_slice(
pub const fn try_from_utf8(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Self, crate::parser::errors::ParseError> {
let slen = end - start;

#[allow(clippy::double_comparisons)] // if len_start == len_end
if slen < $len_start || slen > $len_end {
#[allow(clippy::double_comparisons)] // if code_units.len() === 0
if code_units.len() < $len_start || code_units.len() > $len_end {
return Err(crate::parser::errors::ParseError::$error);
}

match tinystr::TinyAsciiStr::try_from_utf8_manual_slice(code_units, start, end) {
match tinystr::TinyAsciiStr::try_from_utf8(code_units) {
Ok($tinystr_ident) if $validate => Ok(Self($normalize)),
_ => Err(crate::parser::errors::ParseError::$error),
}
Expand Down
50 changes: 21 additions & 29 deletions components/locale_core/src/parser/langid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,9 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
let mut variant = None;
let mut keyword = None;

if let (i, Some((start, end))) = iter.next_manual() {
if let (i, Some(subtag)) = iter.next_const() {
iter = i;
match subtags::Language::try_from_utf8_manual_slice(iter.slice, start, end) {
match subtags::Language::try_from_utf8(subtag) {
Ok(l) => language = l,
Err(e) => return Err(e),
}
Expand All @@ -139,23 +139,19 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f

let mut position = ParserPosition::Script;

while let Some((start, end)) = iter.peek_manual() {
if !matches!(mode, ParserMode::LanguageIdentifier) && end - start == 1 {
while let Some(subtag) = iter.peek() {
if !matches!(mode, ParserMode::LanguageIdentifier) && subtag.len() == 1 {
break;
}

if matches!(position, ParserPosition::Script) {
if let Ok(s) = subtags::Script::try_from_utf8_manual_slice(iter.slice, start, end) {
if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
script = Some(s);
position = ParserPosition::Region;
} else if let Ok(r) =
subtags::Region::try_from_utf8_manual_slice(iter.slice, start, end)
{
} else if let Ok(r) = subtags::Region::try_from_utf8(subtag) {
region = Some(r);
position = ParserPosition::Variant;
} else if let Ok(v) =
subtags::Variant::try_from_utf8_manual_slice(iter.slice, start, end)
{
} else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
// We cannot handle multiple variants in a const context
debug_assert!(variant.is_none());
variant = Some(v);
Expand All @@ -166,12 +162,10 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
return Err(ParseError::InvalidSubtag);
}
} else if matches!(position, ParserPosition::Region) {
if let Ok(s) = subtags::Region::try_from_utf8_manual_slice(iter.slice, start, end) {
if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
region = Some(s);
position = ParserPosition::Variant;
} else if let Ok(v) =
subtags::Variant::try_from_utf8_manual_slice(iter.slice, start, end)
{
} else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
// We cannot handle multiple variants in a const context
debug_assert!(variant.is_none());
variant = Some(v);
Expand All @@ -181,7 +175,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
} else {
return Err(ParseError::InvalidSubtag);
}
} else if let Ok(v) = subtags::Variant::try_from_utf8_manual_slice(iter.slice, start, end) {
} else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
debug_assert!(matches!(position, ParserPosition::Variant));
if variant.is_some() {
// We cannot handle multiple variants in a const context
Expand All @@ -194,16 +188,16 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
return Err(ParseError::InvalidSubtag);
}

iter = iter.next_manual().0;
iter = iter.next_const().0;
}

if matches!(mode, ParserMode::Locale) {
if let Some((start, end)) = iter.peek_manual() {
match ExtensionType::try_from_utf8_manual_slice(iter.slice, start, end) {
if let Some(subtag) = iter.peek() {
match ExtensionType::try_from_utf8(subtag) {
Ok(ExtensionType::Unicode) => {
iter = iter.next_manual().0;
if let Some((start, end)) = iter.peek_manual() {
if Attribute::try_from_utf8_manual_slice(iter.slice, start, end).is_ok() {
iter = iter.next_const().0;
if let Some(peek) = iter.peek() {
if Attribute::try_from_utf8(peek).is_ok() {
// We cannot handle Attributes in a const context
return Err(ParseError::InvalidSubtag);
}
Expand All @@ -212,20 +206,18 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
let mut key = None;
let mut current_type = None;

while let Some((start, end)) = iter.peek_manual() {
let slen = end - start;
if slen == 2 {
while let Some(peek) = iter.peek() {
if peek.len() == 2 {
if key.is_some() {
// We cannot handle more than one Key in a const context
return Err(ParseError::InvalidSubtag);
}
match Key::try_from_utf8_manual_slice(iter.slice, start, end) {
match Key::try_from_utf8(peek) {
Ok(k) => key = Some(k),
Err(e) => return Err(e),
};
} else if key.is_some() {
match Value::parse_subtag_from_utf8_manual_slice(iter.slice, start, end)
{
match Value::parse_subtag_from_utf8(peek) {
Ok(Some(t)) => {
if current_type.is_some() {
// We cannot handle more than one type in a const context
Expand All @@ -239,7 +231,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
} else {
break;
}
iter = iter.next_manual().0
iter = iter.next_const().0
}
if let Some(k) = key {
keyword = Some((k, current_type));
Expand Down
28 changes: 15 additions & 13 deletions components/locale_core/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) {
// If it's a separator, set the start to idx+1 and advance the idx to the next char.
(idx + 1, idx + 1)
} else {
// If it's idx=0, start is 0 and end is set to 1
// If it's idx=0, end is set to 1
debug_assert!(idx == 0);
(0, 1)
};
Expand All @@ -45,6 +45,14 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) {
(start, end)
}

pub const fn split_out_range(slice: &[u8], start: usize, end: usize) -> &[u8] {
assert!(start <= slice.len());
assert!(end <= slice.len());
assert!(start <= end);
// SAFETY: assertions and align = size = 1.
unsafe { core::slice::from_raw_parts(slice.as_ptr().add(start), end - start) }
}

// `SubtagIterator` is a helper iterator for [`LanguageIdentifier`] and [`Locale`] parsing.
//
// It is quite extraordinary due to focus on performance and Rust limitations for `const`
Expand Down Expand Up @@ -81,7 +89,7 @@ impl<'a> SubtagIterator<'a> {
}
}

pub const fn next_manual(mut self) -> (Self, Option<(usize, usize)>) {
pub const fn next_const(mut self) -> (Self, Option<&'a [u8]>) {
if self.done {
return (self, None);
}
Expand All @@ -91,30 +99,24 @@ impl<'a> SubtagIterator<'a> {
} else {
self.done = true;
}
(self, Some(result))
(self, Some(split_out_range(self.slice, result.0, result.1)))
}

pub const fn peek_manual(&self) -> Option<(usize, usize)> {
pub const fn peek(&self) -> Option<&'a [u8]> {
if self.done {
return None;
}
Some(self.subtag)
}

pub fn peek(&self) -> Option<&'a [u8]> {
#[allow(clippy::indexing_slicing)] // peek_manual returns valid indices
self.peek_manual().map(|(s, e)| &self.slice[s..e])
Some(split_out_range(self.slice, self.subtag.0, self.subtag.1))
}
}

impl<'a> Iterator for SubtagIterator<'a> {
type Item = &'a [u8];

fn next(&mut self) -> Option<Self::Item> {
let (s, res) = self.next_manual();
let (s, res) = self.next_const();
*self = s;
#[allow(clippy::indexing_slicing)] // next_manual returns valid indices
res.map(|(s, e)| &self.slice[s..e])
res
}
}

Expand Down
40 changes: 9 additions & 31 deletions utils/tinystr/src/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl<const N: usize> TinyAsciiStr<N> {
/// `code_units` may contain at most `N` non-null ASCII code points.
#[inline]
pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, TinyStrError> {
Self::try_from_utf8_inner(code_units, 0, code_units.len(), false)
Self::try_from_utf8_inner(code_units, false)
}

/// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
Expand Down Expand Up @@ -126,49 +126,27 @@ impl<const N: usize> TinyAsciiStr<N> {
/// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
/// ```
pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> {
Self::try_from_utf8_inner(&raw, 0, N, true)
}

/// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8),
/// but callable in a `const` context (which range indexing is not).
#[inline]
pub const fn try_from_utf8_manual_slice(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Self, TinyStrError> {
Self::try_from_utf8_inner(code_units, start, end, false)
}

/// Equivalent to [`try_from_utf16(bytes[start..end])`](Self::try_from_utf16),
/// but callable in a `const` context (which range indexing is not).
#[inline]
pub const fn try_from_utf16_manual_slice(
code_units: &[u16],
start: usize,
end: usize,
) -> Result<Self, TinyStrError> {
Self::try_from_utf16_inner(code_units, start, end, false)
Self::try_from_utf8_inner(&raw, true)
}

pub(crate) const fn try_from_utf8_inner(
code_units: &[u8],
start: usize,
end: usize,
allow_trailing_null: bool,
) -> Result<Self, TinyStrError> {
let len = end - start;
if len > N {
return Err(TinyStrError::TooLarge { max: N, len });
if code_units.len() > N {
return Err(TinyStrError::TooLarge {
max: N,
len: code_units.len(),
});
}

let mut out = [0; N];
let mut i = 0;
let mut found_null = false;
// Indexing is protected by TinyStrError::TooLarge
#[allow(clippy::indexing_slicing)]
while i < len {
let b = code_units[start + i];
while i < code_units.len() {
let b = code_units[i];

if b == 0 {
found_null = true;
Expand Down
2 changes: 1 addition & 1 deletion utils/tinystr/src/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ unsafe impl<const N: usize> ULE for TinyAsciiStr<N> {
}
// Validate the bytes
for chunk in bytes.chunks_exact(N) {
let _ = TinyAsciiStr::<N>::try_from_utf8_inner(chunk, 0, N, true)
let _ = TinyAsciiStr::<N>::try_from_utf8_inner(chunk, true)
.map_err(|_| ZeroVecError::parse::<Self>())?;
}
Ok(())
Expand Down

0 comments on commit aff4632

Please sign in to comment.