Skip to content

Commit

Permalink
-manual
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed Jul 26, 2024
1 parent 886919e commit 882aafc
Show file tree
Hide file tree
Showing 10 changed files with 49 additions and 109 deletions.
14 changes: 5 additions & 9 deletions components/locale_core/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,12 @@ impl ExtensionType {
}
}

pub(crate) const fn try_from_utf8_manual_slice(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Self, ParseError> {
if end - start != 1 {
pub(crate) const fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
let &[first] = code_units else {
return Err(ParseError::InvalidExtension);
}
#[allow(clippy::indexing_slicing)]
Self::try_from_byte(code_units[start])
};

Self::try_from_byte(first)
}
}

Expand Down
2 changes: 1 addition & 1 deletion components/locale_core/src/extensions/unicode/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ impl Keywords {
}
current_keyword = Some(Key::try_from_utf8(subtag)?);
} else if current_keyword.is_some() {
match Value::parse_subtag(subtag) {
match Value::parse_subtag_from_utf8(subtag) {
Ok(Some(t)) => current_value.push(t),
Ok(None) => {}
Err(_) => break,
Expand Down
12 changes: 2 additions & 10 deletions components/locale_core/src/extensions/unicode/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,16 +255,8 @@ impl Value {
Self(input)
}

pub(crate) fn parse_subtag(t: &[u8]) -> Result<Option<Subtag>, ParseError> {
Self::parse_subtag_from_utf8_manual_slice(t, 0, t.len())
}

pub(crate) const fn parse_subtag_from_utf8_manual_slice(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Option<Subtag>, ParseError> {
match Subtag::try_from_utf8_manual_slice(code_units, start, end) {
pub(crate) const fn parse_subtag_from_utf8(t: &[u8]) -> Result<Option<Subtag>, ParseError> {
match Subtag::try_from_utf8(t) {
Ok(TRUE_VALUE) => Ok(None),
Ok(s) => Ok(Some(s)),
Err(_) => Err(ParseError::InvalidSubtag),
Expand Down
19 changes: 4 additions & 15 deletions components/locale_core/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,15 @@ macro_rules! impl_tinystr_subtag {
}

/// See [`Self::try_from_str`]
#[inline]
pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, crate::parser::errors::ParseError> {
Self::try_from_utf8_manual_slice(code_units, 0, code_units.len())
}

/// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8),
/// but callable in a `const` context (which range indexing is not).
pub const fn try_from_utf8_manual_slice(
pub const fn try_from_utf8(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Self, crate::parser::errors::ParseError> {
let slen = end - start;

#[allow(clippy::double_comparisons)] // if len_start == len_end
if slen < $len_start || slen > $len_end {
#[allow(clippy::double_comparisons)] // triggers when $len_start == $len_end
if code_units.len() < $len_start || code_units.len() > $len_end {
return Err(crate::parser::errors::ParseError::$error);
}

match tinystr::TinyAsciiStr::try_from_utf8_manual_slice(code_units, start, end) {
match tinystr::TinyAsciiStr::try_from_utf8(code_units) {
Ok($tinystr_ident) if $validate => Ok(Self($normalize)),
_ => Err(crate::parser::errors::ParseError::$error),
}
Expand Down
29 changes: 11 additions & 18 deletions components/locale_core/src/parser/langid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f

if let (i, Some(subtag)) = iter.next_const() {
iter = i;
match subtags::Language::try_from_utf8_manual_slice(iter.slice, 0, subtag.len()) {
match subtags::Language::try_from_utf8(subtag) {
Ok(l) => language = l,
Err(e) => return Err(e),
}
Expand All @@ -145,17 +145,13 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
}

if matches!(position, ParserPosition::Script) {
if let Ok(s) = subtags::Script::try_from_utf8_manual_slice(subtag, 0, subtag.len()) {
if let Ok(s) = subtags::Script::try_from_utf8(subtag) {
script = Some(s);
position = ParserPosition::Region;
} else if let Ok(r) =
subtags::Region::try_from_utf8_manual_slice(subtag, 0, subtag.len())
{
} else if let Ok(r) = subtags::Region::try_from_utf8(subtag) {
region = Some(r);
position = ParserPosition::Variant;
} else if let Ok(v) =
subtags::Variant::try_from_utf8_manual_slice(subtag, 0, subtag.len())
{
} else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
// We cannot handle multiple variants in a const context
debug_assert!(variant.is_none());
variant = Some(v);
Expand All @@ -166,12 +162,10 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
return Err(ParseError::InvalidSubtag);
}
} else if matches!(position, ParserPosition::Region) {
if let Ok(s) = subtags::Region::try_from_utf8_manual_slice(subtag, 0, subtag.len()) {
if let Ok(s) = subtags::Region::try_from_utf8(subtag) {
region = Some(s);
position = ParserPosition::Variant;
} else if let Ok(v) =
subtags::Variant::try_from_utf8_manual_slice(subtag, 0, subtag.len())
{
} else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
// We cannot handle multiple variants in a const context
debug_assert!(variant.is_none());
variant = Some(v);
Expand All @@ -181,8 +175,7 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
} else {
return Err(ParseError::InvalidSubtag);
}
} else if let Ok(v) = subtags::Variant::try_from_utf8_manual_slice(subtag, 0, subtag.len())
{
} else if let Ok(v) = subtags::Variant::try_from_utf8(subtag) {
debug_assert!(matches!(position, ParserPosition::Variant));
if variant.is_some() {
// We cannot handle multiple variants in a const context
Expand All @@ -200,11 +193,11 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f

if matches!(mode, ParserMode::Locale) {
if let Some(subtag) = iter.peek() {
match ExtensionType::try_from_utf8_manual_slice(subtag, 0, subtag.len()) {
match ExtensionType::try_from_utf8(subtag) {
Ok(ExtensionType::Unicode) => {
iter = iter.next_const().0;
if let Some(peek) = iter.peek() {
if Attribute::try_from_utf8_manual_slice(peek, 0, peek.len()).is_ok() {
if Attribute::try_from_utf8(peek).is_ok() {
// We cannot handle Attributes in a const context
return Err(ParseError::InvalidSubtag);
}
Expand All @@ -219,12 +212,12 @@ pub const fn parse_locale_with_single_variant_single_keyword_unicode_extension_f
// We cannot handle more than one Key in a const context
return Err(ParseError::InvalidSubtag);
}
match Key::try_from_utf8_manual_slice(peek, 0, peek.len()) {
match Key::try_from_utf8(peek) {
Ok(k) => key = Some(k),
Err(e) => return Err(e),
};
} else if key.is_some() {
match Value::parse_subtag_from_utf8_manual_slice(peek, 0, peek.len()) {
match Value::parse_subtag_from_utf8(peek) {
Ok(Some(t)) => {
if current_type.is_some() {
// We cannot handle more than one type in a const context
Expand Down
2 changes: 1 addition & 1 deletion components/locale_core/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const fn get_current_subtag(slice: &[u8], idx: usize) -> (usize, usize) {
// If it's a separator, set the start to idx+1 and advance the idx to the next char.
(idx + 1, idx + 1)
} else {
// If it's idx=0, start is 0 and end is set to 1
// If it's idx=0, end is set to 1
debug_assert!(idx == 0);
(0, 1)
};
Expand Down
20 changes: 7 additions & 13 deletions provider/core/src/marker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,9 +423,9 @@ impl DataMarkerPath {
i += 1;
}

match Self::validate_path_manual_slice(tagged, start, end) {
match Self::validate_path(tagged.as_bytes().split_at(end).0.split_at(start).1) {
Ok(()) => (),
Err(e) => return Err(e),
Err((expected, index)) => return Err((expected, start + index)),
};

let hash = DataMarkerPathHash(
Expand All @@ -440,13 +440,7 @@ impl DataMarkerPath {
Ok(Self { tagged, hash })
}

const fn validate_path_manual_slice(
path: &'static str,
start: usize,
end: usize,
) -> Result<(), (&'static str, usize)> {
debug_assert!(start <= end);
debug_assert!(end <= path.len());
const fn validate_path(path: &'static [u8]) -> Result<(), (&'static str, usize)> {
// Regex: [a-zA-Z0-9_][a-zA-Z0-9_/]*@[0-9]+
enum State {
Empty,
Expand All @@ -455,12 +449,12 @@ impl DataMarkerPath {
Version,
}
use State::*;
let mut i = start;
let mut i = 0;
let mut state = Empty;
loop {
let byte = if i < end {
#[allow(clippy::indexing_slicing)] // protected by debug assertion
Some(path.as_bytes()[i])
let byte = if i < path.len() {
#[allow(clippy::indexing_slicing)] // iterator not const
Some(path[i])
} else {
None
};
Expand Down
40 changes: 9 additions & 31 deletions utils/tinystr/src/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl<const N: usize> TinyAsciiStr<N> {
/// `code_units` may contain at most `N` non-null ASCII code points.
#[inline]
pub const fn try_from_utf8(code_units: &[u8]) -> Result<Self, TinyStrError> {
Self::try_from_utf8_inner(code_units, 0, code_units.len(), false)
Self::try_from_utf8_inner(code_units, false)
}

/// Creates a `TinyAsciiStr<N>` from the given UTF-16 slice.
Expand Down Expand Up @@ -126,49 +126,27 @@ impl<const N: usize> TinyAsciiStr<N> {
/// assert!(matches!(TinyAsciiStr::<3>::try_from_raw(*b"\0A\0"), Err(_)));
/// ```
pub const fn try_from_raw(raw: [u8; N]) -> Result<Self, TinyStrError> {
Self::try_from_utf8_inner(&raw, 0, N, true)
}

/// Equivalent to [`try_from_utf8(bytes[start..end])`](Self::try_from_utf8),
/// but callable in a `const` context (which range indexing is not).
#[inline]
pub const fn try_from_utf8_manual_slice(
code_units: &[u8],
start: usize,
end: usize,
) -> Result<Self, TinyStrError> {
Self::try_from_utf8_inner(code_units, start, end, false)
}

/// Equivalent to [`try_from_utf16(bytes[start..end])`](Self::try_from_utf16),
/// but callable in a `const` context (which range indexing is not).
#[inline]
pub const fn try_from_utf16_manual_slice(
code_units: &[u16],
start: usize,
end: usize,
) -> Result<Self, TinyStrError> {
Self::try_from_utf16_inner(code_units, start, end, false)
Self::try_from_utf8_inner(&raw, true)
}

pub(crate) const fn try_from_utf8_inner(
code_units: &[u8],
start: usize,
end: usize,
allow_trailing_null: bool,
) -> Result<Self, TinyStrError> {
let len = end - start;
if len > N {
return Err(TinyStrError::TooLarge { max: N, len });
if code_units.len() > N {
return Err(TinyStrError::TooLarge {
max: N,
len: code_units.len(),
});
}

let mut out = [0; N];
let mut i = 0;
let mut found_null = false;
// Indexing is protected by TinyStrError::TooLarge
#[allow(clippy::indexing_slicing)]
while i < len {
let b = code_units[start + i];
while i < code_units.len() {
let b = code_units[i];

if b == 0 {
found_null = true;
Expand Down
2 changes: 1 addition & 1 deletion utils/tinystr/src/ule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ unsafe impl<const N: usize> ULE for TinyAsciiStr<N> {
}
// Validate the bytes
for chunk in bytes.chunks_exact(N) {
let _ = TinyAsciiStr::<N>::try_from_utf8_inner(chunk, 0, N, true)
let _ = TinyAsciiStr::<N>::try_from_utf8_inner(chunk, true)
.map_err(|_| ZeroVecError::parse::<Self>())?;
}
Ok(())
Expand Down
18 changes: 8 additions & 10 deletions utils/zerotrie/src/builder/konst/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,6 @@ impl<'a, T> ConstSlice<'a, T> {
}
}

/// Creates a [`ConstSlice`] with the given start and limit.
pub const fn from_manual_slice(full_slice: &'a [T], start: usize, limit: usize) -> Self {
ConstSlice {
full_slice,
start,
limit,
}
}

/// Returns the length of the [`ConstSlice`].
pub const fn len(&self) -> usize {
self.limit - self.start
Expand Down Expand Up @@ -147,7 +138,14 @@ impl<const N: usize, T> ConstArrayBuilder<N, T> {

/// Returns the initialized elements as a [`ConstSlice`].
pub const fn as_const_slice(&self) -> ConstSlice<T> {
ConstSlice::from_manual_slice(&self.full_array, self.start, self.limit)
ConstSlice::from_slice(
&self
.full_array
.split_at(self.limit)
.0
.split_at(self.start)
.1,
)
}

/// Non-const function that returns a slice of the initialized elements.
Expand Down

0 comments on commit 882aafc

Please sign in to comment.