Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support char::encode_utf8 in const scenarios. #130511

Merged
merged 1 commit into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 14 additions & 18 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -672,8 +672,9 @@ impl char {
/// 'ß'.encode_utf8(&mut b);
/// ```
#[stable(feature = "unicode_encode_char", since = "1.15.0")]
#[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
#[inline]
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
pub const fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
// SAFETY: `char` is not a surrogate, so this is valid UTF-8.
unsafe { from_utf8_unchecked_mut(encode_utf8_raw(self as u32, dst)) }
}
Expand Down Expand Up @@ -1735,14 +1736,11 @@ impl EscapeDebugExtArgs {

#[inline]
const fn len_utf8(code: u32) -> usize {
if code < MAX_ONE_B {
1
} else if code < MAX_TWO_B {
2
} else if code < MAX_THREE_B {
3
} else {
4
match code {
..MAX_ONE_B => 1,
..MAX_TWO_B => 2,
..MAX_THREE_B => 3,
_ => 4,
}
}

Expand All @@ -1760,11 +1758,12 @@ const fn len_utf8(code: u32) -> usize {
/// Panics if the buffer is not large enough.
/// A buffer of length four is large enough to encode any `char`.
#[unstable(feature = "char_internals", reason = "exposed only for libstd", issue = "none")]
#[rustc_const_unstable(feature = "const_char_encode_utf8", issue = "130512")]
#[doc(hidden)]
#[inline]
pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
pub const fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
let len = len_utf8(code);
match (len, &mut dst[..]) {
bjoernager marked this conversation as resolved.
Show resolved Hide resolved
match (len, &mut *dst) {
(1, [a, ..]) => {
*a = code as u8;
}
Expand All @@ -1783,14 +1782,11 @@ pub fn encode_utf8_raw(code: u32, dst: &mut [u8]) -> &mut [u8] {
*c = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*d = (code & 0x3F) as u8 | TAG_CONT;
}
_ => panic!(
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
len,
code,
dst.len(),
),
// Note that we cannot format in constant expressions.
bjoernager marked this conversation as resolved.
Show resolved Hide resolved
_ => panic!("encode_utf8: buffer does not have enough bytes to encode code point"),
};
&mut dst[..len]
// SAFETY: `<&mut [u8]>::as_mut_ptr` is guaranteed to return a valid pointer and `len` has been tested to be within bounds.
unsafe { slice::from_raw_parts_mut(dst.as_mut_ptr(), len) }
bjoernager marked this conversation as resolved.
Show resolved Hide resolved
}

/// Encodes a raw u32 value as UTF-16 into the provided `u16` buffer,
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
#![feature(const_bigint_helper_methods)]
#![feature(const_black_box)]
#![feature(const_cell_into_inner)]
#![feature(const_char_encode_utf8)]
#![feature(const_eval_select)]
#![feature(const_exact_div)]
#![feature(const_float_classify)]
Expand Down
Loading