forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of rust-lang#76325 - lzutao:split-core-str, r=Amanieu
Split core/str/mod.rs into smaller files. Note for reviewer: * I split this into multiple commits for easier reviewing, but I could squash them all into one if requested. * I recommend pulling this change locally and using an advanced git diff viewer or this command: ```bash git show --reverse --color-moved=dimmed-zebra --color-moved-ws=ignore-all-space master.. ``` --- I split `core/str/mod.rs` into these modules: * `converts`: Contains helper functions to convert from bytes to str. * `error`: For error structs like Utf8Error. * `iter`: For iterators of many str methods. * `traits`: For indexing operations and built-in traits on str. * `validations`: For functions validating UTF-8 --- this name is awkward, maybe utf8.rs is better.
- Loading branch information
Showing
7 changed files
with
2,507 additions
and
2,445 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
//! Ways to create a `str` from a byte slice. | ||
|
||
use crate::mem; | ||
|
||
use super::validations::run_utf8_validation; | ||
use super::Utf8Error; | ||
|
||
/// Converts a slice of bytes to a string slice. | ||
/// | ||
/// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice | ||
/// ([`&[u8]`][byteslice]) is made of bytes, so this function converts between | ||
/// the two. Not all byte slices are valid string slices, however: [`&str`] requires | ||
/// that it is valid UTF-8. `from_utf8()` checks to ensure that the bytes are valid | ||
/// UTF-8, and then does the conversion. | ||
/// | ||
/// [`&str`]: str | ||
/// [byteslice]: ../../std/primitive.slice.html | ||
/// | ||
/// If you are sure that the byte slice is valid UTF-8, and you don't want to | ||
/// incur the overhead of the validity check, there is an unsafe version of | ||
/// this function, [`from_utf8_unchecked`], which has the same | ||
/// behavior but skips the check. | ||
/// | ||
/// If you need a `String` instead of a `&str`, consider | ||
/// [`String::from_utf8`][string]. | ||
/// | ||
/// [string]: ../../std/string/struct.String.html#method.from_utf8 | ||
/// | ||
/// Because you can stack-allocate a `[u8; N]`, and you can take a | ||
/// [`&[u8]`][byteslice] of it, this function is one way to have a | ||
/// stack-allocated string. There is an example of this in the | ||
/// examples section below. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Returns `Err` if the slice is not valid UTF-8, with a [`Utf8Error`] | ||
/// describing why the provided slice is not UTF-8. | ||
/// | ||
/// # Examples | ||
/// | ||
/// Basic usage: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // some bytes, in a vector | ||
/// let sparkle_heart = vec![240, 159, 146, 150]; | ||
/// | ||
/// // We know these bytes are valid, so just use `unwrap()`. | ||
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); | ||
/// | ||
/// assert_eq!("💖", sparkle_heart); | ||
/// ``` | ||
/// | ||
/// Incorrect bytes: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // some invalid bytes, in a vector | ||
/// let sparkle_heart = vec![0, 159, 146, 150]; | ||
/// | ||
/// assert!(str::from_utf8(&sparkle_heart).is_err()); | ||
/// ``` | ||
/// | ||
/// See the docs for [`Utf8Error`] for more details on the kinds of | ||
/// errors that can be returned. | ||
/// | ||
/// A "stack allocated string": | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // some bytes, in a stack-allocated array | ||
/// let sparkle_heart = [240, 159, 146, 150]; | ||
/// | ||
/// // We know these bytes are valid, so just use `unwrap()`. | ||
/// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap(); | ||
/// | ||
/// assert_eq!("💖", sparkle_heart); | ||
/// ``` | ||
#[stable(feature = "rust1", since = "1.0.0")] | ||
pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> { | ||
// Validate first; `?` returns the `Utf8Error` to the caller on failure. | ||
run_utf8_validation(v)?; | ||
// SAFETY: `run_utf8_validation` returned `Ok` above (otherwise `?` would | ||
// have returned early), so `v` is valid UTF-8 as `from_utf8_unchecked` | ||
// requires. | ||
Ok(unsafe { from_utf8_unchecked(v) }) | ||
} | ||
|
||
/// Converts a mutable slice of bytes to a mutable string slice. | ||
/// | ||
/// The bytes are validated as UTF-8 before the conversion; see the immutable | ||
/// version, [`from_utf8`], for more information. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Returns `Err` with a [`Utf8Error`] if the slice is not valid UTF-8. | ||
/// | ||
/// # Examples | ||
/// | ||
/// Basic usage: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // "Hello, Rust!" as a mutable vector | ||
/// let mut hellorust = vec![72, 101, 108, 108, 111, 44, 32, 82, 117, 115, 116, 33]; | ||
/// | ||
/// // As we know these bytes are valid, we can use `unwrap()` | ||
/// let outstr = str::from_utf8_mut(&mut hellorust).unwrap(); | ||
/// | ||
/// assert_eq!("Hello, Rust!", outstr); | ||
/// ``` | ||
/// | ||
/// Incorrect bytes: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // Some invalid bytes in a mutable vector | ||
/// let mut invalid = vec![128, 223]; | ||
/// | ||
/// assert!(str::from_utf8_mut(&mut invalid).is_err()); | ||
/// ``` | ||
/// | ||
/// See the docs for [`Utf8Error`] for more details on the kinds of | ||
/// errors that can be returned. | ||
#[stable(feature = "str_mut_extras", since = "1.20.0")] | ||
pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> { | ||
// Validate first; `?` returns the `Utf8Error` to the caller on failure. | ||
run_utf8_validation(v)?; | ||
// SAFETY: `run_utf8_validation` returned `Ok` above (otherwise `?` would | ||
// have returned early), so `v` is valid UTF-8 as `from_utf8_unchecked_mut` | ||
// requires. | ||
Ok(unsafe { from_utf8_unchecked_mut(v) }) | ||
} | ||
|
||
/// Converts a slice of bytes to a string slice without checking | ||
/// that the string contains valid UTF-8. | ||
/// | ||
/// See the safe version, [`from_utf8`], for more information. | ||
/// | ||
/// # Safety | ||
/// | ||
/// This function is unsafe because it does not check that the bytes passed to | ||
/// it are valid UTF-8. If this constraint is violated, undefined behavior | ||
/// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8. | ||
/// | ||
/// [`&str`]: str | ||
/// | ||
/// # Examples | ||
/// | ||
/// Basic usage: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // some bytes, in a vector | ||
/// let sparkle_heart = vec![240, 159, 146, 150]; | ||
/// | ||
/// let sparkle_heart = unsafe { | ||
/// str::from_utf8_unchecked(&sparkle_heart) | ||
/// }; | ||
/// | ||
/// assert_eq!("💖", sparkle_heart); | ||
/// ``` | ||
#[inline] | ||
#[stable(feature = "rust1", since = "1.0.0")] | ||
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked", issue = "75196")] | ||
// NOTE(review): the body calls `mem::transmute` inside a `const fn`, which is | ||
// presumably why `const_fn_transmute` is allowed here — confirm before removing. | ||
#[allow_internal_unstable(const_fn_transmute)] | ||
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str { | ||
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8. | ||
// Also relies on `&str` and `&[u8]` having the same layout. | ||
unsafe { mem::transmute(v) } | ||
} | ||
|
||
/// Converts a slice of bytes to a string slice without checking | ||
/// that the string contains valid UTF-8; mutable version. | ||
/// | ||
/// See the immutable version, [`from_utf8_unchecked()`], for more information. | ||
/// | ||
/// # Safety | ||
/// | ||
/// This function does not check that the bytes passed to it are valid UTF-8; | ||
/// the caller must guarantee that they are. | ||
/// | ||
/// # Examples | ||
/// | ||
/// Basic usage: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// let mut heart = vec![240, 159, 146, 150]; | ||
/// let heart = unsafe { str::from_utf8_unchecked_mut(&mut heart) }; | ||
/// | ||
/// assert_eq!("💖", heart); | ||
/// ``` | ||
#[inline] | ||
#[stable(feature = "str_mut_extras", since = "1.20.0")] | ||
pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { | ||
// SAFETY: the caller must guarantee that the bytes `v` | ||
// are valid UTF-8, thus the cast to `*mut str` is safe. | ||
// Also, the pointer dereference is safe because that pointer | ||
// comes from a reference which is guaranteed to be valid for writes. | ||
unsafe { &mut *(v as *mut [u8] as *mut str) } | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
//! Defines the error types of the `str` module: `Utf8Error` and `ParseBoolError`. | ||
|
||
use crate::fmt; | ||
|
||
/// Errors which can occur when attempting to interpret a sequence of [`u8`] | ||
/// as a string. | ||
/// | ||
/// The `from_utf8` family of functions and methods for both [`String`]s | ||
/// and [`&str`]s make use of this error. | ||
/// | ||
/// [`String`]: ../../std/string/struct.String.html#method.from_utf8 | ||
/// [`&str`]: super::from_utf8 | ||
/// | ||
/// # Examples | ||
/// | ||
/// This error type’s methods can be used to create functionality | ||
/// similar to `String::from_utf8_lossy` without allocating heap memory: | ||
/// | ||
/// ``` | ||
/// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) { | ||
/// loop { | ||
/// match std::str::from_utf8(input) { | ||
/// Ok(valid) => { | ||
/// push(valid); | ||
/// break | ||
/// } | ||
/// Err(error) => { | ||
/// let (valid, after_valid) = input.split_at(error.valid_up_to()); | ||
/// unsafe { | ||
/// push(std::str::from_utf8_unchecked(valid)) | ||
/// } | ||
/// push("\u{FFFD}"); | ||
/// | ||
/// if let Some(invalid_sequence_length) = error.error_len() { | ||
/// input = &after_valid[invalid_sequence_length..] | ||
/// } else { | ||
/// break | ||
/// } | ||
/// } | ||
/// } | ||
/// } | ||
/// } | ||
/// ``` | ||
#[derive(Copy, Eq, PartialEq, Clone, Debug)] | ||
#[stable(feature = "rust1", since = "1.0.0")] | ||
pub struct Utf8Error { | ||
// Index in the input up to which valid UTF-8 was verified; exposed | ||
// through `valid_up_to()`. | ||
pub(super) valid_up_to: usize, | ||
// Length of the invalid byte sequence, or `None` when the end of the | ||
// input was reached unexpectedly; exposed through `error_len()`. | ||
pub(super) error_len: Option<u8>, | ||
} | ||
|
||
impl Utf8Error { | ||
/// Returns the index in the given string up to which valid UTF-8 was | ||
/// verified. | ||
/// | ||
/// It is the maximum index such that `from_utf8(&input[..index])` | ||
/// would return `Ok(_)`. | ||
/// | ||
/// # Examples | ||
/// | ||
/// Basic usage: | ||
/// | ||
/// ``` | ||
/// use std::str; | ||
/// | ||
/// // some invalid bytes, in a vector | ||
/// let sparkle_heart = vec![0, 159, 146, 150]; | ||
/// | ||
/// // std::str::from_utf8 returns a Utf8Error | ||
/// let error = str::from_utf8(&sparkle_heart).unwrap_err(); | ||
/// | ||
/// // the second byte is invalid here | ||
/// assert_eq!(1, error.valid_up_to()); | ||
/// ``` | ||
#[stable(feature = "utf8_error", since = "1.5.0")] | ||
pub fn valid_up_to(&self) -> usize { | ||
self.valid_up_to | ||
} | ||
|
||
/// Provides more information about the failure: | ||
/// | ||
/// * `None`: the end of the input was reached unexpectedly. | ||
/// `self.valid_up_to()` is 1 to 3 bytes from the end of the input. | ||
/// If a byte stream (such as a file or a network socket) is being decoded incrementally, | ||
/// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks. | ||
/// | ||
/// * `Some(len)`: an unexpected byte was encountered. | ||
/// The length provided is that of the invalid byte sequence | ||
/// that starts at the index given by `valid_up_to()`. | ||
/// Decoding should resume after that sequence | ||
/// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of | ||
/// lossy decoding. | ||
/// | ||
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html | ||
#[stable(feature = "utf8_error_error_len", since = "1.20.0")] | ||
pub fn error_len(&self) -> Option<usize> { | ||
// The length is stored as a `u8`; widening it to `usize` for the public | ||
// return type cannot lose information. | ||
self.error_len.map(|len| len as usize) | ||
} | ||
} | ||
|
||
#[stable(feature = "rust1", since = "1.0.0")] | ||
impl fmt::Display for Utf8Error { | ||
// Writes one of two messages, both including the `valid_up_to` index: | ||
// an "invalid sequence" message with the sequence length when `error_len` | ||
// is `Some`, or an "incomplete sequence" message when it is `None`. | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
if let Some(error_len) = self.error_len { | ||
write!( | ||
f, | ||
"invalid utf-8 sequence of {} bytes from index {}", | ||
error_len, self.valid_up_to | ||
) | ||
} else { | ||
write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to) | ||
} | ||
} | ||
} | ||
|
||
/// An error returned when parsing a `bool` using [`from_str`] fails. | ||
/// | ||
/// [`from_str`]: super::FromStr::from_str | ||
#[derive(Debug, Clone, PartialEq, Eq)] | ||
#[stable(feature = "rust1", since = "1.0.0")] | ||
pub struct ParseBoolError { | ||
// Unit field visible only to the parent module, so code outside it cannot | ||
// construct this struct with a literal. | ||
pub(super) _priv: (), | ||
} | ||
|
||
#[stable(feature = "rust1", since = "1.0.0")] | ||
impl fmt::Display for ParseBoolError { | ||
// Emits a fixed message by delegating to `str`'s own `Display` impl rather | ||
// than `write!` — presumably so formatter options are handled by `str`; | ||
// TODO confirm. | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
"provided string was not `true` or `false`".fmt(f) | ||
} | ||
} |
Oops, something went wrong.