Skip to content

Commit

Permalink
improve code and doc
Browse files Browse the repository at this point in the history
  • Loading branch information
magiclen committed Nov 20, 2023
1 parent feb8ae6 commit 39ad661
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "utf8-width"
version = "0.1.6"
version = "0.1.7"
authors = ["Magic Len <len@magiclen.org>"]
edition = "2021"
rust-version = "1.56"
Expand Down
32 changes: 17 additions & 15 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ assert_eq!(3, utf8_width::get_width("中".as_bytes()[0]));
```bash
cargo bench
```
*/

#![no_std]
Expand All @@ -36,33 +35,33 @@ pub const MIN_4: u8 = 0xF0;
pub const MAX_4: u8 = 0xF4;

#[inline]
pub fn is_width_1(byte: u8) -> bool {
pub const fn is_width_1(byte: u8) -> bool {
byte <= MAX_1 // no need to check `MIN_1 <= byte`
}

#[inline]
pub fn is_width_2(byte: u8) -> bool {
(MIN_2..=MAX_2).contains(&byte)
pub const fn is_width_2(byte: u8) -> bool {
byte >= MIN_2 && byte <= MAX_2
}

#[inline]
pub fn is_width_3(byte: u8) -> bool {
(MIN_3..=MAX_3).contains(&byte)
pub const fn is_width_3(byte: u8) -> bool {
byte >= MIN_3 && byte <= MAX_3
}

#[inline]
pub fn is_width_4(byte: u8) -> bool {
(MIN_4..=MAX_4).contains(&byte)
pub const fn is_width_4(byte: u8) -> bool {
byte >= MIN_4 && byte <= MAX_4
}

#[inline]
pub fn is_width_0(byte: u8) -> bool {
(MIN_0_1..=MAX_0_1).contains(&byte) || MIN_0_2 <= byte // no need to check `byte <= MAX_0_2`
pub const fn is_width_0(byte: u8) -> bool {
byte >= MIN_0_1 && byte <= MAX_0_1 || MIN_0_2 <= byte // no need to check `byte <= MAX_0_2`
}

/// Given a first byte, determines how many bytes are in this UTF-8 character. If the UTF-8 character is invalid, returns `0`, otherwise returns `1` ~ `4`,
/// Given a first byte, determine how many bytes are in this UTF-8 character. If the UTF-8 character is invalid, return `0`; otherwise, return `1` to `4`.
#[inline]
pub fn get_width(byte: u8) -> usize {
pub const fn get_width(byte: u8) -> usize {
if is_width_1(byte) {
1
} else if is_width_2(byte) {
Expand All @@ -78,10 +77,13 @@ pub fn get_width(byte: u8) -> usize {
}
}

#[allow(clippy::missing_safety_doc)]
/// *Assume the input first byte is from a valid UTF-8 character.* Given a first byte, determines how many bytes are in this UTF-8 character. It returns `1` ~ `4`,
/// *Assuming the input first byte is from a valid UTF-8 character*, determine how many bytes are in this UTF-8 character. It returns `1` to `4`.
///
/// # Safety
///
/// You must ensure that the input byte is a valid UTF-8 first byte on your own.
#[inline]
pub unsafe fn get_width_assume_valid(byte: u8) -> usize {
pub const unsafe fn get_width_assume_valid(byte: u8) -> usize {
if byte <= MAX_1 {
1
} else if byte <= MAX_2 {
Expand Down

0 comments on commit 39ad661

Please sign in to comment.