forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of rust-lang#123778 - jhorstmann:optimize-upper-lower-auto-vectorization, r=the8472
Improve autovectorization of to_lowercase / to_uppercase functions. Refactor the code in the `convert_while_ascii` helper function to make it more suitable for auto-vectorization and also process the full ASCII prefix of the string. The generic case conversion logic will only be invoked starting from the first non-ASCII character. The runtime on a microbenchmark with a small ASCII-only input decreases from ~55ns to ~18ns per iteration. The new implementation also reduces the amount of unsafe code and encapsulates all unsafe inside the helper function. Fixes rust-lang#123712.
- Loading branch information
Showing
4 changed files
with
122 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
tests/codegen/issues/issue-123712-str-to-lower-autovectorization.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
//@ compile-flags: -Copt-level=3 | ||
#![crate_type = "lib"] | ||
|
||
/// Ensure that the ascii-prefix loop for `str::to_lowercase` and `str::to_uppercase` uses vector | ||
/// instructions. Since these methods do not get inlined, the relevant code is duplicated here and | ||
/// should be updated when the implementation changes. | ||
// CHECK-LABEL: @lower_while_ascii | ||
// CHECK: [[A:%[0-9]]] = load <16 x i8> | ||
// CHECK-NEXT: [[B:%[0-9]]] = icmp slt <16 x i8> [[A]], zeroinitializer | ||
// CHECK-NEXT: [[C:%[0-9]]] = bitcast <16 x i1> [[B]] to i16 | ||
#[no_mangle] | ||
pub fn lower_while_ascii(mut input: &[u8], mut output: &mut [u8]) -> usize { | ||
// Lowercases the leading run of all-ASCII, N-byte chunks of `input` into `output`. | ||
// | ||
// Returns the number of bytes converted, which is always a multiple of N. Conversion | ||
// stops at the first chunk that contains a byte above 127, or when fewer than N bytes | ||
// of input remain; those trailing bytes are neither inspected further nor written. | ||
// | ||
// Panics if `output` is shorter than `input` (see the slice truncation below). | ||
// | ||
// process the input in chunks to enable auto-vectorization | ||
const USIZE_SIZE: usize = core::mem::size_of::<usize>(); | ||
const MAGIC_UNROLL: usize = 2; | ||
// Chunk width in bytes: two machine words, i.e. 16 bytes when `usize` is 8 bytes wide. | ||
const N: usize = USIZE_SIZE * MAGIC_UNROLL; | ||
|
||
// Truncate `output` to the length of `input` so both slices shrink in lockstep below. | ||
// This checked indexing is the only place that can panic on a too-short `output`. | ||
output = &mut output[..input.len()]; | ||
|
||
let mut ascii_prefix_len = 0_usize; | ||
// Per-byte "is ASCII" flags for the chunk currently being examined. | ||
let mut is_ascii = [false; N]; | ||
|
||
while input.len() >= N { | ||
// SAFETY: the loop condition guarantees `input.len() >= N`. | ||
let chunk = unsafe { input.get_unchecked(..N) }; | ||
// SAFETY: `output` was truncated to `input.len()` before the loop and both slices | ||
// advance by exactly N per iteration, so `output.len() == input.len() >= N`. | ||
let out_chunk = unsafe { output.get_unchecked_mut(..N) }; | ||
|
||
// Record, for every byte of the chunk, whether it is in the ASCII range (0..=127). | ||
for j in 0..N { | ||
is_ascii[j] = chunk[j] <= 127; | ||
} | ||
|
||
// auto-vectorization for this check is a bit fragile, | ||
// summing the flags and comparing against the chunk size gives the best result, | ||
// specifically a pmovmsk instruction on x86. | ||
// Each flag contributes 0 or 1, so the `u8` sum is at most N and equals N only | ||
// when every byte in the chunk is ASCII. | ||
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N { | ||
break; | ||
} | ||
|
||
// The whole chunk is ASCII: write its lowercased bytes to the output chunk. | ||
for j in 0..N { | ||
out_chunk[j] = chunk[j].to_ascii_lowercase(); | ||
} | ||
|
||
ascii_prefix_len += N; | ||
// SAFETY: `input.len() >= N` (loop condition), so `N..` is in bounds. | ||
input = unsafe { input.get_unchecked(N..) }; | ||
// SAFETY: `output.len() == input.len()` held at the top of this iteration, | ||
// so `N..` is in bounds for `output` as well. | ||
output = unsafe { output.get_unchecked_mut(N..) }; | ||
} | ||
|
||
// Number of bytes converted; always a multiple of N. | ||
ascii_prefix_len | ||
} |