Skip to content

Commit

Permalink
Tweak the threshold for chunked swapping
Browse files Browse the repository at this point in the history
Thanks to 98892 for the tests I brought in here, as it demonstrated that 3×usize is currently suboptimal.
  • Loading branch information
AngelicosPhosphoros authored and scottmcm committed Jan 20, 2024
1 parent 128148d commit e871acb
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
2 changes: 1 addition & 1 deletion library/core/src/mem/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
// tends to copy the whole thing to stack rather than doing it one part
// at a time, so instead treat them as one-element slices and piggy-back
// the slice optimizations that will split up the swaps.
if size_of::<T>() / align_of::<T>() > 4 {
if const { size_of::<T>() / align_of::<T>() > 2 } {
// SAFETY: exclusive references always point to one non-overlapping
// element and are non-null and properly aligned.
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
Expand Down
48 changes: 39 additions & 9 deletions tests/codegen/swap-small-types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,32 @@ pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
swap(x, y)
}

// CHECK-LABEL: @swap_vecs
#[no_mangle]
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
// CHECK-NOT: alloca
// CHECK: ret void
swap(x, y)
}

// CHECK-LABEL: @swap_slices
#[no_mangle]
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
// CHECK-NOT: alloca
// CHECK: ret void
swap(x, y)
}

// LLVM doesn't vectorize a loop over 3-byte elements,
// so we chunk it down to bytes and loop over those instead.
type RGB24 = [u8; 3];

// CHECK-LABEL: @swap_rgb24_slices
#[no_mangle]
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i8>
// CHECK: store <{{[0-9]+}} x i8>
if x.len() == y.len() {
x.swap_with_slice(y);
}
Expand All @@ -69,9 +85,9 @@ type RGBA32 = [u8; 4];
// CHECK-LABEL: @swap_rgba32_slices
#[no_mangle]
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i32>
// CHECK: store <{{[0-9]+}} x i32>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i32>
// CHECK: store <{{[0-9]+}} x i32>
if x.len() == y.len() {
x.swap_with_slice(y);
}
Expand All @@ -84,10 +100,24 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
// CHECK-LABEL: @swap_string_slices
#[no_mangle]
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
// CHECK-NOT: alloca
// CHECK: load <{{[0-9]+}} x i64>
// CHECK: store <{{[0-9]+}} x i64>
if x.len() == y.len() {
x.swap_with_slice(y);
}
}

#[repr(C, packed)]
pub struct Packed {
pub first: bool,
pub second: usize,
}

// CHECK-LABEL: @swap_packed_structs
#[no_mangle]
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
// CHECK-NOT: alloca
// CHECK: ret void
swap(x, y)
}

0 comments on commit e871acb

Please sign in to comment.