From a1e5c65aa451473d1614fc04cf3c0e67247a17f1 Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Tue, 2 May 2023 17:23:54 -0700
Subject: [PATCH] `assume` the runtime range of `align_offset`

Found when I saw code with `align_to` having extraneous checks.
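
As an illustrative sketch (hypothetical code, not the exact snippet from that
report; it mirrors the `align_to4` case in the new codegen test), a bound
check like the one below previously survived into the generated code, but now
folds to a constant:

    #[repr(align(4))]
    pub struct Align4([u8; 4]);

    pub fn prefix_is_short(x: &[u8]) -> bool {
        // SAFETY: `Align4` is a plain wrapper around bytes, so any byte
        // pattern is a valid `Align4`.
        let (prefix, _middle, _suffix) = unsafe { x.align_to::<Align4>() };
        // The prefix is always shorter than the requested alignment (4 here),
        // but LLVM could not previously see that, so this comparison was
        // emitted as a runtime check. With the `assume` it folds to `true`.
        prefix.len() < 4
    }

The `assume` hands LLVM the `[0, a)` range of the byte offset directly, which
is what lets comparisons like the one above be proven away.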
---
 library/core/src/lib.rs       |  1 +
 library/core/src/ptr/mod.rs   | 14 +++++--
 tests/codegen/align-offset.rs | 78 +++++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+), 4 deletions(-)
 create mode 100644 tests/codegen/align-offset.rs

diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs
index a535a011aafe2..ed0c05a686319 100644
--- a/library/core/src/lib.rs
+++ b/library/core/src/lib.rs
@@ -107,6 +107,7 @@
 #![feature(const_arguments_as_str)]
 #![feature(const_array_from_ref)]
 #![feature(const_array_into_iter_constructors)]
+#![feature(const_assume)]
 #![feature(const_bigint_helper_methods)]
 #![feature(const_black_box)]
 #![feature(const_caller_location)]
diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs
index 5f55f762ad555..4737ff5d756d1 100644
--- a/library/core/src/ptr/mod.rs
+++ b/library/core/src/ptr/mod.rs
@@ -1632,8 +1632,8 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usiz
     // FIXME(#75598): Direct use of these intrinsics improves codegen significantly at opt-level <=
     // 1, where the method versions of these operations are not inlined.
     use intrinsics::{
-        cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl, unchecked_shr,
-        unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
+        assume, cttz_nonzero, exact_div, mul_with_overflow, unchecked_rem, unchecked_shl,
+        unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub,
     };
 
     /// Calculate multiplicative modular inverse of `x` modulo `m`.
@@ -1724,12 +1724,18 @@ pub(crate) const unsafe fn align_offset<T: Sized>(p: *const T, a: usize) -> usiz
     // in a branch-free way and then bitwise-OR it with whatever result the `-p mod a`
     // computation produces.
 
+    let aligned_address = wrapping_add(addr, a_minus_one) & wrapping_sub(0, a);
+    let byte_offset = wrapping_sub(aligned_address, addr);
+    // FIXME: Remove the assume after
+    // SAFETY: Masking by `-a` can only affect the low bits, and thus cannot have reduced
+    // the value by more than `a-1`, so even though the intermediate values might have
+    // wrapped, the byte_offset is always in `[0, a)`.
+    unsafe { assume(byte_offset < a) };
+
     // SAFETY: `stride == 0` case has been handled by the special case above.
     let addr_mod_stride = unsafe { unchecked_rem(addr, stride) };
 
     return if addr_mod_stride == 0 {
-        let aligned_address = wrapping_add(addr, a_minus_one) & wrapping_sub(0, a);
-        let byte_offset = wrapping_sub(aligned_address, addr);
         // SAFETY: `stride` is non-zero. This is guaranteed to divide exactly as well, because
         // addr has been verified to be aligned to the original type’s alignment requirements.
         unsafe { exact_div(byte_offset, stride) }
diff --git a/tests/codegen/align-offset.rs b/tests/codegen/align-offset.rs
new file mode 100644
index 0000000000000..7c7660c5a55ab
--- /dev/null
+++ b/tests/codegen/align-offset.rs
@@ -0,0 +1,78 @@
+// compile-flags: -O
+// min-llvm-version: 15.0 (because we're using opaque pointers)
+// ignore-debug (debug assertions in `slice::from_raw_parts` block optimizations)
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @align8
+#[no_mangle]
+pub fn align8(p: *const u8) -> bool {
+    // CHECK: ret i1 true
+    p.align_offset(8) < 8
+}
+
+#[repr(align(4))]
+pub struct Align4([u8; 4]);
+
+// CHECK-LABEL: @align_to4
+#[no_mangle]
+pub fn align_to4(x: &[u8]) -> bool {
+    // CHECK: ret i1 true
+    let (prefix, _middle, suffix) = unsafe { x.align_to::<Align4>() };
+    prefix.len() < 4 && suffix.len() < 4
+}
+
+// CHECK-LABEL: @align_offset_byte_ptr(ptr{{.+}}%ptr)
+#[no_mangle]
+pub fn align_offset_byte_ptr(ptr: *const u8) -> usize {
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %ptr to [[USIZE:i[0-9]+]]
+    // CHECK: %[[UP:.+]] = add [[USIZE]] %[[ADDR]], 31
+    // CHECK: %[[ALIGNED:.+]] = and [[USIZE]] %[[UP]], -32
+    // CHECK: %[[OFFSET:.+]] = sub [[USIZE]] %[[ALIGNED]], %[[ADDR]]
+
+    // Since we're offsetting a byte pointer, there's no further fixups
+    // CHECK-NOT: shr
+    // CHECK-NOT: div
+    // CHECK-NOT: select
+
+    // CHECK: ret [[USIZE]] %[[OFFSET]]
+    ptr.align_offset(32)
+}
+
+// CHECK-LABEL: @align_offset_word_slice(ptr{{.+}}align 4{{.+}}%slice.0
+#[no_mangle]
+pub fn align_offset_word_slice(slice: &[Align4]) -> usize {
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %slice.0 to [[USIZE]]
+    // CHECK: %[[UP:.+]] = add [[USIZE]] %[[ADDR]], 31
+    // CHECK: %[[ALIGNED:.+]] = and [[USIZE]] %[[UP]], -32
+    // CHECK: %[[BOFFSET:.+]] = sub [[USIZE]] %[[ALIGNED]], %[[ADDR]]
+    // CHECK: %[[OFFSET:.+]] = lshr exact [[USIZE]] %[[BOFFSET]], 2
+
+    // Slices are known to be aligned, so we don't need the "maybe -1" path
+    // CHECK-NOT: select
+
+    // CHECK: ret [[USIZE]] %[[OFFSET]]
+    slice.as_ptr().align_offset(32)
+}
+
+
+// CHECK-LABEL: @align_offset_word_ptr(ptr{{.+}}%ptr
+#[no_mangle]
+pub fn align_offset_word_ptr(ptr: *const Align4) -> usize {
+    // CHECK: %[[ADDR:.+]] = ptrtoint ptr %ptr to [[USIZE]]
+    // CHECK: %[[UP:.+]] = add [[USIZE]] %[[ADDR]], 31
+    // CHECK: %[[ALIGNED:.+]] = and [[USIZE]] %[[UP]], -32
+    // CHECK: %[[BOFFSET:.+]] = sub [[USIZE]] %[[ALIGNED]], %[[ADDR]]
+
+    // While we can always get a *byte* offset that will work, if the original
+    // pointer is unaligned it might be impossible to return an *element* offset
+    // that will make it aligned. We want it to be a `select`, not a `br`, so
+    // that the assembly will be branchless.
+    // CHECK: %[[LOW:.+]] = and [[USIZE]] %[[ADDR]], 3
+    // CHECK: %[[ORIGINAL_ALIGNED:.+]] = icmp eq [[USIZE]] %[[LOW]], 0
+    // CHECK: %[[OFFSET:.+]] = lshr exact [[USIZE]] %[[BOFFSET]], 2
+    // CHECK: %[[R:.+]] = select i1 %[[ORIGINAL_ALIGNED]], [[USIZE]] %[[OFFSET]], [[USIZE]] -1
+
+    // CHECK: ret [[USIZE]] %[[R]]
+    ptr.align_offset(32)
+}