Skip to content

Commit

Permalink
nontemporal_store: make sure that the intrinsic is truly just a hint
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfJung committed Jul 24, 2024
1 parent 2ccafed commit d1e92af
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 15 deletions.
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,7 +725,8 @@ fn codegen_regular_intrinsic_call<'tcx>(

// Cranelift treats stores as volatile by default
// FIXME correctly handle unaligned_volatile_store
// FIXME actually do nontemporal stores if requested
// FIXME actually do nontemporal stores if requested (but do not just emit MOVNT on x86;
// see the LLVM backend for details)
let dest = CPlace::for_ptr(Pointer::new(ptr), val.layout());
dest.write_cvalue(fx, val);
}
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_codegen_gcc/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1128,6 +1128,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
self.llbb().add_assignment(self.location, aligned_destination, val);
// TODO(antoyo): handle align and flags.
// NOTE: dummy value here since it's never used. FIXME(antoyo): API should not return a value here?
// When adding support for NONTEMPORAL, make sure to not just emit MOVNT on x86; see the
// LLVM backend for details.
self.cx.context.new_rvalue_zero(self.type_i32())
}

Expand Down
30 changes: 23 additions & 7 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -725,13 +725,29 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
llvm::LLVMSetVolatile(store, llvm::True);
}
if flags.contains(MemFlags::NONTEMPORAL) {
// According to LLVM [1] building a nontemporal store must
// *always* point to a metadata value of the integer 1.
//
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
let one = self.cx.const_i32(1);
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
// Make sure that the current target architectures supports "sane" non-temporal
// stores, i.e., non-temporal stores that are equivalent to regular stores except
// for performance. LLVM doesn't seem to care about this, and will happily treat
// `!nontemporal` stores as-if they were normal stores (for reordering optimizations
// etc) even on x86, despite later lowering them to MOVNT which do *not* behave like
// regular stores but require special fences.
// So we keep a list of architectures where `!nontemporal` is known to be truly just
// a hint, and use regular stores everywhere else.
// (In the future, we could alternatively ensure that an sfence gets emitted after a sequence of movnt
// before any kind of synchronizing operation. But it's not clear how to do that with LLVM.)
const WELL_BEHAVED_NONTEMPORAL_ARCHS: &[&str] = &["aarch64", "arm"];

let use_nontemporal =
WELL_BEHAVED_NONTEMPORAL_ARCHS.contains(&&*self.cx.tcx.sess.target.arch);
if use_nontemporal {
// According to LLVM [1] building a nontemporal store must
// *always* point to a metadata value of the integer 1.
//
// [1]: https://llvm.org/docs/LangRef.html#store-instruction
let one = self.cx.const_i32(1);
let node = llvm::LLVMMDNodeInContext(self.cx.llcx, &one, 1);
llvm::LLVMSetMetadata(store, llvm::MD_nontemporal as c_uint, node);
}
}
store
}
Expand Down
10 changes: 5 additions & 5 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2405,12 +2405,12 @@ extern "rust-intrinsic" {
#[rustc_nounwind]
pub fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32;

/// Emits a `!nontemporal` store according to LLVM (see their docs).
/// Probably will never become stable.
/// Emits a `nontemporal` store, which gives a hint to the CPU that the data should not be held
/// in cache. Except for performance, this is fully equivalent to `ptr.write(val)`.
///
/// Do NOT use this intrinsic; "nontemporal" operations do not exist in our memory model!
/// It exists to support current stdarch, but the plan is to change stdarch and remove this intrinsic.
/// See <https://github.com/rust-lang/rust/issues/114582> for some more discussion.
/// Not all architectures provide such an operation. For instance, x86 does not: while `MOVNT`
/// exists, that operation is *not* equivalent to `ptr.write(val)` (`MOVNT` writes can be reordered
/// in ways that are not allowed for regular writes).
#[rustc_nounwind]
pub fn nontemporal_store<T>(ptr: *mut T, val: T);

Expand Down
19 changes: 17 additions & 2 deletions tests/codegen/intrinsics/nontemporal.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
//@ compile-flags: -O
//@ compile-flags: --target aarch64-unknown-linux-gnu
//@ needs-llvm-components: aarch64

#![feature(core_intrinsics)]
#![feature(no_core, lang_items, intrinsics)]
#![no_core]
#![crate_type = "lib"]

#[lang = "sized"]
pub trait Sized {}
#[lang = "copy"]
pub trait Copy {}

impl Copy for u32 {}
impl<T> Copy for *mut T {}

extern "rust-intrinsic" {
pub fn nontemporal_store<T>(ptr: *mut T, val: T);
}

#[no_mangle]
pub fn a(a: &mut u32, b: u32) {
// CHECK-LABEL: define{{.*}}void @a
// CHECK: store i32 %b, ptr %a, align 4, !nontemporal
unsafe {
std::intrinsics::nontemporal_store(a, b);
nontemporal_store(a, b);
}
}

0 comments on commit d1e92af

Please sign in to comment.