diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs
index 910aec9b8e1de..97326fe99a21a 100644
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@@ -1014,7 +1014,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
     ///
     /// We do this so Miri's allocation access tracking does not show the validation
     /// reads as spurious accesses.
-    pub(super) fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
+    pub fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
         // This deliberately uses `==` on `bool` to follow the pattern
         // `assert!(val.replace(new) == old)`.
         assert!(
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index 9df0d95f1f275..6fd207c92b937 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -617,9 +617,10 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         // the *value* (including the associated provenance if this is an AtomicPtr) at this location.
         // Only metadata on the location itself is used.
         let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place))?;
-        this.buffered_atomic_read(place, atomic, scalar, || {
+        let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, || {
             this.validate_atomic_load(place, atomic)
-        })
+        })?;
+        Ok(buffered_scalar.ok_or_else(|| err_ub!(InvalidUninitBytes(None)))?)
     }
 
     /// Perform an atomic write operation at the memory location.
@@ -632,14 +633,14 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(dest, AtomicAccessType::Store)?;
 
+        // Read the previous value so we can put it in the store buffer later.
+        // The program didn't actually do a read, so suppress the memory access hooks.
+        // This is also a very special exception where we just ignore an error -- if this read
+        // was UB e.g. because the memory is uninitialized, we don't want to know!
+        let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
         this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
         this.validate_atomic_store(dest, atomic)?;
-        // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause
-        // side effects from a read the program did not perform. So we have to initialise
-        // the store buffer with the value currently being written
-        // ONCE this is fixed please remove the hack in buffered_atomic_write() in weak_memory.rs
-        // https://github.com/rust-lang/miri/issues/2164
-        this.buffered_atomic_write(val, dest, atomic, val)
+        this.buffered_atomic_write(val, dest, atomic, old_val)
     }
 
     /// Perform an atomic RMW operation on a memory location.
@@ -768,7 +769,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
             // in the modification order.
             // Since `old` is only a value and not the store element, we need to separately
             // find it in our store buffer and perform load_impl on it.
-            this.perform_read_on_buffered_latest(place, fail, old.to_scalar())?;
+            this.perform_read_on_buffered_latest(place, fail)?;
         }
 
         // Return the old value.
diff --git a/src/tools/miri/src/concurrency/weak_memory.rs b/src/tools/miri/src/concurrency/weak_memory.rs
index 6f4171584a8f7..0605b744e6afd 100644
--- a/src/tools/miri/src/concurrency/weak_memory.rs
+++ b/src/tools/miri/src/concurrency/weak_memory.rs
@@ -39,11 +39,10 @@
 //! to attach store buffers to atomic objects. However, Rust follows LLVM in that it only has
 //! 'atomic accesses'. Therefore Miri cannot know when and where atomic 'objects' are being
 //! created or destroyed, to manage its store buffers. Instead, we hence lazily create an
-//! atomic object on the first atomic access to a given region, and we destroy that object
-//! on the next non-atomic or imperfectly overlapping atomic access to that region.
+//! atomic object on the first atomic write to a given region, and we destroy that object
+//! on the next non-atomic or imperfectly overlapping atomic write to that region.
 //! These lazy (de)allocations happen in memory_accessed() on non-atomic accesses, and
-//! get_or_create_store_buffer() on atomic accesses. This mostly works well, but it does
-//! lead to some issues (<https://github.com/rust-lang/miri/issues/2164>).
+//! get_or_create_store_buffer_mut() on atomic writes.
 //!
 //! One consequence of this difference is that safe/sound Rust allows for more operations on atomic locations
 //! than the C++20 atomic API was intended to allow, such as non-atomically accessing
@@ -144,11 +143,9 @@ struct StoreElement {
 
     /// The timestamp of the storing thread when it performed the store
     timestamp: VTimestamp,
-    /// The value of this store
-    // FIXME: this means the store must be fully initialized;
-    // we will have to change this if we want to support atomics on
-    // (partially) uninitialized data.
-    val: Scalar,
+    /// The value of this store. `None` means uninitialized.
+    // FIXME: Currently, we cannot represent partial initialization.
+    val: Option<Scalar>,
 
     /// Metadata about loads from this store element,
     /// behind a RefCell to keep load op take &self
@@ -170,7 +167,7 @@ impl StoreBufferAlloc {
 
     /// When a non-atomic access happens on a location that has been atomically accessed
     /// before without data race, we can determine that the non-atomic access fully happens
-    /// after all the prior atomic accesses so the location no longer needs to exhibit
+    /// after all the prior atomic writes so the location no longer needs to exhibit
     /// any weak memory behaviours until further atomic accesses.
     pub fn memory_accessed(&self, range: AllocRange, global: &DataRaceState) {
         if !global.ongoing_action_data_race_free() {
@@ -192,37 +189,29 @@ impl StoreBufferAlloc {
         }
     }
 
-    /// Gets a store buffer associated with an atomic object in this allocation,
-    /// or creates one with the specified initial value if no atomic object exists yet.
-    fn get_or_create_store_buffer<'tcx>(
+    /// Gets a store buffer associated with an atomic object in this allocation.
+    /// Returns `None` if there is no store buffer.
+    fn get_store_buffer<'tcx>(
         &self,
         range: AllocRange,
-        init: Scalar,
-    ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> {
+    ) -> InterpResult<'tcx, Option<Ref<'_, StoreBuffer>>> {
         let access_type = self.store_buffers.borrow().access_type(range);
         let pos = match access_type {
             AccessType::PerfectlyOverlapping(pos) => pos,
-            AccessType::Empty(pos) => {
-                let mut buffers = self.store_buffers.borrow_mut();
-                buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
-                pos
-            }
-            AccessType::ImperfectlyOverlapping(pos_range) => {
-                // Once we reach here we would've already checked that this access is not racy.
-                let mut buffers = self.store_buffers.borrow_mut();
-                buffers.remove_pos_range(pos_range.clone());
-                buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
-                pos_range.start
-            }
+            // If there is nothing here yet, that means there wasn't an atomic write yet so
+            // we can't return anything outdated.
+            _ => return Ok(None),
         };
-        Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]))
+        let store_buffer = Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]);
+        Ok(Some(store_buffer))
     }
 
-    /// Gets a mutable store buffer associated with an atomic object in this allocation
+    /// Gets a mutable store buffer associated with an atomic object in this allocation,
+    /// or creates one with the specified initial value if no atomic object exists yet.
     fn get_or_create_store_buffer_mut<'tcx>(
         &mut self,
         range: AllocRange,
-        init: Scalar,
+        init: Option<Scalar>,
     ) -> InterpResult<'tcx, &mut StoreBuffer> {
         let buffers = self.store_buffers.get_mut();
         let access_type = buffers.access_type(range);
@@ -244,10 +233,8 @@ impl StoreBufferAlloc {
 }
 
 impl<'tcx> StoreBuffer {
-    fn new(init: Scalar) -> Self {
+    fn new(init: Option<Scalar>) -> Self {
         let mut buffer = VecDeque::new();
-        buffer.reserve(STORE_BUFFER_LIMIT);
-        let mut ret = Self { buffer };
         let store_elem = StoreElement {
             // The thread index and timestamp of the initialisation write
             // are never meaningfully used, so it's fine to leave them as 0
@@ -257,11 +244,11 @@ impl<'tcx> StoreBuffer {
             is_seqcst: false,
             load_info: RefCell::new(LoadInfo::default()),
         };
-        ret.buffer.push_back(store_elem);
-        ret
+        buffer.push_back(store_elem);
+        Self { buffer }
     }
 
-    /// Reads from the last store in modification order
+    /// Reads from the last store in modification order, if any.
     fn read_from_last_store(
         &self,
         global: &DataRaceState,
@@ -282,7 +269,7 @@ impl<'tcx> StoreBuffer {
         is_seqcst: bool,
         rng: &mut (impl rand::Rng + ?Sized),
         validate: impl FnOnce() -> InterpResult<'tcx>,
-    ) -> InterpResult<'tcx, (Scalar, LoadRecency)> {
+    ) -> InterpResult<'tcx, (Option<Scalar>, LoadRecency)> {
         // Having a live borrow to store_buffer while calling validate_atomic_load is fine
         // because the race detector doesn't touch store_buffer
 
@@ -419,15 +406,15 @@ impl<'tcx> StoreBuffer {
             // In the language provided in the paper, an atomic store takes the value from a
             // non-atomic memory location.
             // But we already have the immediate value here so we don't need to do the memory
-            // access
-            val,
+            // access.
+            val: Some(val),
             is_seqcst,
             load_info: RefCell::new(LoadInfo::default()),
         };
-        self.buffer.push_back(store_elem);
-        if self.buffer.len() > STORE_BUFFER_LIMIT {
+        if self.buffer.len() >= STORE_BUFFER_LIMIT {
             self.buffer.pop_front();
         }
+        self.buffer.push_back(store_elem);
         if is_seqcst {
             // Every store that happens before this needs to be marked as SC
             // so that in a later SC load, only the last SC store (i.e. this one) or stores that
@@ -450,7 +437,12 @@ impl StoreElement {
     /// buffer regardless of subsequent loads by the same thread; if the earliest load of another
     /// thread doesn't happen before the current one, then no subsequent load by the other thread
     /// can happen before the current one.
-    fn load_impl(&self, index: VectorIdx, clocks: &ThreadClockSet, is_seqcst: bool) -> Scalar {
+    fn load_impl(
+        &self,
+        index: VectorIdx,
+        clocks: &ThreadClockSet,
+        is_seqcst: bool,
+    ) -> Option<Scalar> {
         let mut load_info = self.load_info.borrow_mut();
         load_info.sc_loaded |= is_seqcst;
         let _ = load_info.timestamps.try_insert(index, clocks.clock[index]);
@@ -479,7 +471,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 global.sc_write(threads);
             }
             let range = alloc_range(base_offset, place.layout.size);
-            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?;
+            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
             buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
             buffer.buffered_write(new_val, global, threads, atomic == AtomicRwOrd::SeqCst)?;
         }
@@ -492,47 +484,55 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         atomic: AtomicReadOrd,
         latest_in_mo: Scalar,
         validate: impl FnOnce() -> InterpResult<'tcx>,
-    ) -> InterpResult<'tcx, Scalar> {
+    ) -> InterpResult<'tcx, Option<Scalar>> {
         let this = self.eval_context_ref();
-        if let Some(global) = &this.machine.data_race {
-            let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
-            if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
-                if atomic == AtomicReadOrd::SeqCst {
-                    global.sc_read(&this.machine.threads);
-                }
-                let mut rng = this.machine.rng.borrow_mut();
-                let buffer = alloc_buffers.get_or_create_store_buffer(
-                    alloc_range(base_offset, place.layout.size),
-                    latest_in_mo,
-                )?;
-                let (loaded, recency) = buffer.buffered_read(
-                    global,
-                    &this.machine.threads,
-                    atomic == AtomicReadOrd::SeqCst,
-                    &mut *rng,
-                    validate,
-                )?;
-                if global.track_outdated_loads && recency == LoadRecency::Outdated {
-                    this.emit_diagnostic(NonHaltingDiagnostic::WeakMemoryOutdatedLoad {
-                        ptr: place.ptr(),
-                    });
+        'fallback: {
+            if let Some(global) = &this.machine.data_race {
+                let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
+                if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
+                    if atomic == AtomicReadOrd::SeqCst {
+                        global.sc_read(&this.machine.threads);
+                    }
+                    let mut rng = this.machine.rng.borrow_mut();
+                    let Some(buffer) = alloc_buffers
+                        .get_store_buffer(alloc_range(base_offset, place.layout.size))?
+                    else {
+                        // No old writes available, fall back to base case.
+                        break 'fallback;
+                    };
+                    let (loaded, recency) = buffer.buffered_read(
+                        global,
+                        &this.machine.threads,
+                        atomic == AtomicReadOrd::SeqCst,
+                        &mut *rng,
+                        validate,
+                    )?;
+                    if global.track_outdated_loads && recency == LoadRecency::Outdated {
+                        this.emit_diagnostic(NonHaltingDiagnostic::WeakMemoryOutdatedLoad {
+                            ptr: place.ptr(),
+                        });
+                    }
+
+                    return Ok(loaded);
                 }
-
-                return Ok(loaded);
             }
         }
 
         // Race detector or weak memory disabled, simply read the latest value
         validate()?;
-        Ok(latest_in_mo)
+        Ok(Some(latest_in_mo))
     }
 
+    /// Add the given write to the store buffer. (Does not change machine memory.)
+    ///
+    /// `init` says with which value to initialize the store buffer in case there wasn't a store
+    /// buffer for this memory range before.
     fn buffered_atomic_write(
         &mut self,
         val: Scalar,
         dest: &MPlaceTy<'tcx>,
         atomic: AtomicWriteOrd,
-        init: Scalar,
+        init: Option<Scalar>,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
         let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;
@@ -545,23 +545,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 global.sc_write(threads);
             }
 
-            // UGLY HACK: in write_scalar_atomic() we don't know the value before our write,
-            // so init == val always. If the buffer is fresh then we would've duplicated an entry,
-            // so we need to remove it.
-            // See https://github.com/rust-lang/miri/issues/2164
-            let was_empty = matches!(
-                alloc_buffers
-                    .store_buffers
-                    .borrow()
-                    .access_type(alloc_range(base_offset, dest.layout.size)),
-                AccessType::Empty(_)
-            );
             let buffer = alloc_buffers
                 .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?;
-            if was_empty {
-                buffer.buffer.pop_front();
-            }
-
             buffer.buffered_write(val, global, threads, atomic == AtomicWriteOrd::SeqCst)?;
         }
 
@@ -576,7 +561,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         &self,
         place: &MPlaceTy<'tcx>,
         atomic: AtomicReadOrd,
-        init: Scalar,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
 
@@ -587,8 +571,12 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
             let size = place.layout.size;
             let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
             if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
-                let buffer = alloc_buffers
-                    .get_or_create_store_buffer(alloc_range(base_offset, size), init)?;
+                let Some(buffer) =
+                    alloc_buffers.get_store_buffer(alloc_range(base_offset, size))?
+                else {
+                    // No store buffer, nothing to do.
+                    return Ok(());
+                };
                 buffer.read_from_last_store(
                     global,
                     &this.machine.threads,
diff --git a/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs b/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs
new file mode 100644
index 0000000000000..54bea6c6908e3
--- /dev/null
+++ b/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs
@@ -0,0 +1,43 @@
+//@compile-flags: -Zmiri-ignore-leaks -Zmiri-preemption-rate=0
+
+// Tests showing weak memory behaviours are exhibited. All tests
+// return true when the desired behaviour is seen.
+// This is scheduler and pseudo-RNG dependent, so each test is
+// run multiple times until one try returns true.
+// Spurious failure is possible, if you are really unlucky with
+// the RNG and always read the latest value from the store buffer.
+#![feature(new_uninit)]
+
+use std::sync::atomic::*;
+use std::thread::spawn;
+
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+struct EvilSend<T>(pub T);
+
+unsafe impl<T> Send for EvilSend<T> {}
+unsafe impl<T> Sync for EvilSend<T> {}
+
+// We can't create static items because we need to run each test multiple times.
+fn static_uninit_atomic() -> &'static AtomicUsize {
+    unsafe { Box::leak(Box::new_uninit()).assume_init_ref() }
+}
+
+fn relaxed() {
+    let x = static_uninit_atomic();
+    let j1 = spawn(move || {
+        x.store(1, Ordering::Relaxed);
+    });
+
+    let j2 = spawn(move || x.load(Ordering::Relaxed)); //~ERROR: using uninitialized data
+
+    j1.join().unwrap();
+    j2.join().unwrap();
+}
+
+pub fn main() {
+    // If we try often enough, we should hit UB.
+    for _ in 0..100 {
+        relaxed();
+    }
+}
diff --git a/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr b/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr
new file mode 100644
index 0000000000000..9aa5bc2fa76df
--- /dev/null
+++ b/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr
@@ -0,0 +1,15 @@
+error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
+  --> $DIR/weak_uninit.rs:LL:CC
+   |
+LL |     let j2 = spawn(move || x.load(Ordering::Relaxed));
+   |                            ^^^^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside closure at $DIR/weak_uninit.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/pass/weak_memory/weak.rs b/src/tools/miri/tests/pass/weak_memory/weak.rs
index dac63eeeb0b24..1b5c98cd51826 100644
--- a/src/tools/miri/tests/pass/weak_memory/weak.rs
+++ b/src/tools/miri/tests/pass/weak_memory/weak.rs
@@ -18,11 +18,9 @@ struct EvilSend<T>(pub T);
 unsafe impl<T> Send for EvilSend<T> {}
 unsafe impl<T> Sync for EvilSend<T> {}
 
-// We can't create static items because we need to run each test
-// multiple times
+// We can't create static items because we need to run each test multiple times.
 fn static_atomic(val: usize) -> &'static AtomicUsize {
-    let ret = Box::leak(Box::new(AtomicUsize::new(val)));
-    ret
+    Box::leak(Box::new(AtomicUsize::new(val)))
 }
 
 // Spins until it reads the given value
@@ -33,7 +31,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize {
     val
 }
 
-fn relaxed() -> bool {
+fn relaxed(initial_read: bool) -> bool {
     let x = static_atomic(0);
     let j1 = spawn(move || {
         x.store(1, Relaxed);
@@ -47,7 +45,9 @@ fn relaxed() -> bool {
     j1.join().unwrap();
     let r2 = j2.join().unwrap();
 
-    r2 == 1
+    // There are three possible values here: 0 (from the initial read), 1 (from the first relaxed
+    // read), and 2 (the last read). The last case is boring and we cover the other two.
+    r2 == if initial_read { 0 } else { 1 }
 }
 
 // https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8
@@ -74,7 +74,6 @@ fn seq_cst() -> bool {
 
 fn initialization_write(add_fence: bool) -> bool {
     let x = static_atomic(11);
-    assert_eq!(x.load(Relaxed), 11); // work around https://github.com/rust-lang/miri/issues/2164
 
     let wait = static_atomic(0);
 
@@ -112,11 +111,8 @@ fn faa_replaced_by_load() -> bool {
     }
 
     let x = static_atomic(0);
-    assert_eq!(x.load(Relaxed), 0); // work around https://github.com/rust-lang/miri/issues/2164
     let y = static_atomic(0);
-    assert_eq!(y.load(Relaxed), 0); // work around https://github.com/rust-lang/miri/issues/2164
     let z = static_atomic(0);
-    assert_eq!(z.load(Relaxed), 0); // work around https://github.com/rust-lang/miri/issues/2164
 
     // Since each thread is so short, we need to make sure that they truely run at the same time
     // Otherwise t1 will finish before t2 even starts
@@ -146,7 +142,8 @@ fn assert_once(f: fn() -> bool) {
 }
 
 pub fn main() {
-    assert_once(relaxed);
+    assert_once(|| relaxed(false));
+    assert_once(|| relaxed(true));
     assert_once(seq_cst);
     assert_once(|| initialization_write(false));
     assert_once(|| initialization_write(true));