From 5d0211dc0307f41476768cc482d2df92c639c38e Mon Sep 17 00:00:00 2001 From: joboet Date: Sun, 4 Sep 2022 18:47:59 +0200 Subject: [PATCH] std: use futex in `Once` --- library/std/src/sync/once.rs | 312 ++------------------- library/std/src/sys_common/mod.rs | 1 + library/std/src/sys_common/once/futex.rs | 134 +++++++++ library/std/src/sys_common/once/generic.rs | 282 +++++++++++++++++++ library/std/src/sys_common/once/mod.rs | 43 +++ 5 files changed, 483 insertions(+), 289 deletions(-) create mode 100644 library/std/src/sys_common/once/futex.rs create mode 100644 library/std/src/sys_common/once/generic.rs create mode 100644 library/std/src/sys_common/once/mod.rs diff --git a/library/std/src/sync/once.rs b/library/std/src/sync/once.rs index a7feea588598c..0f25417d6b5d0 100644 --- a/library/std/src/sync/once.rs +++ b/library/std/src/sync/once.rs @@ -3,99 +3,12 @@ //! This primitive is meant to be used to run one-time initialization. An //! example use case would be for initializing an FFI library. -// A "once" is a relatively simple primitive, and it's also typically provided -// by the OS as well (see `pthread_once` or `InitOnceExecuteOnce`). The OS -// primitives, however, tend to have surprising restrictions, such as the Unix -// one doesn't allow an argument to be passed to the function. -// -// As a result, we end up implementing it ourselves in the standard library. -// This also gives us the opportunity to optimize the implementation a bit which -// should help the fast path on call sites. Consequently, let's explain how this -// primitive works now! -// -// So to recap, the guarantees of a Once are that it will call the -// initialization closure at most once, and it will never return until the one -// that's running has finished running. This means that we need some form of -// blocking here while the custom callback is running at the very least. -// Additionally, we add on the restriction of **poisoning**. Whenever an -// initialization closure panics, the Once enters a "poisoned" state which means -// that all future calls will immediately panic as well. -// -// So to implement this, one might first reach for a `Mutex`, but those cannot -// be put into a `static`. It also gets a lot harder with poisoning to figure -// out when the mutex needs to be deallocated because it's not after the closure -// finishes, but after the first successful closure finishes. -// -// All in all, this is instead implemented with atomics and lock-free -// operations! Whee! Each `Once` has one word of atomic state, and this state is -// CAS'd on to determine what to do. There are four possible state of a `Once`: -// -// * Incomplete - no initialization has run yet, and no thread is currently -// using the Once. -// * Poisoned - some thread has previously attempted to initialize the Once, but -// it panicked, so the Once is now poisoned. There are no other -// threads currently accessing this Once. -// * Running - some thread is currently attempting to run initialization. It may -// succeed, so all future threads need to wait for it to finish. -// Note that this state is accompanied with a payload, described -// below. -// * Complete - initialization has completed and all future calls should finish -// immediately. -// -// With 4 states we need 2 bits to encode this, and we use the remaining bits -// in the word we have allocated as a queue of threads waiting for the thread -// responsible for entering the RUNNING state. 
This queue is just a linked list
-// of Waiter nodes which is monotonically increasing in size. Each node is
-// allocated on the stack, and whenever the running closure finishes it will
-// consume the entire queue and notify all waiters they should try again.
-//
-// You'll find a few more details in the implementation, but that's the gist of
-// it!
-//
-// Atomic orderings:
-// When running `Once` we deal with multiple atomics:
-// `Once.state_and_queue` and an unknown number of `Waiter.signaled`.
-// * `state_and_queue` is used (1) as a state flag, (2) for synchronizing the
-//   result of the `Once`, and (3) for synchronizing `Waiter` nodes.
-//     - At the end of the `call_inner` function we have to make sure the result
-//       of the `Once` is acquired. So every load which can be the only one to
-//       load COMPLETED must have at least Acquire ordering, which means all
-//       three of them.
-//     - `WaiterQueue::Drop` is the only place that may store COMPLETED, and
-//       must do so with Release ordering to make the result available.
-//     - `wait` inserts `Waiter` nodes as a pointer in `state_and_queue`, and
-//       needs to make the nodes available with Release ordering. The load in
-//       its `compare_exchange` can be Relaxed because it only has to compare
-//       the atomic, not to read other data.
-//     - `WaiterQueue::Drop` must see the `Waiter` nodes, so it must load
-//       `state_and_queue` with Acquire ordering.
-//     - There is just one store where `state_and_queue` is used only as a
-//       state flag, without having to synchronize data: switching the state
-//       from INCOMPLETE to RUNNING in `call_inner`. This store can be Relaxed,
-//       but the read has to be Acquire because of the requirements mentioned
-//       above.
-// * `Waiter.signaled` is both used as a flag, and to protect a field with
-//   interior mutability in `Waiter`. `Waiter.thread` is changed in
-//   `WaiterQueue::Drop` which then sets `signaled` with Release ordering.
-//   After `wait` loads `signaled` with Acquire and sees it is true, it needs to
-//   see the changes to drop the `Waiter` struct correctly.
-// * There is one place where the two atomics `Once.state_and_queue` and
-//   `Waiter.signaled` come together, and might be reordered by the compiler or
-//   processor. Because both use Acquire ordering such a reordering is not
-//   allowed, so no need for SeqCst.
-
 #[cfg(all(test, not(target_os = "emscripten")))]
 mod tests;
 
-use crate::cell::Cell;
 use crate::fmt;
-use crate::marker;
 use crate::panic::{RefUnwindSafe, UnwindSafe};
-use crate::ptr;
-use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
-use crate::thread::{self, Thread};
-
-type Masked = ();
+use crate::sys_common::once as sys;
 
 /// A synchronization primitive which can be used to run a one-time global
 /// initialization. Useful for one-time initialization for FFI or related
@@ -114,19 +27,9 @@ type Masked = ();
 /// ```
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct Once {
-    // `state_and_queue` is actually a pointer to a `Waiter` with extra state
-    // bits, so we add the `PhantomData` appropriately.
-    state_and_queue: AtomicPtr<Masked>,
-    _marker: marker::PhantomData<*const Waiter>,
+    inner: sys::Once,
 }
 
-// The `PhantomData` of a raw pointer removes these two auto traits, but we
-// enforce both below in the implementation so this should be safe to add.
-#[stable(feature = "rust1", since = "1.0.0")]
-unsafe impl Sync for Once {}
-#[stable(feature = "rust1", since = "1.0.0")]
-unsafe impl Send for Once {}
-
 #[stable(feature = "sync_once_unwind_safe", since = "1.59.0")]
 impl UnwindSafe for Once {}
 
@@ -136,10 +39,8 @@ impl RefUnwindSafe for Once {}
 /// State yielded to [`Once::call_once_force()`]’s closure parameter. The state
 /// can be used to query the poison status of the [`Once`].
 #[stable(feature = "once_poison", since = "1.51.0")]
-#[derive(Debug)]
 pub struct OnceState {
-    poisoned: bool,
-    set_state_on_drop_to: Cell<*mut Masked>,
+    pub(crate) inner: sys::OnceState,
 }
 
 /// Initialization value for static [`Once`] values.
@@ -159,38 +60,6 @@ pub struct OnceState {
 )]
 pub const ONCE_INIT: Once = Once::new();
 
-// Four states that a Once can be in, encoded into the lower bits of
-// `state_and_queue` in the Once structure.
-const INCOMPLETE: usize = 0x0;
-const POISONED: usize = 0x1;
-const RUNNING: usize = 0x2;
-const COMPLETE: usize = 0x3;
-
-// Mask to learn about the state. All other bits are the queue of waiters if
-// this is in the RUNNING state.
-const STATE_MASK: usize = 0x3;
-
-// Representation of a node in the linked list of waiters, used while in the
-// RUNNING state.
-// Note: `Waiter` can't hold a mutable pointer to the next thread, because then
-// `wait` would both hand out a mutable reference to its `Waiter` node, and keep
-// a shared reference to check `signaled`. Instead we hold shared references and
-// use interior mutability.
-#[repr(align(4))] // Ensure the two lower bits are free to use as state bits.
-struct Waiter {
-    thread: Cell<Option<Thread>>,
-    signaled: AtomicBool,
-    next: *const Waiter,
-}
-
-// Head of a linked list of waiters.
-// Every node is a struct on the stack of a waiting thread.
-// Will wake up the waiters when it gets dropped, i.e. also on panic.
-struct WaiterQueue<'a> {
-    state_and_queue: &'a AtomicPtr<Masked>,
-    set_state_on_drop_to: *mut Masked,
-}
-
 impl Once {
     /// Creates a new `Once` value.
     #[inline]
@@ -198,10 +67,7 @@ impl Once {
     #[rustc_const_stable(feature = "const_once_new", since = "1.32.0")]
     #[must_use]
     pub const fn new() -> Once {
-        Once {
-            state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)),
-            _marker: marker::PhantomData,
-        }
+        Once { inner: sys::Once::new() }
     }
 
     /// Performs an initialization routine once and only once. The given closure
@@ -261,6 +127,7 @@ impl Once {
     /// This is similar to [poisoning with mutexes][poison].
     ///
     /// [poison]: struct.Mutex.html#poisoning
+    #[inline]
     #[stable(feature = "rust1", since = "1.0.0")]
     #[track_caller]
     pub fn call_once<F>(&self, f: F)
     where
         F: FnOnce(),
     {
         // Fast path check
-        if self.is_completed() {
+        if self.inner.is_completed() {
             return;
         }
 
         let mut f = Some(f);
-        self.call_inner(false, &mut |_| f.take().unwrap()());
+        self.inner.call(false, &mut |_| f.take().unwrap()());
     }
 
     /// Performs the same function as [`call_once()`] except ignores poisoning.
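A quick usage sketch before the next hunk, since `call_once` and `call_once_force` are the two entry points this patch reroutes into `sys::Once::call`. This is ordinary, stable `std::sync::Once` behavior rather than anything introduced here; the `INIT` static and the closure bodies are illustrative only:

```
use std::sync::Once;
use std::thread;

static INIT: Once = Once::new();

fn main() {
    // A panic inside the closure poisons the `Once`...
    let handle = thread::spawn(|| INIT.call_once(|| panic!("init failed")));
    assert!(handle.join().is_err());

    // ...so a plain `call_once` would now panic as well. `call_once_force`
    // instead hands the closure an `OnceState` that reports the poison and
    // clears it when the closure returns normally.
    INIT.call_once_force(|state| {
        assert!(state.is_poisoned());
    });
    assert!(INIT.is_completed());
}
```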
@@ -320,18 +187,19 @@ impl Once {
     /// // once any success happens, we stop propagating the poison
     /// INIT.call_once(|| {});
     /// ```
+    #[inline]
     #[stable(feature = "once_poison", since = "1.51.0")]
     pub fn call_once_force<F>(&self, f: F)
     where
         F: FnOnce(&OnceState),
     {
         // Fast path check
-        if self.is_completed() {
+        if self.inner.is_completed() {
             return;
         }
 
         let mut f = Some(f);
-        self.call_inner(true, &mut |p| f.take().unwrap()(p));
+        self.inner.call(true, &mut |p| f.take().unwrap()(p));
     }
 
     /// Returns `true` if some [`call_once()`] call has completed
@@ -378,119 +246,7 @@ impl Once {
     #[stable(feature = "once_is_completed", since = "1.43.0")]
     #[inline]
     pub fn is_completed(&self) -> bool {
-        // An `Acquire` load is enough because that makes all the initialization
-        // operations visible to us, and, this being a fast path, weaker
-        // ordering helps with performance. This `Acquire` synchronizes with
-        // `Release` operations on the slow path.
-        self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE
-    }
-
-    // This is a non-generic function to reduce the monomorphization cost of
-    // using `call_once` (this isn't exactly a trivial or small implementation).
-    //
-    // Additionally, this is tagged with `#[cold]` as it should indeed be cold
-    // and it helps let LLVM know that calls to this function should be off the
-    // fast path. Essentially, this should help generate more straight line code
-    // in LLVM.
-    //
-    // Finally, this takes an `FnMut` instead of a `FnOnce` because there's
-    // currently no way to take an `FnOnce` and call it via virtual dispatch
-    // without some allocation overhead.
-    #[cold]
-    #[track_caller]
-    fn call_inner(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&OnceState)) {
-        let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire);
-        loop {
-            match state_and_queue.addr() {
-                COMPLETE => break,
-                POISONED if !ignore_poisoning => {
-                    // Panic to propagate the poison.
-                    panic!("Once instance has previously been poisoned");
-                }
-                POISONED | INCOMPLETE => {
-                    // Try to register this thread as the one RUNNING.
-                    let exchange_result = self.state_and_queue.compare_exchange(
-                        state_and_queue,
-                        ptr::invalid_mut(RUNNING),
-                        Ordering::Acquire,
-                        Ordering::Acquire,
-                    );
-                    if let Err(old) = exchange_result {
-                        state_and_queue = old;
-                        continue;
-                    }
-                    // `waiter_queue` will manage other waiting threads, and
-                    // wake them up on drop.
-                    let mut waiter_queue = WaiterQueue {
-                        state_and_queue: &self.state_and_queue,
-                        set_state_on_drop_to: ptr::invalid_mut(POISONED),
-                    };
-                    // Run the initialization function, letting it know if we're
-                    // poisoned or not.
-                    let init_state = OnceState {
-                        poisoned: state_and_queue.addr() == POISONED,
-                        set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)),
-                    };
-                    init(&init_state);
-                    waiter_queue.set_state_on_drop_to = init_state.set_state_on_drop_to.get();
-                    break;
-                }
-                _ => {
-                    // All other values must be RUNNING with possibly a
-                    // pointer to the waiter queue in the more significant bits.
-                    assert!(state_and_queue.addr() & STATE_MASK == RUNNING);
-                    wait(&self.state_and_queue, state_and_queue);
-                    state_and_queue = self.state_and_queue.load(Ordering::Acquire);
-                }
-            }
-        }
-    }
-}
-
-fn wait(state_and_queue: &AtomicPtr<Masked>, mut current_state: *mut Masked) {
-    // Note: the following code was carefully written to avoid creating a
-    // mutable reference to `node` that gets aliased.
-    loop {
-        // Don't queue this thread if the status is no longer running,
-        // otherwise we will not be woken up.
- if current_state.addr() & STATE_MASK != RUNNING { - return; - } - - // Create the node for our current thread. - let node = Waiter { - thread: Cell::new(Some(thread::current())), - signaled: AtomicBool::new(false), - next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter, - }; - let me = &node as *const Waiter as *const Masked as *mut Masked; - - // Try to slide in the node at the head of the linked list, making sure - // that another thread didn't just replace the head of the linked list. - let exchange_result = state_and_queue.compare_exchange( - current_state, - me.with_addr(me.addr() | RUNNING), - Ordering::Release, - Ordering::Relaxed, - ); - if let Err(old) = exchange_result { - current_state = old; - continue; - } - - // We have enqueued ourselves, now lets wait. - // It is important not to return before being signaled, otherwise we - // would drop our `Waiter` node and leave a hole in the linked list - // (and a dangling reference). Guard against spurious wakeups by - // reparking ourselves until we are signaled. - while !node.signaled.load(Ordering::Acquire) { - // If the managing thread happens to signal and unpark us before we - // can park ourselves, the result could be this thread never gets - // unparked. Luckily `park` comes with the guarantee that if it got - // an `unpark` just before on an unparked thread it does not park. - thread::park(); - } - break; + self.inner.is_completed() } } @@ -501,37 +257,6 @@ impl fmt::Debug for Once { } } -impl Drop for WaiterQueue<'_> { - fn drop(&mut self) { - // Swap out our state with however we finished. - let state_and_queue = - self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel); - - // We should only ever see an old state which was RUNNING. - assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING); - - // Walk the entire linked list of waiters and wake them up (in lifo - // order, last to register is first to wake up). - unsafe { - // Right after setting `node.signaled = true` the other thread may - // free `node` if there happens to be has a spurious wakeup. - // So we have to take out the `thread` field and copy the pointer to - // `next` first. - let mut queue = - state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter; - while !queue.is_null() { - let next = (*queue).next; - let thread = (*queue).thread.take().unwrap(); - (*queue).signaled.store(true, Ordering::Release); - // ^- FIXME (maybe): This is another case of issue #55005 - // `store()` has a potentially dangling ref to `signaled`. - queue = next; - thread.unpark(); - } - } - } -} - impl OnceState { /// Returns `true` if the associated [`Once`] was poisoned prior to the /// invocation of the closure passed to [`Once::call_once_force()`]. @@ -568,13 +293,22 @@ impl OnceState { /// assert!(!state.is_poisoned()); /// }); #[stable(feature = "once_poison", since = "1.51.0")] + #[inline] pub fn is_poisoned(&self) -> bool { - self.poisoned + self.inner.is_poisoned() } /// Poison the associated [`Once`] without explicitly panicking. - // NOTE: This is currently only exposed for the `lazy` module + // NOTE: This is currently only exposed for `OnceLock`. 
+    #[inline]
     pub(crate) fn poison(&self) {
-        self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED));
+        self.inner.poison();
+    }
+}
+
+#[stable(feature = "std_debug", since = "1.16.0")]
+impl fmt::Debug for OnceState {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("OnceState").field("poisoned", &self.is_poisoned()).finish()
     }
 }
diff --git a/library/std/src/sys_common/mod.rs b/library/std/src/sys_common/mod.rs
index 80f56bf7522b6..e4dd0253668b8 100644
--- a/library/std/src/sys_common/mod.rs
+++ b/library/std/src/sys_common/mod.rs
@@ -27,6 +27,7 @@ pub mod io;
 pub mod lazy_box;
 pub mod memchr;
 pub mod mutex;
+pub mod once;
 pub mod process;
 pub mod remutex;
 pub mod rwlock;
diff --git a/library/std/src/sys_common/once/futex.rs b/library/std/src/sys_common/once/futex.rs
new file mode 100644
index 0000000000000..5c7e6c013715d
--- /dev/null
+++ b/library/std/src/sys_common/once/futex.rs
@@ -0,0 +1,134 @@
+use crate::cell::Cell;
+use crate::sync as public;
+use crate::sync::atomic::{
+    AtomicU32,
+    Ordering::{Acquire, Relaxed, Release},
+};
+use crate::sys::futex::{futex_wait, futex_wake_all};
+
+// On some platforms, the OS is very nice and handles the waiter queue for us.
+// This means we only need one atomic value with 5 states:
+
+/// No initialization has run yet, and no thread is currently using the Once.
+const INCOMPLETE: u32 = 0;
+/// Some thread has previously attempted to initialize the Once, but it panicked,
+/// so the Once is now poisoned. There are no other threads currently accessing
+/// this Once.
+const POISONED: u32 = 1;
+/// Some thread is currently attempting to run initialization. It may succeed,
+/// so all future threads need to wait for it to finish.
+const RUNNING: u32 = 2;
+/// Some thread is currently attempting to run initialization and there are threads
+/// waiting for it to finish.
+const QUEUED: u32 = 3;
+/// Initialization has completed and all future calls should finish immediately.
+const COMPLETE: u32 = 4;
+
+// Threads wait by setting the state to QUEUED and calling `futex_wait` on the state
+// variable. When the running thread finishes, it will wake all waiting threads using
+// `futex_wake_all`.
+
+pub struct OnceState {
+    poisoned: bool,
+    set_state_to: Cell<u32>,
+}
+
+impl OnceState {
+    #[inline]
+    pub fn is_poisoned(&self) -> bool {
+        self.poisoned
+    }
+
+    #[inline]
+    pub fn poison(&self) {
+        self.set_state_to.set(POISONED);
+    }
+}
+
+struct CompletionGuard<'a> {
+    state: &'a AtomicU32,
+    set_state_on_drop_to: u32,
+}
+
+impl<'a> Drop for CompletionGuard<'a> {
+    fn drop(&mut self) {
+        // Use release ordering to propagate changes to all threads checking
+        // up on the Once. `futex_wake_all` does its own synchronization, hence
+        // we do not need `AcqRel`.
+        if self.state.swap(self.set_state_on_drop_to, Release) == QUEUED {
+            futex_wake_all(&self.state);
+        }
+    }
+}
+
+pub struct Once {
+    state: AtomicU32,
+}
+
+impl Once {
+    #[inline]
+    pub const fn new() -> Once {
+        Once { state: AtomicU32::new(INCOMPLETE) }
+    }
+
+    #[inline]
+    pub fn is_completed(&self) -> bool {
+        // Use acquire ordering to make all initialization changes visible to the
+        // current thread.
+        self.state.load(Acquire) == COMPLETE
+    }
+
+    // This uses FnMut to match the API of the generic implementation. As this
+    // implementation is quite light-weight, it is generic over the closure and
+    // so avoids the cost of dynamic dispatch.
+    #[cold]
+    #[track_caller]
+    pub fn call(&self, ignore_poisoning: bool, f: &mut impl FnMut(&public::OnceState)) {
+        let mut state = self.state.load(Acquire);
+        loop {
+            match state {
+                POISONED if !ignore_poisoning => {
+                    // Panic to propagate the poison.
+                    panic!("Once instance has previously been poisoned");
+                }
+                INCOMPLETE | POISONED => {
+                    // Try to register the current thread as the one running.
+                    if let Err(new) =
+                        self.state.compare_exchange_weak(state, RUNNING, Acquire, Acquire)
+                    {
+                        state = new;
+                        continue;
+                    }
+                    // `waiter_queue` will manage other waiting threads, and
+                    // wake them up on drop.
+                    let mut waiter_queue =
+                        CompletionGuard { state: &self.state, set_state_on_drop_to: POISONED };
+                    // Run the function, letting it know if we're poisoned or not.
+                    let f_state = public::OnceState {
+                        inner: OnceState {
+                            poisoned: state == POISONED,
+                            set_state_to: Cell::new(COMPLETE),
+                        },
+                    };
+                    f(&f_state);
+                    waiter_queue.set_state_on_drop_to = f_state.inner.set_state_to.get();
+                    return;
+                }
+                RUNNING | QUEUED => {
+                    // Set the state to QUEUED if it is not already.
+                    if state == RUNNING
+                        && let Err(new) = self.state.compare_exchange_weak(RUNNING, QUEUED, Relaxed, Acquire)
+                    {
+                        state = new;
+                        continue;
+                    }
+
+                    futex_wait(&self.state, QUEUED, None);
+                    state = self.state.load(Acquire);
+                }
+                COMPLETE => return,
+                _ => unreachable!("state is never set to invalid values"),
+            }
+        }
+    }
+}
diff --git a/library/std/src/sys_common/once/generic.rs b/library/std/src/sys_common/once/generic.rs
new file mode 100644
index 0000000000000..acf5f247164a7
--- /dev/null
+++ b/library/std/src/sys_common/once/generic.rs
@@ -0,0 +1,282 @@
+// Each `Once` has one word of atomic state, and this state is CAS'd on to
+// determine what to do. There are four possible states of a `Once`:
+//
+// * Incomplete - no initialization has run yet, and no thread is currently
+//                using the Once.
+// * Poisoned - some thread has previously attempted to initialize the Once, but
+//              it panicked, so the Once is now poisoned. There are no other
+//              threads currently accessing this Once.
+// * Running - some thread is currently attempting to run initialization. It may
+//             succeed, so all future threads need to wait for it to finish.
+//             Note that this state is accompanied with a payload, described
+//             below.
+// * Complete - initialization has completed and all future calls should finish
+//              immediately.
+//
+// With 4 states we need 2 bits to encode this, and we use the remaining bits
+// in the word we have allocated as a queue of threads waiting for the thread
+// responsible for entering the RUNNING state. This queue is just a linked list
+// of Waiter nodes which is monotonically increasing in size. Each node is
+// allocated on the stack, and whenever the running closure finishes it will
+// consume the entire queue and notify all waiters they should try again.
+//
+// You'll find a few more details in the implementation, but that's the gist of
+// it!
+//
+// Atomic orderings:
+// When running `Once` we deal with multiple atomics:
+// `Once.state_and_queue` and an unknown number of `Waiter.signaled`.
+// * `state_and_queue` is used (1) as a state flag, (2) for synchronizing the
+//   result of the `Once`, and (3) for synchronizing `Waiter` nodes.
+//     - At the end of the `call` function we have to make sure the result
+//       of the `Once` is acquired. So every load which can be the only one to
+//       load COMPLETE must have at least acquire ordering, which means all
+//       three of them.
+//     - `WaiterQueue::drop` is the only place that may store COMPLETE, and
+//       must do so with release ordering to make the result available.
+//     - `wait` inserts `Waiter` nodes as a pointer in `state_and_queue`, and
+//       needs to make the nodes available with release ordering. The load in
+//       its `compare_exchange` can be relaxed because it only has to compare
+//       the atomic, not to read other data.
+//     - `WaiterQueue::drop` must see the `Waiter` nodes, so it must load
+//       `state_and_queue` with acquire ordering.
+//     - There is just one store where `state_and_queue` is used only as a
+//       state flag, without having to synchronize data: switching the state
+//       from INCOMPLETE to RUNNING in `call`. This store can be relaxed,
+//       but the read has to be acquire because of the requirements mentioned
+//       above.
+// * `Waiter.signaled` is both used as a flag, and to protect a field with
+//   interior mutability in `Waiter`. `Waiter.thread` is changed in
+//   `WaiterQueue::drop` which then sets `signaled` with release ordering.
+//   After `wait` loads `signaled` with acquire ordering and sees it is true,
+//   it needs to see the changes to drop the `Waiter` struct correctly.
+// * There is one place where the two atomics `Once.state_and_queue` and
+//   `Waiter.signaled` come together, and might be reordered by the compiler or
+//   processor. Because both use acquire ordering such a reordering is not
+//   allowed, so no need for `SeqCst`.
+
+use crate::cell::Cell;
+use crate::fmt;
+use crate::ptr;
+use crate::sync as public;
+use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering};
+use crate::thread::{self, Thread};
+
+type Masked = ();
+
+pub struct Once {
+    state_and_queue: AtomicPtr<Masked>,
+}
+
+pub struct OnceState {
+    poisoned: bool,
+    set_state_on_drop_to: Cell<*mut Masked>,
+}
+
+// Four states that a Once can be in, encoded into the lower bits of
+// `state_and_queue` in the Once structure.
+const INCOMPLETE: usize = 0x0;
+const POISONED: usize = 0x1;
+const RUNNING: usize = 0x2;
+const COMPLETE: usize = 0x3;
+
+// Mask to learn about the state. All other bits are the queue of waiters if
+// this is in the RUNNING state.
+const STATE_MASK: usize = 0x3;
+
+// Representation of a node in the linked list of waiters, used while in the
+// RUNNING state.
+// Note: `Waiter` can't hold a mutable pointer to the next thread, because then
+// `wait` would both hand out a mutable reference to its `Waiter` node, and keep
+// a shared reference to check `signaled`. Instead we hold shared references and
+// use interior mutability.
+#[repr(align(4))] // Ensure the two lower bits are free to use as state bits.
+struct Waiter {
+    thread: Cell<Option<Thread>>,
+    signaled: AtomicBool,
+    next: *const Waiter,
+}
+
+// Head of a linked list of waiters.
+// Every node is a struct on the stack of a waiting thread.
+// Will wake up the waiters when it gets dropped, i.e. also on panic.
+struct WaiterQueue<'a> {
+    state_and_queue: &'a AtomicPtr<Masked>,
+    set_state_on_drop_to: *mut Masked,
+}
+
+impl Once {
+    #[inline]
+    pub const fn new() -> Once {
+        Once { state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)) }
+    }
+
+    #[inline]
+    pub fn is_completed(&self) -> bool {
+        // An `Acquire` load is enough because that makes all the initialization
+        // operations visible to us, and, this being a fast path, weaker
+        // ordering helps with performance. This `Acquire` synchronizes with
+        // `Release` operations on the slow path.
+        self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE
+    }
+
+    // This is a non-generic function to reduce the monomorphization cost of
+    // using `call_once` (this isn't exactly a trivial or small implementation).
+    //
+    // Additionally, this is tagged with `#[cold]` as it should indeed be cold
+    // and it helps let LLVM know that calls to this function should be off the
+    // fast path. Essentially, this should help generate more straight line code
+    // in LLVM.
+    //
+    // Finally, this takes an `FnMut` instead of a `FnOnce` because there's
+    // currently no way to take an `FnOnce` and call it via virtual dispatch
+    // without some allocation overhead.
+    #[cold]
+    #[track_caller]
+    pub fn call(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&public::OnceState)) {
+        let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire);
+        loop {
+            match state_and_queue.addr() {
+                COMPLETE => break,
+                POISONED if !ignore_poisoning => {
+                    // Panic to propagate the poison.
+                    panic!("Once instance has previously been poisoned");
+                }
+                POISONED | INCOMPLETE => {
+                    // Try to register this thread as the one RUNNING.
+                    let exchange_result = self.state_and_queue.compare_exchange(
+                        state_and_queue,
+                        ptr::invalid_mut(RUNNING),
+                        Ordering::Acquire,
+                        Ordering::Acquire,
+                    );
+                    if let Err(old) = exchange_result {
+                        state_and_queue = old;
+                        continue;
+                    }
+                    // `waiter_queue` will manage other waiting threads, and
+                    // wake them up on drop.
+                    let mut waiter_queue = WaiterQueue {
+                        state_and_queue: &self.state_and_queue,
+                        set_state_on_drop_to: ptr::invalid_mut(POISONED),
+                    };
+                    // Run the initialization function, letting it know if we're
+                    // poisoned or not.
+                    let init_state = public::OnceState {
+                        inner: OnceState {
+                            poisoned: state_and_queue.addr() == POISONED,
+                            set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)),
+                        },
+                    };
+                    init(&init_state);
+                    waiter_queue.set_state_on_drop_to = init_state.inner.set_state_on_drop_to.get();
+                    break;
+                }
+                _ => {
+                    // All other values must be RUNNING with possibly a
+                    // pointer to the waiter queue in the more significant bits.
+                    assert!(state_and_queue.addr() & STATE_MASK == RUNNING);
+                    wait(&self.state_and_queue, state_and_queue);
+                    state_and_queue = self.state_and_queue.load(Ordering::Acquire);
+                }
+            }
+        }
+    }
+}
+
+fn wait(state_and_queue: &AtomicPtr<Masked>, mut current_state: *mut Masked) {
+    // Note: the following code was carefully written to avoid creating a
+    // mutable reference to `node` that gets aliased.
+    loop {
+        // Don't queue this thread if the status is no longer running,
+        // otherwise we will not be woken up.
+        if current_state.addr() & STATE_MASK != RUNNING {
+            return;
+        }
+
+        // Create the node for our current thread.
+        let node = Waiter {
+            thread: Cell::new(Some(thread::current())),
+            signaled: AtomicBool::new(false),
+            next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter,
+        };
+        let me = &node as *const Waiter as *const Masked as *mut Masked;
+
+        // Try to slide in the node at the head of the linked list, making sure
+        // that another thread didn't just replace the head of the linked list.
+        let exchange_result = state_and_queue.compare_exchange(
+            current_state,
+            me.with_addr(me.addr() | RUNNING),
+            Ordering::Release,
+            Ordering::Relaxed,
+        );
+        if let Err(old) = exchange_result {
+            current_state = old;
+            continue;
+        }
+
+        // We have enqueued ourselves, now let's wait.
+        // It is important not to return before being signaled, otherwise we
+        // would drop our `Waiter` node and leave a hole in the linked list
+        // (and a dangling reference). Guard against spurious wakeups by
+        // reparking ourselves until we are signaled.
+        while !node.signaled.load(Ordering::Acquire) {
+            // If the managing thread happens to signal and unpark us before we
+            // can park ourselves, the result could be this thread never gets
+            // unparked. Luckily `park` comes with the guarantee that if it got
+            // an `unpark` just before on an unparked thread it does not park.
+            thread::park();
+        }
+        break;
+    }
+}
+
+#[stable(feature = "std_debug", since = "1.16.0")]
+impl fmt::Debug for Once {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("Once").finish_non_exhaustive()
+    }
+}
+
+impl Drop for WaiterQueue<'_> {
+    fn drop(&mut self) {
+        // Swap out our state with however we finished.
+        let state_and_queue =
+            self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel);
+
+        // We should only ever see an old state which was RUNNING.
+        assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING);
+
+        // Walk the entire linked list of waiters and wake them up (in LIFO
+        // order, last to register is first to wake up).
+        unsafe {
+            // Right after setting `node.signaled = true` the other thread may
+            // free `node` if there happens to be a spurious wakeup.
+            // So we have to take out the `thread` field and copy the pointer to
+            // `next` first.
+            let mut queue =
+                state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter;
+            while !queue.is_null() {
+                let next = (*queue).next;
+                let thread = (*queue).thread.take().unwrap();
+                (*queue).signaled.store(true, Ordering::Release);
+                // ^- FIXME (maybe): This is another case of issue #55005
+                // `store()` has a potentially dangling ref to `signaled`.
+                queue = next;
+                thread.unpark();
+            }
+        }
+    }
+}
+
+impl OnceState {
+    #[inline]
+    pub fn is_poisoned(&self) -> bool {
+        self.poisoned
+    }
+
+    #[inline]
+    pub fn poison(&self) {
+        self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED));
+    }
+}
diff --git a/library/std/src/sys_common/once/mod.rs b/library/std/src/sys_common/once/mod.rs
new file mode 100644
index 0000000000000..8742e68cc7ac5
--- /dev/null
+++ b/library/std/src/sys_common/once/mod.rs
@@ -0,0 +1,43 @@
+// A "once" is a relatively simple primitive, and it's also typically provided
+// by the OS as well (see `pthread_once` or `InitOnceExecuteOnce`). The OS
+// primitives, however, tend to have surprising restrictions, such as the Unix
+// one not allowing an argument to be passed to the function.
+//
+// As a result, we end up implementing it ourselves in the standard library.
+// This also gives us the opportunity to optimize the implementation a bit which
+// should help the fast path on call sites.
+//
+// So to recap, the guarantees of a Once are that it will call the
+// initialization closure at most once, and it will never return until the one
+// that's running has finished running. This means that we need some form of
+// blocking here while the custom callback is running at the very least.
+// Additionally, we add on the restriction of **poisoning**. Whenever an
+// initialization closure panics, the Once enters a "poisoned" state which means
+// that all future calls will immediately panic as well.
+//
+// So to implement this, one might first reach for a `Mutex`, but those cannot
+// be put into a `static`. It also gets a lot harder with poisoning to figure
+// out when the mutex needs to be deallocated because it's not after the closure
+// finishes, but after the first successful closure finishes.
+//
+// All in all, this is instead implemented with atomics and lock-free
+// operations! Whee!
+
+cfg_if::cfg_if! {
+    if #[cfg(any(
+        target_os = "linux",
+        target_os = "android",
+        all(target_arch = "wasm32", target_feature = "atomics"),
+        target_os = "freebsd",
+        target_os = "openbsd",
+        target_os = "dragonfly",
+        target_os = "fuchsia",
+        target_os = "hermit",
+    ))] {
+        mod futex;
+        pub use futex::{Once, OnceState};
+    } else {
+        mod generic;
+        pub use generic::{Once, OnceState};
+    }
+}
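To make the futex path easier to follow outside of std, here is a minimal, self-contained model of the five-state protocol above. This is a sketch under stated assumptions, not the real implementation: poisoning is omitted, and `wait_while_queued`/`wake_all` are hypothetical stand-ins that emulate the `futex_wait`/`futex_wake_all` wrappers with a `Mutex`/`Condvar` pair so the example runs on any platform:

```
use std::sync::atomic::{
    AtomicU32,
    Ordering::{Acquire, Relaxed, Release},
};
use std::sync::{Condvar, Mutex};
use std::thread;

// Same state values as futex.rs, minus POISONED (poisoning is omitted here).
const INCOMPLETE: u32 = 0;
const RUNNING: u32 = 2;
const QUEUED: u32 = 3;
const COMPLETE: u32 = 4;

static STATE: AtomicU32 = AtomicU32::new(INCOMPLETE);
// Stand-ins for the futex: waiters sleep on a condvar instead of in the kernel.
static LOCK: Mutex<()> = Mutex::new(());
static WAKE: Condvar = Condvar::new();

fn wait_while_queued() {
    // Re-checking the state under the lock is what prevents a lost wakeup,
    // playing the role of the atomic check inside a real `futex_wait`.
    let mut guard = LOCK.lock().unwrap();
    while STATE.load(Acquire) == QUEUED {
        guard = WAKE.wait(guard).unwrap();
    }
}

fn wake_all() {
    // Taking the lock orders this notification after any in-progress wait.
    drop(LOCK.lock().unwrap());
    WAKE.notify_all();
}

fn call_once(f: impl FnOnce()) {
    let mut state = STATE.load(Acquire);
    loop {
        match state {
            INCOMPLETE => {
                // Try to become the thread that runs the initialization.
                if let Err(s) = STATE.compare_exchange(INCOMPLETE, RUNNING, Acquire, Acquire) {
                    state = s;
                    continue;
                }
                f();
                // The release swap publishes the initialization; a wake-up is
                // only paid for if somebody actually queued.
                if STATE.swap(COMPLETE, Release) == QUEUED {
                    wake_all();
                }
                return;
            }
            RUNNING | QUEUED => {
                // Advertise that we are going to sleep, as in futex.rs.
                if state == RUNNING {
                    if let Err(s) = STATE.compare_exchange(RUNNING, QUEUED, Relaxed, Acquire) {
                        state = s;
                        continue;
                    }
                }
                wait_while_queued();
                state = STATE.load(Acquire);
            }
            _ => return, // COMPLETE: initialization already happened.
        }
    }
}

fn main() {
    let handles: Vec<_> = (0..4)
        .map(|_| thread::spawn(|| call_once(|| println!("runs exactly once"))))
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
}
```

The move mirrored from futex.rs is the RUNNING -> QUEUED transition: the completing thread only pays for a wake-up when its final swap returns QUEUED, so the uncontended path stays a single atomic swap with no syscall.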