From 897693bea0f47bff2b54c5e7ba086ed5034051fe Mon Sep 17 00:00:00 2001 From: ksolana <110843012+ksolana@users.noreply.github.com> Date: Mon, 30 Sep 2024 21:00:19 -0700 Subject: [PATCH] mpsc ringbuffer adapted from crossbeam::ArrayQueue --- poh/src/lib.rs | 2 + poh/src/mpsc_ringbuffer.rs | 810 +++++++++++++++++++++++++++++++++++++ 2 files changed, 812 insertions(+) create mode 100644 poh/src/mpsc_ringbuffer.rs diff --git a/poh/src/lib.rs b/poh/src/lib.rs index ff60171d7a8d89..c9925caf02b407 100644 --- a/poh/src/lib.rs +++ b/poh/src/lib.rs @@ -3,6 +3,8 @@ pub mod leader_bank_notifier; pub mod poh_recorder; pub mod poh_service; +mod mpsc_ringbuffer; + #[macro_use] extern crate solana_metrics; diff --git a/poh/src/mpsc_ringbuffer.rs b/poh/src/mpsc_ringbuffer.rs new file mode 100644 index 00000000000000..26dbe44769ebc0 --- /dev/null +++ b/poh/src/mpsc_ringbuffer.rs @@ -0,0 +1,810 @@ +//! The implementation is based on Dmitry Vyukov's bounded MPMC queue. +//! +//! Source: +//! - +/// Adapted from https://github.com/crossbeam-rs/crossbeam.git +/// Customized to Multiple producer single consumer ring buffer +/// - Fixed capacity +/// - Fixed number of producers + +use std::boxed::Box; +use core::cell::UnsafeCell; +use core::fmt; +use core::mem::{self, MaybeUninit}; +use core::panic::{RefUnwindSafe, UnwindSafe}; +use core::sync::atomic::{self, AtomicUsize, Ordering}; +use core::cell::Cell; + +const SPIN_LIMIT: u32 = 6; +const YIELD_LIMIT: u32 = 10; + +/// Performs exponential backoff in spin loops. +/// +/// Backing off in spin loops reduces contention and improves overall performance. +/// +/// This primitive can execute *YIELD* and *PAUSE* instructions, yield the current thread to the OS +/// scheduler, and tell when is a good time to block the thread using a different synchronization +/// mechanism. Each step of the back off procedure takes roughly twice as long as the previous +/// step. +/// +/// # Examples +/// +/// Backing off in a lock-free loop: +/// +/// ``` +/// use std::sync::atomic::AtomicUsize; +/// use std::sync::atomic::Ordering::SeqCst; +/// +/// fn fetch_mul(a: &AtomicUsize, b: usize) -> usize { +/// let backoff = Backoff::new(); +/// loop { +/// let val = a.load(SeqCst); +/// if a.compare_exchange(val, val.wrapping_mul(b), SeqCst, SeqCst).is_ok() { +/// return val; +/// } +/// backoff.spin(); +/// } +/// } +/// ``` +/// +/// Waiting for an [`AtomicBool`] to become `true`: +/// +/// ``` +/// use std::sync::atomic::AtomicBool; +/// use std::sync::atomic::Ordering::SeqCst; +/// +/// fn spin_wait(ready: &AtomicBool) { +/// let backoff = Backoff::new(); +/// while !ready.load(SeqCst) { +/// backoff.snooze(); +/// } +/// } +/// ``` +/// +/// Waiting for an [`AtomicBool`] to become `true` and parking the thread after a long wait. +/// Note that whoever sets the atomic variable to `true` must notify the parked thread by calling +/// [`unpark()`]: +/// +/// ``` +/// use std::sync::atomic::AtomicBool; +/// use std::sync::atomic::Ordering::SeqCst; +/// use std::thread; +/// +/// fn blocking_wait(ready: &AtomicBool) { +/// let backoff = Backoff::new(); +/// while !ready.load(SeqCst) { +/// if backoff.is_completed() { +/// thread::park(); +/// } else { +/// backoff.snooze(); +/// } +/// } +/// } +/// ``` +/// +/// [`is_completed`]: Backoff::is_completed +/// [`std::thread::park()`]: std::thread::park +/// [`Condvar`]: std::sync::Condvar +/// [`AtomicBool`]: std::sync::atomic::AtomicBool +/// [`unpark()`]: std::thread::Thread::unpark +pub struct Backoff { + step: Cell, +} + +impl Backoff { + /// Creates a new `Backoff`. + /// + /// # Examples + /// + /// ``` + /// + /// let backoff = Backoff::new(); + /// ``` + #[inline] + pub fn new() -> Self { + Self { step: Cell::new(0) } + } + + /// Resets the `Backoff`. + /// + /// # Examples + /// + /// ``` + /// + /// let backoff = Backoff::new(); + /// backoff.reset(); + /// ``` + #[inline] + pub fn reset(&self) { + self.step.set(0); + } + + /// Backs off in a lock-free loop. + /// + /// This method should be used when we need to retry an operation because another thread made + /// progress. + /// + /// The processor may yield using the *YIELD* or *PAUSE* instruction. + /// + /// # Examples + /// + /// Backing off in a lock-free loop: + /// + /// ``` + /// use std::sync::atomic::AtomicUsize; + /// use std::sync::atomic::Ordering::SeqCst; + /// + /// fn fetch_mul(a: &AtomicUsize, b: usize) -> usize { + /// let backoff = Backoff::new(); + /// loop { + /// let val = a.load(SeqCst); + /// if a.compare_exchange(val, val.wrapping_mul(b), SeqCst, SeqCst).is_ok() { + /// return val; + /// } + /// backoff.spin(); + /// } + /// } + /// + /// let a = AtomicUsize::new(7); + /// assert_eq!(fetch_mul(&a, 8), 7); + /// assert_eq!(a.load(SeqCst), 56); + /// ``` + #[inline] + pub fn spin(&self) { + for _ in 0..1 << self.step.get().min(SPIN_LIMIT) { + std::hint::spin_loop(); + } + + if self.step.get() <= SPIN_LIMIT { + self.step.set(self.step.get() + 1); + } + } + + /// Backs off in a blocking loop. + /// + /// This method should be used when we need to wait for another thread to make progress. + /// + /// The processor may yield using the *YIELD* or *PAUSE* instruction and the current thread + /// may yield by giving up a timeslice to the OS scheduler. + /// + /// In `#[no_std]` environments, this method is equivalent to [`spin`]. + /// + /// If possible, use [`is_completed`] to check when it is advised to stop using backoff and + /// block the current thread using a different synchronization mechanism instead. + /// + /// [`spin`]: Backoff::spin + /// [`is_completed`]: Backoff::is_completed + /// + /// # Examples + /// + /// Waiting for an [`AtomicBool`] to become `true`: + /// + /// ``` + /// use std::sync::Arc; + /// use std::sync::atomic::AtomicBool; + /// use std::sync::atomic::Ordering::SeqCst; + /// use std::thread; + /// use std::time::Duration; + /// + /// fn spin_wait(ready: &AtomicBool) { + /// let backoff = Backoff::new(); + /// while !ready.load(SeqCst) { + /// backoff.snooze(); + /// } + /// } + /// + /// let ready = Arc::new(AtomicBool::new(false)); + /// let ready2 = ready.clone(); + /// + /// # let t = + /// thread::spawn(move || { + /// thread::sleep(Duration::from_millis(100)); + /// ready2.store(true, SeqCst); + /// }); + /// + /// assert_eq!(ready.load(SeqCst), false); + /// spin_wait(&ready); + /// assert_eq!(ready.load(SeqCst), true); + /// # t.join().unwrap(); // join thread to avoid https://github.com/rust-lang/miri/issues/1371 + /// ``` + /// + /// [`AtomicBool`]: std::sync::atomic::AtomicBool + #[inline] + pub fn snooze(&self) { + if self.step.get() <= SPIN_LIMIT { + for _ in 0..1 << self.step.get() { + std::hint::spin_loop(); + } + } else { + #[cfg(not(feature = "std"))] + for _ in 0..1 << self.step.get() { + std::hint::spin_loop(); + } + + #[cfg(feature = "std")] + ::std::thread::yield_now(); + } + + if self.step.get() <= YIELD_LIMIT { + self.step.set(self.step.get() + 1); + } + } + + /// Returns `true` if exponential backoff has completed and blocking the thread is advised. + /// + /// # Examples + /// + /// Waiting for an [`AtomicBool`] to become `true` and parking the thread after a long wait: + /// + /// ``` + /// use std::sync::Arc; + /// use std::sync::atomic::AtomicBool; + /// use std::sync::atomic::Ordering::SeqCst; + /// use std::thread; + /// use std::time::Duration; + /// + /// fn blocking_wait(ready: &AtomicBool) { + /// let backoff = Backoff::new(); + /// while !ready.load(SeqCst) { + /// if backoff.is_completed() { + /// thread::park(); + /// } else { + /// backoff.snooze(); + /// } + /// } + /// } + /// + /// let ready = Arc::new(AtomicBool::new(false)); + /// let ready2 = ready.clone(); + /// let waiter = thread::current(); + /// + /// # let t = + /// thread::spawn(move || { + /// thread::sleep(Duration::from_millis(100)); + /// ready2.store(true, SeqCst); + /// waiter.unpark(); + /// }); + /// + /// assert_eq!(ready.load(SeqCst), false); + /// blocking_wait(&ready); + /// assert_eq!(ready.load(SeqCst), true); + /// # t.join().unwrap(); // join thread to avoid https://github.com/rust-lang/miri/issues/1371 + /// ``` + /// + /// [`AtomicBool`]: std::sync::atomic::AtomicBool + #[inline] + pub fn is_completed(&self) -> bool { + self.step.get() > YIELD_LIMIT + } +} + +impl fmt::Debug for Backoff { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("Backoff") + .field("step", &self.step) + .field("is_completed", &self.is_completed()) + .finish() + } +} + +impl Default for Backoff { + fn default() -> Self { + Self::new() + } +} + +/// A slot in a queue. +struct Slot { + /// The current stamp. + /// + /// If the stamp equals the tail, this node will be next written to. If it equals head + 1, + /// this node will be next read from. + stamp: AtomicUsize, + + /// The value in this slot. + value: UnsafeCell>, +} + +/// A bounded multi-producer multi-consumer queue. +/// +/// This queue allocates a fixed-capacity buffer on construction, which is used to store pushed +/// elements. The queue cannot hold more elements than the buffer allows. Attempting to push an +/// element into a full queue will fail. Alternatively, [`force_push`] makes it possible for +/// this queue to be used as a ring-buffer. Having a buffer allocated upfront makes this queue +/// a bit faster than [`SegQueue`]. +/// +/// [`force_push`]: ArrayQueue::force_push +/// [`SegQueue`]: super::SegQueue +/// +/// # Examples +/// +/// ``` +/// +/// let q = ArrayQueue::new(2); +/// +/// assert_eq!(q.push('a'), Ok(())); +/// assert_eq!(q.push('b'), Ok(())); +/// assert_eq!(q.push('c'), Err('c')); +/// assert_eq!(q.pop(), Some('a')); +/// ``` +pub struct ArrayQueue { + /// The head of the queue. + /// + /// This value is a "stamp" consisting of an index into the buffer and a lap, but packed into a + /// single `usize`. The lower bits represent the index, while the upper bits represent the lap. + /// + /// Elements are popped from the head of the queue. + head: AtomicUsize, + + /// The tail of the queue. + /// + /// This value is a "stamp" consisting of an index into the buffer and a lap, but packed into a + /// single `usize`. The lower bits represent the index, while the upper bits represent the lap. + /// + /// Elements are pushed into the tail of the queue. + tail: AtomicUsize, + + /// The buffer holding slots. + buffer: Box<[Slot]>, + + /// A stamp with the value of `{ lap: 1, index: 0 }`. + one_lap: usize, +} + +unsafe impl Sync for ArrayQueue {} +unsafe impl Send for ArrayQueue {} + +impl UnwindSafe for ArrayQueue {} +impl RefUnwindSafe for ArrayQueue {} + +impl ArrayQueue { + /// Creates a new bounded queue with the given capacity. + /// + /// # Panics + /// + /// Panics if the capacity is zero. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::::new(100); + /// ``` + pub fn new(cap: usize) -> Self { + assert!(cap > 0, "capacity must be non-zero"); + + // Head is initialized to `{ lap: 0, index: 0 }`. + // Tail is initialized to `{ lap: 0, index: 0 }`. + let head = 0; + let tail = 0; + + // Allocate a buffer of `cap` slots initialized + // with stamps. + let buffer: Box<[Slot]> = (0..cap) + .map(|i| { + // Set the stamp to `{ lap: 0, index: i }`. + Slot { + stamp: AtomicUsize::new(i), + value: UnsafeCell::new(MaybeUninit::uninit()), + } + }) + .collect(); + + // One lap is the smallest power of two greater than `cap`. + let one_lap = (cap + 1).next_power_of_two(); + + Self { + buffer, + one_lap, + head: AtomicUsize::new(head), + tail: AtomicUsize::new(tail), + } + } + + fn push_or_else(&self, mut value: T, f: F) -> Result<(), T> + where + F: Fn(T, usize, usize, &Slot) -> Result, + { + let backoff = Backoff::new(); + let mut tail = self.tail.load(Ordering::Relaxed); + + loop { + // Deconstruct the tail. + let index = tail & (self.one_lap - 1); + let lap = tail & !(self.one_lap - 1); + + let new_tail = if index + 1 < self.capacity() { + // Same lap, incremented index. + // Set to `{ lap: lap, index: index + 1 }`. + tail + 1 + } else { + // One lap forward, index wraps around to zero. + // Set to `{ lap: lap.wrapping_add(1), index: 0 }`. + lap.wrapping_add(self.one_lap) + }; + + // Inspect the corresponding slot. + debug_assert!(index < self.buffer.len()); + let slot = unsafe { self.buffer.get_unchecked(index) }; + let stamp = slot.stamp.load(Ordering::Acquire); + + // If the tail and the stamp match, we may attempt to push. + if tail == stamp { + // Try moving the tail. + match self.tail.compare_exchange_weak( + tail, + new_tail, + Ordering::SeqCst, + Ordering::Relaxed, + ) { + Ok(_) => { + // Write the value into the slot and update the stamp. + unsafe { + slot.value.get().write(MaybeUninit::new(value)); + } + slot.stamp.store(tail + 1, Ordering::Release); + return Ok(()); + } + Err(t) => { + tail = t; + backoff.spin(); + } + } + } else if stamp.wrapping_add(self.one_lap) == tail + 1 { + atomic::fence(Ordering::SeqCst); + value = f(value, tail, new_tail, slot)?; + backoff.spin(); + tail = self.tail.load(Ordering::Relaxed); + } else { + // Snooze because we need to wait for the stamp to get updated. + backoff.snooze(); + tail = self.tail.load(Ordering::Relaxed); + } + } + } + + /// Attempts to push an element into the queue. + /// + /// If the queue is full, the element is returned back as an error. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::new(1); + /// + /// assert_eq!(q.push(10), Ok(())); + /// assert_eq!(q.push(20), Err(20)); + /// ``` + pub fn push(&self, value: T) -> Result<(), T> { + self.push_or_else(value, |v, tail, _, _| { + let head = self.head.load(Ordering::Relaxed); + + // If the head lags one lap behind the tail as well... + if head.wrapping_add(self.one_lap) == tail { + // ...then the queue is full. + Err(v) + } else { + Ok(v) + } + }) + } + + /// Pushes an element into the queue, replacing the oldest element if necessary. + /// + /// If the queue is full, the oldest element is replaced and returned, + /// otherwise `None` is returned. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::new(2); + /// + /// assert_eq!(q.force_push(10), None); + /// assert_eq!(q.force_push(20), None); + /// assert_eq!(q.force_push(30), Some(10)); + /// assert_eq!(q.pop(), Some(20)); + /// ``` + pub fn force_push(&self, value: T) -> Option { + self.push_or_else(value, |v, tail, new_tail, slot| { + let head = tail.wrapping_sub(self.one_lap); + let new_head = new_tail.wrapping_sub(self.one_lap); + + // Try moving the head. + if self + .head + .compare_exchange_weak(head, new_head, Ordering::SeqCst, Ordering::Relaxed) + .is_ok() + { + // Move the tail. + self.tail.store(new_tail, Ordering::SeqCst); + + // Swap the previous value. + let old = unsafe { slot.value.get().replace(MaybeUninit::new(v)).assume_init() }; + + // Update the stamp. + slot.stamp.store(tail + 1, Ordering::Release); + + Err(old) + } else { + Ok(v) + } + }) + .err() + } + + /// Attempts to pop an element from the queue. + /// + /// If the queue is empty, `None` is returned. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::new(1); + /// assert_eq!(q.push(10), Ok(())); + /// + /// assert_eq!(q.pop(), Some(10)); + /// assert!(q.pop().is_none()); + /// ``` + pub fn pop(&self) -> Option { + let backoff = Backoff::new(); + let mut head = self.head.load(Ordering::Relaxed); + + loop { + // Deconstruct the head. + let index = head & (self.one_lap - 1); + let lap = head & !(self.one_lap - 1); + + // Inspect the corresponding slot. + debug_assert!(index < self.buffer.len()); + let slot = unsafe { self.buffer.get_unchecked(index) }; + let stamp = slot.stamp.load(Ordering::Acquire); + + // If the stamp is ahead of the head by 1, we may attempt to pop. + if head + 1 == stamp { + let new = if index + 1 < self.capacity() { + // Same lap, incremented index. + // Set to `{ lap: lap, index: index + 1 }`. + head + 1 + } else { + // One lap forward, index wraps around to zero. + // Set to `{ lap: lap.wrapping_add(1), index: 0 }`. + lap.wrapping_add(self.one_lap) + }; + + // Try moving the head. + match self.head.compare_exchange_weak( + head, + new, + Ordering::SeqCst, + Ordering::Relaxed, + ) { + Ok(_) => { + // Read the value from the slot and update the stamp. + let msg = unsafe { slot.value.get().read().assume_init() }; + slot.stamp + .store(head.wrapping_add(self.one_lap), Ordering::Release); + return Some(msg); + } + Err(h) => { + head = h; + backoff.spin(); + } + } + } else if stamp == head { + atomic::fence(Ordering::SeqCst); + let tail = self.tail.load(Ordering::Relaxed); + + // If the tail equals the head, that means the channel is empty. + if tail == head { + return None; + } + + backoff.spin(); + head = self.head.load(Ordering::Relaxed); + } else { + // Snooze because we need to wait for the stamp to get updated. + backoff.snooze(); + head = self.head.load(Ordering::Relaxed); + } + } + } + + /// Returns the capacity of the queue. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::::new(100); + /// + /// assert_eq!(q.capacity(), 100); + /// ``` + #[inline] + pub fn capacity(&self) -> usize { + self.buffer.len() + } + + /// Returns `true` if the queue is empty. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::new(100); + /// + /// assert!(q.is_empty()); + /// q.push(1).unwrap(); + /// assert!(!q.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + let head = self.head.load(Ordering::SeqCst); + let tail = self.tail.load(Ordering::SeqCst); + + // Is the tail lagging one lap behind head? + // Is the tail equal to the head? + // + // Note: If the head changes just before we load the tail, that means there was a moment + // when the channel was not empty, so it is safe to just return `false`. + tail == head + } + + /// Returns `true` if the queue is full. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::new(1); + /// + /// assert!(!q.is_full()); + /// q.push(1).unwrap(); + /// assert!(q.is_full()); + /// ``` + pub fn is_full(&self) -> bool { + let tail = self.tail.load(Ordering::SeqCst); + let head = self.head.load(Ordering::SeqCst); + + // Is the head lagging one lap behind tail? + // + // Note: If the tail changes just before we load the head, that means there was a moment + // when the queue was not full, so it is safe to just return `false`. + head.wrapping_add(self.one_lap) == tail + } + + /// Returns the number of elements in the queue. + /// + /// # Examples + /// + /// ``` + /// + /// let q = ArrayQueue::new(100); + /// assert_eq!(q.len(), 0); + /// + /// q.push(10).unwrap(); + /// assert_eq!(q.len(), 1); + /// + /// q.push(20).unwrap(); + /// assert_eq!(q.len(), 2); + /// ``` + pub fn len(&self) -> usize { + loop { + // Load the tail, then load the head. + let tail = self.tail.load(Ordering::SeqCst); + let head = self.head.load(Ordering::SeqCst); + + // If the tail didn't change, we've got consistent values to work with. + if self.tail.load(Ordering::SeqCst) == tail { + let hix = head & (self.one_lap - 1); + let tix = tail & (self.one_lap - 1); + + return if hix < tix { + tix - hix + } else if hix > tix { + self.capacity() - hix + tix + } else if tail == head { + 0 + } else { + self.capacity() + }; + } + } + } +} + +impl Drop for ArrayQueue { + fn drop(&mut self) { + if mem::needs_drop::() { + // Get the index of the head. + let head = *self.head.get_mut(); + let tail = *self.tail.get_mut(); + + let hix = head & (self.one_lap - 1); + let tix = tail & (self.one_lap - 1); + + let len = if hix < tix { + tix - hix + } else if hix > tix { + self.capacity() - hix + tix + } else if tail == head { + 0 + } else { + self.capacity() + }; + + // Loop over all slots that hold a message and drop them. + for i in 0..len { + // Compute the index of the next slot holding a message. + let index = if hix + i < self.capacity() { + hix + i + } else { + hix + i - self.capacity() + }; + + unsafe { + debug_assert!(index < self.buffer.len()); + let slot = self.buffer.get_unchecked_mut(index); + (*slot.value.get()).assume_init_drop(); + } + } + } + } +} + +impl fmt::Debug for ArrayQueue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad("ArrayQueue { .. }") + } +} + +impl IntoIterator for ArrayQueue { + type Item = T; + + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter { value: self } + } +} + +#[derive(Debug)] +pub struct IntoIter { + value: ArrayQueue, +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + let value = &mut self.value; + let head = *value.head.get_mut(); + if value.head.get_mut() != value.tail.get_mut() { + let index = head & (value.one_lap - 1); + let lap = head & !(value.one_lap - 1); + // SAFETY: We have mutable access to this, so we can read without + // worrying about concurrency. Furthermore, we know this is + // initialized because it is the value pointed at by `value.head` + // and this is a non-empty queue. + let val = unsafe { + debug_assert!(index < value.buffer.len()); + let slot = value.buffer.get_unchecked_mut(index); + slot.value.get().read().assume_init() + }; + let new = if index + 1 < value.capacity() { + // Same lap, incremented index. + // Set to `{ lap: lap, index: index + 1 }`. + head + 1 + } else { + // One lap forward, index wraps around to zero. + // Set to `{ lap: lap.wrapping_add(1), index: 0 }`. + lap.wrapping_add(value.one_lap) + }; + *value.head.get_mut() = new; + Some(val) + } else { + None + } + } +}