From 31cc68a2163033cba095eaa137d6d0e2b6b229c3 Mon Sep 17 00:00:00 2001 From: Ibraheem Ahmed Date: Mon, 1 Jul 2024 23:53:46 -0400 Subject: [PATCH] write readme --- Cargo.toml | 1 + README.md | 162 +++++++++++++++++++++++++++++++++++++++++++++++-- src/lib.rs | 3 +- src/map.rs | 157 +++++++++++++++++++++++++---------------------- src/raw/mod.rs | 3 +- 5 files changed, 249 insertions(+), 77 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3a30102..a3dca86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ seize = "0.4.2" rand = "0.8.5" dashmap = "5.5.3" criterion = "0.5.1" +tokio = { version = "1.38.0", features = ["fs", "rt"] } [[bench]] name = "compare" diff --git a/README.md b/README.md index a59cfd8..1016175 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,161 @@ # Papaya -[![Crate](https://img.shields.io/crates/v/papaya?style=for-the-badge)](https://crates.io/crates/papaya) -[![Github](https://img.shields.io/badge/github-papaya-success?style=for-the-badge)](https://github.com/ibraheemdev/papaya) -[![Docs](https://img.shields.io/badge/docs.rs-0.0.1-4d76ae?style=for-the-badge)](https://docs.rs/papaya) +A fast and ergonomic concurrent hash-table that features: -A fast concurrent hash-table for read-heavy workloads. +- An ergonomic lock-free API — no more deadlocks! +- Powerful atomic operations. +- Seamless usage in async contexts. +- Extremely fast and scalable reads (see [benchmarks]). +- Predictable latency across all operations. +- Efficient memory usage, with garbage collection powered by [`seize`]. + +# Overview + +The top-level crate documentation is organized as follows: + +- [Usage](#usage) shows how to interact with the concurrent `HashMap`. +- [Atomic Operations](#atomic-operations) shows how to modify a map atomically. +- [Async Support](#async-support) shows how to use the map in an async context. +- [Advanced Lifetimes](#advanced-lifetimes) explains how to use guards when working with nested types. 
+- [Performance](#performance) provides details of expected performance characteristics. + +# Usage + +`papaya` aims to provide an ergonomic API without sacrificing performance. The `HashMap` provided by this crate exposes a lock-free API and can hand out direct references to objects in the map without the need for wrapper types that are clunky and prone to deadlocks. + +However, you can't hold on to references forever due to concurrent removals. Because of this, the `HashMap` API is structured around *pinning*: + +```rust,ignore +let map = papaya::HashMap::new(); + +// Pin the map. +let map = map.pin(); +``` + +Once you create a pin you can access the map just like a standard `HashMap`. The pinned table is similar to a lock guard, so any references that are returned will be tied to the lifetime of the guard. Unlike a lock however, pinning is cheap and can never cause a deadlock. + +```rust +let map = papaya::HashMap::new(); + +// Pin the map. +let map = map.pin(); + +// Use the map as normal. +map.insert('A', 1); +assert_eq!(map.get(&'A'), Some(&1)); +assert_eq!(map.len(), 1); +``` + +As expected of a concurrent `HashMap`, all operations take a shared reference, allowing the map to be freely pinned and accessed from multiple threads: + +```rust +let map = papaya::HashMap::new(); + +// Use the map from multiple threads. +std::thread::scope(|s| { + // Insert some values. + s.spawn(|| { + let map = map.pin(); + for i in 'A'..='Z' { + map.insert(i, 1); + } + }); + + // Remove the values. + s.spawn(|| { + let map = map.pin(); + for i in 'A'..='Z' { + map.remove(&i); + } + }); + + // Read the values. + s.spawn(|| { + for (key, value) in map.pin().iter() { + println!("{key}: {value}"); + } + }); +}); +``` + +It is important to note that as long as you are holding on to a guard, you are preventing the map from performing garbage collection. 
Pinning and unpinning the table is relatively cheap but not free, similar to the cost of locking and unlocking an uncontended or lightly contended `Mutex`. Thus guard reuse is encouraged, within reason. See the [`seize`] crate for advanced usage and specifics of the garbage collection algorithm. + +# Atomic Operations + +TODO + +# Async Support + +By default, a pinned map guard does not implement `Send` as it is tied to the current thread, similar to a lock. This leads to an issue in work-stealing schedulers as guards are not valid across `.await` points. + +To overcome this, you can use an *owned* guard. + +```rust,ignore +tokio::spawn(async move { + // Pin the map with an owned guard. + let map = map.pin_owned(); + + // Hold references across await points. + let value = map.get(&37); + bar().await; + println!("{:?}", value); +}); +``` + +Note that owned guards are more expensive to create than regular guards, so they should only be used if necessary. In the above example, you could instead drop the reference and call `get` a second time after the asynchronous call. A more fitting example involves asynchronous iteration: + +```rust,ignore +tokio::spawn(async move { + for (key, value) in map.pin_owned().iter() { + tokio::fs::write("db.txt", format!("{key}: {value}\n")).await; + } +}); +``` + +# Advanced Lifetimes + +You may run into issues when you try to return a reference to a map contained within an outer type. For example: + +```rust,ignore +pub struct Metrics { + map: papaya::HashMap<String, Vec<u64>> +} + +impl Metrics { + pub fn get(&self, name: &str) -> Option<&[u64]> { + // error[E0515]: cannot return value referencing temporary value + Some(self.map.pin().get(name)?.as_slice()) + } +} +``` + +This is a similar issue to that of locks, as the guard is created within the method and cannot be referenced outside of it. 
The solution is to accept a guard in the method directly, tying the lifetime to the caller's stack frame: + +```rust +use papaya::Guard; + +pub struct Metrics { + map: papaya::HashMap<String, Vec<u64>> +} + +impl Metrics { + pub fn guard(&self) -> impl Guard + '_ { + self.map.guard() + } + + pub fn get<'guard>(&self, name: &str, guard: &'guard impl Guard) -> Option<&'guard [u64]> { + Some(self.map.get(name, guard)?.as_slice()) + } +} +``` + +The `Guard` trait supports both local and owned guards. Note the `'guard` lifetime that ties the guard to the returned reference. No complicated wrapper types or closure mapping is necessary. + +# Performance + +`papaya` is built with read-heavy workloads in mind. As such, reads are extremely scalable and provide consistent performance that scales with concurrency, meaning `papaya` will excel in any workload where you read more than you write. In write-heavy workloads, `papaya` will still provide competitive performance despite not being its primary use case. See the [benchmarks] for details. + +`papaya` also aims to provide predictable, consistent latency across all operations. Most operations are lock-free, and those that aren't only block under rare and constrained conditions. `papaya` also features [incremental resizing], meaning operations aren't required to block when resizing the hash-table. Predictable latency is an important part of performance that doesn't often show up in benchmarks, but has significant implications for real-world usage. 
+ +[benchmarks]: google.com +[incremental resizing]: https://docs.rs/papaya/latest/papaya/enum.ResizeMode.html diff --git a/src/lib.rs b/src/lib.rs index ed1c8ab..b082ef7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,8 @@ #![allow(unstable_name_collisions)] +#![doc = include_str!("../README.md")] mod map; mod raw; pub use map::{HashMap, HashMapBuilder, HashMapRef, Iter, Keys, ResizeMode, Values}; -pub use seize::{Guard, OwnedGuard}; +pub use seize::Guard; diff --git a/src/map.rs b/src/map.rs index 98cd5de..dc73d52 100644 --- a/src/map.rs +++ b/src/map.rs @@ -10,7 +10,7 @@ use std::marker::PhantomData; /// A concurrent hash table. /// /// Most hash table operations require a [`Guard`](crate::Guard), which can be acquired through -/// [`HashMap::guard`] or using the [`HashMap::pin`] API. See the [crate-level documentation](crate) +/// [`HashMap::guard`] or using the [`HashMap::pin`] API. See the [crate-level documentation](crate#usage) /// for details. pub struct HashMap { raw: raw::HashMap, @@ -32,15 +32,15 @@ unsafe impl Sync for HashMap {} /// use std::collections::hash_map::RandomState; /// /// let map: HashMap = HashMap::builder() -/// // set the initial capacity +/// // Set the initial capacity. /// .capacity(2048) -/// // set the hasher +/// // Set the hasher. /// .hasher(RandomState::new()) -/// // set the resize mode +/// // Set the resize mode. /// .resize_mode(ResizeMode::Blocking) -/// // set a custom collector +/// // Set a custom garbage collector. /// .collector(Collector::new().batch_size(128)) -/// // construct the hash map +/// // Construct the hash map. /// .build(); /// ``` pub struct HashMapBuilder { @@ -76,7 +76,7 @@ impl HashMapBuilder { /// Set the initial capacity of the map. /// /// Note the table should be able to hold at least `capacity` elements before - /// resizing, but may prematurely resize due to poor hash distribution. If `capacity` + /// resizing, but may prematurely resize due to poor hash distributions. 
If `capacity` /// is 0, the hash map will not allocate. pub fn capacity(self, capacity: usize) -> HashMapBuilder { HashMapBuilder { @@ -88,9 +88,7 @@ impl HashMapBuilder { } } - /// Set the resizing mode of the map. - /// - /// See [`ResizeMode`] for details. + /// Set the resizing mode of the map. See [`ResizeMode`] for details. pub fn resize_mode(self, resize_mode: ResizeMode) -> Self { HashMapBuilder { resize_mode, @@ -101,9 +99,9 @@ impl HashMapBuilder { } } - /// Set the [`seize::Collector`] used for memory reclamation. + /// Set the [`seize::Collector`] used for garbage collection. /// - /// This method may be useful when you want more control over memory reclamation. + /// This method may be useful when you want more control over garbage collection. /// See [`seize::Collector`] for details. /// /// Note that all `Guard` references used to access the map must be produced by @@ -137,7 +135,7 @@ pub enum ResizeMode { /// /// Incremental resizes avoids latency spikes that can occur when insert operations have /// to resize a large table. However, they reduce parallelism during the resize and so can reduce - /// overall throughput. Incremental resizing also means all reads or write operations during an + /// overall throughput. Incremental resizing also means reads or write operations during an /// in-progress resize may have to search both the current and new table before succeeding, trading /// off median latency during a resize for tail latency. /// @@ -280,32 +278,10 @@ impl HashMap { } } - /// Returns a guard for use with this map. - /// - /// Note that holding on to a guard pins the current thread, preventing garbage - /// collection. See the [crate-level documentation](crate) for details. - #[inline] - pub fn guard(&self) -> LocalGuard<'_> { - self.raw.collector().enter() - } - - /// Returns an owned guard for use with this map. 
- /// - /// Owned guards implement `Send` and `Sync`, allowing them to be held across - /// `.await` points in multi-threaded schedulers. This is especially useful - /// for iterators. - /// - /// Note that holding on to a guard pins the current thread, preventing garbage - /// collection. See the [crate-level documentation](crate) for details. - #[inline] - pub fn owned_guard(&self) -> OwnedGuard<'_> { - self.raw.collector().enter_owned() - } - /// Returns a pinned reference to the map. /// /// The returned reference manages a guard internally, preventing garbage collection - /// for as long as it is held. See the [crate-level documentation](crate) for details. + /// for as long as it is held. See the [crate-level documentation](crate#usage) for details. #[inline] pub fn pin(&self) -> HashMapRef<'_, K, V, S, LocalGuard<'_>> { HashMapRef { @@ -316,12 +292,12 @@ impl HashMap { /// Returns a pinned reference to the map. /// - /// Unlike [`pin`](HashMap::pin), the returned reference implements `Send` - /// and `Sync`, allowing it to be held across `.await` points in multi-threaded + /// Unlike [`HashMap::pin`], the returned reference implements `Send` and `Sync`, + /// allowing it to be held across `.await` points in multi-threaded /// schedulers. This is especially useful for iterators. /// /// The returned reference manages a guard internally, preventing garbage collection - /// for as long as it is held. See the [crate-level documentation](crate) for details. + /// for as long as it is held. See the [crate-level documentation](crate#usage) for details. #[inline] pub fn pin_owned(&self) -> HashMapRef<'_, K, V, S, OwnedGuard<'_>> { HashMapRef { @@ -329,6 +305,28 @@ impl HashMap { map: self, } } + + /// Returns a guard for use with this map. + /// + /// Note that holding on to a guard prevents garbage collection. + /// See the [crate-level documentation](crate#usage) for details. 
+ #[inline] + pub fn guard(&self) -> LocalGuard<'_> { + self.raw.collector().enter() + } + + /// Returns an owned guard for use with this map. + /// + /// Owned guards implement `Send` and `Sync`, allowing them to be held across + /// `.await` points in work-stealing schedulers. This is especially useful + /// for iterators. + /// + /// Note that holding on to a guard prevents garbage collection. + /// See the [crate-level documentation](crate#usage) for details. + #[inline] + pub fn owned_guard(&self) -> OwnedGuard<'_> { + self.raw.collector().enter_owned() + } } impl HashMap @@ -464,7 +462,7 @@ where /// If the map did have this key present, the value is updated, and the old /// value is returned. The key is not updated, though; this matters for /// types that can be `==` without being identical. See the [standard library - /// documentation] for more. + /// documentation] for details. /// /// [standard library documentation]: https://doc.rust-lang.org/std/collections/index.html#insert-and-complex-keys /// @@ -501,8 +499,6 @@ where /// /// # Examples /// - /// Basic usage: - /// /// ``` /// use papaya::HashMap; /// @@ -534,29 +530,30 @@ where } } - // Update an entry with a remapping function. - // - // If the value for the specified `key` is present, the new value is computed and stored the - // using the provided update function, and the new value is returned. Otherwise, `None` - // is returned. - // - // The update function should be pure, as it may be called multiple times if the current value - // changes during the execution of this function. However, the update is performed atomically, - // meaning the value is only updated from it's previous value using the call to `update` with that - // value. - // - // # Examples - // - // ``` - // use papaya::HashMap; - // - // let mut map = HashMap::new(); + /// Update an entry atomically. 
+ /// + /// If the value for the specified `key` is present, the new value is computed and stored + /// using the provided update function, and the new value is returned. Otherwise, `None` + /// is returned. + /// + /// The update function should be pure, as it may be called multiple times if the current value + /// changes during the execution of this function. However, the update is performed atomically, + /// meaning the value is only updated using the call to `update` with the previous value — + /// similar to a traditional [compare-and-swap](https://en.wikipedia.org/wiki/Compare-and-swap) + /// operation. + /// + /// # Examples + /// + /// ``` + /// use papaya::HashMap; + /// + /// let mut map = HashMap::new(); /// map.pin().insert("a", 1); /// assert_eq!(m.get(&"a"), Some(&1)); /// /// map.pin().update("a", |v| v + 1); /// assert_eq!(m.get(&"a"), Some(&2)); - // ``` + /// ``` pub fn update<'g, F>(&self, key: K, update: F, guard: &'g impl Guard) -> Option<&'g V> where F: Fn(&V) -> V, @@ -625,13 +622,16 @@ where /// /// The collection may reserve more space to avoid frequent reallocations. /// + /// # Panics + /// + /// Panics if the new allocation size overflows `usize`. + /// /// # Examples /// /// ``` /// use papaya::HashMap; /// /// let map: HashMap<&str, i32> = HashMap::new(); - /// /// map.pin().reserve(10); /// ``` pub fn reserve(&self, additional: usize, guard: &impl Guard) { @@ -657,7 +657,7 @@ where } /// An iterator visiting all key-value pairs in arbitrary order. - /// The iterator element type is `(&'a K, &'a V)`. + /// The iterator element type is `(&K, &V)`. /// /// # Examples /// @@ -684,7 +684,7 @@ where } /// An iterator visiting all keys in arbitrary order. - /// The iterator element type is `&'a K`. + /// The iterator element type is `&K`. /// /// # Examples /// @@ -712,7 +712,7 @@ where } /// An iterator visiting all values in arbitrary order. - /// The iterator element type is `&'a V`. + /// The iterator element type is `&V`. 
/// /// # Examples /// @@ -888,8 +888,9 @@ pub struct OccupiedError<'a, V: 'a> { pub not_inserted: V, } -/// A pinned reference to a hash table. +/// A pinned reference to a [`HashMap`]. /// +/// This type can be used to easily access a [`HashMap`] without explicitly managing a guard. /// See [`HashMap::pin`] for details. pub struct HashMapRef<'map, K, V, S, G> { guard: G, @@ -1032,7 +1033,7 @@ where } /// An iterator visiting all key-value pairs in arbitrary order. - /// The iterator element type is `(&'a K, &'a V)`. + /// The iterator element type is `(&K, &V)`. /// /// See [`HashMap::iter`] for details. #[inline] @@ -1041,7 +1042,7 @@ where } /// An iterator visiting all keys in arbitrary order. - /// The iterator element type is `&'a K`. + /// The iterator element type is `&K`. /// /// See [`HashMap::keys`] for details. #[inline] @@ -1050,7 +1051,7 @@ where } /// An iterator visiting all values in arbitrary order. - /// The iterator element type is `&'a V`. + /// The iterator element type is `&V`. /// /// See [`HashMap::values`] for details. #[inline] @@ -1059,9 +1060,23 @@ where } } +impl<'a, K, V, S, G> IntoIterator for &'a HashMapRef<'_, K, V, S, G> +where + K: Hash + Eq, + S: BuildHasher, + G: Guard, +{ + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V, G>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + /// An iterator over a map's entries. /// -/// See [`HashMap::iter`](crate::HashMap::iter) for details. +/// This struct is created by the [`iter`](HashMap::iter) method on [`HashMap`]. See its documentation for details. pub struct Iter<'g, K, V, G> { raw: raw::Iter<'g, K, V, G>, } @@ -1094,7 +1109,7 @@ where /// An iterator over a map's keys. /// -/// See [`HashMap::keys`](crate::HashMap::keys) for details. +/// This struct is created by the [`keys`](HashMap::keys) method on [`HashMap`]. See its documentation for details. 
pub struct Keys<'g, K, V, G> { iter: Iter<'g, K, V, G>, } @@ -1124,7 +1139,7 @@ where /// An iterator over a map's values. /// -/// See [`HashMap::values`](crate::HashMap::values) for details. +/// This struct is created by the [`values`](HashMap::values) method on [`HashMap`]. See its documentation for details. pub struct Values<'g, K, V, G> { iter: Iter<'g, K, V, G>, } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 121d747..e5b24c8 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1384,7 +1384,8 @@ where } loop { - let capacity = entries_for(self.root.count.active() + additional); + let active = self.root.count.active(); + let capacity = entries_for(active.checked_add(additional).unwrap()); // we have enough capacity if self.table.len >= capacity {