From 7762511757d081efdaf766e7e8e24153e3cda0f7 Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 1 Oct 2024 11:11:14 +0200 Subject: [PATCH] Change the default hasher to foldhash [Foldhash](https://github.com/orlp/foldhash) performs generally better than AHash while still avoiding the pitfalls of FxHash with certain distributions (such as only hashing aligned values). --- Cargo.toml | 6 +++--- README.md | 51 +++------------------------------------------ benches/bench.rs | 54 ++++++++++++++++++++++++------------------------ src/lib.rs | 6 +++--- src/map.rs | 20 +++++++++--------- src/set.rs | 16 +++++++------- 6 files changed, 54 insertions(+), 99 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8b5ed89ce..16d56c524 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ rust-version = "1.63.0" [dependencies] # For the default hasher -ahash = { version = "0.8.7", default-features = false, optional = true } +foldhash = { version = "0.1.2", default-features = false, optional = true } # For external trait impls rayon = { version = "1.0", optional = true } @@ -66,10 +66,10 @@ rustc-dep-of-std = [ # Enables the deprecated RawEntry API. raw-entry = [] -# Provides a default hasher. Currently this is AHash but this is subject to +# Provides a default hasher. Currently this is foldhash but this is subject to # change in the future. Note that the default hasher does *not* provide HashDoS # resistance, unlike the one in the standard library. -default-hasher = ["dep:ahash"] +default-hasher = ["dep:foldhash"] # Enables usage of `#[inline]` on far more functions than by default in this # crate. This may lead to a performance increase but often comes at a compile diff --git a/README.md b/README.md index 578a8ef94..789be0d03 100644 --- a/README.md +++ b/README.md @@ -26,59 +26,14 @@ in environments without `std`, such as embedded systems and kernels. ## Features - Drop-in replacement for the standard library `HashMap` and `HashSet` types. -- Uses [AHash](https://github.com/tkaitchuck/aHash) as the default hasher, which is much faster than SipHash. - However, AHash does *not provide the same level of HashDoS resistance* as SipHash, so if that is important to you, you might want to consider using a different hasher. +- Uses [foldhash](https://github.com/orlp/foldhash) as the default hasher, which is much faster than SipHash. + However, foldhash does *not provide the same level of HashDoS resistance* as SipHash, so if that is important to you, you might want to consider using a different hasher. - Around 2x faster than the previous standard library `HashMap`. - Lower memory usage: only 1 byte of overhead per entry instead of 8. - Compatible with `#[no_std]` (but requires a global allocator with the `alloc` crate). - Empty hash maps do not allocate any memory. - SIMD lookups to scan multiple hash entries in parallel. -## Performance - -Compared to the previous implementation of `std::collections::HashMap` (Rust 1.35). - -With the hashbrown default AHash hasher: - -| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | -| :-------------------------- | :----------------: | ----------------: | :----------: | ------: | ------- | -| insert_ahash_highbits | 18,865 | 8,020 | -10,845 | -57.49% | x 2.35 | -| insert_ahash_random | 19,711 | 8,019 | -11,692 | -59.32% | x 2.46 | -| insert_ahash_serial | 19,365 | 6,463 | -12,902 | -66.63% | x 3.00 | -| insert_erase_ahash_highbits | 51,136 | 17,916 | -33,220 | -64.96% | x 2.85 | -| insert_erase_ahash_random | 51,157 | 17,688 | -33,469 | -65.42% | x 2.89 | -| insert_erase_ahash_serial | 45,479 | 14,895 | -30,584 | -67.25% | x 3.05 | -| iter_ahash_highbits | 1,399 | 1,092 | -307 | -21.94% | x 1.28 | -| iter_ahash_random | 1,586 | 1,059 | -527 | -33.23% | x 1.50 | -| iter_ahash_serial | 3,168 | 1,079 | -2,089 | -65.94% | x 2.94 | -| lookup_ahash_highbits | 32,351 | 4,792 | -27,559 | -85.19% | x 6.75 | -| lookup_ahash_random | 17,419 | 4,817 | -12,602 | -72.35% | x 3.62 | -| lookup_ahash_serial | 15,254 | 3,606 | -11,648 | -76.36% | x 4.23 | -| lookup_fail_ahash_highbits | 21,187 | 4,369 | -16,818 | -79.38% | x 4.85 | -| lookup_fail_ahash_random | 21,550 | 4,395 | -17,155 | -79.61% | x 4.90 | -| lookup_fail_ahash_serial | 19,450 | 3,176 | -16,274 | -83.67% | x 6.12 | - - -With the libstd default SipHash hasher: - -| name | oldstdhash ns/iter | hashbrown ns/iter | diff ns/iter | diff % | speedup | -| :------------------------ | :----------------: | ----------------: | :----------: | ------: | ------- | -| insert_std_highbits | 19,216 | 16,885 | -2,331 | -12.13% | x 1.14 | -| insert_std_random | 19,179 | 17,034 | -2,145 | -11.18% | x 1.13 | -| insert_std_serial | 19,462 | 17,493 | -1,969 | -10.12% | x 1.11 | -| insert_erase_std_highbits | 50,825 | 35,847 | -14,978 | -29.47% | x 1.42 | -| insert_erase_std_random | 51,448 | 35,392 | -16,056 | -31.21% | x 1.45 | -| insert_erase_std_serial | 87,711 | 38,091 | -49,620 | -56.57% | x 2.30 | -| iter_std_highbits | 1,378 | 1,159 | -219 | -15.89% | x 1.19 | -| iter_std_random | 1,395 | 1,132 | -263 | -18.85% | x 1.23 | -| iter_std_serial | 1,704 | 1,105 | -599 | -35.15% | x 1.54 | -| lookup_std_highbits | 17,195 | 13,642 | -3,553 | -20.66% | x 1.26 | -| lookup_std_random | 17,181 | 13,773 | -3,408 | -19.84% | x 1.25 | -| lookup_std_serial | 15,483 | 13,651 | -1,832 | -11.83% | x 1.13 | -| lookup_fail_std_highbits | 20,926 | 13,474 | -7,452 | -35.61% | x 1.55 | -| lookup_fail_std_random | 21,766 | 13,505 | -8,261 | -37.95% | x 1.61 | -| lookup_fail_std_serial | 19,336 | 13,519 | -5,817 | -30.08% | x 1.43 | - ## Usage Add this to your `Cargo.toml`: @@ -107,7 +62,7 @@ This crate has the following Cargo features: - `raw-entry`: Enables access to the deprecated `RawEntry` API. - `inline-more`: Adds inline hints to most functions, improving run-time performance at the cost of compilation time. (enabled by default) -- `default-hasher`: Compiles with ahash as default hasher. (enabled by default) +- `default-hasher`: Compiles with foldhash as default hasher. (enabled by default) - `allocator-api2`: Enables support for allocators that support `allocator-api2`. (enabled by default) ## License diff --git a/benches/bench.rs b/benches/bench.rs index bfa830825..dd55159dc 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -1,5 +1,5 @@ // This benchmark suite contains some benchmarks along a set of dimensions: -// Hasher: std default (SipHash) and crate default (AHash). +// Hasher: std default (SipHash) and crate default (foldhash). // Int key distribution: low bit heavy, top bit heavy, and random. // Task: basic functionality: insert, insert_erase, lookup, lookup_fail, iter #![feature(test)] @@ -18,7 +18,7 @@ use std::{ const SIZE: usize = 1000; // The default hashmap when using this crate directly. -type AHashMap = HashMap; +type FoldHashMap = HashMap; // This uses the hashmap from this crate with the default hasher of the stdlib. type StdHashMap = HashMap; @@ -58,14 +58,14 @@ impl Drop for DropType { } macro_rules! bench_suite { - ($bench_macro:ident, $bench_ahash_serial:ident, $bench_std_serial:ident, - $bench_ahash_highbits:ident, $bench_std_highbits:ident, - $bench_ahash_random:ident, $bench_std_random:ident) => { - $bench_macro!($bench_ahash_serial, AHashMap, 0..); + ($bench_macro:ident, $bench_foldhash_serial:ident, $bench_std_serial:ident, + $bench_foldhash_highbits:ident, $bench_std_highbits:ident, + $bench_foldhash_random:ident, $bench_std_random:ident) => { + $bench_macro!($bench_foldhash_serial, FoldHashMap, 0..); $bench_macro!($bench_std_serial, StdHashMap, 0..); $bench_macro!( - $bench_ahash_highbits, - AHashMap, + $bench_foldhash_highbits, + FoldHashMap, (0..).map(usize::swap_bytes) ); $bench_macro!( @@ -73,7 +73,7 @@ macro_rules! bench_suite { StdHashMap, (0..).map(usize::swap_bytes) ); - $bench_macro!($bench_ahash_random, AHashMap, RandomKeys::new()); + $bench_macro!($bench_foldhash_random, FoldHashMap, RandomKeys::new()); $bench_macro!($bench_std_random, StdHashMap, RandomKeys::new()); }; } @@ -97,11 +97,11 @@ macro_rules! bench_insert { bench_suite!( bench_insert, - insert_ahash_serial, + insert_foldhash_serial, insert_std_serial, - insert_ahash_highbits, + insert_foldhash_highbits, insert_std_highbits, - insert_ahash_random, + insert_foldhash_random, insert_std_random ); @@ -122,11 +122,11 @@ macro_rules! bench_grow_insert { bench_suite!( bench_grow_insert, - grow_insert_ahash_serial, + grow_insert_foldhash_serial, grow_insert_std_serial, - grow_insert_ahash_highbits, + grow_insert_foldhash_highbits, grow_insert_std_highbits, - grow_insert_ahash_random, + grow_insert_foldhash_random, grow_insert_std_random ); @@ -158,11 +158,11 @@ macro_rules! bench_insert_erase { bench_suite!( bench_insert_erase, - insert_erase_ahash_serial, + insert_erase_foldhash_serial, insert_erase_std_serial, - insert_erase_ahash_highbits, + insert_erase_foldhash_highbits, insert_erase_std_highbits, - insert_erase_ahash_random, + insert_erase_foldhash_random, insert_erase_std_random ); @@ -187,11 +187,11 @@ macro_rules! bench_lookup { bench_suite!( bench_lookup, - lookup_ahash_serial, + lookup_foldhash_serial, lookup_std_serial, - lookup_ahash_highbits, + lookup_foldhash_highbits, lookup_std_highbits, - lookup_ahash_random, + lookup_foldhash_random, lookup_std_random ); @@ -216,11 +216,11 @@ macro_rules! bench_lookup_fail { bench_suite!( bench_lookup_fail, - lookup_fail_ahash_serial, + lookup_fail_foldhash_serial, lookup_fail_std_serial, - lookup_fail_ahash_highbits, + lookup_fail_foldhash_highbits, lookup_fail_std_highbits, - lookup_fail_ahash_random, + lookup_fail_foldhash_random, lookup_fail_std_random ); @@ -244,11 +244,11 @@ macro_rules! bench_iter { bench_suite!( bench_iter, - iter_ahash_serial, + iter_foldhash_serial, iter_std_serial, - iter_ahash_highbits, + iter_foldhash_highbits, iter_std_highbits, - iter_ahash_random, + iter_foldhash_random, iter_std_random ); diff --git a/src/lib.rs b/src/lib.rs index 482057d32..a637ccbef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,11 +39,11 @@ #![cfg_attr(feature = "nightly", warn(fuzzy_provenance_casts))] #![cfg_attr(feature = "nightly", allow(internal_features))] -/// Default hasher for [`HashMap`], [`HashSet`] and [`HashTable`]. +/// Default hasher for [`HashMap`] and [`HashSet`]. #[cfg(feature = "default-hasher")] -pub type DefaultHashBuilder = core::hash::BuildHasherDefault; +pub type DefaultHashBuilder = foldhash::fast::RandomState; -/// Dummy default hasher for [`HashMap`], [`HashSet`] and [`HashTable`]. +/// Dummy default hasher for [`HashMap`] and [`HashSet`]. #[cfg(not(feature = "default-hasher"))] pub enum DefaultHashBuilder {} diff --git a/src/map.rs b/src/map.rs index 1e794ca4f..d2076edad 100644 --- a/src/map.rs +++ b/src/map.rs @@ -15,7 +15,7 @@ pub use crate::raw_entry::*; /// A hash map implemented with quadratic probing and SIMD lookup. /// -/// The default hashing algorithm is currently [`AHash`], though this is +/// The default hashing algorithm is currently [`foldhash`], though this is /// subject to change at any point in the future. This hash function is very /// fast for all types of keys, but this algorithm will typically *not* protect /// against attacks such as HashDoS. @@ -142,7 +142,7 @@ pub use crate::raw_entry::*; /// [`with_hasher`]: #method.with_hasher /// [`with_capacity_and_hasher`]: #method.with_capacity_and_hasher /// [`fnv`]: https://crates.io/crates/fnv -/// [`AHash`]: https://crates.io/crates/ahash +/// [`foldhash`]: https://crates.io/crates/foldhash /// /// ``` /// use hashbrown::HashMap; @@ -270,7 +270,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`], for example with /// [`with_hasher`](HashMap::with_hasher) method. /// @@ -300,7 +300,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`], for example with /// [`with_capacity_and_hasher`](HashMap::with_capacity_and_hasher) method. /// @@ -333,7 +333,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`], for example with /// [`with_hasher_in`](HashMap::with_hasher_in) method. /// @@ -377,7 +377,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`], for example with /// [`with_capacity_and_hasher_in`](HashMap::with_capacity_and_hasher_in) method. /// @@ -429,7 +429,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for @@ -471,7 +471,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for @@ -521,7 +521,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`]. /// /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack @@ -556,7 +556,7 @@ impl HashMap { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashMap` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashMap`]. /// /// [`HashDoS`]: https://en.wikipedia.org/wiki/Collision_attack diff --git a/src/set.rs b/src/set.rs index 97e2e6f31..442e1cf6c 100644 --- a/src/set.rs +++ b/src/set.rs @@ -139,7 +139,7 @@ impl HashSet { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`], for example with /// [`with_hasher`](HashSet::with_hasher) method. /// @@ -169,7 +169,7 @@ impl HashSet { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`], for example with /// [`with_capacity_and_hasher`](HashSet::with_capacity_and_hasher) method. /// @@ -203,7 +203,7 @@ impl HashSet { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`], for example with /// [`with_hasher_in`](HashSet::with_hasher_in) method. /// @@ -233,7 +233,7 @@ impl HashSet { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`], for example with /// [`with_capacity_and_hasher_in`](HashSet::with_capacity_and_hasher_in) method. /// @@ -444,7 +444,7 @@ impl HashSet { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for @@ -482,7 +482,7 @@ impl HashSet { /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for @@ -531,7 +531,7 @@ where /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for @@ -569,7 +569,7 @@ where /// The `hash_builder` normally use a fixed key by default and that does /// not allow the `HashSet` to be protected against attacks such as [`HashDoS`]. /// Users who require HashDoS resistance should explicitly use - /// [`ahash::RandomState`] or [`std::collections::hash_map::RandomState`] + /// [`std::collections::hash_map::RandomState`] /// as the hasher when creating a [`HashSet`]. /// /// The `hash_builder` passed should implement the [`BuildHasher`] trait for