Skip to content

Commit

Permalink
add crate to calculate prehashes (#8262)
Browse files Browse the repository at this point in the history
### Description

We have a few cases where certain keys are repeatedly inserted into
hashmaps. In these situations we can prehash items. This crate will do
that.

### Testing Instructions

There is a basic doctest to demonstrate the functionality. The code is
pretty straightforward.
  • Loading branch information
arlyon authored Jun 3, 2024
1 parent 226101f commit 13aba96
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 0 deletions.
4 changes: 4 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ members = [
"crates/node-file-trace",
"crates/tower-uds",
"crates/turbo-tasks*",
"crates/turbo-prehash",
"crates/turbopack*",
"crates/turborepo*",
"packages/turbo-repository/rust",
Expand Down Expand Up @@ -128,6 +129,7 @@ signposter = { path = "crates/turbo-tasks-signposter" }
signposter-sys = { path = "crates/turbo-tasks-signposter-sys" }
tracing-signpost = { path = "crates/turbo-tasks-tracing-signpost" }
swc-ast-explorer = { path = "crates/turbopack-swc-ast-explorer" }
turbo-prehash = { path = "crates/turbo-prehash" }
turbo-tasks-malloc = { path = "crates/turbo-tasks-malloc", default-features = false }
turbo-tasks = { path = "crates/turbo-tasks" }
turbo-tasks-build = { path = "crates/turbo-tasks-build" }
Expand Down
10 changes: 10 additions & 0 deletions crates/turbo-prehash/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[package]
name = "turbo-prehash"
version = "0.1.0"
edition = "2021"
license = "MPL-2.0"

[dependencies]

[lints]
workspace = true
145 changes: 145 additions & 0 deletions crates/turbo-prehash/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//! turbo-prehash
//!
//! A small wrapper type, built on `std::hash::BuildHasher`, that lets you
//! hash a value once up front and reuse that hash whenever the value is
//! hashed afterwards.
//!
//! This is useful when a value is expensive to hash (e.g. a large string)
//! and you want to avoid re-hashing it every time it is used as a key.
//!
//! # Example
//!
//! ```
//! use turbo_prehash::{BuildHasherExt, PreHashed};
//! use std::collections::HashMap;
//! use std::hash::{BuildHasherDefault, RandomState, Hash};
//!
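//! /// hash a key, returning a prehashed value
//! ///
//! /// note: each call builds a fresh `RandomState`, so equal keys prehashed by
//! /// two separate calls get different hashes; share one `BuildHasher` when
//! /// keys have to be looked up again later
//! fn hash_key<T: Hash>(key: T) -> PreHashed<T> {
//! RandomState::new().prehash(key)
//! }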
//!
//! // create hashmap to hold pre-hashed values
//! let mut map: HashMap<PreHashed<String>, String> = Default::default();
//!
//! // insert a prehashed value
//! let hashed_key = hash_key("hello".to_string());
//! map.insert(hashed_key.clone(), "world".to_string());
//!
//! // get the value
//! assert_eq!(map.get(&hashed_key), Some(&"world".to_string()));
//! ```

use std::{
fmt,
hash::{BuildHasher, Hash, Hasher},
ops::Deref,
};

/// A value bundled with a hash that was computed for it ahead of time.
///
/// The [Hash] implementation of this type feeds only the stored hash into the
/// hasher, while equality is decided by the wrapped value alone.
#[derive(Clone, Copy, Debug)]
pub struct PreHashed<I, H = u64> {
    /// The pre-computed hash that stands in for `inner` when hashing.
    hash: H,
    /// The wrapped value.
    inner: I,
}

impl<I, H> PreHashed<I, H> {
/// Create a new [PreHashed] value with the given hash and inner value.
///
/// SAFETY: The hash must be a valid hash of the inner value.
pub fn new(hash: H, inner: I) -> Self {
Self { hash, inner }
}

/// Split the [PreHashed] value into its hash and inner value.
pub fn into_parts(self) -> (H, I) {
(self.hash, self.inner)
}

fn inner(&self) -> &I {
&self.inner
}
}

impl<I> PreHashed<I, u64>
where
    I: Hash,
{
    /// Hash `inner` once with `hasher` and store the result alongside it.
    fn new_from_builder<B>(hasher: &B, inner: I) -> Self
    where
        B: BuildHasher,
    {
        let hash = hasher.hash_one(&inner);
        Self::new(hash, inner)
    }
}

/// Transparent read access to the wrapped value.
///
/// Implemented for every hash type `H`, not only the default `u64`, so that
/// it matches the other trait impls on [PreHashed], which are generic over
/// `H` as well.
impl<I, H> Deref for PreHashed<I, H> {
    type Target = I;

    fn deref(&self) -> &Self::Target {
        self.inner()
    }
}

/// Borrow the wrapped value through [AsRef], for any hash type `H`.
impl<I, H> AsRef<I> for PreHashed<I, H> {
    fn as_ref(&self) -> &I {
        Self::inner(self)
    }
}

/// Hashing a [PreHashed] forwards only the stored hash to the hasher; the
/// wrapped value is never visited.
impl<I, H> Hash for PreHashed<I, H>
where
    H: Hash,
{
    fn hash<S>(&self, state: &mut S)
    where
        S: Hasher,
    {
        Hash::hash(&self.hash, state);
    }
}

/// Equality looks at the wrapped values only; the stored hashes take no part
/// in the comparison.
impl<I, H> PartialEq for PreHashed<I, H>
where
    I: PartialEq,
{
    fn eq(&self, other: &Self) -> bool {
        self.inner == other.inner
    }
}

impl<I, H> Eq for PreHashed<I, H> where I: Eq {}

/// Formats exactly like the wrapped value; the hash is not printed.
impl<I, H> fmt::Display for PreHashed<I, H>
where
    I: fmt::Display,
{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.inner, f)
    }
}

/// A [Hasher] that does no hashing of its own: it keeps the last `u64`
/// written to it and reports that value as the finished hash.
#[derive(Clone, Copy, Debug, Default)]
pub struct PassThroughHash(u64);

impl PassThroughHash {
    /// Create a hasher in its default state (stored value `0`).
    pub fn new() -> Self {
        Self::default()
    }
}

impl Hasher for PassThroughHash {
fn write(&mut self, _bytes: &[u8]) {
unimplemented!("do not use")
}

fn write_u64(&mut self, i: u64) {
self.0 = i;
}

fn finish(&self) -> u64 {
self.0
}
}

/// Adds [BuildHasherExt::prehash] to [BuildHasher] implementations, so a
/// value can be hashed once up front and carried around as a [PreHashed].
pub trait BuildHasherExt: BuildHasher {
    /// The hash type stored in the [PreHashed] values this builder produces.
    type Hash;

    /// Hash `value` and return it wrapped together with that hash.
    fn prehash<T>(&self, value: T) -> PreHashed<T, Self::Hash>
    where
        T: Hash;
}

/// Every [BuildHasher] gets `prehash`, producing `u64` hashes computed with
/// the builder itself.
impl<B> BuildHasherExt for B
where
    B: BuildHasher,
{
    type Hash = u64;

    fn prehash<T>(&self, value: T) -> PreHashed<T, Self::Hash>
    where
        T: Hash,
    {
        PreHashed::<T>::new_from_builder(self, value)
    }
}

0 comments on commit 13aba96

Please sign in to comment.