Skip to content

Commit

Permalink
Use hasher to compute resolution hash (#5495)
Browse files Browse the repository at this point in the history
## Summary

Addresses one TODO: the resolution hash is now computed with a hasher, which should be more efficient.
  • Loading branch information
charliermarsh committed Jul 26, 2024
1 parent e8d7c0c commit 561625e
Show file tree
Hide file tree
Showing 18 changed files with 78 additions and 69 deletions.
34 changes: 23 additions & 11 deletions crates/cache-key/src/digest.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,34 @@
use std::hash::Hasher;

use crate::cache_key::{CacheKey, CacheKeyHasher};
use seahash::SeaHasher;
use std::hash::{Hash, Hasher};

/// Compute a hex string hash of a `CacheKey` object.
///
/// The value returned by [`digest`] should be stable across releases and platforms.
pub fn digest<H: CacheKey>(hashable: &H) -> String {
/// The value returned by [`cache_digest`] should be stable across releases and platforms.
pub fn cache_digest<H: CacheKey>(hashable: &H) -> String {
/// Compute a u64 hash of a [`CacheKey`] object.
fn cache_key_u64<H: CacheKey>(hashable: &H) -> u64 {
let mut hasher = CacheKeyHasher::new();
hashable.cache_key(&mut hasher);
hasher.finish()
}

to_hex(cache_key_u64(hashable))
}

/// Compute a hex string hash of a hashable object.
///
/// The object is fed through its [`Hash`] implementation into a [`SeaHasher`], and the
/// resulting `u64` is rendered as a string by `to_hex`.
///
/// NOTE(review): unlike [`cache_digest`], no stability guarantee is stated for this digest;
/// the output depends on the `Hash` implementation of `H` — confirm before relying on the
/// value across releases or platforms.
pub fn hash_digest<H: Hash>(hashable: &H) -> String {
/// Compute a u64 hash of a hashable object.
fn hash_u64<H: Hash>(hashable: &H) -> u64 {
// A freshly constructed hasher is used for every call.
let mut hasher = SeaHasher::new();
hashable.hash(&mut hasher);
hasher.finish()
}

to_hex(hash_u64(hashable))
}

/// Convert a u64 to a hex string.
///
/// The number is first serialized as its eight little-endian bytes, so the least-significant
/// byte is encoded first. The output is therefore not the conventional most-significant-first
/// hexadecimal rendering of `num`.
fn to_hex(num: u64) -> String {
hex::encode(num.to_le_bytes())
}

/// Compute a u64 hash of a [`CacheKey`] object.
///
/// The object writes itself into a freshly constructed [`CacheKeyHasher`] through
/// [`CacheKey::cache_key`], and the value of the hasher's `finish()` is returned.
fn cache_key_u64<H: CacheKey>(hashable: &H) -> u64 {
let mut hasher = CacheKeyHasher::new();
hashable.cache_key(&mut hasher);
hasher.finish()
}
2 changes: 1 addition & 1 deletion crates/cache-key/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pub use cache_key::{CacheKey, CacheKeyHasher};
pub use canonical_url::{CanonicalUrl, RepositoryUrl};
pub use digest::digest;
pub use digest::{cache_digest, hash_digest};

mod cache_key;
mod canonical_url;
Expand Down
1 change: 1 addition & 0 deletions crates/distribution-types/src/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::{InstalledMetadata, InstalledVersion, Name};

/// A distribution which is either installable, is a wheel in our cache or is already installed.
#[derive(Debug, Clone)]
#[allow(clippy::large_enum_variant)]
pub enum LocalDist {
Cached(CachedDist),
Installed(InstalledDist),
Expand Down
11 changes: 6 additions & 5 deletions crates/distribution-types/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub enum FileConversionError {

/// Internal analog to [`pypi_types::File`].
#[derive(
Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
Debug, Clone, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
Expand Down Expand Up @@ -67,7 +67,7 @@ impl File {

/// While a registry file is generally a remote URL, it can also be a file if it comes from a directory flat indexes.
#[derive(
Debug, Clone, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
Debug, Clone, Hash, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
Expand Down Expand Up @@ -147,13 +147,14 @@ impl Display for FileLocation {
#[derive(
Debug,
Clone,
PartialEq,
Eq,
Hash,
Serialize,
Deserialize,
rkyv::Archive,
rkyv::Deserialize,
rkyv::Serialize,
PartialEq,
Eq,
)]
#[archive(check_bytes)]
#[archive_attr(derive(Debug))]
Expand Down Expand Up @@ -185,7 +186,7 @@ impl From<UrlString> for String {
}
}

impl fmt::Display for UrlString {
impl Display for UrlString {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
Expand Down
12 changes: 6 additions & 6 deletions crates/distribution-types/src/installed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use uv_normalize::PackageName;
use crate::{DistributionMetadata, InstalledMetadata, InstalledVersion, Name, VersionOrUrlRef};

/// A built distribution (wheel) that is installed in a virtual environment.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub enum InstalledDist {
/// The distribution was derived from a registry, like `PyPI`.
Registry(InstalledRegistryDist),
Expand All @@ -30,14 +30,14 @@ pub enum InstalledDist {
LegacyEditable(InstalledLegacyEditable),
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
/// Payload of the [`InstalledDist::Registry`] variant: an installed distribution that was
/// derived from a registry, like `PyPI`.
pub struct InstalledRegistryDist {
/// The package name.
pub name: PackageName,
/// The installed version.
pub version: Version,
/// NOTE(review): presumably the on-disk location of the installed distribution — confirm.
pub path: PathBuf,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct InstalledDirectUrlDist {
pub name: PackageName,
pub version: Version,
Expand All @@ -47,21 +47,21 @@ pub struct InstalledDirectUrlDist {
pub path: PathBuf,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
/// NOTE(review): going by the type name, this presumably describes a distribution installed
/// as an `.egg-info` file — confirm against the `InstalledDist` variant that wraps it.
pub struct InstalledEggInfoFile {
/// The package name.
pub name: PackageName,
/// The installed version.
pub version: Version,
/// NOTE(review): presumably the path to the `.egg-info` file — confirm.
pub path: PathBuf,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
/// NOTE(review): going by the type name, this presumably describes a distribution installed
/// as an `.egg-info` directory — confirm against the `InstalledDist` variant that wraps it.
pub struct InstalledEggInfoDirectory {
/// The package name.
pub name: PackageName,
/// The installed version.
pub version: Version,
/// NOTE(review): presumably the path to the `.egg-info` directory — confirm.
pub path: PathBuf,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct InstalledLegacyEditable {
pub name: PackageName,
pub version: Version,
Expand Down
24 changes: 12 additions & 12 deletions crates/distribution-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,14 @@ impl std::fmt::Display for InstalledVersion<'_> {
/// Either a built distribution, a wheel, or a source distribution that exists at some location.
///
/// The location can be an index, URL or path (wheel), or index, URL, path or Git repository (source distribution).
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub enum Dist {
/// A built distribution (wheel); see [`BuiltDist`] for its possible origins.
Built(BuiltDist),
/// A source distribution; see [`SourceDist`] for its possible origins.
Source(SourceDist),
}

/// A wheel, with its three possible origins (index, url, path)
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
#[allow(clippy::large_enum_variant)]
pub enum BuiltDist {
Registry(RegistryBuiltDist),
Expand All @@ -142,7 +142,7 @@ pub enum BuiltDist {
}

/// A source distribution, with its possible origins (index, url, path, git)
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
#[allow(clippy::large_enum_variant)]
pub enum SourceDist {
Registry(RegistrySourceDist),
Expand All @@ -153,15 +153,15 @@ pub enum SourceDist {
}

/// A built distribution (wheel) that exists in a registry, like `PyPI`.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct RegistryBuiltWheel {
/// The filename of the wheel.
pub filename: WheelFilename,
/// The file entry for this wheel, stored behind a `Box`.
/// NOTE(review): presumably boxed to keep this struct small — confirm.
pub file: Box<File>,
/// NOTE(review): presumably the index this wheel was found in — confirm.
pub index: IndexUrl,
}

/// A built distribution (wheel) that exists in a registry, like `PyPI`.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct RegistryBuiltDist {
/// All wheels associated with this distribution. It is guaranteed
/// that there is at least one wheel.
Expand Down Expand Up @@ -191,7 +191,7 @@ pub struct RegistryBuiltDist {
}

/// A built distribution (wheel) that exists at an arbitrary URL.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct DirectUrlBuiltDist {
/// We require that wheel urls end in the full wheel filename, e.g.
/// `https://example.org/packages/flask-3.0.0-py3-none-any.whl`
Expand All @@ -203,7 +203,7 @@ pub struct DirectUrlBuiltDist {
}

/// A built distribution (wheel) that exists in a local directory.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct PathBuiltDist {
pub filename: WheelFilename,
/// The path to the wheel.
Expand All @@ -213,7 +213,7 @@ pub struct PathBuiltDist {
}

/// A source distribution that exists in a registry, like `PyPI`.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct RegistrySourceDist {
pub name: PackageName,
pub version: Version,
Expand All @@ -230,7 +230,7 @@ pub struct RegistrySourceDist {
}

/// A source distribution that exists at an arbitrary URL.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct DirectUrlSourceDist {
/// Unlike [`DirectUrlBuiltDist`], we can't require a full filename with a version here, people
/// like using e.g. `foo @ https://github.com/org/repo/archive/master.zip`
Expand All @@ -244,7 +244,7 @@ pub struct DirectUrlSourceDist {
}

/// A source distribution that exists in a Git repository.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct GitSourceDist {
pub name: PackageName,
/// The URL without the revision and subdirectory fragment.
Expand All @@ -256,7 +256,7 @@ pub struct GitSourceDist {
}

/// A source distribution that exists in a local archive (e.g., a `.tar.gz` file).
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct PathSourceDist {
pub name: PackageName,
/// The resolved, absolute path to the distribution which we use for installing.
Expand All @@ -270,7 +270,7 @@ pub struct PathSourceDist {
}

/// A source distribution that exists in a local directory.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub struct DirectorySourceDist {
pub name: PackageName,
/// The resolved, absolute path to the distribution which we use for installing.
Expand Down
2 changes: 1 addition & 1 deletion crates/distribution-types/src/resolution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl Resolution {
}
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
pub enum ResolutionDiagnostic {
MissingExtra {
/// The distribution that was requested with a non-existent extra. For example,
Expand Down
3 changes: 2 additions & 1 deletion crates/distribution-types/src/resolved.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ use crate::{
/// A distribution that can be used for resolution and installation.
///
/// Either an already-installed distribution or a distribution that can be installed.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Hash)]
#[allow(clippy::large_enum_variant)]
pub enum ResolvedDist {
Installed(InstalledDist),
Installable(Dist),
Expand Down
14 changes: 7 additions & 7 deletions crates/pypi-types/src/direct_url.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::collections::HashMap;
use std::collections::BTreeMap;
use std::path::PathBuf;

use serde::{Deserialize, Serialize};
Expand All @@ -7,7 +7,7 @@ use url::Url;
/// Metadata for a distribution that was installed via a direct URL.
///
/// See: <https://packaging.python.org/en/latest/specifications/direct-url-data-structure/>
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", untagged)]
pub enum DirectUrl {
/// The direct URL is a local directory. For example:
Expand Down Expand Up @@ -41,23 +41,23 @@ pub enum DirectUrl {
},
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct DirInfo {
/// Optional `editable` flag; the key is omitted from serialized output when this is `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub editable: Option<bool>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct ArchiveInfo {
#[serde(skip_serializing_if = "Option::is_none")]
pub hash: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub hashes: Option<HashMap<String, String>>,
pub hashes: Option<BTreeMap<String, String>>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VcsInfo {
pub vcs: VcsKind,
Expand All @@ -67,7 +67,7 @@ pub struct VcsInfo {
pub requested_revision: Option<String>,
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VcsKind {
Git,
Expand Down
1 change: 1 addition & 0 deletions crates/pypi-types/src/simple_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ impl CoreMetadata {
Clone,
PartialEq,
Eq,
Hash,
Serialize,
Deserialize,
rkyv::Archive,
Expand Down
12 changes: 6 additions & 6 deletions crates/uv-cache/src/wheel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::path::{Path, PathBuf};

use url::Url;

use cache_key::{digest, CanonicalUrl};
use cache_key::{cache_digest, CanonicalUrl};
use distribution_types::IndexUrl;

/// Cache wheels and their metadata, both from remote wheels and built from source distributions.
Expand Down Expand Up @@ -30,19 +30,19 @@ impl<'a> WheelCache<'a> {
WheelCache::Index(IndexUrl::Pypi(_)) => WheelCacheKind::Pypi.root(),
WheelCache::Index(url) => WheelCacheKind::Index
.root()
.join(digest(&CanonicalUrl::new(url))),
.join(cache_digest(&CanonicalUrl::new(url))),
WheelCache::Url(url) => WheelCacheKind::Url
.root()
.join(digest(&CanonicalUrl::new(url))),
.join(cache_digest(&CanonicalUrl::new(url))),
WheelCache::Path(url) => WheelCacheKind::Path
.root()
.join(digest(&CanonicalUrl::new(url))),
.join(cache_digest(&CanonicalUrl::new(url))),
WheelCache::Editable(url) => WheelCacheKind::Editable
.root()
.join(digest(&CanonicalUrl::new(url))),
.join(cache_digest(&CanonicalUrl::new(url))),
WheelCache::Git(url, sha) => WheelCacheKind::Git
.root()
.join(digest(&CanonicalUrl::new(url)))
.join(cache_digest(&CanonicalUrl::new(url)))
.join(sha),
}
}
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-client/src/flat_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ impl<'a> FlatIndexClient<'a> {
let cache_entry = self.cache.entry(
CacheBucket::FlatIndex,
"html",
format!("{}.msgpack", cache_key::digest(&url.to_string())),
format!("{}.msgpack", cache_key::cache_digest(&url.to_string())),
);
let cache_control = match self.client.connectivity() {
Connectivity::Online => CacheControl::from(
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-git/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ impl GitResolver {
fs::create_dir_all(&lock_dir).await?;
let repository_url = RepositoryUrl::new(url.repository());
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&repository_url)),
lock_dir.join(cache_key::cache_digest(&repository_url)),
&repository_url,
)?;

Expand Down
Loading

0 comments on commit 561625e

Please sign in to comment.