From 4a4838d0ac56f34174811914bb2761a67bbb4451 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 5 Apr 2024 00:18:37 -0400 Subject: [PATCH] Add notes about hash generation Add basic hash More notes Looking at cache... --- Cargo.lock | 17 + Cargo.toml | 1 + constraints.txt | 4 - crates/distribution-types/src/cached.rs | 48 +- crates/distribution-types/src/hashes.rs | 23 + crates/distribution-types/src/lib.rs | 2 + crates/pypi-types/src/simple_json.rs | 8 - crates/uv-client/src/cached_client.rs | 35 + crates/uv-client/src/flat_index.rs | 4 +- crates/uv-dev/src/resolve_cli.rs | 4 +- crates/uv-dispatch/src/lib.rs | 10 +- crates/uv-distribution/Cargo.toml | 2 + crates/uv-distribution/src/archive.rs | 36 + .../src/distribution_database.rs | 281 ++++-- crates/uv-distribution/src/download.rs | 19 +- crates/uv-distribution/src/error.rs | 45 + .../src/index/built_wheel_index.rs | 41 +- .../uv-distribution/src/index/cached_wheel.rs | 70 +- .../src/index/registry_wheel_index.rs | 78 +- crates/uv-distribution/src/lib.rs | 2 + .../src/source/built_wheel_metadata.rs | 11 + crates/uv-distribution/src/source/mod.rs | 292 +++++-- crates/uv-distribution/src/source/revision.rs | 40 +- crates/uv-extract/Cargo.toml | 4 + crates/uv-extract/src/hash.rs | 138 +++ crates/uv-extract/src/lib.rs | 1 + crates/uv-extract/src/stream.rs | 3 +- crates/uv-installer/Cargo.toml | 1 + crates/uv-installer/src/downloader.rs | 28 +- crates/uv-installer/src/plan.rs | 116 ++- crates/uv-requirements/src/lookahead.rs | 12 +- crates/uv-requirements/src/source_tree.rs | 17 +- crates/uv-requirements/src/unnamed.rs | 18 +- crates/uv-resolver/src/error.rs | 3 + crates/uv-resolver/src/hash_checking_mode.rs | 15 + crates/uv-resolver/src/hashes.rs | 71 -- crates/uv-resolver/src/lib.rs | 2 + crates/uv-resolver/src/manifest.rs | 10 +- crates/uv-resolver/src/options.rs | 19 +- crates/uv-resolver/src/resolution.rs | 3 + crates/uv-resolver/src/resolver/mod.rs | 32 +- crates/uv-resolver/src/resolver/provider.rs | 9 +- crates/uv-resolver/src/version_map.rs | 6 +- crates/uv-resolver/tests/resolver.rs | 5 - crates/uv-types/Cargo.toml | 1 + crates/uv-types/src/hashes.rs | 53 +- crates/uv/src/commands/pip_compile.rs | 29 +- crates/uv/src/commands/pip_install.rs | 68 +- crates/uv/src/commands/pip_sync.rs | 52 +- crates/uv/src/commands/venv.rs | 4 +- crates/uv/tests/pip_install.rs | 59 ++ crates/uv/tests/pip_sync.rs | 798 ++++++++++++++++++ requirements.in | 2 - requirements.txt | 11 - 54 files changed, 2222 insertions(+), 441 deletions(-) delete mode 100644 constraints.txt create mode 100644 crates/distribution-types/src/hashes.rs create mode 100644 crates/uv-distribution/src/archive.rs create mode 100644 crates/uv-extract/src/hash.rs create mode 100644 crates/uv-resolver/src/hash_checking_mode.rs delete mode 100644 crates/uv-resolver/src/hashes.rs delete mode 100644 requirements.in delete mode 100644 requirements.txt diff --git a/Cargo.lock b/Cargo.lock index afc71c0fb3e0c..16848f9f101aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2071,6 +2071,16 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + [[package]] name = "memchr" version = "2.7.1" @@ -4561,6 +4571,7 @@ dependencies = [ "fs-err", "futures", "install-wheel-rs", + "md-5", "nanoid", "once_cell", "pep440_rs", @@ -4572,6 +4583,7 @@ dependencies = [ 
"rmp-serde", "rustc-hash", "serde", + "sha2", "tempfile", "thiserror", "tokio", @@ -4597,8 +4609,11 @@ dependencies = [ "async_zip", "fs-err", "futures", + "md-5", + "pypi-types", "rayon", "rustc-hash", + "sha2", "thiserror", "tokio", "tokio-tar", @@ -4663,6 +4678,7 @@ dependencies = [ "pypi-types", "rayon", "requirements-txt", + "rmp-serde", "rustc-hash", "serde", "tempfile", @@ -4811,6 +4827,7 @@ dependencies = [ "distribution-types", "itertools 0.12.1", "once-map", + "pep440_rs", "pep508_rs", "pypi-types", "rustc-hash", diff --git a/Cargo.toml b/Cargo.toml index 5e4a53df1573c..496d2ee135c25 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,6 +93,7 @@ indoc = { version = "2.0.4" } itertools = { version = "0.12.1" } junction = { version = "1.0.0" } mailparse = { version = "0.14.0" } +md-5 = { version = "0.10.6" } miette = { version = "7.2.0" } nanoid = { version = "0.4.0" } once_cell = { version = "1.19.0" } diff --git a/constraints.txt b/constraints.txt deleted file mode 100644 index 3285ee87ac80f..0000000000000 --- a/constraints.txt +++ /dev/null @@ -1,4 +0,0 @@ -click==8.1.7 \ - --hash=sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28 \ - --hash=sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de - # via flask diff --git a/crates/distribution-types/src/cached.rs b/crates/distribution-types/src/cached.rs index 2a0da761d2ed1..af986aeb329cc 100644 --- a/crates/distribution-types/src/cached.rs +++ b/crates/distribution-types/src/cached.rs @@ -4,9 +4,11 @@ use anyhow::Result; use distribution_filename::WheelFilename; use pep508_rs::VerbatimUrl; +use pypi_types::HashDigest; use uv_normalize::PackageName; use crate::direct_url::{DirectUrl, LocalFileUrl}; +use crate::hashes::Hashed; use crate::{ BuiltDist, Dist, DistributionMetadata, InstalledMetadata, InstalledVersion, Name, SourceDist, VersionOrUrl, @@ -25,6 +27,7 @@ pub enum CachedDist { pub struct CachedRegistryDist { pub filename: WheelFilename, pub path: PathBuf, + pub hashes: Vec, } #[derive(Debug, Clone)] @@ -33,45 +36,60 @@ pub struct CachedDirectUrlDist { pub url: VerbatimUrl, pub path: PathBuf, pub editable: bool, + pub hashes: Vec, } impl CachedDist { /// Initialize a [`CachedDist`] from a [`Dist`]. 
- pub fn from_remote(remote: Dist, filename: WheelFilename, path: PathBuf) -> Self { + pub fn from_remote( + remote: Dist, + filename: WheelFilename, + hashes: Vec, + path: PathBuf, + ) -> Self { match remote { - Dist::Built(BuiltDist::Registry(_dist)) => { - Self::Registry(CachedRegistryDist { filename, path }) - } + Dist::Built(BuiltDist::Registry(_dist)) => Self::Registry(CachedRegistryDist { + filename, + path, + hashes, + }), Dist::Built(BuiltDist::DirectUrl(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), Dist::Built(BuiltDist::Path(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), - Dist::Source(SourceDist::Registry(_dist)) => { - Self::Registry(CachedRegistryDist { filename, path }) - } + Dist::Source(SourceDist::Registry(_dist)) => Self::Registry(CachedRegistryDist { + filename, + path, + hashes, + }), Dist::Source(SourceDist::DirectUrl(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), Dist::Source(SourceDist::Git(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: false, }), Dist::Source(SourceDist::Path(dist)) => Self::Url(CachedDirectUrlDist { filename, url: dist.url, + hashes, path, editable: dist.editable, }), @@ -104,6 +122,7 @@ impl CachedDist { } } + /// Returns `true` if the distribution is editable. pub fn editable(&self) -> bool { match self { Self::Registry(_) => false, @@ -111,6 +130,7 @@ impl CachedDist { } } + /// Returns the [`WheelFilename`] of the distribution. pub fn filename(&self) -> &WheelFilename { match self { Self::Registry(dist) => &dist.filename, @@ -119,12 +139,24 @@ impl CachedDist { } } +impl Hashed for CachedRegistryDist { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} + impl CachedDirectUrlDist { /// Initialize a [`CachedDirectUrlDist`] from a [`WheelFilename`], [`url::Url`], and [`Path`]. - pub fn from_url(filename: WheelFilename, url: VerbatimUrl, path: PathBuf) -> Self { + pub fn from_url( + filename: WheelFilename, + url: VerbatimUrl, + hashes: Vec, + path: PathBuf, + ) -> Self { Self { filename, url, + hashes, path, editable: false, } diff --git a/crates/distribution-types/src/hashes.rs b/crates/distribution-types/src/hashes.rs new file mode 100644 index 0000000000000..ae672d60ea72f --- /dev/null +++ b/crates/distribution-types/src/hashes.rs @@ -0,0 +1,23 @@ +use pypi_types::HashDigest; + +pub trait Hashed { + /// Return the [`HashDigest`]s for the archive. + fn hashes(&self) -> &[HashDigest]; + + /// Returns `true` if the archive satisfies the given hashes. + fn satisfies(&self, hashes: &[HashDigest]) -> bool { + if hashes.is_empty() { + true + } else { + self.hashes().iter().any(|hash| hashes.contains(hash)) + } + } + + /// Returns `true` if the archive includes a hash for at least one of the given algorithms. 
+ fn has_digests(&self, hashes: &[HashDigest]) -> bool { + hashes + .iter() + .map(HashDigest::algorithm) + .any(|algorithm| self.hashes().iter().any(|hash| hash.algorithm == algorithm)) + } +} diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 4b84e6e38a704..a39a2ff654b70 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -51,6 +51,7 @@ pub use crate::direct_url::*; pub use crate::editable::*; pub use crate::error::*; pub use crate::file::*; +pub use crate::hashes::*; pub use crate::id::*; pub use crate::index_url::*; pub use crate::installed::*; @@ -66,6 +67,7 @@ mod direct_url; mod editable; mod error; mod file; +mod hashes; mod id; mod index_url; mod installed; diff --git a/crates/pypi-types/src/simple_json.rs b/crates/pypi-types/src/simple_json.rs index 2cebf0a8fb11b..199b3c7c50f13 100644 --- a/crates/pypi-types/src/simple_json.rs +++ b/crates/pypi-types/src/simple_json.rs @@ -159,14 +159,6 @@ impl Hashes { } digests } - - /// Returns `true` if the hash is empty. - pub fn is_empty(&self) -> bool { - self.sha512.is_none() - && self.sha384.is_none() - && self.sha256.is_none() - && self.md5.is_none() - } } impl FromStr for Hashes { diff --git a/crates/uv-client/src/cached_client.rs b/crates/uv-client/src/cached_client.rs index a80726e8bb958..d406ac376fbac 100644 --- a/crates/uv-client/src/cached_client.rs +++ b/crates/uv-client/src/cached_client.rs @@ -170,6 +170,41 @@ impl CachedClient { self.0.clone() } + pub async fn get_cached( + cache_entry: &CacheEntry, + ) -> Option { + let cached = Self::read_cache(cache_entry).await?; + Payload::from_aligned_bytes(cached.data).ok() + } + + #[instrument(skip_all)] + pub async fn skip_cache< + Payload: Serialize + DeserializeOwned + Send + 'static, + CallBackError, + Callback, + CallbackReturn, + >( + &self, + req: Request, + cache_entry: &CacheEntry, + response_callback: Callback, + ) -> Result> + where + Callback: FnOnce(Response) -> CallbackReturn + Send, + CallbackReturn: Future> + Send, + { + let (response, cache_policy) = self.fresh_request(req).await?; + + let payload = self + .run_response_callback(cache_entry, cache_policy, response, move |resp| async { + let payload = response_callback(resp).await?; + Ok(SerdeCacheable { inner: payload }) + }) + .await?; + + Ok(payload) + } + /// Make a cached request with a custom response transformation /// while using serde to (de)serialize cached responses. 
/// diff --git a/crates/uv-client/src/flat_index.rs b/crates/uv-client/src/flat_index.rs index 3ca8c64d86e6e..ace3da1de002c 100644 --- a/crates/uv-client/src/flat_index.rs +++ b/crates/uv-client/src/flat_index.rs @@ -5,7 +5,7 @@ use std::path::PathBuf; use futures::{FutureExt, StreamExt}; use reqwest::Response; use rustc_hash::FxHashMap; -use tracing::{debug, info_span, instrument, Instrument, warn}; +use tracing::{debug, info_span, instrument, warn, Instrument}; use url::Url; use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; @@ -23,9 +23,9 @@ use uv_configuration::{NoBinary, NoBuild}; use uv_normalize::PackageName; use uv_types::RequiredHashes; -use crate::{Connectivity, Error, ErrorKind, RegistryClient}; use crate::cached_client::{CacheControl, CachedClientError}; use crate::html::SimpleHtml; +use crate::{Connectivity, Error, ErrorKind, RegistryClient}; #[derive(Debug, thiserror::Error)] pub enum FlatIndexError { diff --git a/crates/uv-dev/src/resolve_cli.rs b/crates/uv-dev/src/resolve_cli.rs index 6e9c09a25a6df..658717f914450 100644 --- a/crates/uv-dev/src/resolve_cli.rs +++ b/crates/uv-dev/src/resolve_cli.rs @@ -58,6 +58,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> { let index_locations = IndexLocations::new(args.index_url, args.extra_index_url, args.find_links, false); let index = InMemoryIndex::default(); + let hashes = RequiredHashes::default(); let in_flight = InFlight::default(); let no_build = if args.no_build { NoBuild::All @@ -100,7 +101,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> { // Copied from `BuildDispatch` let tags = venv.interpreter().tags()?; let resolver = Resolver::new( - Manifest::simple(args.requirements.clone(), RequiredHashes::default()), + Manifest::simple(args.requirements.clone()), Options::default(), venv.interpreter().markers(), venv.interpreter(), @@ -108,6 +109,7 @@ pub(crate) async fn resolve_cli(args: ResolveCliArgs) -> Result<()> { &client, &flat_index, &index, + &hashes, &build_dispatch, &site_packages, )?; diff --git a/crates/uv-dispatch/src/lib.rs b/crates/uv-dispatch/src/lib.rs index 22d6dcb42b844..d3b8219849724 100644 --- a/crates/uv-dispatch/src/lib.rs +++ b/crates/uv-dispatch/src/lib.rs @@ -134,8 +134,9 @@ impl<'a> BuildContext for BuildDispatch<'a> { async fn resolve<'data>(&'data self, requirements: &'data [Requirement]) -> Result { let markers = self.interpreter.markers(); let tags = self.interpreter.tags()?; + let hashes = RequiredHashes::default(); let resolver = Resolver::new( - Manifest::simple(requirements.to_vec(), RequiredHashes::default()), + Manifest::simple(requirements.to_vec()), self.options, markers, self.interpreter, @@ -143,6 +144,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { self.client, self.flat_index, self.index, + &hashes, self, &EmptyInstalledPackages, )?; @@ -178,6 +180,9 @@ impl<'a> BuildContext for BuildDispatch<'a> { venv.root().display(), ); + // Don't enforce hashes for build dependencies. + let hashes = RequiredHashes::default(); + // Determine the current environment markers. let tags = self.interpreter.tags()?; @@ -194,6 +199,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { site_packages, &Reinstall::None, &NoBinary::None, + &RequiredHashes::default(), self.index_locations, self.cache(), venv, @@ -222,7 +228,7 @@ impl<'a> BuildContext for BuildDispatch<'a> { vec![] } else { // TODO(konstin): Check that there is no endless recursion. 
- let downloader = Downloader::new(self.cache, tags, self.client, self); + let downloader = Downloader::new(self.cache, tags, &hashes, self.client, self); debug!( "Downloading and building requirement{} for build: {}", if remote.len() == 1 { "" } else { "s" }, diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index 53d2444334cac..a4814ff6da2d2 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -33,6 +33,7 @@ uv-configuration = { workspace = true } anyhow = { workspace = true } fs-err = { workspace = true } futures = { workspace = true } +md-5 = { workspace = true } nanoid = { workspace = true } once_cell = { workspace = true } reqwest = { workspace = true } @@ -40,6 +41,7 @@ reqwest-middleware = { workspace = true } rmp-serde = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["derive"] } +sha2 = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/crates/uv-distribution/src/archive.rs b/crates/uv-distribution/src/archive.rs new file mode 100644 index 0000000000000..a53de619d0dc6 --- /dev/null +++ b/crates/uv-distribution/src/archive.rs @@ -0,0 +1,36 @@ +use std::path::PathBuf; + +use distribution_types::Hashed; +use pypi_types::HashDigest; + +/// An archive (unzipped wheel) that exists in the local cache. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct Archive { + /// The path to the archive entry in the wheel's archive bucket. + pub path: PathBuf, + /// The computed hashes of the archive. + pub hashes: Vec, +} + +impl Archive { + /// Create a new [`Archive`] with the given path and hashes. + pub(crate) fn new(path: PathBuf, hashes: Vec) -> Self { + Self { path, hashes } + } + + /// Return the path to the archive entry in the wheel's archive bucket. + pub fn path(&self) -> &PathBuf { + &self.path + } + + /// Return the computed hashes of the archive. 
+ pub fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} + +impl Hashed for Archive { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index 6ece58e51858e..67ae1059b39ce 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -4,23 +4,26 @@ use std::sync::Arc; use futures::{FutureExt, TryStreamExt}; use tempfile::TempDir; -use tokio::io::AsyncSeekExt; +use tokio::io::{AsyncReadExt, AsyncSeekExt}; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{info_span, instrument, warn, Instrument}; use url::Url; use distribution_filename::WheelFilename; use distribution_types::{ - BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name, SourceDist, + BuildableSource, BuiltDist, Dist, FileLocation, Hashed, IndexLocations, LocalEditable, Name, + SourceDist, }; use platform_tags::Tags; -use pypi_types::Metadata23; +use pypi_types::{HashDigest, Metadata23}; use uv_cache::{ArchiveTimestamp, CacheBucket, CacheEntry, CachedByTimestamp, WheelCache}; use uv_client::{CacheControl, CachedClientError, Connectivity, RegistryClient}; use uv_configuration::{NoBinary, NoBuild}; +use uv_extract::hash::Hasher; use uv_fs::write_atomic; use uv_types::BuildContext; +use crate::archive::Archive; use crate::locks::Locks; use crate::{Error, LocalWheel, Reporter, SourceDistributionBuilder}; @@ -79,28 +82,38 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// Either fetch the wheel or fetch and build the source distribution /// - /// If `no_remote_wheel` is set, the wheel will be built from a source distribution - /// even if compatible pre-built wheels are available. + /// Returns a wheel that's compliant with the given platform tags. + /// + /// While hashes will be generated in some cases, hash-checking is only enforced for source + /// distributions, and should be enforced by the caller for wheels. #[instrument(skip_all, fields(%dist))] - pub async fn get_or_build_wheel(&self, dist: &Dist, tags: &Tags) -> Result { + pub async fn get_or_build_wheel( + &self, + dist: &Dist, + tags: &Tags, + hashes: &[HashDigest], + ) -> Result { match dist { - Dist::Built(built) => self.get_wheel(built).await, - Dist::Source(source) => self.build_wheel(source, tags).await, + Dist::Built(built) => self.get_wheel(built, hashes).await, + Dist::Source(source) => self.build_wheel(source, tags, hashes).await, } } /// Either fetch the only wheel metadata (directly from the index or with range requests) or /// fetch and build the source distribution. /// - /// Returns the [`Metadata23`], along with a "precise" URL for the source distribution, if - /// possible. For example, given a Git dependency with a reference to a branch or tag, return a - /// URL with a precise reference to the current commit of that branch or tag. + /// While hashes will be generated in some cases, hash-checking is only enforced for source + /// distributions, and should be enforced by the caller for wheels. 
#[instrument(skip_all, fields(%dist))] - pub async fn get_or_build_wheel_metadata(&self, dist: &Dist) -> Result { + pub async fn get_or_build_wheel_metadata( + &self, + dist: &Dist, + hashes: &[HashDigest], + ) -> Result { match dist { - Dist::Built(built) => self.get_wheel_metadata(built).await, + Dist::Built(built) => self.get_wheel_metadata(built, hashes).await, Dist::Source(source) => { - self.build_wheel_metadata(&BuildableSource::Dist(source)) + self.build_wheel_metadata(&BuildableSource::Dist(source), hashes) .await } } @@ -118,7 +131,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .build_editable(editable, editable_wheel_dir) .await?; - // Unzip. + // Unzip into the editable wheel directory. let path = editable_wheel_dir.join(&disk_filename); let target = editable_wheel_dir.join(cache_key::digest(&editable.path)); let archive = self.unzip_wheel(&path, &target).await?; @@ -126,13 +139,21 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> dist, filename, archive, + hashes: vec![], }; Ok((wheel, metadata)) } /// Fetch a wheel from the cache or download it from the index. - async fn get_wheel(&self, dist: &BuiltDist) -> Result { + /// + /// While hashes will be generated in some cases, hash-checking is _not_ enforced and should + /// instead be enforced by the caller. + async fn get_wheel( + &self, + dist: &BuiltDist, + hashes: &[HashDigest], + ) -> Result { let no_binary = match self.build_context.no_binary() { NoBinary::None => false, NoBinary::All => true, @@ -157,8 +178,9 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> WheelCache::Index(&wheel.index).wheel_dir(wheel.name().as_ref()), wheel.filename.stem(), ); + return self - .load_wheel(path, &wheel.filename, cache_entry, dist) + .load_wheel(path, &wheel.filename, cache_entry, dist, hashes) .await; } }; @@ -172,12 +194,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip. match self - .stream_wheel(url.clone(), &wheel.filename, &wheel_entry, dist) + .stream_wheel(url.clone(), &wheel.filename, &wheel_entry, dist, hashes) .await { Ok(archive) => Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }), Err(Error::Extract(err)) if err.is_http_streaming_unsupported() => { @@ -188,11 +211,12 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // If the request failed because streaming is unsupported, download the // wheel directly. let archive = self - .download_wheel(url, &wheel.filename, &wheel_entry, dist) + .download_wheel(url, &wheel.filename, &wheel_entry, dist, hashes) .await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }) } @@ -210,12 +234,19 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip. 
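
// Illustrative sketch (not part of the patch): the caller-side check that the
// doc comments above defer to for wheels. Plain (algorithm, hex-digest) pairs
// stand in for uv's `HashDigest`; the names below are hypothetical. An empty
// requirement always passes; otherwise at least one computed digest must
// appear in the required set, mirroring `Hashed::satisfies`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Algorithm {
    Md5,
    Sha256,
}

#[derive(Clone, PartialEq, Eq, Debug)]
struct Digest {
    algorithm: Algorithm,
    hex: String,
}

fn satisfies(computed: &[Digest], required: &[Digest]) -> bool {
    required.is_empty() || computed.iter().any(|digest| required.contains(digest))
}

// Mirrors `Hashed::has_digests` above: `true` if a digest was computed for at
// least one of the required algorithms, i.e. the cached archive can be reused
// without being re-fetched just to compute hashes.
fn has_digests(computed: &[Digest], required: &[Digest]) -> bool {
    required
        .iter()
        .map(|digest| digest.algorithm)
        .any(|algorithm| computed.iter().any(|digest| digest.algorithm == algorithm))
}
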
match self - .stream_wheel(wheel.url.raw().clone(), &wheel.filename, &wheel_entry, dist) + .stream_wheel( + wheel.url.raw().clone(), + &wheel.filename, + &wheel_entry, + dist, + hashes, + ) .await { Ok(archive) => Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }), Err(Error::Client(err)) if err.is_http_streaming_unsupported() => { @@ -231,11 +262,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> &wheel.filename, &wheel_entry, dist, + hashes, ) .await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, + hashes: archive.hashes, filename: wheel.filename.clone(), }) } @@ -249,7 +282,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), wheel.filename.stem(), ); - self.load_wheel(&wheel.path, &wheel.filename, cache_entry, dist) + + self.load_wheel(&wheel.path, &wheel.filename, cache_entry, dist, hashes) .await } } @@ -257,24 +291,33 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// Convert a source distribution into a wheel, fetching it from the cache or building it if /// necessary. - async fn build_wheel(&self, dist: &SourceDist, tags: &Tags) -> Result { + /// + /// The returned wheel is guaranteed to come from a distribution with a matching hash, and + /// no build processes will be executed for distributions with mismatched hashes. + async fn build_wheel( + &self, + dist: &SourceDist, + tags: &Tags, + hashes: &[HashDigest], + ) -> Result { let lock = self.locks.acquire(&Dist::Source(dist.clone())).await; let _guard = lock.lock().await; let built_wheel = self .builder - .download_and_build(&BuildableSource::Dist(dist), tags) + .download_and_build(&BuildableSource::Dist(dist), tags, hashes) .boxed() .await?; // If the wheel was unzipped previously, respect it. Source distributions are - // cached under a unique build ID, so unzipped directories are never stale. + // cached under a unique revision ID, so unzipped directories are never stale. match built_wheel.target.canonicalize() { Ok(archive) => { return Ok(LocalWheel { dist: Dist::Source(dist.clone()), archive, filename: built_wheel.filename, + hashes: built_wheel.hashes, }); } Err(err) if err.kind() == io::ErrorKind::NotFound => {} @@ -287,12 +330,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> archive: self .unzip_wheel(&built_wheel.path, &built_wheel.target) .await?, + hashes: built_wheel.hashes, filename: built_wheel.filename, }) } /// Fetch the wheel metadata from the index, or from the cache if possible. - pub async fn get_wheel_metadata(&self, dist: &BuiltDist) -> Result { + /// + /// While hashes will be generated in some cases, hash-checking is _not_ enforced and should + /// instead be enforced by the caller. + pub async fn get_wheel_metadata( + &self, + dist: &BuiltDist, + hashes: &[HashDigest], + ) -> Result { match self.client.wheel_metadata(dist).boxed().await { Ok(metadata) => Ok(metadata), Err(err) if err.is_http_streaming_unsupported() => { @@ -300,7 +351,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // If the request failed due to an error that could be resolved by // downloading the wheel directly, try that. - let wheel = self.get_wheel(dist).await?; + let wheel = self.get_wheel(dist, hashes).await?; Ok(wheel.metadata()?) 
} Err(err) => Err(err.into()), @@ -308,9 +359,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } /// Build the wheel metadata for a source distribution, or fetch it from the cache if possible. + /// + /// The returned metadata is guaranteed to come from a distribution with a matching hash, and + /// no build processes will be executed for distributions with mismatched hashes. pub async fn build_wheel_metadata( &self, source: &BuildableSource<'_>, + hashes: &[HashDigest], ) -> Result { let no_build = match self.build_context.no_build() { NoBuild::All => true, @@ -330,7 +385,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> let metadata = self .builder - .download_and_build_metadata(source) + .download_and_build_metadata(source, hashes) .boxed() .await?; Ok(metadata) @@ -343,7 +398,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> filename: &WheelFilename, wheel_entry: &CacheEntry, dist: &BuiltDist, - ) -> Result { + hashes: &[HashDigest], + ) -> Result { // Create an entry for the HTTP cache. let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem())); @@ -354,23 +410,43 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .map_err(|err| self.handle_response_errors(err)) .into_async_read(); + // Create a hasher for each hash algorithm. + let mut hashers = hashes + .iter() + .map(HashDigest::algorithm) + .map(Hasher::from) + .collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers); + // Download and unzip the wheel to a temporary directory. let temp_dir = tempfile::tempdir_in(self.build_context.cache().root()) .map_err(Error::CacheWrite)?; - uv_extract::stream::unzip(reader.compat(), temp_dir.path()).await?; + uv_extract::stream::unzip(&mut hasher, temp_dir.path()).await?; + + // If necessary, exhaust the reader to compute the hash. + if !hashes.is_empty() { + hasher + .read_to_end(&mut Vec::new()) + .await + .map_err(Error::HashExhaustion)?; + } // Persist the temporary directory to the directory store. - let archive = self + let path = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheRead)?; - Ok(archive) + Ok(Archive::new( + path, + hashers.into_iter().map(HashDigest::from).collect(), + )) } .instrument(info_span!("wheel", wheel = %dist)) }; + // Fetch the archive from the cache, or download it if necessary. let req = self.request(url.clone())?; let cache_control = match self.client.connectivity() { Connectivity::Online => CacheControl::from( @@ -391,6 +467,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> CachedClientError::Client(err) => Error::Client(err), })?; + // If the archive is missing the required hashes, force a refresh. + let archive = if archive.has_digests(hashes) { + archive + } else { + self.client + .cached_client() + .skip_cache(self.request(url)?, &http_entry, download) + .await + .map_err(|err| match err { + CachedClientError::Callback(err) => err, + CachedClientError::Client(err) => Error::Client(err), + })? + }; + Ok(archive) } @@ -401,7 +491,8 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> filename: &WheelFilename, wheel_entry: &CacheEntry, dist: &BuiltDist, - ) -> Result { + hashes: &[HashDigest], + ) -> Result { // Create an entry for the HTTP cache. 
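
// Illustrative sketch (not uv's internal `Hasher`/`HashReader`): the technique
// `stream_wheel` uses above, reduced to std plus the `sha2` crate that this
// patch adds to `uv-distribution`'s dependencies. The download stream is
// wrapped so that every byte the unzip routine consumes is also fed to the
// hasher, and the reader is exhausted afterwards so trailing bytes (e.g. the
// zip central directory) are hashed as well.
use std::io::{self, Read};

use sha2::{Digest, Sha256};

struct HashingReader<R> {
    inner: R,
    sha256: Sha256,
}

impl<R: Read> Read for HashingReader<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let n = self.inner.read(buf)?;
        self.sha256.update(&buf[..n]);
        Ok(n)
    }
}

fn sha256_while_reading(source: impl Read) -> io::Result<String> {
    let mut reader = HashingReader {
        inner: source,
        sha256: Sha256::new(),
    };
    // A real caller would unzip from `reader` here; draining it to a sink has
    // the same effect of observing every byte.
    io::copy(&mut reader, &mut io::sink())?;
    let digest = reader.sha256.finalize();
    Ok(digest.iter().map(|byte| format!("{byte:02x}")).collect())
}
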
let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem())); @@ -427,16 +518,49 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> file.seek(io::SeekFrom::Start(0)) .await .map_err(Error::CacheWrite)?; - uv_extract::seek::unzip(file, temp_dir.path()).await?; + + // If no hashes are required, parallelize the unzip operation. + let hashes = if hashes.is_empty() { + let file = file.into_std().await; + tokio::task::spawn_blocking({ + let target = temp_dir.path().to_owned(); + move || -> Result<(), uv_extract::Error> { + // Unzip the wheel into a temporary directory. + uv_extract::unzip(file, &target)?; + Ok(()) + } + }) + .await??; + + vec![] + } else { + // Create a hasher for each hash algorithm. + let mut hashers = hashes + .iter() + .map(HashDigest::algorithm) + .map(Hasher::from) + .collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers); + uv_extract::stream::unzip(&mut hasher, temp_dir.path()).await?; + + // If necessary, exhaust the reader to compute the hash. + hasher + .read_to_end(&mut Vec::new()) + .await + .map_err(Error::HashExhaustion)?; + + hashers.into_iter().map(HashDigest::from).collect() + }; // Persist the temporary directory to the directory store. - let archive = self + let path = self .build_context .cache() .persist(temp_dir.into_path(), wheel_entry.path()) .await .map_err(Error::CacheRead)?; - Ok(archive) + + Ok(Archive::new(path, hashes)) } .instrument(info_span!("wheel", wheel = %dist)) }; @@ -451,7 +575,6 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> ), Connectivity::Offline => CacheControl::AllowStale, }; - let archive = self .client .cached_client() @@ -462,6 +585,20 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> CachedClientError::Client(err) => Error::Client(err), })?; + // If the archive is missing the required hashes, force a refresh. + let archive = if archive.has_digests(hashes) { + archive + } else { + self.client + .cached_client() + .skip_cache(self.request(url)?, &http_entry, download) + .await + .map_err(|err| match err { + CachedClientError::Callback(err) => err, + CachedClientError::Client(err) => Error::Client(err), + })? + }; + Ok(archive) } @@ -472,6 +609,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> filename: &WheelFilename, wheel_entry: CacheEntry, dist: &BuiltDist, + hashes: &[HashDigest], ) -> Result { // Determine the last-modified time of the wheel. let modified = ArchiveTimestamp::from_file(path).map_err(Error::CacheRead)?; @@ -481,21 +619,68 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> let archive = read_timestamped_archive(&archive_entry, modified)?; // If the file is already unzipped, and the cache is up-to-date, return it. - if let Some(archive) = archive { + if let Some(archive) = archive.filter(|archive| archive.has_digests(hashes)) { Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, filename: filename.clone(), + hashes: archive.hashes, }) - } else { + } else if hashes.is_empty() { // Otherwise, unzip the wheel. 
- let archive = self.unzip_wheel(path, wheel_entry.path()).await?; + let archive = Archive::new(self.unzip_wheel(path, wheel_entry.path()).await?, vec![]); + write_timestamped_archive(&archive_entry, archive.clone(), modified).await?; + + Ok(LocalWheel { + dist: Dist::Built(dist.clone()), + archive: archive.path, + filename: filename.clone(), + hashes: vec![], + }) + } else { + // If necessary, compute the hashes of the wheel. + let file = fs_err::tokio::File::open(path) + .await + .map_err(Error::CacheRead)?; + let temp_dir = tempfile::tempdir_in(self.build_context.cache().root()) + .map_err(Error::CacheWrite)?; + + // Create a hasher for each hash algorithm. + let mut hashers = hashes + .iter() + .map(HashDigest::algorithm) + .map(Hasher::from) + .collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(file, &mut hashers); + + // Unzip the wheel to a temporary directory. + uv_extract::stream::unzip(&mut hasher, temp_dir.path()).await?; + + // Exhaust the reader to compute the hash. + { + let mut buffer = Vec::new(); + hasher.read_to_end(&mut buffer).await?; + } + + // Persist the temporary directory to the directory store. + let archive = self + .build_context + .cache() + .persist(temp_dir.into_path(), wheel_entry.path()) + .await + .map_err(Error::CacheWrite)?; + + let hashes = hashers.into_iter().map(HashDigest::from).collect(); + + // Write the archive pointer to the cache. + let archive = Archive::new(archive, hashes); write_timestamped_archive(&archive_entry, archive.clone(), modified).await?; Ok(LocalWheel { dist: Dist::Built(dist.clone()), - archive, + archive: archive.path, filename: filename.clone(), + hashes: archive.hashes, }) } } @@ -549,7 +734,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// Write a timestamped archive path to the cache. async fn write_timestamped_archive( cache_entry: &CacheEntry, - data: PathBuf, + data: Archive, modified: ArchiveTimestamp, ) -> Result<(), Error> { write_atomic( @@ -567,10 +752,10 @@ async fn write_timestamped_archive( fn read_timestamped_archive( cache_entry: &CacheEntry, modified: ArchiveTimestamp, -) -> Result, Error> { +) -> Result, Error> { match fs_err::read(cache_entry.path()) { Ok(cached) => { - let cached = rmp_serde::from_slice::>(&cached)?; + let cached = rmp_serde::from_slice::>(&cached)?; if cached.timestamp == modified.timestamp() { return Ok(Some(cached.data)); } diff --git a/crates/uv-distribution/src/download.rs b/crates/uv-distribution/src/download.rs index b7123ff7c4382..0120cd6f792ae 100644 --- a/crates/uv-distribution/src/download.rs +++ b/crates/uv-distribution/src/download.rs @@ -1,8 +1,8 @@ use std::path::{Path, PathBuf}; use distribution_filename::WheelFilename; -use distribution_types::{CachedDist, Dist}; -use pypi_types::Metadata23; +use distribution_types::{CachedDist, Dist, Hashed}; +use pypi_types::{HashDigest, Metadata23}; use crate::Error; @@ -16,6 +16,8 @@ pub struct LocalWheel { /// The canonicalized path in the cache directory to which the wheel was downloaded. /// Typically, a directory within the archive bucket. pub(crate) archive: PathBuf, + /// The computed hashes of the wheel. + pub(crate) hashes: Vec, } impl LocalWheel { @@ -34,16 +36,27 @@ impl LocalWheel { &self.filename } + /// Return the computed hashes of the wheel. + pub fn hashes(&self) -> &[HashDigest] { + &self.hashes + } + /// Read the [`Metadata23`] from a wheel. 
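
// Illustrative sketch (hypothetical names, not uv's `CachedByTimestamp` or
// `write_atomic`): the pointer-file scheme behind the two timestamped-archive
// helpers above. The archive metadata is serialized with rmp-serde alongside
// the wheel's mtime, and on read it is only trusted while the mtime matches.
use std::path::{Path, PathBuf};
use std::time::SystemTime;

use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct TimestampedPointer<T> {
    timestamp: SystemTime,
    data: T,
}

#[derive(Serialize, Deserialize, Clone)]
struct ArchiveEntry {
    path: PathBuf,
    hashes: Vec<String>,
}

fn write_pointer(pointer: &Path, wheel: &Path, entry: &ArchiveEntry) -> std::io::Result<()> {
    let timestamp = std::fs::metadata(wheel)?.modified()?;
    let bytes = rmp_serde::to_vec(&TimestampedPointer {
        timestamp,
        data: entry.clone(),
    })
    .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;
    std::fs::write(pointer, bytes)
}

fn read_pointer(pointer: &Path, wheel: &Path) -> std::io::Result<Option<ArchiveEntry>> {
    let timestamp = std::fs::metadata(wheel)?.modified()?;
    let bytes = std::fs::read(pointer)?;
    let cached: TimestampedPointer<ArchiveEntry> = rmp_serde::from_slice(&bytes)
        .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;
    Ok((cached.timestamp == timestamp).then_some(cached.data))
}
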
pub fn metadata(&self) -> Result { read_flat_wheel_metadata(&self.filename, &self.archive) } } +impl Hashed for LocalWheel { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } +} + /// Convert a [`LocalWheel`] into a [`CachedDist`]. impl From for CachedDist { fn from(wheel: LocalWheel) -> CachedDist { - CachedDist::from_remote(wheel.dist, wheel.filename, wheel.archive) + CachedDist::from_remote(wheel.dist, wheel.filename, wheel.hashes, wheel.archive) } } diff --git a/crates/uv-distribution/src/error.rs b/crates/uv-distribution/src/error.rs index fe61b95dab528..50c91ad2771b3 100644 --- a/crates/uv-distribution/src/error.rs +++ b/crates/uv-distribution/src/error.rs @@ -3,6 +3,7 @@ use tokio::task::JoinError; use zip::result::ZipError; use distribution_filename::WheelFilenameError; +use pypi_types::HashDigest; use uv_client::BetterReqwestError; use uv_normalize::PackageName; @@ -78,6 +79,23 @@ pub enum Error { /// Should not occur; only seen when another task panicked. #[error("The task executor is broken, did some other task panic?")] Join(#[from] JoinError), + + /// An I/O error that occurs while exhausting a reader to compute a hash. + #[error(transparent)] + HashExhaustion(#[from] std::io::Error), + + #[error("Hash mismatch for {distribution}\n\nExpected:\n{expected}\n\nComputed:\n{actual}")] + HashMismatch { + distribution: String, + expected: String, + actual: String, + }, + + #[error("Hash-checking is not supported for local directories: {0}")] + HashesNotSupportedSourceTree(String), + + #[error("Hash-checking is not supported for Git repositories: {0}")] + HashesNotSupportedGit(String), } impl From for Error { @@ -96,3 +114,30 @@ impl From for Error { } } } + +impl Error { + /// Construct a hash mismatch error. + pub fn hash_mismatch( + distribution: String, + expected: &[HashDigest], + actual: &[HashDigest], + ) -> Error { + let expected = expected + .iter() + .map(|hash| format!(" {hash}")) + .collect::>() + .join("\n"); + + let actual = actual + .iter() + .map(|hash| format!(" {hash}")) + .collect::>() + .join("\n"); + + Self::HashMismatch { + distribution, + expected, + actual, + } + } +} diff --git a/crates/uv-distribution/src/index/built_wheel_index.rs b/crates/uv-distribution/src/index/built_wheel_index.rs index ca4f59b45d2f4..ea7fd1df680b6 100644 --- a/crates/uv-distribution/src/index/built_wheel_index.rs +++ b/crates/uv-distribution/src/index/built_wheel_index.rs @@ -1,7 +1,10 @@ -use distribution_types::{git_reference, DirectUrlSourceDist, GitSourceDist, PathSourceDist}; +use distribution_types::{ + git_reference, DirectUrlSourceDist, GitSourceDist, Hashed, PathSourceDist, +}; use platform_tags::Tags; use uv_cache::{ArchiveTimestamp, Cache, CacheBucket, CacheShard, WheelCache}; use uv_fs::symlinks; +use uv_types::RequiredHashes; use crate::index::cached_wheel::CachedWheel; use crate::source::{read_http_revision, read_timestamped_revision, REVISION}; @@ -12,12 +15,17 @@ use crate::Error; pub struct BuiltWheelIndex<'a> { cache: &'a Cache, tags: &'a Tags, + hashes: &'a RequiredHashes, } impl<'a> BuiltWheelIndex<'a> { /// Initialize an index of built distributions. - pub fn new(cache: &'a Cache, tags: &'a Tags) -> Self { - Self { cache, tags } + pub fn new(cache: &'a Cache, tags: &'a Tags, hashes: &'a RequiredHashes) -> Self { + Self { + cache, + tags, + hashes, + } } /// Return the most compatible [`CachedWheel`] for a given source distribution at a direct URL. 
@@ -31,13 +39,19 @@ impl<'a> BuiltWheelIndex<'a> { WheelCache::Url(source_dist.url.raw()).root(), ); - // Read the revision from the cache. There's no need to enforce freshness, since we - // enforce freshness on the entries. + // Read the revision from the cache. let revision_entry = cache_shard.entry(REVISION); let Some(revision) = read_http_revision(&revision_entry)? else { return Ok(None); }; + // Enforce hash-checking by omitting any wheels that don't satisfy the required hashes. + if let Some(hashes) = self.hashes.get(&source_dist.name) { + if !revision.satisfies(hashes) { + return Ok(None); + } + } + Ok(self.find(&cache_shard.shard(revision.id()))) } @@ -55,18 +69,29 @@ impl<'a> BuiltWheelIndex<'a> { return Err(Error::DirWithoutEntrypoint); }; - // Read the revision from the cache. There's no need to enforce freshness, since we - // enforce freshness on the entries. + // Read the revision from the cache. let revision_entry = cache_shard.entry(REVISION); let Some(revision) = read_timestamped_revision(&revision_entry, modified)? else { return Ok(None); }; + // Enforce hash-checking by omitting any wheels that don't satisfy the required hashes. + if let Some(hashes) = self.hashes.get(&source_dist.name) { + if !revision.satisfies(hashes) { + return Ok(None); + } + } + Ok(self.find(&cache_shard.shard(revision.id()))) } /// Return the most compatible [`CachedWheel`] for a given source distribution at a git URL. pub fn git(&self, source_dist: &GitSourceDist) -> Option { + // Enforce hash-checking, which isn't supported for Git distributions. + if self.hashes.get(&source_dist.name).is_some() { + return None; + } + let Ok(Some(git_sha)) = git_reference(&source_dist.url) else { return None; }; @@ -100,7 +125,7 @@ impl<'a> BuiltWheelIndex<'a> { // Unzipped wheels are stored as symlinks into the archive directory. for subdir in symlinks(shard) { - match CachedWheel::from_path(&subdir) { + match CachedWheel::from_built_source(&subdir) { None => {} Some(dist_info) => { // Pick the wheel with the highest priority diff --git a/crates/uv-distribution/src/index/cached_wheel.rs b/crates/uv-distribution/src/index/cached_wheel.rs index a8e4172aa3f3f..f73c2ecc6ccb0 100644 --- a/crates/uv-distribution/src/index/cached_wheel.rs +++ b/crates/uv-distribution/src/index/cached_wheel.rs @@ -1,9 +1,13 @@ use std::path::Path; use distribution_filename::WheelFilename; -use distribution_types::{CachedDirectUrlDist, CachedRegistryDist}; +use distribution_types::{CachedDirectUrlDist, CachedRegistryDist, Hashed}; use pep508_rs::VerbatimUrl; -use uv_cache::CacheEntry; +use pypi_types::HashDigest; +use uv_cache::{CacheEntry, CachedByTimestamp}; +use uv_client::DataWithCachePolicy; + +use crate::archive::Archive; #[derive(Debug, Clone)] pub struct CachedWheel { @@ -11,16 +15,23 @@ pub struct CachedWheel { pub filename: WheelFilename, /// The [`CacheEntry`] for the wheel. pub entry: CacheEntry, + /// The [`HashDigest`]s for the wheel. + pub hashes: Vec, } impl CachedWheel { /// Try to parse a distribution from a cached directory name (like `typing-extensions-4.8.0-py3-none-any`). 
- pub fn from_path(path: &Path) -> Option { + pub fn from_built_source(path: &Path) -> Option { let filename = path.file_name()?.to_str()?; let filename = WheelFilename::from_stem(filename).ok()?; let archive = path.canonicalize().ok()?; let entry = CacheEntry::from_path(archive); - Some(Self { filename, entry }) + let hashes = Vec::new(); + Some(Self { + filename, + entry, + hashes, + }) } /// Convert a [`CachedWheel`] into a [`CachedRegistryDist`]. @@ -28,6 +39,7 @@ impl CachedWheel { CachedRegistryDist { filename: self.filename, path: self.entry.into_path_buf(), + hashes: self.hashes, } } @@ -38,6 +50,56 @@ impl CachedWheel { url, path: self.entry.into_path_buf(), editable: false, + hashes: self.hashes, } } + + /// Read a cached wheel from a `.http` pointer (e.g., `anyio-4.0.0-py3-none-any.http`). + pub fn from_http_pointer(path: &Path) -> Option { + // Determine the wheel filename. + let filename = path.file_name()?.to_str()?; + let filename = WheelFilename::from_stem(filename).ok()?; + + // Read the pointer. + let file = std::fs::File::open(path).ok()?; + let data = DataWithCachePolicy::from_reader(file).ok()?.data; + let archive = rmp_serde::from_slice::(&data).ok()?; + + // Convert to a cached wheel. + let entry = CacheEntry::from_path(archive.path); + let hashes = archive.hashes; + Some(Self { + filename, + entry, + hashes, + }) + } + + /// Read a cached wheel from a `.rev` pointer (e.g., `anyio-4.0.0-py3-none-any.rev`). + pub fn from_revision_pointer(path: &Path) -> Option { + // Determine the wheel filename. + let filename = path.file_name()?.to_str()?; + let filename = WheelFilename::from_stem(filename).ok()?; + + // Read the pointer.?; + let cached = fs_err::read(path).ok()?; + let archive = rmp_serde::from_slice::>(&cached) + .ok()? + .data; + + // Convert to a cached wheel. + let entry = CacheEntry::from_path(archive.path); + let hashes = archive.hashes; + Some(Self { + filename, + entry, + hashes, + }) + } +} + +impl Hashed for CachedWheel { + fn hashes(&self) -> &[HashDigest] { + &self.hashes + } } diff --git a/crates/uv-distribution/src/index/registry_wheel_index.rs b/crates/uv-distribution/src/index/registry_wheel_index.rs index d1dbf251fd822..0e42e997ebab4 100644 --- a/crates/uv-distribution/src/index/registry_wheel_index.rs +++ b/crates/uv-distribution/src/index/registry_wheel_index.rs @@ -1,16 +1,16 @@ use std::collections::hash_map::Entry; use std::collections::BTreeMap; -use std::path::Path; use rustc_hash::FxHashMap; -use distribution_types::{CachedRegistryDist, FlatIndexLocation, IndexLocations, IndexUrl}; +use distribution_types::{CachedRegistryDist, FlatIndexLocation, Hashed, IndexLocations, IndexUrl}; use pep440_rs::Version; use pep508_rs::VerbatimUrl; use platform_tags::Tags; use uv_cache::{Cache, CacheBucket, WheelCache}; -use uv_fs::{directories, symlinks}; +use uv_fs::{directories, files, symlinks}; use uv_normalize::PackageName; +use uv_types::RequiredHashes; use crate::index::cached_wheel::CachedWheel; use crate::source::{read_http_revision, REVISION}; @@ -21,16 +21,23 @@ pub struct RegistryWheelIndex<'a> { cache: &'a Cache, tags: &'a Tags, index_locations: &'a IndexLocations, + hashes: &'a RequiredHashes, index: FxHashMap<&'a PackageName, BTreeMap>, } impl<'a> RegistryWheelIndex<'a> { /// Initialize an index of registry distributions. 
- pub fn new(cache: &'a Cache, tags: &'a Tags, index_locations: &'a IndexLocations) -> Self { + pub fn new( + cache: &'a Cache, + tags: &'a Tags, + index_locations: &'a IndexLocations, + hashes: &'a RequiredHashes, + ) -> Self { Self { cache, tags, index_locations, + hashes, index: FxHashMap::default(), } } @@ -65,6 +72,7 @@ impl<'a> RegistryWheelIndex<'a> { self.cache, self.tags, self.index_locations, + self.hashes, )), }; versions @@ -76,8 +84,10 @@ impl<'a> RegistryWheelIndex<'a> { cache: &Cache, tags: &Tags, index_locations: &IndexLocations, + hashes: &RequiredHashes, ) -> BTreeMap { let mut versions = BTreeMap::new(); + let hashes = hashes.get(package).unwrap_or_default(); // Collect into owned `IndexUrl` let flat_index_urls: Vec = index_locations @@ -100,7 +110,20 @@ impl<'a> RegistryWheelIndex<'a> { WheelCache::Index(index_url).wheel_dir(package.to_string()), ); - Self::add_directory(&wheel_dir, tags, &mut versions); + // For registry wheels, the cache structure is: `//.http`. + for file in files(&wheel_dir) { + if file + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("http")) + { + if let Some(wheel) = CachedWheel::from_http_pointer(&wheel_dir.join(&file)) { + // Enforce hash-checking based on the built distribution. + if wheel.satisfies(hashes) { + Self::add_wheel(wheel, tags, &mut versions); + } + } + } + } // Index all the built wheels, created by downloading and building source distributions // from the registry. @@ -115,7 +138,14 @@ impl<'a> RegistryWheelIndex<'a> { let cache_shard = cache_shard.shard(shard); let revision_entry = cache_shard.entry(REVISION); if let Ok(Some(revision)) = read_http_revision(&revision_entry) { - Self::add_directory(cache_shard.join(revision.id()), tags, &mut versions); + // Enforce hash-checking based on the source distribution. + if revision.satisfies(hashes) { + for wheel_dir in symlinks(cache_shard.join(revision.id())) { + if let Some(wheel) = CachedWheel::from_built_source(&wheel_dir) { + Self::add_wheel(wheel, tags, &mut versions); + } + } + } }; } } @@ -123,33 +153,23 @@ impl<'a> RegistryWheelIndex<'a> { versions } - /// Add the wheels in a given directory to the index. - /// - /// Each subdirectory in the given path is expected to be that of an unzipped wheel. - fn add_directory( - path: impl AsRef, + /// Add the [`CachedWheel`] to the index. + fn add_wheel( + wheel: CachedWheel, tags: &Tags, versions: &mut BTreeMap, ) { - // Unzipped wheels are stored as symlinks into the archive directory. 
- for wheel_dir in symlinks(path.as_ref()) { - match CachedWheel::from_path(&wheel_dir) { - None => {} - Some(dist_info) => { - let dist_info = dist_info.into_registry_dist(); - - // Pick the wheel with the highest priority - let compatibility = dist_info.filename.compatibility(tags); - if let Some(existing) = versions.get_mut(&dist_info.filename.version) { - // Override if we have better compatibility - if compatibility > existing.filename.compatibility(tags) { - *existing = dist_info; - } - } else if compatibility.is_compatible() { - versions.insert(dist_info.filename.version.clone(), dist_info); - } - } + let dist_info = wheel.into_registry_dist(); + + // Pick the wheel with the highest priority + let compatibility = dist_info.filename.compatibility(tags); + if let Some(existing) = versions.get_mut(&dist_info.filename.version) { + // Override if we have better compatibility + if compatibility > existing.filename.compatibility(tags) { + *existing = dist_info; } + } else if compatibility.is_compatible() { + versions.insert(dist_info.filename.version.clone(), dist_info); } } } diff --git a/crates/uv-distribution/src/lib.rs b/crates/uv-distribution/src/lib.rs index f74b0fc9d3f8a..7fe85248f2e61 100644 --- a/crates/uv-distribution/src/lib.rs +++ b/crates/uv-distribution/src/lib.rs @@ -1,3 +1,4 @@ +pub use archive::Archive; pub use distribution_database::DistributionDatabase; pub use download::LocalWheel; pub use error::Error; @@ -6,6 +7,7 @@ pub use index::{BuiltWheelIndex, RegistryWheelIndex}; pub use reporter::Reporter; pub use source::SourceDistributionBuilder; +mod archive; mod distribution_database; mod download; mod error; diff --git a/crates/uv-distribution/src/source/built_wheel_metadata.rs b/crates/uv-distribution/src/source/built_wheel_metadata.rs index 3115d9882a9da..f1fc655529ec4 100644 --- a/crates/uv-distribution/src/source/built_wheel_metadata.rs +++ b/crates/uv-distribution/src/source/built_wheel_metadata.rs @@ -3,6 +3,7 @@ use std::str::FromStr; use distribution_filename::WheelFilename; use platform_tags::Tags; +use pypi_types::HashDigest; use uv_cache::CacheShard; use uv_fs::files; @@ -15,6 +16,8 @@ pub struct BuiltWheelMetadata { pub(crate) target: PathBuf, /// The parsed filename. pub(crate) filename: WheelFilename, + /// The computed hashes of the wheel. + pub(crate) hashes: Vec, } impl BuiltWheelMetadata { @@ -39,6 +42,14 @@ impl BuiltWheelMetadata { target: cache_shard.join(filename.stem()), path, filename, + hashes: vec![], }) } + + /// Set the computed hashes of the wheel. 
+ #[must_use] + pub(crate) fn with_hashes(mut self, hashes: Vec) -> Self { + self.hashes = hashes; + self + } } diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 34b52c105257d..ce28f8f24f2b8 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -9,6 +9,7 @@ use anyhow::Result; use fs_err::tokio as fs; use futures::{FutureExt, TryStreamExt}; use reqwest::Response; +use tokio::io::AsyncReadExt; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{debug, info_span, instrument, Instrument}; use url::Url; @@ -16,12 +17,12 @@ use zip::ZipArchive; use distribution_filename::WheelFilename; use distribution_types::{ - BuildableSource, DirectArchiveUrl, Dist, FileLocation, GitSourceUrl, LocalEditable, + BuildableSource, DirectArchiveUrl, Dist, FileLocation, GitSourceUrl, Hashed, LocalEditable, PathSourceDist, PathSourceUrl, RemoteSource, SourceDist, SourceUrl, }; use install_wheel_rs::metadata::read_archive_metadata; use platform_tags::Tags; -use pypi_types::Metadata23; +use pypi_types::{HashDigest, Metadata23}; use uv_cache::{ ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Freshness, WheelCache, }; @@ -29,6 +30,7 @@ use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, }; use uv_configuration::{BuildKind, NoBuild}; +use uv_extract::hash::Hasher; use uv_fs::write_atomic; use uv_types::{BuildContext, SourceBuildTrait}; @@ -80,6 +82,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { let built_wheel_metadata = match &source { BuildableSource::Dist(SourceDist::Registry(dist)) => { @@ -100,6 +103,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: Cow::Borrowed(path), }, tags, + hashes, ) .boxed() .await; @@ -115,9 +119,17 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .join(dist.filename.version.to_string()), ); - self.url(source, &dist.file.filename, &url, &cache_shard, None, tags) - .boxed() - .await? + self.url( + source, + &dist.file.filename, + &url, + &cache_shard, + None, + tags, + hashes, + ) + .boxed() + .await? } BuildableSource::Dist(SourceDist::DirectUrl(dist)) => { let filename = dist.filename().expect("Distribution must have a filename"); @@ -136,22 +148,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &cache_shard, subdirectory.as_deref(), tags, + hashes, ) .boxed() .await? } BuildableSource::Dist(SourceDist::Git(dist)) => { - self.git(source, &GitSourceUrl::from(dist), tags) + self.git(source, &GitSourceUrl::from(dist), tags, hashes) .boxed() .await? } BuildableSource::Dist(SourceDist::Path(dist)) => { if dist.path.is_dir() { - self.source_tree(source, &PathSourceUrl::from(dist), tags) + self.source_tree(source, &PathSourceUrl::from(dist), tags, hashes) .boxed() .await? } else { - self.archive(source, &PathSourceUrl::from(dist), tags) + self.archive(source, &PathSourceUrl::from(dist), tags, hashes) .boxed() .await? } @@ -176,18 +189,21 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &cache_shard, subdirectory.as_deref(), tags, + hashes, ) .boxed() .await? } BuildableSource::Url(SourceUrl::Git(resource)) => { - self.git(source, resource, tags).boxed().await? + self.git(source, resource, tags, hashes).boxed().await? 
} BuildableSource::Url(SourceUrl::Path(resource)) => { if resource.path.is_dir() { - self.source_tree(source, resource, tags).boxed().await? + self.source_tree(source, resource, tags, hashes) + .boxed() + .await? } else { - self.archive(source, resource, tags).boxed().await? + self.archive(source, resource, tags, hashes).boxed().await? } } }; @@ -201,6 +217,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { pub async fn download_and_build_metadata( &self, source: &BuildableSource<'_>, + hashes: &[HashDigest], ) -> Result { let metadata = match &source { BuildableSource::Dist(SourceDist::Registry(dist)) => { @@ -220,6 +237,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { url: &url, path: Cow::Borrowed(path), }, + hashes, ) .boxed() .await; @@ -234,9 +252,16 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .join(dist.filename.version.to_string()), ); - self.url_metadata(source, &dist.file.filename, &url, &cache_shard, None) - .boxed() - .await? + self.url_metadata( + source, + &dist.file.filename, + &url, + &cache_shard, + None, + hashes, + ) + .boxed() + .await? } BuildableSource::Dist(SourceDist::DirectUrl(dist)) => { let filename = dist.filename().expect("Distribution must have a filename"); @@ -254,22 +279,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &url, &cache_shard, subdirectory.as_deref(), + hashes, ) .boxed() .await? } BuildableSource::Dist(SourceDist::Git(dist)) => { - self.git_metadata(source, &GitSourceUrl::from(dist)) + self.git_metadata(source, &GitSourceUrl::from(dist), hashes) .boxed() .await? } BuildableSource::Dist(SourceDist::Path(dist)) => { if dist.path.is_dir() { - self.source_tree_metadata(source, &PathSourceUrl::from(dist)) + self.source_tree_metadata(source, &PathSourceUrl::from(dist), hashes) .boxed() .await? } else { - self.archive_metadata(source, &PathSourceUrl::from(dist)) + self.archive_metadata(source, &PathSourceUrl::from(dist), hashes) .boxed() .await? } @@ -293,18 +319,23 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &url, &cache_shard, subdirectory.as_deref(), + hashes, ) .boxed() .await? } BuildableSource::Url(SourceUrl::Git(resource)) => { - self.git_metadata(source, resource).boxed().await? + self.git_metadata(source, resource, hashes).boxed().await? } BuildableSource::Url(SourceUrl::Path(resource)) => { if resource.path.is_dir() { - self.source_tree_metadata(source, resource).boxed().await? + self.source_tree_metadata(source, resource, hashes) + .boxed() + .await? } else { - self.archive_metadata(source, resource).boxed().await? + self.archive_metadata(source, resource, hashes) + .boxed() + .await? } } }; @@ -322,19 +353,31 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { cache_shard: &CacheShard, subdirectory: Option<&'data Path>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { // Fetch the revision for the source distribution. let revision = self - .url_revision(source, filename, url, cache_shard) + .url_revision(source, filename, url, cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + if revision.hashes().iter().all(|hash| !hashes.contains(hash)) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. 
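
// Illustrative sketch of the pre-build check repeated in the revision-handling
// code above: if hashes are required and none of the computed digests matches,
// fail with the same kind of "expected vs. computed" report that
// `Error::hash_mismatch` formats. Plain strings stand in for uv's
// `HashDigest`; the function name is hypothetical.
fn check_hashes(
    distribution: &str,
    required: &[String],
    computed: &[String],
) -> Result<(), String> {
    if required.is_empty() || computed.iter().any(|hash| required.contains(hash)) {
        return Ok(());
    }
    let expected = required
        .iter()
        .map(|hash| format!("  {hash}"))
        .collect::<Vec<_>>()
        .join("\n");
    let actual = computed
        .iter()
        .map(|hash| format!("  {hash}"))
        .collect::<Vec<_>>()
        .join("\n");
    Err(format!(
        "Hash mismatch for {distribution}\n\nExpected:\n{expected}\n\nComputed:\n{actual}"
    ))
}
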
let cache_shard = cache_shard.shard(revision.id()); // If the cache contains a compatible wheel, return it. if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) { - return Ok(built_wheel); + return Ok(built_wheel.with_hashes(revision.into_hashes())); } let task = self @@ -364,6 +407,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(wheel_filename.stem()), filename: wheel_filename, + hashes: revision.into_hashes(), }) } @@ -379,12 +423,24 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { url: &'data Url, cache_shard: &CacheShard, subdirectory: Option<&'data Path>, + hashes: &[HashDigest], ) -> Result { // Fetch the revision for the source distribution. let revision = self - .url_revision(source, filename, url, cache_shard) + .url_revision(source, filename, url, cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + if revision.hashes().iter().all(|hash| !hashes.contains(hash)) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); @@ -449,6 +505,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { filename: &str, url: &Url, cache_shard: &CacheShard, + hashes: &[HashDigest], ) -> Result { let cache_entry = cache_shard.entry(REVISION); let cache_control = match self.client.connectivity() { @@ -469,24 +526,40 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Download the source distribution. debug!("Downloading source distribution: {source}"); - let source_dist_entry = cache_shard.shard(revision.id()).entry(filename); - self.persist_url(response, source, filename, &source_dist_entry) + let entry = cache_shard.shard(revision.id()).entry(filename); + let hashes = self + .download_archive(response, source, filename, entry.path(), hashes) .await?; - Ok(revision) + Ok(revision.with_hashes(hashes)) } .boxed() .instrument(info_span!("download", source_dist = %source)) }; let req = self.request(url.clone())?; - self.client + let revision = self + .client .cached_client() .get_serde(req, &cache_entry, cache_control, download) .await .map_err(|err| match err { CachedClientError::Callback(err) => err, CachedClientError::Client(err) => Error::Client(err), - }) + })?; + + // If the archive is missing the required hashes, force a refresh. + if revision.has_digests(hashes) { + Ok(revision) + } else { + self.client + .cached_client() + .skip_cache(self.request(url.clone())?, &cache_entry, download) + .await + .map_err(|err| match err { + CachedClientError::Callback(err) => err, + CachedClientError::Client(err) => Error::Client(err), + }) + } } /// Build a source distribution from a local archive (e.g., `.tar.gz` or `.zip`). @@ -495,6 +568,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, @@ -503,9 +577,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Fetch the revision for the source distribution. 
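The `url_revision` hunk above reuses a cached revision only when it already carries the digests the caller asked for; otherwise it calls `skip_cache` and re-downloads so the missing digests can be computed. A rough sketch of that decision, under the assumption that `has_digests` means "covers every requested algorithm" (the types here are simplified stand-ins, not the uv-distribution ones):

#[derive(Debug, Clone, PartialEq, Eq)]
struct Digest {
    algorithm: &'static str,
    hex: String,
}

#[derive(Debug, Clone, Default)]
struct Revision {
    hashes: Vec<Digest>,
}

impl Revision {
    /// A cached revision is only reusable if it already has a digest for
    /// every requested algorithm.
    fn has_digests(&self, required: &[Digest]) -> bool {
        required
            .iter()
            .all(|req| self.hashes.iter().any(|have| have.algorithm == req.algorithm))
    }
}

fn main() {
    let required = vec![Digest { algorithm: "sha256", hex: String::new() }];
    let stale = Revision::default(); // cached before any hashes were recorded
    let fresh = Revision { hashes: required.clone() };
    assert!(!stale.has_digests(&required)); // -> skip the cache and re-download
    assert!(fresh.has_digests(&required)); // -> reuse the cached archive
}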
let revision = self - .archive_revision(source, resource, &cache_shard) + .archive_revision(source, resource, &cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + if revision.hashes().iter().all(|hash| !hashes.contains(hash)) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); @@ -543,6 +628,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, + hashes: revision.into_hashes(), }) } @@ -554,6 +640,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, + hashes: &[HashDigest], ) -> Result { let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, @@ -562,9 +649,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Fetch the revision for the source distribution. let revision = self - .archive_revision(source, resource, &cache_shard) + .archive_revision(source, resource, &cache_shard, hashes) .await?; + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + if revision.hashes().iter().all(|hash| !hashes.contains(hash)) { + return Err(Error::hash_mismatch( + source.to_string(), + hashes, + revision.hashes(), + )); + } + } + // Scope all operations to the revision. Within the revision, there's no need to check for // freshness, since entries have to be fresher than the revision itself. let cache_shard = cache_shard.shard(revision.id()); @@ -627,6 +725,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, cache_shard: &CacheShard, + hashes: &[HashDigest], ) -> Result { // Determine the last-modified time of the source distribution. let modified = ArchiveTimestamp::from_file(&resource.path).map_err(Error::CacheRead)?; @@ -637,7 +736,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // If the revision already exists, return it. There's no need to check for freshness, since // we use an exact timestamp. if let Some(revision) = read_timestamped_revision(&revision_entry, modified)? { - return Ok(revision); + if revision.has_digests(hashes) { + return Ok(revision); + } } // Otherwise, we need to create a new revision. @@ -646,7 +747,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Unzip the archive to a temporary directory. debug!("Unpacking source distribution: {source}"); let entry = cache_shard.shard(revision.id()).entry("source"); - self.persist_archive(&resource.path, source, &entry).await?; + let hashes = self + .persist_archive(&resource.path, entry.path(), hashes) + .await?; + let revision = revision.with_hashes(hashes); // Persist the revision. write_atomic( @@ -668,7 +772,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { + // Before running the build, check that the hashes match. 
+ if !hashes.is_empty() { + return Err(Error::HashesNotSupportedSourceTree(source.to_string())); + } + let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, WheelCache::Path(resource.url).root(), @@ -714,6 +824,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, + hashes: vec![], }) } @@ -725,7 +836,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, resource: &PathSourceUrl<'_>, + hashes: &[HashDigest], ) -> Result { + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedSourceTree(source.to_string())); + } + let cache_shard = self.build_context.cache().shard( CacheBucket::BuiltWheels, WheelCache::Path(resource.url).root(), @@ -742,16 +859,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // If the cache contains compatible metadata, return it. let metadata_entry = cache_shard.entry(METADATA); - if self - .build_context - .cache() - .freshness(&metadata_entry, source.name()) - .is_ok_and(Freshness::is_fresh) - { - if let Some(metadata) = read_cached_metadata(&metadata_entry).await? { - debug!("Using cached metadata for: {source}"); - return Ok(metadata); - } + if let Some(metadata) = read_cached_metadata(&metadata_entry).await? { + debug!("Using cached metadata for: {source}"); + return Ok(metadata); } // If the backend supports `prepare_metadata_for_build_wheel`, use it. @@ -828,7 +938,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { source: &BuildableSource<'_>, resource: &GitSourceUrl<'_>, tags: &Tags, + hashes: &[HashDigest], ) -> Result { + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedGit(source.to_string())); + } + // Resolve to a precise Git SHA. let url = if let Some(url) = resolve_precise( resource.url, @@ -882,6 +998,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { path: cache_shard.join(&disk_filename), target: cache_shard.join(filename.stem()), filename, + hashes: vec![], }) } @@ -893,7 +1010,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, resource: &GitSourceUrl<'_>, + hashes: &[HashDigest], ) -> Result { + // Before running the build, check that the hashes match. + if !hashes.is_empty() { + return Err(Error::HashesNotSupportedGit(source.to_string())); + } + // Resolve to a precise Git SHA. let url = if let Some(url) = resolve_precise( resource.url, @@ -975,21 +1098,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } /// Download and unzip a source distribution into the cache from an HTTP response. - async fn persist_url( + async fn download_archive( &self, response: Response, source: &BuildableSource<'_>, filename: &str, - cache_entry: &CacheEntry, - ) -> Result<(), Error> { - let cache_path = cache_entry.path(); - if cache_path.is_dir() { - debug!("Distribution is already cached: {source}"); - return Ok(()); - } - - // Download and unzip the source distribution into a temporary directory. 
- let span = info_span!("persist_url", filename = filename, source_dist = %source); + target: &Path, + hashes: &[HashDigest], + ) -> Result, Error> { let temp_dir = tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels)) .map_err(Error::CacheWrite)?; @@ -997,9 +1113,30 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .bytes_stream() .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)) .into_async_read(); - uv_extract::stream::archive(reader.compat(), filename, temp_dir.path()).await?; + + // Create a hasher for each hash algorithm. + let mut hashers = hashes + .iter() + .map(HashDigest::algorithm) + .map(Hasher::from) + .collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(reader.compat(), &mut hashers); + + // Download and unzip the source distribution into a temporary directory. + let span = info_span!("download_source_dist", filename = filename, source_dist = %source); + uv_extract::stream::archive(&mut hasher, filename, temp_dir.path()).await?; drop(span); + // If necessary, exhaust the reader to compute the hash. + if !hashes.is_empty() { + hasher + .read_to_end(&mut Vec::new()) + .await + .map_err(Error::HashExhaustion)?; + } + + let hashes = hashers.into_iter().map(HashDigest::from).collect(); + // Extract the top-level directory. let extracted = match uv_extract::strip_component(temp_dir.path()) { Ok(top_level) => top_level, @@ -1008,39 +1145,52 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { }; // Persist it to the cache. - fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) + fs_err::tokio::create_dir_all(target.parent().expect("Cache entry to have parent")) .await .map_err(Error::CacheWrite)?; - fs_err::tokio::rename(extracted, &cache_path) + fs_err::tokio::rename(extracted, target) .await .map_err(Error::CacheWrite)?; - Ok(()) + Ok(hashes) } /// Extract a local archive, and store it at the given [`CacheEntry`]. async fn persist_archive( &self, path: &Path, - source: &BuildableSource<'_>, - cache_entry: &CacheEntry, - ) -> Result<(), Error> { - let cache_path = cache_entry.path(); - if cache_path.is_dir() { - debug!("Distribution is already cached: {source}"); - return Ok(()); - } - + target: &Path, + hashes: &[HashDigest], + ) -> Result, Error> { debug!("Unpacking for build: {}", path.display()); - // Unzip the archive into a temporary directory. let temp_dir = tempfile::tempdir_in(self.build_context.cache().bucket(CacheBucket::BuiltWheels)) .map_err(Error::CacheWrite)?; let reader = fs_err::tokio::File::open(&path) .await .map_err(Error::CacheRead)?; - uv_extract::seek::archive(reader, path, &temp_dir.path()).await?; + + // Create a hasher for each hash algorithm. + let mut hashers = hashes + .iter() + .map(HashDigest::algorithm) + .map(Hasher::from) + .collect::>(); + let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers); + + // Unzip the archive into a temporary directory. + uv_extract::stream::archive(&mut hasher, path, &temp_dir.path()).await?; + + // If necessary, exhaust the reader to compute the hash. + if !hashes.is_empty() { + hasher + .read_to_end(&mut Vec::new()) + .await + .map_err(Error::HashExhaustion)?; + } + + let hashes = hashers.into_iter().map(HashDigest::from).collect(); // Extract the top-level directory from the archive. let extracted = match uv_extract::strip_component(temp_dir.path()) { @@ -1050,14 +1200,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { }; // Persist it to the cache. 
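The rewritten download/persist paths compute digests while the archive is being unpacked, then drain whatever the extractor did not consume so the digests cover the whole file. A self-contained sketch of the same pattern using sha2 and tokio directly (no uv types; assumes tokio with the rt and macros features, which is more than the io-util feature this patch enables for uv-extract):

use sha2::{Digest, Sha256};
use tokio::io::{AsyncRead, AsyncReadExt};

/// Read `reader` to the end, feeding every chunk into a SHA-256 hasher and
/// returning the hex digest. In the patch the reads are driven by the
/// unpacker and a final `read_to_end` drains the remainder; here we simply
/// drain everything ourselves.
async fn hash_stream<R: AsyncRead + Unpin>(mut reader: R) -> std::io::Result<String> {
    let mut hasher = Sha256::new();
    let mut buf = vec![0u8; 64 * 1024];
    loop {
        let n = reader.read(&mut buf).await?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(format!("{:x}", hasher.finalize()))
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let data: &[u8] = b"abc";
    let digest = hash_stream(data).await?;
    // Well-known SHA-256 of "abc".
    assert_eq!(
        digest,
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    );
    Ok(())
}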
- fs_err::tokio::create_dir_all(cache_path.parent().expect("Cache entry to have parent")) + fs_err::tokio::create_dir_all(target.parent().expect("Cache entry to have parent")) .await .map_err(Error::CacheWrite)?; - fs_err::tokio::rename(extracted, &cache_path) + fs_err::tokio::rename(extracted, &target) .await .map_err(Error::CacheWrite)?; - Ok(()) + Ok(hashes) } /// Build a source distribution, storing the built wheel in the cache. diff --git a/crates/uv-distribution/src/source/revision.rs b/crates/uv-distribution/src/source/revision.rs index b2f6d5b9a0d2c..aadc2945acd2a 100644 --- a/crates/uv-distribution/src/source/revision.rs +++ b/crates/uv-distribution/src/source/revision.rs @@ -1,5 +1,8 @@ +use distribution_types::Hashed; use serde::{Deserialize, Serialize}; +use pypi_types::HashDigest; + /// The [`Revision`] is a thin wrapper around a unique identifier for the source distribution. /// /// A revision represents a unique version of a source distribution, at a level more granular than @@ -7,16 +10,45 @@ use serde::{Deserialize, Serialize}; /// at a URL or a local file path may have multiple revisions, each representing a unique state of /// the distribution, despite the reported version number remaining the same. #[derive(Debug, Clone, Serialize, Deserialize)] -pub(crate) struct Revision(String); +pub(crate) struct Revision { + id: String, + hashes: Vec, +} impl Revision { /// Initialize a new [`Revision`] with a random UUID. pub(crate) fn new() -> Self { - Self(nanoid::nanoid!()) + Self { + id: nanoid::nanoid!(), + hashes: vec![], + } } - /// Return the unique ID of the revision. + /// Return the unique ID of the manifest. pub(crate) fn id(&self) -> &str { - &self.0 + &self.id + } + + /// Return the computed hashes of the archive. + pub(crate) fn hashes(&self) -> &[HashDigest] { + &self.hashes + } + + /// Return the computed hashes of the archive. + pub(crate) fn into_hashes(self) -> Vec { + self.hashes + } + + /// Set the computed hashes of the archive. 
+ #[must_use] + pub(crate) fn with_hashes(mut self, hashes: Vec) -> Self { + self.hashes = hashes; + self + } +} + +impl Hashed for Revision { + fn hashes(&self) -> &[HashDigest] { + &self.hashes } } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index f40dee266202b..0c3dbb8fb73e3 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -13,12 +13,16 @@ license = { workspace = true } workspace = true [dependencies] +pypi-types = { workspace = true } + async-compression = { workspace = true, features = ["gzip", "zstd"] } async_zip = { workspace = true, features = ["tokio"] } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } +md-5.workspace = true rayon = { workspace = true } rustc-hash = { workspace = true } +sha2 = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true, features = ["io-util"] } tokio-tar = { workspace = true } diff --git a/crates/uv-extract/src/hash.rs b/crates/uv-extract/src/hash.rs new file mode 100644 index 0000000000000..a448cde17c961 --- /dev/null +++ b/crates/uv-extract/src/hash.rs @@ -0,0 +1,138 @@ +use std::pin::Pin; +use std::task::{Context, Poll}; + +use pypi_types::{HashAlgorithm, HashDigest}; +use sha2::Digest; +use tokio::io::ReadBuf; + +pub struct Sha256Reader<'a, R> { + reader: R, + hasher: &'a mut sha2::Sha256, +} + +impl<'a, R> Sha256Reader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + pub fn new(reader: R, hasher: &'a mut sha2::Sha256) -> Self { + Sha256Reader { reader, hasher } + } +} + +impl<'a, R> tokio::io::AsyncRead for Sha256Reader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let reader = Pin::new(&mut self.reader); + match reader.poll_read(cx, buf) { + Poll::Ready(Ok(())) => { + self.hasher.update(buf.filled()); + Poll::Ready(Ok(())) + } + other => other, + } + } +} + +#[derive(Debug)] +pub enum Hasher { + Md5(md5::Md5), + Sha256(sha2::Sha256), + Sha384(sha2::Sha384), + Sha512(sha2::Sha512), +} + +impl Hasher { + pub fn update(&mut self, data: &[u8]) { + match self { + Hasher::Md5(hasher) => hasher.update(data), + Hasher::Sha256(hasher) => hasher.update(data), + Hasher::Sha384(hasher) => hasher.update(data), + Hasher::Sha512(hasher) => hasher.update(data), + } + } + + pub fn finalize(self) -> Vec { + match self { + Hasher::Md5(hasher) => hasher.finalize().to_vec(), + Hasher::Sha256(hasher) => hasher.finalize().to_vec(), + Hasher::Sha384(hasher) => hasher.finalize().to_vec(), + Hasher::Sha512(hasher) => hasher.finalize().to_vec(), + } + } +} + +impl From for Hasher { + fn from(algorithm: HashAlgorithm) -> Self { + match algorithm { + HashAlgorithm::Md5 => Hasher::Md5(md5::Md5::new()), + HashAlgorithm::Sha256 => Hasher::Sha256(sha2::Sha256::new()), + HashAlgorithm::Sha384 => Hasher::Sha384(sha2::Sha384::new()), + HashAlgorithm::Sha512 => Hasher::Sha512(sha2::Sha512::new()), + } + } +} + +impl From for HashDigest { + fn from(hasher: Hasher) -> Self { + match hasher { + Hasher::Md5(hasher) => HashDigest { + algorithm: HashAlgorithm::Md5, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + Hasher::Sha256(hasher) => HashDigest { + algorithm: HashAlgorithm::Sha256, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + Hasher::Sha384(hasher) => HashDigest { + algorithm: HashAlgorithm::Sha384, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + Hasher::Sha512(hasher) => 
HashDigest { + algorithm: HashAlgorithm::Sha512, + digest: format!("{:x}", hasher.finalize()).into_boxed_str(), + }, + } + } +} + +pub struct HashReader<'a, R> { + reader: R, + hashers: &'a mut [Hasher], +} + +impl<'a, R> HashReader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + pub fn new(reader: R, hashers: &'a mut [Hasher]) -> Self { + HashReader { reader, hashers } + } +} + +impl<'a, R> tokio::io::AsyncRead for HashReader<'a, R> +where + R: tokio::io::AsyncRead + Unpin, +{ + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let reader = Pin::new(&mut self.reader); + match reader.poll_read(cx, buf) { + Poll::Ready(Ok(())) => { + for hasher in self.hashers.iter_mut() { + hasher.update(buf.filled()); + } + Poll::Ready(Ok(())) + } + other => other, + } + } +} diff --git a/crates/uv-extract/src/lib.rs b/crates/uv-extract/src/lib.rs index 20d4330714817..192aaa8e0a9ff 100644 --- a/crates/uv-extract/src/lib.rs +++ b/crates/uv-extract/src/lib.rs @@ -2,6 +2,7 @@ pub use error::Error; pub use sync::*; mod error; +pub mod hash; pub mod seek; pub mod stream; mod sync; diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index cff2825dc2030..a18ac41ab7deb 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -160,7 +160,8 @@ pub async fn untar_gz( let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes) .set_preserve_mtime(false) .build(); - Ok(untar_in(&mut archive, target.as_ref()).await?) + untar_in(&mut archive, target.as_ref()).await?; + Ok(()) } /// Unzip a `.tar.zst` archive into the target directory, without requiring `Seek`. diff --git a/crates/uv-installer/Cargo.toml b/crates/uv-installer/Cargo.toml index 413664f628d5b..157aa0d4f2870 100644 --- a/crates/uv-installer/Cargo.toml +++ b/crates/uv-installer/Cargo.toml @@ -45,3 +45,4 @@ toml = { workspace = true } tracing = { workspace = true } url = { workspace = true } walkdir = { workspace = true } +rmp-serde = { workspace = true } diff --git a/crates/uv-installer/src/downloader.rs b/crates/uv-installer/src/downloader.rs index bba3c2fa7bd20..1c60f46e5d862 100644 --- a/crates/uv-installer/src/downloader.rs +++ b/crates/uv-installer/src/downloader.rs @@ -8,13 +8,14 @@ use tracing::instrument; use url::Url; use distribution_types::{ - BuildableSource, CachedDist, Dist, Identifier, LocalEditable, LocalEditables, RemoteSource, + BuildableSource, CachedDist, Dist, Hashed, Identifier, LocalEditable, LocalEditables, Name, + RemoteSource, }; use platform_tags::Tags; use uv_cache::Cache; use uv_client::RegistryClient; -use uv_distribution::DistributionDatabase; -use uv_types::{BuildContext, InFlight}; +use uv_distribution::{DistributionDatabase, LocalWheel}; +use uv_types::{BuildContext, InFlight, RequiredHashes}; use crate::editable::BuiltEditable; @@ -39,6 +40,7 @@ pub enum Error { pub struct Downloader<'a, Context: BuildContext + Send + Sync> { tags: &'a Tags, cache: &'a Cache, + hashes: &'a RequiredHashes, database: DistributionDatabase<'a, Context>, reporter: Option>, } @@ -47,12 +49,14 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { pub fn new( cache: &'a Cache, tags: &'a Tags, + hashes: &'a RequiredHashes, client: &'a RegistryClient, build_context: &'a Context, ) -> Self { Self { tags, cache, + hashes, database: DistributionDatabase::new(client, build_context), reporter: None, } @@ -65,6 +69,7 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { Self { tags: 
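`Hasher` and `HashReader` exist so a single pass over the archive can feed several algorithms at once, whatever mix of MD5/SHA-256/SHA-384/SHA-512 the requirements file asked for. A minimal sketch of that fan-out using the md-5 and sha2 crates directly, outside the async reader wrapper:

use md5::Md5; // the `md-5` package exposes itself as the `md5` crate
use sha2::{Digest, Sha256};

/// Feed the same bytes into several hashers in one pass, mirroring how
/// `HashReader::poll_read` updates every `Hasher` with each filled buffer.
fn hash_all(data: &[u8]) -> (String, String) {
    let mut md5 = Md5::new();
    let mut sha256 = Sha256::new();
    for chunk in data.chunks(8192) {
        md5.update(chunk);
        sha256.update(chunk);
    }
    (
        format!("{:x}", md5.finalize()),
        format!("{:x}", sha256.finalize()),
    )
}

fn main() {
    let (md5_hex, sha256_hex) = hash_all(b"abc");
    assert_eq!(md5_hex, "900150983cd24fb0d6963f7d28e17f72");
    assert_eq!(
        sha256_hex,
        "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad"
    );
}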
self.tags, cache: self.cache, + hashes: self.hashes, database: self.database.with_reporter(Facade::from(reporter.clone())), reporter: Some(reporter.clone()), } @@ -165,12 +170,27 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { pub async fn get_wheel(&self, dist: Dist, in_flight: &InFlight) -> Result { let id = dist.distribution_id(); if in_flight.downloads.register(id.clone()) { + let hashes = self.hashes.get(dist.name()).unwrap_or_default(); let result = self .database - .get_or_build_wheel(&dist, self.tags) + .get_or_build_wheel(&dist, self.tags, hashes) .boxed() .map_err(|err| Error::Fetch(dist.clone(), err)) .await + .and_then(|wheel: LocalWheel| { + if wheel.satisfies(hashes) { + Ok(wheel) + } else { + Err(Error::Fetch( + dist.clone(), + uv_distribution::Error::hash_mismatch( + dist.to_string(), + hashes, + wheel.hashes(), + ), + )) + } + }) .map(CachedDist::from); match result { Ok(cached) => { diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs index 25f74a48d429f..a9aab73169392 100644 --- a/crates/uv-installer/src/plan.rs +++ b/crates/uv-installer/src/plan.rs @@ -6,17 +6,22 @@ use anyhow::{bail, Result}; use rustc_hash::FxHashMap; use tracing::{debug, warn}; +use distribution_types::Hashed; use distribution_types::{ BuiltDist, CachedDirectUrlDist, CachedDist, Dist, IndexLocations, InstalledDist, InstalledMetadata, InstalledVersion, Name, SourceDist, }; use pep508_rs::{Requirement, VersionOrUrl}; use platform_tags::Tags; -use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, WheelCache}; +use uv_cache::{ + ArchiveTarget, ArchiveTimestamp, Cache, CacheBucket, CachedByTimestamp, WheelCache, +}; +use uv_client::DataWithCachePolicy; use uv_configuration::{NoBinary, Reinstall}; -use uv_distribution::{BuiltWheelIndex, RegistryWheelIndex}; +use uv_distribution::{Archive, BuiltWheelIndex, RegistryWheelIndex}; use uv_fs::Simplified; use uv_interpreter::PythonEnvironment; +use uv_types::RequiredHashes; use crate::{ResolvedEditable, SitePackages}; @@ -53,20 +58,25 @@ impl<'a> Planner<'a> { /// plan will respect cache entries created after the current time (as per the [`Refresh`] /// policy). Otherwise, entries will be ignored. The downstream distribution database may still /// read those entries from the cache after revalidating them. + /// + /// The install plan will also respect the required hashes, such that it will never return a + /// cached distribution that does not match the required hash. Like pip, though, it _will_ + /// return an _installed_ distribution that does not match the required hash. #[allow(clippy::too_many_arguments)] pub fn build( self, mut site_packages: SitePackages<'_>, reinstall: &Reinstall, no_binary: &NoBinary, + hashes: &RequiredHashes, index_locations: &IndexLocations, cache: &Cache, venv: &PythonEnvironment, tags: &Tags, ) -> Result { // Index all the already-downloaded wheels in the cache. 
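`Downloader::get_wheel` above looks up the required digests per package and falls back to an empty slice when none were declared, so unhashed installs keep working. A small sketch of that lookup shape; the `RequiredHashes` here is a simplified stand-in keyed by `String` rather than `PackageName`:

use std::collections::HashMap;

#[derive(Debug, Default)]
struct RequiredHashes(HashMap<String, Vec<String>>);

impl RequiredHashes {
    /// Required digests for a package, if any were declared.
    fn get(&self, package: &str) -> Option<&[String]> {
        self.0.get(package).map(Vec::as_slice)
    }
}

fn main() {
    let mut map = HashMap::new();
    map.insert("flask".to_string(), vec!["sha256:aaaa".to_string()]);
    let required = RequiredHashes(map);

    // Same shape as `self.hashes.get(dist.name()).unwrap_or_default()`:
    // an empty slice disables verification for that distribution.
    let flask: &[String] = required.get("flask").unwrap_or_default();
    let click: &[String] = required.get("click").unwrap_or_default();
    assert_eq!(flask.len(), 1);
    assert!(click.is_empty());
}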
- let mut registry_index = RegistryWheelIndex::new(cache, tags, index_locations); - let built_index = BuiltWheelIndex::new(cache, tags); + let mut registry_index = RegistryWheelIndex::new(cache, tags, index_locations, hashes); + let built_index = BuiltWheelIndex::new(cache, tags, hashes); let mut cached = vec![]; let mut remote = vec![]; @@ -206,16 +216,9 @@ impl<'a> Planner<'a> { } } Some(VersionOrUrl::VersionSpecifier(specifier)) => { - if let Some(distribution) = - registry_index - .get(&requirement.name) - .find_map(|(version, distribution)| { - if specifier.contains(version) { - Some(distribution) - } else { - None - } - }) + if let Some((_version, distribution)) = registry_index + .get(&requirement.name) + .find(|(version, _)| specifier.contains(version)) { debug!("Requirement already cached: {distribution}"); cached.push(CachedDist::Registry(distribution.clone())); @@ -252,19 +255,30 @@ impl<'a> Planner<'a> { CacheBucket::Wheels, WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), ) - .entry(wheel.filename.stem()); - - match cache_entry.path().canonicalize() { - Ok(archive) => { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - archive, - ); - - debug!("URL wheel requirement already cached: {cached_dist}"); - cached.push(CachedDist::Url(cached_dist)); - continue; + .entry(format!("{}.http", wheel.filename.stem())); + + // Read the HTTP pointer. + match std::fs::File::open(cache_entry.path()) { + Ok(file) => { + let data = DataWithCachePolicy::from_reader(file)?.data; + let archive = rmp_serde::from_slice::(&data)?; + + // Enforce hash checking. + let hashes = hashes.get(&requirement.name).unwrap_or_default(); + if archive.satisfies(hashes) { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + archive.hashes, + archive.path, + ); + + debug!( + "URL wheel requirement already cached: {cached_dist}" + ); + cached.push(CachedDist::Url(cached_dist)); + continue; + } } Err(err) if err.kind() == io::ErrorKind::NotFound => { // The cache entry doesn't exist, so it's not fresh. @@ -294,25 +308,35 @@ impl<'a> Planner<'a> { CacheBucket::Wheels, WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), ) - .entry(wheel.filename.stem()); - - match cache_entry.path().canonicalize() { - Ok(archive) => { - if ArchiveTimestamp::up_to_date_with( - &wheel.path, - ArchiveTarget::Cache(&archive), - )? { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - archive, - ); - - debug!( - "URL wheel requirement already cached: {cached_dist}" - ); - cached.push(CachedDist::Url(cached_dist)); - continue; + .entry(format!("{}.rev", wheel.filename.stem())); + + match fs_err::read(cache_entry.path()) { + Ok(data) => { + let rev = + rmp_serde::from_slice::>(&data)?; + let modified_at = ArchiveTimestamp::from_file(&wheel.path)?; + + // Enforce freshness. + if rev.timestamp == modified_at.timestamp() { + // Enforce hash checking. 
+ let archive = rev.data; + + let hashes = + hashes.get(&requirement.name).unwrap_or_default(); + if archive.satisfies(hashes) { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + archive.hashes, + archive.path, + ); + + debug!( + "Path wheel requirement already cached: {cached_dist}" + ); + cached.push(CachedDist::Url(cached_dist)); + continue; + } } } Err(err) if err.kind() == io::ErrorKind::NotFound => { diff --git a/crates/uv-requirements/src/lookahead.rs b/crates/uv-requirements/src/lookahead.rs index d2c0131fcf2cc..a31407309a9af 100644 --- a/crates/uv-requirements/src/lookahead.rs +++ b/crates/uv-requirements/src/lookahead.rs @@ -1,19 +1,18 @@ use std::collections::VecDeque; use anyhow::{Context, Result}; - use futures::stream::FuturesUnordered; use futures::StreamExt; use rustc_hash::FxHashSet; -use distribution_types::{Dist, DistributionMetadata, LocalEditable}; +use distribution_types::{Dist, DistributionMetadata, LocalEditable, Name}; use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; use pypi_types::Metadata23; use uv_client::RegistryClient; use uv_configuration::{Constraints, Overrides}; use uv_distribution::{DistributionDatabase, Reporter}; use uv_resolver::{InMemoryIndex, MetadataResponse}; -use uv_types::{BuildContext, RequestedRequirements}; +use uv_types::{BuildContext, RequestedRequirements, RequiredHashes}; /// A resolver for resolving lookahead requirements from direct URLs. /// @@ -40,6 +39,8 @@ pub struct LookaheadResolver<'a, Context: BuildContext + Send + Sync> { overrides: &'a Overrides, /// The editable requirements for the project. editables: &'a [(LocalEditable, Metadata23)], + /// The required hashes for the project. + hashes: &'a RequiredHashes, /// The in-memory index for resolving dependencies. index: &'a InMemoryIndex, /// The database for fetching and building distributions. @@ -53,6 +54,7 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> { constraints: &'a Constraints, overrides: &'a Overrides, editables: &'a [(LocalEditable, Metadata23)], + hashes: &'a RequiredHashes, context: &'a Context, client: &'a RegistryClient, index: &'a InMemoryIndex, @@ -62,6 +64,7 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> { constraints, overrides, editables, + hashes, index, database: DistributionDatabase::new(client, context), } @@ -151,9 +154,10 @@ impl<'a, Context: BuildContext + Send + Sync> LookaheadResolver<'a, Context> { metadata.requires_dist.clone() } else { // Run the PEP 517 build process to extract metadata from the source distribution. + let hashes = self.hashes.get(dist.name()).unwrap_or_default(); let metadata = self .database - .get_or_build_wheel_metadata(&dist) + .get_or_build_wheel_metadata(&dist, hashes) .await .with_context(|| match &dist { Dist::Built(built) => format!("Failed to download: {built}"), diff --git a/crates/uv-requirements/src/source_tree.rs b/crates/uv-requirements/src/source_tree.rs index a734db17b3054..b6cccd01b6e24 100644 --- a/crates/uv-requirements/src/source_tree.rs +++ b/crates/uv-requirements/src/source_tree.rs @@ -1,5 +1,4 @@ use std::borrow::Cow; - use std::path::{Path, PathBuf}; use anyhow::{Context, Result}; @@ -25,6 +24,8 @@ pub struct SourceTreeResolver<'a, Context: BuildContext + Send + Sync> { source_trees: Vec, /// The extras to include when resolving requirements. extras: &'a ExtrasSpecification<'a>, + /// Whether to require hashes for all dependencies. 
+ require_hashes: bool, /// The in-memory index for resolving dependencies. index: &'a InMemoryIndex, /// The database for fetching and building distributions. @@ -36,6 +37,7 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { pub fn new( source_trees: Vec, extras: &'a ExtrasSpecification<'a>, + require_hashes: bool, context: &'a Context, client: &'a RegistryClient, index: &'a InMemoryIndex, @@ -43,6 +45,7 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { Self { source_trees, extras, + require_hashes, index, database: DistributionDatabase::new(client, context), } @@ -84,6 +87,16 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { path: Cow::Owned(path), }); + // TODO(charlie): Should we enforce this earlier? If the metadata can be extracted + // statically, it won't go through this resolver. But we'll fail anyway, since the + // dependencies (when extracted from a `pyproject.toml` or `setup.py`) won't include hashes. + if self.require_hashes { + return Err(anyhow::anyhow!( + "Hash-checking is not supported for local directories: {}", + source_tree.user_display() + )); + } + // Fetch the metadata for the distribution. let metadata = { let id = PackageId::from_url(source.url()); @@ -104,7 +117,7 @@ impl<'a, Context: BuildContext + Send + Sync> SourceTreeResolver<'a, Context> { } else { // Run the PEP 517 build process to extract metadata from the source distribution. let source = BuildableSource::Url(source); - let metadata = self.database.build_wheel_metadata(&source).await?; + let metadata = self.database.build_wheel_metadata(&source, &[]).await?; // Insert the metadata into the index. self.index diff --git a/crates/uv-requirements/src/unnamed.rs b/crates/uv-requirements/src/unnamed.rs index 96e5cc179fd68..e658852ae56df 100644 --- a/crates/uv-requirements/src/unnamed.rs +++ b/crates/uv-requirements/src/unnamed.rs @@ -27,6 +27,8 @@ use uv_types::BuildContext; pub struct NamedRequirementsResolver<'a, Context: BuildContext + Send + Sync> { /// The requirements for the project. requirements: Vec, + /// Whether to check hashes for distributions. + require_hashes: bool, /// The in-memory index for resolving dependencies. index: &'a InMemoryIndex, /// The database for fetching and building distributions. @@ -37,12 +39,14 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont /// Instantiate a new [`NamedRequirementsResolver`] for a given set of requirements. pub fn new( requirements: Vec, + require_hashes: bool, context: &'a Context, client: &'a RegistryClient, index: &'a InMemoryIndex, ) -> Self { Self { requirements, + require_hashes, index, database: DistributionDatabase::new(client, context), } @@ -61,6 +65,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont pub async fn resolve(self) -> Result> { let Self { requirements, + require_hashes, index, database, } = self; @@ -69,7 +74,8 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont match requirement { RequirementsTxtRequirement::Pep508(requirement) => Ok(requirement), RequirementsTxtRequirement::Unnamed(requirement) => { - Self::resolve_requirement(requirement, index, &database).await + Self::resolve_requirement(requirement, require_hashes, index, &database) + .await } } }) @@ -81,6 +87,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont /// Infer the package name for a given "unnamed" requirement. 
async fn resolve_requirement( requirement: UnnamedRequirement, + require_hashes: bool, index: &InMemoryIndex, database: &DistributionDatabase<'a, Context>, ) -> Result { @@ -233,6 +240,13 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont } }; + // TODO(charlie): Support `--require-hashes` for unnamed requirements. + if require_hashes { + return Err(anyhow::anyhow!( + "Unnamed requirements are not supported with `--require-hashes`" + )); + } + // Fetch the metadata for the distribution. let name = { let id = PackageId::from_url(source.url()); @@ -248,7 +262,7 @@ impl<'a, Context: BuildContext + Send + Sync> NamedRequirementsResolver<'a, Cont } else { // Run the PEP 517 build process to extract metadata from the source distribution. let source = BuildableSource::Url(source); - let metadata = database.build_wheel_metadata(&source).await?; + let metadata = database.build_wheel_metadata(&source, &[]).await?; let name = metadata.name.clone(); diff --git a/crates/uv-resolver/src/error.rs b/crates/uv-resolver/src/error.rs index 96c46fcb1fb98..78e930bd51a54 100644 --- a/crates/uv-resolver/src/error.rs +++ b/crates/uv-resolver/src/error.rs @@ -93,6 +93,9 @@ pub enum ResolveError { #[error("Attempted to construct an invalid version specifier")] InvalidVersion(#[from] pep440_rs::VersionSpecifierBuildError), + #[error("In `--require-hashes` mode, all requirements must be pinned upfront with `==`, but found: {0}")] + UnhashedPackage(PackageName), + /// Something unexpected happened. #[error("{0}")] Failure(String), diff --git a/crates/uv-resolver/src/hash_checking_mode.rs b/crates/uv-resolver/src/hash_checking_mode.rs new file mode 100644 index 0000000000000..080939339e592 --- /dev/null +++ b/crates/uv-resolver/src/hash_checking_mode.rs @@ -0,0 +1,15 @@ +#[derive(Debug, Default, Clone, Copy)] +pub enum HashCheckingMode { + /// Hash-checking mode is disabled. + #[default] + Disabled, + /// Hash-checking mode is enabled. + Enabled, +} + +impl HashCheckingMode { + /// Returns `true` if hash-checking is enabled. + pub fn is_enabled(self) -> bool { + matches!(self, Self::Enabled) + } +} diff --git a/crates/uv-resolver/src/hashes.rs b/crates/uv-resolver/src/hashes.rs deleted file mode 100644 index 69c5bd934fe44..0000000000000 --- a/crates/uv-resolver/src/hashes.rs +++ /dev/null @@ -1,71 +0,0 @@ -use std::str::FromStr; - -use rustc_hash::{FxHashMap, FxHashSet}; - -use pep508_rs::{MarkerEnvironment, RequirementsTxtRequirement}; -use pypi_types::{HashError, Hashes}; -use requirements_txt::RequirementEntry; -use uv_normalize::PackageName; - -/// A set of package versions that are permitted, even if they're marked as yanked by the -/// relevant index. -#[derive(Debug, Default, Clone)] -pub struct RequiredHashes(FxHashMap>); - -impl RequiredHashes { - /// Generate the [`RequiredHashes`] from a set of requirement entries. - pub fn from_entries( - entries: &[RequirementEntry], - markers: &MarkerEnvironment, - ) -> Result { - let mut allowed_hashes = FxHashMap::>::default(); - - // For each requirement, map from name to allowed hashes. We use the last entry for each - // package. - // - // For now, unnamed requirements are unsupported. This should be fine, since `--require-hashes` - // tends to be used after `pip-compile`, which will always output named requirements. - // - // TODO(charlie): Preserve hashes from `requirements.txt` through to this pass, so that we - // can iterate over requirements directly, rather than iterating over the entries. 
- for entry in entries - .iter() - .filter(|entry| entry.requirement.evaluate_markers(markers, &[])) - { - // Extract the requirement name. - let name = match &entry.requirement { - RequirementsTxtRequirement::Pep508(requirement) => requirement.name.clone(), - RequirementsTxtRequirement::Unnamed(_) => { - return Err(RequiredHashesError::UnnamedRequirement) - } - }; - - // Parse the hashes. - let hashes = entry - .hashes - .iter() - .map(|hash| Hashes::from_str(hash)) - .collect::, _>>() - .unwrap(); - - // TODO(charlie): Extract hashes from URL fragments. - allowed_hashes.insert(name, hashes); - } - - Ok(Self(allowed_hashes)) - } - - /// Returns versions for the given package which are allowed even if marked as yanked by the - /// relevant index. - pub fn get(&self, package_name: &PackageName) -> Option<&FxHashSet> { - self.0.get(package_name) - } -} - -#[derive(thiserror::Error, Debug)] -pub enum RequiredHashesError { - #[error(transparent)] - Hash(#[from] HashError), - #[error("Unnamed requirements are not supported in `--require-hashes`")] - UnnamedRequirement, -} diff --git a/crates/uv-resolver/src/lib.rs b/crates/uv-resolver/src/lib.rs index 9c083ee3f8b32..040ffe315a182 100644 --- a/crates/uv-resolver/src/lib.rs +++ b/crates/uv-resolver/src/lib.rs @@ -1,6 +1,7 @@ pub use dependency_mode::DependencyMode; pub use error::ResolveError; pub use exclusions::Exclusions; +pub use hash_checking_mode::HashCheckingMode; pub use manifest::Manifest; pub use options::{Options, OptionsBuilder}; pub use preferences::{Preference, PreferenceError}; @@ -24,6 +25,7 @@ mod dependency_provider; mod editables; mod error; mod exclusions; +mod hash_checking_mode; mod manifest; mod options; mod pins; diff --git a/crates/uv-resolver/src/manifest.rs b/crates/uv-resolver/src/manifest.rs index 3d52d17eca433..2801aef1b51bc 100644 --- a/crates/uv-resolver/src/manifest.rs +++ b/crates/uv-resolver/src/manifest.rs @@ -3,7 +3,7 @@ use pep508_rs::{MarkerEnvironment, Requirement}; use pypi_types::Metadata23; use uv_configuration::{Constraints, Overrides}; use uv_normalize::PackageName; -use uv_types::{RequestedRequirements, RequiredHashes}; +use uv_types::RequestedRequirements; use crate::{preferences::Preference, Exclusions}; @@ -35,9 +35,6 @@ pub struct Manifest { /// direct requirements in their own right. pub(crate) editables: Vec<(LocalEditable, Metadata23)>, - /// The required hashes for the project. - pub(crate) hashes: RequiredHashes, - /// The installed packages to exclude from consideration during resolution. 
/// /// These typically represent packages that are being upgraded or reinstalled @@ -61,7 +58,6 @@ impl Manifest { preferences: Vec, project: Option, editables: Vec<(LocalEditable, Metadata23)>, - hashes: RequiredHashes, exclusions: Exclusions, lookaheads: Vec, ) -> Self { @@ -72,13 +68,12 @@ impl Manifest { preferences, project, editables, - hashes, exclusions, lookaheads, } } - pub fn simple(requirements: Vec, hashes: RequiredHashes) -> Self { + pub fn simple(requirements: Vec) -> Self { Self { requirements, constraints: Constraints::default(), @@ -86,7 +81,6 @@ impl Manifest { preferences: Vec::new(), project: None, editables: Vec::new(), - hashes, exclusions: Exclusions::default(), lookaheads: Vec::new(), } diff --git a/crates/uv-resolver/src/options.rs b/crates/uv-resolver/src/options.rs index 764728391a0e1..bc9481fed1dca 100644 --- a/crates/uv-resolver/src/options.rs +++ b/crates/uv-resolver/src/options.rs @@ -1,5 +1,6 @@ use chrono::{DateTime, Utc}; +use crate::hash_checking_mode::HashCheckingMode; use crate::{DependencyMode, PreReleaseMode, ResolutionMode}; /// Options for resolving a manifest. @@ -8,8 +9,8 @@ pub struct Options { pub resolution_mode: ResolutionMode, pub prerelease_mode: PreReleaseMode, pub dependency_mode: DependencyMode, + pub hash_checking_mode: HashCheckingMode, pub exclude_newer: Option>, - pub require_hashes: bool, } /// Builder for [`Options`]. @@ -18,8 +19,8 @@ pub struct OptionsBuilder { resolution_mode: ResolutionMode, prerelease_mode: PreReleaseMode, dependency_mode: DependencyMode, + hash_checking_mode: HashCheckingMode, exclude_newer: Option>, - require_hashes: bool, } impl OptionsBuilder { @@ -49,17 +50,17 @@ impl OptionsBuilder { self } - /// Sets the exclusion date. + /// Sets the hash-checking mode. #[must_use] - pub fn exclude_newer(mut self, exclude_newer: Option>) -> Self { - self.exclude_newer = exclude_newer; + pub fn hash_checking_mode(mut self, hash_checking_mode: HashCheckingMode) -> Self { + self.hash_checking_mode = hash_checking_mode; self } - /// Sets the `--requires-hash` flag. + /// Sets the exclusion date. #[must_use] - pub fn require_hashes(mut self, require_hashes: bool) -> Self { - self.require_hashes = require_hashes; + pub fn exclude_newer(mut self, exclude_newer: Option>) -> Self { + self.exclude_newer = exclude_newer; self } @@ -69,8 +70,8 @@ impl OptionsBuilder { resolution_mode: self.resolution_mode, prerelease_mode: self.prerelease_mode, dependency_mode: self.dependency_mode, + hash_checking_mode: self.hash_checking_mode, exclude_newer: self.exclude_newer, - require_hashes: self.require_hashes, } } } diff --git a/crates/uv-resolver/src/resolution.rs b/crates/uv-resolver/src/resolution.rs index c3fa9f30cfa7e..167ab339332ad 100644 --- a/crates/uv-resolver/src/resolution.rs +++ b/crates/uv-resolver/src/resolution.rs @@ -199,6 +199,9 @@ impl ResolutionGraph { } } PubGrubPackage::Package(package_name, Some(extra), Some(url)) => { + // STOPSHIP(charlie): For URLs, it's possible that we haven't computed the + // hashes yet. + // Validate that the `extra` exists. 
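With the `require_hashes: bool` flag replaced by the `HashCheckingMode` enum on `Options`, the resolver receives the mode through `OptionsBuilder` and, as the resolver/mod.rs hunk below shows, errors on any package that reaches `visit_package` without an entry in `RequiredHashes`. A rough sketch of that guard with simplified local types rather than the real uv-resolver ones; the error text mirrors the new `ResolveError::UnhashedPackage` message:

use std::collections::HashSet;

#[derive(Debug, Default, Clone, Copy)]
enum HashCheckingMode {
    #[default]
    Disabled,
    Enabled,
}

impl HashCheckingMode {
    fn is_enabled(self) -> bool {
        matches!(self, Self::Enabled)
    }
}

/// Mirrors the guard added to `visit_package`: under `--require-hashes`, any
/// package without a required-hash entry is an immediate resolution error.
fn check_package(mode: HashCheckingMode, required: &HashSet<String>, package: &str) -> Result<(), String> {
    if mode.is_enabled() && !required.contains(package) {
        return Err(format!(
            "In `--require-hashes` mode, all requirements must be pinned upfront with `==`, but found: {package}"
        ));
    }
    Ok(())
}

fn main() {
    let required: HashSet<String> = ["flask".to_string()].into();
    assert!(check_package(HashCheckingMode::Enabled, &required, "flask").is_ok());
    assert!(check_package(HashCheckingMode::Enabled, &required, "click").is_err());
    assert!(check_package(HashCheckingMode::Disabled, &required, "click").is_ok());
}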
let dist = PubGrubDistribution::from_url(package_name, url); diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index 01d812f5afff7..32b943ed9768e 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -31,11 +31,12 @@ use uv_configuration::{Constraints, Overrides}; use uv_distribution::DistributionDatabase; use uv_interpreter::Interpreter; use uv_normalize::PackageName; -use uv_types::{BuildContext, InstalledPackagesProvider}; +use uv_types::{BuildContext, InstalledPackagesProvider, RequiredHashes}; use crate::candidate_selector::{CandidateDist, CandidateSelector}; use crate::editables::Editables; use crate::error::ResolveError; +use crate::hash_checking_mode::HashCheckingMode; use crate::manifest::Manifest; use crate::pins::FilePins; use crate::preferences::Preferences; @@ -108,6 +109,8 @@ pub struct Resolver< urls: Urls, locals: Locals, dependency_mode: DependencyMode, + hash_checking_mode: HashCheckingMode, + hashes: &'a RequiredHashes, markers: &'a MarkerEnvironment, python_requirement: PythonRequirement, selector: CandidateSelector, @@ -140,6 +143,7 @@ impl< client: &'a RegistryClient, flat_index: &'a FlatIndex, index: &'a InMemoryIndex, + hashes: &'a RequiredHashes, build_context: &'a Context, installed_packages: &'a InstalledPackages, ) -> Result { @@ -150,7 +154,7 @@ impl< tags, PythonRequirement::new(interpreter, markers), AllowedYanks::from_manifest(&manifest, markers), - manifest.hashes.clone(), + hashes, options.exclude_newer, build_context.no_binary(), build_context.no_build(), @@ -158,6 +162,7 @@ impl< Self::new_custom_io( manifest, options, + hashes, markers, PythonRequirement::new(interpreter, markers), index, @@ -177,6 +182,7 @@ impl< pub fn new_custom_io( manifest: Manifest, options: Options, + hashes: &'a RequiredHashes, markers: &'a MarkerEnvironment, python_requirement: PythonRequirement, index: &'a InMemoryIndex, @@ -189,6 +195,7 @@ impl< visited: DashSet::default(), selector: CandidateSelector::for_resolution(options, &manifest, markers), dependency_mode: options.dependency_mode, + hash_checking_mode: options.hash_checking_mode, urls: Urls::from_manifest(&manifest, markers)?, locals: Locals::from_manifest(&manifest, markers), project: manifest.project, @@ -198,6 +205,7 @@ impl< preferences: Preferences::from_iter(manifest.preferences, markers), exclusions: manifest.exclusions, editables: Editables::from_requirements(manifest.editables), + hashes, markers, python_requirement, reporter: None, @@ -501,6 +509,13 @@ impl< PubGrubPackage::Root(_) => {} PubGrubPackage::Python(_) => {} PubGrubPackage::Package(package_name, _extra, None) => { + // Validate that the package is permitted under hash-checking mode. + if self.hash_checking_mode.is_enabled() { + if !self.hashes.contains(package_name) { + return Err(ResolveError::UnhashedPackage(package_name.clone())); + } + } + // Emit a request to fetch the metadata for this package. if self.index.packages.register(package_name.clone()) { priorities.add(package_name.clone()); @@ -510,6 +525,13 @@ impl< } } PubGrubPackage::Package(package_name, _extra, Some(url)) => { + // Validate that the package is permitted under hash-checking mode. + if self.hash_checking_mode.is_enabled() { + if !self.hashes.contains(package_name) { + return Err(ResolveError::UnhashedPackage(package_name.clone())); + } + } + // Emit a request to fetch the metadata for this distribution. 
let dist = Dist::from_url(package_name.clone(), url.clone())?; if self.index.distributions.register(dist.package_id()) { @@ -797,17 +819,11 @@ impl< for (package, version) in constraints.iter() { debug!("Adding direct dependency: {package}{version}"); - // STOPSHIP(charlie): If `--require-hashes` is enabled, fail if: - // - Any requirement is a VCS requirement. (But it's fine if it's already installed...) - // - Any requirement is a source tree. (But it's fine if it's already installed...) - // Emit a request to fetch the metadata for this package. self.visit_package(package, priorities, request_sink) .await?; } - // STOPSHIP(charlie): If `--require-hashes` is enabled, fail if editables are provided. - // Add a dependency on each editable. for (editable, metadata) in self.editables.iter() { constraints.push( diff --git a/crates/uv-resolver/src/resolver/provider.rs b/crates/uv-resolver/src/resolver/provider.rs index ec7a521b3074b..0353f464eefab 100644 --- a/crates/uv-resolver/src/resolver/provider.rs +++ b/crates/uv-resolver/src/resolver/provider.rs @@ -3,7 +3,7 @@ use std::future::Future; use anyhow::Result; use chrono::{DateTime, Utc}; -use distribution_types::{Dist, IndexLocations}; +use distribution_types::{Dist, IndexLocations, Name}; use platform_tags::Tags; use pypi_types::Metadata23; use uv_client::{FlatIndex, RegistryClient}; @@ -96,7 +96,7 @@ impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Contex tags: &'a Tags, python_requirement: PythonRequirement, allowed_yanks: AllowedYanks, - required_hashes: RequiredHashes, + required_hashes: &'a RequiredHashes, exclude_newer: Option>, no_binary: &'a NoBinary, no_build: &'a NoBuild, @@ -108,7 +108,7 @@ impl<'a, Context: BuildContext + Send + Sync> DefaultResolverProvider<'a, Contex tags: tags.clone(), python_requirement, allowed_yanks, - required_hashes, + required_hashes: required_hashes.clone(), exclude_newer, no_binary: no_binary.clone(), no_build: no_build.clone(), @@ -176,7 +176,8 @@ impl<'a, Context: BuildContext + Send + Sync> ResolverProvider /// Fetch the metadata for a distribution, building it if necessary. async fn get_or_build_wheel_metadata<'io>(&'io self, dist: &'io Dist) -> WheelMetadataResult { - match self.fetcher.get_or_build_wheel_metadata(dist).await { + let hashes = self.required_hashes.get(dist.name()).unwrap_or_default(); + match self.fetcher.get_or_build_wheel_metadata(dist, hashes).await { Ok(metadata) => Ok(MetadataResponse::Found(metadata)), Err(err) => match err { uv_distribution::Error::Client(client) => match client.into_kind() { diff --git a/crates/uv-resolver/src/version_map.rs b/crates/uv-resolver/src/version_map.rs index 267be106ce4c1..d224258cfbeae 100644 --- a/crates/uv-resolver/src/version_map.rs +++ b/crates/uv-resolver/src/version_map.rs @@ -113,8 +113,8 @@ impl VersionMap { .unwrap_or_default(); let required_hashes = required_hashes .get(package_name) - .cloned() - .unwrap_or_default(); + .unwrap_or_default() + .to_vec(); Self { inner: VersionMapInner::Lazy(VersionMapLazy { map, @@ -310,7 +310,7 @@ struct VersionMapLazy { /// Which yanked versions are allowed allowed_yanks: FxHashSet, /// The hashes of allowed distributions. 
- required_hashes: FxHashSet, + required_hashes: Vec, } impl VersionMapLazy { diff --git a/crates/uv-resolver/tests/resolver.rs b/crates/uv-resolver/tests/resolver.rs index 3581aab4ea8fc..96c1d9df5b1f2 100644 --- a/crates/uv-resolver/tests/resolver.rs +++ b/crates/uv-resolver/tests/resolver.rs @@ -274,7 +274,6 @@ async fn black_mypy_extensions() -> Result<()> { vec![], None, vec![], - RequiredHashes::default(), Exclusions::default(), vec![], ); @@ -314,7 +313,6 @@ async fn black_mypy_extensions_extra() -> Result<()> { vec![], None, vec![], - RequiredHashes::default(), Exclusions::default(), vec![], ); @@ -352,7 +350,6 @@ async fn black_flake8() -> Result<()> { vec![], None, vec![], - RequiredHashes::default(), Exclusions::default(), vec![], ); @@ -444,7 +441,6 @@ async fn black_respect_preference() -> Result<()> { )?)], None, vec![], - RequiredHashes::default(), Exclusions::default(), vec![], ); @@ -483,7 +479,6 @@ async fn black_ignore_preference() -> Result<()> { )?)], None, vec![], - RequiredHashes::default(), Exclusions::default(), vec![], ); diff --git a/crates/uv-types/Cargo.toml b/crates/uv-types/Cargo.toml index cf3339c6321a2..2ee16ce44cf96 100644 --- a/crates/uv-types/Cargo.toml +++ b/crates/uv-types/Cargo.toml @@ -15,6 +15,7 @@ workspace = true [dependencies] distribution-types = { workspace = true } once-map = { workspace = true } +pep440_rs = { workspace = true } pep508_rs = { workspace = true } pypi-types = { workspace = true } uv-cache = { workspace = true } diff --git a/crates/uv-types/src/hashes.rs b/crates/uv-types/src/hashes.rs index d8e75b526567e..4f12c18c73045 100644 --- a/crates/uv-types/src/hashes.rs +++ b/crates/uv-types/src/hashes.rs @@ -1,15 +1,14 @@ +use rustc_hash::FxHashMap; use std::str::FromStr; -use rustc_hash::{FxHashMap, FxHashSet}; - -use pep508_rs::{MarkerEnvironment, Requirement}; +use pep508_rs::{MarkerEnvironment, Requirement, VersionOrUrl}; use pypi_types::{HashDigest, HashError}; use uv_normalize::PackageName; /// A set of package versions that are permitted, even if they're marked as yanked by the /// relevant index. #[derive(Debug, Default, Clone)] -pub struct RequiredHashes(FxHashMap>); +pub struct RequiredHashes(FxHashMap>); impl RequiredHashes { /// Generate the [`RequiredHashes`] from a set of requirement entries. @@ -17,7 +16,7 @@ impl RequiredHashes { requirements: impl Iterator)>, markers: &MarkerEnvironment, ) -> Result { - let mut allowed_hashes = FxHashMap::>::default(); + let mut allowed_hashes = FxHashMap::>::default(); // For each requirement, map from name to allowed hashes. We use the last entry for each // package. @@ -32,11 +31,40 @@ impl RequiredHashes { continue; } + // Every requirement must be either a pinned version or a direct URL. + match requirement.version_or_url.as_ref() { + Some(VersionOrUrl::Url(_)) => { + // Direct URLs are always allowed. + } + Some(VersionOrUrl::VersionSpecifier(specifiers)) => { + if specifiers + .iter() + .any(|specifier| matches!(specifier.operator(), pep440_rs::Operator::Equal)) + { + // Pinned versions are allowed. + } else { + return Err(RequiredHashesError::UnpinnedRequirement( + requirement.to_string(), + )); + } + } + None => { + return Err(RequiredHashesError::UnpinnedRequirement( + requirement.to_string(), + )) + } + } + + // Every requirement must include a hash. + if hashes.is_empty() { + return Err(RequiredHashesError::MissingHashes(requirement.to_string())); + } + // Parse the hashes. 
let hashes = hashes .iter() .map(|hash| HashDigest::from_str(hash)) - .collect::, _>>() + .collect::, _>>() .unwrap(); // TODO(charlie): Extract hashes from URL fragments. @@ -48,8 +76,13 @@ impl RequiredHashes { /// Returns versions for the given package which are allowed even if marked as yanked by the /// relevant index. - pub fn get(&self, package_name: &PackageName) -> Option<&FxHashSet> { - self.0.get(package_name) + pub fn get(&self, package_name: &PackageName) -> Option<&[HashDigest]> { + self.0.get(package_name).map(Vec::as_slice) + } + + /// Returns whether the given package is allowed even if marked as yanked by the relevant index. + pub fn contains(&self, package_name: &PackageName) -> bool { + self.0.contains_key(package_name) } } @@ -59,4 +92,8 @@ pub enum RequiredHashesError { Hash(#[from] HashError), #[error("Unnamed requirements are not supported in `--require-hashes`")] UnnamedRequirement, + #[error("In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: {0}")] + UnpinnedRequirement(String), + #[error("In `--require-hashes` mode, all requirement must have a hash, but none were provided for: {0}")] + MissingHashes(String), } diff --git a/crates/uv/src/commands/pip_compile.rs b/crates/uv/src/commands/pip_compile.rs index 1ac1f50744bbf..55a0a9b9c7cd4 100644 --- a/crates/uv/src/commands/pip_compile.rs +++ b/crates/uv/src/commands/pip_compile.rs @@ -6,7 +6,7 @@ use std::ops::Deref; use std::path::Path; use std::str::FromStr; -use anstream::{AutoStream, eprint, StripStream}; +use anstream::{eprint, AutoStream, StripStream}; use anyhow::{anyhow, Context, Result}; use chrono::{DateTime, Utc}; use itertools::Itertools; @@ -17,7 +17,7 @@ use tracing::debug; use distribution_types::{IndexLocations, LocalEditable, LocalEditables, Verbatim}; use platform_tags::Tags; use requirements_txt::EditableRequirement; -use uv_auth::{GLOBAL_AUTH_STORE, KeyringProvider}; +use uv_auth::{KeyringProvider, GLOBAL_AUTH_STORE}; use uv_cache::Cache; use uv_client::{ BaseClientBuilder, Connectivity, FlatIndex, FlatIndexClient, RegistryClientBuilder, @@ -32,8 +32,8 @@ use uv_installer::Downloader; use uv_interpreter::{find_best_python, PythonEnvironment, PythonVersion}; use uv_normalize::{ExtraName, PackageName}; use uv_requirements::{ - ExtrasSpecification, LookaheadResolver, NamedRequirementsResolver, RequirementsSource, - RequirementsSpecification, SourceTreeResolver, upgrade::read_lockfile, + upgrade::read_lockfile, ExtrasSpecification, LookaheadResolver, NamedRequirementsResolver, + RequirementsSource, RequirementsSpecification, SourceTreeResolver, }; use uv_resolver::{ AnnotationStyle, DependencyMode, DisplayResolutionGraph, Exclusions, InMemoryIndex, Manifest, @@ -42,8 +42,8 @@ use uv_resolver::{ use uv_types::{BuildIsolation, EmptyInstalledPackages, InFlight, RequiredHashes}; use uv_warnings::warn_user; -use crate::commands::{elapsed, ExitStatus}; use crate::commands::reporters::{DownloadReporter, ResolverReporter}; +use crate::commands::{elapsed, ExitStatus}; use crate::printer::Printer; /// Resolve a set of requirements into a set of pinned versions. @@ -199,6 +199,9 @@ pub(crate) async fn pip_compile( |python_version| Cow::Owned(python_version.markers(interpreter.markers())), ); + // Don't enforce hashes during resolution. + let hashes = RequiredHashes::default(); + // Incorporate any index locations from the provided sources. 
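The reworked `RequiredHashes` in uv-types enforces the same preconditions as pip's `--require-hashes`: every requirement must be either a direct URL or pinned with `==`, and every requirement must carry at least one hash. A standalone sketch of that validation with deliberately simplified string handling (the real code parses `HashDigest`s and inspects pep440 operators); the package name and digests below are illustrative only:

#[derive(Debug)]
enum RequirementsError {
    Unpinned(String),
    MissingHashes(String),
}

/// `spec` is the version specifier or URL (if any) and `hashes` the `--hash=...`
/// values attached to the requirement line.
fn validate(name: &str, spec: Option<&str>, hashes: &[&str]) -> Result<(), RequirementsError> {
    match spec {
        // Direct URLs and exact pins are acceptable under `--require-hashes`.
        Some(spec) if spec.starts_with("==") || spec.contains("://") => {}
        _ => return Err(RequirementsError::Unpinned(name.to_string())),
    }
    if hashes.is_empty() {
        return Err(RequirementsError::MissingHashes(name.to_string()));
    }
    Ok(())
}

fn main() {
    assert!(validate("idna", Some("==3.6"), &["sha256:aaaa"]).is_ok());
    assert!(validate("idna", Some(">=3"), &["sha256:aaaa"]).is_err()); // not pinned
    assert!(validate("idna", Some("==3.6"), &[]).is_err()); // no hash given
}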
let index_locations = index_locations.combine(index_url, extra_index_urls, find_links, no_index); @@ -230,13 +233,7 @@ pub(crate) async fn pip_compile( let flat_index = { let client = FlatIndexClient::new(&client, &cache); let entries = client.fetch(index_locations.flat_index()).await?; - FlatIndex::from_entries( - entries, - &tags, - &RequiredHashes::default(), - &no_build, - &NoBinary::None, - ) + FlatIndex::from_entries(entries, &tags, &hashes, &no_build, &NoBinary::None) }; // Track in-flight downloads, builds, etc., across resolutions. @@ -275,6 +272,7 @@ pub(crate) async fn pip_compile( // Convert from unnamed to named requirements. let mut requirements = NamedRequirementsResolver::new( requirements, + false, &build_dispatch, &client, &top_level_index, @@ -289,6 +287,7 @@ pub(crate) async fn pip_compile( SourceTreeResolver::new( source_trees, &extras, + false, &build_dispatch, &client, &top_level_index, @@ -313,7 +312,7 @@ pub(crate) async fn pip_compile( LocalEditable { url, path, extras } })); - let downloader = Downloader::new(&cache, &tags, &client, &build_dispatch) + let downloader = Downloader::new(&cache, &tags, &hashes, &client, &build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(editables.len() as u64)); // Build all editables. @@ -361,6 +360,7 @@ pub(crate) async fn pip_compile( &constraints, &overrides, &editables, + &hashes, &build_dispatch, &client, &top_level_index, @@ -377,8 +377,6 @@ pub(crate) async fn pip_compile( preferences, project, editables, - // Do not require hashes during resolution. - RequiredHashes::default(), // Do not consider any installed packages during resolution. Exclusions::All, lookaheads, @@ -401,6 +399,7 @@ pub(crate) async fn pip_compile( &client, &flat_index, &top_level_index, + &hashes, &build_dispatch, &EmptyInstalledPackages, )? 
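Note: the `RequiredHashes::from_requirements` hunk above enforces two preconditions before any hashes are parsed: every entry must be either a direct URL or pinned with `==`, and every entry must carry at least one `--hash`. The following standalone sketch mirrors that validation; `Spec`, `Entry`, and `ValidationError` are simplified stand-ins for illustration, not the real uv or pep508 types.

// Sketch of the `--require-hashes` preconditions, with hypothetical types.
#[derive(Debug)]
enum Spec {
    /// A direct URL requirement, e.g. `anyio @ https://.../anyio-4.0.0.tar.gz`.
    Url(String),
    /// A version specifier, e.g. `anyio==4.0.0` or `anyio>=4`.
    Version { operator: String },
    /// A bare name with no version or URL, e.g. `anyio`.
    Unspecified,
}

#[derive(Debug)]
struct Entry {
    name: String,
    spec: Spec,
    hashes: Vec<String>,
}

#[derive(Debug)]
enum ValidationError {
    /// The requirement is neither a direct URL nor pinned with `==`.
    Unpinned(String),
    /// The requirement carries no `--hash` values.
    MissingHashes(String),
}

fn validate(entry: &Entry) -> Result<(), ValidationError> {
    match &entry.spec {
        // Direct URLs are always allowed.
        Spec::Url(_) => {}
        // Versioned requirements must be pinned with `==`.
        Spec::Version { operator } if operator.as_str() == "==" => {}
        _ => return Err(ValidationError::Unpinned(entry.name.clone())),
    }
    // Every requirement must include at least one hash.
    if entry.hashes.is_empty() {
        return Err(ValidationError::MissingHashes(entry.name.clone()));
    }
    Ok(())
}

fn main() {
    let pinned = Entry {
        name: "anyio".to_string(),
        spec: Spec::Version { operator: "==".to_string() },
        hashes: vec!["sha256:cfdb2b58...".to_string()],
    };
    let unpinned = Entry {
        name: "idna".to_string(),
        spec: Spec::Unspecified,
        hashes: vec![],
    };
    println!("{:?}", validate(&pinned));   // Ok(())
    println!("{:?}", validate(&unpinned)); // Err(Unpinned("idna"))
}

This is the same shape of check that produces the `UnpinnedRequirement` and `MissingHashes` errors exercised by the new tests further down.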
diff --git a/crates/uv/src/commands/pip_install.rs b/crates/uv/src/commands/pip_install.rs index 5f8a72c13d811..0853b05bef38d 100644 --- a/crates/uv/src/commands/pip_install.rs +++ b/crates/uv/src/commands/pip_install.rs @@ -18,7 +18,7 @@ use pep508_rs::{MarkerEnvironment, Requirement, RequirementsTxtRequirement}; use platform_tags::Tags; use pypi_types::{Metadata23, Yanked}; use requirements_txt::EditableRequirement; -use uv_auth::{GLOBAL_AUTH_STORE, KeyringProvider}; +use uv_auth::{KeyringProvider, GLOBAL_AUTH_STORE}; use uv_cache::Cache; use uv_client::{ BaseClientBuilder, Connectivity, FlatIndex, FlatIndexClient, RegistryClient, @@ -38,14 +38,14 @@ use uv_requirements::{ RequirementsSpecification, SourceTreeResolver, }; use uv_resolver::{ - DependencyMode, Exclusions, InMemoryIndex, Manifest, Options, OptionsBuilder, Preference, - PreReleaseMode, ResolutionGraph, ResolutionMode, Resolver, + DependencyMode, Exclusions, HashCheckingMode, InMemoryIndex, Manifest, Options, OptionsBuilder, + PreReleaseMode, Preference, ResolutionGraph, ResolutionMode, Resolver, }; use uv_types::{BuildIsolation, InFlight, RequiredHashes}; use uv_warnings::warn_user; -use crate::commands::{ChangeEvent, ChangeEventKind, compile_bytecode, elapsed, ExitStatus}; use crate::commands::reporters::{DownloadReporter, InstallReporter, ResolverReporter}; +use crate::commands::{compile_bytecode, elapsed, ChangeEvent, ChangeEventKind, ExitStatus}; use crate::printer::Printer; use super::DryRunEvent; @@ -86,10 +86,6 @@ pub(crate) async fn pip_install( ) -> Result { let start = std::time::Instant::now(); - if require_hashes { - warn_user!("Hash-checking mode (via `--require-hashes`) is not yet supported."); - } - let client_builder = BaseClientBuilder::new() .connectivity(connectivity) .native_tls(native_tls) @@ -191,7 +187,7 @@ pub(crate) async fn pip_install( let markers = venv.interpreter().markers(); // Collect the set of required hashes. - let required_hashes = if require_hashes { + let hashes = if require_hashes { RequiredHashes::from_requirements( entries .into_iter() @@ -229,7 +225,7 @@ pub(crate) async fn pip_install( let flat_index = { let client = FlatIndexClient::new(&client, &cache); let entries = client.fetch(index_locations.flat_index()).await?; - FlatIndex::from_entries(entries, tags, &required_hashes, &no_build, &no_binary) + FlatIndex::from_entries(entries, tags, &hashes, &no_build, &no_binary) }; // Determine whether to enable build isolation. @@ -269,19 +265,31 @@ pub(crate) async fn pip_install( // Resolve the requirements from the provided sources. let requirements = { // Convert from unnamed to named requirements. - let mut requirements = - NamedRequirementsResolver::new(requirements, &resolve_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?; + let mut requirements = NamedRequirementsResolver::new( + requirements, + require_hashes, + &resolve_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?; // Resolve any source trees into requirements. 
if !source_trees.is_empty() { requirements.extend( - SourceTreeResolver::new(source_trees, extras, &resolve_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?, + SourceTreeResolver::new( + source_trees, + extras, + require_hashes, + &resolve_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?, ); } @@ -299,6 +307,7 @@ pub(crate) async fn pip_install( build_editables( &editables, editable_wheel_dir.path(), + &hashes, &cache, &interpreter, tags, @@ -313,8 +322,12 @@ pub(crate) async fn pip_install( .resolution_mode(resolution_mode) .prerelease_mode(prerelease_mode) .dependency_mode(dependency_mode) + .hash_checking_mode(if require_hashes { + HashCheckingMode::Enabled + } else { + HashCheckingMode::Disabled + }) .exclude_newer(exclude_newer) - .require_hashes(require_hashes) .build(); // Resolve the requirements. @@ -324,7 +337,7 @@ pub(crate) async fn pip_install( overrides, project, &editables, - required_hashes, + &hashes, &site_packages, &reinstall, &upgrade, @@ -385,6 +398,7 @@ pub(crate) async fn pip_install( link_mode, compile, &index_locations, + &hashes, tags, &client, &in_flight, @@ -460,6 +474,7 @@ async fn read_requirements( async fn build_editables( editables: &[EditableRequirement], editable_wheel_dir: &Path, + hashes: &RequiredHashes, cache: &Cache, interpreter: &Interpreter, tags: &Tags, @@ -469,7 +484,7 @@ async fn build_editables( ) -> Result, Error> { let start = std::time::Instant::now(); - let downloader = Downloader::new(cache, tags, client, build_dispatch) + let downloader = Downloader::new(cache, tags, hashes, client, build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(editables.len() as u64)); let editables = LocalEditables::from_editables(editables.iter().map(|editable| { @@ -526,7 +541,7 @@ async fn resolve( overrides: Vec, project: Option, editables: &[BuiltEditable], - required_hashes: RequiredHashes, + hashes: &RequiredHashes, site_packages: &SitePackages<'_>, reinstall: &Reinstall, upgrade: &Upgrade, @@ -573,6 +588,7 @@ async fn resolve( &constraints, &overrides, &editables, + &hashes, build_dispatch, client, index, @@ -589,7 +605,6 @@ async fn resolve( preferences, project, editables, - required_hashes, exclusions, lookaheads, ); @@ -604,6 +619,7 @@ async fn resolve( client, flat_index, index, + &hashes, build_dispatch, site_packages, )? 
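Note: the hunks above thread the collected hashes through `Downloader` and the install path so that fetched archives can be verified against the expected digests. As a rough illustration of the underlying verification step, here is a minimal sketch that streams a file through SHA-256 and compares it to a `--hash=sha256:...` value. It assumes only the `sha2` crate (added to the workspace in this patch); it is not the actual `uv-extract`/`uv-distribution` implementation, and the wheel path in `main` is hypothetical.

use std::fs::File;
use std::io::{self, Read};

use sha2::{Digest, Sha256};

/// Compute the lowercase hex SHA-256 digest of a file, streaming in chunks.
fn sha256_file(path: &str) -> io::Result<String> {
    let mut file = File::open(path)?;
    let mut hasher = Sha256::new();
    let mut buf = [0u8; 8192];
    loop {
        let n = file.read(&mut buf)?;
        if n == 0 {
            break;
        }
        hasher.update(&buf[..n]);
    }
    Ok(hasher
        .finalize()
        .iter()
        .map(|byte| format!("{byte:02x}"))
        .collect())
}

/// Compare a computed digest against an expected `sha256:<hex>` value.
fn verify_sha256(path: &str, expected: &str) -> io::Result<bool> {
    let expected = expected.strip_prefix("sha256:").unwrap_or(expected);
    Ok(sha256_file(path)?.eq_ignore_ascii_case(expected))
}

fn main() -> io::Result<()> {
    // Hypothetical local wheel path and digest, for illustration only.
    let matches = verify_sha256(
        "anyio-4.0.0-py3-none-any.whl",
        "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f",
    )?;
    println!("hash matches: {matches}");
    Ok(())
}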
@@ -647,6 +663,7 @@ async fn install( link_mode: LinkMode, compile: bool, index_urls: &IndexLocations, + hashes: &RequiredHashes, tags: &Tags, client: &RegistryClient, in_flight: &InFlight, @@ -674,6 +691,7 @@ async fn install( site_packages, reinstall, no_binary, + hashes, index_urls, cache, venv, @@ -726,7 +744,7 @@ async fn install( } else { let start = std::time::Instant::now(); - let downloader = Downloader::new(cache, tags, client, build_dispatch) + let downloader = Downloader::new(cache, tags, hashes, client, build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64)); let wheels = downloader diff --git a/crates/uv/src/commands/pip_sync.rs b/crates/uv/src/commands/pip_sync.rs index c8a1737a4418e..f23ff233c1790 100644 --- a/crates/uv/src/commands/pip_sync.rs +++ b/crates/uv/src/commands/pip_sync.rs @@ -14,7 +14,7 @@ use pep508_rs::RequirementsTxtRequirement; use platform_tags::Tags; use pypi_types::Yanked; use requirements_txt::EditableRequirement; -use uv_auth::{GLOBAL_AUTH_STORE, KeyringProvider}; +use uv_auth::{KeyringProvider, GLOBAL_AUTH_STORE}; use uv_cache::{ArchiveTarget, ArchiveTimestamp, Cache}; use uv_client::{ BaseClientBuilder, Connectivity, FlatIndex, FlatIndexClient, RegistryClient, @@ -25,18 +25,20 @@ use uv_configuration::{ }; use uv_dispatch::BuildDispatch; use uv_fs::Simplified; -use uv_installer::{Downloader, is_dynamic, Plan, Planner, ResolvedEditable, SitePackages}; +use uv_installer::{is_dynamic, Downloader, Plan, Planner, ResolvedEditable, SitePackages}; use uv_interpreter::{Interpreter, PythonEnvironment}; use uv_requirements::{ ExtrasSpecification, NamedRequirementsResolver, RequirementsSource, RequirementsSpecification, SourceTreeResolver, }; -use uv_resolver::{DependencyMode, InMemoryIndex, Manifest, OptionsBuilder, Resolver}; +use uv_resolver::{ + DependencyMode, HashCheckingMode, InMemoryIndex, Manifest, OptionsBuilder, Resolver, +}; use uv_types::{BuildIsolation, EmptyInstalledPackages, InFlight, RequiredHashes}; use uv_warnings::warn_user; -use crate::commands::{ChangeEvent, ChangeEventKind, compile_bytecode, elapsed, ExitStatus}; use crate::commands::reporters::{DownloadReporter, InstallReporter, ResolverReporter}; +use crate::commands::{compile_bytecode, elapsed, ChangeEvent, ChangeEventKind, ExitStatus}; use crate::printer::Printer; /// Install a set of locked requirements into the current Python environment. @@ -66,10 +68,6 @@ pub(crate) async fn pip_sync( ) -> Result { let start = std::time::Instant::now(); - if require_hashes { - warn_user!("Hash-checking mode (via `--require-hashes`) is not yet supported."); - } - let client_builder = BaseClientBuilder::new() .connectivity(connectivity) .native_tls(native_tls) @@ -141,7 +139,7 @@ pub(crate) async fn pip_sync( let markers = venv.interpreter().markers(); // Collect the set of required hashes. - let required_hashes = if require_hashes { + let hashes = if require_hashes { RequiredHashes::from_requirements( entries .into_iter() @@ -179,7 +177,7 @@ pub(crate) async fn pip_sync( let flat_index = { let client = FlatIndexClient::new(&client, &cache); let entries = client.fetch(index_locations.flat_index()).await?; - FlatIndex::from_entries(entries, tags, &required_hashes, &no_build, &no_binary) + FlatIndex::from_entries(entries, tags, &hashes, &no_build, &no_binary) }; // Create a shared in-memory index. @@ -221,11 +219,16 @@ pub(crate) async fn pip_sync( // Convert from unnamed to named requirements. let requirements = { // Convert from unnamed to named requirements. 
- let mut requirements = - NamedRequirementsResolver::new(requirements, &build_dispatch, &client, &index) - .with_reporter(ResolverReporter::from(printer)) - .resolve() - .await?; + let mut requirements = NamedRequirementsResolver::new( + requirements, + require_hashes, + &build_dispatch, + &client, + &index, + ) + .with_reporter(ResolverReporter::from(printer)) + .resolve() + .await?; // Resolve any source trees into requirements. if !source_trees.is_empty() { @@ -233,6 +236,7 @@ pub(crate) async fn pip_sync( SourceTreeResolver::new( source_trees, &ExtrasSpecification::None, + false, &build_dispatch, &client, &index, @@ -251,6 +255,7 @@ pub(crate) async fn pip_sync( editables, &site_packages, reinstall, + &hashes, venv.interpreter(), tags, &cache, @@ -262,6 +267,8 @@ pub(crate) async fn pip_sync( // Partition into those that should be linked from the cache (`cached`), those that need to be // downloaded (`remote`), and those that should be removed (`extraneous`). + // STOPSHIP(charlie): Guarantee that anything in `cached` matches the required hashes. If it + // doesn't... don't return it. let Plan { cached, remote, @@ -274,6 +281,7 @@ pub(crate) async fn pip_sync( site_packages, reinstall, &no_binary, + &hashes, &index_locations, &cache, &venv, @@ -312,7 +320,11 @@ pub(crate) async fn pip_sync( // Resolve with `--no-deps`. let options = OptionsBuilder::new() .dependency_mode(DependencyMode::Direct) - .require_hashes(require_hashes) + .hash_checking_mode(if require_hashes { + HashCheckingMode::Enabled + } else { + HashCheckingMode::Disabled + }) .build(); // Create a bound on the progress bar, since we know the number of packages upfront. @@ -320,7 +332,7 @@ pub(crate) async fn pip_sync( // Run the resolver. let resolver = Resolver::new( - Manifest::simple(remote, required_hashes), + Manifest::simple(remote), options, markers, interpreter, @@ -328,6 +340,7 @@ pub(crate) async fn pip_sync( &client, &flat_index, &index, + &hashes, &build_dispatch, // TODO(zanieb): We should consider support for installed packages in pip sync &EmptyInstalledPackages, @@ -371,7 +384,7 @@ pub(crate) async fn pip_sync( } else { let start = std::time::Instant::now(); - let downloader = Downloader::new(&cache, tags, &client, &build_dispatch) + let downloader = Downloader::new(&cache, tags, &hashes, &client, &build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(remote.len() as u64)); let wheels = downloader @@ -567,6 +580,7 @@ async fn resolve_editables( editables: Vec, site_packages: &SitePackages<'_>, reinstall: &Reinstall, + hashes: &RequiredHashes, interpreter: &Interpreter, tags: &Tags, cache: &Cache, @@ -633,7 +647,7 @@ async fn resolve_editables( } else { let start = std::time::Instant::now(); - let downloader = Downloader::new(cache, tags, client, build_dispatch) + let downloader = Downloader::new(cache, tags, &hashes, client, build_dispatch) .with_reporter(DownloadReporter::from(printer).with_length(uninstalled.len() as u64)); let editables = LocalEditables::from_editables(uninstalled.iter().map(|editable| { diff --git a/crates/uv/src/commands/venv.rs b/crates/uv/src/commands/venv.rs index e81f96913ab77..a6e67f0402830 100644 --- a/crates/uv/src/commands/venv.rs +++ b/crates/uv/src/commands/venv.rs @@ -13,13 +13,13 @@ use thiserror::Error; use distribution_types::{DistributionMetadata, IndexLocations, Name, ResolvedDist}; use pep508_rs::Requirement; -use uv_auth::{GLOBAL_AUTH_STORE, KeyringProvider}; +use uv_auth::{KeyringProvider, GLOBAL_AUTH_STORE}; use uv_cache::Cache; use 
uv_client::{Connectivity, FlatIndex, FlatIndexClient, RegistryClientBuilder}; use uv_configuration::{ConfigSettings, IndexStrategy, NoBinary, NoBuild, SetupPyStrategy}; use uv_dispatch::BuildDispatch; use uv_fs::Simplified; -use uv_interpreter::{Error, find_default_python, find_requested_python}; +use uv_interpreter::{find_default_python, find_requested_python, Error}; use uv_resolver::{InMemoryIndex, OptionsBuilder}; use uv_types::{BuildContext, BuildIsolation, InFlight, RequiredHashes}; diff --git a/crates/uv/tests/pip_install.rs b/crates/uv/tests/pip_install.rs index 85a890dc12c5d..2350c4195b374 100644 --- a/crates/uv/tests/pip_install.rs +++ b/crates/uv/tests/pip_install.rs @@ -3756,3 +3756,62 @@ fn find_links_no_binary() -> Result<()> { Ok(()) } + +/// Provide the wrong hash with `--require-hashes`. +#[test] +fn require_hashes_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + // Raise an error. + uv_snapshot!(context.install() + .arg("-r") + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Omit a transitive dependency in `--require-hashes`. +#[test] +fn require_hashes_missing_dependency() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + // Install without error when `--require-hashes` is omitted. + uv_snapshot!(context.install() + .arg("-r") + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: In `--require-hashes` mode, all requirements must be pinned upfront with `==`, but found: idna + "### + ); + + Ok(()) +} diff --git a/crates/uv/tests/pip_sync.rs b/crates/uv/tests/pip_sync.rs index b387eb9acf237..174bda459b1b6 100644 --- a/crates/uv/tests/pip_sync.rs +++ b/crates/uv/tests/pip_sync.rs @@ -3078,3 +3078,801 @@ requires-python = "<=3.5" Ok(()) } + +/// Omit the hash with `--require-hashes`. +#[test] +fn require_hashes_missing_hash() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("anyio==4.0.0")?; + + // Install without error when `--require-hashes` is omitted. + uv_snapshot!(command(&context) + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + // Error when `--require-hashes` is provided. 
+ uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: In `--require-hashes` mode, all requirement must have a hash, but none were provided for: anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Omit the version with `--require-hashes`. +#[test] +fn require_hashes_missing_version() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str( + "anyio --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + )?; + + // Install without error when `--require-hashes` is omitted. + uv_snapshot!(command(&context) + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.3.0 + "### + ); + + // Error when `--require-hashes` is provided. + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: In `--require-hashes` mode, all requirement must have their versions pinned with `==`, but found: anyio + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the wheel with `--no-binary`. +#[test] +fn require_hashes_wheel_no_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--no-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the wheel with `--only-binary`. +#[test] +fn require_hashes_wheel_only_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--only-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the source distribution with `--no-binary`. +#[test] +fn require_hashes_source_no_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--no-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the source distribution, with `--binary-only`. +#[test] +fn require_hashes_source_only_binary() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--only-binary") + .arg(":all:") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because no wheels are usable and building from source is disabled and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the correct hash algorithm, but the wrong digest. +#[test] +fn require_hashes_wrong_digest() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the correct hash, but the wrong algorithm. +#[test] +fn require_hashes_wrong_algorithm() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha512:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + Ok(()) +} + +/// Include the hash for a source distribution specified as a direct URL dependency. 
+#[test] +fn require_hashes_source_url() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz) + "### + ); + + // Reinstall with the right hash, and verify that it's reused. + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 (from https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz) + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz) + "### + ); + + // Reinstall with the wrong hash, and verify that it's rejected despite being cached. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz --hash=sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: Failed to download and build: anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + + Expected: + sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + + Computed: + sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a source distribution specified as a direct URL dependency. +#[test] +fn require_hashes_source_url_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz --hash=sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: Failed to download and build: anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/74/17/5075225ee1abbb93cd7fc30a2d343c6a3f5f71cf388f14768a7a38256581/anyio-4.0.0.tar.gz + + Expected: + sha256:a7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + + Computed: + sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a + "### + ); + + Ok(()) +} + +/// Include the hash for a built distribution specified as a direct URL dependency. +#[test] +fn require_hashes_wheel_url() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Reinstall with the right hash, and verify that it's reused. + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + + anyio==4.0.0 (from https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl) + "### + ); + + // Reinstall with the wrong hash, and verify that it's rejected despite being cached. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + + Expected: + sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + "### + ); + + // Sync a new dependency and include the wrong hash for anyio. Verify that we reuse anyio + // despite the wrong hash, like pip, since we don't validate hashes for already-installed + // distributions. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f\niniconfig==2.0.0 --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + iniconfig==2.0.0 + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a built distribution specified as a direct URL dependency. +#[test] +fn require_hashes_wheel_url_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. 
+ Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + Caused by: Hash mismatch for anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl + + Expected: + sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + "### + ); + + Ok(()) +} + +/// Reject Git dependencies when `--require-hashes` is provided. +#[test] +fn require_hashes_git() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio @ git+https://github.com/agronholm/anyio@4a23745badf5bf5ef7928f1e346e9986bd696d82 --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: Failed to download and build: anyio @ git+https://github.com/agronholm/anyio + Caused by: Hashes are not supported for Git repositories: anyio @ git+https://github.com/agronholm/anyio + "### + ); + + Ok(()) +} + +/// Reject local directory dependencies when `--require-hashes` is provided. +#[test] +fn require_hashes_source_tree() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "black @ {} --hash=sha256:f7ed51751b2c2add651e5747c891b47e26d2a21be5d32d9311dfe9692f3e5d7a", + context + .workspace_root + .join("scripts/packages/black_editable") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: Failed to build: black @ file://[WORKSPACE]/scripts/packages/black_editable + Caused by: Hash-checking is not supported for local directories: black @ file://[WORKSPACE]/scripts/packages/black_editable + "### + ); + + Ok(()) +} + +/// Include the hash for _just_ the wheel with `--only-binary`. +#[test] +fn require_hashes_re_download() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("anyio==4.0.0")?; + + // Install without `--require-hashes`. + uv_snapshot!(command(&context) + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + anyio==4.0.0 + "### + ); + + // Reinstall with `--require-hashes`, and the wrong hash. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:afdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + × No solution found when resolving dependencies: + ╰─▶ Because anyio==4.0.0 is unusable because the hash does not match and you require anyio==4.0.0, we can conclude that the requirements are unsatisfiable. + "### + ); + + // Reinstall with `--require-hashes`, and the right hash. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("anyio==4.0.0 --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--reinstall") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + - anyio==4.0.0 + + anyio==4.0.0 + "### + ); + + Ok(()) +} + +/// Include the hash for a built distribution specified as a local path dependency. +#[test] +fn require_hashes_wheel_path() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:a34996d4bd5abb2336e14ff0a2d22b92cfd0f0ed344e6883041ce01953276a13", + context + .workspace_root + .join("scripts/links/tqdm-1000.0.0-py3-none-any.whl") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + tqdm==1000.0.0 (from file://[WORKSPACE]/scripts/links/tqdm-1000.0.0-py3-none-any.whl) + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a built distribution specified as a local path dependency. +#[test] +fn require_hashes_wheel_path_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + context + .workspace_root + .join("scripts/links/tqdm-1000.0.0-py3-none-any.whl") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. 
+ Resolved 1 package in [TIME] + error: Failed to download distributions + Caused by: Failed to fetch wheel: tqdm @ file://[WORKSPACE]/scripts/links/tqdm-1000.0.0-py3-none-any.whl + Caused by: Hash mismatch for tqdm @ file://[WORKSPACE]/scripts/links/tqdm-1000.0.0-py3-none-any.whl + + Expected: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:a34996d4bd5abb2336e14ff0a2d22b92cfd0f0ed344e6883041ce01953276a13 + "### + ); + + Ok(()) +} + +/// Include the hash for a source distribution specified as a local path dependency. +#[test] +fn require_hashes_source_path() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:89fa05cffa7f457658373b85de302d24d0c205ceda2819a8739e324b75e9430b", + context + .workspace_root + .join("scripts/links/tqdm-999.0.0.tar.gz") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + Resolved 1 package in [TIME] + Downloaded 1 package in [TIME] + Installed 1 package in [TIME] + + tqdm==999.0.0 (from file://[WORKSPACE]/scripts/links/tqdm-999.0.0.tar.gz) + "### + ); + + Ok(()) +} + +/// Include the _wrong_ hash for a source distribution specified as a local path dependency. +#[test] +fn require_hashes_source_path_mismatch() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str(&format!( + "tqdm @ {} --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f", + context + .workspace_root + .join("scripts/links/tqdm-999.0.0.tar.gz") + .display() + ))?; + + uv_snapshot!(context.filters(), command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + warning: Hash-checking mode (via `--require-hashes`) is not yet supported. + error: Failed to build: tqdm @ file://[WORKSPACE]/scripts/links/tqdm-999.0.0.tar.gz + Caused by: Hash mismatch for tqdm @ file://[WORKSPACE]/scripts/links/tqdm-999.0.0.tar.gz + + Expected: + sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f + + Computed: + sha256:89fa05cffa7f457658373b85de302d24d0c205ceda2819a8739e324b75e9430b + "### + ); + + Ok(()) +} + +/// `--require-hashes` isn't supported for unnamed requirements (yet). +#[test] +fn require_hashes_unnamed() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write to a requirements file. 
+ let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt + .write_str("https://foo.com --hash=sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f")?; + + uv_snapshot!(command(&context) + .arg("requirements.txt") + .arg("--require-hashes"), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Unnamed requirements are not supported with `--require-hashes` + "### + ); + + Ok(()) +} diff --git a/requirements.in b/requirements.in deleted file mode 100644 index fb58a566623ae..0000000000000 --- a/requirements.in +++ /dev/null @@ -1,2 +0,0 @@ -example-a==1.0.0 - diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 529017497f583..0000000000000 --- a/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.12 -# by the following command: -# -# pip-compile --generate-hashes --index-url=http://localhost:8000/index/simple-html/ requirements.in -# ---index-url http://localhost:8000/index/simple-html/ - -example-a==1.0.0 \ - --hash=sha256:105f52f5cb7b5a677b4810004ec487f6420fbee6a368038cf8cf8384de5be1939 \ - --hash=sha256:2df9bbf9c4e7940190f11d70c9d6168880c66bb5a19a0d88de7c8eda233e38f6e
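Note: the tests above pass `--hash` values in the pip `<algorithm>:<digest>` form (sha256 and sha512 appear in the fixtures, and md-5/sha2 are added as workspace dependencies). For reference, a minimal sketch of parsing such a value, in the spirit of the `HashDigest::from_str` calls in the `RequiredHashes` changes; the `HashAlgorithm` and `HashDigest` types here are simplified stand-ins, not the real `pypi-types` definitions.

use std::str::FromStr;

#[derive(Debug, PartialEq, Eq)]
enum HashAlgorithm {
    Md5,
    Sha256,
    Sha384,
    Sha512,
}

#[derive(Debug, PartialEq, Eq)]
struct HashDigest {
    algorithm: HashAlgorithm,
    digest: String,
}

impl FromStr for HashDigest {
    type Err = String;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Split `sha256:cfdb2b58...` into an algorithm name and a hex digest.
        let (name, digest) = s
            .split_once(':')
            .ok_or_else(|| format!("expected `<algorithm>:<digest>`, found `{s}`"))?;
        let algorithm = match name {
            "md5" => HashAlgorithm::Md5,
            "sha256" => HashAlgorithm::Sha256,
            "sha384" => HashAlgorithm::Sha384,
            "sha512" => HashAlgorithm::Sha512,
            other => return Err(format!("unsupported hash algorithm: {other}")),
        };
        Ok(HashDigest {
            algorithm,
            digest: digest.to_ascii_lowercase(),
        })
    }
}

fn main() {
    let digest: HashDigest =
        "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f"
            .parse()
            .unwrap();
    assert_eq!(digest.algorithm, HashAlgorithm::Sha256);
    println!("{digest:?}");
}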