From dfdcce68fda6293106836bec97ae2548aadb1b70 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 1 Apr 2024 19:52:21 -0400 Subject: [PATCH] Split `get_or_build_wheel_metadata` into distinct methods (#2765) ## Summary Internal refactor which makes `DistributionDatabase` for unnamed requirements (or, at least, source distributions). --- crates/distribution-types/src/lib.rs | 68 +++++++- .../src/distribution_database.rs | 147 ++++++++++++------ crates/uv-distribution/src/git.rs | 10 +- 3 files changed, 164 insertions(+), 61 deletions(-) diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 6ecb5ce25150..95ca6c9125fb 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -424,10 +424,6 @@ impl SourceDist { #[must_use] pub fn with_url(self, url: Url) -> Self { match self { - Self::DirectUrl(dist) => Self::DirectUrl(DirectUrlSourceDist { - url: VerbatimUrl::unknown(url), - ..dist - }), Self::Git(dist) => Self::Git(GitSourceDist { url: VerbatimUrl::unknown(url), ..dist @@ -996,6 +992,70 @@ impl Identifier for Dist { } } +impl Identifier for DirectSourceUrl<'_> { + fn distribution_id(&self) -> DistributionId { + self.url.distribution_id() + } + + fn resource_id(&self) -> ResourceId { + self.url.resource_id() + } +} + +impl Identifier for GitSourceUrl<'_> { + fn distribution_id(&self) -> DistributionId { + self.url.distribution_id() + } + + fn resource_id(&self) -> ResourceId { + self.url.resource_id() + } +} + +impl Identifier for PathSourceUrl<'_> { + fn distribution_id(&self) -> DistributionId { + self.url.distribution_id() + } + + fn resource_id(&self) -> ResourceId { + self.url.resource_id() + } +} + +impl Identifier for SourceUrl<'_> { + fn distribution_id(&self) -> DistributionId { + match self { + Self::Direct(url) => url.distribution_id(), + Self::Git(url) => url.distribution_id(), + Self::Path(url) => url.distribution_id(), + } + } + + fn resource_id(&self) -> ResourceId { + match self { + Self::Direct(url) => url.resource_id(), + Self::Git(url) => url.resource_id(), + Self::Path(url) => url.resource_id(), + } + } +} + +impl Identifier for BuildableSource<'_> { + fn distribution_id(&self) -> DistributionId { + match self { + BuildableSource::Dist(source) => source.distribution_id(), + BuildableSource::Url(source) => source.distribution_id(), + } + } + + fn resource_id(&self) -> ResourceId { + match self { + BuildableSource::Dist(source) => source.resource_id(), + BuildableSource::Url(source) => source.resource_id(), + } + } +} + #[cfg(test)] mod test { use crate::{BuiltDist, Dist, SourceDist}; diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index 8c7236c45323..26b0feddaeb7 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -1,4 +1,3 @@ -use std::borrow::Cow; use std::io; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -11,8 +10,10 @@ use url::Url; use distribution_filename::WheelFilename; use distribution_types::{ - BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name, SourceDist, + BuildableSource, BuiltDist, Dist, FileLocation, GitSourceDist, GitSourceUrl, IndexLocations, + LocalEditable, Name, SourceDist, SourceUrl, }; +use pep508_rs::VerbatimUrl; use platform_tags::Tags; use pypi_types::Metadata23; use uv_cache::{ArchiveTarget, ArchiveTimestamp, CacheBucket, CacheEntry, WheelCache}; @@ -104,54 +105,13 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> dist: &Dist, ) -> Result<(Metadata23, Option), Error> { match dist { - Dist::Built(built_dist) => { - match self.client.wheel_metadata(built_dist).boxed().await { - Ok(metadata) => Ok((metadata, None)), - Err(err) if err.is_http_streaming_unsupported() => { - warn!("Streaming unsupported when fetching metadata for {built_dist}; downloading wheel directly ({err})"); - - // If the request failed due to an error that could be resolved by - // downloading the wheel directly, try that. - let wheel = self.get_wheel(built_dist).await?; - Ok((wheel.metadata()?, None)) - } - Err(err) => Err(err.into()), - } - } - Dist::Source(source_dist) => { - let no_build = match self.build_context.no_build() { - NoBuild::All => true, - NoBuild::None => false, - NoBuild::Packages(packages) => packages.contains(source_dist.name()), - }; - - // Optimization: Skip source dist download when we must not build them anyway. - if no_build { - return Err(Error::NoBuild); - } - - let lock = self.locks.acquire(dist).await; - let _guard = lock.lock().await; - - // Insert the `precise` URL, if it exists. - let precise = resolve_precise( - source_dist, - self.build_context.cache(), - self.reporter.as_ref(), - ) - .await?; - - let source_dist = match precise.as_ref() { - Some(url) => Cow::Owned(source_dist.clone().with_url(url.clone())), - None => Cow::Borrowed(source_dist), - }; - - let metadata = self - .builder - .download_and_build_metadata(&BuildableSource::Dist(&source_dist)) - .boxed() - .await?; - Ok((metadata, precise)) + Dist::Built(built) => self + .get_wheel_metadata(built) + .await + .map(|metadata| (metadata, None)), + Dist::Source(source) => { + self.build_wheel_metadata(&BuildableSource::Dist(source)) + .await } } } @@ -385,6 +345,93 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } } + /// Fetch the wheel metadata from the index, or from the cache if possible. + pub async fn get_wheel_metadata(&self, dist: &BuiltDist) -> Result { + match self.client.wheel_metadata(dist).boxed().await { + Ok(metadata) => Ok(metadata), + Err(err) if err.is_http_streaming_unsupported() => { + warn!("Streaming unsupported when fetching metadata for {dist}; downloading wheel directly ({err})"); + + // If the request failed due to an error that could be resolved by + // downloading the wheel directly, try that. + let wheel = self.get_wheel(dist).await?; + Ok(wheel.metadata()?) + } + Err(err) => Err(err.into()), + } + } + + /// Build the wheel metadata for a source distribution, or fetch it from the cache if possible. + pub async fn build_wheel_metadata( + &self, + source: &BuildableSource<'_>, + ) -> Result<(Metadata23, Option), Error> { + let no_build = match self.build_context.no_build() { + NoBuild::All => true, + NoBuild::None => false, + NoBuild::Packages(packages) => { + source.name().is_some_and(|name| packages.contains(name)) + } + }; + + // Optimization: Skip source dist download when we must not build them anyway. + if no_build { + return Err(Error::NoBuild); + } + + let lock = self.locks.acquire(source).await; + let _guard = lock.lock().await; + + // Insert the `precise` URL, if it exists. + if let BuildableSource::Dist(SourceDist::Git(source)) = source { + if let Some(precise) = resolve_precise( + &source.url, + self.build_context.cache(), + self.reporter.as_ref(), + ) + .await? + { + let source = SourceDist::Git(GitSourceDist { + url: VerbatimUrl::unknown(precise.clone()), + ..source.clone() + }); + let source = BuildableSource::Dist(&source); + let metadata = self + .builder + .download_and_build_metadata(&source) + .boxed() + .await?; + return Ok((metadata, Some(precise))); + } + } + + if let BuildableSource::Url(SourceUrl::Git(source)) = source { + if let Some(precise) = resolve_precise( + source.url, + self.build_context.cache(), + self.reporter.as_ref(), + ) + .await? + { + let source = SourceUrl::Git(GitSourceUrl { url: &precise }); + let source = BuildableSource::Url(source); + let metadata = self + .builder + .download_and_build_metadata(&source) + .boxed() + .await?; + return Ok((metadata, Some(precise))); + } + } + + let metadata = self + .builder + .download_and_build_metadata(source) + .boxed() + .await?; + Ok((metadata, None)) + } + /// Stream a wheel from a URL, unzipping it into the cache as it's downloaded. async fn stream_wheel( &self, diff --git a/crates/uv-distribution/src/git.rs b/crates/uv-distribution/src/git.rs index fcda0f8cccc8..549e943a270d 100644 --- a/crates/uv-distribution/src/git.rs +++ b/crates/uv-distribution/src/git.rs @@ -8,7 +8,7 @@ use rustc_hash::FxHashMap; use tracing::debug; use url::Url; -use distribution_types::{DirectGitUrl, SourceDist}; +use distribution_types::DirectGitUrl; use uv_cache::{Cache, CacheBucket}; use uv_fs::LockedFile; use uv_git::{Fetch, GitSource, GitUrl}; @@ -87,17 +87,13 @@ pub(crate) async fn fetch_git_archive( /// layer. For example: removing `#subdirectory=pkg_dir`-like fragments, and removing `git+` /// prefix kinds. pub(crate) async fn resolve_precise( - dist: &SourceDist, + url: &Url, cache: &Cache, reporter: Option<&Arc>, ) -> Result, Error> { - let SourceDist::Git(source_dist) = dist else { - return Ok(None); - }; let git_dir = cache.bucket(CacheBucket::Git); - let DirectGitUrl { url, subdirectory } = - DirectGitUrl::try_from(source_dist.url.raw()).map_err(Error::Git)?; + let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?; // If the Git reference already contains a complete SHA, short-circuit. if url.precise().is_some() {