diff --git a/Cargo.lock b/Cargo.lock index 852f234bd09a..6bfa1da48418 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4812,6 +4812,7 @@ dependencies = [ "uv-types", "uv-warnings", "uv-workspace", + "walkdir", "zip", ] diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index 67fae57887b6..e2810b156960 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -18,7 +18,7 @@ use uv_normalize::PackageName; pub use crate::by_timestamp::CachedByTimestamp; #[cfg(feature = "clap")] pub use crate::cli::CacheArgs; -use crate::removal::{rm_rf, Removal}; +pub use crate::removal::{rm_rf, Removal}; pub use crate::timestamp::Timestamp; pub use crate::wheel::WheelCache; use crate::wheel::WheelCacheKind; @@ -458,9 +458,7 @@ impl Cache { } } - // Third, remove any unused archives (by searching for archives that are not symlinked). - // TODO(charlie): Remove any unused source distributions. This requires introspecting the - // cache contents, e.g., reading and deserializing the manifests. + // Fourth, remove any unused archives (by searching for archives that are not symlinked). let mut references = FxHashSet::default(); for bucket in CacheBucket::iter() { diff --git a/crates/uv-cache/src/removal.rs b/crates/uv-cache/src/removal.rs index 156f48a3832b..80794e30bee3 100644 --- a/crates/uv-cache/src/removal.rs +++ b/crates/uv-cache/src/removal.rs @@ -7,7 +7,7 @@ use std::path::Path; /// Remove a file or directory and all its contents, returning a [`Removal`] with /// the number of files and directories removed, along with a total byte count. 
-pub(crate) fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> { +pub fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> { let mut removal = Removal::default(); removal.rm_rf(path.as_ref())?; Ok(removal) } diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index 0aaef1a3e320..717df6e31fbe 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -46,6 +46,7 @@ tokio = { workspace = true } tokio-util = { workspace = true, features = ["compat"] } tracing = { workspace = true } url = { workspace = true } +walkdir = { workspace = true } zip = { workspace = true } [dev-dependencies] diff --git a/crates/uv-distribution/src/error.rs b/crates/uv-distribution/src/error.rs index f724c5833c09..9a13a6a7f706 100644 --- a/crates/uv-distribution/src/error.rs +++ b/crates/uv-distribution/src/error.rs @@ -44,6 +44,8 @@ pub enum Error { CacheDecode(#[from] rmp_serde::decode::Error), #[error("Failed to serialize cache entry")] CacheEncode(#[from] rmp_serde::encode::Error), + #[error("Failed to walk the distribution cache")] + CacheWalk(#[source] walkdir::Error), // Build error #[error(transparent)] diff --git a/crates/uv-distribution/src/lib.rs b/crates/uv-distribution/src/lib.rs index c8a2110a1d60..7a15a44c31fe 100644 --- a/crates/uv-distribution/src/lib.rs +++ b/crates/uv-distribution/src/lib.rs @@ -4,6 +4,7 @@ pub use error::Error; pub use index::{BuiltWheelIndex, RegistryWheelIndex}; pub use metadata::{ArchiveMetadata, LoweredRequirement, Metadata, RequiresDist}; pub use reporter::Reporter; +pub use source::prune; mod archive; mod distribution_database; diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 6e7acccb519d..c621ba91225d 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -22,7 +22,8 @@ use install_wheel_rs::metadata::read_archive_metadata; use platform_tags::Tags; use pypi_types::{HashDigest, Metadata12, Metadata23, 
RequiresTxt}; use uv_cache::{ - ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Timestamp, WheelCache, + ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Removal, + Timestamp, WheelCache, }; use uv_client::{ CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient, @@ -1610,6 +1611,78 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } } +/// Prune any unused source distributions from the cache. +pub fn prune(cache: &Cache) -> Result<Removal, Error> { + let mut removal = Removal::default(); + + let bucket = cache.bucket(CacheBucket::SourceDistributions); + if bucket.is_dir() { + for entry in walkdir::WalkDir::new(bucket) { + let entry = entry.map_err(Error::CacheWalk)?; + + // If we find a `revision.http` file, read the pointer, and remove any extraneous + // directories. + if entry.file_name() == "revision.http" { + let pointer = HttpRevisionPointer::read_from(entry.path())?; + if let Some(pointer) = pointer { + // Remove all sibling directories that are not referenced by the pointer. + for sibling in entry + .path() + .parent() + .unwrap() + .read_dir() + .map_err(Error::CacheRead)? + { + let sibling = sibling.map_err(Error::CacheRead)?; + if sibling.file_type().map_err(Error::CacheRead)?.is_dir() { + let sibling_name = sibling.file_name(); + if sibling_name != pointer.revision.id().as_str() { + debug!( + "Removing dangling source revision: {}", + sibling.path().display() + ); + removal += + uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?; + } + } + } + } + } + + // If we find a `revision.rev` file, read the pointer, and remove any extraneous + // directories. + if entry.file_name() == "revision.rev" { + let pointer = LocalRevisionPointer::read_from(entry.path())?; + if let Some(pointer) = pointer { + // Remove all sibling directories that are not referenced by the pointer. 
+ for sibling in entry + .path() + .parent() + .unwrap() + .read_dir() + .map_err(Error::CacheRead)? + { + let sibling = sibling.map_err(Error::CacheRead)?; + if sibling.file_type().map_err(Error::CacheRead)?.is_dir() { + let sibling_name = sibling.file_name(); + if sibling_name != pointer.revision.id().as_str() { + debug!( + "Removing dangling source revision: {}", + sibling.path().display() + ); + removal += + uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?; + } + } + } + } + } + } + } + + Ok(removal) +} + /// Validate that the source distribution matches the built metadata. fn validate(source: &BuildableSource<'_>, metadata: &Metadata23) -> Result<(), Error> { if let Some(name) = source.name() { diff --git a/crates/uv-distribution/src/source/revision.rs b/crates/uv-distribution/src/source/revision.rs index 64cbc127ba2e..809fc60e6f80 100644 --- a/crates/uv-distribution/src/source/revision.rs +++ b/crates/uv-distribution/src/source/revision.rs @@ -63,6 +63,16 @@ impl RevisionId { fn new() -> Self { Self(nanoid::nanoid!()) } + + pub(crate) fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl AsRef for RevisionId { + fn as_ref(&self) -> &str { + self.0.as_ref() + } } impl AsRef for RevisionId { diff --git a/crates/uv/src/commands/cache_prune.rs b/crates/uv/src/commands/cache_prune.rs index 8a21b2cb7000..68123699317d 100644 --- a/crates/uv/src/commands/cache_prune.rs +++ b/crates/uv/src/commands/cache_prune.rs @@ -3,7 +3,7 @@ use std::fmt::Write; use anyhow::{Context, Result}; use owo_colors::OwoColorize; -use uv_cache::Cache; +use uv_cache::{Cache, Removal}; use uv_fs::Simplified; use crate::commands::{human_readable_bytes, ExitStatus}; @@ -26,7 +26,14 @@ pub(crate) fn cache_prune(ci: bool, cache: &Cache, printer: Printer) -> Result Result<()> { .chain([ // The cache entry does not have a stable key, so we filter it out ( - r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*", + r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*", "[CACHE_DIR]/$2/[ENTRY]", ), ]) @@ -252,3 
+252,113 @@ fn prune_unzipped() -> Result<()> { Ok(()) } + +/// `cache prune` should remove any stale source distribution revisions. +#[test] +fn prune_stale_revision() -> Result<()> { + let context = TestContext::new("3.12"); + + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#" + [project] + name = "project" + version = "0.1.0" + requires-python = ">=3.12" + dependencies = [] + + [build-system] + requires = ["setuptools>=42"] + build-backend = "setuptools.build_meta" + "#, + )?; + + context.temp_dir.child("src").child("__init__.py").touch()?; + context.temp_dir.child("README").touch()?; + + // Install the same package twice, with `--reinstall`. + uv_snapshot!(context.filters(), context + .pip_install() + .arg(".") + .arg("--reinstall"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Installed 1 package in [TIME] + + project==0.1.0 (from file://[TEMP_DIR]/) + "###); + + uv_snapshot!(context.filters(), context + .pip_install() + .arg(".") + .arg("--reinstall"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + ~ project==0.1.0 (from file://[TEMP_DIR]/) + "###); + + let filters: Vec<_> = context + .filters() + .into_iter() + .chain([ + // The cache entry does not have a stable key, so we filter it out + ( + r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*", + "[CACHE_DIR]/$2/[ENTRY]", + ), + ]) + .collect(); + + // Pruning should remove the unused revision. 
+ uv_snapshot!(&filters, context.prune().arg("--verbose"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + DEBUG uv [VERSION] ([COMMIT] DATE) + Pruning cache at: [CACHE_DIR]/ + DEBUG Removing dangling source revision: [CACHE_DIR]/built-wheels-v3/[ENTRY] + DEBUG Removing dangling cache entry: [CACHE_DIR]/archive-v0/[ENTRY] + Removed 8 files ([SIZE]) + "###); + + // Uninstall and reinstall the package. We should use the cached version. + uv_snapshot!(context.filters(), context + .pip_uninstall() + .arg("."), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Uninstalled 1 package in [TIME] + - project==0.1.0 (from file://[TEMP_DIR]/) + "###); + + uv_snapshot!(context.filters(), context + .pip_install() + .arg("."), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Installed 1 package in [TIME] + + project==0.1.0 (from file://[TEMP_DIR]/) + "###); + + Ok(()) +}