Skip to content

Commit

Permalink
Prune unused source distributions from the cache
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Sep 6, 2024
1 parent d0f9016 commit 6b99ff9
Show file tree
Hide file tree
Showing 11 changed files with 215 additions and 10 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use uv_normalize::PackageName;
pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
pub use crate::cli::CacheArgs;
use crate::removal::{rm_rf, Removal};
pub use crate::removal::{rm_rf, Removal};
pub use crate::timestamp::Timestamp;
pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;
Expand Down Expand Up @@ -458,9 +458,7 @@ impl Cache {
}
}

// Third, remove any unused archives (by searching for archives that are not symlinked).
// TODO(charlie): Remove any unused source distributions. This requires introspecting the
// cache contents, e.g., reading and deserializing the manifests.
// Fourth, remove any unused archives (by searching for archives that are not symlinked).
let mut references = FxHashSet::default();

for bucket in CacheBucket::iter() {
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-cache/src/removal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::path::Path;

/// Remove a file or directory and all its contents, returning a [`Removal`] with
/// the number of files and directories removed, along with a total byte count.
pub(crate) fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> {
pub fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> {
let mut removal = Removal::default();
removal.rm_rf(path.as_ref())?;
Ok(removal)
Expand Down
1 change: 1 addition & 0 deletions crates/uv-distribution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ tokio = { workspace = true }
tokio-util = { workspace = true, features = ["compat"] }
tracing = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true }

[dev-dependencies]
Expand Down
2 changes: 2 additions & 0 deletions crates/uv-distribution/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ pub enum Error {
CacheDecode(#[from] rmp_serde::decode::Error),
#[error("Failed to serialize cache entry")]
CacheEncode(#[from] rmp_serde::encode::Error),
#[error("Failed to walk the distribution cache")]
CacheWalk(#[source] walkdir::Error),

// Build error
#[error(transparent)]
Expand Down
1 change: 1 addition & 0 deletions crates/uv-distribution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub use error::Error;
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
pub use metadata::{ArchiveMetadata, LoweredRequirement, Metadata, RequiresDist};
pub use reporter::Reporter;
pub use source::prune;

mod archive;
mod distribution_database;
Expand Down
75 changes: 74 additions & 1 deletion crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ use install_wheel_rs::metadata::read_archive_metadata;
use platform_tags::Tags;
use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt};
use uv_cache::{
ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Timestamp, WheelCache,
ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Removal,
Timestamp, WheelCache,
};
use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
Expand Down Expand Up @@ -1610,6 +1611,78 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
}

/// Prune any unused source distributions from the cache.
pub fn prune(cache: &Cache) -> Result<Removal, Error> {
let mut removal = Removal::default();

let bucket = cache.bucket(CacheBucket::SourceDistributions);
if bucket.is_dir() {
for entry in walkdir::WalkDir::new(bucket) {
let entry = entry.map_err(Error::CacheWalk)?;

// If we find a `revision.http` file, read the pointer, and remove any extraneous
// directories.
if entry.file_name() == "revision.http" {
let pointer = HttpRevisionPointer::read_from(entry.path())?;
if let Some(pointer) = pointer {
// Remove all sibling directories that are not referenced by the pointer.
for sibling in entry
.path()
.parent()
.unwrap()
.read_dir()
.map_err(Error::CacheRead)?
{
let sibling = sibling.map_err(Error::CacheRead)?;
if sibling.file_type().map_err(Error::CacheRead)?.is_dir() {
let sibling_name = sibling.file_name();
if sibling_name != pointer.revision.id().as_str() {
debug!(
"Removing dangling source revision: {}",
sibling.path().display()
);
removal +=
uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?;
}
}
}
}
}

// If we find a `revision.rev` file, read the pointer, and remove any extraneous
// directories.
if entry.file_name() == "revision.rev" {
let pointer = LocalRevisionPointer::read_from(entry.path())?;
if let Some(pointer) = pointer {
// Remove all sibling directories that are not referenced by the pointer.
for sibling in entry
.path()
.parent()
.unwrap()
.read_dir()
.map_err(Error::CacheRead)?
{
let sibling = sibling.map_err(Error::CacheRead)?;
if sibling.file_type().map_err(Error::CacheRead)?.is_dir() {
let sibling_name = sibling.file_name();
if sibling_name != pointer.revision.id().as_str() {
debug!(
"Removing dangling source revision: {}",
sibling.path().display()
);
removal +=
uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?;
}
}
}
}
}
}
}

Ok(removal)
}

/// Validate that the source distribution matches the built metadata.
fn validate(source: &BuildableSource<'_>, metadata: &Metadata23) -> Result<(), Error> {
if let Some(name) = source.name() {
Expand Down
10 changes: 10 additions & 0 deletions crates/uv-distribution/src/source/revision.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ impl RevisionId {
/// Create a new revision ID backed by a freshly generated `nanoid` string.
fn new() -> Self {
    let id = nanoid::nanoid!();
    Self(id)
}

/// Borrow the revision ID as a string slice.
pub(crate) fn as_str(&self) -> &str {
    &self.0
}
}

/// Allow a [`RevisionId`] to be passed wherever a borrowed string is accepted.
impl AsRef<str> for RevisionId {
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

impl AsRef<Path> for RevisionId {
Expand Down
1 change: 1 addition & 0 deletions crates/uv/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ tracing-tree = { workspace = true }
unicode-width = { workspace = true }
url = { workspace = true }
which = { workspace = true }
walkdir = { workspace = true }

[target.'cfg(target_os = "windows")'.dependencies]
mimalloc = { version = "0.1.39" }
Expand Down
11 changes: 9 additions & 2 deletions crates/uv/src/commands/cache_prune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::fmt::Write;
use anyhow::{Context, Result};
use owo_colors::OwoColorize;

use uv_cache::Cache;
use uv_cache::{Cache, Removal};
use uv_fs::Simplified;

use crate::commands::{human_readable_bytes, ExitStatus};
Expand All @@ -26,7 +26,14 @@ pub(crate) fn cache_prune(ci: bool, cache: &Cache, printer: Printer) -> Result<E
cache.root().user_display().cyan()
)?;

let summary = cache
let mut summary = Removal::default();

// Prune the source distribution cache, which is tightly coupled to the builder crate.
summary += uv_distribution::prune(cache)
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;

// Prune the remaining cache buckets.
summary += cache
.prune(ci)
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;

Expand Down
114 changes: 112 additions & 2 deletions crates/uv/tests/cache_prune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ fn prune_cached_env() {
.chain([
// The cache entry does not have a stable key, so we filter it out
(
r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*",
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
"[CACHE_DIR]/$2/[ENTRY]",
),
])
Expand Down Expand Up @@ -151,7 +151,7 @@ fn prune_stale_symlink() -> Result<()> {
.chain([
// The cache entry does not have a stable key, so we filter it out
(
r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*",
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
"[CACHE_DIR]/$2/[ENTRY]",
),
])
Expand Down Expand Up @@ -252,3 +252,113 @@ fn prune_unzipped() -> Result<()> {

Ok(())
}

/// `cache prune` should remove any stale source distribution revisions.
///
/// The test builds the same local project twice with `--reinstall`, runs `cache prune`, and then
/// verifies that a later install of the same project still succeeds. The prune snapshot expects
/// one dangling source revision and one dangling archive entry to be reported as removed.
#[test]
fn prune_stale_revision() -> Result<()> {
let context = TestContext::new("3.12");

// Create a minimal setuptools-based project in the temporary directory.
let pyproject_toml = context.temp_dir.child("pyproject.toml");
pyproject_toml.write_str(
r#"
[project]
name = "project"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = []
[build-system]
requires = ["setuptools>=42"]
build-backend = "setuptools.build_meta"
"#,
)?;

context.temp_dir.child("src").child("__init__.py").touch()?;
context.temp_dir.child("README").touch()?;

// Install the same package twice, with `--reinstall`.
// The first install reports a fresh installation (`+`).
uv_snapshot!(context.filters(), context
.pip_install()
.arg(".")
.arg("--reinstall"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Installed 1 package in [TIME]
+ project==0.1.0 (from file://[TEMP_DIR]/)
"###);

// The second install prepares the package again and replaces the installed copy (`~`).
uv_snapshot!(context.filters(), context
.pip_install()
.arg(".")
.arg("--reinstall"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ project==0.1.0 (from file://[TEMP_DIR]/)
"###);

// Extend the default filters so that cache paths are stable across runs. The second capture
// group is non-greedy, so `$2` keeps only the first path component after `[CACHE_DIR]` (the
// bucket name) and everything after it is replaced with `[ENTRY]`.
let filters: Vec<_> = context
.filters()
.into_iter()
.chain([
// The cache entry does not have a stable key, so we filter it out
(
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
"[CACHE_DIR]/$2/[ENTRY]",
),
])
.collect();

// Pruning should remove the unused revision.
// `--verbose` is passed so that the `DEBUG` removal messages appear in the snapshot.
uv_snapshot!(&filters, context.prune().arg("--verbose"), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
DEBUG uv [VERSION] ([COMMIT] DATE)
Pruning cache at: [CACHE_DIR]/
DEBUG Removing dangling source revision: [CACHE_DIR]/built-wheels-v3/[ENTRY]
DEBUG Removing dangling cache entry: [CACHE_DIR]/archive-v0/[ENTRY]
Removed 8 files ([SIZE])
"###);

// Uninstall and reinstall the package. We should use the cached version.
uv_snapshot!(context.filters(), context
.pip_uninstall()
.arg("."), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Uninstalled 1 package in [TIME]
- project==0.1.0 (from file://[TEMP_DIR]/)
"###);

// Note that the expected output below has no `Prepared` line, unlike the `--reinstall` runs
// above: the package is installed without being prepared again.
uv_snapshot!(context.filters(), context
.pip_install()
.arg("."), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 1 package in [TIME]
Installed 1 package in [TIME]
+ project==0.1.0 (from file://[TEMP_DIR]/)
"###);

Ok(())
}

0 comments on commit 6b99ff9

Please sign in to comment.