Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prune unused source distributions from the cache #7112

Merged
merged 1 commit into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use uv_normalize::PackageName;
pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
pub use crate::cli::CacheArgs;
use crate::removal::{rm_rf, Removal};
pub use crate::removal::{rm_rf, Removal};
pub use crate::timestamp::Timestamp;
pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;
Expand Down Expand Up @@ -458,9 +458,7 @@ impl Cache {
}
}

// Third, remove any unused archives (by searching for archives that are not symlinked).
// TODO(charlie): Remove any unused source distributions. This requires introspecting the
// cache contents, e.g., reading and deserializing the manifests.
// Fourth, remove any unused archives (by searching for archives that are not symlinked).
let mut references = FxHashSet::default();

for bucket in CacheBucket::iter() {
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-cache/src/removal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::path::Path;

/// Remove a file or directory and all its contents, returning a [`Removal`] with
/// the number of files and directories removed, along with a total byte count.
pub(crate) fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> {
pub fn rm_rf(path: impl AsRef<Path>) -> io::Result<Removal> {
let mut removal = Removal::default();
removal.rm_rf(path.as_ref())?;
Ok(removal)
Expand Down
1 change: 1 addition & 0 deletions crates/uv-distribution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ tokio = { workspace = true }
tokio-util = { workspace = true, features = ["compat"] }
tracing = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true }

[dev-dependencies]
Expand Down
2 changes: 2 additions & 0 deletions crates/uv-distribution/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ pub enum Error {
CacheDecode(#[from] rmp_serde::decode::Error),
#[error("Failed to serialize cache entry")]
CacheEncode(#[from] rmp_serde::encode::Error),
#[error("Failed to walk the distribution cache")]
CacheWalk(#[source] walkdir::Error),

// Build error
#[error(transparent)]
Expand Down
1 change: 1 addition & 0 deletions crates/uv-distribution/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub use error::Error;
pub use index::{BuiltWheelIndex, RegistryWheelIndex};
pub use metadata::{ArchiveMetadata, LoweredRequirement, Metadata, RequiresDist};
pub use reporter::Reporter;
pub use source::prune;

mod archive;
mod distribution_database;
Expand Down
75 changes: 74 additions & 1 deletion crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ use install_wheel_rs::metadata::read_archive_metadata;
use platform_tags::Tags;
use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt};
use uv_cache::{
ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Timestamp, WheelCache,
ArchiveTimestamp, Cache, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Removal,
Timestamp, WheelCache,
};
use uv_client::{
CacheControl, CachedClientError, Connectivity, DataWithCachePolicy, RegistryClient,
Expand Down Expand Up @@ -1610,6 +1611,78 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
}
}

/// Prune any unused source distributions from the cache.
pub fn prune(cache: &Cache) -> Result<Removal, Error> {
let mut removal = Removal::default();

let bucket = cache.bucket(CacheBucket::SourceDistributions);
if bucket.is_dir() {
for entry in walkdir::WalkDir::new(bucket) {
let entry = entry.map_err(Error::CacheWalk)?;

// If we find a `revision.http` file, read the pointer, and remove any extraneous
// directories.
if entry.file_name() == "revision.http" {
let pointer = HttpRevisionPointer::read_from(entry.path())?;
if let Some(pointer) = pointer {
// Remove all sibling directories that are not referenced by the pointer.
for sibling in entry
.path()
.parent()
.unwrap()
.read_dir()
.map_err(Error::CacheRead)?
{
let sibling = sibling.map_err(Error::CacheRead)?;
if sibling.file_type().map_err(Error::CacheRead)?.is_dir() {
let sibling_name = sibling.file_name();
if sibling_name != pointer.revision.id().as_str() {
debug!(
"Removing dangling source revision: {}",
sibling.path().display()
);
removal +=
uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?;
}
}
}
}
}

// If we find a `revision.rev` file, read the pointer, and remove any extraneous
// directories.
if entry.file_name() == "revision.rev" {
let pointer = LocalRevisionPointer::read_from(entry.path())?;
if let Some(pointer) = pointer {
// Remove all sibling directories that are not referenced by the pointer.
for sibling in entry
.path()
.parent()
.unwrap()
.read_dir()
.map_err(Error::CacheRead)?
{
let sibling = sibling.map_err(Error::CacheRead)?;
if sibling.file_type().map_err(Error::CacheRead)?.is_dir() {
let sibling_name = sibling.file_name();
if sibling_name != pointer.revision.id().as_str() {
debug!(
"Removing dangling source revision: {}",
sibling.path().display()
);
removal +=
uv_cache::rm_rf(sibling.path()).map_err(Error::CacheWrite)?;
}
}
}
}
}
}
}

Ok(removal)
}

/// Validate that the source distribution matches the built metadata.
fn validate(source: &BuildableSource<'_>, metadata: &Metadata23) -> Result<(), Error> {
if let Some(name) = source.name() {
Expand Down
10 changes: 10 additions & 0 deletions crates/uv-distribution/src/source/revision.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ impl RevisionId {
fn new() -> Self {
Self(nanoid::nanoid!())
}

/// Borrow this revision ID as a plain string slice.
pub(crate) fn as_str(&self) -> &str {
    &self.0
}
}

/// Lets a [`RevisionId`] be passed wherever a borrowed `str` view is accepted.
impl AsRef<str> for RevisionId {
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

impl AsRef<Path> for RevisionId {
Expand Down
11 changes: 9 additions & 2 deletions crates/uv/src/commands/cache_prune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::fmt::Write;
use anyhow::{Context, Result};
use owo_colors::OwoColorize;

use uv_cache::Cache;
use uv_cache::{Cache, Removal};
use uv_fs::Simplified;

use crate::commands::{human_readable_bytes, ExitStatus};
Expand All @@ -26,7 +26,14 @@ pub(crate) fn cache_prune(ci: bool, cache: &Cache, printer: Printer) -> Result<E
cache.root().user_display().cyan()
)?;

let summary = cache
let mut summary = Removal::default();

// Prune the source distribution cache, which is tightly coupled to the builder crate.
summary += uv_distribution::prune(cache)
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;

// Prune the remaining cache buckets.
summary += cache
.prune(ci)
.with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;

Expand Down
114 changes: 112 additions & 2 deletions crates/uv/tests/cache_prune.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ fn prune_cached_env() {
.chain([
// The cache entry does not have a stable key, so we filter it out
(
r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*",
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
"[CACHE_DIR]/$2/[ENTRY]",
),
])
Expand Down Expand Up @@ -151,7 +151,7 @@ fn prune_stale_symlink() -> Result<()> {
.chain([
// The cache entry does not have a stable key, so we filter it out
(
r"\[CACHE_DIR\](\\|\/)(.+)(\\|\/).*",
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
"[CACHE_DIR]/$2/[ENTRY]",
),
])
Expand Down Expand Up @@ -252,3 +252,113 @@ fn prune_unzipped() -> Result<()> {

Ok(())
}

/// `cache prune` should remove any stale source distribution revisions.
///
/// The test builds and installs a local project twice with `--reinstall`, runs
/// `cache prune --verbose`, and checks that exactly one dangling source revision (plus one
/// dangling archive entry) is reported as removed. It then uninstalls and reinstalls the
/// project to confirm that the install still succeeds after pruning.
#[test]
fn prune_stale_revision() -> Result<()> {
let context = TestContext::new("3.12");

// Minimal setuptools-based project in the test's temporary directory.
let pyproject_toml = context.temp_dir.child("pyproject.toml");
pyproject_toml.write_str(
r#"
[project]
name = "project"
version = "0.1.0"
requires-python = ">=3.12"
dependencies = []

[build-system]
requires = ["setuptools>=42"]
build-backend = "setuptools.build_meta"
"#,
)?;

context.temp_dir.child("src").child("__init__.py").touch()?;
context.temp_dir.child("README").touch()?;

// Install the same package twice, with `--reinstall`.
// NOTE(review): presumably each `--reinstall` build leaves its own revision in the cache, so
// the first one becomes stale -- this matches the single removal asserted below.
uv_snapshot!(context.filters(), context
.pip_install()
.arg(".")
.arg("--reinstall"), @r###"
success: true
exit_code: 0
----- stdout -----

----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Installed 1 package in [TIME]
+ project==0.1.0 (from file://[TEMP_DIR]/)
"###);

// Second install: the output additionally reports an uninstall of the previous copy.
uv_snapshot!(context.filters(), context
.pip_install()
.arg(".")
.arg("--reinstall"), @r###"
success: true
exit_code: 0
----- stdout -----

----- stderr -----
Resolved 1 package in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ project==0.1.0 (from file://[TEMP_DIR]/)
"###);

// Extend the default filters so that everything after the first path component under the
// cache directory is replaced by `[ENTRY]`, keeping only the bucket name in the snapshot.
let filters: Vec<_> = context
.filters()
.into_iter()
.chain([
// The cache entry does not have a stable key, so we filter it out
(
r"\[CACHE_DIR\](\\|\/)(.*?)(\\|\/).*",
"[CACHE_DIR]/$2/[ENTRY]",
),
])
.collect();

// Pruning should remove the unused revision.
uv_snapshot!(&filters, context.prune().arg("--verbose"), @r###"
success: true
exit_code: 0
----- stdout -----

----- stderr -----
DEBUG uv [VERSION] ([COMMIT] DATE)
Pruning cache at: [CACHE_DIR]/
DEBUG Removing dangling source revision: [CACHE_DIR]/built-wheels-v3/[ENTRY]
DEBUG Removing dangling cache entry: [CACHE_DIR]/archive-v0/[ENTRY]
Removed 8 files ([SIZE])
"###);

// Uninstall and reinstall the package. We should use the cached version.
uv_snapshot!(context.filters(), context
.pip_uninstall()
.arg("."), @r###"
success: true
exit_code: 0
----- stdout -----

----- stderr -----
Uninstalled 1 package in [TIME]
- project==0.1.0 (from file://[TEMP_DIR]/)
"###);

// Unlike the earlier installs, the expected output here has no `Prepared` line.
uv_snapshot!(context.filters(), context
.pip_install()
.arg("."), @r###"
success: true
exit_code: 0
----- stdout -----

----- stderr -----
Resolved 1 package in [TIME]
Installed 1 package in [TIME]
+ project==0.1.0 (from file://[TEMP_DIR]/)
"###);

Ok(())
}
Loading