Skip to content

Commit

Permalink
Add a garbage collection mechanism to the CLI (#1217)
Browse files Browse the repository at this point in the history
## Summary

Detects unused cache entries, which can come in a few forms:

1. Directories that are outdated under our versioning scheme.
2. Old source distribution builds (i.e., we have a more recent version).
3. Old wheels (stored in `archive-v0`, but not symlinked-to from
anywhere in the cache).

Closes #1059.
  • Loading branch information
charliermarsh committed Mar 21, 2024
1 parent 7ee90dc commit 0f96386
Show file tree
Hide file tree
Showing 8 changed files with 348 additions and 28 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/uv-cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ clap = { workspace = true, features = ["derive", "env"], optional = true }
directories = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
nanoid = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
walkdir = { workspace = true }
88 changes: 80 additions & 8 deletions crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@ use std::ops::Deref;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use distribution_types::InstalledDist;
use fs_err as fs;
use rustc_hash::FxHashSet;
use tempfile::{tempdir, TempDir};
use tracing::debug;

use distribution_types::InstalledDist;
use uv_fs::directories;
use uv_normalize::PackageName;

Expand Down Expand Up @@ -283,17 +285,72 @@ impl Cache {
/// Returns the number of entries removed from the cache.
pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
// Accumulates the result of removing `name` from each bucket.
let mut summary = Removal::default();
// NOTE(review): this span is a rendered diff. The explicit bucket list below is the removed
// form of the loop header; it does not include `FlatIndex` or `Archive`.
for bucket in [
CacheBucket::Wheels,
CacheBucket::BuiltWheels,
CacheBucket::Git,
CacheBucket::Interpreter,
CacheBucket::Simple,
] {
// Replacement loop header: visit every bucket yielded by `CacheBucket::iter()`.
for bucket in CacheBucket::iter() {
summary += bucket.remove(self, name)?;
}
Ok(summary)
}

/// Run the garbage collector on the cache, removing any dangling entries.
///
/// Pruning happens in two passes:
///
/// 1. Every top-level entry of the cache root is removed unless it is one of the marker
///    entries (`CACHEDIR.TAG`, `.gitignore`, `.git`) or a directory named after a current
///    [`CacheBucket`].
/// 2. Every entry of the archive bucket is removed unless some symlink found while walking the
///    buckets resolves to it.
///
/// Returns a [`Removal`] accumulated over everything that was deleted.
///
/// # Errors
///
/// Returns an [`io::Error`] if the cache root cannot be read, or if walking, resolving, or
/// removing an entry fails. A missing archive bucket and symlinks whose target no longer
/// exists are not treated as errors.
pub fn prune(&self) -> Result<Removal, io::Error> {
    let mut summary = Removal::default();

    // First, remove any top-level directories that are unused. These typically represent
    // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
    for entry in fs::read_dir(&self.root)? {
        let entry = entry?;
        let metadata = entry.metadata()?;
        let file_name = entry.file_name();

        // Marker entries are always preserved.
        if file_name == "CACHEDIR.TAG" || file_name == ".gitignore" || file_name == ".git" {
            continue;
        }

        if metadata.is_dir() {
            // If the directory is not a cache bucket, remove it.
            if CacheBucket::iter().all(|bucket| file_name != bucket.to_str()) {
                let path = entry.path();
                debug!("Removing dangling cache entry: {}", path.display());
                summary += rm_rf(path)?;
            }
        } else {
            // If the file is not a marker file, remove it.
            let path = entry.path();
            debug!("Removing dangling cache entry: {}", path.display());
            summary += rm_rf(path)?;
        }
    }

    // Second, remove any unused archives (by searching for archives that are not symlinked).
    // TODO(charlie): Remove any unused source distributions. This requires introspecting the
    // cache contents, e.g., reading and deserializing the manifests.
    let mut references = FxHashSet::default();

    for bucket in CacheBucket::iter() {
        let bucket = self.bucket(bucket);
        if bucket.is_dir() {
            for entry in walkdir::WalkDir::new(bucket) {
                let entry = entry?;
                if entry.file_type().is_symlink() {
                    match entry.path().canonicalize() {
                        Ok(target) => {
                            references.insert(target);
                        }
                        // A symlink whose target is already gone references no archive, so
                        // it must not abort the whole prune.
                        Err(err) if err.kind() == io::ErrorKind::NotFound => {}
                        // Any other failure could hide a live reference; surface it rather
                        // than risk deleting an archive that is still in use.
                        Err(err) => return Err(err),
                    }
                }
            }
        }
    }

    // The archive bucket may not exist yet (e.g., nothing has been unpacked into it); guard
    // it the same way as the buckets walked above instead of failing with `NotFound`.
    let archive = self.bucket(CacheBucket::Archive);
    if archive.is_dir() {
        for entry in fs::read_dir(archive)? {
            let entry = entry?;
            let path = entry.path().canonicalize()?;
            if !references.contains(&path) {
                debug!("Removing dangling cache entry: {}", path.display());
                summary += rm_rf(path)?;
            }
        }
    }

    Ok(summary)
}
}

/// The different kinds of data in the cache are stored in different buckets, which in our case
Expand Down Expand Up @@ -633,6 +690,21 @@ impl CacheBucket {
}
Ok(summary)
}

/// Return an iterator over all cache buckets.
///
/// Buckets are yielded by value, always in the same order: `Wheels`, `BuiltWheels`,
/// `FlatIndex`, `Git`, `Interpreter`, `Simple`, `Archive`.
pub fn iter() -> impl Iterator<Item = CacheBucket> {
    // The complete, fixed list of buckets this iterator visits.
    const BUCKETS: [CacheBucket; 7] = [
        CacheBucket::Wheels,
        CacheBucket::BuiltWheels,
        CacheBucket::FlatIndex,
        CacheBucket::Git,
        CacheBucket::Interpreter,
        CacheBucket::Simple,
        CacheBucket::Archive,
    ];

    BUCKETS.iter().copied()
}
}

impl Display for CacheBucket {
Expand Down
20 changes: 2 additions & 18 deletions crates/uv/src/commands/cache_clean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ use uv_cache::Cache;
use uv_fs::Simplified;
use uv_normalize::PackageName;

use crate::commands::ExitStatus;
use crate::commands::{human_readable_bytes, ExitStatus};
use crate::printer::Printer;

/// Clear the cache.
/// Clear the cache, removing all entries or those linked to specific packages.
pub(crate) fn cache_clean(
packages: &[PackageName],
cache: &Cache,
Expand Down Expand Up @@ -123,19 +123,3 @@ pub(crate) fn cache_clean(

Ok(ExitStatus::Success)
}

/// Scales a byte count to a human readable quantity with a binary (IEC) unit.
///
/// The unit index is `floor(log2(bytes) / 10)`, capped at `EiB`; the quantity is `bytes`
/// divided by `1024` raised to that index. All arithmetic is done in `f32`, so the quantity
/// is approximate for large inputs. A count of `0` yields `(0.0, "B")`.
///
/// Returns a tuple of `(quantity, units)`, e.g. `1536` becomes `(1.5, "KiB")`.
// The numeric casts below (u64 -> f32 -> usize -> i32) are intentional and lossy by design.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_precision_loss,
    clippy::cast_sign_loss
)]
fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
    const UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];

    let quantity = bytes as f32;
    // Each step of 10 in the base-2 logarithm moves one unit up the table.
    let exponent = (quantity.log2() / 10.0) as usize;
    let index = std::cmp::min(exponent, UNITS.len() - 1);
    let divisor = 1024_f32.powi(index as i32);

    (quantity / divisor, UNITS[index])
}
66 changes: 66 additions & 0 deletions crates/uv/src/commands/cache_prune.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
use std::fmt::Write;

use anyhow::{Context, Result};
use owo_colors::OwoColorize;

use uv_cache::Cache;
use uv_fs::Simplified;

use crate::commands::{human_readable_bytes, ExitStatus};
use crate::printer::Printer;

/// Prune all unreachable objects from the cache.
///
/// Writes progress and a one-line summary to the printer's stderr stream: the number of files
/// (or, when no files were removed, directories) deleted, followed by the total size removed
/// when it is non-zero. Returns [`ExitStatus::Success`] both when the cache root does not
/// exist and after a completed prune.
///
/// # Errors
///
/// Fails if writing to the printer fails or if `Cache::prune` reports an error.
pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result<ExitStatus> {
    // Without a cache directory there is nothing to prune.
    if !cache.root().exists() {
        writeln!(
            printer.stderr(),
            "No cache found at: {}",
            cache.root().user_display().cyan()
        )?;
        return Ok(ExitStatus::Success);
    }

    writeln!(
        printer.stderr(),
        "Pruning cache at: {}",
        cache.root().user_display().cyan()
    )?;

    let removal = cache
        .prune()
        .with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;

    // Describe what was removed; file counts take precedence over directory counts.
    let headline = match (removal.num_files, removal.num_dirs) {
        (0, 0) => String::from("No unused entries found"),
        (0, 1) => String::from("Removed 1 directory"),
        (0, dirs) => format!("Removed {dirs} directories"),
        (1, _) => String::from("Removed 1 file"),
        (files, _) => format!("Removed {files} files"),
    };
    write!(printer.stderr(), "{headline}")?;

    // Append the reclaimed size, if any; sizes below 1024 are printed as exact byte counts.
    if removal.total_bytes > 0 {
        let size = if removal.total_bytes >= 1024 {
            let (quantity, unit) = human_readable_bytes(removal.total_bytes);
            format!("{quantity:.1}{unit}")
        } else {
            format!("{}B", removal.total_bytes)
        };
        write!(printer.stderr(), " ({})", size.green())?;
    }

    // Terminate the summary line.
    writeln!(printer.stderr())?;

    Ok(ExitStatus::Success)
}
18 changes: 18 additions & 0 deletions crates/uv/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use owo_colors::OwoColorize;

pub(crate) use cache_clean::cache_clean;
pub(crate) use cache_dir::cache_dir;
pub(crate) use cache_prune::cache_prune;
use distribution_types::InstalledMetadata;
pub(crate) use pip_check::pip_check;
pub(crate) use pip_compile::{extra_name_with_clap_error, pip_compile};
Expand All @@ -28,6 +29,7 @@ use crate::printer::Printer;

mod cache_clean;
mod cache_dir;
mod cache_prune;
mod pip_check;
mod pip_compile;
mod pip_freeze;
Expand Down Expand Up @@ -155,3 +157,19 @@ pub(super) async fn compile_bytecode(
)?;
Ok(())
}

/// Scales a byte count to a human readable quantity with a binary (IEC) unit.
///
/// The unit index is `floor(log2(bytes) / 10)`, capped at `EiB`; the quantity is `bytes`
/// divided by `1024` raised to that index. All arithmetic is done in `f32`, so the quantity
/// is approximate for large inputs. A count of `0` yields `(0.0, "B")`.
///
/// Returns a tuple of `(quantity, units)`, e.g. `1536` becomes `(1.5, "KiB")`.
// The numeric casts below (u64 -> f32 -> usize -> i32) are intentional and lossy by design.
#[allow(
    clippy::cast_possible_truncation,
    clippy::cast_possible_wrap,
    clippy::cast_precision_loss,
    clippy::cast_sign_loss
)]
pub(super) fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
    const UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];

    let quantity = bytes as f32;
    // Each step of 10 in the base-2 logarithm moves one unit up the table.
    let exponent = (quantity.log2() / 10.0) as usize;
    let index = std::cmp::min(exponent, UNITS.len() - 1);
    let divisor = 1024_f32.powi(index as i32);

    (quantity / divisor, UNITS[index])
}
9 changes: 7 additions & 2 deletions crates/uv/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ enum Commands {
/// Manage the `uv` executable.
#[clap(name = "self")]
Self_(SelfNamespace),
/// Remove all items from the cache.
/// Clear the cache, removing all entries or those linked to specific packages.
#[clap(hide = true)]
Clean(CleanArgs),
/// Display uv's version
Expand Down Expand Up @@ -170,8 +170,10 @@ struct CacheNamespace {

// Subcommands accepted under the cache namespace: `Clean` carries `CleanArgs`, while `Prune`
// and `Dir` take no arguments.
// NOTE(review): the `///` lines on the variants are presumably rendered by clap as help text,
// so they are left untouched. This span is a rendered diff: the first doc line on `Clean` is
// the removed wording and the second is its replacement.
#[derive(Subcommand)]
enum CacheCommand {
/// Remove all items from the cache.
/// Clear the cache, removing all entries or those linked to specific packages.
Clean(CleanArgs),
/// Prune all unreachable objects from the cache.
Prune,
/// Show the cache directory.
Dir,
}
Expand Down Expand Up @@ -1759,6 +1761,9 @@ async fn run() -> Result<ExitStatus> {
command: CacheCommand::Clean(args),
})
| Commands::Clean(args) => commands::cache_clean(&args.package, &cache, printer),
Commands::Cache(CacheNamespace {
command: CacheCommand::Prune,
}) => commands::cache_prune(&cache, printer),
Commands::Cache(CacheNamespace {
command: CacheCommand::Dir,
}) => {
Expand Down
Loading

0 comments on commit 0f96386

Please sign in to comment.