Skip to content

Commit

Permalink
Support globs as cache keys
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Sep 10, 2024
1 parent cfa9299 commit 81c4602
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 21 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-cache-info/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ workspace = true

[dependencies]
fs-err = { workspace = true }
glob = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"] }
thiserror = { workspace = true }
Expand Down
95 changes: 79 additions & 16 deletions crates/uv-cache-info/src/cache_info.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use crate::commit_info::CacheCommit;
use crate::timestamp::Timestamp;

use glob::MatchOptions;
use serde::Deserialize;
use std::cmp::max;
use std::io;
use std::path::{Path, PathBuf};
use tracing::debug;
use tracing::{debug, warn};

/// The information used to determine whether a built distribution is up-to-date, based on the
/// timestamps of relevant files, the current commit of a repository, etc.
Expand Down Expand Up @@ -64,24 +65,81 @@ impl CacheInfo {
// If no cache keys were defined, use the defaults.
let cache_keys = cache_keys.unwrap_or_else(|| {
vec![
CacheKey::Path(directory.join("pyproject.toml")),
CacheKey::Path(directory.join("setup.py")),
CacheKey::Path(directory.join("setup.cfg")),
CacheKey::Path("pyproject.toml".to_string()),
CacheKey::Path("setup.py".to_string()),
CacheKey::Path("setup.cfg".to_string()),
]
});

// Incorporate any additional timestamps or VCS information.
for cache_key in &cache_keys {
match cache_key {
CacheKey::Path(file) | CacheKey::File { file } => {
timestamp = max(
timestamp,
file.metadata()
.ok()
.filter(std::fs::Metadata::is_file)
.as_ref()
.map(Timestamp::from_metadata),
);
if file.chars().any(|c| matches!(c, '*' | '?' | '[')) {
// Treat the path as a glob.
let path = directory.join(file);
let Some(pattern) = path.to_str() else {
warn!("Failed to convert pattern to string: {}", path.display());
continue;
};
let paths = match glob::glob_with(
pattern,
MatchOptions {
case_sensitive: true,
require_literal_separator: true,
require_literal_leading_dot: false,
},
) {
Ok(paths) => paths,
Err(err) => {
warn!("Failed to parse glob pattern: {err}");
continue;
}
};
for entry in paths {
let entry = match entry {
Ok(entry) => entry,
Err(err) => {
warn!("Failed to read glob entry: {err}");
continue;
}
};
let metadata = match entry.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!("Failed to read metadata for glob entry: {err}");
continue;
}
};
if metadata.is_file() {
timestamp =
max(timestamp, Some(Timestamp::from_metadata(&metadata)));
} else {
warn!(
"Expected file for cache key, but found directory: `{}`",
entry.display()
);
}
}
} else {
// Treat the path as a file.
let path = directory.join(file);
let metadata = match path.metadata() {
Ok(metadata) => metadata,
Err(err) => {
warn!("Failed to read metadata for file: {err}");
continue;
}
};
if metadata.is_file() {
timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
} else {
warn!(
"Expected file for cache key, but found directory: `{}`",
path.display()
);
}
}
}
CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) {
Ok(commit_info) => commit = Some(commit_info),
Expand Down Expand Up @@ -165,10 +223,15 @@ struct ToolUv {
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)]
pub enum CacheKey {
/// Ex) `"Cargo.lock"`
Path(PathBuf),
/// Ex) `{ file = "Cargo.lock" }`
File { file: PathBuf },
/// Ex) `"Cargo.lock"` or `"**/*.toml"`
Path(String),
/// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }`
File { file: String },
/// Ex) `{ git = true }`
Git { git: bool },
}

/// A file-matching pattern: either a glob expression or a concrete filesystem path.
///
/// NOTE(review): nothing in the visible portion of this change constructs or consumes this
/// type — confirm its intended call sites before relying on it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilePattern {
    /// A glob pattern, kept as its unparsed source string.
    /// (Presumably in `glob`-crate syntax, like the cache keys — TODO confirm.)
    Glob(String),
    /// A literal path to a file.
    Path(PathBuf),
}
9 changes: 8 additions & 1 deletion crates/uv-settings/src/settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,20 @@ pub struct Options {
/// to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in
/// addition to watching the `pyproject.toml`).
///
/// Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html)
/// crate. For example, to invalidate the cache whenever a `.toml` file in the project directory
/// or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`.
/// Note that the use of globs can be expensive, as uv may need to walk the filesystem to
/// determine whether any files have changed.
///
/// Cache keys can also include version control information. For example, if a project uses
/// `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]`
/// to include the current Git commit hash in the cache key (in addition to the
/// `pyproject.toml`).
///
/// Cache keys only affect the project defined by the `pyproject.toml` in which they're
/// specified (as opposed to, e.g., affecting all members in a workspace).
/// specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
/// globs are interpreted as relative to the project directory.
#[option(
default = r#"[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]"#,
value_type = "list[dict]",
Expand Down
56 changes: 56 additions & 0 deletions crates/uv/tests/pip_install.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3265,6 +3265,62 @@ fn invalidate_path_on_cache_key() -> Result<()> {
"###
);

// Modify the `pyproject.toml` to use a glob.
pyproject_toml.write_str(
r#"[project]
name = "example"
version = "0.0.0"
dependencies = ["anyio==4.0.0"]
requires-python = ">=3.8"
[tool.uv]
cache-keys = [{ file = "**/*.txt" }]
"#,
)?;

// Write a new file.
editable_dir
.child("resources")
.child("data.txt")
.write_str("data")?;

// Installing again should update the package.
uv_snapshot!(context.filters(), context.pip_install()
.arg("example @ .")
.current_dir(editable_dir.path()), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/editable)
"###
);

// Write a new file in the current directory.
editable_dir.child("data.txt").write_str("data")?;

// Installing again should update the package.
uv_snapshot!(context.filters(), context.pip_install()
.arg("example @ .")
.current_dir(editable_dir.path()), @r###"
success: true
exit_code: 0
----- stdout -----
----- stderr -----
Resolved 4 packages in [TIME]
Prepared 1 package in [TIME]
Uninstalled 1 package in [TIME]
Installed 1 package in [TIME]
~ example==0.0.0 (from file://[TEMP_DIR]/editable)
"###
);

Ok(())
}

Expand Down
15 changes: 15 additions & 0 deletions docs/concepts/cache.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ the following to the project's `pyproject.toml`:
cache-keys = [{ file = "requirements.txt" }]
```

Globs are supported, following the syntax of the
[`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the
cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, use
the following:

```toml title="pyproject.toml"
[tool.uv]
cache-keys = [{ file = "**/*.toml" }]
```

!!! note

The use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.
This may, in turn, require traversal of large or deeply nested directories.

As an escape hatch, if a project uses `dynamic` metadata that isn't covered by `tool.uv.cache-keys`,
you can instruct uv to _always_ rebuild and reinstall it by adding the project to the
`tool.uv.reinstall-package` list:
Expand Down
9 changes: 8 additions & 1 deletion docs/reference/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,20 @@ As an example: if a project uses dynamic metadata to read its dependencies from
to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in
addition to watching the `pyproject.toml`).

Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html)
crate. For example, to invalidate the cache whenever a `.toml` file in the project directory
or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`.
Note that the use of globs can be expensive, as uv may need to walk the filesystem to
determine whether any files have changed.

Cache keys can also include version control information. For example, if a project uses
`setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]`
to include the current Git commit hash in the cache key (in addition to the
`pyproject.toml`).

Cache keys only affect the project defined by the `pyproject.toml` in which they're
specified (as opposed to, e.g., affecting all members in a workspace).
specified (as opposed to, e.g., affecting all members in a workspace), and all paths and
globs are interpreted as relative to the project directory.

**Default value**: `[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]`

Expand Down
6 changes: 3 additions & 3 deletions uv.schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 81c4602

Please sign in to comment.