diff --git a/Cargo.lock b/Cargo.lock index 500c3f810607..9c2321e60528 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4609,6 +4609,7 @@ name = "uv-cache-info" version = "0.0.1" dependencies = [ "fs-err", + "glob", "schemars", "serde", "thiserror", diff --git a/crates/uv-cache-info/Cargo.toml b/crates/uv-cache-info/Cargo.toml index e54d1f7af608..db8bff21262e 100644 --- a/crates/uv-cache-info/Cargo.toml +++ b/crates/uv-cache-info/Cargo.toml @@ -14,6 +14,7 @@ workspace = true [dependencies] fs-err = { workspace = true } +glob = { workspace = true } schemars = { workspace = true, optional = true } serde = { workspace = true, features = ["derive"] } thiserror = { workspace = true } diff --git a/crates/uv-cache-info/src/cache_info.rs b/crates/uv-cache-info/src/cache_info.rs index 5f45aa3eb51c..2ce0dea03aff 100644 --- a/crates/uv-cache-info/src/cache_info.rs +++ b/crates/uv-cache-info/src/cache_info.rs @@ -1,11 +1,12 @@ use crate::commit_info::CacheCommit; use crate::timestamp::Timestamp; +use glob::MatchOptions; use serde::Deserialize; use std::cmp::max; use std::io; use std::path::{Path, PathBuf}; -use tracing::debug; +use tracing::{debug, warn}; /// The information used to determine whether a built distribution is up-to-date, based on the /// timestamps of relevant files, the current commit of a repository, etc. @@ -64,9 +65,9 @@ impl CacheInfo { // If no cache keys were defined, use the defaults. 
let cache_keys = cache_keys.unwrap_or_else(|| { vec![ - CacheKey::Path(directory.join("pyproject.toml")), - CacheKey::Path(directory.join("setup.py")), - CacheKey::Path(directory.join("setup.cfg")), + CacheKey::Path("pyproject.toml".to_string()), + CacheKey::Path("setup.py".to_string()), + CacheKey::Path("setup.cfg".to_string()), ] }); @@ -74,14 +75,71 @@ impl CacheInfo { for cache_key in &cache_keys { match cache_key { CacheKey::Path(file) | CacheKey::File { file } => { - timestamp = max( - timestamp, - file.metadata() - .ok() - .filter(std::fs::Metadata::is_file) - .as_ref() - .map(Timestamp::from_metadata), - ); + if file.chars().any(|c| matches!(c, '*' | '?' | '[')) { + // Treat the path as a glob. + let path = directory.join(file); + let Some(pattern) = path.to_str() else { + warn!("Failed to convert pattern to string: {}", path.display()); + continue; + }; + let paths = match glob::glob_with( + pattern, + MatchOptions { + case_sensitive: true, + require_literal_separator: true, + require_literal_leading_dot: false, + }, + ) { + Ok(paths) => paths, + Err(err) => { + warn!("Failed to parse glob pattern: {err}"); + continue; + } + }; + for entry in paths { + let entry = match entry { + Ok(entry) => entry, + Err(err) => { + warn!("Failed to read glob entry: {err}"); + continue; + } + }; + let metadata = match entry.metadata() { + Ok(metadata) => metadata, + Err(err) => { + warn!("Failed to read metadata for glob entry: {err}"); + continue; + } + }; + if metadata.is_file() { + timestamp = + max(timestamp, Some(Timestamp::from_metadata(&metadata))); + } else { + warn!( + "Expected file for cache key, but found directory: `{}`", + entry.display() + ); + } + } + } else { + // Treat the path as a file. 
+ let path = directory.join(file); + let metadata = match path.metadata() { + Ok(metadata) => metadata, + Err(err) => { + warn!("Failed to read metadata for file: {err}"); + continue; + } + }; + if metadata.is_file() { + timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata))); + } else { + warn!( + "Expected file for cache key, but found directory: `{}`", + path.display() + ); + } + } } CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) { Ok(commit_info) => commit = Some(commit_info), @@ -165,10 +223,15 @@ struct ToolUv { #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] #[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] pub enum CacheKey { - /// Ex) `"Cargo.lock"` - Path(PathBuf), - /// Ex) `{ file = "Cargo.lock" }` - File { file: PathBuf }, + /// Ex) `"Cargo.lock"` or `"**/*.toml"` + Path(String), + /// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }` + File { file: String }, /// Ex) `{ git = true }` Git { git: bool }, } + +pub enum FilePattern { + Glob(String), + Path(PathBuf), +} diff --git a/crates/uv-settings/src/settings.rs b/crates/uv-settings/src/settings.rs index 48031e81d74a..565b7b119788 100644 --- a/crates/uv-settings/src/settings.rs +++ b/crates/uv-settings/src/settings.rs @@ -58,13 +58,20 @@ pub struct Options { /// to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in /// addition to watching the `pyproject.toml`). /// + /// Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) + /// crate. For example, to invalidate the cache whenever a `.toml` file in the project directory + /// or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`. + /// Note that the use of globs can be expensive, as uv may need to walk the filesystem to + /// determine whether any files have changed. 
+ /// /// Cache keys can also include version control information. For example, if a project uses /// `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]` /// to include the current Git commit hash in the cache key (in addition to the /// `pyproject.toml`). /// /// Cache keys only affect the project defined by the `pyproject.toml` in which they're - /// specified (as opposed to, e.g., affecting all members in a workspace). + /// specified (as opposed to, e.g., affecting all members in a workspace), and all paths and + /// globs are interpreted as relative to the project directory. #[option( default = r#"[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]"#, value_type = "list[dict]", diff --git a/crates/uv/tests/pip_install.rs b/crates/uv/tests/pip_install.rs index b00d4f58eb30..2987ec439954 100644 --- a/crates/uv/tests/pip_install.rs +++ b/crates/uv/tests/pip_install.rs @@ -3265,6 +3265,62 @@ fn invalidate_path_on_cache_key() -> Result<()> { "### ); + // Modify the `pyproject.toml` to use a glob. + pyproject_toml.write_str( + r#"[project] + name = "example" + version = "0.0.0" + dependencies = ["anyio==4.0.0"] + requires-python = ">=3.8" + + [tool.uv] + cache-keys = [{ file = "**/*.txt" }] +"#, + )?; + + // Write a new file. + editable_dir + .child("resources") + .child("data.txt") + .write_str("data")?; + + // Installing again should update the package. + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + ~ example==0.0.0 (from file://[TEMP_DIR]/editable) + "### + ); + + // Write a new file in the current directory. 
+ editable_dir.child("data.txt").write_str("data")?; + + // Installing again should update the package. + uv_snapshot!(context.filters(), context.pip_install() + .arg("example @ .") + .current_dir(editable_dir.path()), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 4 packages in [TIME] + Prepared 1 package in [TIME] + Uninstalled 1 package in [TIME] + Installed 1 package in [TIME] + ~ example==0.0.0 (from file://[TEMP_DIR]/editable) + "### + ); + Ok(()) } diff --git a/docs/concepts/cache.md b/docs/concepts/cache.md index 0cbcdf355d7a..04caf9889d35 100644 --- a/docs/concepts/cache.md +++ b/docs/concepts/cache.md @@ -52,6 +52,21 @@ the following to the project's `pyproject.toml`: cache-keys = [{ file = "requirements.txt" }] ``` +Globs are supported, following the syntax of the +[`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. For example, to invalidate the +cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, use +the following: + +```toml title="pyproject.toml" +[tool.uv] +cache-keys = [{ file = "**/*.toml" }] +``` + +!!! note + + The use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed. + This may, in turn, require traversal of large or deeply nested directories. + As an escape hatch, if a project uses `dynamic` metadata that isn't covered by `tool.uv.cache-keys`, you can instruct uv to _always_ rebuild and reinstall it by adding the project to the `tool.uv.reinstall-package` list: diff --git a/docs/reference/settings.md b/docs/reference/settings.md index baea1c771d94..45302b5acccb 100644 --- a/docs/reference/settings.md +++ b/docs/reference/settings.md @@ -76,13 +76,20 @@ As an example: if a project uses dynamic metadata to read its dependencies from to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`). 
+Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) +crate. For example, to invalidate the cache whenever a `.toml` file in the project directory +or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`. +Note that the use of globs can be expensive, as uv may need to walk the filesystem to +determine whether any files have changed. + Cache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`). Cache keys only affect the project defined by the `pyproject.toml` in which they're -specified (as opposed to, e.g., affecting all members in a workspace). +specified (as opposed to, e.g., affecting all members in a workspace), and all paths and +globs are interpreted as relative to the project directory. **Default value**: `[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]` diff --git a/uv.schema.json b/uv.schema.json index 7eb08ffce99a..f28b74468ab5 100644 --- a/uv.schema.json +++ b/uv.schema.json @@ -22,7 +22,7 @@ ] }, "cache-keys": { - "description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. 
By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`).\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace).", + "description": "The keys to consider when caching builds for the project.\n\nCache keys enable you to specify the files or directories that should trigger a rebuild when modified. By default, uv will rebuild a project whenever the `pyproject.toml`, `setup.py`, or `setup.cfg` files in the project directory are modified, i.e.:\n\n```toml cache-keys = [{ file = \"pyproject.toml\" }, { file = \"setup.py\" }, { file = \"setup.cfg\" }] ```\n\nAs an example: if a project uses dynamic metadata to read its dependencies from a `requirements.txt` file, you can specify `cache-keys = [{ file = \"requirements.txt\" }, { file = \"pyproject.toml\" }]` to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in addition to watching the `pyproject.toml`).\n\nGlobs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) crate. 
For example, to invalidate the cache whenever a `.toml` file in the project directory or any of its subdirectories is modified, you can specify `cache-keys = [{ file = \"**/*.toml\" }]`. Note that the use of globs can be expensive, as uv may need to walk the filesystem to determine whether any files have changed.\n\nCache keys can also include version control information. For example, if a project uses `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = \"pyproject.toml\" }]` to include the current Git commit hash in the cache key (in addition to the `pyproject.toml`).\n\nCache keys only affect the project defined by the `pyproject.toml` in which they're specified (as opposed to, e.g., affecting all members in a workspace), and all paths and globs are interpreted as relative to the project directory.", "writeOnly": true, "type": [ "array", @@ -432,11 +432,11 @@ "CacheKey": { "anyOf": [ { - "description": "Ex) `\"Cargo.lock\"`", + "description": "Ex) `\"Cargo.lock\"` or `\"**/*.toml\"`", "type": "string" }, { - "description": "Ex) `{ file = \"Cargo.lock\" }`", + "description": "Ex) `{ file = \"Cargo.lock\" }` or `{ file = \"**/*.toml\" }`", "type": "object", "required": [ "file"