From c43757ad4c1d7b97232f3ce52b25f297df3b490d Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Fri, 12 Apr 2024 14:48:03 -0400 Subject: [PATCH] Avoid calling `normalize_path` with relative paths that extend beyond the current directory (#3013) ## Summary It turns out that `normalize_path` (sourced from Cargo) has a subtle bug. If you pass it a relative path that traverses beyond the root, it silently drops components. So, e.g., passing `../foo/bar`, it will just drop the leading `..` and return `foo/bar`. This PR encodes that behavior as a `Result` and avoids using it in such cases. Closes https://github.com/astral-sh/uv/issues/3012. --- Cargo.lock | 19 +++++++++ Cargo.toml | 1 + crates/pep508-rs/src/verbatim_url.rs | 10 +++-- crates/uv-fs/Cargo.toml | 1 + crates/uv-fs/src/path.rs | 54 +++++++++++++++++++++--- crates/uv-interpreter/src/find_python.rs | 4 +- crates/uv-interpreter/src/interpreter.rs | 4 +- req.txt | 1 + 8 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 req.txt diff --git a/Cargo.lock b/Cargo.lock index 6a58eb02511a..fee504a2deb9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2373,6 +2373,24 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-absolutize" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4af381fe79fa195b4909485d99f73a80792331df0625188e707854f0b3383f5" +dependencies = [ + "path-dedot", +] + +[[package]] +name = "path-dedot" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07ba0ad7e047712414213ff67533e6dd477af0a4e1d14fb52343e53d30ea9397" +dependencies = [ + "once_cell", +] + [[package]] name = "pathdiff" version = "0.2.1" @@ -4616,6 +4634,7 @@ dependencies = [ "fs2", "junction", "once_cell", + "path-absolutize", "tempfile", "tokio", "tracing", diff --git a/Cargo.toml b/Cargo.toml index b253d8a362cd..ac7fe99ab18c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -99,6 +99,7 @@ miette = { version = "7.2.0" } nanoid = { version = "0.4.0" } once_cell = { version = "1.19.0" } owo-colors = { version = "4.0.0" } +path-absolutize = { version = "3.1.1" } pathdiff = { version = "0.2.1" } petgraph = { version = "0.6.4" } platform-info = { version = "2.0.2" } diff --git a/crates/pep508-rs/src/verbatim_url.rs b/crates/pep508-rs/src/verbatim_url.rs index 78c76ee318ef..803e96b273ff 100644 --- a/crates/pep508-rs/src/verbatim_url.rs +++ b/crates/pep508-rs/src/verbatim_url.rs @@ -36,11 +36,13 @@ impl VerbatimUrl { } /// Create a [`VerbatimUrl`] from a file path. + /// + /// Assumes that the path is absolute. pub fn from_path(path: impl AsRef) -> Self { let path = path.as_ref(); // Normalize the path. - let path = normalize_path(path); + let path = normalize_path(path).expect("path is absolute"); // Extract the fragment, if it exists. let (path, fragment) = split_fragment(&path); @@ -76,7 +78,7 @@ impl VerbatimUrl { }; // Normalize the path. - let path = normalize_path(path); + let path = normalize_path(&path).expect("path is absolute"); // Extract the fragment, if it exists. let (path, fragment) = split_fragment(&path); @@ -110,7 +112,9 @@ impl VerbatimUrl { }; // Normalize the path. - let path = normalize_path(path); + let Ok(path) = normalize_path(&path) else { + return Err(VerbatimUrlError::RelativePath(path)); + }; // Extract the fragment, if it exists. let (path, fragment) = split_fragment(&path); diff --git a/crates/uv-fs/Cargo.toml b/crates/uv-fs/Cargo.toml index f6ab7fb9ce2f..208558ed83ab 100644 --- a/crates/uv-fs/Cargo.toml +++ b/crates/uv-fs/Cargo.toml @@ -22,6 +22,7 @@ fs-err = { workspace = true } fs2 = { workspace = true } junction = { workspace = true } once_cell = { workspace = true } +path-absolutize = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, optional = true } tracing = { workspace = true } diff --git a/crates/uv-fs/src/path.rs b/crates/uv-fs/src/path.rs index f6e94bcf022e..3ae7102ae16a 100644 --- a/crates/uv-fs/src/path.rs +++ b/crates/uv-fs/src/path.rs @@ -84,8 +84,21 @@ pub fn normalize_url_path(path: &str) -> Cow<'_, str> { /// Normalize a path, removing things like `.` and `..`. /// /// Source: -pub fn normalize_path(path: impl AsRef) -> PathBuf { - let mut components = path.as_ref().components().peekable(); +/// +/// CAUTION: Assumes that the path is already absolute. +/// +/// CAUTION: This does not resolve symlinks (unlike +/// [`std::fs::canonicalize`]). This may cause incorrect or surprising +/// behavior at times. This should be used carefully. Unfortunately, +/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often +/// fail, or on Windows returns annoying device paths. +/// +/// # Errors +/// +/// When a relative path is provided with `..` components that extend beyond the base directory. +/// For example, `./a/../../b` cannot be normalized because it escapes the base directory. +pub fn normalize_path(path: &Path) -> Result { + let mut components = path.components().peekable(); let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().copied() { components.next(); PathBuf::from(c.as_os_str()) @@ -101,14 +114,29 @@ pub fn normalize_path(path: impl AsRef) -> PathBuf { } Component::CurDir => {} Component::ParentDir => { - ret.pop(); + if !ret.pop() { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "cannot normalize a relative path beyond the base directory", + )); + } } Component::Normal(c) => { ret.push(c); } } } - ret + Ok(ret) +} + +/// Convert a path to an absolute path, relative to the current working directory. +/// +/// Unlike [`std::fs::canonicalize`], this function does not resolve symlinks and does not require +/// the path to exist. +pub fn absolutize_path(path: &Path) -> Result, std::io::Error> { + use path_absolutize::Absolutize; + + path.absolutize_from(&*CWD) } /// Like `fs_err::canonicalize`, but with permissive failures on Windows. @@ -235,7 +263,7 @@ mod tests { use super::*; #[test] - fn normalize() { + fn test_normalize_url() { if cfg!(windows) { assert_eq!( normalize_url_path("/C:/Users/ferris/wheel-0.42.0.tar.gz"), @@ -272,4 +300,20 @@ mod tests { ); } } + + #[test] + fn test_normalize_path() { + let path = Path::new("/a/b/../c/./d"); + let normalized = normalize_path(path).unwrap(); + assert_eq!(normalized, Path::new("/a/c/d")); + + let path = Path::new("/a/../c/./d"); + let normalized = normalize_path(path).unwrap(); + assert_eq!(normalized, Path::new("/c/d")); + + // This should be an error. + let path = Path::new("/a/../../c/./d"); + let err = normalize_path(path).unwrap_err(); + assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput); + } } diff --git a/crates/uv-interpreter/src/find_python.rs b/crates/uv-interpreter/src/find_python.rs index 2fc45bdecd69..0acc5b557568 100644 --- a/crates/uv-interpreter/src/find_python.rs +++ b/crates/uv-interpreter/src/find_python.rs @@ -6,7 +6,6 @@ use std::path::PathBuf; use tracing::{debug, instrument}; use uv_cache::Cache; -use uv_fs::normalize_path; use uv_toolchain::PythonVersion; use crate::interpreter::InterpreterInfoError; @@ -52,8 +51,7 @@ pub fn find_requested_python(request: &str, cache: &Cache) -> Result Result { - let executable_bytes = executable.as_os_str().as_encoded_bytes(); - let cache_entry = cache.entry( CacheBucket::Interpreter, "", - format!("{}.msgpack", digest(&executable_bytes)), + format!("{}.msgpack", digest(&executable)), ); let modified = Timestamp::from_path(uv_fs::canonicalize_executable(executable)?)?; diff --git a/req.txt b/req.txt new file mode 100644 index 000000000000..8d0fce4a443f --- /dev/null +++ b/req.txt @@ -0,0 +1 @@ +httpx @ git+https://github.com/encode/httpx@master