diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 98bffb0a11f7..98a2a473f7f2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -71,7 +71,7 @@ Source distributions can run arbitrary code on build and can make unwanted modif ```bash docker buildx build -t uv-builder -f builder.dockerfile --load . # Build for musl to avoid glibc errors, might not be required with your OS version -cargo build --target x86_64-unknown-linux-musl --profile profiling --features vendored-openssl +cargo build --target x86_64-unknown-linux-musl --profile profiling docker run --rm -it -v $(pwd):/app uv-builder /app/target/x86_64-unknown-linux-musl/profiling/uv-dev resolve-many --cache-dir /app/cache-docker /app/scripts/popular_packages/pypi_10k_most_dependents.txt ``` diff --git a/Cargo.lock b/Cargo.lock index 7e90c7417e94..99c12639debb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1058,7 +1058,6 @@ checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", - "subtle", ] [[package]] @@ -1465,21 +1464,6 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" -[[package]] -name = "git2" -version = "0.18.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "232e6a7bfe35766bf715e55a88b39a700596c0ccfd88cd3680b4cdb40d66ef70" -dependencies = [ - "bitflags 2.5.0", - "libc", - "libgit2-sys", - "log", - "openssl-probe", - "openssl-sys", - "url", -] - [[package]] name = "glob" version = "0.3.1" @@ -1578,15 +1562,6 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] 
- [[package]] name = "home" version = "0.5.9" @@ -2002,20 +1977,6 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" -[[package]] -name = "libgit2-sys" -version = "0.16.2+1.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8" -dependencies = [ - "cc", - "libc", - "libssh2-sys", - "libz-sys", - "openssl-sys", - "pkg-config", -] - [[package]] name = "libmimalloc-sys" version = "0.1.38" @@ -2036,20 +1997,6 @@ dependencies = [ "libc", ] -[[package]] -name = "libssh2-sys" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dc8a030b787e2119a731f1951d6a773e2280c660f8ec4b0f5e1505a386e71ee" -dependencies = [ - "cc", - "libc", - "libz-sys", - "openssl-sys", - "pkg-config", - "vcpkg", -] - [[package]] name = "libz-ng-sys" version = "1.1.15" @@ -2060,18 +2007,6 @@ dependencies = [ "libc", ] -[[package]] -name = "libz-sys" -version = "1.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e143b5e666b2695d28f6bca6497720813f699c9602dd7f5cac91008b8ada7f9" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "linked-hash-map" version = "0.5.6" @@ -2368,28 +2303,6 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" -[[package]] -name = "openssl-src" -version = "300.2.3+3.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cff92b6f71555b61bb9315f7c64da3ca43d87531622120fea0195fc761b4843" -dependencies = [ - "cc", -] - -[[package]] -name = "openssl-sys" -version = "0.9.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" 
-dependencies = [ - "cc", - "libc", - "openssl-src", - "pkg-config", - "vcpkg", -] - [[package]] name = "option-ext" version = "0.2.0" @@ -2866,7 +2779,6 @@ version = "0.0.1" dependencies = [ "anyhow", "chrono", - "git2", "indexmap", "mailparse", "once_cell", @@ -3573,17 +3485,6 @@ dependencies = [ "serde", ] -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - [[package]] name = "sha2" version = "0.10.8" @@ -4832,17 +4733,10 @@ name = "uv-git" version = "0.0.1" dependencies = [ "anyhow", - "base64 0.22.1", "cache-key", "cargo-util", - "fs-err", - "git2", - "glob", - "hmac", - "home", - "rand", "reqwest", - "sha1", + "thiserror", "tokio", "tracing", "url", @@ -5119,12 +5013,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "version_check" version = "0.9.4" diff --git a/Cargo.toml b/Cargo.toml index d48c72db3da0..a91c89910e10 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,11 +80,8 @@ flate2 = { version = "1.0.28", default-features = false } fs-err = { version = "2.11.0" } fs2 = { version = "0.4.3" } futures = { version = "0.3.30" } -git2 = { version = "0.18.1" } glob = { version = "0.3.1" } hex = { version = "0.4.3" } -hmac = { version = "0.12.1" } -home = { version = "0.5.9" } html-escape = { version = "0.2.13" } http = { version = "1.1.0" } indexmap = { version = "2.2.5" } @@ -106,7 +103,6 @@ platform-info = { version = "2.0.2" } pubgrub = { git = "https://github.com/astral-sh/pubgrub", rev = "0e684a874c9fb8f74738cd8875524c80e3d4820b" } 
pyo3 = { version = "0.21.0" } pyo3-log = { version = "0.10.0" } -rand = { version = "0.8.5" } rayon = { version = "1.8.0" } reflink-copy = { version = "0.1.15" } regex = { version = "1.10.2" } @@ -122,7 +118,6 @@ schemars = { version = "0.8.16", features = ["url"] } seahash = { version = "4.1.0" } serde = { version = "1.0.197" } serde_json = { version = "1.0.114" } -sha1 = { version = "0.10.6" } sha2 = { version = "0.10.8" } sys-info = { version = "0.9.1" } tempfile = { version = "3.9.0" } diff --git a/crates/distribution-types/Cargo.toml b/crates/distribution-types/Cargo.toml index 1c30d1d55d6f..1538b10325d2 100644 --- a/crates/distribution-types/Cargo.toml +++ b/crates/distribution-types/Cargo.toml @@ -20,7 +20,7 @@ pep508_rs = { workspace = true } platform-tags = { workspace = true } pypi-types = { workspace = true } uv-fs = { workspace = true } -uv-git = { workspace = true, features = ["vendored-openssl"] } +uv-git = { workspace = true } uv-normalize = { workspace = true } anyhow = { workspace = true } diff --git a/crates/pypi-types/Cargo.toml b/crates/pypi-types/Cargo.toml index 9a68eb749ab1..1041e5a9381d 100644 --- a/crates/pypi-types/Cargo.toml +++ b/crates/pypi-types/Cargo.toml @@ -19,7 +19,6 @@ uv-normalize = { workspace = true } uv-git = { workspace = true } chrono = { workspace = true, features = ["serde"] } -git2 = { workspace = true } indexmap = { workspace = true, features = ["serde"] } mailparse = { workspace = true } once_cell = { workspace = true } diff --git a/crates/pypi-types/src/parsed_url.rs b/crates/pypi-types/src/parsed_url.rs index 213f8403919c..dcec32bfb58f 100644 --- a/crates/pypi-types/src/parsed_url.rs +++ b/crates/pypi-types/src/parsed_url.rs @@ -5,7 +5,7 @@ use thiserror::Error; use url::{ParseError, Url}; use pep508_rs::{Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError}; -use uv_git::GitUrl; +use uv_git::{GitUrl, OidParseError}; use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind}; @@ -20,7 +20,7 @@ pub enum 
ParsedUrlError { #[error("Invalid path in file URL: `{0}`")] InvalidFileUrl(Url), #[error("Failed to parse Git reference from URL: `{0}`")] - GitShaParse(Url, #[source] git2::Error), + GitShaParse(Url, #[source] OidParseError), #[error("Not a valid URL: `{0}`")] UrlParse(String, #[source] ParseError), #[error(transparent)] diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index dc1d1aa25aad..d3319a13ddef 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -25,7 +25,7 @@ uv-cache = { workspace = true } uv-client = { workspace = true } uv-extract = { workspace = true } uv-fs = { workspace = true, features = ["tokio"] } -uv-git = { workspace = true, features = ["vendored-openssl"] } +uv-git = { workspace = true } uv-normalize = { workspace = true } uv-types = { workspace = true } uv-configuration = { workspace = true } diff --git a/crates/uv-git/Cargo.toml b/crates/uv-git/Cargo.toml index 56a271195edd..b97926e37933 100644 --- a/crates/uv-git/Cargo.toml +++ b/crates/uv-git/Cargo.toml @@ -17,20 +17,9 @@ cache-key = { workspace = true } uv-fs = { workspace = true } anyhow = { workspace = true } -base64 = { workspace = true } +thiserror = { workspace = true } cargo-util = { workspace = true } -git2 = { workspace = true } -glob = { workspace = true } -hmac = { workspace = true } -home = { workspace = true } -rand = { workspace = true } reqwest = { workspace = true, features = ["blocking"] } -sha1 = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } url = { workspace = true } -fs-err = { workspace = true } - -[features] -vendored-libgit2 = ["git2/vendored-libgit2"] -vendored-openssl = ["git2/vendored-openssl"] diff --git a/crates/uv-git/src/git.rs b/crates/uv-git/src/git.rs index 3fb01ad00913..fb141c2f2075 100644 --- a/crates/uv-git/src/git.rs +++ b/crates/uv-git/src/git.rs @@ -1,22 +1,19 @@ //! Git support is derived from Cargo's implementation. //! 
Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice. //! Source: -use std::borrow::Cow; +use std::fmt::Display; use std::path::{Path, PathBuf}; -use std::process::Command; -use std::{env, str}; +use std::str::{self}; use anyhow::{anyhow, Context, Result}; use cargo_util::{paths, ProcessBuilder}; -use git2::{ErrorClass, ObjectType}; use reqwest::Client; use reqwest::StatusCode; -use tracing::{debug, warn}; +use tracing::debug; use url::Url; use uv_fs::Simplified; -use crate::util::retry; -use crate::FetchStrategy; +use crate::sha::GitOid; /// A file indicates that if present, `git reset` has been done and a repo /// checkout is ready to go. See [`GitCheckout::reset`] for why we need this. @@ -111,15 +108,9 @@ impl GitReference { } } -/// A short abbreviated OID. -/// -/// Exists for avoiding extra allocations in [`GitDatabase::to_short_id`]. -pub(crate) struct GitShortID(git2::Buf); - -impl GitShortID { - /// Views the short ID as a `str`. - pub(crate) fn as_str(&self) -> &str { - self.0.as_str().unwrap() +impl Display for GitReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str().unwrap_or("HEAD")) } } @@ -133,24 +124,69 @@ pub(crate) struct GitRemote { /// A local clone of a remote repository's database. Multiple [`GitCheckout`]s /// can be cloned from a single [`GitDatabase`]. pub(crate) struct GitDatabase { - /// The remote repository where this database is fetched from. - remote: GitRemote, - /// Path to the root of the underlying Git repository on the local filesystem. - path: PathBuf, /// Underlying Git repository instance for this database. - repo: git2::Repository, + repo: GitRepository, } -/// A local checkout of a particular revision from a [`GitDatabase`]. -pub(crate) struct GitCheckout<'a> { - /// The git database where this checkout is cloned from. - database: &'a GitDatabase, - /// Path to the root of the underlying Git repository on the local filesystem. 
- path: PathBuf, + /// A local checkout of a particular revision from a [`GitRepository`]. +pub(crate) struct GitCheckout { /// The git revision this checkout is for. - revision: git2::Oid, + revision: GitOid, /// Underlying Git repository instance for this checkout. - repo: git2::Repository, + repo: GitRepository, +} + +/// A local Git repository. +pub(crate) struct GitRepository { + /// Path to the underlying Git repository on the local filesystem. + path: PathBuf, +} + +impl GitRepository { + /// Opens an existing Git repository at `path`. + pub(crate) fn open(path: &Path) -> Result { + // Make sure there is a Git repository at the specified path. + ProcessBuilder::new("git") + .arg("rev-parse") + .cwd(path) + .exec_with_output()?; + + Ok(GitRepository { + path: path.to_path_buf(), + }) + } + + /// Initializes a Git repository at `path`. + fn init(path: &Path) -> Result { + // TODO(ibraheem): see if this is still necessary now that we no longer use libgit2 + // Skip anything related to templates, they just cause all sorts of issues as + // we really don't want to use them yet they insist on being used. See #6240 + // for an example issue that comes up. + // opts.external_template(false); + + // Initialize the repository. + ProcessBuilder::new("git") + .arg("init") + .cwd(path) + .exec_with_output()?; + + Ok(GitRepository { + path: path.to_path_buf(), + }) + } + + /// Fetches the object ID of the given `refname`. + fn rev_parse(&self, refname: &str) -> Result { + let result = ProcessBuilder::new("git") + .arg("rev-parse") + .arg(refname) + .cwd(&self.path) + .exec_with_output()?; + + let mut result = String::from_utf8(result.stdout)?; + result.truncate(result.trim_end().len()); + Ok(result.parse()?) 
+ } } impl GitRemote { @@ -181,20 +217,20 @@ impl GitRemote { into: &Path, db: Option, reference: &GitReference, - locked_rev: Option, - strategy: FetchStrategy, + locked_rev: Option, client: &Client, - ) -> Result<(GitDatabase, git2::Oid)> { + ) -> Result<(GitDatabase, GitOid)> { let locked_ref = locked_rev.map(|oid| GitReference::FullCommit(oid.to_string())); let reference = locked_ref.as_ref().unwrap_or(reference); if let Some(mut db) = db { - fetch(&mut db.repo, self.url.as_str(), reference, strategy, client) + fetch(&mut db.repo, self.url.as_str(), reference, client) .with_context(|| format!("failed to fetch into: {}", into.user_display()))?; let resolved_commit_hash = match locked_rev { Some(rev) => db.contains(rev).then_some(rev), None => reference.resolve(&db.repo).ok(), }; + if let Some(rev) = resolved_commit_hash { return Ok((db, rev)); } @@ -206,266 +242,154 @@ impl GitRemote { if into.exists() { paths::remove_dir_all(into)?; } + paths::create_dir_all(into)?; - let mut repo = init(into, true)?; - fetch(&mut repo, self.url.as_str(), reference, strategy, client) + let mut repo = GitRepository::init(into)?; + fetch(&mut repo, self.url.as_str(), reference, client) .with_context(|| format!("failed to clone into: {}", into.user_display()))?; let rev = match locked_rev { Some(rev) => rev, None => reference.resolve(&repo)?, }; - Ok(( - GitDatabase { - remote: self.clone(), - path: into.to_path_buf(), - repo, - }, - rev, - )) + Ok((GitDatabase { repo }, rev)) } /// Creates a [`GitDatabase`] of this remote at `db_path`. + #[allow(clippy::unused_self)] pub(crate) fn db_at(&self, db_path: &Path) -> Result { - let repo = git2::Repository::open(db_path)?; - Ok(GitDatabase { - remote: self.clone(), - path: db_path.to_path_buf(), - repo, - }) + let repo = GitRepository::open(db_path)?; + Ok(GitDatabase { repo }) } } impl GitDatabase { /// Checkouts to a revision at `destination` from this database. 
- pub(crate) fn copy_to( - &self, - rev: git2::Oid, - destination: &Path, - strategy: FetchStrategy, - client: &Client, - ) -> Result> { + pub(crate) fn copy_to(&self, rev: GitOid, destination: &Path) -> Result { // If the existing checkout exists, and it is fresh, use it. // A non-fresh checkout can happen if the checkout operation was // interrupted. In that case, the checkout gets deleted and a new // clone is created. - let checkout = match git2::Repository::open(destination) + let checkout = match GitRepository::open(destination) .ok() - .map(|repo| GitCheckout::new(self, rev, repo)) + .map(|repo| GitCheckout::new(rev, repo)) .filter(GitCheckout::is_fresh) { Some(co) => co, None => GitCheckout::clone_into(destination, self, rev)?, }; - checkout.update_submodules(strategy, client)?; + checkout.update_submodules()?; Ok(checkout) } /// Get a short OID for a `revision`, usually 7 chars or more if ambiguous. - pub(crate) fn to_short_id(&self, revision: git2::Oid) -> Result { - let obj = self.repo.find_object(revision, None)?; - Ok(GitShortID(obj.short_id()?)) + pub(crate) fn to_short_id(&self, revision: GitOid) -> Result { + let output = ProcessBuilder::new("git") + .arg("rev-parse") + .arg("--short") + .arg(revision.as_str()) + .cwd(&self.repo.path) + .exec_with_output()?; + + let mut result = String::from_utf8(output.stdout)?; + result.truncate(result.trim_end().len()); + Ok(result) } /// Checks if the database contains the object of this `oid`. - pub(crate) fn contains(&self, oid: git2::Oid) -> bool { - self.repo.revparse_single(&oid.to_string()).is_ok() + pub(crate) fn contains(&self, oid: GitOid) -> bool { + self.repo.rev_parse(oid.as_str()).is_ok() } } impl GitReference { /// Resolves self to an object ID with objects the `repo` currently has. 
- pub(crate) fn resolve(&self, repo: &git2::Repository) -> Result { + pub(crate) fn resolve(&self, repo: &GitRepository) -> Result { let refkind = self.kind_str(); - let id = match self { - Self::Tag(s) => (|| -> Result { - let refname = format!("refs/remotes/origin/tags/{s}"); - let id = repo.refname_to_id(&refname)?; - let obj = repo.find_object(id, None)?; - let obj = obj.peel(ObjectType::Commit)?; - Ok(obj.id()) - })() - .with_context(|| format!("failed to find tag `{s}`"))?, - - Self::Branch(s) => { - let name = format!("origin/{s}"); - - // Resolve the remote name since that's all we're configuring in - // `fetch` below. - repo.find_branch(&name, git2::BranchType::Remote) - .ok() - .and_then(|b| b.get().target()) - .ok_or_else(|| anyhow::format_err!("failed to find {refkind} `{s}`"))? - } + let result = match self { + // Resolve the commit pointed to by the tag. + // + // `^0` recursively peels away from the revision to the underlying commit object. + // This also verifies that the tag indeed refers to a commit. + Self::Tag(s) => repo.rev_parse(&format!("refs/remotes/origin/tags/{s}^0")), + + // Resolve the commit pointed to by the branch. + Self::Branch(s) => repo.rev_parse(&format!("origin/{s}^0")), // Attempt to resolve the branch, then the tag. - Self::BranchOrTag(s) => { - let name = format!("origin/{s}"); - - // Resolve the remote name since that's all we're configuring in - // `fetch` below. - repo.find_branch(&name, git2::BranchType::Remote) - .ok() - .and_then(|b| b.get().target()) - .or_else(|| { - // Note that we resolve the named tag here in sync with where it's - // fetched into via `fetch` below. - let refname = format!("refs/remotes/origin/tags/{s}"); - let id = repo.refname_to_id(&refname).ok()?; - let obj = repo.find_object(id, None).ok()?; - let obj = obj.peel(ObjectType::Commit).ok()?; - Some(obj.id()) - }) - .ok_or_else(|| anyhow::format_err!("failed to find {refkind} `{s}`"))? 
- } + Self::BranchOrTag(s) => repo + .rev_parse(&format!("origin/{s}^0")) + .or_else(|_| repo.rev_parse(&format!("refs/remotes/origin/tags/{s}^0"))), // Attempt to resolve the branch, then the tag, then the commit. - Self::BranchOrTagOrCommit(s) => { - let name = format!("origin/{s}"); - - // Resolve the remote name since that's all we're configuring in - // `fetch` below. - repo.find_branch(&name, git2::BranchType::Remote) - .ok() - .and_then(|b| b.get().target()) - .or_else(|| { - // Note that we resolve the named tag here in sync with where it's - // fetched into via `fetch` below. - let refname = format!("refs/remotes/origin/tags/{s}"); - let id = repo.refname_to_id(&refname).ok()?; - let obj = repo.find_object(id, None).ok()?; - let obj = obj.peel(ObjectType::Commit).ok()?; - Some(obj.id()) - }) - .or_else(|| { - // Resolve the commit. - let obj = repo.revparse_single(s).ok()?; - match obj.as_tag() { - Some(tag) => Some(tag.target_id()), - None => Some(obj.id()), - } - }) - .ok_or_else(|| anyhow::format_err!("failed to find {refkind} `{s}`"))? - } + Self::BranchOrTagOrCommit(s) => repo + .rev_parse(&format!("origin/{s}^0")) + .or_else(|_| repo.rev_parse(&format!("refs/remotes/origin/tags/{s}^0"))) + .or_else(|_| repo.rev_parse(&format!("{s}^0"))), // We'll be using the HEAD commit. - Self::DefaultBranch => { - let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?; - let head = repo.find_object(head_id, None)?; - head.peel(ObjectType::Commit)?.id() - } + Self::DefaultBranch => repo.rev_parse("refs/remotes/origin/HEAD"), + // Resolve a direct commit reference. 
Self::FullCommit(s) | Self::ShortCommit(s) | Self::NamedRef(s) => { - let obj = repo.revparse_single(s)?; - match obj.as_tag() { - Some(tag) => tag.target_id(), - None => obj.id(), - } + repo.rev_parse(&format!("{s}^0")) } }; - Ok(id) + + result.with_context(|| anyhow::format_err!("failed to find {refkind} `{self}`")) } } -impl<'a> GitCheckout<'a> { +impl GitCheckout { /// Creates an instance of [`GitCheckout`]. This doesn't imply the checkout /// is done. Use [`GitCheckout::is_fresh`] to check. /// - /// * The `database` is where this checkout is from. /// * The `repo` will be the checked out Git repository. - fn new( - database: &'a GitDatabase, - revision: git2::Oid, - repo: git2::Repository, - ) -> GitCheckout<'a> { - let path = repo.workdir().unwrap_or_else(|| repo.path()); - GitCheckout { - path: path.to_path_buf(), - database, - revision, - repo, - } - } - - /// Gets the remote repository URL. - fn remote_url(&self) -> &Url { - self.database.remote.url() + fn new(revision: GitOid, repo: GitRepository) -> GitCheckout { + GitCheckout { revision, repo } } - /// Clone a repo for a `revision` into a local path from a `datatabase`. + /// Clone a repo for a `revision` into a local path from a `database`. /// This is a filesystem-to-filesystem clone. - fn clone_into( - into: &Path, - database: &'a GitDatabase, - revision: git2::Oid, - ) -> Result> { + fn clone_into(into: &Path, database: &GitDatabase, revision: GitOid) -> Result { let dirname = into.parent().unwrap(); paths::create_dir_all(dirname)?; if into.exists() { paths::remove_dir_all(into)?; } - // we're doing a local filesystem-to-filesystem clone so there should - // be no need to respect global configuration options, so pass in - // an empty instance of `git2::Config` below. - let git_config = git2::Config::new()?; - - // Clone the repository, but make sure we use the "local" option in - // libgit2 which will attempt to use hardlinks to set up the database. 
- // This should speed up the clone operation quite a bit if it works. - // - // Note that we still use the same fetch options because while we don't - // need authentication information we may want progress bars and such. - let url = Url::from_file_path(&database.path) - .map_err(|()| anyhow::format_err!("Invalid path URL: {}", database.path.display()))?; - let mut repo = None; - with_fetch_options(&git_config, url.as_str(), &mut |fopts| { - let mut checkout = git2::build::CheckoutBuilder::new(); - checkout.dry_run(); // we'll do this below during a `reset` - - let r = git2::build::RepoBuilder::new() - // use hard links and/or copy the database, we're doing a - // filesystem clone so this'll speed things up quite a bit. - .clone_local(git2::build::CloneLocal::Local) - .with_checkout(checkout) - .fetch_options(fopts) - .clone(url.as_str(), into)?; - // `git2` doesn't seem to handle shallow repos correctly when doing - // a local clone. Fortunately all that's needed is the copy of the - // one file that defines the shallow boundary, the commits which - // have their parents omitted as part of the shallow clone. - // - // TODO(git2): remove this when git2 supports shallow clone correctly - if database.repo.is_shallow() { - fs_err::copy( - database.repo.path().join("shallow"), - r.path().join("shallow"), - )?; - } - repo = Some(r); - Ok(()) - })?; - let repo = repo.unwrap(); - - let checkout = GitCheckout::new(database, revision, repo); + // Perform a local clone of the repository, which will attempt to use + // hardlinks to set up the repository. This should speed up the clone operation + // quite a bit if it works. + ProcessBuilder::new("git") + .arg("clone") + .arg("--local") + // Make sure to pass the local file path and not a file://... url. If given a url, + // Git treats the repository as a remote origin and gets confused because we don't + // have a HEAD checked out. 
+ .arg(database.repo.path.simplified_display().to_string()) + .arg(into.simplified_display().to_string()) + .exec_with_output()?; + + let repo = GitRepository::open(into)?; + let checkout = GitCheckout::new(revision, repo); checkout.reset()?; Ok(checkout) } /// Checks if the `HEAD` of this checkout points to the expected revision. fn is_fresh(&self) -> bool { - match self.repo.revparse_single("HEAD") { - Ok(ref head) if head.id() == self.revision => { + match self.repo.rev_parse("HEAD") { + Ok(id) if id == self.revision => { // See comments in reset() for why we check this - self.path.join(CHECKOUT_READY_LOCK).exists() + self.repo.path.join(CHECKOUT_READY_LOCK).exists() } _ => false, } } - /// Similar to [`reset()`]. This roughly performs `git reset --hard` to the - /// revision of this checkout, with additional interrupt protection by a - /// dummy file [`CHECKOUT_READY_LOCK`]. + /// This performs `git reset --hard` to the revision of this checkout, with + /// additional interrupt protection by a dummy file [`CHECKOUT_READY_LOCK`]. /// /// If we're interrupted while performing a `git reset` (e.g., we die /// because of a signal) Cargo needs to be sure to try to check out this @@ -478,486 +402,47 @@ impl<'a> GitCheckout<'a> { /// /// [`.cargo-ok`]: CHECKOUT_READY_LOCK fn reset(&self) -> Result<()> { - let ok_file = self.path.join(CHECKOUT_READY_LOCK); + let ok_file = self.repo.path.join(CHECKOUT_READY_LOCK); let _ = paths::remove_file(&ok_file); - debug!("reset {} to {}", self.repo.path().display(), self.revision); + debug!("reset {} to {}", self.repo.path.display(), self.revision); - // Ensure libgit2 won't mess with newlines when we vendor. - if let Ok(mut git_config) = self.repo.config() { - git_config.set_bool("core.autocrlf", false)?; - } + // Perform the hard reset. 
+ ProcessBuilder::new("git") + .arg("reset") + .arg("--hard") + .arg(self.revision.as_str()) + .cwd(&self.repo.path) + .exec_with_output()?; - let object = self.repo.find_object(self.revision, None)?; - reset(&self.repo, &object)?; paths::create(ok_file)?; Ok(()) } - /// Like `git submodule update --recursive` but for this git checkout. - /// - /// This function respects `submodule..update = none`[^1] git config. - /// Submodules set to `none` won't be fetched. - /// - /// [^1]: - fn update_submodules(&self, strategy: FetchStrategy, client: &Client) -> Result<()> { - /// Like `Cow`, but without a requirement on `Clone`. - enum Repo<'a> { - Borrowed(&'a git2::Repository), - Owned(git2::Repository), - } - - impl std::ops::Deref for Repo<'_> { - type Target = git2::Repository; - - fn deref(&self) -> &Self::Target { - match self { - Repo::Borrowed(repo) => repo, - Repo::Owned(repo) => repo, - } - } - } - - debug!( - "Update submodules for: {}", - self.repo.workdir().unwrap().display() - ); - - // Initialize a stack with the root repository. - let mut stack = vec![( - Repo::Borrowed(&self.repo), - Cow::Borrowed(self.remote_url().as_str()), - )]; - - while let Some((repo, parent_remote_url)) = stack.pop() { - for mut child in repo.submodules()? { - child.init(false)?; - - let child_url_str = child.url().ok_or_else(|| { - anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path()) - })?; - - // Skip the submodule if the config says not to update it. - if child.update_strategy() == git2::SubmoduleUpdate::None { - debug!( - "Skipping git submodule `{}` due to update strategy in .gitmodules", - child_url_str - ); - continue; - } - - let child_remote_url = - absolute_submodule_url(&parent_remote_url, child_url_str)?.to_string(); - - // A submodule which is listed in .gitmodules but not actually - // checked out will not have a head id, so we should ignore it. 
- let Some(head) = child.head_id() else { - continue; - }; - - // If the submodule hasn't been checked out yet, we need to - // clone it. If it has been checked out and the head is the same - // as the submodule's head, then we can skip an update and keep - // recursing. - let head_and_repo = child.open().and_then(|repo| { - let target = repo.head()?.target(); - Ok((target, repo)) - }); - let mut repo = if let Ok((head, repo)) = head_and_repo { - if child.head_id() == head { - stack.push((Repo::Owned(repo), Cow::Owned(child_remote_url))); - continue; - } - repo - } else { - let path = repo.workdir().unwrap().join(child.path()); - let _ = paths::remove_dir_all(&path); - init(&path, false)? - }; - - // Fetch data from origin and reset to the head commit - debug!("Updating Git submodule: {}", child_remote_url); - let reference = GitReference::FullCommit(head.to_string()); - fetch(&mut repo, &child_remote_url, &reference, strategy, client).with_context( - || { - format!( - "failed to fetch submodule `{}` from {}", - child.name().unwrap_or(""), - child_remote_url - ) - }, - )?; - - let obj = repo.find_object(head, None)?; - reset(&repo, &obj)?; - drop(obj); - - // Push the current submodule onto the stack. - stack.push((Repo::Owned(repo), Cow::Owned(child_remote_url))); - } - } - - Ok(()) + /// Runs `git submodule update --recursive --init` on this git checkout. + fn update_submodules(&self) -> Result<()> { + ProcessBuilder::new("git") + .arg("submodule") + .arg("update") + .arg("--recursive") + .arg("--init") + .cwd(&self.repo.path) + .exec_with_output() + .map(drop) } } -/// Constructs an absolute URL for a child submodule URL with its parent base URL. -/// -/// Git only assumes a submodule URL is a relative path if it starts with `./` -/// or `../` [^1]. To fetch the correct repo, we need to construct an absolute -/// submodule URL. -/// -/// At this moment it comes with some limitations: -/// -/// * GitHub doesn't accept non-normalized URLs with relative paths. 
-/// (`ssh://git@github.com/rust-lang/cargo.git/relative/..` is invalid) -/// * `url` crate cannot parse SCP-like URLs. -/// (`git@github.com:rust-lang/cargo.git` is not a valid WHATWG URL) -/// -/// To overcome these, this patch always tries [`Url::parse`] first to normalize -/// the path. If it couldn't, append the relative path as the last resort and -/// pray the remote git service supports non-normalized URLs. -/// -/// See also rust-lang/cargo#12404 and rust-lang/cargo#12295. -/// -/// [^1]: -fn absolute_submodule_url<'s>(base_url: &str, submodule_url: &'s str) -> Result> { - let absolute_url = if ["./", "../"].iter().any(|p| submodule_url.starts_with(p)) { - if let Ok(mut base_url) = Url::parse(base_url) { - let path = base_url.path(); - if !path.ends_with('/') { - base_url.set_path(&format!("{path}/")); - } - let absolute_url = base_url.join(submodule_url).with_context(|| { - format!( - "Failed to parse relative child submodule URL `{submodule_url}` using parent base URL `{base_url}`" - ) - })?; - Cow::from(absolute_url.to_string()) - } else { - let mut absolute_url = base_url.to_string(); - if !absolute_url.ends_with('/') { - absolute_url.push('/'); - } - absolute_url.push_str(submodule_url); - Cow::from(absolute_url) - } - } else { - Cow::from(submodule_url) - }; - - Ok(absolute_url) -} - -/// Prepare the authentication callbacks for cloning a git repository. -/// -/// The main purpose of this function is to construct the "authentication -/// callback" which is used to clone a repository. This callback will attempt to -/// find the right authentication on the system (without user input) and will -/// guide libgit2 in doing so. -/// -/// The callback is provided `allowed` types of credentials, and we try to do as -/// much as possible based on that: -/// -/// * Prioritize SSH keys from the local ssh agent as they're likely the most -/// reliable. 
The username here is prioritized from the credential -/// callback, then from whatever is configured in git itself, and finally -/// we fall back to the generic user of `git`. -/// -/// * If a username/password is allowed, then we fallback to git2-rs's -/// implementation of the credential helper. This is what is configured -/// with `credential.helper` in git, and is the interface for the macOS -/// keychain, for example. -/// -/// * After the above two have failed, we just kinda grapple attempting to -/// return *something*. -/// -/// If any form of authentication fails, libgit2 will repeatedly ask us for -/// credentials until we give it a reason to not do so. To ensure we don't -/// just sit here looping forever we keep track of authentications we've -/// attempted and we don't try the same ones again. -fn with_authentication(url: &str, cfg: &git2::Config, mut f: F) -> Result -where - F: FnMut(&mut git2::Credentials<'_>) -> Result, -{ - let mut cred_helper = git2::CredentialHelper::new(url); - cred_helper.config(cfg); - - let mut ssh_username_requested = false; - let mut cred_helper_bad = None; - let mut ssh_agent_attempts = Vec::new(); - let mut any_attempts = false; - let mut tried_sshkey = false; - let mut url_attempt = None; - - let orig_url = url; - let mut res = f(&mut |url, username, allowed| { - any_attempts = true; - if url != orig_url { - url_attempt = Some(url.to_string()); - } - // libgit2's "USERNAME" authentication actually means that it's just - // asking us for a username to keep going. This is currently only really - // used for SSH authentication and isn't really an authentication type. - // The logic currently looks like: - // - // let user = ...; - // if (user.is_null()) - // user = callback(USERNAME, null, ...); - // - // callback(SSH_KEY, user, ...) - // - // So if we're being called here then we know that (a) we're using ssh - // authentication and (b) no username was specified in the URL that - // we're trying to clone. 
We need to guess an appropriate username here, - // but that may involve a few attempts. Unfortunately we can't switch - // usernames during one authentication session with libgit2, so to - // handle this we bail out of this authentication session after setting - // the flag `ssh_username_requested`, and then we handle this below. - if allowed.contains(git2::CredentialType::USERNAME) { - debug_assert!(username.is_none()); - ssh_username_requested = true; - return Err(git2::Error::from_str("gonna try usernames later")); - } - - // An "SSH_KEY" authentication indicates that we need some sort of SSH - // authentication. This can currently either come from the ssh-agent - // process or from a raw in-memory SSH key. Cargo only supports using - // ssh-agent currently. - // - // If we get called with this then the only way that should be possible - // is if a username is specified in the URL itself (e.g., `username` is - // Some), hence the unwrap() here. We try custom usernames down below. - if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey { - // If ssh-agent authentication fails, libgit2 will keep - // calling this callback asking for other authentication - // methods to try. Make sure we only try ssh-agent once, - // to avoid looping forever. - tried_sshkey = true; - let username = username.unwrap(); - debug_assert!(!ssh_username_requested); - ssh_agent_attempts.push(username.to_string()); - return git2::Cred::ssh_key_from_agent(username); - } - - // Sometimes libgit2 will ask for a username/password in plaintext. This - // is where Cargo would have an interactive prompt if we supported it, - // but we currently don't! Right now the only way we support fetching a - // plaintext password is through the `credential.helper` support, so - // fetch that here. - // - // If ssh-agent authentication fails, libgit2 will keep calling this - // callback asking for other authentication methods to try. 
Check - // cred_helper_bad to make sure we only try the git credential helper - // once, to avoid looping forever. - if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none() - { - let r = git2::Cred::credential_helper(cfg, url, username); - cred_helper_bad = Some(r.is_err()); - return r; - } - - // I'm... not sure what the DEFAULT kind of authentication is, but seems - // easy to support? - if allowed.contains(git2::CredentialType::DEFAULT) { - return git2::Cred::default(); - } - - // Whelp, we tried our best - Err(git2::Error::from_str("no authentication methods succeeded")) - }); - - // Ok, so if it looks like we're going to be doing ssh authentication, we - // want to try a few different usernames as one wasn't specified in the URL - // for us to use. In order, we'll try: - // - // * A credential helper's username for this URL, if available. - // * This account's username. - // * "git" - // - // We have to restart the authentication session each time (due to - // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we - // call our callback, `f`, in a loop here. - if ssh_username_requested { - debug_assert!(res.is_err()); - let mut attempts = vec![String::from("git")]; - if let Ok(s) = env::var("USER").or_else(|_| env::var("USERNAME")) { - attempts.push(s); - } - if let Some(ref s) = cred_helper.username { - attempts.push(s.clone()); - } - - while let Some(s) = attempts.pop() { - // We should get `USERNAME` first, where we just return our attempt, - // and then after that we should get `SSH_KEY`. If the first attempt - // fails we'll get called again, but we don't have another option so - // we bail out. 
- let mut attempts = 0; - res = f(&mut |_url, username, allowed| { - if allowed.contains(git2::CredentialType::USERNAME) { - return git2::Cred::username(&s); - } - if allowed.contains(git2::CredentialType::SSH_KEY) { - debug_assert_eq!(Some(&s[..]), username); - attempts += 1; - if attempts == 1 { - ssh_agent_attempts.push(s.to_string()); - return git2::Cred::ssh_key_from_agent(&s); - } - } - Err(git2::Error::from_str("no authentication methods succeeded")) - }); - - // If we made two attempts then that means: - // - // 1. A username was requested, we returned `s`. - // 2. An ssh key was requested, we returned to look up `s` in the - // ssh agent. - // 3. For whatever reason that lookup failed, so we were asked again - // for another mode of authentication. - // - // Essentially, if `attempts == 2` then in theory the only error was - // that this username failed to authenticate (e.g., no other network - // errors happened). Otherwise something else is funny so we bail - // out. - if attempts != 2 { - break; - } - } - } - let mut err = match res { - Ok(e) => return Ok(e), - Err(e) => e, - }; - - // In the case of an authentication failure (where we tried something) then - // we try to give a more helpful error message about precisely what we - // tried. 
- if any_attempts { - let mut msg = "failed to authenticate when downloading repository".to_string(); - - if let Some(attempt) = &url_attempt { - if url != attempt { - msg.push_str(": "); - msg.push_str(attempt); - } - } - msg.push('\n'); - if !ssh_agent_attempts.is_empty() { - let names = ssh_agent_attempts - .iter() - .map(|agent| format!("`{agent}`")) - .collect::>() - .join(", "); - msg.push_str(&format!( - "\n* attempted ssh-agent authentication, but \ - no usernames succeeded: {names}" - )); - } - if let Some(failed_cred_helper) = cred_helper_bad { - if failed_cred_helper { - msg.push_str( - "\n* attempted to find username/password via \ - git's `credential.helper` support, but failed", - ); - } else { - msg.push_str( - "\n* attempted to find username/password via \ - `credential.helper`, but maybe the found \ - credentials were incorrect", - ); - } - } - err = err.context(msg); - - // Otherwise if we didn't even get to the authentication phase them we may - // have failed to set up a connection, in these cases hint on the - // `net.git-fetch-with-cli` configuration option. - } else if let Some(e) = err.downcast_ref::() { - match e.class() { - ErrorClass::Net - | ErrorClass::Ssl - | ErrorClass::Submodule - | ErrorClass::FetchHead - | ErrorClass::Ssh - | ErrorClass::Http => { - err = err.context("failed to connect to the repository"); - } - ErrorClass::Callback => { - // This unwraps the git2 error. We're using the callback error - // specifically to convey errors from Rust land through the C - // callback interface. We don't need the `; class=Callback - // (26)` that gets tacked on to the git2 error message. - err = anyhow::format_err!("{}", e.message()); - } - _ => {} - } - } - - Err(err) -} - -/// `git reset --hard` to the given `obj` for the `repo`. -/// -/// The `obj` is a commit-ish to which the head should be moved. 
-fn reset(repo: &git2::Repository, obj: &git2::Object<'_>) -> Result<()> { - // let mut pb = Progress::new("Checkout", config); - let mut opts = git2::build::CheckoutBuilder::new(); - // opts.progress(|_, cur, max| { - // drop(pb.tick(cur, max, "")); - // }); - debug!("doing reset"); - repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?; - debug!("reset done"); - Ok(()) -} - -/// Prepares the callbacks for fetching a git repository. -/// -/// The main purpose of this function is to construct everything before a fetch. -/// This will attempt to setup a progress bar, the authentication for git, -/// ssh known hosts check, and the network retry mechanism. -/// -/// The callback is provided a fetch options, which can be used by the actual -/// git fetch. -pub(crate) fn with_fetch_options( - git_config: &git2::Config, - url: &str, - cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> Result<()>, -) -> Result<()> { - retry::with_retry(|| { - with_authentication(url, git_config, |f| { - let port = Url::parse(url).ok().and_then(|url| url.port()); - - // TODO(charlie): Restore progress reporting. - let mut rcb = git2::RemoteCallbacks::new(); - rcb.credentials(f); - rcb.certificate_check(|cert, host| { - super::known_hosts::certificate_check(cert, host, port) - }); - - // Create a local anonymous remote in the repository to fetch the url. - let mut opts = git2::FetchOptions::new(); - opts.remote_callbacks(rcb); - cb(opts) - })?; - Ok(()) - }) -} - /// Attempts to fetch the given git `reference` for a Git repository. /// /// This is the main entry for git clone/fetch. It does the following: /// /// * Turns [`GitReference`] into refspecs accordingly. -/// * Dispatches `git fetch` using libgit2 or git CLI. +/// * Dispatches `git fetch` using the git CLI. /// /// The `remote_url` argument is the git remote URL where we want to fetch from. 
pub(crate) fn fetch( - repo: &mut git2::Repository, + repo: &mut GitRepository, remote_url: &str, reference: &GitReference, - strategy: FetchStrategy, client: &Client, ) -> Result<()> { let oid_to_fetch = match github_fast_path(repo, remote_url, reference, client) { @@ -970,10 +455,6 @@ pub(crate) fn fetch( } }; - maybe_gc_repo(repo)?; - - clean_repo_temp_files(repo); - // Translate the reference desired here into an actual list of refspecs // which need to get fetched. Additionally record if we're fetching tags. let mut refspecs = Vec::new(); @@ -1048,124 +529,56 @@ pub(crate) fn fetch( } debug!("Performing a Git fetch for: {remote_url}"); - match strategy { - FetchStrategy::Cli => { - let result = match refspec_strategy { - RefspecStrategy::All => fetch_with_cli(repo, remote_url, refspecs.as_slice(), tags), - RefspecStrategy::First => { - // Try each refspec - let mut errors = refspecs - .iter() - .map_while(|refspec| { - let fetch_result = fetch_with_cli( - repo, - remote_url, - std::slice::from_ref(refspec), - tags, - ); - - // Stop after the first success and log failures - match fetch_result { - Err(ref err) => { - debug!("failed to fetch refspec `{refspec}`: {err}"); - Some(fetch_result) - } - Ok(()) => None, - } - }) - .collect::>(); - - if errors.len() == refspecs.len() { - if let Some(result) = errors.pop() { - // Use the last error for the message - result - } else { - // Can only occur if there were no refspecs to fetch - Ok(()) - } - } else { - Ok(()) - } - } - }; - match reference { - // With the default branch, adding context is confusing - GitReference::DefaultBranch => result, - _ => result.with_context(|| { - format!( - "failed to fetch {} `{}`", - reference.kind_str(), - reference.as_rev() - ) - }), - } - } - FetchStrategy::Libgit2 => { - // Libgit2 does not fail if a refspec is missing, so the `refspec_strategy` - // is not handled here - - let git_config = git2::Config::open_default()?; - with_fetch_options(&git_config, remote_url, &mut |mut 
opts| { - if tags { - opts.download_tags(git2::AutotagOption::All); - } - - // The `fetch` operation here may fail spuriously due to a corrupt - // repository. It could also fail, however, for a whole slew of other - // reasons (aka network related reasons). We want Cargo to automatically - // recover from corrupt repositories, but we don't want Cargo to stomp - // over other legitimate errors. - // - // Consequently we save off the error of the `fetch` operation and if it - // looks like a "corrupt repo" error then we blow away the repo and try - // again. If it looks like any other kind of error, or if we've already - // blown away the repository, then we want to return the error as-is. - let mut repo_reinitialized = false; - loop { - debug!("initiating fetch of {refspecs:?} from {remote_url}"); - let res = - repo.remote_anonymous(remote_url)? - .fetch(&refspecs, Some(&mut opts), None); - let err = match res { - Ok(()) => break, - Err(e) => e, - }; - debug!("fetch failed: {}", err); - - if !repo_reinitialized - && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb) - { - repo_reinitialized = true; - debug!( - "looks like this is a corrupt repository, reinitializing \ - and trying again" - ); - if reinitialize(repo).is_ok() { - continue; + let result = match refspec_strategy { + RefspecStrategy::All => fetch_with_cli(repo, remote_url, refspecs.as_slice(), tags), + RefspecStrategy::First => { + // Try each refspec + let mut errors = refspecs + .iter() + .map_while(|refspec| { + let fetch_result = + fetch_with_cli(repo, remote_url, std::slice::from_ref(refspec), tags); + + // Stop after the first success and log failures + match fetch_result { + Err(ref err) => { + debug!("failed to fetch refspec `{refspec}`: {err}"); + Some(fetch_result) } + Ok(()) => None, } + }) + .collect::>(); - return Err(err.into()); + if errors.len() == refspecs.len() { + if let Some(result) = errors.pop() { + // Use the last error for the message + result + } else { + // Can only 
occur if there were no refspecs to fetch + Ok(()) } + } else { Ok(()) - }) + } } + }; + match reference { + // With the default branch, adding context is confusing + GitReference::DefaultBranch => result, + _ => result.with_context(|| { + format!( + "failed to fetch {} `{}`", + reference.kind_str(), + reference.as_rev() + ) + }), } } -/// Attempts to use `git` CLI installed on the system to fetch a repository, -/// when the config value [`net.git-fetch-with-cli`][1] is set. -/// -/// Unfortunately `libgit2` is notably lacking in the realm of authentication -/// when compared to the `git` command line. As a result, allow an escape -/// hatch for users that would prefer to use `git`-the-CLI for fetching -/// repositories instead of `libgit2`-the-library. This should make more -/// flavors of authentication possible while also still giving us all the -/// speed and portability of using `libgit2`. -/// -/// [1]: https://doc.rust-lang.org/nightly/cargo/reference/config.html#netgit-fetch-with-cli +/// Attempts to use `git` CLI installed on the system to fetch a repository,. fn fetch_with_cli( - repo: &mut git2::Repository, + repo: &mut GitRepository, url: &str, refspecs: &[String], tags: bool, @@ -1190,151 +603,15 @@ fn fetch_with_cli( .env_remove("GIT_INDEX_FILE") .env_remove("GIT_OBJECT_DIRECTORY") .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES") - .cwd(repo.path()); + .cwd(&repo.path); // We capture the output to avoid streaming it to the user's console during clones. // The required `on...line` callbacks currently do nothing. // The output appears to be included in error messages by default. - cmd.exec_with_streaming(&mut |_| Ok(()), &mut |_| Ok(()), true)?; + cmd.exec_with_output()?; Ok(()) } -/// Attempts to `git gc` a repository. -/// -/// Cargo has a bunch of long-lived git repositories in its global cache and -/// some, like the index, are updated very frequently. 
Right now each update -/// creates a new "pack file" inside the git database, and over time this can -/// cause bad performance and bad current behavior in libgit2. -/// -/// One pathological use case today is where libgit2 opens hundreds of file -/// descriptors, getting us dangerously close to blowing out the OS limits of -/// how many fds we can have open. This is detailed in [#4403]. -/// -/// To try to combat this problem we attempt a `git gc` here. Note, though, that -/// we may not even have `git` installed on the system! As a result we -/// opportunistically try a `git gc` when the pack directory looks too big, and -/// failing that we just blow away the repository and start over. -/// -/// In theory this shouldn't be too expensive compared to the network request -/// we're about to issue. -/// -/// [#4403]: https://github.com/rust-lang/cargo/issues/4403 -fn maybe_gc_repo(repo: &mut git2::Repository) -> Result<()> { - // Here we arbitrarily declare that if you have more than 100 files in your - // `pack` folder that we need to do a gc. - let entries = if let Ok(e) = repo.path().join("objects/pack").read_dir() { - e.count() - } else { - debug!("skipping gc as pack dir appears gone"); - return Ok(()); - }; - let max = env::var("__CARGO_PACKFILE_LIMIT") - .ok() - .and_then(|s| s.parse::().ok()) - .unwrap_or(100); - if entries < max { - debug!("skipping gc as there's only {} pack files", entries); - return Ok(()); - } - - // First up, try a literal `git gc` by shelling out to git. This is pretty - // likely to fail though as we may not have `git` installed. Note that - // libgit2 doesn't currently implement the gc operation, so there's no - // equivalent there. 
- match Command::new("git") - .arg("gc") - .current_dir(repo.path()) - .output() - { - Ok(out) => { - debug!( - "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}", - out.status, - String::from_utf8_lossy(&out.stdout), - String::from_utf8_lossy(&out.stderr) - ); - if out.status.success() { - let new = git2::Repository::open(repo.path())?; - *repo = new; - return Ok(()); - } - } - Err(e) => debug!("git-gc failed to spawn: {}", e), - } - - // Alright all else failed, let's start over. - reinitialize(repo) -} - -/// Removes temporary files left from previous activity. -/// -/// If libgit2 is interrupted while indexing pack files, it will leave behind -/// some temporary files that it doesn't clean up. These can be quite large in -/// size, so this tries to clean things up. -/// -/// This intentionally ignores errors. This is only an opportunistic cleaning, -/// and we don't really care if there are issues (there's unlikely anything -/// that can be done). -/// -/// The git CLI has similar behavior (its temp files look like -/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git -/// prune` which is run by `git gc`. However, it doesn't know about libgit2's -/// filenames, so they never get cleaned up. -fn clean_repo_temp_files(repo: &git2::Repository) { - let path = repo.path().join("objects/pack/pack_git2_*"); - let Some(pattern) = path.to_str() else { - warn!("cannot convert {path:?} to a string"); - return; - }; - let Ok(paths) = glob::glob(pattern) else { - return; - }; - for path in paths.flatten() { - match paths::remove_file(&path) { - Ok(()) => debug!("removed stale temp git file {path:?}"), - Err(e) => { - warn!("failed to remove {path:?} while cleaning temp files: {e}"); - } - } - } -} - -/// Reinitializes a given Git repository. This is useful when a Git repository -/// seems corrupted and we want to start over. 
-fn reinitialize(repo: &mut git2::Repository) -> Result<()> { - // Here we want to drop the current repository object pointed to by `repo`, - // so we initialize temporary repository in a sub-folder, blow away the - // existing git folder, and then recreate the git repo. Finally we blow away - // the `tmp` folder we allocated. - let path = repo.path().to_path_buf(); - debug!("reinitializing git repo at {:?}", path); - let tmp = path.join("tmp"); - let bare = !repo.path().ends_with(".git"); - *repo = init(&tmp, false)?; - for entry in path.read_dir()? { - let entry = entry?; - if entry.file_name().to_str() == Some("tmp") { - continue; - } - let path = entry.path(); - drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path))); - } - *repo = init(&path, bare)?; - paths::remove_dir_all(&tmp)?; - Ok(()) -} - -/// Initializes a Git repository at `path`. -fn init(path: &Path, bare: bool) -> Result { - let mut opts = git2::RepositoryInitOptions::new(); - // Skip anything related to templates, they just call all sorts of issues as - // we really don't want to use them yet they insist on being used. See #6240 - // for an example issue that comes up. - opts.external_template(false); - opts.bare(bare); - Ok(git2::Repository::init_opts(path, &opts)?) -} - /// The result of GitHub fast path check. See [`github_fast_path`] for more. enum FastPathRev { /// The local rev (determined by `reference.resolve(repo)`) is already up to @@ -1342,7 +619,7 @@ enum FastPathRev { UpToDate, /// The following SHA must be fetched in order for the local rev to become /// up to date. - NeedsFetch(git2::Oid), + NeedsFetch(GitOid), /// Don't know whether local rev is up to date. We'll fetch _all_ branches /// and tags from the server and see what happens. 
Indeterminate, @@ -1362,7 +639,7 @@ enum FastPathRev { /// /// [^1]: fn github_fast_path( - repo: &mut git2::Repository, + repo: &mut GitRepository, url: &str, reference: &GitReference, client: &Client, @@ -1399,8 +676,8 @@ fn github_fast_path( // but is not a short hash of the found object, it's probably a // branch and we also need to get a hash from GitHub, in case // the branch has moved. - if let Some(local_object) = local_object { - if is_short_hash_of(rev, local_object) { + if let Some(ref local_object) = local_object { + if is_short_hash_of(rev, *local_object) { return Ok(FastPathRev::UpToDate); } } @@ -1450,7 +727,7 @@ fn github_fast_path( if response_code == StatusCode::NOT_MODIFIED { Ok(FastPathRev::UpToDate) } else if response_code == StatusCode::OK { - let oid_to_fetch = response.text().await?.parse::()?; + let oid_to_fetch = response.text().await?.parse()?; Ok(FastPathRev::NeedsFetch(oid_to_fetch)) } else { // Usually response_code == 404 if the repository does not exist, and @@ -1472,109 +749,10 @@ fn looks_like_commit_hash(rev: &str) -> bool { } /// Whether `rev` is a shorter hash of `oid`. 
-fn is_short_hash_of(rev: &str, oid: git2::Oid) -> bool { +fn is_short_hash_of(rev: &str, oid: GitOid) -> bool { let long_hash = oid.to_string(); match long_hash.get(..rev.len()) { Some(truncated_long_hash) => truncated_long_hash.eq_ignore_ascii_case(rev), None => false, } } - -#[cfg(test)] -mod tests { - use super::absolute_submodule_url; - - #[test] - fn test_absolute_submodule_url() { - let cases = [ - ( - "ssh://git@gitub.com/rust-lang/cargo", - "git@github.com:rust-lang/cargo.git", - "git@github.com:rust-lang/cargo.git", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo", - "./", - "ssh://git@gitub.com/rust-lang/cargo/", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo", - "../", - "ssh://git@gitub.com/rust-lang/", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo", - "./foo", - "ssh://git@gitub.com/rust-lang/cargo/foo", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo/", - "./foo", - "ssh://git@gitub.com/rust-lang/cargo/foo", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo/", - "../foo", - "ssh://git@gitub.com/rust-lang/foo", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo", - "../foo", - "ssh://git@gitub.com/rust-lang/foo", - ), - ( - "ssh://git@gitub.com/rust-lang/cargo", - "../foo/bar/../baz", - "ssh://git@gitub.com/rust-lang/foo/baz", - ), - ( - "git@github.com:rust-lang/cargo.git", - "ssh://git@gitub.com/rust-lang/cargo", - "ssh://git@gitub.com/rust-lang/cargo", - ), - ( - "git@github.com:rust-lang/cargo.git", - "./", - "git@github.com:rust-lang/cargo.git/./", - ), - ( - "git@github.com:rust-lang/cargo.git", - "../", - "git@github.com:rust-lang/cargo.git/../", - ), - ( - "git@github.com:rust-lang/cargo.git", - "./foo", - "git@github.com:rust-lang/cargo.git/./foo", - ), - ( - "git@github.com:rust-lang/cargo.git/", - "./foo", - "git@github.com:rust-lang/cargo.git/./foo", - ), - ( - "git@github.com:rust-lang/cargo.git", - "../foo", - "git@github.com:rust-lang/cargo.git/../foo", - ), - ( - "git@github.com:rust-lang/cargo.git/", - "../foo", - 
"git@github.com:rust-lang/cargo.git/../foo", - ), - ( - "git@github.com:rust-lang/cargo.git", - "../foo/bar/../baz", - "git@github.com:rust-lang/cargo.git/../foo/bar/../baz", - ), - ]; - - for (base_url, submodule_url, expected) in cases { - let url = absolute_submodule_url(base_url, submodule_url).unwrap(); - assert_eq!( - expected, url, - "base `{base_url}`; submodule `{submodule_url}`" - ); - } - } -} diff --git a/crates/uv-git/src/known_hosts.rs b/crates/uv-git/src/known_hosts.rs deleted file mode 100644 index a7c82d4f48a7..000000000000 --- a/crates/uv-git/src/known_hosts.rs +++ /dev/null @@ -1,872 +0,0 @@ -//! Git support is derived from Cargo's implementation. -//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice. -//! Source: -//! -//! SSH host key validation support. -//! -//! The only public item in this module is [`certificate_check`], -//! which provides a callback to [`git2::RemoteCallbacks::certificate_check`]. -//! -//! A primary goal with this implementation is to provide user-friendly error -//! messages, guiding them to understand the issue and how to resolve it. -//! -//! Note that there are a lot of limitations here. This reads OpenSSH -//! `known_hosts` files from well-known locations, but it does not read OpenSSH -//! config files. The config file can change the behavior of how OpenSSH -//! handles `known_hosts` files. For example, some things we don't handle: -//! -//! - `GlobalKnownHostsFile` — Changes the location of the global host file. -//! - `UserKnownHostsFile` — Changes the location of the user's host file. -//! - `KnownHostsCommand` — A command to fetch known hosts. -//! - `CheckHostIP` — DNS spoofing checks. -//! - `VisualHostKey` — Shows a visual ascii-art key. -//! - `VerifyHostKeyDNS` — Uses SSHFP DNS records to fetch a host key. -//! -//! There's also a number of things that aren't supported but could be easily -//! added (it just adds a little complexity). For example, hostname patterns, -//! 
and revoked markers. See "FIXME" comments littered in this file. - -use base64::engine::general_purpose::STANDARD; -use base64::engine::general_purpose::STANDARD_NO_PAD; -use base64::Engine as _; -use git2::cert::{Cert, SshHostKeyType}; -use git2::CertificateCheckStatus; -use hmac::Mac; -use std::collections::HashSet; -use std::fmt::{Display, Write}; -use std::path::{Path, PathBuf}; - -/// These are host keys that are hard-coded in cargo to provide convenience. -/// -/// If GitHub ever publishes new keys, the user can add them to their own -/// configuration file to use those instead. -/// -/// The GitHub keys are sourced from or -/// . -/// -/// These will be ignored if the user adds their own entries for `github.com`, -/// which can be useful if GitHub ever revokes their old keys. -static BUNDLED_KEYS: &[(&str, &str, &str)] = &[ - ("github.com", "ssh-ed25519", "AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl"), - ("github.com", "ecdsa-sha2-nistp256", "AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg="), - ("github.com", "ssh-rsa", "AAAAB3NzaC1yc2EAAAADAQABAAABgQCj7ndNxQowgcQnjshcLrqPEiiphnt+VTTvDP6mHBL9j1aNUkY4Ue1gvwnGLVlOhGeYrnZaMgRK6+PKCUXaDbC7qtbW8gIkhL7aGCsOr/C56SJMy/BCZfxd1nWzAOxSDPgVsmerOBYfNqltV9/hWCqBywINIR+5dIg6JTJ72pcEpEjcYgXkE2YEFXV1JHnsKgbLWNlhScqb2UmyRkQyytRLtL+38TGxkxCflmO+5Z8CSSNY7GidjMIZ7Q4zMjA2n1nGrlTDkzwDCsw+wqFPGQA179cnfGWOWRVruj16z6XyvxvjJwbz0wQZ75XK5tKSb7FNyeIEs4TT4jk+S4dhPeAUC5y+bDYirYgM4GC7uEnztnZyaVWQ7B381AK4Qdrwt51ZqExKbQpTUNn+EjqoTwvqNj4kqx5QUCI0ThS/YkOxJCXmPUWZbhjpCg56i+2aB6CmK2JGhn57K5mj0MNdBXA4/WnwH6XoPWJzK5Nyu2zB3nAZp+S5hpQs+p1vN1/wsjk="), -]; - -/// List of keys that public hosts have rotated away from. -/// -/// We explicitly distrust these keys as users with the old key in their -/// local configuration will otherwise be vulnerable to MITM attacks if the -/// attacker has access to the old key. 
As there is no other way to distribute -/// revocations of ssh host keys, we need to bundle them with the client. -/// -/// Unlike [`BUNDLED_KEYS`], these revocations will not be ignored if the user -/// has their own entries: we *know* that these keys are bad. -static BUNDLED_REVOCATIONS: &[(&str, &str, &str)] = &[ - // Used until March 24, 2023: https://github.blog/2023-03-23-we-updated-our-rsa-ssh-host-key/ - ("github.com", "ssh-rsa", "AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ=="), -]; - -enum KnownHostError { - /// Some general error happened while validating the known hosts. - CheckError(anyhow::Error), - /// The host key was not found. - HostKeyNotFound { - hostname: String, - key_type: SshHostKeyType, - remote_host_key: String, - remote_fingerprint: String, - other_hosts: Vec, - }, - /// The host key was found, but does not match the remote's key. - HostKeyHasChanged { - hostname: String, - key_type: SshHostKeyType, - old_known_host: KnownHost, - remote_host_key: String, - remote_fingerprint: String, - }, - /// The host key was found with a @revoked marker, it must not be accepted. - HostKeyRevoked { - hostname: String, - key_type: SshHostKeyType, - remote_host_key: String, - location: KnownHostLocation, - }, - /// The host key was not found, but there was a matching known host with a - /// @cert-authority marker (which Cargo doesn't yet support). - HostHasOnlyCertAuthority { - hostname: String, - location: KnownHostLocation, - }, -} - -impl From for KnownHostError { - fn from(err: anyhow::Error) -> Self { - Self::CheckError(err) - } -} - -/// The location where a host key was located. 
-#[derive(Clone)] -enum KnownHostLocation { - /// Loaded from a file from disk. - File { path: PathBuf, lineno: usize }, - /// Part of the hard-coded bundled keys in Cargo. - Bundled, -} - -impl Display for KnownHostLocation { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let loc = match self { - Self::File { path, lineno } => { - format!("{} line {lineno}", path.display()) - } - Self::Bundled => "bundled with cargo".to_string(), - }; - f.write_str(&loc) - } -} - -/// The git2 callback used to validate a certificate (only ssh known hosts are validated). -pub(crate) fn certificate_check( - cert: &Cert<'_>, - host: &str, - port: Option, -) -> Result { - let Some(host_key) = cert.as_hostkey() else { - // Return passthrough for TLS X509 certificates to use whatever validation - // was done in git2. - return Ok(CertificateCheckStatus::CertificatePassthrough); - }; - // If a nonstandard port is in use, check for that first. - // The fallback to check without a port is handled in the HostKeyNotFound handler. - let host_maybe_port = match port { - Some(port) if port != 22 => format!("[{host}]:{port}"), - _ => host.to_string(), - }; - // The error message must be constructed as a string to pass through the libgit2 C API. - let err_msg = match check_ssh_known_hosts(host_key, &host_maybe_port) { - Ok(()) => { - return Ok(CertificateCheckStatus::CertificateOk); - } - Err(KnownHostError::CheckError(e)) => { - format!("error: failed to validate host key:\n{e:#}") - } - Err(KnownHostError::HostKeyNotFound { - hostname, - key_type, - remote_host_key, - remote_fingerprint, - other_hosts, - }) => { - // Try checking without the port. 
- if port.is_some() - && !matches!(port, Some(22)) - && check_ssh_known_hosts(host_key, host).is_ok() - { - return Ok(CertificateCheckStatus::CertificateOk); - } - let key_type_short_name = key_type.short_name(); - let key_type_name = key_type.name(); - let other_hosts_message = if other_hosts.is_empty() { - String::new() - } else { - let mut msg = String::from( - "Note: This host key was found, \ - but is associated with a different host:\n", - ); - for known_host in other_hosts { - writeln!( - msg, - " {loc}: {patterns}", - loc = known_host.location, - patterns = known_host.patterns - ) - .unwrap(); - } - msg - }; - format!("error: unknown SSH host key\n\ - The SSH host key for `{hostname}` is not known and cannot be validated.\n\ - \n\ - To resolve this issue, add the host key to the list of known hosts.\n\ - \n\ - The key to add is:\n\ - \n\ - {hostname} {key_type_name} {remote_host_key}\n\ - \n\ - The {key_type_short_name} key fingerprint is: SHA256:{remote_fingerprint}\n\ - This fingerprint should be validated with the server administrator that it is correct.\n\ - {other_hosts_message}\n\ - See https://doc.rust-lang.org/stable/cargo/appendix/git-authentication.html#ssh-known-hosts \ - for more information.\n\ - ") - } - Err(KnownHostError::HostKeyHasChanged { - hostname, - key_type, - old_known_host, - remote_host_key, - remote_fingerprint, - }) => { - let key_type_short_name = key_type.short_name(); - let key_type_name = key_type.name(); - let old_key_resolution = match old_known_host.location { - KnownHostLocation::File { path, lineno } => { - let old_key_location = path.display(); - format!( - "removing the old {key_type_name} key for `{hostname}` \ - located at {old_key_location} line {lineno}, \ - and adding the new key to the list of known hosts.", - ) - } - KnownHostLocation::Bundled => "adding the new key to the list of known hosts.\n\ - The current host key is bundled as part of Cargo." 
- .to_string(), - }; - format!("error: SSH host key has changed for `{hostname}`\n\ - *********************************\n\ - * WARNING: HOST KEY HAS CHANGED *\n\ - *********************************\n\ - This may be caused by a man-in-the-middle attack, or the \ - server may have changed its host key.\n\ - \n\ - The {key_type_short_name} fingerprint for the key from the remote host is:\n\ - SHA256:{remote_fingerprint}\n\ - \n\ - You are strongly encouraged to contact the server \ - administrator for `{hostname}` to verify that this new key is \ - correct.\n\ - \n\ - If you can verify that the server has a new key, you can \ - resolve this error by {old_key_resolution}\n\ - \n\ - The key provided by the remote host is:\n\ - \n\ - {hostname} {key_type_name} {remote_host_key}\n\ - \n\ - See https://doc.rust-lang.org/stable/cargo/appendix/git-authentication.html#ssh-known-hosts \ - for more information.\n\ - ") - } - Err(KnownHostError::HostKeyRevoked { - hostname, - key_type, - remote_host_key, - location, - }) => { - let key_type_short_name = key_type.short_name(); - format!( - "error: Key has been revoked for `{hostname}`\n\ - **************************************\n\ - * WARNING: REVOKED HOST KEY DETECTED *\n\ - **************************************\n\ - This may indicate that the key provided by this host has been\n\ - compromised and should not be accepted. - \n\ - The host key {key_type_short_name} {remote_host_key} is revoked\n\ - in {location} and has been rejected.\n\ - " - ) - } - Err(KnownHostError::HostHasOnlyCertAuthority { hostname, location }) => { - format!("error: Found a `@cert-authority` marker for `{hostname}`\n\ - \n\ - Cargo doesn't support certificate authorities for host key verification. It is\n\ - recommended that the command line Git client is used instead. 
This can be achieved\n\ - by setting `net.git-fetch-with-cli` to `true` in the Cargo config.\n\ - \n - The `@cert-authority` line was found in {location}.\n\ - \n\ - See https://doc.rust-lang.org/stable/cargo/appendix/git-authentication.html#ssh-known-hosts \ - for more information.\n\ - ") - } - }; - Err(git2::Error::new( - git2::ErrorCode::GenericError, - git2::ErrorClass::Callback, - err_msg, - )) -} - -/// Checks if the given host/host key pair is known. -#[allow(clippy::result_large_err)] -fn check_ssh_known_hosts( - cert_host_key: &git2::cert::CertHostkey<'_>, - host: &str, -) -> Result<(), KnownHostError> { - let Some(remote_host_key) = cert_host_key.hostkey() else { - return Err(anyhow::format_err!("remote host key is not available").into()); - }; - let remote_key_type = cert_host_key.hostkey_type().unwrap(); - - // Collect all the known host entries from disk. - let mut known_hosts = Vec::new(); - for path in known_host_files() { - if !path.exists() { - continue; - } - let hosts = load_hostfile(&path)?; - known_hosts.extend(hosts); - } - // Load the bundled keys. Don't add keys for hosts that the user has - // configured, which gives them the option to override them. This could be - // useful if the keys are ever revoked. 
- let configured_hosts: HashSet<_> = known_hosts - .iter() - .flat_map(|known_host| known_host.patterns.split(',').map(str::to_lowercase)) - .collect(); - for (patterns, key_type, key) in BUNDLED_KEYS { - if !configured_hosts.contains(*patterns) { - let key = STANDARD.decode(key).unwrap(); - known_hosts.push(KnownHost { - location: KnownHostLocation::Bundled, - patterns: (*patterns).to_string(), - key_type: (*key_type).to_string(), - key, - line_type: KnownHostLineType::Key, - }); - } - } - for (patterns, key_type, key) in BUNDLED_REVOCATIONS { - let key = STANDARD.decode(key).unwrap(); - known_hosts.push(KnownHost { - location: KnownHostLocation::Bundled, - patterns: (*patterns).to_string(), - key_type: (*key_type).to_string(), - key, - line_type: KnownHostLineType::Revoked, - }); - } - check_ssh_known_hosts_loaded(&known_hosts, host, remote_key_type, remote_host_key) -} - -/// Checks a host key against a loaded set of known hosts. -#[allow(clippy::result_large_err)] -fn check_ssh_known_hosts_loaded( - known_hosts: &[KnownHost], - host: &str, - remote_key_type: SshHostKeyType, - remote_host_key: &[u8], -) -> Result<(), KnownHostError> { - // `latent_error` keeps track of a potential error that will be returned - // in case a matching host key isn't found. - let mut latent_errors: Vec = Vec::new(); - - // `other_hosts` keeps track of any entries that have an identical key, - // but a different hostname. - let mut other_hosts = Vec::new(); - - // `accepted_known_host_found` keeps track of whether we've found a matching - // line in the `known_hosts` file that we would accept. We can't return that - // immediately, because there may be a subsequent @revoked key. - let mut accepted_known_host_found = false; - - // Older versions of OpenSSH (before 6.8, March 2015) showed MD5 - // fingerprints (see FingerprintHash ssh config option). Here we only - // support SHA256. 
- let mut remote_fingerprint = cargo_util::Sha256::new(); - remote_fingerprint.update(remote_host_key); - let remote_fingerprint = STANDARD_NO_PAD.encode(remote_fingerprint.finish()); - let remote_host_key_encoded = STANDARD.encode(remote_host_key); - - for known_host in known_hosts { - // The key type from libgit2 needs to match the key type from the host file. - if known_host.key_type != remote_key_type.name() { - continue; - } - let key_matches = known_host.key == remote_host_key; - if !known_host.host_matches(host) { - if key_matches { - other_hosts.push(known_host.clone()); - } - continue; - } - match known_host.line_type { - KnownHostLineType::Key => { - if key_matches { - accepted_known_host_found = true; - } else { - // The host and key type matched, but the key itself did not. - // This indicates the key has changed. - // This is only reported as an error if no subsequent lines have a - // correct key. - latent_errors.push(KnownHostError::HostKeyHasChanged { - hostname: host.to_string(), - key_type: remote_key_type, - old_known_host: known_host.clone(), - remote_host_key: remote_host_key_encoded.clone(), - remote_fingerprint: remote_fingerprint.clone(), - }); - } - } - KnownHostLineType::Revoked => { - if key_matches { - return Err(KnownHostError::HostKeyRevoked { - hostname: host.to_string(), - key_type: remote_key_type, - remote_host_key: remote_host_key_encoded, - location: known_host.location.clone(), - }); - } - } - KnownHostLineType::CertAuthority => { - // The host matches a @cert-authority line, which is unsupported. - latent_errors.push(KnownHostError::HostHasOnlyCertAuthority { - hostname: host.to_string(), - location: known_host.location.clone(), - }); - } - } - } - - // We have an accepted host key and it hasn't been revoked. 
- if accepted_known_host_found { - return Ok(()); - } - - if latent_errors.is_empty() { - // FIXME: Ideally the error message should include the IP address of the - // remote host (to help the user validate that they are connecting to the - // host they were expecting to). However, I don't see a way to obtain that - // information from libgit2. - Err(KnownHostError::HostKeyNotFound { - hostname: host.to_string(), - key_type: remote_key_type, - remote_host_key: remote_host_key_encoded, - remote_fingerprint, - other_hosts, - }) - } else { - // We're going to take the first HostKeyHasChanged error if - // we find one, otherwise we'll take the first error (which - // we expect to be a CertAuthority error). - if let Some(index) = latent_errors - .iter() - .position(|e| matches!(e, KnownHostError::HostKeyHasChanged { .. })) - { - Err(latent_errors.remove(index)) - } else { - // Otherwise, we take the first error (which we expect to be - // a CertAuthority error). - Err(latent_errors.pop().unwrap()) - } - } -} - -/// Returns a list of files to try loading OpenSSH-formatted known hosts. -fn known_host_files() -> Vec { - let mut result = Vec::new(); - if cfg!(unix) { - result.push(PathBuf::from("/etc/ssh/ssh_known_hosts")); - } else if cfg!(windows) { - // The msys/cygwin version of OpenSSH uses `/etc` from the posix root - // filesystem there (such as `C:\msys64\etc\ssh\ssh_known_hosts`). - // However, I do not know of a way to obtain that location from - // Windows-land. The ProgramData version here is what the PowerShell - // port of OpenSSH does. - if let Some(progdata) = std::env::var_os("ProgramData") { - let mut progdata = PathBuf::from(progdata); - progdata.push("ssh"); - progdata.push("ssh_known_hosts"); - result.push(progdata); - } - } - result.extend(user_known_host_location()); - result -} - -/// The location of the user's `known_hosts` file. 
-fn user_known_host_location() -> Option { - // NOTE: This is a potentially inaccurate prediction of what the user - // actually wants. The actual location depends on several factors: - // - // - Windows OpenSSH Powershell version: I believe this looks up the home - // directory via ProfileImagePath in the registry, falling back to - // `GetWindowsDirectoryW` if that fails. - // - OpenSSH Portable (under msys): This is very complicated. I got lost - // after following it through some ldap/active directory stuff. - // - OpenSSH (most unix platforms): Uses `pw->pw_dir` from `getpwuid()`. - // - // This doesn't do anything close to that. home_dir's behavior is: - // - Windows: $USERPROFILE, or SHGetKnownFolderPath() - // - Unix: $HOME, or getpwuid_r() - // - // Since there is a mismatch here, the location returned here might be - // different than what the user's `ssh` CLI command uses. We may want to - // consider trying to align it better. - home::home_dir().map(|mut home| { - home.push(".ssh"); - home.push("known_hosts"); - home - }) -} - -const HASH_HOSTNAME_PREFIX: &str = "|1|"; - -#[derive(Clone)] -enum KnownHostLineType { - Key, - CertAuthority, - Revoked, -} - -/// A single known host entry. -#[derive(Clone)] -struct KnownHost { - location: KnownHostLocation, - /// The hostname. May be comma separated to match multiple hosts. - patterns: String, - key_type: String, - key: Vec, - line_type: KnownHostLineType, -} - -impl KnownHost { - /// Returns whether or not the given host matches this known host entry. - fn host_matches(&self, host: &str) -> bool { - let mut match_found = false; - let host = host.to_lowercase(); - if let Some(hashed) = self.patterns.strip_prefix(HASH_HOSTNAME_PREFIX) { - return hashed_hostname_matches(&host, hashed); - } - for pattern in self.patterns.split(',') { - let pattern = pattern.to_lowercase(); - // FIXME: support * and ? 
wildcards - if let Some(pattern) = pattern.strip_prefix('!') { - if pattern == host { - return false; - } - } else { - match_found |= pattern == host; - } - } - match_found - } -} - -fn hashed_hostname_matches(host: &str, hashed: &str) -> bool { - let Some((b64_salt, b64_host)) = hashed.split_once('|') else { - return false; - }; - let Ok(salt) = STANDARD.decode(b64_salt) else { - return false; - }; - let Ok(hashed_host) = STANDARD.decode(b64_host) else { - return false; - }; - let Ok(mut mac) = hmac::Hmac::::new_from_slice(&salt) else { - return false; - }; - mac.update(host.as_bytes()); - let result = mac.finalize().into_bytes(); - hashed_host == result[..] -} - -/// Loads an OpenSSH `known_hosts` file. -fn load_hostfile(path: &Path) -> Result, anyhow::Error> { - let contents = cargo_util::paths::read(path)?; - Ok(load_hostfile_contents(path, &contents)) -} - -fn load_hostfile_contents(path: &Path, contents: &str) -> Vec { - let entries = contents - .lines() - .enumerate() - .filter_map(|(lineno, line)| { - let location = KnownHostLocation::File { - path: path.to_path_buf(), - lineno: lineno + 1, - }; - parse_known_hosts_line(line, location) - }) - .collect(); - entries -} - -fn parse_known_hosts_line(line: &str, location: KnownHostLocation) -> Option { - let line = line.trim(); - if line.is_empty() || line.starts_with('#') { - return None; - } - let mut parts = line.split([' ', '\t']).filter(|s| !s.is_empty()); - - let line_type = if line.starts_with('@') { - let line_type = parts.next()?; - - if line_type == "@cert-authority" { - KnownHostLineType::CertAuthority - } else if line_type == "@revoked" { - KnownHostLineType::Revoked - } else { - // No other markers are defined - return None; - } - } else { - KnownHostLineType::Key - }; - - let patterns = parts.next()?; - let key_type = parts.next()?; - let key = parts.next().map(|p| STANDARD.decode(p))?.ok()?; - Some(KnownHost { - line_type, - location, - patterns: patterns.to_string(), - key_type: 
key_type.to_string(), - key, - }) -} - -#[cfg(test)] -mod tests { - use super::*; - - static COMMON_CONTENTS: &str = r" - # Comments allowed at start of line - - example.com,rust-lang.org ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC5MzWIpZwpkpDjyCNiTIEVFhSA9OUUQvjFo7CgZBGCAj/cqeUIgiLsgtfmtBsfWIkAECQpM7ePP7NLZFGJcHvoyg5jXJiIX5s0eKo9IlcuTLLrMkW5MkHXE7bNklVbW1WdCfF2+y7Ao25B4L8FFRokMh0yp/H6+8xZ7PdVwL3FRPEg8ftZ5R0kuups6xiMHPRX+f/07vfJzA47YDPmXfhkn+JK8kL0JYw8iy8BtNBfRQL99d9iXJzWXnNce5NHMuKD5rOonD3aQHLDlwK+KhrFRrdaxQEM8ZWxNti0ux8yT4Dl5jJY0CrIu3Xl6+qroVgTqJGNkTbhs5DGWdFh6BLPTTH15rN4buisg7uMyLyHqx06ckborqD33gWu+Jig7O+PV6KJmL5mp1O1HXvZqkpBdTiT6GiDKG3oECCIXkUk0BSU9VG9VQcrMxxvgiHlyoXUAfYQoXv/lnxkTnm+Sr36kutsVOs7n5B43ZKAeuaxyQ11huJZpxamc0RA1HM641s= eric@host - Example.net ssh-dss AAAAB3NzaC1kc3MAAACBAK2Ek3jVxisXmz5UcZ7W65BAj/nDJCCVvSe0Aytndn4PH6k7sVesut5OoY6PdksZ9tEfuFjjS9HR5SJb8j1GW0GxtaSHHbf+rNc36PeU75bffzyIWwpA8uZFONt5swUAXJXcsHOoapNbUFuhHsRhB2hXxz9QGNiiwIwRJeSHixKRAAAAFQChKfxO1z9H2/757697xP5nJ/Z5dwAAAIEAoc+HIWas+4WowtB/KtAp6XE0B9oHI+55wKtdcGwwb7zHKK9scWNXwxIcMhSvyB3Oe2I7dQQlvyIWxsdZlzOkX0wdsTHjIAnBAP68MyvMv4kq3+I5GAVcFsqoLZfZvh0dlcgUq1/YNYZwKlt89tnzk8Fp4KLWmuw8Bd8IShYVa78AAACAL3qd8kNTY7CthgsQ8iWdjbkGSF/1KCeFyt8UjurInp9wvPDjqagwakbyLOzN7y3/ItTPCaGuX+RjFP0zZTf8i9bsAVyjFJiJ7vzRXcWytuFWANrpzLTn1qzPfh63iK92Aw8AVBYvEA/4bxo+XReAvhNBB/m78G6OedTeu6ZoTsI= eric@host - [example.net]:2222 ssh-dss AAAAB3NzaC1kc3MAAACBAJJN5kLZEpOJpXWyMT4KwYvLAj+b9ErNtglxOi86C6Kw7oZeYdDMCfD3lc3PJyX64udQcWGfO4abSESMiYdY43yFAZH279QGH5Q/B5CklVvTqYpfAUR+1r9TQxy3OVQHk7FB2wOi4xNQ3myO0vaYlBOB9il+P223aERbXx4JTWdvAAAAFQCTHWTcXxLK5Z6ZVPmfdSDyHzkF2wAAAIEAhp41/mTnM0Y0EWSyCXuETMW1QSpKGF8sqoZKp6wdzyhLXu0i32gLdXj4p24em/jObYh93hr+MwgxqWq+FHgD+D80Qg5f6vj4yEl4Uu5hqtTpCBFWUQoyEckbUkPf8uZ4/XzAne+tUSjZm09xATCmK9U2IGqZE+D+90eBkf1Svc8AAACAeKhi4EtfwenFYqKz60ZoEEhIsE1yI2jH73akHnfHpcW84w+fk3YlwjcfDfyYso+D0jZBdJeK5qIdkbUWhAX8wDjJVO0WL6r/YPr4yu/CgEyW1H59tAbujGJ4NR0JDqioulzYqNHnxpiw1RJukZnPBfSFKzRElvPOCq/NkQM/Mwk= eric@host - 
nistp256.example.org ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBJ4iYGCcJrUIfrHfzlsv8e8kaF36qpcUpe3VNAKVCZX/BDptIdlEe8u8vKNRTPgUO9jqS0+tjTcPiQd8/8I9qng= eric@host - nistp384.example.org ecdsa-sha2-nistp384 AAAAE2VjZHNhLXNoYTItbmlzdHAzODQAAAAIbmlzdHAzODQAAABhBNuGT3TqMz2rcwOt2ZqkiNqq7dvWPE66W2qPCoZsh0pQhVU3BnhKIc6nEr6+Wts0Z3jdF3QWwxbbTjbVTVhdr8fMCFhDCWiQFm9xLerYPKnu9qHvx9K87/fjc5+0pu4hLA== eric@host - nistp521.example.org ecdsa-sha2-nistp521 AAAAE2VjZHNhLXNoYTItbmlzdHA1MjEAAAAIbmlzdHA1MjEAAACFBAD35HH6OsK4DN75BrKipVj/GvZaUzjPNa1F8wMjUdPB1JlVcUfgzJjWSxrhmaNN3u0soiZw8WNRFINsGPCw5E7DywF1689WcIj2Ye2rcy99je15FknScTzBBD04JgIyOI50mCUaPCBoF14vFlN6BmO00cFo+yzy5N8GuQ2sx9kr21xmFQ== eric@host - # Revoked is supported, but without Cert-Authority support, it will only negate some other fixed key. - @revoked revoked.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKtQsi+KPYispwm2rkMidQf30fG1Niy8XNkvASfePoca eric@host - # Cert-Authority is not supported (below key should not be valid anyway) - @cert-authority ca.example.com ssh-rsa AABBB5Wm - example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAWkjI6XT2SZh3xNk5NhisA3o3sGzWR+VAKMSqHtI0aY eric@host - 192.168.42.12 ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKVYJpa0yUGaNk0NXQTPWa0tHjqRpx+7hl2diReH6DtR eric@host - |1|QxzZoTXIWLhUsuHAXjuDMIV3FjQ=|M6NCOIkjiWdCWqkh5+Q+/uFLGjs= ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIIHgN3O21U4LWtP5OzjTzPnUnSDmCNDvyvlaj6Hi65JC eric@host - # Negation isn't terribly useful without globs. 
- neg.example.com,!neg.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOXfUnaAHTlo1Qi//rNk26OcmHikmkns1Z6WW/UuuS3K eric@host - "; - - #[test] - fn known_hosts_parse() { - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, COMMON_CONTENTS); - assert_eq!(khs.len(), 12); - match &khs[0].location { - KnownHostLocation::File { path, lineno } => { - assert_eq!(path, kh_path); - assert_eq!(*lineno, 4); - } - KnownHostLocation::Bundled => panic!("unexpected"), - } - assert_eq!(khs[0].patterns, "example.com,rust-lang.org"); - assert_eq!(khs[0].key_type, "ssh-rsa"); - assert_eq!(khs[0].key.len(), 407); - assert_eq!(&khs[0].key[..30], b"\x00\x00\x00\x07ssh-rsa\x00\x00\x00\x03\x01\x00\x01\x00\x00\x01\x81\x00\xb935\x88\xa5\x9c)"); - match &khs[1].location { - KnownHostLocation::File { path, lineno } => { - assert_eq!(path, kh_path); - assert_eq!(*lineno, 5); - } - KnownHostLocation::Bundled => panic!("unexpected"), - } - assert_eq!(khs[2].patterns, "[example.net]:2222"); - assert_eq!(khs[3].patterns, "nistp256.example.org"); - assert_eq!(khs[9].patterns, "192.168.42.12"); - } - - #[test] - fn host_matches() { - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, COMMON_CONTENTS); - assert!(khs[0].host_matches("example.com")); - assert!(khs[0].host_matches("rust-lang.org")); - assert!(khs[0].host_matches("EXAMPLE.COM")); - assert!(khs[1].host_matches("example.net")); - assert!(!khs[0].host_matches("example.net")); - assert!(khs[2].host_matches("[example.net]:2222")); - assert!(!khs[2].host_matches("example.net")); - assert!(khs[10].host_matches("hashed.example.com")); - assert!(!khs[10].host_matches("example.com")); - assert!(!khs[11].host_matches("neg.example.com")); - } - - #[test] - fn check_match() { - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, COMMON_CONTENTS); - - assert!(check_ssh_known_hosts_loaded( - &khs, - "example.com", - 
SshHostKeyType::Rsa, - &khs[0].key - ) - .is_ok()); - - match check_ssh_known_hosts_loaded(&khs, "example.com", SshHostKeyType::Dss, &khs[0].key) { - Err(KnownHostError::HostKeyNotFound { - hostname, - remote_fingerprint, - other_hosts, - .. - }) => { - assert_eq!( - remote_fingerprint, - "yn+pONDn0EcgdOCVptgB4RZd/wqmsVKrPnQMLtrvhw8" - ); - assert_eq!(hostname, "example.com"); - assert_eq!(other_hosts.len(), 0); - } - _ => panic!("unexpected"), - } - - match check_ssh_known_hosts_loaded( - &khs, - "foo.example.com", - SshHostKeyType::Rsa, - &khs[0].key, - ) { - Err(KnownHostError::HostKeyNotFound { other_hosts, .. }) => { - assert_eq!(other_hosts.len(), 1); - assert_eq!(other_hosts[0].patterns, "example.com,rust-lang.org"); - } - _ => panic!("unexpected"), - } - - let mut modified_key = khs[0].key.clone(); - modified_key[0] = 1; - match check_ssh_known_hosts_loaded(&khs, "example.com", SshHostKeyType::Rsa, &modified_key) - { - Err(KnownHostError::HostKeyHasChanged { old_known_host, .. }) => { - assert!(matches!( - old_known_host.location, - KnownHostLocation::File { lineno: 4, .. } - )); - } - _ => panic!("unexpected"), - } - } - - #[test] - fn revoked() { - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, COMMON_CONTENTS); - - match check_ssh_known_hosts_loaded( - &khs, - "revoked.example.com", - SshHostKeyType::Ed255219, - &khs[6].key, - ) { - Err(KnownHostError::HostKeyRevoked { - hostname, location, .. - }) => { - assert_eq!("revoked.example.com", hostname); - assert!(matches!( - location, - KnownHostLocation::File { lineno: 11, .. 
} - )); - } - _ => panic!("Expected key to be revoked for revoked.example.com."), - } - } - - #[test] - fn cert_authority() { - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, COMMON_CONTENTS); - - match check_ssh_known_hosts_loaded( - &khs, - "ca.example.com", - SshHostKeyType::Rsa, - &khs[0].key, // The key should not matter - ) { - Err(KnownHostError::HostHasOnlyCertAuthority { - hostname, location, .. - }) => { - assert_eq!("ca.example.com", hostname); - assert!(matches!( - location, - KnownHostLocation::File { lineno: 13, .. } - )); - } - Err(KnownHostError::HostKeyNotFound { hostname, .. }) => { - panic!("host key not found... {hostname}"); - } - _ => panic!("Expected host to only have @cert-authority line (which is unsupported)."), - } - } - - #[test] - fn multiple_errors() { - let contents = r" - not-used.example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAWkjI6XT2SZh3xNk5NhisA3o3sGzWR+VAKMSqHtI0aY eric@host - # Cert-authority and changed key for the same host - changed key error should prevail - @cert-authority example.com ssh-ed25519 AABBB5Wm - example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKVYJpa0yUGaNk0NXQTPWa0tHjqRpx+7hl2diReH6DtR eric@host - "; - - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, contents); - - match check_ssh_known_hosts_loaded( - &khs, - "example.com", - SshHostKeyType::Ed255219, - &khs[0].key, - ) { - Err(KnownHostError::HostKeyHasChanged { - hostname, - old_known_host, - remote_host_key, - .. - }) => { - assert_eq!("example.com", hostname); - assert_eq!( - "AAAAC3NzaC1lZDI1NTE5AAAAIAWkjI6XT2SZh3xNk5NhisA3o3sGzWR+VAKMSqHtI0aY", - remote_host_key - ); - assert!(matches!( - old_known_host.location, - KnownHostLocation::File { lineno: 5, .. 
} - )); - } - _ => panic!("Expected error to be of type HostKeyHasChanged."), - } - } - - #[test] - fn known_host_and_revoked() { - let contents = r" - example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKVYJpa0yUGaNk0NXQTPWa0tHjqRpx+7hl2diReH6DtR eric@host - # Later in the file the same host key is revoked - @revoked example.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKVYJpa0yUGaNk0NXQTPWa0tHjqRpx+7hl2diReH6DtR eric@host - "; - - let kh_path = Path::new("/home/abc/.known_hosts"); - let khs = load_hostfile_contents(kh_path, contents); - - match check_ssh_known_hosts_loaded( - &khs, - "example.com", - SshHostKeyType::Ed255219, - &khs[0].key, - ) { - Err(KnownHostError::HostKeyRevoked { - hostname, - remote_host_key, - location, - .. - }) => { - assert_eq!("example.com", hostname); - assert_eq!( - "AAAAC3NzaC1lZDI1NTE5AAAAIKVYJpa0yUGaNk0NXQTPWa0tHjqRpx+7hl2diReH6DtR", - remote_host_key - ); - assert!(matches!( - location, - KnownHostLocation::File { lineno: 4, .. } - )); - } - _ => panic!("Expected host key to be reject with error HostKeyRevoked."), - } - } -} diff --git a/crates/uv-git/src/lib.rs b/crates/uv-git/src/lib.rs index e4caa5ee71ad..a650f8a5e46d 100644 --- a/crates/uv-git/src/lib.rs +++ b/crates/uv-git/src/lib.rs @@ -2,14 +2,12 @@ use std::str::FromStr; use url::Url; pub use crate::git::GitReference; -pub use crate::sha::GitSha; +pub use crate::sha::{GitOid, GitSha, OidParseError}; pub use crate::source::{Fetch, GitSource, Reporter}; mod git; -mod known_hosts; mod sha; mod source; -mod util; /// A URL reference to a Git repository. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Hash, Ord)] @@ -55,7 +53,7 @@ impl GitUrl { } impl TryFrom for GitUrl { - type Error = git2::Error; + type Error = OidParseError; /// Initialize a [`GitUrl`] source from a URL. 
fn try_from(mut url: Url) -> Result { @@ -121,11 +119,3 @@ impl std::fmt::Display for GitUrl { write!(f, "{}", self.repository) } } - -#[derive(Debug, Clone, Copy)] -pub enum FetchStrategy { - /// Fetch Git repositories using libgit2. - Libgit2, - /// Fetch Git repositories using the `git` CLI. - Cli, -} diff --git a/crates/uv-git/src/sha.rs b/crates/uv-git/src/sha.rs index d4384f5bdbb8..d476c08fe31a 100644 --- a/crates/uv-git/src/sha.rs +++ b/crates/uv-git/src/sha.rs @@ -1,8 +1,11 @@ -use std::str::FromStr; +use std::fmt::Display; +use std::str::{self, FromStr}; + +use thiserror::Error; /// A complete Git SHA, i.e., a 40-character hexadecimal representation of a Git commit. #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct GitSha(git2::Oid); +pub struct GitSha(GitOid); impl GitSha { /// Convert the SHA to a truncated representation, i.e., the first 16 characters of the SHA. @@ -11,14 +14,14 @@ impl GitSha { } } -impl From for git2::Oid { +impl From for GitOid { fn from(value: GitSha) -> Self { value.0 } } -impl From for GitSha { - fn from(value: git2::Oid) -> Self { +impl From for GitSha { + fn from(value: GitOid) -> Self { Self(value) } } @@ -30,9 +33,79 @@ impl std::fmt::Display for GitSha { } impl FromStr for GitSha { - type Err = git2::Error; + type Err = OidParseError; fn from_str(value: &str) -> Result { - Ok(Self(git2::Oid::from_str(value)?)) + Ok(Self(GitOid::from_str(value)?)) + } +} + +/// Unique identity of any Git object (commit, tree, blob, tag). +/// +/// Note this type does not validate whether the input is a valid hash. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct GitOid { + len: usize, + bytes: [u8; 40], +} + +impl GitOid { + /// Return the string representation of an object ID. 
+ pub(crate) fn as_str(&self) -> &str { + str::from_utf8(&self.bytes[..self.len]).unwrap() + } +} + +#[derive(Debug, Error, PartialEq)] +pub enum OidParseError { + #[error("Object ID can be at most 40 hex characters")] + TooLong, + #[error("Object ID cannot be parsed from empty string")] + Empty, +} + +impl FromStr for GitOid { + type Err = OidParseError; + + fn from_str(s: &str) -> Result { + if s.is_empty() { + return Err(OidParseError::Empty); + } + + if s.len() > 40 { + return Err(OidParseError::TooLong); + } + + let mut out = [0; 40]; + out[..s.len()].copy_from_slice(s.as_bytes()); + + Ok(GitOid { + len: s.len(), + bytes: out, + }) + } +} + +impl Display for GitOid { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_str()) + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use super::{GitOid, OidParseError}; + + #[test] + fn git_oid() { + GitOid::from_str("4a23745badf5bf5ef7928f1e346e9986bd696d82").unwrap(); + + assert_eq!(GitOid::from_str(""), Err(OidParseError::Empty)); + assert_eq!( + GitOid::from_str(&str::repeat("a", 41)), + Err(OidParseError::TooLong) + ); } } diff --git a/crates/uv-git/src/source.rs b/crates/uv-git/src/source.rs index 64483d038830..3811438c161b 100644 --- a/crates/uv-git/src/source.rs +++ b/crates/uv-git/src/source.rs @@ -11,7 +11,7 @@ use url::Url; use cache_key::{digest, RepositoryUrl}; use crate::git::GitRemote; -use crate::{FetchStrategy, GitSha, GitUrl}; +use crate::{GitOid, GitSha, GitUrl}; /// A remote Git source that can be checked out locally. pub struct GitSource { @@ -19,8 +19,6 @@ pub struct GitSource { git: GitUrl, /// The HTTP client to use for fetching. client: Client, - /// The fetch strategy to use when cloning. - strategy: FetchStrategy, /// The path to the Git source database. cache: PathBuf, /// The reporter to use for this source. 
@@ -33,7 +31,6 @@ impl GitSource { Self { git, client: Client::new(), - strategy: FetchStrategy::Cli, cache: cache.into(), reporter: None, } @@ -77,8 +74,7 @@ impl GitSource { &db_path, db, &self.git.reference, - locked_rev.map(git2::Oid::from), - self.strategy, + locked_rev.map(GitOid::from), &self.client, )?; @@ -98,12 +94,8 @@ impl GitSource { .join("checkouts") .join(&ident) .join(short_id.as_str()); - db.copy_to( - actual_rev.into(), - &checkout_path, - self.strategy, - &self.client, - )?; + + db.copy_to(actual_rev.into(), &checkout_path)?; // Report the checkout operation to the reporter. if let Some(task) = task { diff --git a/crates/uv-git/src/util/errors.rs b/crates/uv-git/src/util/errors.rs deleted file mode 100644 index 337461cff174..000000000000 --- a/crates/uv-git/src/util/errors.rs +++ /dev/null @@ -1,45 +0,0 @@ -//! Git support is derived from Cargo's implementation. -//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice. -//! Source: -use std::fmt::{self, Write}; - -use super::truncate_with_ellipsis; - -#[derive(Debug)] -pub(crate) struct HttpNotSuccessful { - pub(crate) code: u32, - pub(crate) url: String, - pub(crate) ip: Option, - pub(crate) body: Vec, -} - -impl HttpNotSuccessful { - fn render(&self) -> String { - let mut result = String::new(); - let body = std::str::from_utf8(&self.body).map_or_else( - |_| format!("[{} non-utf8 bytes]", self.body.len()), - |s| truncate_with_ellipsis(s, 512), - ); - - write!( - result, - "failed to get successful HTTP response from `{}`", - self.url - ) - .unwrap(); - if let Some(ip) = &self.ip { - write!(result, " ({ip})").unwrap(); - } - writeln!(result, ", got {}", self.code).unwrap(); - write!(result, "body:\n{body}").unwrap(); - result - } -} - -impl fmt::Display for HttpNotSuccessful { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.render()) - } -} - -impl std::error::Error for HttpNotSuccessful {} diff --git a/crates/uv-git/src/util/mod.rs 
b/crates/uv-git/src/util/mod.rs deleted file mode 100644 index 68795f593b37..000000000000 --- a/crates/uv-git/src/util/mod.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! Git support is derived from Cargo's implementation. -//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice. -//! Source: -pub(crate) mod errors; -pub(crate) mod retry; - -pub(crate) fn truncate_with_ellipsis(s: &str, max_width: usize) -> String { - // We should truncate at grapheme-boundary and compute character-widths, - // yet the dependencies on unicode-segmentation and unicode-width are - // not worth it. - let mut chars = s.chars(); - let mut prefix = (&mut chars).take(max_width - 1).collect::(); - if chars.next().is_some() { - prefix.push('…'); - } - prefix -} diff --git a/crates/uv-git/src/util/retry.rs b/crates/uv-git/src/util/retry.rs deleted file mode 100644 index 1bf67ce249df..000000000000 --- a/crates/uv-git/src/util/retry.rs +++ /dev/null @@ -1,188 +0,0 @@ -//! Utilities for retrying a network operation. -//! -//! Some network errors are considered "spurious", meaning it is not a real -//! error (such as a 404 not found) and is likely a transient error (like a -//! bad network connection) that we can hope will resolve itself shortly. The -//! [`Retry`] type offers a way to repeatedly perform some kind of network -//! operation with a delay if it detects one of these possibly transient -//! errors. -//! -//! This supports errors from [`git2`], [`reqwest`], and [`HttpNotSuccessful`] -//! 5xx HTTP errors. -//! -//! The number of retries can be configured by the user via the `net.retry` -//! config option. This indicates the number of times to retry the operation -//! (default 3 times for a total of 4 attempts). -//! -//! There are hard-coded constants that indicate how long to sleep between -//! retries. The constants are tuned to balance a few factors, such as the -//! responsiveness to the user (we don't want cargo to hang for too long -//! 
retrying things), and accommodating things like Cloudfront's default -//! negative TTL of 10 seconds (if Cloudfront gets a 5xx error for whatever -//! reason it won't try to fetch again for 10 seconds). -//! -//! The timeout also implements a primitive form of random jitter. This is so -//! that if multiple requests fail at the same time that they don't all flood -//! the server at the same time when they are retried. This jitter still has -//! some clumping behavior, but should be good enough. -//! -//! [`Retry`] is the core type for implementing retry logic. The -//! [`Retry::try`] method can be called with a callback, and it will -//! indicate if it needs to be called again sometime in the future if there -//! was a possibly transient error. The caller is responsible for sleeping the -//! appropriate amount of time and then calling [`Retry::try`] again. -//! -//! [`with_retry`] is a convenience function that will create a [`Retry`] and -//! handle repeatedly running a callback until it succeeds, or it runs out of -//! retries. -//! -//! Some interesting resources about retries: -//! - -//! - -//! - - -//! Git support is derived from Cargo's implementation. -//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice. -//! Source: -use std::cmp::min; -use std::time::Duration; - -use anyhow::{Error, Result}; -use rand::Rng; -use tracing::warn; - -use crate::util::errors::HttpNotSuccessful; - -/// State for managing retrying a network operation. -pub(crate) struct Retry { - /// The number of failed attempts that have been done so far. - /// - /// Starts at 0, and increases by one each time an attempt fails. - retries: u64, - /// The maximum number of times the operation should be retried. - /// - /// 0 means it should never retry. - max_retries: u64, -} - -/// The result of attempting some operation via [`Retry::try`]. -pub(crate) enum RetryResult { - /// The operation was successful. 
- /// - /// The wrapped value is the return value of the callback function. - Success(T), - /// The operation was an error, and it should not be tried again. - Err(Error), - /// The operation failed, and should be tried again in the future. - /// - /// The wrapped value is the number of milliseconds to wait before trying - /// again. The caller is responsible for waiting this long and then - /// calling [`Retry::try`] again. - Retry(u64), -} - -/// Maximum amount of time a single retry can be delayed (milliseconds). -const MAX_RETRY_SLEEP_MS: u64 = 10 * 1000; -/// The minimum initial amount of time a retry will be delayed (milliseconds). -/// -/// The actual amount of time will be a random value above this. -const INITIAL_RETRY_SLEEP_BASE_MS: u64 = 500; -/// The maximum amount of additional time the initial retry will take (milliseconds). -/// -/// The initial delay will be [`INITIAL_RETRY_SLEEP_BASE_MS`] plus a random range -/// from 0 to this value. -const INITIAL_RETRY_JITTER_MS: u64 = 1000; - -impl Retry { - pub(crate) fn new() -> Self { - Self { - retries: 0, - max_retries: 3, - } - } - - /// Calls the given callback, and returns a [`RetryResult`] which - /// indicates whether or not this needs to be called again at some point - /// in the future to retry the operation if it failed. - pub(crate) fn r#try(&mut self, f: impl FnOnce() -> Result) -> RetryResult { - match f() { - Err(ref err) if maybe_spurious(err) && self.retries < self.max_retries => { - let err_msg = err.downcast_ref::().map_or_else( - || err.root_cause().to_string(), - HttpNotSuccessful::to_string, - ); - warn!( - "Spurious network error ({} tries remaining): {err_msg}", - self.max_retries - self.retries, - ); - self.retries += 1; - RetryResult::Retry(self.next_sleep_ms()) - } - Err(e) => RetryResult::Err(e), - Ok(r) => RetryResult::Success(r), - } - } - - /// Gets the next sleep duration in milliseconds. 
- fn next_sleep_ms(&self) -> u64 { - if self.retries == 1 { - let mut rng = rand::thread_rng(); - INITIAL_RETRY_SLEEP_BASE_MS + rng.gen_range(0..INITIAL_RETRY_JITTER_MS) - } else { - min( - ((self.retries - 1) * 3) * 1000 + INITIAL_RETRY_SLEEP_BASE_MS, - MAX_RETRY_SLEEP_MS, - ) - } - } -} - -fn maybe_spurious(err: &Error) -> bool { - if let Some(git_err) = err.downcast_ref::() { - match git_err.class() { - git2::ErrorClass::Net - | git2::ErrorClass::Os - | git2::ErrorClass::Zlib - | git2::ErrorClass::Ssl - | git2::ErrorClass::Http => return git_err.code() != git2::ErrorCode::Certificate, - _ => (), - } - } - if let Some(reqwest_err) = err.downcast_ref::() { - if reqwest_err.is_timeout() - || reqwest_err.is_connect() - || reqwest_err - .status() - .map_or(false, |status| status.is_server_error()) - { - return true; - } - } - if let Some(not_200) = err.downcast_ref::() { - if 500 <= not_200.code && not_200.code < 600 { - return true; - } - } - - false -} - -/// Wrapper method for network call retry logic. -/// -/// Retry counts provided by Config object `net.retry`. Config shell outputs -/// a warning on per retry. -/// -/// Closure must return a `Result`. -pub(crate) fn with_retry(mut callback: F) -> Result -where - F: FnMut() -> Result, -{ - let mut retry = Retry::new(); - loop { - match retry.r#try(&mut callback) { - RetryResult::Success(r) => return Ok(r), - RetryResult::Err(e) => return Err(e), - RetryResult::Retry(sleep) => std::thread::sleep(Duration::from_millis(sleep)), - } - } -}