From 46f486b155d9a538ad9d237f7d0e802dcca39511 Mon Sep 17 00:00:00 2001 From: Jiahao XU Date: Tue, 8 Aug 2023 20:58:53 +1000 Subject: [PATCH] feat: Verify cksum of crate tarball from cargo registry Fixed #1183 Since the crate tarball could be downloaded from a different set of servers than where the cargo registry is hosted, verifying the checksum is necessary to verify its integrity. Signed-off-by: Jiahao XU --- Cargo.lock | 19 +++++++ crates/binstalk-downloader/src/lib.rs | 2 + crates/binstalk/Cargo.toml | 2 + crates/binstalk/src/drivers/registry.rs | 7 +++ .../binstalk/src/drivers/registry/common.rs | 50 +++++++++++++---- .../drivers/registry/crates_io_registry.rs | 55 +++++++++++-------- .../src/drivers/registry/git_registry.rs | 11 ++-- .../src/drivers/registry/sparse_registry.rs | 7 +-- crates/binstalk/src/helpers.rs | 2 +- 9 files changed, 111 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 36588b254..de70dc8a0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -203,6 +203,12 @@ dependencies = [ "backtrace", ] +[[package]] +name = "base16" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d27c3610c36aee21ce8ac510e6224498de4228ad772a171ed65643a24693a5a8" + [[package]] name = "base64" version = "0.13.1" @@ -229,6 +235,7 @@ name = "binstalk" version = "0.14.1" dependencies = [ "async-trait", + "base16", "binstalk-downloader", "binstalk-types", "cargo_toml", @@ -251,6 +258,7 @@ dependencies = [ "semver", "serde", "serde_json", + "sha2", "strum", "target-lexicon", "tempfile", @@ -3127,6 +3135,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" +[[package]] +name = "sha2" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" +dependencies = [ + "cfg-if", + "cpufeatures", + 
"digest", +] + [[package]] name = "sharded-slab" version = "0.1.4" diff --git a/crates/binstalk-downloader/src/lib.rs b/crates/binstalk-downloader/src/lib.rs index e4d6ddbd3..c4a23c844 100644 --- a/crates/binstalk-downloader/src/lib.rs +++ b/crates/binstalk-downloader/src/lib.rs @@ -1,5 +1,7 @@ #![cfg_attr(docsrs, feature(doc_auto_cfg))] +pub use bytes; + pub mod download; /// Github API client. diff --git a/crates/binstalk/Cargo.toml b/crates/binstalk/Cargo.toml index 469eeb30d..ceb1a66b2 100644 --- a/crates/binstalk/Cargo.toml +++ b/crates/binstalk/Cargo.toml @@ -11,6 +11,7 @@ license = "GPL-3.0-only" [dependencies] async-trait = "0.1.68" +base16 = "0.2.1" binstalk-downloader = { version = "0.6.1", path = "../binstalk-downloader", default-features = false, features = ["gh-api-client"] } binstalk-types = { version = "0.5.0", path = "../binstalk-types" } cargo_toml = "0.15.3" @@ -33,6 +34,7 @@ reflink-copy = "0.1.5" semver = { version = "1.0.17", features = ["serde"] } serde = { version = "1.0.163", features = ["derive"] } serde_json = "1.0.99" +sha2 = "0.10.7" strum = "0.25.0" target-lexicon = { version = "0.12.11", features = ["std"] } tempfile = "3.5.0" diff --git a/crates/binstalk/src/drivers/registry.rs b/crates/binstalk/src/drivers/registry.rs index 7938d98a8..39d67cf79 100644 --- a/crates/binstalk/src/drivers/registry.rs +++ b/crates/binstalk/src/drivers/registry.rs @@ -1,5 +1,6 @@ use std::{str::FromStr, sync::Arc}; +use base16::DecodeError as Base16DecodeError; use cargo_toml::Manifest; use compact_str::CompactString; use leon::{ParseError, RenderError}; @@ -56,6 +57,12 @@ pub enum RegistryError { #[error("Failed to render dl config: {0}")] RenderDlConfig(#[from] RenderError), + + #[error("Failed to parse checksum encoded in hex: {0}")] + InvalidHex(#[from] Base16DecodeError), + + #[error("Expected checksum `{expected}`, actual checksum `{actual}`")] + UnmatchedChecksum { expected: String, actual: String }, } #[derive(Clone, Debug)] diff --git 
a/crates/binstalk/src/drivers/registry/common.rs b/crates/binstalk/src/drivers/registry/common.rs index 54d64fa75..afc5ec3bd 100644 --- a/crates/binstalk/src/drivers/registry/common.rs +++ b/crates/binstalk/src/drivers/registry/common.rs @@ -1,18 +1,21 @@ -use std::{borrow::Cow, path::PathBuf}; +use std::borrow::Cow; +use base16::{decode as decode_base16, encode_lower as encode_base16}; use cargo_toml::Manifest; use compact_str::{format_compact, CompactString, ToCompactString}; use leon::{Template, Values}; use semver::{Version, VersionReq}; use serde::Deserialize; use serde_json::Error as JsonError; +use sha2::{Digest, Sha256}; use tracing::debug; use crate::{ drivers::registry::{visitor::ManifestVisitor, RegistryError}, errors::BinstallError, helpers::{ - download::Download, + bytes::Bytes, + download::{DataVerifier, Download}, remote::{Client, Url}, }, manifests::cargo_toml_binstall::{Meta, TarBasedFmt}, @@ -23,23 +26,48 @@ pub(super) struct RegistryConfig { pub(super) dl: CompactString, } +struct Sha256Digest(Sha256); + +impl Default for Sha256Digest { + fn default() -> Self { + Sha256Digest(Sha256::new()) + } +} + +impl DataVerifier for Sha256Digest { + fn update(&mut self, data: &Bytes) { + self.0.update(data); + } +} + pub(super) async fn parse_manifest( client: Client, crate_name: &str, - version: &str, crate_url: Url, + MatchedVersion { version, cksum }: MatchedVersion, ) -> Result<Manifest<Meta>, BinstallError> { debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it"); - let manifest_dir_path: PathBuf = format!("{crate_name}-{version}").into(); + let mut manifest_visitor = ManifestVisitor::new(format!("{crate_name}-{version}").into()); - let mut manifest_visitor = ManifestVisitor::new(manifest_dir_path); + let checksum = decode_base16(cksum.as_bytes()).map_err(RegistryError::from)?; + let mut sha256_digest = Sha256Digest::default(); - Download::new(client, crate_url) + Download::new_with_data_verifier(client, crate_url, &mut sha256_digest) 
.and_visit_tar(TarBasedFmt::Tgz, &mut manifest_visitor) .await?; - manifest_visitor.load_manifest() + let digest_checksum = sha256_digest.0.finalize(); + + if digest_checksum.as_slice() != checksum.as_slice() { + Err(RegistryError::UnmatchedChecksum { + expected: cksum, + actual: encode_base16(digest_checksum.as_slice()), + } + .into()) + } else { + manifest_visitor.load_manifest() + } } /// Return components of crate prefix @@ -68,8 +96,7 @@ pub(super) fn render_dl_template( dl_template: &str, crate_name: &str, (c1, c2): &(CompactString, Option<CompactString>), - version: &str, - cksum: &str, + MatchedVersion { version, cksum }: &MatchedVersion, ) -> Result<String, RegistryError> { let template = Template::parse(dl_template)?; if template.keys().next().is_some() { @@ -114,12 +141,13 @@ pub(super) fn render_dl_template( pub(super) struct RegistryIndexEntry { vers: CompactString, yanked: bool, - cksum: CompactString, + cksum: String, } pub(super) struct MatchedVersion { pub(super) version: CompactString, - pub(super) cksum: CompactString, + /// sha256 checksum encoded in base16 + pub(super) cksum: String, } impl MatchedVersion { diff --git a/crates/binstalk/src/drivers/registry/crates_io_registry.rs b/crates/binstalk/src/drivers/registry/crates_io_registry.rs index b2fa25ec5..d66d3998f 100644 --- a/crates/binstalk/src/drivers/registry/crates_io_registry.rs +++ b/crates/binstalk/src/drivers/registry/crates_io_registry.rs @@ -10,7 +10,7 @@ use tokio::{ use tracing::debug; use crate::{ - drivers::registry::{parse_manifest, RegistryError}, + drivers::registry::{parse_manifest, MatchedVersion, RegistryError}, errors::BinstallError, helpers::remote::{Client, Url}, manifests::cargo_toml_binstall::Meta, @@ -43,7 +43,9 @@ impl CratesIoRateLimit { self.0.lock().await.tick().await; } } -async fn is_crate_yanked(client: &Client, url: Url) -> Result<bool, RemoteError> { + +/// Return `Some(checksum)` if the version is not yanked, otherwise `None`. 
+async fn is_crate_yanked(client: &Client, url: Url) -> Result<Option<String>, RemoteError> { #[derive(Deserialize)] struct CrateInfo { version: Inner, @@ -52,25 +54,29 @@ async fn is_crate_yanked(client: &Client, url: Url) -> Result<bool, RemoteError> #[derive(Deserialize)] struct Inner { yanked: bool, + checksum: String, } // Fetch / update index debug!("Looking up crate information"); let info: CrateInfo = client.get(url).send(true).await?.json().await?; + let version = info.version; - Ok(info.version.yanked) + Ok((!version.yanked).then_some(version.checksum)) } async fn fetch_crate_cratesio_version_matched( client: &Client, url: Url, version_req: &VersionReq, -) -> Result<Option<CompactString>, RemoteError> { +) -> Result<Option<(CompactString, String)>, RemoteError> { #[derive(Deserialize)] struct CrateInfo { #[serde(rename = "crate")] inner: CrateInfoInner, + + versions: Vec<Version>, } #[derive(Deserialize)] @@ -78,28 +84,27 @@ async fn fetch_crate_cratesio_version_matched( max_stable_version: CompactString, } - #[derive(Deserialize)] - struct Versions { - versions: Vec<Version>, - } - #[derive(Deserialize)] struct Version { num: CompactString, yanked: bool, + checksum: String, } // Fetch / update index debug!("Looking up crate information"); - let response = client.get(url).send(true).await?; + let crate_info: CrateInfo = client.get(url).send(true).await?.json().await?; - let version = if version_req == &VersionReq::STAR { - let crate_info: CrateInfo = response.json().await?; - Some(crate_info.inner.max_stable_version) + let version_with_checksum = if version_req == &VersionReq::STAR { + let version = crate_info.inner.max_stable_version; + crate_info + .versions + .into_iter() + .find_map(|v| (v.num.as_str() == version.as_str()).then_some(v.checksum)) + .map(|checksum| (version, checksum)) } else { - let response: Versions = response.json().await?; - response + crate_info .versions .into_iter() .filter_map(|item| { @@ -115,17 +120,23 @@ async fn fetch_crate_cratesio_version_matched( let ver = semver::Version::parse(&num).ok()?; // Filter by version match - 
version_req.matches(&ver).then_some((num, ver)) + version_req + .matches(&ver) + .then_some((num, ver, item.checksum)) } else { None } }) // Return highest version - .max_by(|(_ver_str_x, ver_x), (_ver_str_y, ver_y)| ver_x.cmp(ver_y)) - .map(|(ver_str, _)| ver_str) + .max_by( + |(_ver_str_x, ver_x, _checksum_x), (_ver_str_y, ver_y, _checksum_y)| { + ver_x.cmp(ver_y) + }, + ) + .map(|(ver_str, _, checksum)| (ver_str, checksum)) }; - Ok(version) + Ok(version_with_checksum) } /// Find the crate by name, get its latest stable version matches `version_req`, @@ -141,7 +152,7 @@ pub async fn fetch_crate_cratesio( let url = Url::parse(&format!("https://crates.io/api/v1/crates/{name}"))?; - let version = match version_req.comparators.as_slice() { + let (version, cksum) = match version_req.comparators.as_slice() { [Comparator { op: ComparatorOp::Exact, major, @@ -163,7 +174,7 @@ pub async fn fetch_crate_cratesio( is_crate_yanked(&client, url) .await - .map(|yanked| (!yanked).then_some(version)) + .map(|ret| ret.map(|checksum| (version, checksum))) } _ => fetch_crate_cratesio_version_matched(&client, url.clone(), version_req).await, } @@ -185,5 +196,5 @@ pub async fn fetch_crate_cratesio( .push(&version) .push("download"); - parse_manifest(client, name, &version, crate_url).await + parse_manifest(client, name, crate_url, MatchedVersion { version, cksum }).await } diff --git a/crates/binstalk/src/drivers/registry/git_registry.rs b/crates/binstalk/src/drivers/registry/git_registry.rs index f551f5b89..b301cc6bc 100644 --- a/crates/binstalk/src/drivers/registry/git_registry.rs +++ b/crates/binstalk/src/drivers/registry/git_registry.rs @@ -108,7 +108,7 @@ impl GitRegistry { let version_req = version_req.clone(); let this = self.clone(); - let (version, dl_url) = spawn_blocking(move || { + let (matched_version, dl_url) = spawn_blocking(move || { let GitIndex { _tempdir: _, repo, @@ -118,21 +118,20 @@ impl GitRegistry { .git_index .get_or_try_init(|| 
GitIndex::new(this.0.url.clone()))?; - let MatchedVersion { version, cksum } = + let matched_version = Self::find_crate_matched_ver(repo, &crate_name, &crate_prefix, &version_req)?; let url = Url::parse(&render_dl_template( dl_template, &crate_name, &crate_prefix, - &version, - &cksum, + &matched_version, )?)?; - Ok::<_, BinstallError>((version, url)) + Ok::<_, BinstallError>((matched_version, url)) }) .await??; - parse_manifest(client, name, &version, dl_url).await + parse_manifest(client, name, dl_url, matched_version).await } } diff --git a/crates/binstalk/src/drivers/registry/sparse_registry.rs b/crates/binstalk/src/drivers/registry/sparse_registry.rs index 13acb0579..8b4e9de2c 100644 --- a/crates/binstalk/src/drivers/registry/sparse_registry.rs +++ b/crates/binstalk/src/drivers/registry/sparse_registry.rs @@ -88,7 +88,7 @@ impl SparseRegistry { ) -> Result<Manifest<Meta>, BinstallError> { let crate_prefix = crate_prefix_components(crate_name)?; let dl_template = self.get_dl_template(&client).await?; - let MatchedVersion { version, cksum } = Self::find_crate_matched_ver( + let matched_version = Self::find_crate_matched_ver( &client, self.url.clone(), crate_name, @@ -100,10 +100,9 @@ impl SparseRegistry { dl_template, crate_name, &crate_prefix, - &version, - &cksum, + &matched_version, )?)?; - parse_manifest(client, crate_name, &version, dl_url).await + parse_manifest(client, crate_name, dl_url, matched_version).await } } diff --git a/crates/binstalk/src/helpers.rs b/crates/binstalk/src/helpers.rs index 34da60e0d..b9e274d18 100644 --- a/crates/binstalk/src/helpers.rs +++ b/crates/binstalk/src/helpers.rs @@ -8,7 +8,7 @@ pub mod signal; pub mod target_triple; pub mod tasks; -pub use binstalk_downloader::{download, gh_api_client}; +pub use binstalk_downloader::{bytes, download, gh_api_client}; pub fn is_universal_macos(target: &str) -> bool { ["universal-apple-darwin", "universal2-apple-darwin"].contains(&target)