Skip to content

Commit

Permalink
feat: Verify cksum of crate tarball from cargo registry
Browse files Browse the repository at this point in the history
Fixed #1183

Since the crate tarball could be downloaded from a different set of
servers than where the cargo registry is hosted, checking its checksum
is necessary to ensure its integrity.

Signed-off-by: Jiahao XU <Jiahao_XU@outlook.com>
  • Loading branch information
NobodyXu committed Aug 9, 2023
1 parent 939c9b4 commit 46f486b
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 44 deletions.
19 changes: 19 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/binstalk-downloader/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#![cfg_attr(docsrs, feature(doc_auto_cfg))]

pub use bytes;

pub mod download;

/// Github API client.
Expand Down
2 changes: 2 additions & 0 deletions crates/binstalk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ license = "GPL-3.0-only"

[dependencies]
async-trait = "0.1.68"
base16 = "0.2.1"
binstalk-downloader = { version = "0.6.1", path = "../binstalk-downloader", default-features = false, features = ["gh-api-client"] }
binstalk-types = { version = "0.5.0", path = "../binstalk-types" }
cargo_toml = "0.15.3"
Expand All @@ -33,6 +34,7 @@ reflink-copy = "0.1.5"
semver = { version = "1.0.17", features = ["serde"] }
serde = { version = "1.0.163", features = ["derive"] }
serde_json = "1.0.99"
sha2 = "0.10.7"
strum = "0.25.0"
target-lexicon = { version = "0.12.11", features = ["std"] }
tempfile = "3.5.0"
Expand Down
7 changes: 7 additions & 0 deletions crates/binstalk/src/drivers/registry.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::{str::FromStr, sync::Arc};

use base16::DecodeError as Base16DecodeError;
use cargo_toml::Manifest;
use compact_str::CompactString;
use leon::{ParseError, RenderError};
Expand Down Expand Up @@ -56,6 +57,12 @@ pub enum RegistryError {

#[error("Failed to render dl config: {0}")]
RenderDlConfig(#[from] RenderError),

#[error("Failed to parse checksum encoded in hex: {0}")]
InvalidHex(#[from] Base16DecodeError),

#[error("Expected checksum `{expected}`, actual checksum `{actual}`")]
UnmatchedChecksum { expected: String, actual: String },
}

#[derive(Clone, Debug)]
Expand Down
50 changes: 39 additions & 11 deletions crates/binstalk/src/drivers/registry/common.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
use std::{borrow::Cow, path::PathBuf};
use std::borrow::Cow;

use base16::{decode as decode_base16, encode_lower as encode_base16};
use cargo_toml::Manifest;
use compact_str::{format_compact, CompactString, ToCompactString};
use leon::{Template, Values};
use semver::{Version, VersionReq};
use serde::Deserialize;
use serde_json::Error as JsonError;
use sha2::{Digest, Sha256};
use tracing::debug;

use crate::{
drivers::registry::{visitor::ManifestVisitor, RegistryError},
errors::BinstallError,
helpers::{
download::Download,
bytes::Bytes,
download::{DataVerifier, Download},
remote::{Client, Url},
},
manifests::cargo_toml_binstall::{Meta, TarBasedFmt},
Expand All @@ -23,23 +26,48 @@ pub(super) struct RegistryConfig {
pub(super) dl: CompactString,
}

struct Sha256Digest(Sha256);

impl Default for Sha256Digest {
fn default() -> Self {
Sha256Digest(Sha256::new())
}
}

impl DataVerifier for Sha256Digest {
fn update(&mut self, data: &Bytes) {
self.0.update(data);
}
}

/// Fetches the crate from `crate_url`, extracts `Cargo.toml` from it and checks
/// the digest accumulated during the download against the expected `cksum`.
///
/// `cksum` is expected to be hex (base16) encoded; if it cannot be decoded the
/// error is converted via `RegistryError::from` and returned before the
/// `Download` is created.
///
/// Returns `RegistryError::UnmatchedChecksum` (converted into `BinstallError`)
/// when the computed digest differs from `cksum`, otherwise the result of
/// `ManifestVisitor::load_manifest`.
pub(super) async fn parse_manifest(
client: Client,
crate_name: &str,
version: &str,
crate_url: Url,
MatchedVersion { version, cksum }: MatchedVersion,
) -> Result<Manifest<Meta>, BinstallError> {
debug!("Fetching crate from: {crate_url} and extracting Cargo.toml from it");

let manifest_dir_path: PathBuf = format!("{crate_name}-{version}").into();
// The visitor is constructed with the `{crate_name}-{version}` path.
let mut manifest_visitor = ManifestVisitor::new(format!("{crate_name}-{version}").into());

let mut manifest_visitor = ManifestVisitor::new(manifest_dir_path);
// Decode the expected checksum first so that malformed hex fails early.
let checksum = decode_base16(cksum.as_bytes()).map_err(RegistryError::from)?;
let mut sha256_digest = Sha256Digest::default();

Download::new(client, crate_url)
// The digest is handed to the download as its data verifier, while the
// `Tgz` archive is walked by `manifest_visitor`.
Download::new_with_data_verifier(client, crate_url, &mut sha256_digest)
.and_visit_tar(TarBasedFmt::Tgz, &mut manifest_visitor)
.await?;

manifest_visitor.load_manifest()
// Consume the hasher to obtain the final digest bytes.
let digest_checksum = sha256_digest.0.finalize();

// Compare the raw digest bytes with the decoded checksum rather than
// comparing hex strings.
if digest_checksum.as_slice() != checksum.as_slice() {
Err(RegistryError::UnmatchedChecksum {
expected: cksum,
// Re-encode the computed digest as hex for the error message.
actual: encode_base16(digest_checksum.as_slice()),
}
.into())
} else {
// Only parse the manifest once the checksum has been confirmed.
manifest_visitor.load_manifest()
}
}

/// Return components of crate prefix
Expand Down Expand Up @@ -68,8 +96,7 @@ pub(super) fn render_dl_template(
dl_template: &str,
crate_name: &str,
(c1, c2): &(CompactString, Option<CompactString>),
version: &str,
cksum: &str,
MatchedVersion { version, cksum }: &MatchedVersion,
) -> Result<String, RegistryError> {
let template = Template::parse(dl_template)?;
if template.keys().next().is_some() {
Expand Down Expand Up @@ -114,12 +141,13 @@ pub(super) fn render_dl_template(
pub(super) struct RegistryIndexEntry {
vers: CompactString,
yanked: bool,
cksum: CompactString,
cksum: String,
}

/// A crate version together with the checksum recorded for it.
pub(super) struct MatchedVersion {
/// Version string of the crate.
pub(super) version: CompactString,
pub(super) cksum: CompactString,
/// sha256 checksum encoded in base16
pub(super) cksum: String,
}

impl MatchedVersion {
Expand Down
55 changes: 33 additions & 22 deletions crates/binstalk/src/drivers/registry/crates_io_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use tokio::{
use tracing::debug;

use crate::{
drivers::registry::{parse_manifest, RegistryError},
drivers::registry::{parse_manifest, MatchedVersion, RegistryError},
errors::BinstallError,
helpers::remote::{Client, Url},
manifests::cargo_toml_binstall::Meta,
Expand Down Expand Up @@ -43,7 +43,9 @@ impl CratesIoRateLimit {
self.0.lock().await.tick().await;
}
}
async fn is_crate_yanked(client: &Client, url: Url) -> Result<bool, RemoteError> {

/// Return `Some(checksum)` if the version is not yanked, otherwise `None`.
async fn is_crate_yanked(client: &Client, url: Url) -> Result<Option<String>, RemoteError> {
#[derive(Deserialize)]
struct CrateInfo {
version: Inner,
Expand All @@ -52,54 +54,57 @@ async fn is_crate_yanked(client: &Client, url: Url) -> Result<bool, RemoteError>
#[derive(Deserialize)]
struct Inner {
yanked: bool,
checksum: String,
}

// Fetch / update index
debug!("Looking up crate information");

let info: CrateInfo = client.get(url).send(true).await?.json().await?;
let version = info.version;

Ok(info.version.yanked)
Ok((!version.yanked).then_some(version.checksum))
}

async fn fetch_crate_cratesio_version_matched(
client: &Client,
url: Url,
version_req: &VersionReq,
) -> Result<Option<CompactString>, RemoteError> {
) -> Result<Option<(CompactString, String)>, RemoteError> {
#[derive(Deserialize)]
struct CrateInfo {
#[serde(rename = "crate")]
inner: CrateInfoInner,

versions: Vec<Version>,
}

#[derive(Deserialize)]
struct CrateInfoInner {
max_stable_version: CompactString,
}

#[derive(Deserialize)]
struct Versions {
versions: Vec<Version>,
}

#[derive(Deserialize)]
struct Version {
num: CompactString,
yanked: bool,
checksum: String,
}

// Fetch / update index
debug!("Looking up crate information");

let response = client.get(url).send(true).await?;
let crate_info: CrateInfo = client.get(url).send(true).await?.json().await?;

let version = if version_req == &VersionReq::STAR {
let crate_info: CrateInfo = response.json().await?;
Some(crate_info.inner.max_stable_version)
let version_with_checksum = if version_req == &VersionReq::STAR {
let version = crate_info.inner.max_stable_version;
crate_info
.versions
.into_iter()
.find_map(|v| (v.num.as_str() == version.as_str()).then_some(v.checksum))
.map(|checksum| (version, checksum))
} else {
let response: Versions = response.json().await?;
response
crate_info
.versions
.into_iter()
.filter_map(|item| {
Expand All @@ -115,17 +120,23 @@ async fn fetch_crate_cratesio_version_matched(
let ver = semver::Version::parse(&num).ok()?;

// Filter by version match
version_req.matches(&ver).then_some((num, ver))
version_req
.matches(&ver)
.then_some((num, ver, item.checksum))
} else {
None
}
})
// Return highest version
.max_by(|(_ver_str_x, ver_x), (_ver_str_y, ver_y)| ver_x.cmp(ver_y))
.map(|(ver_str, _)| ver_str)
.max_by(
|(_ver_str_x, ver_x, _checksum_x), (_ver_str_y, ver_y, _checksum_y)| {
ver_x.cmp(ver_y)
},
)
.map(|(ver_str, _, checksum)| (ver_str, checksum))
};

Ok(version)
Ok(version_with_checksum)
}

/// Find the crate by name, get its latest stable version that matches `version_req`,
Expand All @@ -141,7 +152,7 @@ pub async fn fetch_crate_cratesio(

let url = Url::parse(&format!("https://crates.io/api/v1/crates/{name}"))?;

let version = match version_req.comparators.as_slice() {
let (version, cksum) = match version_req.comparators.as_slice() {
[Comparator {
op: ComparatorOp::Exact,
major,
Expand All @@ -163,7 +174,7 @@ pub async fn fetch_crate_cratesio(

is_crate_yanked(&client, url)
.await
.map(|yanked| (!yanked).then_some(version))
.map(|ret| ret.map(|checksum| (version, checksum)))
}
_ => fetch_crate_cratesio_version_matched(&client, url.clone(), version_req).await,
}
Expand All @@ -185,5 +196,5 @@ pub async fn fetch_crate_cratesio(
.push(&version)
.push("download");

parse_manifest(client, name, &version, crate_url).await
parse_manifest(client, name, crate_url, MatchedVersion { version, cksum }).await
}
11 changes: 5 additions & 6 deletions crates/binstalk/src/drivers/registry/git_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ impl GitRegistry {
let version_req = version_req.clone();
let this = self.clone();

let (version, dl_url) = spawn_blocking(move || {
let (matched_version, dl_url) = spawn_blocking(move || {
let GitIndex {
_tempdir: _,
repo,
Expand All @@ -118,21 +118,20 @@ impl GitRegistry {
.git_index
.get_or_try_init(|| GitIndex::new(this.0.url.clone()))?;

let MatchedVersion { version, cksum } =
let matched_version =
Self::find_crate_matched_ver(repo, &crate_name, &crate_prefix, &version_req)?;

let url = Url::parse(&render_dl_template(
dl_template,
&crate_name,
&crate_prefix,
&version,
&cksum,
&matched_version,
)?)?;

Ok::<_, BinstallError>((version, url))
Ok::<_, BinstallError>((matched_version, url))
})
.await??;

parse_manifest(client, name, &version, dl_url).await
parse_manifest(client, name, dl_url, matched_version).await
}
}
7 changes: 3 additions & 4 deletions crates/binstalk/src/drivers/registry/sparse_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ impl SparseRegistry {
) -> Result<Manifest<Meta>, BinstallError> {
let crate_prefix = crate_prefix_components(crate_name)?;
let dl_template = self.get_dl_template(&client).await?;
let MatchedVersion { version, cksum } = Self::find_crate_matched_ver(
let matched_version = Self::find_crate_matched_ver(
&client,
self.url.clone(),
crate_name,
Expand All @@ -100,10 +100,9 @@ impl SparseRegistry {
dl_template,
crate_name,
&crate_prefix,
&version,
&cksum,
&matched_version,
)?)?;

parse_manifest(client, crate_name, &version, dl_url).await
parse_manifest(client, crate_name, dl_url, matched_version).await
}
}
Loading

0 comments on commit 46f486b

Please sign in to comment.