Skip to content

Commit

Permalink
Hash
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Aug 27, 2024
1 parent b01c16a commit 114324a
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 84 deletions.
22 changes: 21 additions & 1 deletion crates/distribution-types/src/specified_requirement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::fmt::{Display, Formatter};

use pep508_rs::{MarkerEnvironment, UnnamedRequirement};
use pypi_types::{Requirement, RequirementSource};
use pypi_types::{Hashes, ParsedUrl, Requirement, RequirementSource};
use uv_normalize::ExtraName;

use crate::VerbatimParsedUrl;
Expand Down Expand Up @@ -82,6 +82,26 @@ impl UnresolvedRequirement {
Self::Unnamed(requirement) => requirement.url.is_editable(),
}
}

/// Return the hashes embedded in the fragment of the requirement's URL, if any.
///
/// Only URL-sourced named requirements and archive-backed unnamed requirements are
/// considered. Returns `None` when the requirement has a different source, the URL has no
/// fragment, or the fragment cannot be parsed by [`Hashes::parse_fragment`].
pub fn hashes(&self) -> Option<Hashes> {
    // Locate the URL fragment for whichever kind of requirement this is.
    let fragment = match self {
        Self::Named(requirement) => match requirement.source {
            RequirementSource::Url { ref url, .. } => url.fragment(),
            _ => None,
        },
        Self::Unnamed(requirement) => match requirement.url.parsed_url {
            ParsedUrl::Archive(ref archive) => archive.url.fragment(),
            _ => None,
        },
    }?;

    // A fragment that is not a hash is treated as "no hashes" rather than an error.
    Hashes::parse_fragment(fragment).ok()
}
}

impl From<Requirement> for UnresolvedRequirementSpecification {
Expand Down
70 changes: 69 additions & 1 deletion crates/pypi-types/src/simple_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,68 @@ impl Hashes {
}
digests
}

/// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
pub fn parse_fragment(fragment: &str) -> Result<Self, HashError> {
let mut parts = fragment.split('=');

// Extract the key and value.
let name = parts
.next()
.ok_or_else(|| HashError::InvalidFragment(fragment.to_string()))?;
let value = parts
.next()
.ok_or_else(|| HashError::InvalidFragment(fragment.to_string()))?;

// Ensure there are no more parts.
if parts.next().is_some() {
return Err(HashError::InvalidFragment(fragment.to_string()));
}

match name {
"md5" => {
let md5 = std::str::from_utf8(value.as_bytes())?;
let md5 = md5.to_owned().into_boxed_str();
Ok(Hashes {
md5: Some(md5),
sha256: None,
sha384: None,
sha512: None,
})
}
"sha256" => {
let sha256 = std::str::from_utf8(value.as_bytes())?;
let sha256 = sha256.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: Some(sha256),
sha384: None,
sha512: None,
})
}
"sha384" => {
let sha384 = std::str::from_utf8(value.as_bytes())?;
let sha384 = sha384.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: Some(sha384),
sha512: None,
})
}
"sha512" => {
let sha512 = std::str::from_utf8(value.as_bytes())?;
let sha512 = sha512.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: None,
sha512: Some(sha512),
})
}
_ => Err(HashError::UnsupportedHashAlgorithm(fragment.to_string())),
}
}
}

impl FromStr for Hashes {
Expand Down Expand Up @@ -343,10 +405,16 @@ pub enum HashError {
#[error("Unexpected hash (expected `<algorithm>:<hash>`): {0}")]
InvalidStructure(String),

#[error("Unexpected fragment (expected `#sha256=...` or similar) on URL: {0}")]
InvalidFragment(String),

#[error(
"Unsupported hash algorithm: `{0}` (expected one of: `md5`, `sha256`, `sha384`, or `sha512`)"
"Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `{0}`"
)]
UnsupportedHashAlgorithm(String),

#[error("Non-UTF-8 hash digest")]
NonUtf8(#[from] std::str::Utf8Error),
}

#[cfg(test)]
Expand Down
83 changes: 9 additions & 74 deletions crates/uv-client/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,68 +69,6 @@ impl SimpleHtml {
Ok(Some(url))
}

/// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
fn parse_hash(fragment: &str) -> Result<Hashes, Error> {
let mut parts = fragment.split('=');

// Extract the key and value.
let name = parts
.next()
.ok_or_else(|| Error::FragmentParse(fragment.to_string()))?;
let value = parts
.next()
.ok_or_else(|| Error::FragmentParse(fragment.to_string()))?;

// Ensure there are no more parts.
if parts.next().is_some() {
return Err(Error::FragmentParse(fragment.to_string()));
}

match name {
"md5" => {
let md5 = std::str::from_utf8(value.as_bytes())?;
let md5 = md5.to_owned().into_boxed_str();
Ok(Hashes {
md5: Some(md5),
sha256: None,
sha384: None,
sha512: None,
})
}
"sha256" => {
let sha256 = std::str::from_utf8(value.as_bytes())?;
let sha256 = sha256.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: Some(sha256),
sha384: None,
sha512: None,
})
}
"sha384" => {
let sha384 = std::str::from_utf8(value.as_bytes())?;
let sha384 = sha384.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: Some(sha384),
sha512: None,
})
}
"sha512" => {
let sha512 = std::str::from_utf8(value.as_bytes())?;
let sha512 = sha512.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: None,
sha512: Some(sha512),
})
}
_ => Err(Error::UnsupportedHashAlgorithm(fragment.to_string())),
}
}

/// Parse a [`File`] from an `<a>` tag.
fn parse_anchor(link: &HTMLTag) -> Result<File, Error> {
// Extract the href.
Expand All @@ -145,14 +83,13 @@ impl SimpleHtml {
// Extract the hash, which should be in the fragment.
let decoded = html_escape::decode_html_entities(href);
let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') {
let fragment = urlencoding::decode(fragment)
.map_err(|_| Error::FragmentParse(fragment.to_string()))?;
let fragment = urlencoding::decode(fragment)?;
(
path,
if fragment.trim().is_empty() {
Hashes::default()
} else {
Self::parse_hash(&fragment)?
Hashes::parse_fragment(&fragment)?
},
)
} else {
Expand Down Expand Up @@ -199,7 +136,7 @@ impl SimpleHtml {
match dist_info_metadata.as_ref() {
"true" => Some(CoreMetadata::Bool(true)),
"false" => Some(CoreMetadata::Bool(false)),
fragment => Some(CoreMetadata::Hashes(Self::parse_hash(fragment)?)),
fragment => Some(CoreMetadata::Hashes(Hashes::parse_fragment(fragment)?)),
}
} else {
None
Expand Down Expand Up @@ -235,6 +172,9 @@ pub enum Error {
#[error(transparent)]
Utf8(#[from] std::str::Utf8Error),

#[error(transparent)]
FromUtf8(#[from] std::string::FromUtf8Error),

#[error("Failed to parse URL: {0}")]
UrlParse(String, #[source] url::ParseError),

Expand All @@ -253,13 +193,8 @@ pub enum Error {
#[error("Missing hash attribute on URL: {0}")]
MissingHash(String),

#[error("Unexpected fragment (expected `#sha256=...` or similar) on URL: {0}")]
FragmentParse(String),

#[error(
"Unsupported hash algorithm (expected `md5`, `sha256`, `sha384`, or `sha512`) on: {0}"
)]
UnsupportedHashAlgorithm(String),
#[error(transparent)]
FragmentParse(#[from] pypi_types::HashError),

#[error("Invalid `requires-python` specifier: {0}")]
Pep440(#[source] pep440_rs::VersionSpecifiersParseError),
Expand Down Expand Up @@ -851,7 +786,7 @@ mod tests {
"#;
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
let result = SimpleHtml::parse(text, &base).unwrap_err();
insta::assert_snapshot!(result, @"Unsupported hash algorithm (expected `md5`, `sha256`, `sha384`, or `sha512`) on: blake2=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61");
insta::assert_snapshot!(result, @"Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `blake2=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`");
}

#[test]
Expand Down
23 changes: 16 additions & 7 deletions crates/uv-types/src/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use distribution_types::{
};
use pep440_rs::Version;
use pypi_types::{
HashDigest, HashError, Requirement, RequirementSource, ResolverMarkerEnvironment,
HashDigest, HashError, Hashes, Requirement, RequirementSource, ResolverMarkerEnvironment,
};
use uv_configuration::HashCheckingMode;
use uv_normalize::PackageName;
Expand Down Expand Up @@ -153,6 +153,21 @@ impl HashStrategy {
}
};

let digests = if digests.is_empty() {
// If there are no hashes, and the distribution is URL-based, attempt to extract
// them from the URL fragment.
requirement
.hashes()
.map(Hashes::into_digests)
.unwrap_or_default()
} else {
// Parse the hashes.
digests
.iter()
.map(|digest| HashDigest::from_str(digest))
.collect::<Result<Vec<_>, _>>()?
};

if digests.is_empty() {
// Under `--require-hashes`, every requirement must include a hash.
if mode.is_require() {
Expand All @@ -164,12 +179,6 @@ impl HashStrategy {
continue;
}

// Parse the hashes.
let digests = digests
.iter()
.map(|digest| HashDigest::from_str(digest))
.collect::<Result<Vec<_>, _>>()?;

hashes.insert(id, digests);
}

Expand Down
Loading

0 comments on commit 114324a

Please sign in to comment.