Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read hash from URL fragment if --hashes are omitted #6731

Merged
merged 1 commit into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion crates/distribution-types/src/specified_requirement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::fmt::{Display, Formatter};

use pep508_rs::{MarkerEnvironment, UnnamedRequirement};
use pypi_types::{Requirement, RequirementSource};
use pypi_types::{Hashes, ParsedUrl, Requirement, RequirementSource};
use uv_normalize::ExtraName;

use crate::VerbatimParsedUrl;
Expand Down Expand Up @@ -82,6 +82,26 @@ impl UnresolvedRequirement {
Self::Unnamed(requirement) => requirement.url.is_editable(),
}
}

/// Return the hashes of the requirement, as specified in the URL fragment.
///
/// Yields `None` when the requirement is not backed by a direct URL (named requirements) or an
/// archive URL (unnamed requirements), when the URL carries no fragment, or when the fragment
/// cannot be parsed by [`Hashes::parse_fragment`].
pub fn hashes(&self) -> Option<Hashes> {
    // Locate the fragment first; the two variants only differ in where the URL lives.
    let fragment = match self {
        Self::Named(requirement) => match &requirement.source {
            RequirementSource::Url { url, .. } => url.fragment(),
            _ => None,
        },
        Self::Unnamed(requirement) => match &requirement.url.parsed_url {
            ParsedUrl::Archive(archive) => archive.url.fragment(),
            _ => None,
        },
    }?;

    // Parse failures are discarded and reported as "no hashes".
    Hashes::parse_fragment(fragment).ok()
}
}

impl From<Requirement> for UnresolvedRequirementSpecification {
Expand Down
70 changes: 69 additions & 1 deletion crates/pypi-types/src/simple_json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,68 @@ impl Hashes {
}
digests
}

/// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
///
/// The fragment must consist of exactly one `<algorithm>=<digest>` pair. The digest is stored
/// verbatim in the field matching the algorithm; all other fields are left as `None`.
///
/// # Errors
///
/// Returns [`HashError::InvalidFragment`] if the fragment does not contain exactly one `=`,
/// and [`HashError::UnsupportedHashAlgorithm`] if the algorithm is not one of `md5`, `sha256`,
/// `sha384`, or `sha512`.
pub fn parse_fragment(fragment: &str) -> Result<Self, HashError> {
    // Extract the key and value, rejecting fragments with zero or more than one `=`.
    let (name, value) = fragment
        .split_once('=')
        .filter(|(_, value)| !value.contains('='))
        .ok_or_else(|| HashError::InvalidFragment(fragment.to_string()))?;

    let mut hashes = Hashes {
        md5: None,
        sha256: None,
        sha384: None,
        sha512: None,
    };

    // Select the field that corresponds to the algorithm name.
    let slot = match name {
        "md5" => &mut hashes.md5,
        "sha256" => &mut hashes.sha256,
        "sha384" => &mut hashes.sha384,
        "sha512" => &mut hashes.sha512,
        _ => return Err(HashError::UnsupportedHashAlgorithm(fragment.to_string())),
    };

    // `value` is already a valid `&str`, so it can be boxed directly without re-validating
    // it as UTF-8.
    *slot = Some(Box::from(value));

    Ok(hashes)
}
}

impl FromStr for Hashes {
Expand Down Expand Up @@ -343,10 +405,16 @@ pub enum HashError {
#[error("Unexpected hash (expected `<algorithm>:<hash>`): {0}")]
InvalidStructure(String),

#[error("Unexpected fragment (expected `#sha256=...` or similar) on URL: {0}")]
InvalidFragment(String),

#[error(
"Unsupported hash algorithm: `{0}` (expected one of: `md5`, `sha256`, `sha384`, or `sha512`)"
"Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `{0}`"
)]
UnsupportedHashAlgorithm(String),

#[error("Non-UTF-8 hash digest")]
NonUtf8(#[from] std::str::Utf8Error),
}

#[cfg(test)]
Expand Down
83 changes: 9 additions & 74 deletions crates/uv-client/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,68 +69,6 @@ impl SimpleHtml {
Ok(Some(url))
}

/// Parse the hash from a fragment, as in: `sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`
fn parse_hash(fragment: &str) -> Result<Hashes, Error> {
let mut parts = fragment.split('=');

// Extract the key and value.
let name = parts
.next()
.ok_or_else(|| Error::FragmentParse(fragment.to_string()))?;
let value = parts
.next()
.ok_or_else(|| Error::FragmentParse(fragment.to_string()))?;

// Ensure there are no more parts.
if parts.next().is_some() {
return Err(Error::FragmentParse(fragment.to_string()));
}

match name {
"md5" => {
let md5 = std::str::from_utf8(value.as_bytes())?;
let md5 = md5.to_owned().into_boxed_str();
Ok(Hashes {
md5: Some(md5),
sha256: None,
sha384: None,
sha512: None,
})
}
"sha256" => {
let sha256 = std::str::from_utf8(value.as_bytes())?;
let sha256 = sha256.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: Some(sha256),
sha384: None,
sha512: None,
})
}
"sha384" => {
let sha384 = std::str::from_utf8(value.as_bytes())?;
let sha384 = sha384.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: Some(sha384),
sha512: None,
})
}
"sha512" => {
let sha512 = std::str::from_utf8(value.as_bytes())?;
let sha512 = sha512.to_owned().into_boxed_str();
Ok(Hashes {
md5: None,
sha256: None,
sha384: None,
sha512: Some(sha512),
})
}
_ => Err(Error::UnsupportedHashAlgorithm(fragment.to_string())),
}
}

/// Parse a [`File`] from an `<a>` tag.
fn parse_anchor(link: &HTMLTag) -> Result<File, Error> {
// Extract the href.
Expand All @@ -145,14 +83,13 @@ impl SimpleHtml {
// Extract the hash, which should be in the fragment.
let decoded = html_escape::decode_html_entities(href);
let (path, hashes) = if let Some((path, fragment)) = decoded.split_once('#') {
let fragment = urlencoding::decode(fragment)
.map_err(|_| Error::FragmentParse(fragment.to_string()))?;
let fragment = urlencoding::decode(fragment)?;
(
path,
if fragment.trim().is_empty() {
Hashes::default()
} else {
Self::parse_hash(&fragment)?
Hashes::parse_fragment(&fragment)?
},
)
} else {
Expand Down Expand Up @@ -199,7 +136,7 @@ impl SimpleHtml {
match dist_info_metadata.as_ref() {
"true" => Some(CoreMetadata::Bool(true)),
"false" => Some(CoreMetadata::Bool(false)),
fragment => Some(CoreMetadata::Hashes(Self::parse_hash(fragment)?)),
fragment => Some(CoreMetadata::Hashes(Hashes::parse_fragment(fragment)?)),
}
} else {
None
Expand Down Expand Up @@ -235,6 +172,9 @@ pub enum Error {
#[error(transparent)]
Utf8(#[from] std::str::Utf8Error),

#[error(transparent)]
FromUtf8(#[from] std::string::FromUtf8Error),

#[error("Failed to parse URL: {0}")]
UrlParse(String, #[source] url::ParseError),

Expand All @@ -253,13 +193,8 @@ pub enum Error {
#[error("Missing hash attribute on URL: {0}")]
MissingHash(String),

#[error("Unexpected fragment (expected `#sha256=...` or similar) on URL: {0}")]
FragmentParse(String),

#[error(
"Unsupported hash algorithm (expected `md5`, `sha256`, `sha384`, or `sha512`) on: {0}"
)]
UnsupportedHashAlgorithm(String),
#[error(transparent)]
FragmentParse(#[from] pypi_types::HashError),

#[error("Invalid `requires-python` specifier: {0}")]
Pep440(#[source] pep440_rs::VersionSpecifiersParseError),
Expand Down Expand Up @@ -851,7 +786,7 @@ mod tests {
"#;
let base = Url::parse("https://download.pytorch.org/whl/jinja2/").unwrap();
let result = SimpleHtml::parse(text, &base).unwrap_err();
insta::assert_snapshot!(result, @"Unsupported hash algorithm (expected `md5`, `sha256`, `sha384`, or `sha512`) on: blake2=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61");
insta::assert_snapshot!(result, @"Unsupported hash algorithm (expected one of: `md5`, `sha256`, `sha384`, or `sha512`) on: `blake2=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61`");
}

#[test]
Expand Down
23 changes: 16 additions & 7 deletions crates/uv-types/src/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use distribution_types::{
};
use pep440_rs::Version;
use pypi_types::{
HashDigest, HashError, Requirement, RequirementSource, ResolverMarkerEnvironment,
HashDigest, HashError, Hashes, Requirement, RequirementSource, ResolverMarkerEnvironment,
};
use uv_configuration::HashCheckingMode;
use uv_normalize::PackageName;
Expand Down Expand Up @@ -153,6 +153,21 @@ impl HashStrategy {
}
};

let digests = if digests.is_empty() {
// If there are no hashes, and the distribution is URL-based, attempt to extract
// it from the fragment.
requirement
.hashes()
.map(Hashes::into_digests)
.unwrap_or_default()
} else {
// Parse the hashes.
digests
.iter()
.map(|digest| HashDigest::from_str(digest))
.collect::<Result<Vec<_>, _>>()?
};

if digests.is_empty() {
// Under `--require-hashes`, every requirement must include a hash.
if mode.is_require() {
Expand All @@ -164,12 +179,6 @@ impl HashStrategy {
continue;
}

// Parse the hashes.
let digests = digests
.iter()
.map(|digest| HashDigest::from_str(digest))
.collect::<Result<Vec<_>, _>>()?;

hashes.insert(id, digests);
}