Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure authentication is passed from the index url to distribution files #1886

Merged
merged 2 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/distribution-types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ distribution-filename = { path = "../distribution-filename", features = ["serde"
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
platform-tags = { path = "../platform-tags" }
uv-auth = { path = "../uv-auth" }
uv-fs = { path = "../uv-fs" }
uv-git = { path = "../uv-git", features = ["vendored-openssl"] }
uv-normalize = { path = "../uv-normalize" }
Expand Down
11 changes: 9 additions & 2 deletions crates/distribution-types/src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use thiserror::Error;

use pep440_rs::{VersionSpecifiers, VersionSpecifiersParseError};
use pypi_types::{DistInfoMetadata, Hashes, Yanked};
use url::Url;
use uv_auth::safe_copy_url_auth_to_str;

/// Error converting [`pypi_types::File`] to [`distribution_type::File`].
#[derive(Debug, Error)]
Expand Down Expand Up @@ -39,7 +41,7 @@ pub struct File {

impl File {
/// `TryFrom` instead of `From` to filter out files with invalid requires python version specifiers
pub fn try_from(file: pypi_types::File, base: &str) -> Result<Self, FileConversionError> {
pub fn try_from(file: pypi_types::File, base: &Url) -> Result<Self, FileConversionError> {
Ok(Self {
dist_info_metadata: file.dist_info_metadata,
filename: file.filename,
Expand All @@ -51,7 +53,12 @@ impl File {
size: file.size,
upload_time_utc_ms: file.upload_time.map(|dt| dt.timestamp_millis()),
url: if file.url.contains("://") {
FileLocation::AbsoluteUrl(file.url)
let url = safe_copy_url_auth_to_str(base, &file.url)
.map_err(|err| FileConversionError::Url(file.url.clone(), err))?
.map(|url| url.to_string())
.unwrap_or(file.url);

FileLocation::AbsoluteUrl(url)
} else {
FileLocation::RelativeUrl(base.to_string(), file.url)
},
Expand Down
6 changes: 6 additions & 0 deletions crates/pypi-types/src/base_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ impl BaseUrl {
pub fn as_url(&self) -> &Url {
&self.0
}

/// Convert to the underlying [`Url`].
#[must_use]
pub fn into_url(self) -> Url {
self.0
}
}

impl From<Url> for BaseUrl {
Expand Down
8 changes: 8 additions & 0 deletions crates/uv-auth/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
[package]
name = "uv-auth"
version = "0.0.1"
edition = "2021"

[dependencies]
url = { workspace = true }
tracing = { workspace = true }
51 changes: 32 additions & 19 deletions crates/uv-client/src/auth.rs → crates/uv-auth/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,34 +1,47 @@
/// HTTP authentication utilities.
use tracing::warn;
use url::Url;

/// HTTP authentication utilities.
/// Optimized version of [`safe_copy_url_auth`] which avoids parsing a string
/// into a URL unless the given URL has authentication to copy. Useful for patterns
/// where the returned URL would immediately be cast into a string.
///
/// Returns [`Err`] if there is authentication to copy and `new_url` is not a valid URL.
/// Returns [`None`] if there is no authentication to copy.
pub fn safe_copy_url_auth_to_str(
trusted_url: &Url,
new_url: &str,
) -> Result<Option<Url>, url::ParseError> {
if trusted_url.username().is_empty() && trusted_url.password().is_none() {
return Ok(None);
}

let new_url = Url::parse(new_url)?;
Ok(Some(safe_copy_url_auth(trusted_url, new_url)))
}

/// Copy authentication from one URL to another URL if applicable.
///
/// See [`should_retain_auth`] for details on when authentication is retained.
#[must_use]
pub(crate) fn safe_copy_auth(request_url: &Url, mut response_url: Url) -> Url {
if should_retain_auth(request_url, &response_url) {
response_url
.set_username(request_url.username())
.unwrap_or_else(|_| {
warn!("Failed to transfer username to response URL: {response_url}")
});
response_url
.set_password(request_url.password())
.unwrap_or_else(|_| {
warn!("Failed to transfer password to response URL: {response_url}")
});
pub fn safe_copy_url_auth(trusted_url: &Url, mut new_url: Url) -> Url {
if should_retain_auth(trusted_url, &new_url) {
new_url
.set_username(trusted_url.username())
.unwrap_or_else(|_| warn!("Failed to transfer username to response URL: {new_url}"));
new_url
.set_password(trusted_url.password())
.unwrap_or_else(|_| warn!("Failed to transfer password to response URL: {new_url}"));
}
response_url
new_url
}

/// Determine if authentication information should be retained on a new URL.
/// Implements the specification defined in RFC 7235 and 7230.
///
/// <https://datatracker.ietf.org/doc/html/rfc7235#section-2.2>
/// <https://datatracker.ietf.org/doc/html/rfc7230#section-5.5>
fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
fn should_retain_auth(trusted_url: &Url, new_url: &Url) -> bool {
// The "scheme" and "authority" components must match to retain authentication
// The "authority", is composed of the host and port.

Expand All @@ -37,20 +50,20 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
// Note some clients such as Python's `requests` library allow an upgrade
// from `http` to `https` but this is not spec-compliant.
// <https://github.com/pypa/pip/blob/75f54cae9271179b8cc80435f92336c97e349f9d/src/pip/_vendor/requests/sessions.py#L133-L136>
if request_url.scheme() != response_url.scheme() {
if trusted_url.scheme() != new_url.scheme() {
return false;
}

// The host must always be an exact match.
if request_url.host() != response_url.host() {
if trusted_url.host() != new_url.host() {
return false;
}

// Check the port.
// The port is only allowed to differ if it it matches the "default port" for the scheme.
// However, `reqwest` sets the `port` to `None` if it matches the default port so we do
// not need any special handling here.
if request_url.port() != response_url.port() {
if trusted_url.port() != new_url.port() {
return false;
}

Expand All @@ -61,7 +74,7 @@ fn should_retain_auth(request_url: &Url, response_url: &Url) -> bool {
mod tests {
use url::{ParseError, Url};

use crate::auth::should_retain_auth;
use crate::should_retain_auth;

#[test]
fn test_should_retain_auth() -> Result<(), ParseError> {
Expand Down
1 change: 1 addition & 0 deletions crates/uv-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ install-wheel-rs = { path = "../install-wheel-rs" }
pep440_rs = { path = "../pep440-rs" }
pep508_rs = { path = "../pep508-rs" }
platform-tags = { path = "../platform-tags" }
uv-auth = { path = "../uv-auth" }
uv-cache = { path = "../uv-cache" }
uv-fs = { path = "../uv-fs", features = ["tokio"] }
uv-normalize = { path = "../uv-normalize" }
Expand Down
7 changes: 4 additions & 3 deletions crates/uv-client/src/flat_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ use distribution_types::{
use pep440_rs::Version;
use platform_tags::Tags;
use pypi_types::{Hashes, Yanked};
use uv_auth::safe_copy_url_auth;
use uv_cache::{Cache, CacheBucket};
use uv_normalize::PackageName;

use crate::auth::safe_copy_auth;
use crate::cached_client::{CacheControl, CachedClientError};
use crate::html::SimpleHtml;
use crate::{Connectivity, Error, ErrorKind, RegistryClient};
Expand Down Expand Up @@ -156,16 +156,17 @@ impl<'a> FlatIndexClient<'a> {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = safe_copy_auth(url, response.url().clone());
let url = safe_copy_url_auth(url, response.url().clone());

let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;

let base = safe_copy_url_auth(&url, base.into_url());
let files: Vec<File> = files
.into_iter()
.filter_map(|file| {
match File::try_from(file, base.as_url().as_str()) {
match File::try_from(file, &base) {
Ok(file) => Some(file),
Err(err) => {
// Ignore files with unparsable version specifiers.
Expand Down
1 change: 0 additions & 1 deletion crates/uv-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ pub use registry_client::{
};
pub use rkyvutil::OwnedArchive;

mod auth;
mod cached_client;
mod error;
mod flat_index;
Expand Down
21 changes: 10 additions & 11 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ use distribution_types::{BuiltDist, File, FileLocation, IndexUrl, IndexUrls, Nam
use install_wheel_rs::{find_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pypi_types::{Metadata21, SimpleJson};
use uv_auth::safe_copy_url_auth;
use uv_cache::{Cache, CacheBucket, WheelCache};
use uv_normalize::PackageName;
use uv_warnings::warn_user_once;

use crate::auth::safe_copy_auth;
use crate::cached_client::CacheControl;
use crate::html::SimpleHtml;
use crate::middleware::OfflineMiddleware;
Expand Down Expand Up @@ -249,7 +249,7 @@ impl RegistryClient {
async {
// Use the response URL, rather than the request URL, as the base for relative URLs.
// This ensures that we handle redirects and other URL transformations correctly.
let url = safe_copy_auth(&url, response.url().clone());
let url = safe_copy_url_auth(&url, response.url().clone());

let content_type = response
.headers()
Expand All @@ -271,17 +271,16 @@ impl RegistryClient {
let bytes = response.bytes().await.map_err(ErrorKind::RequestError)?;
let data: SimpleJson = serde_json::from_slice(bytes.as_ref())
.map_err(|err| Error::from_json_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(data.files, package_name, url.as_str());
metadata

SimpleMetadata::from_files(data.files, package_name, &url)
}
MediaType::Html => {
let text = response.text().await.map_err(ErrorKind::RequestError)?;
let SimpleHtml { base, files } = SimpleHtml::parse(&text, &url)
.map_err(|err| Error::from_html_err(err, url.clone()))?;
let metadata =
SimpleMetadata::from_files(files, package_name, base.as_url().as_str());
metadata
let base = safe_copy_url_auth(&url, base.into_url());

SimpleMetadata::from_files(files, package_name, &base)
}
};
OwnedArchive::from_unarchived(&unarchived)
Expand Down Expand Up @@ -670,7 +669,7 @@ impl SimpleMetadata {
self.0.iter()
}

fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &str) -> Self {
fn from_files(files: Vec<pypi_types::File>, package_name: &PackageName, base: &Url) -> Self {
let mut map: BTreeMap<Version, VersionFiles> = BTreeMap::default();

// Group the distributions by version and kind
Expand Down