From 28f075cb291ce607ec6ba1b1c51af8b229010ffe Mon Sep 17 00:00:00 2001 From: konstin Date: Fri, 21 Jun 2024 18:09:13 +0200 Subject: [PATCH] Support conflicting URL in separate forks ## Introduction We support forking the dependency resolution to support conflicting registry requirements for different platforms, say on package range is required for an older python version while a newer is required for newer python versions, or dependencies that are different per platform. We need to extend this support to direct URL requirements. ```toml dependencies = [ "iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl ; python_version >= '3.12'", "iniconfig @ https://files.pythonhosted.org/packages/9b/dd/b3c12c6d707058fa947864b67f0c4e0c39ef8610988d7baea9578f3c48f3/iniconfig-1.1.1-py2.py3-none-any.whl ; python_version < '3.12'" ] ``` This did not work because `Urls` was built on the assumption that there is a single allowed URL per package. We collect all allowed URL ahead of resolution by following direct URL dependencies (including path dependencies) transitively, i.e. a registry distribution can't require a URL. ## Registry and URL requirement in the same package Consider the following two cases: requirements.in: ```text werkzeug==2.0.0 werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl ``` pyproject.toml: ```toml dependencies = [ "iniconfig == 1.1.1 ; python_version < '3.12'", "iniconfig @ git+https://github.com/pytest-dev/iniconfig@93f5930e668c0d1ddf4597e38dd0dea4e2665e7a ; python_version >= '3.12'", ] ``` In the first case, we want the URL to override the registry dependency, in the second case we want to fork and have one branch use the registry and the other the URL. We have to know about this in `PubGrubRequirement::from_requirement`, but we only fork after the current method. Instead, we can check if the registry and the URL requirement for the same name are disjoint: If they are, they must end up in different branches on forking, if they aren't, we know the URL overrides the registry. An alternative implementation would be splitting in the lookahead resolver so that all URLs are already tagged by the markers of their fork. This would require duplicating the forking logic while making sure that we follow the exact same precedence rules, which is why i didn't do it. ## Determining the URL of a Requirement For a registry requirement: * If there is a URL override: Use that override, done. * If there is a fork-specific URL: Use that, done. * If there is another requirement in the same package with a URL requirement find a matching URL in `Urls`, use it to canonicalize the current URL. * After forking: Check that this URL is the only URL in this fork. For a URL requirement: * If there is a URL override: Use that override, done. * Find a matching URL in `Urls`, use it to canonicalize the current URL. * After forking: Check that this URL is the only URL in this fork. ## URL conflicts pyproject.toml (invalid): ```toml dependencies = [ "iniconfig @ https://files.pythonhosted.org/packages/44/39/e96292c7f7068e58877f476908c5974dc76c37c623f1fa332fe4ed6dfbec/iniconfig-1.1.0.tar.gz", "iniconfig @ https://files.pythonhosted.org/packages/9b/dd/b3c12c6d707058fa947864b67f0c4e0c39ef8610988d7baea9578f3c48f3/iniconfig-1.1.1-py2.py3-none-any.whl ; python_version < '3.12'", "iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl ; python_version >= '3.12'", ] ``` On the fork state, we keep `ForkUrls` that check for conflicts after forking, rejecting the third case because we added two packages of the same name with different URLs. We need to flatten out the requirements before transformation into pubgrub requirements to get the full list of other requirements which may contain a URL, which was changed in a previous PR: #4430. ## Complex Example a: ```toml dependencies = [ # Force a split "anyio==4.3.0 ; python_version >= '3.12'", "anyio==4.2.0 ; python_version < '3.12'", # Include URLs transitively "b" ] ``` b: ```toml dependencies = [ # Only one is used in each split. "b1 ; python_version < '3.12'", "b2 ; python_version >= '3.12'", "b3 ; python_version >= '3.12'", ] ``` b1: ```toml dependencies = [ "iniconfig @ https://files.pythonhosted.org/packages/9b/dd/b3c12c6d707058fa947864b67f0c4e0c39ef8610988d7baea9578f3c48f3/iniconfig-1.1.1-py2.py3-none-any.whl", ] ``` b2: ```toml dependencies = [ "iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", ] ``` b3: ```toml dependencies = [ "iniconfig @ https://files.pythonhosted.org/packages/44/39/e96292c7f7068e58877f476908c5974dc76c37c623f1fa332fe4ed6dfbec/iniconfig-1.1.0.tar.gz", ] ``` In this example, all packages are url requirements (directory requirements) and the root package is `a`. We first split on `a`, `b` being in each split. In the first fork, we reach `b1`, the fork URLs are empty, we insert the iniconfig 1.1.1 URL, and then we skip over `b2` and `b3` since the mark is disjoint with the fork markers. In the second fork, we skip over `b1`, visit `b2`, insert the iniconfig 2.0.0 URL into the again empty fork URLs, then visit `b3` and try to insert the iniconfig 1.1.0 URL. At this point we find a conflict for the iniconfig URL and error. ## Closing Best reviewed commit-by-commit, the tests are verbose. The git tests are slow, but they make the best example for different URL types i could find. Part of #3927. This PR does not handle `Locals` yet. --- crates/pypi-types/src/requirement.rs | 64 +++- crates/uv-resolver/src/error.rs | 17 +- crates/uv-resolver/src/fork_urls.rs | 56 ++++ crates/uv-resolver/src/lib.rs | 1 + crates/uv-resolver/src/manifest.rs | 38 ++- .../uv-resolver/src/pubgrub/dependencies.rs | 301 ++++++++++-------- crates/uv-resolver/src/pubgrub/package.rs | 7 +- crates/uv-resolver/src/pubgrub/priority.rs | 4 + crates/uv-resolver/src/resolver/mod.rs | 46 ++- crates/uv-resolver/src/resolver/urls.rs | 269 ++++++++-------- crates/uv/tests/pip_compile.rs | 2 + 11 files changed, 511 insertions(+), 294 deletions(-) create mode 100644 crates/uv-resolver/src/fork_urls.rs diff --git a/crates/pypi-types/src/requirement.rs b/crates/pypi-types/src/requirement.rs index 0c3d0ce8629bd..61e3144c75813 100644 --- a/crates/pypi-types/src/requirement.rs +++ b/crates/pypi-types/src/requirement.rs @@ -8,7 +8,9 @@ use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, V use uv_git::{GitReference, GitSha}; use uv_normalize::{ExtraName, PackageName}; -use crate::{ParsedUrl, VerbatimParsedUrl}; +use crate::{ + ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, VerbatimParsedUrl, +}; /// A representation of dependency on a package, an extension over a PEP 508's requirement. /// @@ -251,6 +253,66 @@ impl RequirementSource { } } + pub fn to_verbatim_parsed_url(&self) -> Option { + Some(match &self { + Self::Registry { .. } => { + return None; + } + Self::Url { + subdirectory, + location, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source( + location.clone(), + subdirectory.clone(), + )), + verbatim: url.clone(), + }, + Self::Path { + install_path, + lock_path, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source( + install_path.clone(), + lock_path.clone(), + url.to_url(), + )), + verbatim: url.clone(), + }, + Self::Directory { + install_path, + lock_path, + editable, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Directory(ParsedDirectoryUrl::from_source( + install_path.clone(), + lock_path.clone(), + *editable, + url.to_url(), + )), + verbatim: url.clone(), + }, + Self::Git { + repository, + reference, + precise, + subdirectory, + url, + } => VerbatimParsedUrl { + parsed_url: ParsedUrl::Git(ParsedGitUrl::from_source( + repository.clone(), + reference.clone(), + *precise, + subdirectory.clone(), + )), + verbatim: url.clone(), + }, + }) + } + /// Returns `true` if the source is editable. pub fn is_editable(&self) -> bool { matches!(self, Self::Directory { editable: true, .. }) diff --git a/crates/uv-resolver/src/error.rs b/crates/uv-resolver/src/error.rs index a2919a71da33a..7c27f2bc038e4 100644 --- a/crates/uv-resolver/src/error.rs +++ b/crates/uv-resolver/src/error.rs @@ -9,7 +9,7 @@ use rustc_hash::{FxHashMap, FxHashSet}; use distribution_types::{BuiltDist, IndexLocations, InstalledDist, SourceDist}; use pep440_rs::Version; -use pep508_rs::Requirement; +use pep508_rs::{MarkerTree, Requirement}; use uv_normalize::PackageName; use crate::candidate_selector::CandidateSelector; @@ -48,11 +48,18 @@ pub enum ResolveError { #[error(transparent)] PubGrubSpecifier(#[from] PubGrubSpecifierError), - #[error("Requirements contain conflicting URLs for package `{0}`:\n- {1}\n- {2}")] - ConflictingUrlsDirect(PackageName, String, String), + #[error("Overrides contain conflicting URLs for package `{0}`:\n- {1}\n- {2}")] + ConflictingOverrideUrls(PackageName, String, String), - #[error("There are conflicting URLs for package `{0}`:\n- {1}\n- {2}")] - ConflictingUrlsTransitive(PackageName, String, String), + #[error("Requirements contain conflicting URLs for package `{0}`:\n- {}", _1.join("\n- "))] + ConflictingUrls(PackageName, Vec), + + #[error("Requirements contain conflicting URLs for package `{package_name}` in split `{fork_markers}`:\n- {}", urls.join("\n- "))] + ConflictingUrlsInFork { + package_name: PackageName, + urls: Vec, + fork_markers: MarkerTree, + }, #[error("Package `{0}` attempted to resolve via URL: {1}. URL dependencies must be expressed as direct requirements or constraints. Consider adding `{0} @ {1}` to your dependencies or constraints file.")] DisallowedUrl(PackageName, String), diff --git a/crates/uv-resolver/src/fork_urls.rs b/crates/uv-resolver/src/fork_urls.rs new file mode 100644 index 0000000000000..102c0048a9853 --- /dev/null +++ b/crates/uv-resolver/src/fork_urls.rs @@ -0,0 +1,56 @@ +use std::collections::hash_map::Entry; + +use rustc_hash::FxHashMap; + +use distribution_types::Verbatim; +use pep508_rs::MarkerTree; +use pypi_types::VerbatimParsedUrl; +use uv_normalize::PackageName; + +use crate::ResolveError; + +/// See [`crate::resolver::SolveState`]. +#[derive(Default, Clone)] +pub(crate) struct ForkUrls(FxHashMap); + +impl ForkUrls { + /// Get the URL previously used for a package in this fork. + pub(crate) fn get(&self, package_name: &PackageName) -> Option<&VerbatimParsedUrl> { + self.0.get(package_name) + } + + /// Check that this is the only URL used for this package in this fork. + pub(crate) fn insert( + &mut self, + package_name: &PackageName, + url: &VerbatimParsedUrl, + fork_markers: &MarkerTree, + ) -> Result<(), ResolveError> { + match self.0.entry(package_name.clone()) { + Entry::Occupied(previous) => { + if previous.get() != url { + let conflicting_url = vec![ + previous.get().verbatim.verbatim().to_string(), + url.to_string(), + ]; + return if fork_markers == &MarkerTree::And(Vec::new()) { + Err(ResolveError::ConflictingUrls( + package_name.clone(), + conflicting_url, + )) + } else { + Err(ResolveError::ConflictingUrlsInFork { + package_name: package_name.clone(), + urls: conflicting_url, + fork_markers: fork_markers.clone(), + }) + }; + } + } + Entry::Vacant(vacant) => { + vacant.insert(url.clone()); + } + } + Ok(()) + } +} diff --git a/crates/uv-resolver/src/lib.rs b/crates/uv-resolver/src/lib.rs index 65a4223d3aa11..a1496bd13d4cc 100644 --- a/crates/uv-resolver/src/lib.rs +++ b/crates/uv-resolver/src/lib.rs @@ -30,6 +30,7 @@ mod error; mod exclude_newer; mod exclusions; mod flat_index; +mod fork_urls; mod lock; mod manifest; mod marker; diff --git a/crates/uv-resolver/src/manifest.rs b/crates/uv-resolver/src/manifest.rs index 22ae0df8bbbb2..3b89069b0915f 100644 --- a/crates/uv-resolver/src/manifest.rs +++ b/crates/uv-resolver/src/manifest.rs @@ -97,6 +97,16 @@ impl Manifest { &'a self, markers: Option<&'a MarkerEnvironment>, mode: DependencyMode, + ) -> impl Iterator + 'a { + self.requirements_no_overrides(markers, mode) + .chain(self.overrides(markers, mode)) + } + + /// Like [`Self::requirements`], but without the overrides. + pub fn requirements_no_overrides<'a>( + &'a self, + markers: Option<&'a MarkerEnvironment>, + mode: DependencyMode, ) -> impl Iterator + 'a { match mode { // Include all direct and transitive requirements, with constraints and overrides applied. @@ -119,11 +129,6 @@ impl Manifest { self.constraints .requirements() .filter(move |requirement| requirement.evaluate_markers(markers, &[])), - ) - .chain( - self.overrides - .requirements() - .filter(move |requirement| requirement.evaluate_markers(markers, &[])), ), ), // Include direct requirements, with constraints and overrides applied. @@ -131,7 +136,28 @@ impl Manifest { self.overrides .apply(&self.requirements) .chain(self.constraints.requirements()) - .chain(self.overrides.requirements()) + .filter(move |requirement| requirement.evaluate_markers(markers, &[])), + ), + } + } + + /// Only the overrides from [`Self::requirements`]. + pub fn overrides<'a>( + &'a self, + markers: Option<&'a MarkerEnvironment>, + mode: DependencyMode, + ) -> impl Iterator + 'a { + match mode { + // Include all direct and transitive requirements, with constraints and overrides applied. + DependencyMode::Transitive => Either::Left( + self.overrides + .requirements() + .filter(move |requirement| requirement.evaluate_markers(markers, &[])), + ), + // Include direct requirements, with constraints and overrides applied. + DependencyMode::Direct => Either::Right( + self.overrides + .requirements() .filter(move |requirement| requirement.evaluate_markers(markers, &[])), ), } diff --git a/crates/uv-resolver/src/pubgrub/dependencies.rs b/crates/uv-resolver/src/pubgrub/dependencies.rs index fced9adb5a052..bea613bd948bb 100644 --- a/crates/uv-resolver/src/pubgrub/dependencies.rs +++ b/crates/uv-resolver/src/pubgrub/dependencies.rs @@ -1,9 +1,10 @@ +use std::iter; + use itertools::Itertools; use pubgrub::range::Range; -use tracing::warn; +use tracing::{debug, warn}; -use distribution_types::Verbatim; -use pep440_rs::Version; +use pep440_rs::{Version, VersionSpecifiers}; use pypi_types::{ ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, Requirement, RequirementSource, @@ -11,6 +12,8 @@ use pypi_types::{ use uv_git::GitResolver; use uv_normalize::{ExtraName, PackageName}; +use crate::fork_urls::ForkUrls; +use crate::marker::is_disjoint; use crate::pubgrub::{PubGrubPackage, PubGrubPackageInner}; use crate::resolver::{Locals, Urls}; use crate::{PubGrubSpecifier, ResolveError}; @@ -24,6 +27,7 @@ impl PubGrubDependencies { pub(crate) fn from_requirements( flattened_requirements: &[&Requirement], source_name: Option<&PackageName>, + fork_urls: &ForkUrls, urls: &Urls, locals: &Locals, git: &GitResolver, @@ -31,15 +35,25 @@ impl PubGrubDependencies { let mut dependencies = Vec::new(); for requirement in flattened_requirements { // Add the package, plus any extra variants. - for result in std::iter::once(PubGrubRequirement::from_requirement( + for result in iter::once(PubGrubRequirement::from_requirement( requirement, None, + flattened_requirements, + fork_urls, urls, locals, git, )) .chain(requirement.extras.clone().into_iter().map(|extra| { - PubGrubRequirement::from_requirement(requirement, Some(extra), urls, locals, git) + PubGrubRequirement::from_requirement( + requirement, + Some(extra), + flattened_requirements, + fork_urls, + urls, + locals, + git, + ) })) { let PubGrubRequirement { package, version } = result?; @@ -97,77 +111,74 @@ pub(crate) struct PubGrubRequirement { impl PubGrubRequirement { /// Convert a [`Requirement`] to a PubGrub-compatible package and range. + /// + /// # Handling URLs + /// + /// Consider the following two cases: + /// requirements.in: + /// ```text + /// werkzeug==2.0.0 + /// werkzeug @ https://files.pythonhosted.org/packages/ff/1d/960bb4017c68674a1cb099534840f18d3def3ce44aed12b5ed8b78e0153e/Werkzeug-2.0.0-py3-none-any.whl + /// ``` + /// pyproject.toml: + /// ```toml + /// dependencies = [ + /// "iniconfig == 1.1.1 ; python_version < '3.12'", + /// "iniconfig @ git+https://github.com/pytest-dev/iniconfig@93f5930e668c0d1ddf4597e38dd0dea4e2665e7a ; python_version >= '3.12'", + /// ] + /// ``` + /// + /// In the former case, we want the URL to override the registry dependency, in the latter + /// case we want to fork and have one branch use the registry and the other the URL. + /// We have to know about this in `PubGrubRequirement::from_requirement`, but we only + /// fork after the current method. Instead, we can check if the registry and the URL + /// requirement for the same name are disjoint: If they are, they must end up in + /// different branches on forking, if they aren't, we know the URL overrides the + /// registry. We don't have to check for duplicates/collisions here, `ForkUrls` will + /// handle this after forking. + /// + /// For a registry requirement: + /// * If there is a URL override: Use that override, done. + /// * If there is a fork-specific URL: Use that, done. + /// * If there is another requirement in the same package with a URL requirement find a matching URL in `Urls`, use it to canonicalize the current URL. + /// * After forking: Check that this URL is the only URL in this fork. + /// + /// For a URL requirement: + /// * If there is a URL override: Use that override, done. + /// * Find a matching URL in `Urls`, use it to canonicalize the current URL. + /// * After forking: Check that this URL is the only URL in this fork. pub(crate) fn from_requirement( requirement: &Requirement, extra: Option, + flattened_requirements: &[&Requirement], + fork_urls: &ForkUrls, urls: &Urls, locals: &Locals, git: &GitResolver, ) -> Result { - match &requirement.source { + let (verbatim_url, parsed_url) = match &requirement.source { RequirementSource::Registry { specifier, .. } => { - // TODO(konsti): We're currently losing the index information here, but we need - // either pass it to `PubGrubPackage` or the `ResolverProvider` beforehand. - // If the specifier is an exact version, and the user requested a local version that's - // more precise than the specifier, use the local version instead. - let version = if let Some(expected) = locals.get(&requirement.name) { - specifier - .iter() - .map(|specifier| { - Locals::map(expected, specifier) - .map_err(ResolveError::InvalidVersion) - .and_then(|specifier| Ok(PubGrubSpecifier::try_from(&specifier)?)) - }) - .fold_ok(Range::full(), |range, specifier| { - range.intersection(&specifier.into()) - })? - } else { - PubGrubSpecifier::try_from(specifier)?.into() - }; - - Ok(Self { - package: PubGrubPackage::from_package( - requirement.name.clone(), - extra, - requirement.marker.clone(), - urls, - ), - version, - }) + return Self::from_registry_requirement( + specifier, + extra, + requirement, + flattened_requirements, + fork_urls, + urls, + locals, + git, + ); } RequirementSource::Url { subdirectory, location, url, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Archive(ParsedArchiveUrl::from_source( location.clone(), subdirectory.clone(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } - - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), - }) + (url, parsed_url) } RequirementSource::Git { repository, @@ -176,71 +187,25 @@ impl PubGrubRequirement { url, subdirectory, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Git(ParsedGitUrl::from_source( repository.clone(), reference.clone(), *precise, subdirectory.clone(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } - - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), - }) + (url, parsed_url) } RequirementSource::Path { url, install_path, lock_path, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Path(ParsedPathUrl::from_source( install_path.clone(), lock_path.clone(), url.to_url(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } - - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), - }) + (url, parsed_url) } RequirementSource::Directory { editable, @@ -248,37 +213,115 @@ impl PubGrubRequirement { install_path, lock_path, } => { - let Some(expected) = urls.get(&requirement.name) else { - return Err(ResolveError::DisallowedUrl( - requirement.name.clone(), - url.to_string(), - )); - }; - let parsed_url = ParsedUrl::Directory(ParsedDirectoryUrl::from_source( install_path.clone(), lock_path.clone(), *editable, url.to_url(), )); - if !Urls::same_resource(&expected.parsed_url, &parsed_url, git) { - return Err(ResolveError::ConflictingUrlsTransitive( - requirement.name.clone(), - expected.verbatim.verbatim().to_string(), - url.verbatim().to_string(), - )); - } + (url, parsed_url) + } + }; - Ok(Self { - package: PubGrubPackage::from_url( - requirement.name.clone(), - extra, - requirement.marker.clone(), - expected.clone(), - ), - version: Range::full(), + // If we have a URL override, apply it unconditionally. Otherwise, check that the URL is + // allowed and pick it's canonical from. We will check for conflicts (different URLs in + // the same fork) after forking using the `fork_urls` map. + let url = if let Some(override_url) = urls.get_override(&requirement.name) { + override_url + } else { + urls.canonicalize_allowed_url(&requirement.name, git, verbatim_url, &parsed_url)? + }; + Ok(Self { + package: PubGrubPackage::from_url( + requirement.name.clone(), + extra, + requirement.marker.clone(), + url.clone(), + ), + version: Range::full(), + }) + } + + #[allow(clippy::too_many_arguments)] + fn from_registry_requirement( + specifier: &VersionSpecifiers, + extra: Option, + requirement: &Requirement, + flattened_requirements: &[&Requirement], + fork_urls: &ForkUrls, + urls: &Urls, + locals: &Locals, + git: &GitResolver, + ) -> Result { + // If the specifier is an exact version, and the user requested a local version that's + // more precise than the specifier, use the local version instead. + let version = if let Some(expected) = locals.get(&requirement.name) { + specifier + .iter() + .map(|specifier| { + Locals::map(expected, specifier) + .map_err(ResolveError::InvalidVersion) + .and_then(|specifier| Ok(PubGrubSpecifier::try_from(&specifier)?)) }) - } + .fold_ok(Range::full(), |range, specifier| { + range.intersection(&specifier.into()) + })? + } else { + PubGrubSpecifier::try_from(specifier)?.into() + }; + + // See docstring, registry case. + let url = if let Some(override_url) = urls.get_override(&requirement.name) { + // The override URL is the canonical form. + Some(override_url.clone()) + } else if let Some(fork_url) = fork_urls.get(&requirement.name) { + // Fork URLs are already canonicalized. + Some(fork_url.clone()) + } else if let Some(url) = flattened_requirements + .iter() + // Filter for other requirements with the same name ... + .filter(|other| other.name == requirement.name && other != &&requirement) + // ... that will not be in a different fork by having disjoint markers. + .filter( + |other| match (requirement.marker.as_ref(), other.marker.as_ref()) { + (Some(requirement_marker), Some(other_marker)) => { + !is_disjoint(requirement_marker, other_marker) + } + // Either having a universal marker means they in every fork. + _ => true, + }, + ) + .find_map(|other| other.source.to_verbatim_parsed_url()) + { + Some( + urls.canonicalize_allowed_url( + &requirement.name, + git, + &url.verbatim, + &url.parsed_url, + )? + .clone(), + ) + } else { + None + }; + + if url.is_none() && urls.get_regular(&requirement.name).is_some() { + debug!( + "Fork uses registry instead of URL distribution for {}", + requirement.name + ); } + + let requirement = Self { + package: PubGrubPackage::from_package( + requirement.name.clone(), + extra, + requirement.marker.clone(), + url, + ), + version, + }; + Ok(requirement) } } diff --git a/crates/uv-resolver/src/pubgrub/package.rs b/crates/uv-resolver/src/pubgrub/package.rs index afbc3a908dea0..f3c78a0abeb26 100644 --- a/crates/uv-resolver/src/pubgrub/package.rs +++ b/crates/uv-resolver/src/pubgrub/package.rs @@ -6,8 +6,6 @@ use pep508_rs::MarkerTree; use pypi_types::VerbatimParsedUrl; use uv_normalize::{ExtraName, GroupName, PackageName}; -use crate::resolver::Urls; - /// [`Arc`] wrapper around [`PubGrubPackageInner`] to make cloning (inside PubGrub) cheap. #[derive(Debug, Clone, Eq, Hash, PartialEq, PartialOrd, Ord)] pub struct PubGrubPackage(Arc); @@ -131,14 +129,13 @@ pub enum PubGrubPackageInner { } impl PubGrubPackage { - /// Create a [`PubGrubPackage`] from a package name and version. + /// Create a [`PubGrubPackage`] from a package name and, optionally, a URL. pub(crate) fn from_package( name: PackageName, extra: Option, mut marker: Option, - urls: &Urls, + url: Option, ) -> Self { - let url = urls.get(&name).cloned(); // Remove all extra expressions from the marker, since we track extras // separately. This also avoids an issue where packages added via // extras end up having two distinct marker expressions, which in turn diff --git a/crates/uv-resolver/src/pubgrub/priority.rs b/crates/uv-resolver/src/pubgrub/priority.rs index e293e80114730..fd82d81d130a3 100644 --- a/crates/uv-resolver/src/pubgrub/priority.rs +++ b/crates/uv-resolver/src/pubgrub/priority.rs @@ -140,6 +140,10 @@ pub(crate) enum PubGrubPriority { Singleton(Reverse), /// The package was specified via a direct URL. + /// + /// N.B.: URLs need to have priority over registry distributions for correctly matching registry + /// distributions to URLs, see [`PubGrubPackage::from_package`] an + /// [`crate::fork_urls::ForkUrls`]. DirectUrl(Reverse), /// The package is the root package. diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index d2bd5ece30b78..da72951922673 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -40,6 +40,7 @@ use uv_types::{BuildContext, HashStrategy, InstalledPackagesProvider}; use crate::candidate_selector::{CandidateDist, CandidateSelector}; use crate::dependency_provider::UvDependencyProvider; use crate::error::ResolveError; +use crate::fork_urls::ForkUrls; use crate::manifest::Manifest; use crate::pins::FilePins; use crate::preferences::Preferences; @@ -79,7 +80,7 @@ pub struct Resolver { project: Option, requirements: Vec, @@ -324,6 +325,7 @@ impl ResolverState ResolverState ResolverState, ) -> Result { - let result = self.get_dependencies(package, version, markers, priorities, request_sink); + let result = self.get_dependencies( + package, + version, + fork_urls, + markers, + priorities, + request_sink, + ); if self.markers.is_some() { return result.map(|deps| match deps { Dependencies::Available(deps) => ForkedDependencies::Unforked(deps), @@ -930,6 +941,7 @@ impl ResolverState, @@ -949,6 +961,7 @@ impl ResolverState ResolverState ResolverState)>, prefetcher: &BatchPrefetcher, ) -> Result<(), ResolveError> { + // Check for self-dependencies. if dependencies .iter() .any(|(dependency, _)| dependency == &self.next) @@ -1680,6 +1702,18 @@ impl SolveState { } .into()); } + // Check for conflicting URLs. + for (package, _version) in &dependencies { + if let PubGrubPackageInner::Package { + name, + url: Some(url), + .. + } = &**package + { + self.urls.insert(name, url, &self.markers)?; + }; + } + self.pubgrub.add_package_version_dependencies( self.next.clone(), version.clone(), diff --git a/crates/uv-resolver/src/resolver/urls.rs b/crates/uv-resolver/src/resolver/urls.rs index eb787e60de8f6..2c05dbf9aae7d 100644 --- a/crates/uv-resolver/src/resolver/urls.rs +++ b/crates/uv-resolver/src/resolver/urls.rs @@ -1,22 +1,30 @@ use rustc_hash::FxHashMap; use same_file::is_same_file; +use std::iter; use tracing::debug; use cache_key::CanonicalUrl; use distribution_types::Verbatim; -use pep508_rs::MarkerEnvironment; -use pypi_types::{ - ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, - RequirementSource, VerbatimParsedUrl, -}; +use pep508_rs::{MarkerEnvironment, VerbatimUrl}; +use pypi_types::{ParsedDirectoryUrl, ParsedUrl, VerbatimParsedUrl}; use uv_git::GitResolver; use uv_normalize::PackageName; use crate::{DependencyMode, Manifest, ResolveError}; -/// A map of package names to their associated, required URLs. +/// The URLs that are allowed for packages. +/// +/// These are the URLs used in the root package or by other URL dependencies (including path +/// dependencies). They take precedence over requirements by version (except for the special case +/// where we are in a fork that doesn't use any of the URL(s) used in other forks). Each fork may +/// only use a single URL. #[derive(Debug, Default)] -pub(crate) struct Urls(FxHashMap); +pub(crate) struct Urls { + /// URL requirements in the root package, from other URL requirements or from constraints. + regular: FxHashMap>, + /// URL requirements in overrides. + overrides: FxHashMap, +} impl Urls { pub(crate) fn from_manifest( @@ -25,155 +33,132 @@ impl Urls { git: &GitResolver, dependencies: DependencyMode, ) -> Result { - let mut urls: FxHashMap = FxHashMap::default(); + let mut urls: FxHashMap> = FxHashMap::default(); + let mut overrides: FxHashMap = FxHashMap::default(); // Add all direct requirements and constraints. If there are any conflicts, return an error. - for requirement in manifest.requirements(markers, dependencies) { - match &requirement.source { - RequirementSource::Registry { .. } => {} - RequirementSource::Url { - subdirectory, - location, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source( - location.clone(), - subdirectory.clone(), - )), - verbatim: url.clone(), - }; - if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) { - if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); + for requirement in manifest.requirements_no_overrides(markers, dependencies) { + let Some(url) = requirement.source.to_verbatim_parsed_url() else { + // Registry requirement + continue; + }; + + let package_urls = urls.entry(requirement.name.clone()).or_default(); + if let Some(package_url) = package_urls + .iter_mut() + .find(|package_url| same_resource(&package_url.parsed_url, &url.parsed_url, git)) + { + // Allow editables to override non-editables. + let previous_editable = package_url.is_editable(); + *package_url = url; + if previous_editable { + if let VerbatimParsedUrl { + parsed_url: ParsedUrl::Directory(ParsedDirectoryUrl { editable, .. }), + verbatim: _, + } = package_url + { + if !*editable { + debug!("Allowing an editable variant of {}", &package_url.verbatim); + *editable = true; } - } - } - RequirementSource::Path { - install_path, - lock_path, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source( - install_path.clone(), - lock_path.clone(), - url.to_url(), - )), - verbatim: url.clone(), }; - if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) { - if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); - } - } } - RequirementSource::Directory { - install_path, - lock_path, - editable, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Directory(ParsedDirectoryUrl::from_source( - install_path.clone(), - lock_path.clone(), - *editable, - url.to_url(), - )), - verbatim: url.clone(), - }; - match urls.entry(requirement.name.clone()) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - let previous = entry.get(); - if Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - // Allow editables to override non-editables. - if previous.parsed_url.is_editable() && !editable { - debug!( - "Allowing {} as an editable variant of {}", - &previous.verbatim, url.verbatim - ); - } else { - entry.insert(url.clone()); - } - continue; - } + } else { + package_urls.push(url); + } + } - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); - } - std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(url.clone()); - } - } - } - RequirementSource::Git { - repository, - reference, - precise, - subdirectory, - url, - } => { - let url = VerbatimParsedUrl { - parsed_url: ParsedUrl::Git(ParsedGitUrl::from_source( - repository.clone(), - reference.clone(), - *precise, - subdirectory.clone(), - )), - verbatim: url.clone(), - }; - if let Some(previous) = urls.insert(requirement.name.clone(), url.clone()) { - if !Self::same_resource(&previous.parsed_url, &url.parsed_url, git) { - return Err(ResolveError::ConflictingUrlsDirect( - requirement.name.clone(), - previous.verbatim.verbatim().to_string(), - url.verbatim.verbatim().to_string(), - )); - } - } + for requirement in manifest.overrides(markers, dependencies) { + let Some(url) = requirement.source.to_verbatim_parsed_url() else { + // Registry requirement + continue; + }; + // With an override `anyio==0.0.0` and a requirements.txt entry `./anyio`, we still use + // the URL. See `allow_recursive_url_local_path_override_constraint` + urls.remove(&requirement.name); + let previous = overrides.insert(requirement.name.clone(), url.clone()); + if let Some(previous) = previous { + if !same_resource(&previous.parsed_url, &url.parsed_url, git) { + return Err(ResolveError::ConflictingOverrideUrls( + requirement.name.clone(), + previous.verbatim.verbatim().to_string(), + url.verbatim.verbatim().to_string(), + )); } } } - Ok(Self(urls)) + Ok(Self { + regular: urls, + overrides, + }) } - /// Return the [`VerbatimUrl`] associated with the given package name, if any. - pub(crate) fn get(&self, package: &PackageName) -> Option<&VerbatimParsedUrl> { - self.0.get(package) + /// Return the allowed [`VerbatimUrl`]s for given package from regular requirements and + /// constraints (but not overrides), if any. + /// + /// It's more than one more URL if they are in different forks (or conflict after forking). + pub(crate) fn get_regular(&self, package: &PackageName) -> Option<&[VerbatimParsedUrl]> { + self.regular.get(package).map(Vec::as_slice) } - /// Returns `true` if the [`ParsedUrl`] instances point to the same resource. - pub(crate) fn same_resource(a: &ParsedUrl, b: &ParsedUrl, git: &GitResolver) -> bool { - match (a, b) { - (ParsedUrl::Archive(a), ParsedUrl::Archive(b)) => { - a.subdirectory == b.subdirectory - && CanonicalUrl::new(&a.url) == CanonicalUrl::new(&b.url) - } - (ParsedUrl::Git(a), ParsedUrl::Git(b)) => { - a.subdirectory == b.subdirectory && git.same_ref(&a.url, &b.url) - } - (ParsedUrl::Path(a), ParsedUrl::Path(b)) => { - a.install_path == b.install_path - || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) - } - (ParsedUrl::Directory(a), ParsedUrl::Directory(b)) => { - a.install_path == b.install_path - || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) - } - _ => false, + /// Return the [`VerbatimUrl`] override for the given package, if any. + pub(crate) fn get_override(&self, package: &PackageName) -> Option<&VerbatimParsedUrl> { + self.overrides.get(package) + } + + /// Check if a URL is allowed (known), and if so, return its canonical form. + pub(crate) fn canonicalize_allowed_url<'a>( + &'a self, + package_name: &'a PackageName, + git: &'a GitResolver, + verbatim_url: &'a VerbatimUrl, + parsed_url: &'a ParsedUrl, + ) -> Result<&'a VerbatimParsedUrl, ResolveError> { + let Some(expected) = self.get_regular(package_name) else { + return Err(ResolveError::DisallowedUrl( + package_name.clone(), + verbatim_url.to_string(), + )); + }; + + let matching_urls: Vec<_> = expected + .iter() + .filter(|requirement| same_resource(&requirement.parsed_url, parsed_url, git)) + .collect(); + + let [allowed_url] = matching_urls.as_slice() else { + return Err(ResolveError::ConflictingUrls( + package_name.clone(), + matching_urls + .into_iter() + .map(|parsed_url| parsed_url.verbatim.verbatim().to_string()) + .chain(iter::once(verbatim_url.verbatim().to_string())) + .collect(), + )); + }; + Ok(*allowed_url) + } +} + +/// Returns `true` if the [`ParsedUrl`] instances point to the same resource. +fn same_resource(a: &ParsedUrl, b: &ParsedUrl, git: &GitResolver) -> bool { + match (a, b) { + (ParsedUrl::Archive(a), ParsedUrl::Archive(b)) => { + a.subdirectory == b.subdirectory + && CanonicalUrl::new(&a.url) == CanonicalUrl::new(&b.url) + } + (ParsedUrl::Git(a), ParsedUrl::Git(b)) => { + a.subdirectory == b.subdirectory && git.same_ref(&a.url, &b.url) + } + (ParsedUrl::Path(a), ParsedUrl::Path(b)) => { + a.install_path == b.install_path + || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) + } + (ParsedUrl::Directory(a), ParsedUrl::Directory(b)) => { + a.install_path == b.install_path + || is_same_file(&a.install_path, &b.install_path).unwrap_or(false) } + _ => false, } } diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index 36d62fd9006be..e08cfc49de10e 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -8265,6 +8265,8 @@ requires-python = ">3.8" /// Allow URL dependencies recursively for local source trees, but respect both overrides _and_ /// constraints. +/// +/// We have app -> lib -> anyio and root has a directory requirement on app. #[test] fn allow_recursive_url_local_path_override_constraint() -> Result<()> { let context = TestContext::new("3.12");