diff --git a/news/12459.feature.rst b/news/12459.feature.rst new file mode 100644 index 00000000000..c82e9a35350 --- /dev/null +++ b/news/12459.feature.rst @@ -0,0 +1 @@ +When there are complex conflicting requirements use much faster backtracking choices diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index fb0dd85f112..d8824bf6930 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -6,8 +6,10 @@ Dict, Iterable, Iterator, + List, Mapping, Sequence, + Set, TypeVar, Union, ) @@ -76,6 +78,133 @@ def _get_with_identifier( return default +def _extract_names_from_causes_and_parents( + causes: Iterable["PreferenceInformation"], +) -> Set[str]: + """ + Utility function to extract names from the causes and their parent packages + + :params causes: An iterable of PreferenceInformation + + Returns a set of strings, each representing the name of a requirement or + its parent package that was in causes + """ + causes_names = set() + for cause in causes: + causes_names.add(cause.requirement.name) + if cause.parent: + causes_names.add(cause.parent.name) + + return causes_names + + +def _causes_with_conflicting_parent( + causes: Iterable["PreferenceInformation"], +) -> List["PreferenceInformation"]: + """ + Identifies causes that conflict because their parent package requirements + are not satisfied by another cause, or vice versa. + + :params causes: An iterable sequence of PreferenceInformation + + Returns a list of PreferenceInformation objects that represent the causes + where their parent conflicts + """ + # Avoid duplication by keeping track of already identified conflicting + # causes by their id + conflicting_causes_by_id: dict[int, "PreferenceInformation"] = {} + all_causes_by_id = {id(c): c for c in causes} + + # Map cause IDs and parent packages by parent name for quick lookup + causes_ids_and_parents_by_parent_name: dict[ + str, list[tuple[int, Candidate]] + ] = collections.defaultdict(list) + for cause_id, cause in all_causes_by_id.items(): + if cause.parent: + causes_ids_and_parents_by_parent_name[cause.parent.name].append( + (cause_id, cause.parent) + ) + + # Identify a cause's requirement conflicts with another cause's parent + for cause_id, cause in all_causes_by_id.items(): + if cause_id in conflicting_causes_by_id: + continue + + cause_id_and_parents = causes_ids_and_parents_by_parent_name.get( + cause.requirement.name + ) + if not cause_id_and_parents: + continue + + for other_cause_id, parent in cause_id_and_parents: + if not cause.requirement.is_satisfied_by(parent): + conflicting_causes_by_id[cause_id] = cause + conflicting_causes_by_id[other_cause_id] = all_causes_by_id[ + other_cause_id + ] + + return list(conflicting_causes_by_id.values()) + + +def _first_causes_with_no_candidates( + causes: Sequence["PreferenceInformation"], + candidates: Mapping[str, Iterator[Candidate]], +) -> List["PreferenceInformation"]: + """ + Checks for causes that have no possible candidates to satisfy their + requirements. Returns first causes found as iterating candidates can + be expensive due to downloading and building packages. + + :params causes: A sequence of PreferenceInformation + :params candidates: A mapping of package names to iterators of their candidates + + Returns a list containing the first pair of PreferenceInformation objects + that were found which had no satisfying candidates, else if all causes + had at least some satisfying candidate an empty list is returned. + """ + # Group causes by package name to reduce the comparison complexity. + causes_by_name: dict[str, list["PreferenceInformation"]] = collections.defaultdict( + list + ) + for cause in causes: + causes_by_name[cause.requirement.project_name].append(cause) + + # Check for cause pairs within the same package that have incompatible specifiers. + for cause_name, causes_list in causes_by_name.items(): + if len(causes_list) < 2: + continue + + while causes_list: + cause = causes_list.pop() + candidate = cause.requirement.get_candidate_lookup()[1] + if candidate is None: + continue + + for other_cause in causes_list: + other_candidate = other_cause.requirement.get_candidate_lookup()[1] + if other_candidate is None: + continue + + # Check if no candidate can match the combined specifier + combined_specifier = candidate.specifier & other_candidate.specifier + possible_candidates = candidates.get(cause_name) + + # If no candidates have been provided then by default the + # causes have no candidates + if possible_candidates is None: + return [cause, other_cause] + + # Use any and contains version here instead of filter so + # if a version is found that matches it will short curcuit + # iterating through possible candidates + if not any( + combined_specifier.contains(c.version) for c in possible_candidates + ): + return [cause, other_cause] + + return [] + + class PipProvider(_ProviderBase): """Pip's provider implementation for resolvelib. @@ -256,3 +385,58 @@ def is_backtrack_cause( if backtrack_cause.parent and identifier == backtrack_cause.parent.name: return True return False + + def narrow_requirement_selection( + self, + identifiers: Iterable[str], + resolutions: Mapping[str, Candidate], + candidates: Mapping[str, Iterator[Candidate]], + information: Mapping[str, Iterable["PreferenceInformation"]], + backtrack_causes: Sequence["PreferenceInformation"], + ) -> Iterable[str]: + """ + Narrows down the selection of requirements to consider for the next + resolution step. + + This method uses principles of conflict-driven clause learning (CDCL) + to focus on the closest conflicts first. + + :params identifiers: Iterable of requirement names currently under + consideration. + :params resolutions: Current mapping of resolved package identifiers + to their selected candidates. + :params candidates: Mapping of each package's possible candidates. + :params information: Mapping of requirement information for each package. + :params backtrack_causes: Sequence of requirements, if non-empty, + were the cause of the resolver backtracking + + Returns: + An iterable of requirement names that the resolver will use to + limit the next step of resolution + """ + + # If there are 2 or less causes then finding conflicts between + # them is not required as there will always be a minumum of two + # conflicts + if len(backtrack_causes) < 3: + return identifiers + + # First, try to resolve direct causes based on conflicting parent packages + direct_causes = _causes_with_conflicting_parent(backtrack_causes) + if not direct_causes: + # If no conflicting parent packages found try to find some causes + # that share the same requirement name but no common candidate, + # we take the first one of these as iterating through candidates + # is potentially expensive + direct_causes = _first_causes_with_no_candidates( + backtrack_causes, candidates + ) + if direct_causes: + backtrack_causes = direct_causes + + # Filter identifiers based on the narrowed down causes. + unsatisfied_causes_names = set( + identifiers + ) & _extract_names_from_causes_and_parents(backtrack_causes) + + return unsatisfied_causes_names if unsatisfied_causes_names else identifiers diff --git a/src/pip/_vendor/resolvelib/providers.py b/src/pip/_vendor/resolvelib/providers.py index e99d87ee75f..b04cf31a339 100644 --- a/src/pip/_vendor/resolvelib/providers.py +++ b/src/pip/_vendor/resolvelib/providers.py @@ -131,3 +131,55 @@ def resolve(self, requirements, **kwargs): :raises: ``self.base_exception`` or its subclass. """ raise NotImplementedError + + def narrow_requirement_selection( + self, identifiers, resolutions, candidates, information, backtrack_causes + ): + """ + Narrows the selection of requirements being considered during + resolution. + + The requirement selection is defined as "The possible requirements + that will be resolved next." If a requirement isn't part of the returned + iterable, it will not be considered during the next step of resolution. + + :param identifiers: An iterable of `identifier` as returned by + ``identify()``. These identify all requirements currently being + considered. + :param resolutions: Mapping of candidates currently pinned by the + resolver. Each key is an identifier, and the value is a candidate. + The candidate may conflict with requirements from ``information``. + :param candidates: Mapping of each dependency's possible candidates. + Each value is an iterator of candidates. + :param information: Mapping of requirement information of each package. + Each value is an iterator of *requirement information*. + :param backtrack_causes: Sequence of *requirement information* that are + the requirements that caused the resolver to most recently + backtrack. + + A *requirement information* instance is a named tuple with two members: + + * ``requirement`` specifies a requirement contributing to the current + list of candidates. + * ``parent`` specifies the candidate that provides (depended on) the + requirement, or ``None`` to indicate a root requirement. + + Must return a non-empty subset of `identifiers`, with the simplest + implementation being to return `identifiers` unchanged. + + Can be used by the provider to optimize the dependency resolution + process. `get_preference` will only be called on the identifiers + returned. If there is only one identifier returned, then `get_preference` + won't be called at all. + + Serving a similar purpose as `get_preference`, this method allows the + provider to guide resolvelib through the resolution process. It should + be used instead of `get_preference` when the provider needs to consider + multiple identifiers simultaneously, or when the provider wants to skip + checking all identifiers, e.g., because the checks are prohibitively + expensive. + + Returns: + Iterable[KT]: A non-empty subset of `identifiers`. + """ + return identifiers diff --git a/src/pip/_vendor/resolvelib/resolvers.py b/src/pip/_vendor/resolvelib/resolvers.py index 2c3d0e306f9..2e10f040f72 100644 --- a/src/pip/_vendor/resolvelib/resolvers.py +++ b/src/pip/_vendor/resolvelib/resolvers.py @@ -378,6 +378,12 @@ def _patch_criteria(): # No way to backtrack anymore. return False + def _extract_causes(self, criteron): + """Extract causes from list of criterion and deduplicate""" + return list( + {id(i): i for c in criteron for i in c.information}.values() + ) + def resolve(self, requirements, max_rounds): if self._states: raise RuntimeError("already resolved") @@ -422,12 +428,38 @@ def resolve(self, requirements, max_rounds): unsatisfied_names ) - # Choose the most preferred unpinned criterion to try. - name = min(unsatisfied_names, key=self._get_preference) - failure_causes = self._attempt_to_pin_criterion(name) + if len(unsatisfied_names) > 1: + narrowed_unstatisfied_names = list( + self._p.narrow_requirement_selection( + identifiers=unsatisfied_names, + resolutions=self.state.mapping, + candidates=IteratorMapping( + self.state.criteria, + operator.attrgetter("candidates"), + ), + information=IteratorMapping( + self.state.criteria, + operator.attrgetter("information"), + ), + backtrack_causes=self.state.backtrack_causes, + ) + ) + else: + narrowed_unstatisfied_names = unsatisfied_names + + # If there is only 1 unsatisfied name skip calling self._get_preference + if len(narrowed_unstatisfied_names) > 1: + # Choose the most preferred unpinned criterion to try. + name = min( + narrowed_unstatisfied_names, key=self._get_preference + ) + else: + name = narrowed_unstatisfied_names[0] + + failure_criterion = self._attempt_to_pin_criterion(name) - if failure_causes: - causes = [i for c in failure_causes for i in c.information] + if failure_criterion: + causes = self._extract_causes(failure_criterion) # Backjump if pinning fails. The backjump process puts us in # an unpinned state, so we can work on it in the next round. self._r.resolving_conflicts(causes=causes)