pypa · notatallshaw · Feb 10, 2024 · Feb 11, 2024 · Feb 11, 2024 · pfmoore
diff --git a/news/12459.feature.rst b/news/12459.feature.rst
@@ -0,0 +1 @@
+When there are complex conflicting requirements use much faster backtracking choices
diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py
@@ -6,8 +6,10 @@
     Dict,
     Iterable,
     Iterator,
+    List,
     Mapping,
     Sequence,
+    Set,
     TypeVar,
     Union,
 )
@@ -76,6 +78,133 @@ def _get_with_identifier(
     return default
 
 
+def _extract_names_from_causes_and_parents(
+    causes: Iterable["PreferenceInformation"],
+) -> Set[str]:
+    """
+    Utility function to extract names from the causes and their parent packages
+
+    :params causes: An iterable of PreferenceInformation
+
+    Returns a set of strings, each representing the name of a requirement or
-    Returns a set of strings, each representing the name of a requirement or
+    Returns a set of strings, representing the name of a requirement and
-    Returns a set of strings, each representing the name of a requirement or
+    Returns a set of strings, representing the name of a requirement and
+    its parent package that was in causes
+    """
+    causes_names = set()
+    for cause in causes:
+        causes_names.add(cause.requirement.name)
+        if cause.parent:
+            causes_names.add(cause.parent.name)
+
+    return causes_names
+
+
+def _causes_with_conflicting_parent(
+    causes: Iterable["PreferenceInformation"],
+) -> List["PreferenceInformation"]:
+    """
+    Identifies causes that conflict because their parent package requirements
+    are not satisfied by another cause, or vice versa.
+
+    :params causes: An iterable  sequence of PreferenceInformation
+
+    Returns a list of PreferenceInformation objects that represent the causes
+    where their parent conflicts
+    """
+    # Avoid duplication by keeping track of already identified conflicting
+    # causes by their id
+    conflicting_causes_by_id: dict[int, "PreferenceInformation"] = {}
+    all_causes_by_id = {id(c): c for c in causes}
+
+    # Map cause IDs and parent packages by parent name for quick lookup
+    causes_ids_and_parents_by_parent_name: dict[
+        str, list[tuple[int, Candidate]]
+    ] = collections.defaultdict(list)
+    for cause_id, cause in all_causes_by_id.items():
+        if cause.parent:
+            causes_ids_and_parents_by_parent_name[cause.parent.name].append(
+                (cause_id, cause.parent)
+            )
+
+    # Identify a cause's requirement conflicts with another cause's parent
+    for cause_id, cause in all_causes_by_id.items():
+        if cause_id in conflicting_causes_by_id:
+            continue
+
+        cause_id_and_parents = causes_ids_and_parents_by_parent_name.get(
+            cause.requirement.name
+        )
+        if not cause_id_and_parents:
+            continue
+
+        for other_cause_id, parent in cause_id_and_parents:
+            if not cause.requirement.is_satisfied_by(parent):
+                conflicting_causes_by_id[cause_id] = cause
+                conflicting_causes_by_id[other_cause_id] = all_causes_by_id[
+                    other_cause_id
+                ]
+
+    return list(conflicting_causes_by_id.values())
+
+
+def _first_causes_with_no_candidates(
+    causes: Sequence["PreferenceInformation"],
+    candidates: Mapping[str, Iterator[Candidate]],
+) -> List["PreferenceInformation"]:
+    """
+    Checks for causes that have no possible candidates to satisfy their
+    requirements. Returns first causes found as iterating candidates can
+    be expensive due to downloading and building packages.
+
+    :params causes: A sequence of PreferenceInformation
+    :params candidates: A mapping of package names to iterators of their candidates
+
+    Returns a list containing the first pair of PreferenceInformation objects
+    that were found which had no satisfying candidates, else if all causes
+    had at least some satisfying candidate an empty list is returned.
+    """
+    # Group causes by package name to reduce the comparison complexity.
+    causes_by_name: dict[str, list["PreferenceInformation"]] = collections.defaultdict(
+        list
+    )
+    for cause in causes:
+        causes_by_name[cause.requirement.project_name].append(cause)
+
+    # Check for cause pairs within the same package that have incompatible specifiers.
+    for cause_name, causes_list in causes_by_name.items():
+        if len(causes_list) < 2:
+            continue
+
+        while causes_list:
+            cause = causes_list.pop()
+            candidate = cause.requirement.get_candidate_lookup()[1]
+            if candidate is None:
+                continue
+
+            for other_cause in causes_list:
+                other_candidate = other_cause.requirement.get_candidate_lookup()[1]
+                if other_candidate is None:
+                    continue
+
+                # Check if no candidate can match the combined specifier
+                combined_specifier = candidate.specifier & other_candidate.specifier
+                possible_candidates = candidates.get(cause_name)
+
+                # If no candidates have been provided then by default the
+                # causes have no candidates
+                if possible_candidates is None:
+                    return [cause, other_cause]
+
+                # Use any and contains version here instead of filter so
+                # if a version is found that matches it will short curcuit
+                # iterating through possible candidates
+                if not any(
+                    combined_specifier.contains(c.version) for c in possible_candidates
+                ):
+                    return [cause, other_cause]
+
+    return []
+
+
 class PipProvider(_ProviderBase):
     """Pip's provider implementation for resolvelib.
 
@@ -256,3 +385,58 @@ def is_backtrack_cause(
             if backtrack_cause.parent and identifier == backtrack_cause.parent.name:
                 return True
         return False
+
+    def narrow_requirement_selection(
+        self,
+        identifiers: Iterable[str],
+        resolutions: Mapping[str, Candidate],
+        candidates: Mapping[str, Iterator[Candidate]],
+        information: Mapping[str, Iterable["PreferenceInformation"]],
+        backtrack_causes: Sequence["PreferenceInformation"],
+    ) -> Iterable[str]:
+        """
+        Narrows down the selection of requirements to consider for the next
+        resolution step.
+
+        This method uses principles of conflict-driven clause learning (CDCL)
+        to focus on the closest conflicts first.
+
+        :params identifiers: Iterable of requirement names currently under
+            consideration.
+        :params resolutions: Current mapping of resolved package identifiers
+            to their selected candidates.
+        :params candidates: Mapping of each package's possible candidates.
+        :params information: Mapping of requirement information for each package.
+        :params backtrack_causes: Sequence of requirements, if non-empty,
+            were the cause of the resolver backtracking
+
+        Returns:
+            An iterable of requirement names that the resolver will use to
+            limit the next step of resolution
+        """
+
+        # If there are 2 or less causes then finding conflicts between
+        # them is not required as there will always be a minumum of two
+        # conflicts
+        if len(backtrack_causes) < 3:
+            return identifiers
+
+        # First, try to resolve direct causes based on conflicting parent packages
-        # First, try to resolve direct causes based on conflicting parent packages
+        # If some of the causes have conflicting parents, focus on them.
-        # First, try to resolve direct causes based on conflicting parent packages
+        # If some of the causes have conflicting parents, focus on them.
+        direct_causes = _causes_with_conflicting_parent(backtrack_causes)
+        if not direct_causes:
+            # If no conflicting parent packages found try to find some causes
-            # If no conflicting parent packages found try to find some causes
+            # Otherwise, try to find some causes
-            # If no conflicting parent packages found try to find some causes
+            # Otherwise, try to find some causes
+            # that share the same requirement name but no common candidate,
-            # that share the same requirement name but no common candidate,
+            # that share the same requirement name but no common candidate.
-            # that share the same requirement name but no common candidate,
+            # that share the same requirement name but no common candidate.
+            # we take the first one of these as iterating through candidates
-            # we take the first one of these as iterating through candidates
+            # Stop after finding the first one of these as iterating through candidates
-            # we take the first one of these as iterating through candidates
+            # Stop after finding the first one of these as iterating through candidates
+            # is potentially expensive
+            direct_causes = _first_causes_with_no_candidates(
+                backtrack_causes, candidates
+            )
+        if direct_causes:
-        if direct_causes:
+        # If we found some causes worth focusing on, use them.
+        if direct_causes:
-        if direct_causes:
+        # If we found some causes worth focusing on, use them.
+        if direct_causes:
+            backtrack_causes = direct_causes
+
+        # Filter identifiers based on the narrowed down causes.
+        unsatisfied_causes_names = set(
+            identifiers
+        ) & _extract_names_from_causes_and_parents(backtrack_causes)
+
+        return unsatisfied_causes_names if unsatisfied_causes_names else identifiers
diff --git a/src/pip/_vendor/resolvelib/providers.py b/src/pip/_vendor/resolvelib/providers.py
@@ -131,3 +131,55 @@ def resolve(self, requirements, **kwargs):
         :raises: ``self.base_exception`` or its subclass.
         """
         raise NotImplementedError
+
+    def narrow_requirement_selection(
+        self, identifiers, resolutions, candidates, information, backtrack_causes
+    ):
+        """
+        Narrows the selection of requirements being considered during
+        resolution.
+
+        The requirement selection is defined as "The possible requirements
+        that will be resolved next." If a requirement isn't part of the returned
+        iterable, it will not be considered during the next step of resolution.
+
+        :param identifiers: An iterable of `identifier` as returned by
+            ``identify()``. These identify all requirements currently being
+            considered.
+        :param resolutions: Mapping of candidates currently pinned by the
+            resolver. Each key is an identifier, and the value is a candidate.
+            The candidate may conflict with requirements from ``information``.
+        :param candidates: Mapping of each dependency's possible candidates.
+            Each value is an iterator of candidates.
+        :param information: Mapping of requirement information of each package.
+            Each value is an iterator of *requirement information*.
+        :param backtrack_causes: Sequence of *requirement information* that are
+            the requirements that caused the resolver to most recently
+            backtrack.
+
+        A *requirement information* instance is a named tuple with two members:
+
+        * ``requirement`` specifies a requirement contributing to the current
+          list of candidates.
+        * ``parent`` specifies the candidate that provides (depended on) the
+          requirement, or ``None`` to indicate a root requirement.
+
+        Must return a non-empty subset of `identifiers`, with the simplest
+        implementation being to return `identifiers` unchanged.
+
+        Can be used by the provider to optimize the dependency resolution
+        process. `get_preference` will only be called on the identifiers
+        returned. If there is only one identifier returned, then `get_preference`
+        won't be called at all.
+
+        Serving a similar purpose as `get_preference`, this method allows the
+        provider to guide resolvelib through the resolution process. It should
+        be used instead of `get_preference` when the provider needs to consider
+        multiple identifiers simultaneously, or when the provider wants to skip
+        checking all identifiers, e.g., because the checks are prohibitively
+        expensive.
+
+        Returns:
+            Iterable[KT]: A non-empty subset of `identifiers`.
+        """
+        return identifiers
diff --git a/src/pip/_vendor/resolvelib/resolvers.py b/src/pip/_vendor/resolvelib/resolvers.py
@@ -378,6 +378,12 @@ def _patch_criteria():
         # No way to backtrack anymore.
         return False
 
+    def _extract_causes(self, criteron):
+        """Extract causes from list of criterion and deduplicate"""
+        return list(
+            {id(i): i for c in criteron for i in c.information}.values()
+        )
+
     def resolve(self, requirements, max_rounds):
         if self._states:
             raise RuntimeError("already resolved")
@@ -422,12 +428,38 @@ def resolve(self, requirements, max_rounds):
                 unsatisfied_names
             )
 
-            # Choose the most preferred unpinned criterion to try.
-            name = min(unsatisfied_names, key=self._get_preference)
-            failure_causes = self._attempt_to_pin_criterion(name)
+            if len(unsatisfied_names) > 1:
+                narrowed_unstatisfied_names = list(
+                    self._p.narrow_requirement_selection(
+                        identifiers=unsatisfied_names,
+                        resolutions=self.state.mapping,
+                        candidates=IteratorMapping(
+                            self.state.criteria,
+                            operator.attrgetter("candidates"),
+                        ),
+                        information=IteratorMapping(
+                            self.state.criteria,
+                            operator.attrgetter("information"),
+                        ),
+                        backtrack_causes=self.state.backtrack_causes,
+                    )
+                )
+            else:
+                narrowed_unstatisfied_names = unsatisfied_names
+
+            # If there is only 1 unsatisfied name skip calling self._get_preference
+            if len(narrowed_unstatisfied_names) > 1:
+                # Choose the most preferred unpinned criterion to try.
+                name = min(
+                    narrowed_unstatisfied_names, key=self._get_preference
+                )
+            else:
+                name = narrowed_unstatisfied_names[0]
+
+            failure_criterion = self._attempt_to_pin_criterion(name)
 
-            if failure_causes:
-                causes = [i for c in failure_causes for i in c.information]
+            if failure_criterion:
+                causes = self._extract_causes(failure_criterion)
                 # Backjump if pinning fails. The backjump process puts us in
                 # an unpinned state, so we can work on it in the next round.
                 self._r.resolving_conflicts(causes=causes)