From f5be9697529e51e2a3da6208efd55afa42b85a2a Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Thu, 18 May 2023 18:09:30 -0500 Subject: [PATCH 1/4] perf: don't clear the entire contradicted_incompatibilities cache when backtracking (#7950) --- src/poetry/mixology/version_solver.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/poetry/mixology/version_solver.py b/src/poetry/mixology/version_solver.py index cb08ec61fe0..19de00afccc 100644 --- a/src/poetry/mixology/version_solver.py +++ b/src/poetry/mixology/version_solver.py @@ -1,5 +1,6 @@ from __future__ import annotations +import collections import functools import time @@ -94,7 +95,9 @@ def __init__(self, root: ProjectPackage, provider: Provider) -> None: self._provider = provider self._dependency_cache = DependencyCache(provider) self._incompatibilities: dict[str, list[Incompatibility]] = {} - self._contradicted_incompatibilities: set[Incompatibility] = set() + self._contradicted_incompatibilities: dict[int, set[Incompatibility]] = ( + collections.defaultdict(set) + ) self._solution = PartialSolution() @property @@ -143,7 +146,10 @@ def _propagate(self, package: str) -> None: # we can derive stronger assignments sooner and more eagerly find # conflicts. for incompatibility in reversed(self._incompatibilities[package]): - if incompatibility in self._contradicted_incompatibilities: + if any( + incompatibility in c + for c in self._contradicted_incompatibilities.values() + ): continue result = self._propagate_incompatibility(incompatibility) @@ -192,7 +198,9 @@ def _propagate_incompatibility( # If term is already contradicted by _solution, then # incompatibility is contradicted as well and there's nothing new we # can deduce from it. - self._contradicted_incompatibilities.add(incompatibility) + self._contradicted_incompatibilities[self._solution.decision_level].add( + incompatibility + ) return None elif relation == SetRelation.OVERLAPPING: # If more than one term is inconclusive, we can't deduce anything about @@ -210,7 +218,9 @@ def _propagate_incompatibility( if unsatisfied is None: return _conflict - self._contradicted_incompatibilities.add(incompatibility) + self._contradicted_incompatibilities[self._solution.decision_level].add( + incompatibility + ) adverb = "not " if unsatisfied.is_positive() else "" self._log(f"derived: {adverb}{unsatisfied.dependency}") @@ -304,8 +314,12 @@ def _resolve_conflict(self, incompatibility: Incompatibility) -> Incompatibility previous_satisfier_level < most_recent_satisfier.decision_level or most_recent_satisfier.cause is None ): + for level in range( + self._solution.decision_level, previous_satisfier_level, -1 + ): + self._contradicted_incompatibilities.pop(level, None) + self._solution.backtrack(previous_satisfier_level) - self._contradicted_incompatibilities.clear() self._dependency_cache.clear() if new_incompatibility: self._add_incompatibility(incompatibility) From 5eaabbd31baf6461a9814bbe75465dcbe1662cc1 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Thu, 18 May 2023 19:54:55 -0500 Subject: [PATCH 2/4] perf: don't clear the entire dependency cache when backtracking (#7950) --- src/poetry/mixology/version_solver.py | 56 ++++++++++++------- .../version_solver/test_dependency_cache.py | 55 +++++++++++++++--- 2 files changed, 83 insertions(+), 28 deletions(-) diff --git a/src/poetry/mixology/version_solver.py b/src/poetry/mixology/version_solver.py index 19de00afccc..81dde5d3369 100644 --- a/src/poetry/mixology/version_solver.py +++
b/src/poetry/mixology/version_solver.py @@ -39,15 +39,20 @@ class DependencyCache: """ def __init__(self, provider: Provider) -> None: - self.provider = provider - self.cache: dict[ - tuple[str, str | None, str | None, str | None, str | None], - list[DependencyPackage], - ] = {} + self._provider = provider + self._cache: dict[ + int, + dict[ + tuple[str, str | None, str | None, str | None, str | None], + list[DependencyPackage], + ], + ] = collections.defaultdict(dict) self.search_for = functools.lru_cache(maxsize=128)(self._search_for) - def _search_for(self, dependency: Dependency) -> list[DependencyPackage]: + def _search_for( + self, dependency: Dependency, level: int + ) -> list[DependencyPackage]: key = ( dependency.complete_name, dependency.source_type, @@ -56,12 +61,17 @@ def _search_for(self, dependency: Dependency) -> list[DependencyPackage]: dependency.source_subdirectory, ) - packages = self.cache.get(key) - - if packages: - packages = [ - p for p in packages if dependency.constraint.allows(p.package.version) - ] + for check_level in range(level, -1, -1): + packages = self._cache[check_level].get(key) + if packages is not None: + packages = [ + p + for p in packages + if dependency.constraint.allows(p.package.version) + ] + break + else: + packages = None # provider.search_for() normally does not include pre-release packages # (unless requested), but will include them if there are no other @@ -71,14 +81,14 @@ def _search_for(self, dependency: Dependency) -> list[DependencyPackage]: # nothing, we need to call provider.search_for() again as it may return # additional results this time. if not packages: - packages = self.provider.search_for(dependency) - - self.cache[key] = packages + packages = self._provider.search_for(dependency) + self._cache[level][key] = packages return packages - def clear(self) -> None: - self.cache.clear() + def clear_level(self, level: int) -> None: + self.search_for.cache_clear() + self._cache.pop(level, None) class VersionSolver: @@ -318,9 +328,9 @@ def _resolve_conflict(self, incompatibility: Incompatibility) -> Incompatibility self._solution.decision_level, previous_satisfier_level, -1 ): self._contradicted_incompatibilities.pop(level, None) + self._dependency_cache.clear_level(level) self._solution.backtrack(previous_satisfier_level) - self._dependency_cache.clear() if new_incompatibility: self._add_incompatibility(incompatibility) @@ -418,7 +428,11 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: if locked: return is_specific_marker, Preference.LOCKED, 1 - num_packages = len(self._dependency_cache.search_for(dependency)) + num_packages = len( + self._dependency_cache.search_for( + dependency, self._solution.decision_level + ) + ) if num_packages < 2: preference = Preference.NO_CHOICE @@ -435,7 +449,9 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: locked = self._provider.get_locked(dependency) if locked is None: - packages = self._dependency_cache.search_for(dependency) + packages = self._dependency_cache.search_for( + dependency, self._solution.decision_level + ) package = next(iter(packages), None) if package is None: diff --git a/tests/mixology/version_solver/test_dependency_cache.py b/tests/mixology/version_solver/test_dependency_cache.py index dffa7e535be..b3bd8721da4 100644 --- a/tests/mixology/version_solver/test_dependency_cache.py +++ b/tests/mixology/version_solver/test_dependency_cache.py @@ -2,6 +2,7 @@ from copy import deepcopy from typing import TYPE_CHECKING +from unittest import mock from 
poetry.factory import Factory from poetry.mixology.version_solver import DependencyCache @@ -32,14 +33,14 @@ def test_solver_dependency_cache_respects_source_type( cache.search_for.cache_clear() # ensure cache was never hit for both calls - cache.search_for(dependency_pypi) - cache.search_for(dependency_git) + cache.search_for(dependency_pypi, 0) + cache.search_for(dependency_git, 0) assert not cache.search_for.cache_info().hits # increase test coverage by searching for copies # (when searching for the exact same object, __eq__ is never called) - packages_pypi = cache.search_for(deepcopy(dependency_pypi)) - packages_git = cache.search_for(deepcopy(dependency_git)) + packages_pypi = cache.search_for(deepcopy(dependency_pypi), 0) + packages_git = cache.search_for(deepcopy(dependency_git), 0) assert cache.search_for.cache_info().hits == 2 assert cache.search_for.cache_info().currsize == 2 @@ -60,6 +61,44 @@ def test_solver_dependency_cache_respects_source_type( assert package_git.package.source_resolved_reference == MOCK_DEFAULT_GIT_REVISION +def test_solver_dependency_cache_pulls_from_prior_level_cache( + root: ProjectPackage, provider: Provider, repo: Repository +) -> None: + dependency_pypi = Factory.create_dependency("demo", ">=0.1.0") + root.add_dependency(dependency_pypi) + add_to_repo(repo, "demo", "1.0.0") + + wrapped_provider = mock.Mock(wraps=provider) + cache = DependencyCache(wrapped_provider) + cache.search_for.cache_clear() + + # On first call, provider.search_for() should be called and the level-0 + # cache populated. + cache.search_for(dependency_pypi, 0) + assert len(wrapped_provider.search_for.mock_calls) == 1 + assert ("demo", None, None, None, None) in cache._cache[0] + assert cache.search_for.cache_info().hits == 0 + assert cache.search_for.cache_info().misses == 1 + + # On second call at level 1, provider.search_for() should not be called + # again and the level-1 cache should be populated from the level-0 cache. + cache.search_for(dependency_pypi, 1) + assert len(wrapped_provider.search_for.mock_calls) == 1 + assert ("demo", None, None, None, None) in cache._cache[1] + assert cache._cache[0] == cache._cache[1] + assert cache.search_for.cache_info().hits == 0 + assert cache.search_for.cache_info().misses == 2 + + # Clearing the level 1 cache should invalidate the lru_cache on + # cache.search_for and wipe out the level 1 cache while preserving the + # level 0 cache. 
+ cache.clear_level(1) + assert set(cache._cache.keys()) == {0} + assert ("demo", None, None, None, None) in cache._cache[0] + assert cache.search_for.cache_info().hits == 0 + assert cache.search_for.cache_info().misses == 0 + + def test_solver_dependency_cache_respects_subdirectories( root: ProjectPackage, provider: Provider, repo: Repository ) -> None: @@ -87,14 +126,14 @@ def test_solver_dependency_cache_respects_subdirectories( cache.search_for.cache_clear() # ensure cache was never hit for both calls - cache.search_for(dependency_one) - cache.search_for(dependency_one_copy) + cache.search_for(dependency_one, 0) + cache.search_for(dependency_one_copy, 0) assert not cache.search_for.cache_info().hits # increase test coverage by searching for copies # (when searching for the exact same object, __eq__ is never called) - packages_one = cache.search_for(deepcopy(dependency_one)) - packages_one_copy = cache.search_for(deepcopy(dependency_one_copy)) + packages_one = cache.search_for(deepcopy(dependency_one), 0) + packages_one_copy = cache.search_for(deepcopy(dependency_one_copy), 0) assert cache.search_for.cache_info().hits == 2 assert cache.search_for.cache_info().currsize == 2 From c3013682588701592a3b9f4de116ce218b3535dd Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Fri, 19 May 2023 13:21:51 -0500 Subject: [PATCH 3/4] perf: avoid iterating through multiple caches for contradicted_incompatibilities (#7950) --- src/poetry/mixology/version_solver.py | 30 ++++++++++++++------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/poetry/mixology/version_solver.py b/src/poetry/mixology/version_solver.py index 81dde5d3369..e9ab5e150cd 100644 --- a/src/poetry/mixology/version_solver.py +++ b/src/poetry/mixology/version_solver.py @@ -105,9 +105,10 @@ def __init__(self, root: ProjectPackage, provider: Provider) -> None: self._provider = provider self._dependency_cache = DependencyCache(provider) self._incompatibilities: dict[str, list[Incompatibility]] = {} - self._contradicted_incompatibilities: dict[int, set[Incompatibility]] = ( - collections.defaultdict(set) - ) + self._contradicted_incompatibilities: set[Incompatibility] = set() + self._contradicted_incompatibilities_by_level: dict[ + int, set[Incompatibility] + ] = collections.defaultdict(set) self._solution = PartialSolution() @property @@ -156,10 +157,7 @@ def _propagate(self, package: str) -> None: # we can derive stronger assignments sooner and more eagerly find # conflicts. for incompatibility in reversed(self._incompatibilities[package]): - if any( - incompatibility in c - for c in self._contradicted_incompatibilities.values() - ): + if incompatibility in self._contradicted_incompatibilities: continue result = self._propagate_incompatibility(incompatibility) @@ -208,9 +206,10 @@ def _propagate_incompatibility( # If term is already contradicted by _solution, then # incompatibility is contradicted as well and there's nothing new we # can deduce from it. 
- self._contradicted_incompatibilities[self._solution.decision_level].add( - incompatibility - ) + self._contradicted_incompatibilities.add(incompatibility) + self._contradicted_incompatibilities_by_level[ + self._solution.decision_level + ].add(incompatibility) return None elif relation == SetRelation.OVERLAPPING: # If more than one term is inconclusive, we can't deduce anything about @@ -228,9 +227,10 @@ def _propagate_incompatibility( if unsatisfied is None: return _conflict - self._contradicted_incompatibilities[self._solution.decision_level].add( - incompatibility - ) + self._contradicted_incompatibilities.add(incompatibility) + self._contradicted_incompatibilities_by_level[ + self._solution.decision_level + ].add(incompatibility) adverb = "not " if unsatisfied.is_positive() else "" self._log(f"derived: {adverb}{unsatisfied.dependency}") @@ -327,7 +327,9 @@ def _resolve_conflict(self, incompatibility: Incompatibility) -> Incompatibility for level in range( self._solution.decision_level, previous_satisfier_level, -1 ): - self._contradicted_incompatibilities.pop(level, None) + self._contradicted_incompatibilities.difference_update( + self._contradicted_incompatibilities_by_level.pop(level, set()), + ) self._dependency_cache.clear_level(level) self._solution.backtrack(previous_satisfier_level) From f19b20e678574b486cd6ad2d0718c90a29bd101d Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Fri, 19 May 2023 13:48:47 -0500 Subject: [PATCH 4/4] perf: improve DependencyCache lru_cache hit rate, avoid iterating through lots of levels (#7950) --- src/poetry/mixology/version_solver.py | 94 ++++++++++++------- .../version_solver/test_dependency_cache.py | 77 +++++++++------ 2 files changed, 111 insertions(+), 60 deletions(-) diff --git a/src/poetry/mixology/version_solver.py b/src/poetry/mixology/version_solver.py index e9ab5e150cd..e3c122838d1 100644 --- a/src/poetry/mixology/version_solver.py +++ b/src/poetry/mixology/version_solver.py @@ -5,6 +5,8 @@ import time from typing import TYPE_CHECKING +from typing import Optional +from typing import Tuple from poetry.core.packages.dependency import Dependency @@ -29,6 +31,11 @@ _conflict = object() +DependencyCacheKey = Tuple[ + str, Optional[str], Optional[str], Optional[str], Optional[str] +] + + class DependencyCache: """ A cache of the valid dependencies. @@ -40,36 +47,37 @@ class DependencyCache: def __init__(self, provider: Provider) -> None: self._provider = provider - self._cache: dict[ - int, - dict[ - tuple[str, str | None, str | None, str | None, str | None], - list[DependencyPackage], - ], - ] = collections.defaultdict(dict) - self.search_for = functools.lru_cache(maxsize=128)(self._search_for) + # self._cache maps a package name to a stack of cached package lists, + # ordered by the decision level which added them to the cache. This is + # done so that when backtracking we can maintain cache entries from + # previous decision levels, while clearing cache entries from only the + # rolled back levels. + # + # In order to maintain the integrity of the cache, `clear_level()` + # needs to be called in descending order as decision levels are + # backtracked so that the correct items can be popped from the stack. 
+ self._cache: dict[DependencyCacheKey, list[list[DependencyPackage]]] = ( + collections.defaultdict(list) + ) + self._cached_dependencies_by_level: dict[int, list[DependencyCacheKey]] = ( + collections.defaultdict(list) + ) + + self._search_for_cached = functools.lru_cache(maxsize=128)(self._search_for) def _search_for( - self, dependency: Dependency, level: int + self, + dependency: Dependency, + key: DependencyCacheKey, ) -> list[DependencyPackage]: - key = ( - dependency.complete_name, - dependency.source_type, - dependency.source_url, - dependency.source_reference, - dependency.source_subdirectory, - ) - - for check_level in range(level, -1, -1): - packages = self._cache[check_level].get(key) - if packages is not None: - packages = [ - p - for p in packages - if dependency.constraint.allows(p.package.version) - ] - break + cache_entries = self._cache[key] + if cache_entries: + packages = [ + p + for p in cache_entries[-1] + if dependency.constraint.allows(p.package.version) + ] else: packages = None @@ -83,12 +91,33 @@ def _search_for( if not packages: packages = self._provider.search_for(dependency) - self._cache[level][key] = packages + return packages + + def search_for( + self, + dependency: Dependency, + decision_level: int, + ) -> list[DependencyPackage]: + key = ( + dependency.complete_name, + dependency.source_type, + dependency.source_url, + dependency.source_reference, + dependency.source_subdirectory, + ) + + packages = self._search_for_cached(dependency, key) + if not self._cache[key] or self._cache[key][-1] is not packages: + self._cache[key].append(packages) + self._cached_dependencies_by_level[decision_level].append(key) + return packages def clear_level(self, level: int) -> None: - self.search_for.cache_clear() - self._cache.pop(level, None) + if level in self._cached_dependencies_by_level: + self._search_for_cached.cache_clear() + for key in self._cached_dependencies_by_level.pop(level): + self._cache[key].pop() class VersionSolver: @@ -327,9 +356,10 @@ def _resolve_conflict(self, incompatibility: Incompatibility) -> Incompatibility for level in range( self._solution.decision_level, previous_satisfier_level, -1 ): - self._contradicted_incompatibilities.difference_update( - self._contradicted_incompatibilities_by_level.pop(level, set()), - ) + if level in self._contradicted_incompatibilities_by_level: + self._contradicted_incompatibilities.difference_update( + self._contradicted_incompatibilities_by_level.pop(level), + ) self._dependency_cache.clear_level(level) self._solution.backtrack(previous_satisfier_level) diff --git a/tests/mixology/version_solver/test_dependency_cache.py b/tests/mixology/version_solver/test_dependency_cache.py index b3bd8721da4..96f1a3b6329 100644 --- a/tests/mixology/version_solver/test_dependency_cache.py +++ b/tests/mixology/version_solver/test_dependency_cache.py @@ -30,20 +30,20 @@ def test_solver_dependency_cache_respects_source_type( add_to_repo(repo, "demo", "1.0.0") cache = DependencyCache(provider) - cache.search_for.cache_clear() + cache._search_for_cached.cache_clear() # ensure cache was never hit for both calls cache.search_for(dependency_pypi, 0) cache.search_for(dependency_git, 0) - assert not cache.search_for.cache_info().hits + assert not cache._search_for_cached.cache_info().hits # increase test coverage by searching for copies # (when searching for the exact same object, __eq__ is never called) packages_pypi = cache.search_for(deepcopy(dependency_pypi), 0) packages_git = cache.search_for(deepcopy(dependency_git), 0) - assert 
cache.search_for.cache_info().hits == 2 - assert cache.search_for.cache_info().currsize == 2 + assert cache._search_for_cached.cache_info().hits == 2 + assert cache._search_for_cached.cache_info().currsize == 2 assert len(packages_pypi) == len(packages_git) == 1 assert packages_pypi != packages_git @@ -65,38 +65,59 @@ def test_solver_dependency_cache_pulls_from_prior_level_cache( root: ProjectPackage, provider: Provider, repo: Repository ) -> None: dependency_pypi = Factory.create_dependency("demo", ">=0.1.0") + dependency_pypi_constrained = Factory.create_dependency("demo", ">=0.1.0,<2.0.0") root.add_dependency(dependency_pypi) + root.add_dependency(dependency_pypi_constrained) add_to_repo(repo, "demo", "1.0.0") wrapped_provider = mock.Mock(wraps=provider) cache = DependencyCache(wrapped_provider) - cache.search_for.cache_clear() + cache._search_for_cached.cache_clear() - # On first call, provider.search_for() should be called and the level-0 - # cache populated. + # On first call, provider.search_for() should be called and the cache + # populated. cache.search_for(dependency_pypi, 0) assert len(wrapped_provider.search_for.mock_calls) == 1 - assert ("demo", None, None, None, None) in cache._cache[0] - assert cache.search_for.cache_info().hits == 0 - assert cache.search_for.cache_info().misses == 1 - - # On second call at level 1, provider.search_for() should not be called - # again and the level-1 cache should be populated from the level-0 cache. + assert ("demo", None, None, None, None) in cache._cache + assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0] + assert cache._search_for_cached.cache_info().hits == 0 + assert cache._search_for_cached.cache_info().misses == 1 + + # On second call at level 1, neither provider.search_for() nor + # cache._search_for_cached() should have been called again, and the cache + # should remain the same. cache.search_for(dependency_pypi, 1) assert len(wrapped_provider.search_for.mock_calls) == 1 - assert ("demo", None, None, None, None) in cache._cache[1] - assert cache._cache[0] == cache._cache[1] - assert cache.search_for.cache_info().hits == 0 - assert cache.search_for.cache_info().misses == 2 - - # Clearing the level 1 cache should invalidate the lru_cache on - # cache.search_for and wipe out the level 1 cache while preserving the + assert ("demo", None, None, None, None) in cache._cache + assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0] + assert set(cache._cached_dependencies_by_level.keys()) == {0} + assert cache._search_for_cached.cache_info().hits == 1 + assert cache._search_for_cached.cache_info().misses == 1 + + # The third call at level 2, with an updated constraint for the `demo` + # package, should not call provider.search_for(), but should call + # cache._search_for_cached() and update the cache. + cache.search_for(dependency_pypi_constrained, 2) + assert len(wrapped_provider.search_for.mock_calls) == 1 + assert ("demo", None, None, None, None) in cache._cache + assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0] + assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[2] + assert set(cache._cached_dependencies_by_level.keys()) == {0, 2} + assert cache._search_for_cached.cache_info().hits == 1 + assert cache._search_for_cached.cache_info().misses == 2 + + # Clearing the level 2 and level 1 caches should invalidate the lru_cache + # on cache._search_for_cached and wipe out the level 2 cache while preserving the # level 0 cache.
+ cache.clear_level(2) cache.clear_level(1) - assert set(cache._cache.keys()) == {0} - assert ("demo", None, None, None, None) in cache._cache[0] - assert cache.search_for.cache_info().hits == 0 - assert cache.search_for.cache_info().misses == 0 + cache.search_for(dependency_pypi, 0) + assert len(wrapped_provider.search_for.mock_calls) == 1 + assert ("demo", None, None, None, None) in cache._cache + assert ("demo", None, None, None, None) in cache._cached_dependencies_by_level[0] + assert set(cache._cached_dependencies_by_level.keys()) == {0} + assert cache._search_for_cached.cache_info().hits == 0 + assert cache._search_for_cached.cache_info().misses == 1 def test_solver_dependency_cache_respects_subdirectories( @@ -123,20 +144,20 @@ def test_solver_dependency_cache_respects_subdirectories( root.add_dependency(dependency_one_copy) cache = DependencyCache(provider) - cache.search_for.cache_clear() + cache._search_for_cached.cache_clear() # ensure cache was never hit for both calls cache.search_for(dependency_one, 0) cache.search_for(dependency_one_copy, 0) - assert not cache.search_for.cache_info().hits + assert not cache._search_for_cached.cache_info().hits # increase test coverage by searching for copies # (when searching for the exact same object, __eq__ is never called) packages_one = cache.search_for(deepcopy(dependency_one), 0) packages_one_copy = cache.search_for(deepcopy(dependency_one_copy), 0) - assert cache.search_for.cache_info().hits == 2 - assert cache.search_for.cache_info().currsize == 2 + assert cache._search_for_cached.cache_info().hits == 2 + assert cache._search_for_cached.cache_info().currsize == 2 assert len(packages_one) == len(packages_one_copy) == 1
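
The level-indexed cache structure that this series converges on is a general pattern. The following minimal standalone sketch (not Poetry's actual implementation; the names LevelCache, put, get, and clear_level are illustrative) shows the core idea of the final design: each key holds a stack of cached values, a per-level index records which keys grew at each decision level, and backtracking pops only the entries added by the rolled-back levels, mirroring DependencyCache._cache and _cached_dependencies_by_level above.

from __future__ import annotations

import collections


class LevelCache:
    """Backtracking-friendly cache: values are stacked per key and indexed
    by the decision level that added them (a sketch of the pattern used by
    DependencyCache in the patches above; all names here are illustrative).
    """

    def __init__(self) -> None:
        # key -> stack of cached values, ordered by the decision level
        # that pushed them
        self._cache: dict[str, list[object]] = collections.defaultdict(list)
        # decision level -> keys whose stacks grew at that level
        self._keys_by_level: dict[int, list[str]] = collections.defaultdict(list)

    def put(self, key: str, value: object, level: int) -> None:
        # Push only when the value is new; an identity check suffices when
        # values come from an lru_cache, which returns the same object for
        # repeated equivalent lookups (the property patch 4 relies on).
        if not self._cache[key] or self._cache[key][-1] is not value:
            self._cache[key].append(value)
            self._keys_by_level[level].append(key)

    def get(self, key: str) -> object | None:
        stack = self._cache[key]
        return stack[-1] if stack else None

    def clear_level(self, level: int) -> None:
        # Must be called in descending order while backtracking so the most
        # recently pushed entries are popped first, as the comment in
        # patch 4 requires for DependencyCache.clear_level().
        for key in self._keys_by_level.pop(level, []):
            self._cache[key].pop()


packages = ["demo-1.0.0"]
cache = LevelCache()
cache.put("demo", packages, 0)        # level 0 populates the cache
cache.put("demo", packages, 1)        # same object again: stack unchanged
cache.put("demo", ["demo-0.9.0"], 2)  # tightened constraint pushes a new entry
cache.clear_level(2)                  # backtracking pops only the level-2 entry
assert cache.get("demo") == ["demo-1.0.0"]

Poetry's real DependencyCache layers an lru_cache (_search_for_cached) over this structure, so repeated identical lookups are cheap and its identity-stable return values make the is-not check above reliable; clear_level() additionally calls cache_clear() on that lru_cache. The by-level bookkeeping is the same pattern patch 3 applies to contradicted incompatibilities, there with a set and difference_update() instead of per-key stacks.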