From 36332d25a75ad157ca2d86ef6ffad5102d3bc84a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Randy=20D=C3=B6ring?= <30527984+radoering@users.noreply.github.com> Date: Thu, 17 Aug 2023 06:26:41 +0200 Subject: [PATCH] solver: invert heuristics for choosing the next dependency to resolve so that dependencies with more versions are resolved first (#8256) Although the original algorithm proposes to choose dependencies with fewer versions first, we don't know about any real-world examples where this is important. However, we know about real-world examples (especially boto3 vs. urllib3, but also Sphinx vs. docutils) where it's better to choose dependencies with more versions first. Co-authored-by: David Hotham --- src/poetry/mixology/version_solver.py | 15 ++-- .../version_solver/test_backtracking.py | 82 +++++++++++++------ tests/puzzle/test_solver.py | 20 ++--- 3 files changed, 74 insertions(+), 43 deletions(-) diff --git a/src/poetry/mixology/version_solver.py b/src/poetry/mixology/version_solver.py index b97a4ebed04..5c613a77f0a 100644 --- a/src/poetry/mixology/version_solver.py +++ b/src/poetry/mixology/version_solver.py @@ -440,8 +440,13 @@ class Preference: LOCKED = 3 DEFAULT = 4 - # Prefer packages with as few remaining versions as possible, - # so that if a conflict is necessary it's forced quickly. + # The original algorithm proposes to prefer packages with as few remaining + # versions as possible, so that if a conflict is necessary it's forced quickly. + # https://github.com/dart-lang/pub/blob/master/doc/solver.md#decision-making + # However, this leads to the famous boto3 vs. urllib3 issue, so we prefer + # packages with more remaining versions (see + # https://github.com/python-poetry/poetry/pull/8255#issuecomment-1657198242 + # for more details). # In order to provide results that are as deterministic as possible # and consistent between `poetry lock` and `poetry update`, the return value # of two different dependencies should not be equal if possible. @@ -450,7 +455,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: # a regular dependency for some package only to find later that we had a # direct-origin dependency. if dependency.is_direct_origin(): - return False, Preference.DIRECT_ORIGIN, 1 + return False, Preference.DIRECT_ORIGIN, -1 is_specific_marker = not dependency.marker.is_any() @@ -458,7 +463,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: if not use_latest: locked = self._provider.get_locked(dependency) if locked: - return is_specific_marker, Preference.LOCKED, 1 + return is_specific_marker, Preference.LOCKED, -1 num_packages = len( self._dependency_cache.search_for( @@ -472,7 +477,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: preference = Preference.USE_LATEST else: preference = Preference.DEFAULT - return is_specific_marker, preference, num_packages + return is_specific_marker, preference, -num_packages dependency = min(unsatisfied, key=_get_min) diff --git a/tests/mixology/version_solver/test_backtracking.py b/tests/mixology/version_solver/test_backtracking.py index 68e8f638746..c45ebf73c7a 100644 --- a/tests/mixology/version_solver/test_backtracking.py +++ b/tests/mixology/version_solver/test_backtracking.py @@ -67,7 +67,7 @@ def test_backjumps_after_partial_satisfier( add_to_repo(repo, "y", "1.0.0") add_to_repo(repo, "y", "2.0.0") - check_solver_result(root, provider, {"c": "1.0.0", "y": "2.0.0"}, tries=2) + check_solver_result(root, provider, {"c": "1.0.0", "y": "2.0.0"}, tries=4) def test_rolls_back_leaf_versions_first( @@ -132,32 +132,6 @@ def test_backjump_to_nearer_unsatisfied_package( ) -def test_traverse_into_package_with_fewer_versions_first( - root: ProjectPackage, provider: Provider, repo: Repository -) -> None: - # Dependencies are ordered so that packages with fewer versions are tried - # first. Here, there are two valid solutions (either a or b must be - # downgraded once). The chosen one depends on which dep is traversed first. - # Since b has fewer versions, it will be traversed first, which means a will - # come later. Since later selections are revised first, a gets downgraded. - root.add_dependency(Factory.create_dependency("a", "*")) - root.add_dependency(Factory.create_dependency("b", "*")) - - add_to_repo(repo, "a", "1.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "2.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "3.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "4.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "5.0.0", deps={"c": "1.0.0"}) - add_to_repo(repo, "b", "1.0.0", deps={"c": "*"}) - add_to_repo(repo, "b", "2.0.0", deps={"c": "*"}) - add_to_repo(repo, "b", "3.0.0", deps={"c": "*"}) - add_to_repo(repo, "b", "4.0.0", deps={"c": "2.0.0"}) - add_to_repo(repo, "c", "1.0.0") - add_to_repo(repo, "c", "2.0.0") - - check_solver_result(root, provider, {"a": "4.0.0", "b": "4.0.0", "c": "2.0.0"}) - - def test_backjump_past_failed_package_on_disjoint_constraint( root: ProjectPackage, provider: Provider, repo: Repository ) -> None: @@ -176,3 +150,57 @@ def test_backjump_past_failed_package_on_disjoint_constraint( add_to_repo(repo, "foo", "2.0.4") check_solver_result(root, provider, {"a": "1.0.0", "foo": "2.0.4"}) + + +def test_backtracking_performance_level_1( + root: ProjectPackage, provider: Provider, repo: Repository +) -> None: + """ + This test takes quite long if an unfavorable heuristics is chosen + to select the next package to resolve. + + B depends on A, but does not support the latest version of A. + B has a lot more versions than A. + + Test for boto3/botocore vs. urllib3 issue in its simple form. + """ + root.add_dependency(Factory.create_dependency("a", "*")) + root.add_dependency(Factory.create_dependency("b", "*")) + + add_to_repo(repo, "a", "1") + add_to_repo(repo, "a", "2") + + b_max = 500 + for i in range(1, b_max + 1): + add_to_repo(repo, "b", str(i), deps={"a": "<=1"}) + + check_solver_result(root, provider, {"a": "1", "b": str(b_max)}) + + +def test_backtracking_performance_level_2( + root: ProjectPackage, provider: Provider, repo: Repository +) -> None: + """ + Similar to test_backtracking_performance_level_1, + but with one more level of dependencies. + + C depends on B depends on A, but B does not support the latest version of A. + The root dependency only requires A and C so there is no direct dependency between + these two. + B and C have a lot more versions than A. + + Test for boto3/botocore vs. urllib3 issue in its more complex form. + """ + root.add_dependency(Factory.create_dependency("a", "*")) + root.add_dependency(Factory.create_dependency("c", "*")) + + add_to_repo(repo, "a", "1") + add_to_repo(repo, "a", "2") + + bc_max = 500 + for i in range(1, bc_max + 1): + add_to_repo(repo, "b", str(i), deps={"a": "<=1"}) + for i in range(1, bc_max + 1): + add_to_repo(repo, "c", str(i), deps={"b": f"<={i}"}) + + check_solver_result(root, provider, {"a": "1", "b": str(bc_max), "c": str(bc_max)}) diff --git a/tests/puzzle/test_solver.py b/tests/puzzle/test_solver.py index 125b29511fb..5b5b880e232 100644 --- a/tests/puzzle/test_solver.py +++ b/tests/puzzle/test_solver.py @@ -4265,7 +4265,7 @@ def test_update_with_use_latest_vs_lock( A1 depends on B2, A2 and A3 depend on B1. Same for C. B1 depends on A2/C2, B2 depends on A1/C1. - Because there are fewer versions B than of A and C, B is resolved first + Because there are more versions of B than of A and C, B is resolved first so that latest version of B is used. There shouldn't be a difference between `poetry lock` (not is_locked) and `poetry update` (is_locked + use_latest) @@ -4277,18 +4277,14 @@ def test_update_with_use_latest_vs_lock( package.add_dependency(Factory.create_dependency("C", "*")) package_a1 = get_package("A", "1") - package_a1.add_dependency(Factory.create_dependency("B", "2")) + package_a1.add_dependency(Factory.create_dependency("B", "3")) package_a2 = get_package("A", "2") package_a2.add_dependency(Factory.create_dependency("B", "1")) - package_a3 = get_package("A", "3") - package_a3.add_dependency(Factory.create_dependency("B", "1")) package_c1 = get_package("C", "1") - package_c1.add_dependency(Factory.create_dependency("B", "2")) + package_c1.add_dependency(Factory.create_dependency("B", "3")) package_c2 = get_package("C", "2") package_c2.add_dependency(Factory.create_dependency("B", "1")) - package_c3 = get_package("C", "3") - package_c3.add_dependency(Factory.create_dependency("B", "1")) package_b1 = get_package("B", "1") package_b1.add_dependency(Factory.create_dependency("A", "2")) @@ -4296,18 +4292,20 @@ def test_update_with_use_latest_vs_lock( package_b2 = get_package("B", "2") package_b2.add_dependency(Factory.create_dependency("A", "1")) package_b2.add_dependency(Factory.create_dependency("C", "1")) + package_b3 = get_package("B", "3") + package_b3.add_dependency(Factory.create_dependency("A", "1")) + package_b3.add_dependency(Factory.create_dependency("C", "1")) repo.add_package(package_a1) repo.add_package(package_a2) - repo.add_package(package_a3) repo.add_package(package_b1) repo.add_package(package_b2) + repo.add_package(package_b3) repo.add_package(package_c1) repo.add_package(package_c2) - repo.add_package(package_c3) if is_locked: - locked = [package_a1, package_b2, package_c1] + locked = [package_a1, package_b3, package_c1] use_latest = [package.name for package in locked] else: locked = [] @@ -4320,7 +4318,7 @@ def test_update_with_use_latest_vs_lock( transaction, [ {"job": "install", "package": package_c1}, - {"job": "install", "package": package_b2}, + {"job": "install", "package": package_b3}, {"job": "install", "package": package_a1}, ], )