From abd6788f675fa7826a7003ee599588e17012db9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Randy=20D=C3=B6ring?= <30527984+radoering@users.noreply.github.com> Date: Sun, 30 Jul 2023 18:06:43 +0200 Subject: [PATCH] solver: invert heuristics for choosing the next dependency to resolve so that dependencies with more versions are resolved first Although the original algorithm proposes to choose dependencies with fewer versions first, we don't know about any real-world examples where this is important. However, we know about real-world examples (especially boto3 vs. urllib3, but also Sphinx vs. docutils) where it's better to choose dependencies with more versions first. Co-authored-by: David Hotham --- src/poetry/mixology/version_solver.py | 15 ++-- .../version_solver/test_backtracking.py | 82 +++++++++++++------ tests/puzzle/test_solver.py | 20 ++--- 3 files changed, 74 insertions(+), 43 deletions(-) diff --git a/src/poetry/mixology/version_solver.py b/src/poetry/mixology/version_solver.py index b97a4ebed04..5c613a77f0a 100644 --- a/src/poetry/mixology/version_solver.py +++ b/src/poetry/mixology/version_solver.py @@ -440,8 +440,13 @@ class Preference: LOCKED = 3 DEFAULT = 4 - # Prefer packages with as few remaining versions as possible, - # so that if a conflict is necessary it's forced quickly. + # The original algorithm proposes to prefer packages with as few remaining + # versions as possible, so that if a conflict is necessary it's forced quickly. + # https://github.com/dart-lang/pub/blob/master/doc/solver.md#decision-making + # However, this leads to the famous boto3 vs. urllib3 issue, so we prefer + # packages with more remaining versions (see + # https://github.com/python-poetry/poetry/pull/8255#issuecomment-1657198242 + # for more details). # In order to provide results that are as deterministic as possible # and consistent between `poetry lock` and `poetry update`, the return value # of two different dependencies should not be equal if possible. @@ -450,7 +455,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: # a regular dependency for some package only to find later that we had a # direct-origin dependency. if dependency.is_direct_origin(): - return False, Preference.DIRECT_ORIGIN, 1 + return False, Preference.DIRECT_ORIGIN, -1 is_specific_marker = not dependency.marker.is_any() @@ -458,7 +463,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: if not use_latest: locked = self._provider.get_locked(dependency) if locked: - return is_specific_marker, Preference.LOCKED, 1 + return is_specific_marker, Preference.LOCKED, -1 num_packages = len( self._dependency_cache.search_for( @@ -472,7 +477,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]: preference = Preference.USE_LATEST else: preference = Preference.DEFAULT - return is_specific_marker, preference, num_packages + return is_specific_marker, preference, -num_packages dependency = min(unsatisfied, key=_get_min) diff --git a/tests/mixology/version_solver/test_backtracking.py b/tests/mixology/version_solver/test_backtracking.py index 68e8f638746..c45ebf73c7a 100644 --- a/tests/mixology/version_solver/test_backtracking.py +++ b/tests/mixology/version_solver/test_backtracking.py @@ -67,7 +67,7 @@ def test_backjumps_after_partial_satisfier( add_to_repo(repo, "y", "1.0.0") add_to_repo(repo, "y", "2.0.0") - check_solver_result(root, provider, {"c": "1.0.0", "y": "2.0.0"}, tries=2) + check_solver_result(root, provider, {"c": "1.0.0", "y": "2.0.0"}, tries=4) def test_rolls_back_leaf_versions_first( @@ -132,32 +132,6 @@ def test_backjump_to_nearer_unsatisfied_package( ) -def test_traverse_into_package_with_fewer_versions_first( - root: ProjectPackage, provider: Provider, repo: Repository -) -> None: - # Dependencies are ordered so that packages with fewer versions are tried - # first. Here, there are two valid solutions (either a or b must be - # downgraded once). The chosen one depends on which dep is traversed first. - # Since b has fewer versions, it will be traversed first, which means a will - # come later. Since later selections are revised first, a gets downgraded. - root.add_dependency(Factory.create_dependency("a", "*")) - root.add_dependency(Factory.create_dependency("b", "*")) - - add_to_repo(repo, "a", "1.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "2.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "3.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "4.0.0", deps={"c": "*"}) - add_to_repo(repo, "a", "5.0.0", deps={"c": "1.0.0"}) - add_to_repo(repo, "b", "1.0.0", deps={"c": "*"}) - add_to_repo(repo, "b", "2.0.0", deps={"c": "*"}) - add_to_repo(repo, "b", "3.0.0", deps={"c": "*"}) - add_to_repo(repo, "b", "4.0.0", deps={"c": "2.0.0"}) - add_to_repo(repo, "c", "1.0.0") - add_to_repo(repo, "c", "2.0.0") - - check_solver_result(root, provider, {"a": "4.0.0", "b": "4.0.0", "c": "2.0.0"}) - - def test_backjump_past_failed_package_on_disjoint_constraint( root: ProjectPackage, provider: Provider, repo: Repository ) -> None: @@ -176,3 +150,57 @@ def test_backjump_past_failed_package_on_disjoint_constraint( add_to_repo(repo, "foo", "2.0.4") check_solver_result(root, provider, {"a": "1.0.0", "foo": "2.0.4"}) + + +def test_backtracking_performance_level_1( + root: ProjectPackage, provider: Provider, repo: Repository +) -> None: + """ + This test takes quite long if an unfavorable heuristics is chosen + to select the next package to resolve. + + B depends on A, but does not support the latest version of A. + B has a lot more versions than A. + + Test for boto3/botocore vs. urllib3 issue in its simple form. + """ + root.add_dependency(Factory.create_dependency("a", "*")) + root.add_dependency(Factory.create_dependency("b", "*")) + + add_to_repo(repo, "a", "1") + add_to_repo(repo, "a", "2") + + b_max = 500 + for i in range(1, b_max + 1): + add_to_repo(repo, "b", str(i), deps={"a": "<=1"}) + + check_solver_result(root, provider, {"a": "1", "b": str(b_max)}) + + +def test_backtracking_performance_level_2( + root: ProjectPackage, provider: Provider, repo: Repository +) -> None: + """ + Similar to test_backtracking_performance_level_1, + but with one more level of dependencies. + + C depends on B depends on A, but B does not support the latest version of A. + The root dependency only requires A and C so there is no direct dependency between + these two. + B and C have a lot more versions than A. + + Test for boto3/botocore vs. urllib3 issue in its more complex form. + """ + root.add_dependency(Factory.create_dependency("a", "*")) + root.add_dependency(Factory.create_dependency("c", "*")) + + add_to_repo(repo, "a", "1") + add_to_repo(repo, "a", "2") + + bc_max = 500 + for i in range(1, bc_max + 1): + add_to_repo(repo, "b", str(i), deps={"a": "<=1"}) + for i in range(1, bc_max + 1): + add_to_repo(repo, "c", str(i), deps={"b": f"<={i}"}) + + check_solver_result(root, provider, {"a": "1", "b": str(bc_max), "c": str(bc_max)}) diff --git a/tests/puzzle/test_solver.py b/tests/puzzle/test_solver.py index 125b29511fb..5b5b880e232 100644 --- a/tests/puzzle/test_solver.py +++ b/tests/puzzle/test_solver.py @@ -4265,7 +4265,7 @@ def test_update_with_use_latest_vs_lock( A1 depends on B2, A2 and A3 depend on B1. Same for C. B1 depends on A2/C2, B2 depends on A1/C1. - Because there are fewer versions B than of A and C, B is resolved first + Because there are more versions of B than of A and C, B is resolved first so that latest version of B is used. There shouldn't be a difference between `poetry lock` (not is_locked) and `poetry update` (is_locked + use_latest) @@ -4277,18 +4277,14 @@ def test_update_with_use_latest_vs_lock( package.add_dependency(Factory.create_dependency("C", "*")) package_a1 = get_package("A", "1") - package_a1.add_dependency(Factory.create_dependency("B", "2")) + package_a1.add_dependency(Factory.create_dependency("B", "3")) package_a2 = get_package("A", "2") package_a2.add_dependency(Factory.create_dependency("B", "1")) - package_a3 = get_package("A", "3") - package_a3.add_dependency(Factory.create_dependency("B", "1")) package_c1 = get_package("C", "1") - package_c1.add_dependency(Factory.create_dependency("B", "2")) + package_c1.add_dependency(Factory.create_dependency("B", "3")) package_c2 = get_package("C", "2") package_c2.add_dependency(Factory.create_dependency("B", "1")) - package_c3 = get_package("C", "3") - package_c3.add_dependency(Factory.create_dependency("B", "1")) package_b1 = get_package("B", "1") package_b1.add_dependency(Factory.create_dependency("A", "2")) @@ -4296,18 +4292,20 @@ def test_update_with_use_latest_vs_lock( package_b2 = get_package("B", "2") package_b2.add_dependency(Factory.create_dependency("A", "1")) package_b2.add_dependency(Factory.create_dependency("C", "1")) + package_b3 = get_package("B", "3") + package_b3.add_dependency(Factory.create_dependency("A", "1")) + package_b3.add_dependency(Factory.create_dependency("C", "1")) repo.add_package(package_a1) repo.add_package(package_a2) - repo.add_package(package_a3) repo.add_package(package_b1) repo.add_package(package_b2) + repo.add_package(package_b3) repo.add_package(package_c1) repo.add_package(package_c2) - repo.add_package(package_c3) if is_locked: - locked = [package_a1, package_b2, package_c1] + locked = [package_a1, package_b3, package_c1] use_latest = [package.name for package in locked] else: locked = [] @@ -4320,7 +4318,7 @@ def test_update_with_use_latest_vs_lock( transaction, [ {"job": "install", "package": package_c1}, - {"job": "install", "package": package_b2}, + {"job": "install", "package": package_b3}, {"job": "install", "package": package_a1}, ], )