From df2812dd6d7c9555ec853df95118fd6338f77260 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 30 Mar 2024 13:13:43 -0400 Subject: [PATCH 1/8] Allow provider to narrow backtrack selection --- src/resolvelib/providers.py | 57 ++++++++++++++++++++++++++ src/resolvelib/resolvers/resolution.py | 30 +++++++++++++- 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/resolvelib/providers.py b/src/resolvelib/providers.py index 6d8bc47..5127d8d 100644 --- a/src/resolvelib/providers.py +++ b/src/resolvelib/providers.py @@ -135,3 +135,60 @@ def get_dependencies(self, candidate: CT) -> Iterable[RT]: specifies as its dependencies. """ raise NotImplementedError + + def narrow_requirement_selection( + self, + identifiers: Iterable[KT], + resolutions: Mapping[KT, CT], + candidates: Mapping[KT, Iterator[CT]], + information: Mapping[KT, Iterator[RequirementInformation[RT, CT]]], + backtrack_causes: Sequence[RequirementInformation[RT, CT]], + ) -> Iterable[KT]: + """ + An optional method to narrow the selection of requirements being + considered during resolution. + + The requirement selection is defined as "The possible requirements + that will be resolved next." If a requirement is not part of the returned + iterable, it will not be considered during the next step of resolution. + + :param identifiers: An iterable of `identifiers` as returned by + ``identify()``. These identify all requirements currently being + considered. + :param resolutions: A mapping of candidates currently pinned by the + resolver. Each key is an identifier, and the value is a candidate + that may conflict with requirements from ``information``. + :param candidates: A mapping of each dependency's possible candidates. + Each value is an iterator of candidates. + :param information: A mapping of requirement information for each package. + Each value is an iterator of *requirement information*. + :param backtrack_causes: A sequence of *requirement information* that are + the requirements causing the resolver to most recently + backtrack. + + A *requirement information* instance is a named tuple with two members: + + * ``requirement`` specifies a requirement contributing to the current + list of candidates. + * ``parent`` specifies the candidate that provides (is depended on for) + the requirement, or ``None`` to indicate a root requirement. + + Must return a non-empty subset of `identifiers`, with the default + implementation being to return `identifiers` unchanged. + + Can be used by the provider to optimize the dependency resolution + process. `get_preference` will only be called for the identifiers + returned. If there is only one identifier returned, then `get_preference` + won't be called at all. + + Serving a similar purpose as `get_preference`, this method allows the + provider to guide resolvelib through the resolution process. It should + be used instead of `get_preference` for logic when the provider needs + to consider multiple identifiers simultaneously, or when the provider + wants to skip checking all identifiers, e.g., because the checks are + prohibitively expensive. + + Returns: + Iterable[KT]: A non-empty subset of `identifiers`. + """ + return identifiers diff --git a/src/resolvelib/resolvers/resolution.py b/src/resolvelib/resolvers/resolution.py index 6c0bf50..1b01faa 100644 --- a/src/resolvelib/resolvers/resolution.py +++ b/src/resolvelib/resolvers/resolution.py @@ -411,8 +411,34 @@ def resolve(self, requirements: Iterable[RT], max_rounds: int) -> State[RT, CT, # keep track of satisfied names to calculate diff after pinning satisfied_names = set(self.state.criteria.keys()) - set(unsatisfied_names) - # Choose the most preferred unpinned criterion to try. - name = min(unsatisfied_names, key=self._get_preference) + if len(unsatisfied_names) > 1: + narrowed_unstatisfied_names = list( + self._p.narrow_requirement_selection( + identifiers=unsatisfied_names, + resolutions=self.state.mapping, + candidates=IteratorMapping( + self.state.criteria, + operator.attrgetter("candidates"), + ), + information=IteratorMapping( + self.state.criteria, + operator.attrgetter("information"), + ), + backtrack_causes=self.state.backtrack_causes, + ) + ) + else: + narrowed_unstatisfied_names = unsatisfied_names + + # If there is only 1 unsatisfied name skip calling self._get_preference + if len(narrowed_unstatisfied_names) > 1: + # Choose the most preferred unpinned criterion to try. + name = min( + narrowed_unstatisfied_names, key=self._get_preference + ) + else: + name = narrowed_unstatisfied_names[0] + failure_criterion = self._attempt_to_pin_criterion(name) if failure_criterion: From 5b4d03acc83a50d1fe3506e65d09d18f20c1065c Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 11:11:30 -0400 Subject: [PATCH 2/8] formatting --- src/resolvelib/resolvers/resolution.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/resolvelib/resolvers/resolution.py b/src/resolvelib/resolvers/resolution.py index 1b01faa..7b2f618 100644 --- a/src/resolvelib/resolvers/resolution.py +++ b/src/resolvelib/resolvers/resolution.py @@ -433,9 +433,7 @@ def resolve(self, requirements: Iterable[RT], max_rounds: int) -> State[RT, CT, # If there is only 1 unsatisfied name skip calling self._get_preference if len(narrowed_unstatisfied_names) > 1: # Choose the most preferred unpinned criterion to try. - name = min( - narrowed_unstatisfied_names, key=self._get_preference - ) + name = min(narrowed_unstatisfied_names, key=self._get_preference) else: name = narrowed_unstatisfied_names[0] From 8fec685371cf356b08ea95d81c2aaa366a83a3b9 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 11:20:46 -0400 Subject: [PATCH 3/8] Throw specific error if narrowed_unstatisfied_names is empty --- src/resolvelib/resolvers/resolution.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/resolvelib/resolvers/resolution.py b/src/resolvelib/resolvers/resolution.py index 7b2f618..da3c66e 100644 --- a/src/resolvelib/resolvers/resolution.py +++ b/src/resolvelib/resolvers/resolution.py @@ -430,6 +430,10 @@ def resolve(self, requirements: Iterable[RT], max_rounds: int) -> State[RT, CT, else: narrowed_unstatisfied_names = unsatisfied_names + # If there are no unsatisfied names use unsatisfied names + if not narrowed_unstatisfied_names: + raise RuntimeError("narrow_requirement_selection returned 0 names") + # If there is only 1 unsatisfied name skip calling self._get_preference if len(narrowed_unstatisfied_names) > 1: # Choose the most preferred unpinned criterion to try. From ad5dd5498b00d799b26075345bfce86393672465 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 11:27:40 -0400 Subject: [PATCH 4/8] Increase mccabe complexity --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index be55ff5..3622199 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,9 @@ exclude = [ "*.pyi" ] +[tool.ruff.lint.mccabe] +max-complexity = 12 + [tool.mypy] warn_unused_configs = true From 9ab163193acc6f0ba25f795f35cb7405a88883d6 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 11:49:44 -0400 Subject: [PATCH 5/8] Update docs --- src/resolvelib/providers.py | 39 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/src/resolvelib/providers.py b/src/resolvelib/providers.py index 5127d8d..cb148b8 100644 --- a/src/resolvelib/providers.py +++ b/src/resolvelib/providers.py @@ -40,6 +40,14 @@ def get_preference( ) -> Preference: """Produce a sort key for given requirement based on preference. + As this is a sort key it will be called O(n) times per backtrack step, + where n is the number of `identifier`s. If you have a check which is + expensive in some sense, e.g. it needs to make O(n) checks per + identifier, or takes significant wall clock time but could be short + circuited once finding an identifier that matches the check, consider + using `narrow_requirement_selection` to filter the `identifier`s + before this sort key is called. + The preference is defined as "I think this requirement should be resolved first". The lower the return value is, the more preferred this group of arguments is. @@ -146,11 +154,8 @@ def narrow_requirement_selection( ) -> Iterable[KT]: """ An optional method to narrow the selection of requirements being - considered during resolution. - - The requirement selection is defined as "The possible requirements - that will be resolved next." If a requirement is not part of the returned - iterable, it will not be considered during the next step of resolution. + considered during resolution. This method is called O(1) time per + backtrack step. :param identifiers: An iterable of `identifiers` as returned by ``identify()``. These identify all requirements currently being @@ -174,19 +179,17 @@ def narrow_requirement_selection( the requirement, or ``None`` to indicate a root requirement. Must return a non-empty subset of `identifiers`, with the default - implementation being to return `identifiers` unchanged. - - Can be used by the provider to optimize the dependency resolution - process. `get_preference` will only be called for the identifiers - returned. If there is only one identifier returned, then `get_preference` - won't be called at all. - - Serving a similar purpose as `get_preference`, this method allows the - provider to guide resolvelib through the resolution process. It should - be used instead of `get_preference` for logic when the provider needs - to consider multiple identifiers simultaneously, or when the provider - wants to skip checking all identifiers, e.g., because the checks are - prohibitively expensive. + implementation being to return `identifiers` unchanged. Those `identifiers` + will then be passed to the sort key `get_preference` to pick the most + prefered requirement to attempt to pin, unless `narrow_requirement_selection` + returns only 1 requirement, in which case that will be used without + calling the sort key `get_preference`. + + This method is designed to be used by the provider to optimize the + dependency resolution, e.g. if a check cost is O(m) and it can be done + against all identifiers at once then filtering the requirement selection + here will cost O(m) but making it part of the sort key in `get_preference` + will cost O(m*n), where n is the number of `identifiers`. Returns: Iterable[KT]: A non-empty subset of `identifiers`. From 468f5cec92253fb619a2ff18c38bcc90b6234c2b Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 12:22:00 -0400 Subject: [PATCH 6/8] Add functional tests for narrow_requirement_selection --- .../python/test_resolvers_python.py | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/tests/functional/python/test_resolvers_python.py b/tests/functional/python/test_resolvers_python.py index 18c1550..c1e3038 100644 --- a/tests/functional/python/test_resolvers_python.py +++ b/tests/functional/python/test_resolvers_python.py @@ -121,6 +121,24 @@ def get_dependencies(self, candidate): return list(self._iter_dependencies(candidate)) +class PythonInputProviderNarrowRequirements(PythonInputProvider): + def narrow_requirement_selection( + self, identifiers, resolutions, candidates, information, backtrack_causes + ): + # Consider requirements that have 0 candidates (a resolution end point + # that can be backtracked from) or 1 candidate (speeds up situations where + # ever requirement is pinned to 1 specific version) + number_of_candidates = defaultdict(list) + for identifier in identifiers: + number_of_candidates[len(list(candidates[identifier]))].append(identifier) + + min_candidates = min(number_of_candidates.keys()) + if min_candidates in (0, 1): + return number_of_candidates[min_candidates] + + return identifiers + + INPUTS_DIR = os.path.abspath(os.path.join(__file__, "..", "inputs")) CASE_DIR = os.path.join(INPUTS_DIR, "case") @@ -133,20 +151,32 @@ def get_dependencies(self, candidate): } -@pytest.fixture( - params=[ +def create_params(provider_class): + return [ pytest.param( - os.path.join(CASE_DIR, n), + (os.path.join(CASE_DIR, n), provider_class), marks=pytest.mark.xfail(strict=True, reason=XFAIL_CASES[n]), ) if n in XFAIL_CASES - else os.path.join(CASE_DIR, n) + else (os.path.join(CASE_DIR, n), provider_class) + for n in CASE_NAMES + ] + + +@pytest.fixture( + params=[ + *create_params(PythonInputProvider), + *create_params(PythonInputProviderNarrowRequirements), + ], + ids=[ + f"{n[:-5]}-{cls.__name__}" + for cls in [PythonInputProvider, PythonInputProviderNarrowRequirements] for n in CASE_NAMES ], - ids=[n[:-5] for n in CASE_NAMES], ) def provider(request): - return PythonInputProvider(request.param) + path, provider_class = request.param + return provider_class(path) def _format_confliction(exception): From 23cd4886d026fbe3b985faf40a8e4f946f1c0db5 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 12:33:17 -0400 Subject: [PATCH 7/8] Add news entry --- news/145.feature | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 news/145.feature diff --git a/news/145.feature b/news/145.feature new file mode 100644 index 0000000..65dcd9e --- /dev/null +++ b/news/145.feature @@ -0,0 +1,3 @@ +New `narrow_requirement_selection` provider method giving option for +providers to reduce the number of times sort key `get_preference` is +called in long running backtrack From 7569d7522b227e742bbf6cbd346176dce1beaec1 Mon Sep 17 00:00:00 2001 From: Damian Shaw Date: Sat, 3 Aug 2024 12:41:37 -0400 Subject: [PATCH 8/8] update docs of `get_preference` --- src/resolvelib/providers.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/resolvelib/providers.py b/src/resolvelib/providers.py index cb148b8..524e3d8 100644 --- a/src/resolvelib/providers.py +++ b/src/resolvelib/providers.py @@ -40,13 +40,12 @@ def get_preference( ) -> Preference: """Produce a sort key for given requirement based on preference. - As this is a sort key it will be called O(n) times per backtrack step, - where n is the number of `identifier`s. If you have a check which is - expensive in some sense, e.g. it needs to make O(n) checks per - identifier, or takes significant wall clock time but could be short - circuited once finding an identifier that matches the check, consider - using `narrow_requirement_selection` to filter the `identifier`s - before this sort key is called. + As this is a sort key it will be called O(n) times per backtrack + step, where n is the number of `identifier`s, if you have a check + which is expensive in some sense. E.g. It needs to make O(n) checks + per call or takes significant wall clock time, consider using + `narrow_requirement_selection` to filter the `identifier`s, which + is applied before this sort key is called. The preference is defined as "I think this requirement should be resolved first". The lower the return value is, the more preferred