Skip to content

Commit

Permalink
Use a set for TargetPython.get_tags
Browse files Browse the repository at this point in the history
This speeds up a representative pip-compile by about 40%

```
λ hyperfine -w 1 -M 3 -p 'rm req.txt || true' 'python -m piptools compile req.in --output-file req.txt --strip-extras --resolver=backtracking'
Benchmark 1: python -m piptools compile req.in --output-file req.txt --strip-extras --resolver=backtracking
  Time (mean ± σ):     28.552 s ±  2.552 s    [User: 20.477 s, System: 1.547 s]
  Range (min … max):   26.446 s … 31.390 s    3 runs

λ hyperfine -w 1 -M 3 -p 'rm req.txt || true' 'python -m piptools compile req.in --output-file req.txt --strip-extras --resolver=backtracking'
Benchmark 1: python -m piptools compile req.in --output-file req.txt --strip-extras --resolver=backtracking
  Time (mean ± σ):     17.570 s ±  0.450 s    [User: 11.537 s, System: 1.345 s]
  Range (min … max):   17.095 s … 17.989 s    3 runs
```

This makes the set.isdisjoint operation done over here much cheaper:
https://github.com/pypa/pip/blob/593b85f4a/src/pip/_internal/models/wheel.py#L92

The reason for this is because a Python usually has a lot of supported
tags. The Python I used above has 2000 supported tags! Whereas a wheel
usually only has one or two file tags.

The CPython code will unfortunately iterate over all 2000 tags to check
if there's a match. Only if the other collection is a set will CPython
think to swap the operands to iterate over the shorter collection:
https://github.com/python/cpython/blob/35963da40f/Objects/setobject.c#L1352
  • Loading branch information
hauntsaninja committed Aug 4, 2023
1 parent 593b85f commit 91c4fc4
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 20 deletions.
7 changes: 4 additions & 3 deletions src/pip/_internal/commands/debug.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
import sys
from optparse import Values
from types import ModuleType
from typing import Any, Dict, List, Optional
from typing import Any, Collection, Dict, List, Optional

import pip._vendor
from pip._vendor.certifi import where
from pip._vendor.packaging.tags import Tag
from pip._vendor.packaging.version import parse as parse_version

from pip._internal.cli import cmdoptions
Expand Down Expand Up @@ -105,7 +106,7 @@ def show_tags(options: Values) -> None:
tag_limit = 10

target_python = make_target_python(options)
tags = target_python.get_tags()
tags: Collection[Tag] = target_python.get_tags()

# Display the target options that were explicitly provided.
formatted_target = target_python.format_given()
Expand All @@ -118,7 +119,7 @@ def show_tags(options: Values) -> None:

if options.verbose < 1 and len(tags) > tag_limit:
tags_limited = True
tags = tags[:tag_limit]
tags = list(tags)[:tag_limit]
else:
tags_limited = False

Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/index/package_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ def create(
def __init__(
self,
project_name: str,
supported_tags: List[Tag],
supported_tags: Set[Tag],
specifier: specifiers.BaseSpecifier,
prefer_binary: bool = False,
allow_all_prereleases: bool = False,
Expand Down
8 changes: 4 additions & 4 deletions src/pip/_internal/models/target_python.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import sys
from typing import List, Optional, Tuple
from typing import List, Optional, Set, Tuple

from pip._vendor.packaging.tags import Tag

Expand Down Expand Up @@ -62,7 +62,7 @@ def __init__(
self.py_version_info = py_version_info

# This is used to cache the return value of get_tags().
self._valid_tags: Optional[List[Tag]] = None
self._valid_tags: Optional[Set[Tag]] = None

def format_given(self) -> str:
"""
Expand All @@ -84,7 +84,7 @@ def format_given(self) -> str:
f"{key}={value!r}" for key, value in key_values if value is not None
)

def get_tags(self) -> List[Tag]:
def get_tags(self) -> Set[Tag]:
"""
Return the supported PEP 425 tags to check wheel candidates against.
Expand All @@ -105,6 +105,6 @@ def get_tags(self) -> List[Tag]:
abis=self.abis,
impl=self.implementation,
)
self._valid_tags = tags
self._valid_tags = set(tags)

return self._valid_tags
4 changes: 2 additions & 2 deletions src/pip/_internal/models/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name that have meaning.
"""
import re
from typing import Dict, Iterable, List
from typing import Dict, Iterable, List, Set

from pip._vendor.packaging.tags import Tag

Expand Down Expand Up @@ -64,7 +64,7 @@ def support_index_min(self, tags: List[Tag]) -> int:
raise ValueError()

def find_most_preferred_tag(
self, tags: List[Tag], tag_to_priority: Dict[Tag, int]
self, tags: Set[Tag], tag_to_priority: Dict[Tag, int]
) -> int:
"""Return the priority of the most preferred tag that one of the wheel's file
tag combinations achieves in the given list of supported tags using the given
Expand Down
10 changes: 5 additions & 5 deletions tests/unit/test_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def test_not_find_wheel_not_supported(self, data: TestData) -> None:
req = install_req_from_line("simple.dist")
target_python = TargetPython()
# Make sure no tags will match.
target_python._valid_tags = []
target_python._valid_tags = set()
finder = make_test_finder(
find_links=[data.find_links],
target_python=target_python,
Expand Down Expand Up @@ -221,11 +221,11 @@ def test_link_sorting(self) -> None:
Link("simple-1.0.tar.gz"),
),
]
valid_tags = [
valid_tags = {
Tag("pyT", "none", "TEST"),
Tag("pyT", "TEST", "any"),
Tag("pyT", "none", "any"),
]
}
specifier = SpecifierSet()
evaluator = CandidateEvaluator(
"my-project",
Expand Down Expand Up @@ -289,11 +289,11 @@ def test_build_tag_is_less_important_than_other_tags(self) -> None:
Link("simple-1.0.tar.gz"),
),
]
valid_tags = [
valid_tags = {
Tag("py3", "abi3", "linux_x86_64"),
Tag("py3", "abi3", "linux_i386"),
Tag("py3", "any", "none"),
]
}
evaluator = CandidateEvaluator(
"my-project",
supported_tags=valid_tags,
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def test_evaluate_link__incompatible_wheel(self) -> None:
"""
target_python = TargetPython(py_version_info=(3, 6, 4))
# Set the valid tags to an empty list to make sure nothing matches.
target_python._valid_tags = []
target_python._valid_tags = set()
evaluator = LinkEvaluator(
project_name="sample",
canonical_name="sample",
Expand Down Expand Up @@ -373,7 +373,7 @@ class TestCandidateEvaluator:
)
def test_create(self, allow_all_prereleases: bool, prefer_binary: bool) -> None:
target_python = TargetPython()
target_python._valid_tags = [Tag("py36", "none", "any")]
target_python._valid_tags = {Tag("py36", "none", "any")}
specifier = SpecifierSet()
evaluator = CandidateEvaluator.create(
project_name="my-project",
Expand Down Expand Up @@ -786,7 +786,7 @@ def test_make_candidate_evaluator(
prefer_binary: bool,
) -> None:
target_python = TargetPython()
target_python._valid_tags = [Tag("py36", "none", "any")]
target_python._valid_tags = {Tag("py36", "none", "any")}
candidate_prefs = CandidatePreferences(
prefer_binary=prefer_binary,
allow_all_prereleases=allow_all_prereleases,
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/test_target_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ def test_get_tags__uses_cached_value(self) -> None:
Test that get_tags() uses the cached value.
"""
target_python = TargetPython(py_version_info=None)
target_python._valid_tags = [
target_python._valid_tags = {
Tag("py2", "none", "any"),
Tag("py3", "none", "any"),
]
}
actual = target_python.get_tags()
assert actual == [Tag("py2", "none", "any"), Tag("py3", "none", "any")]

0 comments on commit 91c4fc4

Please sign in to comment.