From af99bae5e96cddcd136726c84c8f1a79b25f122c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 19 Aug 2024 12:52:11 -0400 Subject: [PATCH] add cli arg to limit download parallelism --- news/12923.feature.rst | 2 +- src/pip/_internal/cli/cmdoptions.py | 18 ++++++++++++++++++ src/pip/_internal/cli/req_command.py | 2 ++ src/pip/_internal/commands/download.py | 2 ++ src/pip/_internal/commands/install.py | 2 ++ src/pip/_internal/commands/wheel.py | 2 ++ src/pip/_internal/network/download.py | 13 +++++++++++-- src/pip/_internal/operations/prepare.py | 5 ++++- tests/unit/test_req.py | 1 + 9 files changed, 43 insertions(+), 4 deletions(-) diff --git a/news/12923.feature.rst b/news/12923.feature.rst index a84cf583803..e152a8d5f1a 100644 --- a/news/12923.feature.rst +++ b/news/12923.feature.rst @@ -1 +1 @@ -Download concrete dists for metadata-only resolves in parallel using worker threads. +Download concrete dists for metadata-only resolves in parallel using worker threads. Add ``--batch-download-parallelism`` CLI flag to limit parallelism. diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index 0b7cff77bdd..c9f74400178 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -231,6 +231,24 @@ class PipOption(Option): help="Specify whether the progress bar should be used [on, off, raw] (default: on)", ) + +batch_download_parallelism: Callable[..., Option] = partial( + Option, + "--batch-download-parallelism", + dest="batch_download_parallelism", + type="int", + default=1, + help=( + "Maximum parallelism employed for batch downloading of metadata-only dists" + " (default %default parallel requests)." + " Note that more than 10 downloads may overflow the requests connection pool," + " which may affect performance." + " Note also that commands such as 'install --dry-run' should avoid downloads" + " entirely, and so will not be affected by this option." + ), +) + + log: Callable[..., Option] = partial( PipOption, "--log", diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index 92900f94ff4..e2db69d46e4 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -100,6 +100,7 @@ def make_requirement_preparer( use_user_site: bool, download_dir: Optional[str] = None, verbosity: int = 0, + batch_download_parallelism: Optional[int] = None, ) -> RequirementPreparer: """ Create a RequirementPreparer instance for the given parameters. @@ -141,6 +142,7 @@ def make_requirement_preparer( use_user_site=use_user_site, lazy_wheel=lazy_wheel, verbosity=verbosity, + batch_download_parallelism=batch_download_parallelism, legacy_resolver=legacy_resolver, ) diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 917bbb91d83..c2a090de1a3 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -47,6 +47,7 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.pre()) self.cmd_opts.add_option(cmdoptions.require_hashes()) self.cmd_opts.add_option(cmdoptions.progress_bar()) + self.cmd_opts.add_option(cmdoptions.batch_download_parallelism()) self.cmd_opts.add_option(cmdoptions.no_build_isolation()) self.cmd_opts.add_option(cmdoptions.use_pep517()) self.cmd_opts.add_option(cmdoptions.no_use_pep517()) @@ -116,6 +117,7 @@ def run(self, options: Values, args: List[str]) -> int: download_dir=options.download_dir, use_user_site=False, verbosity=self.verbosity, + batch_download_parallelism=options.batch_download_parallelism, ) resolver = self.make_resolver( diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index ad45a2f2a57..fda0e76ba23 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -237,6 +237,7 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.prefer_binary()) self.cmd_opts.add_option(cmdoptions.require_hashes()) self.cmd_opts.add_option(cmdoptions.progress_bar()) + self.cmd_opts.add_option(cmdoptions.batch_download_parallelism()) self.cmd_opts.add_option(cmdoptions.root_user_action()) index_opts = cmdoptions.make_option_group( @@ -359,6 +360,7 @@ def run(self, options: Values, args: List[str]) -> int: finder=finder, use_user_site=options.use_user_site, verbosity=self.verbosity, + batch_download_parallelism=options.batch_download_parallelism, ) resolver = self.make_resolver( preparer=preparer, diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index 278719f4e0c..21f11bfe49c 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -67,6 +67,7 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.ignore_requires_python()) self.cmd_opts.add_option(cmdoptions.no_deps()) self.cmd_opts.add_option(cmdoptions.progress_bar()) + self.cmd_opts.add_option(cmdoptions.batch_download_parallelism()) self.cmd_opts.add_option( "--no-verify", @@ -131,6 +132,7 @@ def run(self, options: Values, args: List[str]) -> int: download_dir=options.wheel_dir, use_user_site=False, verbosity=self.verbosity, + batch_download_parallelism=options.batch_download_parallelism, ) resolver = self.make_resolver( diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index 9c3967fd9a9..a35ea97b6ca 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -13,7 +13,7 @@ from pip._vendor.requests.models import Response from pip._internal.cli.progress_bars import get_download_progress_renderer -from pip._internal.exceptions import NetworkConnectionError +from pip._internal.exceptions import CommandError, NetworkConnectionError from pip._internal.models.index import PyPI from pip._internal.models.link import Link from pip._internal.network.cache import is_from_cache @@ -226,11 +226,20 @@ def __init__( self, session: PipSession, progress_bar: str, + max_parallelism: Optional[int] = None, ) -> None: self._session = session # FIXME: support progress bar with parallel downloads! logger.info("Ignoring progress bar %s for parallel downloads", progress_bar) + if max_parallelism is None: + max_parallelism = 1 + if max_parallelism < 1: + raise CommandError( + f"invalid batch download parallelism {max_parallelism}: must be >=1" + ) + self._max_parallelism: int = max_parallelism + def __call__( self, links: Iterable[Link], location: Path ) -> Iterable[Tuple[Link, Tuple[Path, Optional[str]]]]: @@ -254,7 +263,7 @@ def __call__( q: "Queue[Union[Tuple[Link, Path, Optional[str]], BaseException]]" = Queue() event = Event() # Limit downloads to 10 at a time so we can reuse our connection pool. - semaphore = Semaphore(value=10) + semaphore = Semaphore(value=self._max_parallelism) # Distribute request i/o across equivalent threads. # NB: event-based/async is likely a better model than thread-per-request, but diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 652a00bf97d..ef5268d6cc5 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -230,6 +230,7 @@ def __init__( use_user_site: bool, lazy_wheel: bool, verbosity: int, + batch_download_parallelism: Optional[int], legacy_resolver: bool, ) -> None: super().__init__() @@ -239,7 +240,9 @@ def __init__( self.build_tracker = build_tracker self._session = session self._download = Downloader(session, progress_bar) - self._batch_download = BatchDownloader(session, progress_bar) + self._batch_download = BatchDownloader( + session, progress_bar, max_parallelism=batch_download_parallelism + ) self.finder = finder # Where still-packed archives should be written to. If None, they are diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 8a95c058706..9d0e2021866 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -106,6 +106,7 @@ def _basic_resolver( use_user_site=False, lazy_wheel=False, verbosity=0, + batch_download_parallelism=None, legacy_resolver=True, ) yield Resolver(