From ddebf171a437e5034542266e2a4fe0ee0a007c30 Mon Sep 17 00:00:00 2001
From: Mark Harfouche
Date: Tue, 3 Oct 2023 15:18:12 -0400
Subject: [PATCH] Speed up import time by lazy loading requests (#328)

Pooch is often used as an optional add-on rather than a core feature of
the packages that import it. As such, it should not inflate the import
time of dependent packages. While the gain here is small, it is not
insignificant: multiple packages use Pooch, and every small saving adds
up.
---
 pooch/core.py        |  5 +++--
 pooch/downloaders.py | 22 +++++++++++++++-------
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/pooch/core.py b/pooch/core.py
index fb7a3cda..7eeab6f0 100644
--- a/pooch/core.py
+++ b/pooch/core.py
@@ -14,8 +14,6 @@
 import shlex
 import shutil
 
-import requests
-import requests.exceptions
 
 from .hashes import hash_matches, file_hash
 from .utils import (
@@ -792,6 +790,9 @@ def stream_download(url, fname, known_hash, downloader, pooch=None, retry_if_fai
     will retry the download the specified number of times in case the failure
     was due to a network error.
     """
+    # Lazy import requests to speed up import time
+    import requests.exceptions  # pylint: disable=C0415
+
     # Ensure the parent directory exists in case the file is in a subdirectory.
     # Otherwise, move will cause an error.
     if not fname.parent.exists():
diff --git a/pooch/downloaders.py b/pooch/downloaders.py
index 383f2a12..1cae6b1e 100644
--- a/pooch/downloaders.py
+++ b/pooch/downloaders.py
@@ -12,7 +12,6 @@
 import ftplib
 import warnings
 
-import requests
 
 from .utils import parse_url
 
@@ -192,6 +191,9 @@ def __call__(self, url, output_file, pooch, check_only=False):
             is available on the server. Otherwise, returns ``None``.
 
         """
+        # Lazy import requests to speed up import time
+        import requests  # pylint: disable=C0415
+
         if check_only:
             response = requests.head(url, allow_redirects=True)
             available = bool(response.status_code == 200)
@@ -626,6 +628,9 @@ def doi_to_url(doi):
         The URL of the archive in the data repository.
 
     """
+    # Lazy import requests to speed up import time
+    import requests  # pylint: disable=C0415
+
     # Use doi.org to resolve the DOI to the repository website.
     response = requests.get(f"https://doi.org/{doi}")
     url = response.url
@@ -777,8 +782,10 @@ def initialize(cls, doi, archive_url):
     @property
     def api_response(self):
         """Cached API response from Zenodo"""
-
         if self._api_response is None:
+            # Lazy import requests to speed up import time
+            import requests  # pylint: disable=C0415
+
             article_id = self.archive_url.split("/")[-1]
             self._api_response = requests.get(
                 f"https://zenodo.org/api/records/{article_id}"
@@ -801,7 +808,6 @@ def download_url(self, file_name):
         download_url : str
             The HTTP URL that can be used to download the file.
         """
-
         files = {item["key"]: item for item in self.api_response["files"]}
         if file_name not in files:
             raise ValueError(
@@ -875,8 +881,10 @@ def _parse_version_from_doi(self):
     @property
     def api_response(self):
         """Cached API response from Figshare"""
-
         if self._api_response is None:
+            # Lazy import requests to speed up import time
+            import requests  # pylint: disable=C0415
+
             # Use the figshare API to find the article ID from the DOI
             article = requests.get(
                 f"https://api.figshare.com/v2/articles?doi={self.doi}"
@@ -927,7 +935,6 @@ def download_url(self, file_name):
         download_url : str
             The HTTP URL that can be used to download the file.
         """
-
         files = {item["name"]: item for item in self.api_response}
         if file_name not in files:
             raise ValueError(
@@ -974,7 +981,6 @@ def initialize(cls, doi, archive_url):
         archive_url : str
             The resolved URL for the DOI
         """
-
         # Access the DOI as if this was a DataVerse instance
         response = cls._get_api_response(doi, archive_url)
 
@@ -995,6 +1001,9 @@ def _get_api_response(cls, doi, archive_url):
         This has been separated into a separate ``classmethod``, as it can be
         used prior and after the initialization.
         """
+        # Lazy import requests to speed up import time
+        import requests  # pylint: disable=C0415
+
         parsed = parse_url(archive_url)
         response = requests.get(
             f"{parsed['protocol']}://{parsed['netloc']}/api/datasets/"
@@ -1034,7 +1043,6 @@ def download_url(self, file_name):
         download_url : str
             The HTTP URL that can be used to download the file.
         """
-
         parsed = parse_url(self.archive_url)
 
         # Iterate over the given files until we find one of the requested name