Speed up import time by lazy loading requests (#328)
Pooch is often pulled in as an optional add-on rather than used as a core feature.
As such, it is a little silly for it to inflate the import time of dependent
packages by a lot. While the gain here is small, it is not insignificant:
multiple packages import Pooch, so even a small amount of time saved adds up.
hmaarrfk committed Oct 3, 2023
1 parent 8aa2fc3 commit ddebf17
Showing 2 changed files with 18 additions and 9 deletions.
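For context, the cost being trimmed is the one-time price of importing requests (and its own dependency chain) whenever pooch is imported. CPython's built-in import profiler makes this easy to see; a minimal sketch, with timings that will vary by machine:

import subprocess
import sys

# Run a bare import in a fresh interpreter. The -X importtime flag makes
# CPython print a per-module timing table to stderr.
result = subprocess.run(
    [sys.executable, "-X", "importtime", "-c", "import pooch"],
    capture_output=True,
    text=True,
    check=True,
)
print(result.stderr)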
pooch/core.py: 5 changes (3 additions, 2 deletions)
@@ -14,8 +14,6 @@
 import shlex
 import shutil
 
-import requests
-import requests.exceptions
 
 from .hashes import hash_matches, file_hash
 from .utils import (
@@ -792,6 +790,9 @@ def stream_download(url, fname, known_hash, downloader, pooch=None, retry_if_fai
     will retry the download the specified number of times in case the failure
     was due to a network error.
     """
+    # Lazy import requests to speed up import time
+    import requests.exceptions  # pylint: disable=C0415
+
     # Ensure the parent directory exists in case the file is in a subdirectory.
     # Otherwise, move will cause an error.
     if not fname.parent.exists():
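The pattern this commit applies everywhere is the standard deferred import: move the import statement from module level into the function that needs it. Python caches imported modules in sys.modules, so only the first call pays the cost; a minimal sketch (the function is hypothetical):

def fetch(url):
    # Nothing is imported when this module loads. The first call runs
    # the real import; every later call finds "requests" already cached
    # in sys.modules, so the overhead is roughly a dict lookup.
    import requests  # pylint: disable=C0415

    return requests.get(url)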
pooch/downloaders.py: 22 changes (15 additions, 7 deletions)
@@ -12,7 +12,6 @@
 import ftplib
 
 import warnings
-import requests
 
 from .utils import parse_url
 
@@ -192,6 +191,9 @@ def __call__(self, url, output_file, pooch, check_only=False):
             is available on the server. Otherwise, returns ``None``.
         """
+        # Lazy import requests to speed up import time
+        import requests  # pylint: disable=C0415
+
         if check_only:
             response = requests.head(url, allow_redirects=True)
             available = bool(response.status_code == 200)
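The check_only branch above is self-contained enough to read as its own recipe: a HEAD request asks the server for headers only, so availability can be tested without downloading the file. A minimal sketch (the helper name is hypothetical):

import requests

def is_available(url):
    # HEAD fetches only the response headers, so this stays cheap even
    # for very large files; redirects are followed first.
    response = requests.head(url, allow_redirects=True)
    return response.status_code == 200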
@@ -626,6 +628,9 @@ def doi_to_url(doi):
         The URL of the archive in the data repository.
     """
+    # Lazy import requests to speed up import time
+    import requests  # pylint: disable=C0415
+
     # Use doi.org to resolve the DOI to the repository website.
     response = requests.get(f"https://doi.org/{doi}")
     url = response.url
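doi_to_url works because requests follows redirects by default, so after fetching https://doi.org/<DOI> the final repository address is left in response.url. A sketch under that assumption (the function name is hypothetical, and the raise_for_status call is an extra safeguard not shown in the diff):

import requests

def resolve_doi(doi):
    # doi.org redirects to the repository landing page; response.url
    # holds the URL after the whole redirect chain has been followed.
    response = requests.get(f"https://doi.org/{doi}")
    response.raise_for_status()
    return response.url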
@@ -777,8 +782,10 @@ def initialize(cls, doi, archive_url):
     @property
     def api_response(self):
         """Cached API response from Zenodo"""
-
         if self._api_response is None:
+            # Lazy import requests to speed up import time
+            import requests  # pylint: disable=C0415
+
             article_id = self.archive_url.split("/")[-1]
             self._api_response = requests.get(
                 f"https://zenodo.org/api/records/{article_id}"
@@ -801,7 +808,6 @@ def download_url(self, file_name):
         download_url : str
             The HTTP URL that can be used to download the file.
         """
-
         files = {item["key"]: item for item in self.api_response["files"]}
         if file_name not in files:
             raise ValueError(
@@ -875,8 +881,10 @@ def _parse_version_from_doi(self):
     @property
     def api_response(self):
         """Cached API response from Figshare"""
-
         if self._api_response is None:
+            # Lazy import requests to speed up import time
+            import requests  # pylint: disable=C0415
+
             # Use the figshare API to find the article ID from the DOI
             article = requests.get(
                 f"https://api.figshare.com/v2/articles?doi={self.doi}"
@@ -927,7 +935,6 @@ def download_url(self, file_name):
         download_url : str
             The HTTP URL that can be used to download the file.
         """
-
         files = {item["name"]: item for item in self.api_response}
         if file_name not in files:
             raise ValueError(
@@ -974,7 +981,6 @@ def initialize(cls, doi, archive_url):
         archive_url : str
             The resolved URL for the DOI
         """
-
         # Access the DOI as if this was a DataVerse instance
         response = cls._get_api_response(doi, archive_url)
 
@@ -995,6 +1001,9 @@ def _get_api_response(cls, doi, archive_url):
         This has been separated into a separate ``classmethod``, as it can be
         used prior and after the initialization.
         """
+        # Lazy import requests to speed up import time
+        import requests  # pylint: disable=C0415
+
         parsed = parse_url(archive_url)
         response = requests.get(
             f"{parsed['protocol']}://{parsed['netloc']}/api/datasets/"
@@ -1034,7 +1043,6 @@ def download_url(self, file_name):
         download_url : str
             The HTTP URL that can be used to download the file.
         """
-
         parsed = parse_url(self.archive_url)
 
         # Iterate over the given files until we find one of the requested name
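One last detail worth spelling out: pylint reports imports placed inside functions as C0415 (import-outside-toplevel), which is why every deferred import in this commit carries an inline disable comment. A project adopting the pattern broadly could instead turn the check off once in its pylint configuration; a sketch using a classic .pylintrc (assuming the default message names):

[MESSAGES CONTROL]
disable = import-outside-toplevel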
