Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Ensure processor has extracted all members when members=None #365

Merged
merged 11 commits into from
Feb 19, 2024
29 changes: 21 additions & 8 deletions pooch/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ def __init__(self, members=None, extract_dir=None):
self.members = members
self.extract_dir = extract_dir

def _all_members(self, fname):
"""Return all the members in the archive.

In the base class, this returns None.
"""
return None

def __call__(self, fname, action, pooch):
"""
Extract all files from the given archive.
Expand Down Expand Up @@ -79,17 +86,13 @@ def __call__(self, fname, action, pooch):
else:
archive_dir = fname.rsplit(os.path.sep, maxsplit=1)[0]
self.extract_dir = os.path.join(archive_dir, self.extract_dir)
members = self.members or self._all_members(fname)
if (
(action in ("update", "download"))
or (not os.path.exists(self.extract_dir))
or (
(self.members is not None)
and (
not all(
os.path.exists(os.path.join(self.extract_dir, m))
for m in self.members
)
)
or not all(
os.path.exists(os.path.join(self.extract_dir, m))
for m in members
)
):
# Make sure that the folder with the extracted files exists
Expand Down Expand Up @@ -148,6 +151,11 @@ class Unzip(ExtractorProcessor): # pylint: disable=too-few-public-methods

suffix = ".unzip"

def _all_members(self, fname):
"""Return all members from a given archive."""
with ZipFile(fname, "r") as zip_file:
return zip_file.namelist()

def _extract_file(self, fname, extract_dir):
"""
This method receives an argument for the archive to extract and the
Expand Down Expand Up @@ -209,6 +217,11 @@ class Untar(ExtractorProcessor): # pylint: disable=too-few-public-methods

suffix = ".untar"

def _all_members(self, fname):
"""Return all members from a given archive."""
with TarFile.open(fname, "r") as tar_file:
return [info.name for info in tar_file.getmembers()]

def _extract_file(self, fname, extract_dir):
"""
This method receives an argument for the archive to extract and the
Expand Down
25 changes: 25 additions & 0 deletions pooch/tests/test_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,28 @@ def _unpacking_expected_paths_and_logs(archive, members, path, name):
log_lines.append(f"Extracting '{member}'")
true_paths = set(true_paths)
return true_paths, log_lines


@pytest.mark.network
@pytest.mark.parametrize(
    "processor_class,extension",
    [(Unzip, ".zip"), (Untar, ".tar.gz")],
)
def test_unpacking_members_then_no_members(processor_class, extension):
    """Fetch with a bogus member list first, then again without members.

    Regression test for https://github.com/fatiando/pooch/issues/364
    """
    archive_name = "store" + extension

    with TemporaryDirectory() as cache_dir:
        puppy = Pooch(path=Path(cache_dir), base_url=BASEURL, registry=REGISTRY)

        # First fetch: ask only for a member name that is incorrect, and
        # check that no file names come back.
        wrong_member_processor = processor_class(members=["i don't exist"])
        first_result = puppy.fetch(archive_name, processor=wrong_member_processor)
        assert len(first_result) == 0
        # Note carried over from the PR page: "jni marked this conversation
        # as resolved."

        # Second fetch: same archive, no member list at all; this time
        # file names are expected.
        unfiltered_processor = processor_class()
        second_result = puppy.fetch(archive_name, processor=unfiltered_processor)
        assert len(second_result) > 0
Loading