From ddfc2b244a4ac06ffdf9a1f3635f2f1f45c7e894 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 17 Oct 2023 11:03:55 -0700 Subject: [PATCH 1/4] Fix bug: update Zenodo downloader for new API Update the Zenodo downloader to work with the new Zenodo API. Use the `filename` key for getting the filenames of all files in a repository. Update the generated download url based on heuristics (the link present in the API reference doesn't work at the moment). Update the method that populates the registry: use the new `filename` key and specify that the checksums are now md5. --- pooch/downloaders.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/pooch/downloaders.py b/pooch/downloaders.py index 1cae6b1e..aee4b98b 100644 --- a/pooch/downloaders.py +++ b/pooch/downloaders.py @@ -807,13 +807,25 @@ def download_url(self, file_name): ------- download_url : str The HTTP URL that can be used to download the file. + + Notes + ----- + After Zenodo migrated to InvenioRDM on Oct 2023, their API changed. The + link to the desired files that appears in the API response leads to 404 + errors (by 2023-10-17). The files are available in the following url: + ``https://zenodo.org/records/{article_id}/files/{file_name}?download=1``. """ - files = {item["key"]: item for item in self.api_response["files"]} - if file_name not in files: + # Check if file exists in the repository + filenames = [item["filename"] for item in self.api_response["files"]] + if file_name not in filenames: raise ValueError( f"File '{file_name}' not found in data archive {self.archive_url} (doi:{self.doi})." 
) - download_url = files[file_name]["links"]["self"] + # Build download url + article_id = self.api_response["id"] + download_url = ( + f"https://zenodo.org/records/{article_id}/files/{file_name}?download=1" + ) return download_url def populate_registry(self, pooch): @@ -824,10 +836,15 @@ def populate_registry(self, pooch): ---------- pooch : Pooch The pooch instance that the registry will be added to. + + Notes + ----- + After Zenodo migrated to InvenioRDM on Oct 2023, their API changed. The + checksums for each file listed in the API reference is now an md5 sum. """ for filedata in self.api_response["files"]: - pooch.registry[filedata["key"]] = filedata["checksum"] + pooch.registry[filedata["filename"]] = "md5:" + filedata["checksum"] class FigshareRepository(DataRepository): # pylint: disable=missing-class-docstring @@ -938,7 +955,8 @@ def download_url(self, file_name): files = {item["name"]: item for item in self.api_response} if file_name not in files: raise ValueError( - f"File '{file_name}' not found in data archive {self.archive_url} (doi:{self.doi})." + f"File '{file_name}' not found in data archive " + f"{self.archive_url} (doi:{self.doi})." ) download_url = files[file_name]["download_url"] return download_url From c6be96a51409efde95e20bb9460875e763c2a6bd Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 17 Oct 2023 11:08:53 -0700 Subject: [PATCH 2/4] Shorten line with error message --- pooch/downloaders.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pooch/downloaders.py b/pooch/downloaders.py index aee4b98b..288c6999 100644 --- a/pooch/downloaders.py +++ b/pooch/downloaders.py @@ -819,7 +819,8 @@ def download_url(self, file_name): filenames = [item["filename"] for item in self.api_response["files"]] if file_name not in filenames: raise ValueError( - f"File '{file_name}' not found in data archive {self.archive_url} (doi:{self.doi})." + f"File '{file_name}' not found in data archive " + f"{self.archive_url} (doi:{self.doi})." 
) # Build download url article_id = self.api_response["id"] From de956d5001c8ddb843e7b58f79755f52bef40808 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 17 Oct 2023 11:09:49 -0700 Subject: [PATCH 3/4] Restore unrelated line --- pooch/downloaders.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pooch/downloaders.py b/pooch/downloaders.py index 288c6999..50ed6a8b 100644 --- a/pooch/downloaders.py +++ b/pooch/downloaders.py @@ -956,8 +956,7 @@ def download_url(self, file_name): files = {item["name"]: item for item in self.api_response} if file_name not in files: raise ValueError( - f"File '{file_name}' not found in data archive " - f"{self.archive_url} (doi:{self.doi})." + f"File '{file_name}' not found in data archive {self.archive_url} (doi:{self.doi})." ) download_url = files[file_name]["download_url"] return download_url From 23e205784a24804bc0247c391e64c2acd12a71f8 Mon Sep 17 00:00:00 2001 From: Santiago Soler Date: Tue, 17 Oct 2023 11:11:05 -0700 Subject: [PATCH 4/4] Use f-string instead of str concatenation --- pooch/downloaders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pooch/downloaders.py b/pooch/downloaders.py index 50ed6a8b..47cdb5fc 100644 --- a/pooch/downloaders.py +++ b/pooch/downloaders.py @@ -845,7 +845,7 @@ def populate_registry(self, pooch): """ for filedata in self.api_response["files"]: - pooch.registry[filedata["filename"]] = "md5:" + filedata["checksum"] + pooch.registry[filedata["filename"]] = f"md5:{filedata['checksum']}" class FigshareRepository(DataRepository): # pylint: disable=missing-class-docstring