From efb4cd3bbcc9ca98bcec944d08b238259df5f567 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sun, 15 Oct 2023 13:43:15 +0100 Subject: [PATCH 1/5] Attempt to fix Zenodo Done by inspecting API responses - https://zenodo.org/api/records/3242074 - https://zenodo.org/api/records/3242074/files --- repo2docker/contentproviders/zenodo.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 5d02f723b..b597cf6d7 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -24,17 +24,19 @@ def __init__(self): "http://sandbox.zenodo.org/record/", ], "api": "https://sandbox.zenodo.org/api/records/", - "filepath": "files", - "filename": "filename", - "download": "links.download", + "files": "links.files", + "filepath": "entries", + "filename": "key", + "download": "links.content", "type": "metadata.upload_type", }, { "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"], "api": "https://zenodo.org/api/records/", - "filepath": "files", - "filename": "filename", - "download": "links.download", + "files": "links.files", + "filepath": "entries", + "filename": "key", + "download": "links.content", "type": "metadata.upload_type", }, { @@ -43,6 +45,7 @@ def __init__(self): "http://data.caltech.edu/records/", ], "api": "https://data.caltech.edu/api/record/", + "files": "", "filepath": "metadata.electronic_location_and_access", "filename": "electronic_name.0", "download": "uniform_resource_identifier", @@ -69,9 +72,17 @@ def fetch(self, spec, output_dir, yield_output=False): f'{host["api"]}{record_id}', headers={"accept": "application/json"}, ) - record = resp.json() + if host["files"]: + yield f"Fetching Zenodo record {record_id} files.\n" + files_url = deep_get(record, host["files"]) + resp = self.urlopen( + files_url, + headers={"accept": "application/json"}, + ) + record = resp.json() + files = deep_get(record, host["filepath"]) only_one_file = len(files) == 1 for file_ref in files: From 74a94cf2b0234b9129e613be6c94c13b1d3652b6 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sun, 15 Oct 2023 15:08:19 +0100 Subject: [PATCH 2/5] Test resolving DOI This helps distinguish temporary(?) errors in the DOI resolution from errors in the content provider --- tests/unit/contentproviders/test_doi.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/unit/contentproviders/test_doi.py b/tests/unit/contentproviders/test_doi.py index dab3d7fcd..301b50beb 100644 --- a/tests/unit/contentproviders/test_doi.py +++ b/tests/unit/contentproviders/test_doi.py @@ -30,8 +30,14 @@ def test_url_headers(requests_mock): assert result.request.headers["User-Agent"] == f"repo2docker {__version__}" -def test_unresolving_doi(): +@pytest.mark.parametrize( + "requested_doi, expected", + [ + ("10.5281/zenodo.3242074", "https://zenodo.org/records/3242074"), + # Unresolving DOI: + ("10.1/1234", "10.1/1234"), + ], +) +def test_doi2url(requested_doi, expected): doi = DoiProvider() - - fakedoi = "10.1/1234" - assert doi.doi2url(fakedoi) is fakedoi + assert doi.doi2url(requested_doi) == expected From ebdd121b053cc86e9189025ed8a0ee7f74053944 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sun, 15 Oct 2023 15:54:27 +0100 Subject: [PATCH 3/5] Update zenodo mocks --- tests/unit/contentproviders/test_zenodo.py | 68 ++++++++++++++++++---- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/tests/unit/contentproviders/test_zenodo.py b/tests/unit/contentproviders/test_zenodo.py index 0755163da..38737ae1a 100644 --- a/tests/unit/contentproviders/test_zenodo.py +++ b/tests/unit/contentproviders/test_zenodo.py @@ -92,16 +92,31 @@ def test_fetch_software_from_github_archive(requests_mock): # we "fetch" a local ZIP file to simulate a Zenodo record created from a # GitHub repository via the Zenodo-GitHub integration with zenodo_archive() as zen_path: - mock_response = { + mock_record = { "files": [ { "filename": "some_dir/afake.zip", - "links": {"download": f"file://{zen_path}"}, } ], + "links": { + "files": "https://zenodo.org/api/records/1234/files", + }, "metadata": {"upload_type": "other"}, } - requests_mock.get("https://zenodo.org/api/records/1234", json=mock_response) + requests_mock.get("https://zenodo.org/api/records/1234", json=mock_record) + + mock_record_files = { + "entries": [ + { + "key": "some_dir/afake.zip", + "links": {"content": f"file://{zen_path}"}, + } + ], + } + requests_mock.get( + "https://zenodo.org/api/records/1234/files", json=mock_record_files + ) + requests_mock.get(f"file://{zen_path}", content=open(zen_path, "rb").read()) zen = Zenodo() @@ -121,18 +136,33 @@ def test_fetch_software(requests_mock): # we "fetch" a local ZIP file to simulate a Zenodo software record with a # ZIP file in it with zenodo_archive() as zen_path: - mock_response = { + mock_record = { "files": [ { # this is the difference to the GitHub generated one, # the ZIP file isn't in a directory "filename": "afake.zip", - "links": {"download": f"file://{zen_path}"}, } ], + "links": { + "files": "https://zenodo.org/api/records/1234/files", + }, "metadata": {"upload_type": "software"}, } - requests_mock.get("https://zenodo.org/api/records/1234", json=mock_response) + requests_mock.get("https://zenodo.org/api/records/1234", json=mock_record) + + mock_record_files = { + "entries": [ + { + "key": "afake.zip", + "links": {"content": f"file://{zen_path}"}, + } + ], + } + requests_mock.get( + "https://zenodo.org/api/records/1234/files", json=mock_record_files + ) + requests_mock.get(f"file://{zen_path}", content=open(zen_path, "rb").read()) with TemporaryDirectory() as d: @@ -151,20 +181,38 @@ def test_fetch_data(requests_mock): # we "fetch" a local ZIP file to simulate a Zenodo data record with zenodo_archive() as a_zen_path: with zenodo_archive() as b_zen_path: - mock_response = { + mock_record = { "files": [ { "filename": "afake.zip", - "links": {"download": f"file://{a_zen_path}"}, }, { "filename": "bfake.zip", - "links": {"download": f"file://{b_zen_path}"}, }, ], + "links": { + "files": "https://zenodo.org/api/records/1234/files", + }, "metadata": {"upload_type": "data"}, } - requests_mock.get("https://zenodo.org/api/records/1234", json=mock_response) + requests_mock.get("https://zenodo.org/api/records/1234", json=mock_record) + + mock_record_files = { + "entries": [ + { + "key": "afake.zip", + "links": {"content": f"file://{a_zen_path}"}, + }, + { + "key": "bfake.zip", + "links": {"content": f"file://{b_zen_path}"}, + }, + ], + } + requests_mock.get( + "https://zenodo.org/api/records/1234/files", json=mock_record_files + ) + requests_mock.get( f"file://{a_zen_path}", content=open(a_zen_path, "rb").read() ) From 1fc8f192ee6288d668004417520d2d1af5cb611d Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sun, 15 Oct 2023 17:17:23 +0100 Subject: [PATCH 4/5] doi2url: Don't ignore non-404 errors --- repo2docker/contentproviders/doi.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/repo2docker/contentproviders/doi.py b/repo2docker/contentproviders/doi.py index 03fa0171c..065602b7a 100644 --- a/repo2docker/contentproviders/doi.py +++ b/repo2docker/contentproviders/doi.py @@ -51,9 +51,15 @@ def doi2url(self, doi): try: resp = self._request(f"https://doi.org/{doi}") resp.raise_for_status() - # If the DOI doesn't resolve, just return URL - except HTTPError: - return doi + except HTTPError as e: + # If the DOI doesn't exist, just return URL + if e.response.status_code == 404: + return doi + # Reraise any other errors because if the DOI service is down (or + # we hit a rate limit) we don't want to silently continue to the + # default Git provider as this leads to a misleading error. + logging.error(f"DOI {doi} does not resolve: {e}") + raise return resp.url else: # Just return what is actulally just a URL From fc7de2768b990e9a3cb1eac53a6896998bf190b6 Mon Sep 17 00:00:00 2001 From: Simon Li Date: Sun, 15 Oct 2023 21:17:51 +0100 Subject: [PATCH 5/5] Zenodo DOI now returning https://zenodo.org/records/ instead of https://zenodo.org/record/ --- repo2docker/contentproviders/zenodo.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index b597cf6d7..6982c3a7c 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -22,6 +22,7 @@ def __init__(self): "hostname": [ "https://sandbox.zenodo.org/record/", "http://sandbox.zenodo.org/record/", + "http://sandbox.zenodo.org/records/", ], "api": "https://sandbox.zenodo.org/api/records/", "files": "links.files", @@ -31,7 +32,11 @@ def __init__(self): "type": "metadata.upload_type", }, { - "hostname": ["https://zenodo.org/record/", "http://zenodo.org/record/"], + "hostname": [ + "https://zenodo.org/record/", + "http://zenodo.org/record/", + "https://zenodo.org/records/", + ], "api": "https://zenodo.org/api/records/", "files": "links.files", "filepath": "entries",