diff --git a/DAGs.md b/DAGs.md index 86b6b7dc7..3937c5526 100644 --- a/DAGs.md +++ b/DAGs.md @@ -78,7 +78,7 @@ The following are DAGs grouped by their primary tag: | `museum_victoria_workflow` | `@monthly` | `False` | image | | [`nappy_workflow`](#nappy_workflow) | `@monthly` | `False` | image | | `nypl_workflow` | `@monthly` | `False` | image | -| [`phylopic_workflow`](#phylopic_workflow) | `@daily` | `True` | image | +| [`phylopic_workflow`](#phylopic_workflow) | `@weekly` | `False` | image | | [`rawpixel_workflow`](#rawpixel_workflow) | `@monthly` | `False` | image | | [`science_museum_workflow`](#science_museum_workflow) | `@monthly` | `False` | image | | [`smithsonian_workflow`](#smithsonian_workflow) | `@weekly` | `False` | image | @@ -461,7 +461,7 @@ ETL Process: Use the API to identify all CC licensed images. Output: TSV file containing the image, their respective meta-data. -Notes: http://phylopic.org/api/ No rate limit specified. +Notes: http://api-docs.phylopic.org/v2/ No rate limit specified. ## `phylopic_workflow` @@ -471,7 +471,7 @@ ETL Process: Use the API to identify all CC licensed images. Output: TSV file containing the image, their respective meta-data. -Notes: http://phylopic.org/api/ No rate limit specified. +Notes: http://api-docs.phylopic.org/v2/ No rate limit specified. ## `pr_review_reminders` diff --git a/openverse_catalog/dags/providers/provider_api_scripts/phylopic.py b/openverse_catalog/dags/providers/provider_api_scripts/phylopic.py index 25c7292f6..8172eddba 100644 --- a/openverse_catalog/dags/providers/provider_api_scripts/phylopic.py +++ b/openverse_catalog/dags/providers/provider_api_scripts/phylopic.py @@ -6,13 +6,11 @@ Output: TSV file containing the image, their respective meta-data. -Notes: http://phylopic.org/api/ +Notes: http://api-docs.phylopic.org/v2/ No rate limit specified. """ -import argparse import logging -from datetime import date, timedelta from common import constants from common.licenses import get_license_info @@ -25,200 +23,112 @@ class PhylopicDataIngester(ProviderDataIngester): delay = 5 - host = "http://phylopic.org" - # Use "base_endpoint" since class's "endpoint" parameter gets defined as a property - base_endpoint = f"{host}/api/a/image" + host = "https://www.phylopic.org" + endpoint = "https://api.phylopic.org/images" providers = {constants.IMAGE: prov.PHYLOPIC_DEFAULT_PROVIDER} - batch_limit = 25 - @property - def endpoint(self) -> str: - """ - Due to the way this DAG is run (via a dated range), **only one request is ever - issued** to retrieve all updated IDs. As such, it will only - return one endpoint. - """ - list_endpoint = f"{self.base_endpoint}/list" - # Process for a given date - end_date = (date.fromisoformat(self.date) + timedelta(days=1)).isoformat() - # Get a list of objects uploaded/updated within a date range - # http://phylopic.org/api/#method-image-time-range - endpoint = f"{list_endpoint}/modified/{self.date}/{end_date}" - logger.info(f"Constructed endpoint: {endpoint}") - return endpoint + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.current_page = 1 + self.total_pages = 0 + self.build_param = 0 + + def ingest_records(self): + self._get_initial_query_params() + super().ingest_records() + + def _get_initial_query_params(self) -> None: + """Get the required `build` param from the API and set the total pages.""" + resp = self.get_response_json(query_params={}) + if not resp: + raise Exception("No response from Phylopic API.") + self.build_param = resp.get("build") + self.total_pages = resp.get("totalPages") + logger.info( + f"Total items to fetch: {resp.get('totalItems')}. " + f"Total pages: {self.total_pages}." + ) def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: - """Noop since the range is determined by the endpoint property.""" - return {} + if prev_query_params is not None: + self.current_page += 1 - def get_should_continue(self, response_json): - """ - Override for upstream "return True". This DAG will only ever make 1 query so - they should not continue to loop. - """ - return False + return { + "build": self.build_param, + "page": self.current_page - 1, # PhyloPic pages are 0-indexed. + "embed_items": "true", + } - @staticmethod - def _get_response_data(response_json) -> dict | list | None: - """Intermediate method for pulling out results from a Phylopic API request.""" - if response_json and response_json.get("success") is True: - return response_json.get("result") + def get_should_continue(self, response_json): + logger.debug(f"Processing page {self.current_page} of {self.total_pages}.") + return self.current_page < self.total_pages def get_batch_data(self, response_json): - """ - Process the returned IDs. - - The Phylopic API returns only lists of IDs in the initial request. We must take - this request and iterate through all the IDs to get the metadata for each one. - """ - data = self._get_response_data(response_json) - - if not data: - logger.warning("No content available!") - return None - - return data + return response_json.get("_embedded", {}).get("items", []) - @staticmethod - def _image_url(uid: str) -> str: - return f"{PhylopicDataIngester.host}/image/{uid}" + def _get_creator(self, data: dict) -> tuple[str | None, str | None]: + creator_name = data.get("title") + href = data.get("href") + creator_url = self.host + href if href else None + return creator_name, creator_url @staticmethod - def _get_image_info( - result: dict, uid: str - ) -> tuple[str | None, int | None, int | None]: - img_url = None - width = None - height = None - - image_info = result.get("pngFiles") - if image_info: - images = list( - filter(lambda x: (int(str(x.get("width", "0"))) >= 257), image_info) - ) - if images: - image = sorted(images, key=lambda x: x["width"], reverse=True)[0] - img_url = image.get("url") - if not img_url: - logging.warning( - "Image not detected in url: " - f"{PhylopicDataIngester._image_url(uid)}" - ) - else: - img_url = f"{PhylopicDataIngester.host}{img_url}" - width = image.get("width") - height = image.get("height") - - return img_url, width, height + def _get_image_sizes(data: dict) -> tuple[int | None, int | None]: + width, height = None, None + sizes = data.get("sourceFile", {}).get("sizes") + if sizes and "x" in sizes: + width, height = sizes.split("x") + # SVG sizes include decimal points so we get an approximation. + width, height = int(float(width)), int(float(height)) + return width, height - @staticmethod - def _get_taxa_details(result: dict) -> tuple[list[str] | None, str]: - taxa = result.get("taxa", []) - taxa_list = None - title = "" - if taxa: - taxa = [ - _.get("canonicalName") - for _ in taxa - if _.get("canonicalName") is not None - ] - taxa_list = [_.get("string", "") for _ in taxa] - - if taxa_list: - title = taxa_list[0] - - return taxa_list, title + def get_record_data(self, data: dict) -> dict | list[dict] | None: + """ + Get the data for a single image record. - @staticmethod - def _get_creator_details(result: dict) -> tuple[str | None, str | None, str | None]: - credit_line = None - pub_date = None - creator = None - submitter = result.get("submitter", {}) - first_name = submitter.get("firstName") - last_name = submitter.get("lastName") - if first_name and last_name: - creator = f"{first_name} {last_name}".strip() - - if credit := result.get("credit"): - credit_line = credit.strip() - pub_date = result.get("submitted").strip() - - return creator, credit_line, pub_date + TODO: Adapt `url` and `creator_url` to avoid redirects. + """ - def get_record_data(self, data: dict) -> dict | list[dict] | None: - uid = data.get("uid") + uid = data.get("uuid") if not uid: - return - logger.debug(f"Processing UUID: {uid}") - params = { - "options": " ".join( - [ - "credit", - "licenseURL", - "pngFiles", - "submitted", - "submitter", - "taxa", - "canonicalName", - "string", - "firstName", - "lastName", - ] - ) - } - endpoint = f"{self.base_endpoint}/{uid}" - response_json = self.get_response_json(params, endpoint) - result = self._get_response_data(response_json) - if not result: return None - meta_data = {} - uid = result.get("uid") - license_url = result.get("licenseURL") - - img_url, width, height = self._get_image_info(result, uid) - - if img_url is None: + data = data.get("_links", {}) + license_url = data.get("license", {}).get("href") + img_url = data.get("sourceFile", {}).get("href") + foreign_url = data.get("self", {}).get("href") + if not license_url or not img_url or not foreign_url: return None - meta_data["taxa"], title = self._get_taxa_details(result) - - foreign_url = self._image_url(uid) + foreign_url = self.host + foreign_url - ( - creator, - meta_data["credit_line"], - meta_data["pub_date"], - ) = self._get_creator_details(result) + title = data.get("self", {}).get("title") + creator, creator_url = self._get_creator(data.get("contributor", {})) + width, height = self._get_image_sizes(data) return { + "license_info": get_license_info(license_url=license_url), "foreign_identifier": uid, "foreign_landing_url": foreign_url, "image_url": img_url, - "license_info": get_license_info(license_url=license_url), + "title": title, + "creator": creator, + "creator_url": creator_url, "width": width, "height": height, - "creator": creator, - "title": title, - "meta_data": meta_data, + # TODO: Evaluate whether to include upstream thumbnails. + # Sizes available: 192x192, 128x128, 64x64. + # "thumbnail": thumbnail, + # TODO: Evaluate whether to include nodes' titles as tags. + # "tags": tags, } -def main(date: str = None): +def main(): logger.info("Begin: Phylopic provider script") - ingester = PhylopicDataIngester(date=date) + ingester = PhylopicDataIngester() ingester.ingest_records() if __name__ == "__main__": - parser = argparse.ArgumentParser(description="PhyloPic API Job", add_help=True) - parser.add_argument( - "--date", - default=None, - help="Identify all images updated on a particular date (YYYY-MM-DD).", - ) - - args = parser.parse_args() - - main(args.date) + main() diff --git a/openverse_catalog/dags/providers/provider_workflows.py b/openverse_catalog/dags/providers/provider_workflows.py index 8a9167dc8..1c97930eb 100644 --- a/openverse_catalog/dags/providers/provider_workflows.py +++ b/openverse_catalog/dags/providers/provider_workflows.py @@ -257,8 +257,7 @@ def __post_init__(self): ProviderWorkflow( ingester_class=PhylopicDataIngester, start_date=datetime(2011, 2, 7), - schedule_string="@daily", - dated=True, + schedule_string="@weekly", pull_timeout=timedelta(hours=12), ), ProviderWorkflow( diff --git a/tests/dags/providers/provider_api_scripts/resources/phylopic/correct_meta_data_example.json b/tests/dags/providers/provider_api_scripts/resources/phylopic/correct_meta_data_example.json deleted file mode 100644 index dd5267587..000000000 --- a/tests/dags/providers/provider_api_scripts/resources/phylopic/correct_meta_data_example.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "result": { - "credit": "Jonathan Wells", - "licenseURL": "http://creativecommons.org/publicdomain/zero/1.0/", - "pngFiles": [ - { - "height": 64, - "url": "/assets/images/submissions/e9df48fe-68ea-419e-b9df-441e0b208335.64.png", - "width": 52 - }, - { - "height": 128, - "url": "/assets/images/submissions/e9df48fe-68ea-419e-b9df-441e0b208335.128.png", - "width": 105 - }, - { - "height": 256, - "url": "/assets/images/submissions/e9df48fe-68ea-419e-b9df-441e0b208335.256.png", - "width": 211 - }, - { - "height": 512, - "url": "/assets/images/submissions/e9df48fe-68ea-419e-b9df-441e0b208335.512.png", - "width": 423 - }, - { - "height": 1024, - "url": "/assets/images/submissions/e9df48fe-68ea-419e-b9df-441e0b208335.1024.png", - "width": 847 - } - ], - "submitted": "2020-02-26 11:59:53", - "submitter": { - "firstName": "Jonathan", - "lastName": "Wells", - "uid": "8d28b7ec-05f0-4624-8459-b73f1404a7d8" - }, - "taxa": [ - { - "canonicalName": { - "string": "Chondrus crispus NODC Taxonomic Code, database (version 8.0) 1996", - "uid": "0faa584e-5f44-4520-8c5b-d2e8c7d50292" - } - } - ], - "uid": "e9df48fe-68ea-419e-b9df-441e0b208335" - }, - "success": true -} diff --git a/tests/dags/providers/provider_api_scripts/resources/phylopic/image_ids_example.json b/tests/dags/providers/provider_api_scripts/resources/phylopic/image_ids_example.json deleted file mode 100644 index 12f7b57d8..000000000 --- a/tests/dags/providers/provider_api_scripts/resources/phylopic/image_ids_example.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "result": [ - { - "uid": "863694ac-9f36-40f5-9452-1b435337d9cc" - }, - { - "uid": "329ff574-4bec-4f94-9dd6-9acfec2a6275" - }, - { - "uid": "9c98ff56-8044-483e-b9f1-bf368e4f3322" - } - ], - "success": true -} diff --git a/tests/dags/providers/provider_api_scripts/resources/phylopic/initial_request.json b/tests/dags/providers/provider_api_scripts/resources/phylopic/initial_request.json new file mode 100644 index 000000000..f94fe435a --- /dev/null +++ b/tests/dags/providers/provider_api_scripts/resources/phylopic/initial_request.json @@ -0,0 +1,17 @@ +{ + "_links": { + "firstPage": { + "href": "/images?build=194&page=0" + }, + "lastPage": { + "href": "/images?build=194&page=144" + }, + "self": { + "href": "/images?build=194" + } + }, + "build": 194, + "itemsPerPage": 48, + "totalItems": 6934, + "totalPages": 145 +} diff --git a/tests/dags/providers/provider_api_scripts/resources/phylopic/no_image_url_example.json b/tests/dags/providers/provider_api_scripts/resources/phylopic/no_image_url_example.json deleted file mode 100644 index 1058eff1c..000000000 --- a/tests/dags/providers/provider_api_scripts/resources/phylopic/no_image_url_example.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "result": { - "credit": "Ferran Sayol", - "licenseURL": "http://creativecommons.org/publicdomain/zero/1.0/", - "pngFiles": [ - { - "height": 28, - "url": "/assets/images/submissions/7f7431c6-8f78-498b-92e2-ebf8882a8923.64.png", - "width": 64 - }, - { - "height": 57, - "url": "/assets/images/submissions/7f7431c6-8f78-498b-92e2-ebf8882a8923.128.png", - "width": 128 - }, - { - "height": 72, - "url": "/assets/images/submissions/7f7431c6-8f78-498b-92e2-ebf8882a8923.original.png", - "width": 160 - } - ], - "submitted": "2020-01-28 04:31:19", - "submitter": { - "firstName": "Ferran", - "lastName": "Sayol", - "uid": "9fc252de-ba65-45a6-9d1f-42c45bdfab88" - }, - "taxa": [ - { - "canonicalName": { - "string": "Martes Pinel, 1792", - "uid": "add61d86-ae83-4f65-978f-7ecaf51bbfde" - } - }, - { - "canonicalName": { - "string": "Martes foina (Erxleben, 1777)", - "uid": "cf99c5f9-0f06-42ec-bc55-9795f4f0e485" - } - }, - { - "canonicalName": { - "string": "Guloninae Gray 1825", - "uid": "82b42c9d-91b3-4048-8f73-f54891c93a59" - } - } - ], - "uid": "7f7431c6-8f78-498b-92e2-ebf8882a8923" - }, - "success": true -} diff --git a/tests/dags/providers/provider_api_scripts/resources/phylopic/sample_record.json b/tests/dags/providers/provider_api_scripts/resources/phylopic/sample_record.json new file mode 100644 index 000000000..26574c129 --- /dev/null +++ b/tests/dags/providers/provider_api_scripts/resources/phylopic/sample_record.json @@ -0,0 +1,92 @@ +{ + "_links": { + "contributor": { + "href": "/contributors/c3ac6939-e85a-4a10-99d1-4079537f34de?build=194", + "title": "Andy Wilson" + }, + "generalNode": { + "href": "/nodes/402efb6b-634b-4fe4-ac6e-547e8c37a437?build=194", + "title": "Hemaris" + }, + "http://ogp.me/ns#image": { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/social/1200x628.png", + "sizes": "1200x628", + "type": "image/png" + }, + "license": { + "href": "https://creativecommons.org/publicdomain/zero/1.0/" + }, + "nodes": [ + { + "href": "/nodes/010fedfe-6a4f-4676-a886-4722448ec536?build=194", + "title": "Hemaris tityus" + }, + { + "href": "/nodes/402efb6b-634b-4fe4-ac6e-547e8c37a437?build=194", + "title": "Hemaris" + } + ], + "rasterFiles": [ + { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/raster/1536x1439.png", + "sizes": "1536x1439", + "type": "image/png" + }, + { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/raster/1024x959.png", + "sizes": "1024x959", + "type": "image/png" + }, + { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/raster/512x480.png", + "sizes": "512x480", + "type": "image/png" + } + ], + "self": { + "href": "/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02?build=194", + "title": "Hemaris tityus" + }, + "sourceFile": { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/source.svg", + "sizes": "2048x2048", + "type": "image/svg+xml" + }, + "specificNode": { + "href": "/nodes/010fedfe-6a4f-4676-a886-4722448ec536?build=194", + "title": "Hemaris tityus" + }, + "thumbnailFiles": [ + { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/thumbnail/192x192.png", + "sizes": "192x192", + "type": "image/png" + }, + { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/thumbnail/128x128.png", + "sizes": "128x128", + "type": "image/png" + }, + { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/thumbnail/64x64.png", + "sizes": "64x64", + "type": "image/png" + } + ], + "twitter:image": { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/social/1200x628.png", + "sizes": "1200x628", + "type": "image/png" + }, + "vectorFile": { + "href": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/vector.svg", + "sizes": "1197.7451x1121.9073", + "type": "image/svg+xml" + } + }, + "attribution": "Andy Wilson", + "build": 194, + "created": "2023-03-24T10:45:31.187Z", + "sponsor": null, + "uuid": "5b1e88b5-159d-495d-b8cb-04f9e28d2f02" +} diff --git a/tests/dags/providers/provider_api_scripts/test_phylopic.py b/tests/dags/providers/provider_api_scripts/test_phylopic.py index cb91f18de..daf33c9de 100644 --- a/tests/dags/providers/provider_api_scripts/test_phylopic.py +++ b/tests/dags/providers/provider_api_scripts/test_phylopic.py @@ -4,7 +4,7 @@ import pytest -from common.licenses import LicenseInfo +from common.licenses import get_license_info from providers.provider_api_scripts.phylopic import PhylopicDataIngester @@ -12,142 +12,99 @@ pp = PhylopicDataIngester() -@pytest.fixture -def image_data(): - yield get_json("correct_meta_data_example.json") - - def get_json(filename): with open(RESOURCES / filename) as f: return json.load(f) -@pytest.fixture -def ingester() -> PhylopicDataIngester: - _pp = PhylopicDataIngester() - yield _pp +def test__get_initial_query_params(): + with patch.object(pp, "get_response_json", return_value={}), pytest.raises( + Exception + ): + pp._get_initial_query_params() + data = get_json("initial_request.json") + with patch.object(pp, "get_response_json", return_value=data): + pp._get_initial_query_params() -def test_endpoint(ingester): - expected_endpoint = ( - "http://phylopic.org/api/a/image/list/modified/2022-01-01/2022-01-02" - ) - ingester.date = "2022-01-01" - assert ingester.endpoint == expected_endpoint + assert pp.build_param == 194 + assert pp.total_pages == 145 @pytest.mark.parametrize( - "response_json, expected", + "current_page, prev_query_params, expected_query_params", [ - # Empty cases - (None, None), - ({}, None), - ({"other": "yes"}, None), - # Failure - ({"success": False}, None), - # Success, but no results - ({"success": True}, None), - # Success with results - ({"success": True, "result": 123}, 123), + (1, None, {"build": 111, "page": 0, "embed_items": "true"}), # First call + ( # Second call + 1, + {"build": 111, "page": 0, "embed_items": "true"}, + {"build": 111, "page": 1, "embed_items": "true"}, + ), + ( # Third call + 2, + {"build": 111, "page": 1, "embed_items": "true"}, + {"build": 111, "page": 2, "embed_items": "true"}, + ), + ( # Random intermediate call + 50, + {"build": 111, "page": 1, "embed_items": "true"}, + {"build": 111, "page": 50, "embed_items": "true"}, + ), ], ) -def test_get_response_data(response_json, expected): - actual = pp._get_response_data(response_json) - assert actual == expected - - -def test_get_batch_data(): - r = get_json("image_ids_example.json") - actual_img_ids = [data.get("uid") for data in pp.get_batch_data(r)] - expect_img_ids = [ - "863694ac-9f36-40f5-9452-1b435337d9cc", - "329ff574-4bec-4f94-9dd6-9acfec2a6275", - "9c98ff56-8044-483e-b9f1-bf368e4f3322", - ] - assert actual_img_ids == expect_img_ids - - -def test_get_creator_details(image_data): - result = image_data["result"] - actual_creator_details = pp._get_creator_details(result) - expect_creator_details = ( - "Jonathan Wells", - "Jonathan Wells", - "2020-02-26 11:59:53", - ) - assert actual_creator_details == expect_creator_details +def test_get_next_query_params(current_page, prev_query_params, expected_query_params): + pp.build_param = 111 + pp.current_page = current_page + actual_query_params = pp.get_next_query_params(prev_query_params) + assert actual_query_params == expected_query_params -def test_get_taxa_details(image_data): - result = image_data["result"] - actual_taxa = pp._get_taxa_details(result) - expect_taxa = ( - ["Chondrus crispus NODC Taxonomic Code, database (version 8.0) 1996"], - "Chondrus crispus NODC Taxonomic Code, database (version 8.0) 1996", - ) - assert actual_taxa == expect_taxa - - -def test_get_record_data(image_data): - image_uuid = "e9df48fe-68ea-419e-b9df-441e0b208335" - expected = { - "foreign_identifier": image_uuid, - "foreign_landing_url": f"http://phylopic.org/image/{image_uuid}", - "width": 847, - "height": 1024, - "creator": "Jonathan Wells", - "title": "Chondrus crispus NODC Taxonomic Code, database (version 8.0) 1996", - "image_url": "http://phylopic.org/assets/images/submissions/e9df48fe-68ea-419e-b9df-441e0b208335.1024.png", - "license_info": LicenseInfo( - license="cc0", - version="1.0", - url="https://creativecommons.org/publicdomain/zero/1.0/", - raw_url="http://creativecommons.org/publicdomain/zero/1.0/", - ), - "meta_data": { - "taxa": [ - "Chondrus crispus NODC Taxonomic Code, database (version 8.0) 1996" - ], - "credit_line": "Jonathan Wells", - "pub_date": "2020-02-26 11:59:53", - }, - } - with patch.object(pp, "get_response_json", return_value=image_data): - actual = pp.get_record_data({"uid": image_uuid}) - assert actual == expected - -def test_get_record_data_no_data(): - actual = pp.get_record_data({}) - assert actual is None +@pytest.mark.parametrize( + "contributor_data, expected_creator", + [ + ({}, (None, None)), + ({"title": "Jane Doe", "href": ""}, ("Jane Doe", None)), + ( + {"title": "Jane Doe", "href": "/contributors/uuid?build=123"}, + ("Jane Doe", "https://www.phylopic.org/contributors/uuid?build=123"), + ), + ], +) +def test__get_creator(contributor_data, expected_creator): + actual_creator = pp._get_creator(contributor_data) + assert actual_creator == expected_creator -def test_get_record_data_with_no_img_url(): - r = get_json("no_image_url_example.json") - with patch.object(pp, "get_response_json", return_value=r): - actual = pp.get_record_data({"uid": r["result"]["uid"]}) - assert actual is None +@pytest.mark.parametrize( + "data, expected_sizes", + [ + ({}, (None, None)), + ({"sourceFile": {}}, (None, None)), + ({"sourceFile": {"sizes": "123x321"}}, (123, 321)), + ({"sourceFile": {"sizes": "413.39108x272.68854"}}, (413, 272)), + ], +) +def test__get_image_sizes(data, expected_sizes): + actual_sizes = pp._get_image_sizes(data) + assert actual_sizes == expected_sizes -def test_get_image_info(image_data): - result = image_data["result"] - actual_img_info = pp._get_image_info(result, "e9df48fe-68ea-419e-b9df-441e0b208335") - expect_img_info = ( - ( - "http://phylopic.org/assets/images/submissions/e9df48fe-68ea-" - "419e-b9df-441e0b208335.1024.png" - ), - 847, - 1024, +def test_get_record_data(): + data = get_json("sample_record.json") + image = pp.get_record_data(data) + license_info = get_license_info( + license_url="https://creativecommons.org/publicdomain/zero/1.0/" ) - assert actual_img_info == expect_img_info - -def test_get_image_info_with_no_img_url(): - r = get_json("no_image_url_example.json") - result = r["result"] - actual_img_info = list( - pp._get_image_info(result, "7f7431c6-8f78-498b-92e2-ebf8882a8923") - ) - expect_img_info = [None, None, None] - assert actual_img_info == expect_img_info + assert image == { + "license_info": license_info, + "foreign_identifier": "5b1e88b5-159d-495d-b8cb-04f9e28d2f02", + "foreign_landing_url": "https://www.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02?build=194", + "image_url": "https://images.phylopic.org/images/5b1e88b5-159d-495d-b8cb-04f9e28d2f02/source.svg", + "title": "Hemaris tityus", + "creator": "Andy Wilson", + "creator_url": "https://www.phylopic.org/contributors/c3ac6939-e85a-4a10-99d1-4079537f34de?build=194", + "width": 2048, + "height": 2048, + }