From ac84e55801f30422a364b8b98b763d94bc57d88d Mon Sep 17 00:00:00 2001 From: vinny Date: Fri, 22 Mar 2024 11:36:29 -0400 Subject: [PATCH] HARMONY-1714: Update docs and func name --- harmony/harmony.py | 27 +++++++++++++++++++++++---- tests/test_client.py | 4 ++-- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/harmony/harmony.py b/harmony/harmony.py index f98143a..6260915 100644 --- a/harmony/harmony.py +++ b/harmony/harmony.py @@ -1079,7 +1079,16 @@ def result_urls(self, if link['rel'] == 'data': yield link['href'] - def _is_staged_result(self, url): + def _is_staged_result(self, url: str) -> str: + """Check if the URL indicates that the data is associated with actual + service outputs (as opposed to a download link for example). + + Args: + url: The location (URL) of the file to be downloaded + + Returns: + A boolean indicating whether the data is staged data. + """ url_parts = url.split('/') possible_uuid = url_parts[-3] possible_item_id = url_parts[-2] @@ -1093,7 +1102,16 @@ def _is_staged_result(self, url): return False return True - def get_filename_from_url(self, url): + def get_download_filename_from_url(self, url: str) -> str: + """Returns the filename for a URL. It will include + a Harmony generated ID if the data is staged. + + Args: + url: The location (URL) of the file to be downloaded + + Returns: + The filename that will be used to name the downloaded file. + """ url_parts = url.split('/') original_filename = url_parts[-1] @@ -1110,7 +1128,8 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) via environment variable DOWNLOAD_CHUNK_SIZE. Filenames are automatically determined by using the latter portion of the provided URL - and will be prefixed by the item id generated by Harmony (when data was transformed from the original). + and will be prefixed by the item id generated by Harmony when data was transformed + from the original. 
Args: url: The location (URL) of the file to be downloaded @@ -1125,7 +1144,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False) """ chunksize = int(self.config.DOWNLOAD_CHUNK_SIZE) session = self._session() - filename = self.get_filename_from_url(url) + filename = self.get_download_filename_from_url(url) if directory: filename = os.path.join(directory, filename) diff --git a/tests/test_client.py b/tests/test_client.py index af37dbf..8bc079a 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1165,13 +1165,13 @@ def side_effect_for_get_json(extra_links) -> List[str]: def test_get_file_name_staged_link(): # For staged results, the filename should get prefixed with the work item id, to avoid collisions client = Client(should_validate_auth=False) - actual_file_name = client.get_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/staging-bucket/a7aee059-7531-4388-86e0-85af1de9c31a/1047412/C1254854453-LARC_CLOUD_merged.nc4') + actual_file_name = client.get_download_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/staging-bucket/a7aee059-7531-4388-86e0-85af1de9c31a/1047412/C1254854453-LARC_CLOUD_merged.nc4') assert actual_file_name == '1047412_C1254854453-LARC_CLOUD_merged.nc4' def test_get_file_name_non_staged_link(): # In this case, e.g. for a direct download data link, the filename should just be the last part of the URL path client = Client(should_validate_auth=False) - actual_file_name = client.get_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/test-data/C1261703151-EEDTEST/ATL08_20181014001049_02350102_006_02.h5') + actual_file_name = client.get_download_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/test-data/C1261703151-EEDTEST/ATL08_20181014001049_02350102_006_02.h5') assert actual_file_name == 'ATL08_20181014001049_02350102_006_02.h5' @pytest.mark.parametrize('link_type', [LinkType.http, LinkType.https, LinkType.s3])