Skip to content

Commit

Permalink
HARMONY-1714: Update docs and func name
Browse files Browse the repository at this point in the history
  • Loading branch information
vinnyinverso committed Mar 22, 2024
1 parent 58beca2 commit ac84e55
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
27 changes: 23 additions & 4 deletions harmony/harmony.py
Original file line number Diff line number Diff line change
Expand Up @@ -1079,7 +1079,16 @@ def result_urls(self,
if link['rel'] == 'data':
yield link['href']

def _is_staged_result(self, url):
def _is_staged_result(self, url: str) -> str:
"""Check if the URL indicates that the data is associated with actual
service outputs (as opposed to a download link for example).
Args:
url: The location (URL) of the file to be downloaded
Returns:
A boolean indicating whether the data is staged data.
"""
url_parts = url.split('/')
possible_uuid = url_parts[-3]
possible_item_id = url_parts[-2]
Expand All @@ -1093,7 +1102,16 @@ def _is_staged_result(self, url):
return False
return True

def get_filename_from_url(self, url):
def get_download_filename_from_url(self, url: str) -> str:
"""Returns the filename for a URL. It will include
a Harmony generated ID if the data is staged.
Args:
url: The location (URL) of the file to be downloaded
Returns:
The filename that will be used to name the downloaded file.
"""
url_parts = url.split('/')
original_filename = url_parts[-1]

Expand All @@ -1110,7 +1128,8 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False)
via environment variable DOWNLOAD_CHUNK_SIZE.
Filenames are automatically determined by using the latter portion of the provided URL
and will be prefixed by the item id generated by Harmony (when data was transformed from the original).
and will be prefixed by the item id generated by Harmony when data was transformed
from the original.
Args:
url: The location (URL) of the file to be downloaded
Expand All @@ -1125,7 +1144,7 @@ def _download_file(self, url: str, directory: str = '', overwrite: bool = False)
"""
chunksize = int(self.config.DOWNLOAD_CHUNK_SIZE)
session = self._session()
filename = self.get_filename_from_url(url)
filename = self.get_download_filename_from_url(url)

if directory:
filename = os.path.join(directory, filename)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1165,13 +1165,13 @@ def side_effect_for_get_json(extra_links) -> List[str]:
def test_get_file_name_staged_link():
# For staged results, the filename should get prefixed with the work item id, to avoid collisions
client = Client(should_validate_auth=False)
actual_file_name = client.get_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/staging-bucket/a7aee059-7531-4388-86e0-85af1de9c31a/1047412/C1254854453-LARC_CLOUD_merged.nc4')
actual_file_name = client.get_download_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/staging-bucket/a7aee059-7531-4388-86e0-85af1de9c31a/1047412/C1254854453-LARC_CLOUD_merged.nc4')
assert actual_file_name == '1047412_C1254854453-LARC_CLOUD_merged.nc4'

def test_get_file_name_non_staged_link():
# In this case, e.g. for a direct download data link, the filename should just be the last part of the URL path
client = Client(should_validate_auth=False)
actual_file_name = client.get_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/test-data/C1261703151-EEDTEST/ATL08_20181014001049_02350102_006_02.h5')
actual_file_name = client.get_download_filename_from_url('https://harmony.earthdata.nasa.gov/service-results/test-data/C1261703151-EEDTEST/ATL08_20181014001049_02350102_006_02.h5')
assert actual_file_name == 'ATL08_20181014001049_02350102_006_02.h5'

@pytest.mark.parametrize('link_type', [LinkType.http, LinkType.https, LinkType.s3])
Expand Down

0 comments on commit ac84e55

Please sign in to comment.