diff --git a/erddapy/core/url.py b/erddapy/core/url.py
index 14d226a..e5b86f6 100644
--- a/erddapy/core/url.py
+++ b/erddapy/core/url.py
@@ -439,3 +439,54 @@ def get_download_url(
 
     url = _distinct(url, distinct)
     return url
+
+
+# File types accepted by ``download_file``; a request for any other type is
+# rejected with ``ValueError`` before a download is attempted.
+download_formats = [
+    "asc",
+    "csv",
+    "csvp",
+    "csv0",
+    "dataTable",
+    "das",
+    "dds",
+    "dods",
+    "esriCsv",
+    "fgdc",
+    "geoJson",
+    "graph",
+    "help",
+    "html",
+    "iso19115",
+    "itx",
+    "json",
+    "jsonlCSV1",
+    "jsonlCSV",
+    "jsonlKVP",
+    "mat",
+    "nc",
+    "ncHeader",
+    "ncCF",
+    "ncCFHeader",
+    "ncCFMA",
+    "ncCFMAHeader",
+    "nccsv",
+    "nccsvMetadata",
+    "ncoJson",
+    "odvTxt",
+    "subset",
+    "tsv",
+    "tsvp",
+    "tsv0",
+    "wav",
+    "xhtml",
+    "kml",
+    "smallPdf",
+    "pdf",
+    "largePdf",
+    "smallPng",
+    "png",
+    "largePng",
+    "transparentPng",
+]
diff --git a/erddapy/erddapy.py b/erddapy/erddapy.py
index b896fd9..0a9fe6a 100644
--- a/erddapy/erddapy.py
+++ b/erddapy/erddapy.py
@@ -1,7 +1,10 @@
 """Pythonic way to access ERDDAP data."""
 
 import functools
+import hashlib
+from pathlib import Path
 from typing import Dict, List, Optional, Tuple, Union
+from urllib.request import urlretrieve
 
 import pandas as pd
 
@@ -16,6 +19,7 @@
     _distinct,
     _format_constraints_url,
     _quote_string_constraints,
+    download_formats,
     get_categorize_url,
     get_download_url,
     get_info_url,
@@ -471,3 +475,54 @@ def get_var_by_attr(self, dataset_id: OptionalStr = None, **kwargs) -> List[str]
             if has_value_flag is True:
                 vs.append(vname)
         return vs
+
+    def download_file(
+        self,
+        file_type: str,
+    ) -> Path:
+        """
+        Download the dataset to a local file in the requested format.
+
+        The file is written relative to the current working directory and
+        named ``<dataset_id>_<hash>.<file_type>``. The hash is computed from
+        the sorted constraints and variables, so an identical request reuses
+        the existing file instead of downloading it again.
+
+        Args:
+            file_type: response format, with or without leading dots
+                (``"nc"`` and ``".nc"`` are equivalent).
+
+        Returns:
+            Path of the downloaded, or already present, file.
+
+        Raises:
+            ValueError: if ``file_type`` is not in ``download_formats``.
+
+        """
+        file_type = file_type.lstrip(".")
+        if file_type not in download_formats:
+            raise ValueError(
+                f"Requested filetype {file_type} not available on ERDDAP",
+            )
+        url = self.get_download_url(response=file_type)
+        # Unset (None) constraints/variables hash like empty ones instead of
+        # raising; sorting makes the name independent of their order.
+        constraints = {} if self.constraints is None else self.constraints
+        variables = [] if self.variables is None else self.variables
+        constraints_str = str(dict(sorted(constraints.items()))) + str(
+            sorted(variables),
+        )
+        constraints_hash = hashlib.shake_256(constraints_str.encode()).hexdigest(5)
+        file_name = Path(f"{self.dataset_id}_{constraints_hash}.{file_type}")
+        if not file_name.exists():
+            # Fetch under a temporary name and rename on success, so an
+            # interrupted transfer is never mistaken for a cached file.
+            partial = file_name.with_name(f"{file_name.name}.part")
+            try:
+                urlretrieve(url, partial)
+                partial.replace(file_name)
+            finally:
+                if partial.exists():
+                    partial.unlink()
+        return file_name
diff --git a/tests/test_to_objects.py b/tests/test_to_objects.py
index e3e0bbf..708fd61 100644
--- a/tests/test_to_objects.py
+++ b/tests/test_to_objects.py
@@ -197,3 +197,19 @@ def test_to_iris_griddap(dataset_griddap):
     """Test converting griddap to an iris cube."""
     cubes = dataset_griddap.to_iris()
     assert isinstance(cubes, iris.cube.CubeList)
+
+
+@pytest.mark.web
+def test_download_file(dataset_tabledap, tmp_path, monkeypatch):
+    """Test direct download of tabledap dataset."""
+    # Download into a scratch directory so a stale file from an earlier run
+    # cannot satisfy the cache check and the working tree stays clean.
+    monkeypatch.chdir(tmp_path)
+    fn = dataset_tabledap.download_file("nc")
+    ds = xr.load_dataset(fn)
+    assert ds["time"].name == "time"
+    assert ds["temperature"].name == "temperature"
+    # The file name hashes the sorted variables, so their order must not matter.
+    dataset_tabledap.variables = dataset_tabledap.variables[::-1]
+    fn_new = dataset_tabledap.download_file("nc")
+    assert fn_new == fn