Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Single server #72

Merged
merged 10 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
os: [windows-latest, ubuntu-latest, macos-latest]
fail-fast: false

Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: check-ast
Expand All @@ -20,7 +20,7 @@ repos:
language_version: python3

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.5.1
rev: v1.6.0
hooks:
- id: mypy
exclude: docs/source/conf.py
Expand Down
9 changes: 0 additions & 9 deletions environment.yml

This file was deleted.

75 changes: 39 additions & 36 deletions gliderpy/fetchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@

from gliderpy.servers import (
server_parameter_rename,
server_select,
server_vars,
)

OptionalStr = Optional[str]

# This defaults to the IOOS glider DAC.
# Defaults to the IOOS glider DAC.
_server = "https://gliders.ioos.us/erddap"


Expand All @@ -36,16 +35,15 @@ def standardise_df(df, dataset_url):
class GliderDataFetcher:
"""
Args:
server: a glider ERDDAP server URL
server: A glider ERDDAP server URL.

Attributes:
dataset_id: a dataset unique id.
constraints: download constraints, default
dataset_id: A dataset unique id.
constraints: Download constraints, defaults same as query.

"""

def __init__(self, server=_server):
server = server_select(server)
self.server = server
self.fetcher = ERDDAP(
server=server,
Expand All @@ -61,9 +59,14 @@ def to_pandas(self):

:return: pandas dataframe with datetime UTC as index
"""
if type(self.datasets) is pd.Series:
if self.fetcher.dataset_id:
df = self.fetcher.to_pandas(
index_col="time (UTC)",
parse_dates=True,
)
elif not self.fetcher.dataset_id and self.datasets is not None:
df_all = []
for dataset_id in self.datasets:
for dataset_id in self.datasets["Dataset ID"]:
self.fetcher.dataset_id = dataset_id
df = self.fetcher.to_pandas(
index_col="time (UTC)",
Expand All @@ -73,20 +76,26 @@ def to_pandas(self):
df = standardise_df(df, dataset_url)
df_all.append(df)
return pd.concat(df_all)
else:
raise ValueError(
f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.",
)

if not self.fetcher.dataset_id:
return None

df = self.fetcher.to_pandas(
index_col="time (UTC)",
parse_dates=True,
)
# Standardize variable names
# Standardize variable names.
dataset_url = self.fetcher.get_download_url().split("?")[0]
df = standardise_df(df, dataset_url)
return df

def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time):
def query(
self,
min_lat,
max_lat,
min_lon,
max_lon,
min_time,
max_time,
delayed=False,
):
"""
Takes user supplied geographical and time constraints and adds them to the query

Expand All @@ -106,7 +115,7 @@ def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time):
"longitude>=": min_lon,
"longitude<=": max_lon,
}
if not self.fetcher.dataset_id:
if not self.datasets:
url = self.fetcher.get_search_url(
search_for="glider",
response="csv",
Expand All @@ -117,27 +126,24 @@ def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time):
min_time=min_time,
max_time=max_time,
)
self.query_url = url
try:
data = urlopen(url)
except httpx.HTTPError as err:
raise Exception(
f"Error, no datasets found in supplied range. Try relaxing your constraints: {self.fetcher.constraints}",
) from err
return None
df = pd.read_csv(data)
self.datasets = df["Dataset ID"]
return df[["Title", "Institution", "Dataset ID"]]

return self

def platform(self, platform):
"""

:param platform: platform and deployment id from ifremer
:return: search query with platform constraint applied
"""
self.fetcher.constraints["platform_deployment="] = platform
return self
df = pd.read_csv(data)[["Title", "Institution", "Dataset ID"]]
if not delayed:
df = df.loc[~df["Dataset ID"].str.endswith("delayed")]
info_urls = [
self.fetcher.get_info_url(dataset_id=dataset_id, response="html")
for dataset_id in df["Dataset ID"]
]
df["info_url"] = info_urls
self.datasets = df
return self.datasets


class DatasetList:
Expand All @@ -146,7 +152,7 @@ class DatasetList:

Attributes:
e: an ERDDAP server instance
TODO: search_terms: A list of terms to search the server for. Multiple terms will be combined as AND
TODO: search_terms: A list of terms to search the server for. Multiple terms will be combined as "AND."

"""

Expand All @@ -166,6 +172,3 @@ def get_ids(self):
return self.dataset_ids
else:
raise ValueError(f"The {self.e.server} does not supported this operation.")
# TODO: List the platform_deployment variable
# if self.e.server == "https://erddap.ifremer.fr/erddap":
# platform_deployment
61 changes: 11 additions & 50 deletions gliderpy/servers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,69 +4,30 @@
"""


# Human-friendly names and acronyms mapped to ERDDAP server addresses.
# server_select() compares these keys case-insensitively and returns the
# mapped address.
# NOTE(review): the OOI and ifremer addresses below are not keys of
# server_vars (ifremer is keyed there as "http://www.ifremer.fr/erddap" and
# the OOI address is absent) — confirm whether that is intended.
server_alias = {
"National Glider Data Assembly Center": "https://gliders.ioos.us/erddap",
"NGDAC": "https://gliders.ioos.us/erddap",
"IOOS": "https://gliders.ioos.us/erddap",
"Ocean Observatories Initiative": "https://erddap.dataexplorer.oceanobservatories.org/erddap/index.html",
"OOI": "https://erddap.dataexplorer.oceanobservatories.org/erddap/index.html",
"Institut français de recherche pour l'exploitation de la mer": "https://www.ifremer.fr/erddap",
"ifremer": "https://www.ifremer.fr/erddap",
"ifremer.fr": "https://www.ifremer.fr/erddap",
}

# Lists of variable names keyed by ERDDAP server address
# (presumably the variables requested from each server — verify against caller).
# server_select() treats the keys of this dict as the recognised servers.
# NOTE(review): "pressure" appears twice in the first list — confirm whether
# the duplicate is intended.
server_vars = {
"https://gliders.ioos.us/erddap": [
"pressure",
"latitude",
"longitude",
"pressure",
"profile_id",
"salinity",
"temperature",
"time",
],
"http://www.ifremer.fr/erddap": [
"time",
"latitude",
"longitude",
"PSAL",
"TEMP",
"PRES",
],
}

# Maps column labels of the form "name (unit)" (plus a few bare names) to
# short standard names such as "salinity", "temperature" and "pressure".
# Presumably consumed by standardise_df() to rename DataFrame columns —
# verify against caller.
# NOTE(review): several keys are listed twice with the same value (e.g.
# "latitude (degrees_north)", "psal (psu)", "dataset_url"). A repeated key in
# a dict literal keeps the last value, so the duplicates have no effect.
server_parameter_rename = {
"latitude (degrees_north)": "latitude",
"longitude (degrees_east)": "longitude",
"salinity (1)": "salinity",
"psal (psu)": "salinity",
"ctdgv_m_glider_instrument_practical_salinity (1)": "salinity",
"temperature (celsius)": "temperature",
"temp (degree_celsius)": "temperature",
"ctdgv_m_glider_instrument_sci_water_pressure_dbar (dbar)": "pressure",
"ctdgv_m_glider_instrument_sci_water_temp (deg_c)": "temperature",
"dataset_url": "dataset_url",
"latitude (degrees_north)": "latitude",
"longitude (degrees_east)": "longitude",
"pres (decibar)": "pressure",
"pressure (dbar)": "pressure",
"ctdgv_m_glider_instrument_sci_water_pressure_dbar (dbar)": "pressure",
"dataset_url": "dataset_url",
"profile_id": "profile_id",
"psal (psu)": "salinity",
"salinity (1)": "salinity",
"temp (degree_celsius)": "temperature",
"temperature (celsius)": "temperature",
}


def server_select(server_string):
    """
    Resolve a user-supplied string to a known ERDDAP server address.

    Matching is attempted in this order:

    1. exact match against a key of ``server_vars``;
    2. a key of ``server_vars`` contained as a substring of ``server_string``
       (e.g. a longer URL that starts with a known base address);
    3. case-insensitive match against a key of ``server_alias``.

    Args:
        server_string: A server address, a longer URL on a known server,
            or an alias.

    Returns:
        The matching key of ``server_vars`` (cases 1 and 2), or the address
        stored in ``server_alias`` for the matching alias (case 3).

    Raises:
        ValueError: If nothing matches. The message lists the keys of
            ``server_vars``.
    """
    # Exact match: return the string unchanged.
    if server_string in server_vars:
        return server_string
    # The string contains a known base address: return that base address.
    for server in server_vars:
        if server in server_string:
            return server
    # Alias match, ignoring case.
    wanted = server_string.lower()
    for alias, address in server_alias.items():
        if wanted == alias.lower():
            return address
    # Not recognised: report the supported servers.
    # ``list(server_vars)`` renders exactly like the former
    # ``str(server_vars.keys())[10:-1]`` without depending on the
    # ``dict_keys(...)`` repr layout.
    raise ValueError(
        "Supplied server/alias not recognised. Please use one of the following supported servers:\n"
        f"{list(server_vars)}",
    )
28 changes: 20 additions & 8 deletions notebooks/00-quick_intro.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"```\n",
"\n",
"gliderpy aims to make querying and downloading glider data easier.\n",
"Here is how one would build a query using `erddapy`:"
"Here is how one would build a query using erddapy:"
]
},
{
Expand All @@ -43,6 +43,7 @@
" \"longitude\",\n",
" \"salinity\",\n",
" \"temperature\",\n",
" \"profile_id\",\n",
" \"time\",\n",
"]\n",
"\n",
Expand Down Expand Up @@ -79,16 +80,18 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The variable names are standardised by gliderpy, making it easier to fetch from different data sources and comparing the results.\n",
"Much easier, right?\n",
"The variable names are standardized by gliderpy,\n",
"making it easier to fetch from different data sources and compare the results.\n",
"\n",
"`gliderpy` can subset the data on the server side by passing a geographic bounding box and time interval."
"The gliderpy library can subset the data on the server side by passing a geographic bounding box and time interval."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Multiple datasets\n",
"### Querying multiple datasets\n",
"\n",
"The most common use is to search all datasets for data that falls within certain space-time bounds."
]
Expand All @@ -101,7 +104,16 @@
"source": [
"glider_grab = GliderDataFetcher()\n",
"\n",
"glider_grab.query(10, 40, -90, 8, \"2010-01-01\", \"2013-06-02\")\n",
"df = glider_grab.query(10, 40, -90, 8, \"2010-01-01\", \"2013-06-02\")\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = glider_grab.to_pandas()\n",
"df.head()"
]
Expand All @@ -112,7 +124,7 @@
"source": [
"### Dataset search\n",
"\n",
"One can query all dataset_ids available."
"One can query all dataset_ids available in the server."
]
},
{
Expand All @@ -126,7 +138,7 @@
"datasets = DatasetList()\n",
"ds_ids = datasets.get_ids()\n",
"\n",
"print(f\"found {len(ds_ids)} glider datasets on the server {datasets.e.server}\")"
"print(f\"found {len(ds_ids)} glider datasets on the server {datasets.e.server}.\")"
]
}
],
Expand All @@ -146,7 +158,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.6"
}
},
"nbformat": 4,
Expand Down
Loading