Skip to content

Commit

Permalink
Store server responses for local tests. Create stac_item description o…
Browse files Browse the repository at this point in the history
…n disk for test comparisons. Normalize serviceType using Enum. Add utility functions to bypass siphon.
  • Loading branch information
huard committed Nov 17, 2023
1 parent ab8264b commit 658e4b4
Show file tree
Hide file tree
Showing 6 changed files with 628 additions and 39 deletions.
131 changes: 105 additions & 26 deletions STACpopulator/stac_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@
import requests
import xncml
import xmltodict
import urllib

import urllib.parse
from pathlib import Path
import numpy as np
import pystac
import yaml
from colorlog import ColoredFormatter

from enum import Enum
from STACpopulator.models import STACItem

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -84,7 +84,13 @@ def collection2literal(collection, property="label"):


def thredds_catalog_attrs(url: str) -> dict:
"""Return attributes from the catalog.xml THREDDS server response."""
"""Return attributes from the catalog.xml THREDDS server response.
Parameters
----------
url : str
Link to the THREDDS catalog URL.
"""
xml = requests.get(url).text

raw = xmltodict.parse(
Expand All @@ -98,8 +104,8 @@ def thredds_catalog_attrs(url: str) -> dict:
return raw


def ncattrs(url: str) -> dict:
"""Return attributes from a THREDDS netCDF dataset."""
def catalog_url(url: str) -> (str, str):
"""Given a THREDDS link to a netCDF file, return a link to its catalog and the file name."""

pr = urllib.parse.urlparse(url)
scheme, netloc, path, params, query, frag = pr
Expand All @@ -111,30 +117,76 @@ def ncattrs(url: str) -> dict:

if path.endswith("catalog.html"):
path = path.replace("catalog.html", "catalog.xml")

# Ideally we would create targeted queries for one dataset, but we're missing the dataset name.
# query = ""
else:
nc = path.split("/")[-1]
path = path.replace(nc, "catalog.xml")

# Get catalog information about available services
catalog = urllib.parse.urlunparse((scheme, netloc, path, "", query, ""))
cattrs = thredds_catalog_attrs(catalog)["catalog"]
cid = cattrs["dataset"]["@ID"]

if not query:
cid += f"/{nc}"
return catalog, nc


def access_urls(catalog_url: str, ds: str) -> dict:
    """Return THREDDS endpoints for the catalog and dataset.

    Parameters
    ----------
    catalog_url : str
        URI to the THREDDS catalog.
    ds : str
        Dataset path relative to the catalog.

    Returns
    -------
    dict
        Mapping from the normalized service type name (the `ServiceType` member value,
        e.g. "NcML") to the URL of that service for the dataset.
    """
    # Get catalog information about available services
    cattrs = thredds_catalog_attrs(catalog_url)["catalog"]

    pr = urllib.parse.urlparse(str(catalog_url))

    # The dataset name is appended to the catalog's dataset ID only when the catalog URL
    # carries no query string (presumably a query already targets one dataset — confirm).
    cid = cattrs["dataset"]["@ID"]
    if not pr.query:
        cid += f"/{ds}"

    services = cattrs["service"]["service"]
    if isinstance(services, dict):
        # A lone <service> child may be parsed as a dict instead of a list of dicts.
        services = [services]

    # Get service URLs for the dataset
    urls = {}
    for service in services:
        service_type = ServiceType.from_value(service["@serviceType"]).value
        urls[service_type] = f'{pr.scheme}://{pr.netloc}{service["@base"]}{cid}'

    return urls


def ncml_attrs(ncml_url: str) -> dict:
    """Return attributes from the NcML response of a THREDDS dataset.

    Parameters
    ----------
    ncml_url : str
        URI to the NcML dataset description, either a remote server URL or path to a local xml file.
        NOTE(review): the body only fetches through `requests.get`; confirm that local
        paths are actually supported.

    Returns
    -------
    dict
        Dictionary produced by `xncml.Dataset.to_cf_dict`, with its "attributes" entry
        passed through `numpy_to_python_datatypes`.
    """
    xml = requests.get(ncml_url).text

    # Get dataset attributes
    attrs = xncml.Dataset.from_text(xml).to_cf_dict()
    attrs["attributes"] = numpy_to_python_datatypes(attrs["attributes"])
    return attrs


def ds_attrs(url: str) -> dict:
    """Return attributes from the NcML response of a THREDDS dataset and access URLs from the THREDDS server.

    Parameters
    ----------
    url : str
        URL to the THREDDS netCDF file

    Returns
    -------
    dict
        The result of `ncml_attrs` for the dataset's "NcML" endpoint, with an extra
        "access_urls" entry holding the mapping returned by `access_urls`.
    """
    urls = access_urls(*catalog_url(url))
    attrs = ncml_attrs(urls["NcML"])

    # Include service attributes
    attrs["access_urls"] = urls
    return attrs


Expand Down Expand Up @@ -263,18 +315,10 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop
return item


# Lookup from "<name>_service" keys to service type names.
# NOTE(review): the enclosing hunk shrinks from 18 to 10 lines, so this mapping appears to be
# on the removed side of the diff — confirm before relying on it.
asset_name_remaps = {
"httpserver_service": "HTTPServer",
"opendap_service": "OPENDAP",
"wcs_service": "WCS",
"wms_service": "WMS",
"nccs_service": "NetcdfSubset",
}

media_types = {
"HTTPServer": "application/x-netcdf",
"OPENDAP": pystac.MediaType.HTML,
"NCML": pystac.MediaType.XML,
"OpenDAP": pystac.MediaType.HTML,
"NcML": pystac.MediaType.XML,
"WCS": pystac.MediaType.XML,
"WMS": pystac.MediaType.XML,
"NetcdfSubset": "application/x-netcdf",
Expand All @@ -284,11 +328,46 @@ def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_prop

# Roles assigned to each asset, keyed by service type name.
# "OpenDAP" and "NcML" are the spellings used as ServiceType member values; the all-caps
# "OPENDAP" and "NCML" keys are not values that ServiceType.from_value(...).value can produce.
asset_roles = {
"HTTPServer": ["data"],
"OPENDAP": ["data"],
"OpenDAP": ["data"],
"WCS": ["data"],
"WMS": ["visual"],
"NetcdfSubset": ["data"],
"NCML": ["metadata"],
"NcML": ["metadata"],
"ISO": ["metadata"],
"UDDC": ["metadata"]
}


class ServiceType(Enum):
    """THREDDS service types with their canonical spelling.

    Each member name is the lower-cased form of its value, which is what lets
    `from_value` resolve a service type string regardless of its case.
    """

    adde = "ADDE"
    dap4 = "DAP4"
    dods = "DODS"  # same as OpenDAP
    opendap = "OpenDAP"
    opendapg = "OpenDAPG"
    netcdfsubset = "NetcdfSubset"
    cdmremote = "CdmRemote"
    cdmfeature = "CdmFeature"
    ncjson = "ncJSON"
    h5service = "H5Service"
    httpserver = "HTTPServer"
    ftp = "FTP"
    gridftp = "GridFTP"
    file = "File"
    iso = "ISO"
    las = "LAS"
    ncml = "NcML"
    uddc = "UDDC"
    wcs = "WCS"
    wms = "WMS"
    wsdl = "WSDL"
    webform = "WebForm"
    catalog = "Catalog"
    compound = "Compound"
    resolver = "Resolver"
    thredds = "THREDDS"

    @classmethod
    def from_value(cls, value):
        """Return the member matching `value`, irrespective of case.

        Parameters
        ----------
        value : str or ServiceType
            Service type name in any case (e.g. "OPENDAP", "ncml"), or an existing member,
            which is returned unchanged.

        Returns
        -------
        ServiceType
            The matching member; use `.value` for the canonical spelling.

        Raises
        ------
        KeyError
            If `value` does not correspond to any member.
        """
        if isinstance(value, cls):
            return value
        try:
            # Member names are the lower-cased values, so a name lookup is case-insensitive.
            return cls[value.lower()]
        except KeyError:
            # Report the caller's original input rather than the lower-cased lookup key.
            raise KeyError(f"Unknown THREDDS service type: {value!r}") from None

74 changes: 74 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import urllib.parse
import json
import pytest
import responses
from responses import _recorder
from pathlib import Path
import requests
from STACpopulator.stac_utils import catalog_url, access_urls, ds_attrs
from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import CMIP6ItemProperties, CMIP6populator
from STACpopulator.models import GeoJSONPolygon
from STACpopulator.stac_utils import STAC_item_from_metadata


# THREDDS dataset links used to record server responses and build reference items:
# a direct link to a netCDF file, and a catalog.html link carrying a `dataset` query.
URLS = [
    "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/birdhouse/testdata/xclim/cmip6"
    "/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc",
    "https://psl.noaa.gov/thredds/catalog/Datasets/20thC_ReanV2/Monthlies/gaussian/monolevel/catalog.html?dataset=Datasets/20thC_ReanV2/Monthlies/gaussian/monolevel/air.2m.mon.mean.nc",
]

DATA = Path(__file__).parent / "data"


def reference_path_from_url(url):
    """Build the local path of the JSON file holding the STAC item dict for `url`.

    The file sits under ``DATA / "references"``, in a folder named after the second-to-last
    "/"-separated piece of the catalog link, and carries the dataset file name with a
    ``.json`` suffix.
    """
    catalog_link, file_name = catalog_url(url)
    folder = catalog_link.split("/")[-2]
    json_name = Path(file_name).with_suffix(".json")
    return DATA / "references" / folder / json_name


@_recorder.record(file_path=DATA / "responses.yaml")
def store_responses():
    """Store server responses.

    Run this if new URLs are added, if remote THREDDS servers are updated or their configuration changed.
    Every request made while this function runs is recorded to ``DATA / "responses.yaml"``.
    """
    for url in URLS:
        # Request to catalog link
        catalog_link, nc = catalog_url(url)
        requests.get(catalog_link)

        # Request to NcML link. access_urls keys are normalized ServiceType values,
        # so the NcML endpoint is stored under "NcML" (not "NCML").
        ncml_link = access_urls(catalog_link, nc)["NcML"]
        requests.get(ncml_link)


@responses.activate
def create_reference_items(overwrite=False):
    """Store json representation of STAC item dict created from stored XML responses.

    - Run after store_responses() to update the expected STAC item representation.
    - Run if the STAC item representation changes.

    Parameters
    ----------
    overwrite : bool
        If True, regenerate reference files that already exist on disk.
    """
    # Get server responses from files stored on disk
    responses._add_from_file(file_path=DATA / "responses.yaml")

    for url in URLS:
        reference_path = reference_path_from_url(url)

        if overwrite or not reference_path.exists():
            reference_path.parent.mkdir(parents=True, exist_ok=True)

            # ds_attrs takes the dataset URL only and resolves the catalog and NcML links itself.
            attrs = ds_attrs(url)

            # Only CMIP6 datasets have an item builder here; other URLs produce no reference file.
            if "cmip6" in url:
                stac_item_id = CMIP6populator.make_cmip6_item_id(attrs["attributes"])
                stac_item = STAC_item_from_metadata(stac_item_id, attrs, CMIP6ItemProperties, GeoJSONPolygon)
                reference_path.write_text(json.dumps(stac_item.to_dict()))
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"type": "Feature", "stac_version": "1.0.0", "id": "ScenarioMIP_CCCma_CanESM5_ssp245_r13i1p2f1_SImon_siconc_gn", "properties": {"start_datetime": "2019-12-06T12:00:00Z", "end_datetime": "2020-11-04T12:00:00Z", "datetime": null, "cmip6:Conventions": "CF-1.7 CMIP-6.2", "cmip6:activity_id": "ScenarioMIP", "cmip6:creation_date": "2019-09-25T23:01:33Z", "cmip6:data_specs_version": "01.00.30", "cmip6:experiment": "update of RCP4.5 based on SSP2", "cmip6:experiment_id": "ssp245", "cmip6:frequency": "mon", "cmip6:further_info_url": "https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp245.none.r13i1p2f1", "cmip6:grid_label": "gn", "cmip6:institution": "Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada", "cmip6:institution_id": "CCCma", "cmip6:nominal_resolution": "100 km", "cmip6:realm": ["seaIce"], "cmip6:source": "CanESM5 (2019): \naerosol: interactive\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\natmosChem: specified oxidants for aerosols\nland: CLASS3.6/CTEM1.2\nlandIce: specified ice sheets\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\nseaIce: LIM2", "cmip6:source_id": "CanESM5", "cmip6:source_type": ["AOGCM"], "cmip6:sub_experiment": "none", "cmip6:sub_experiment_id": "none", "cmip6:table_id": "SImon", "cmip6:variable_id": "siconc", "cmip6:variant_label": "r13i1p2f1", "cmip6:initialization_index": 1, "cmip6:physics_index": 2, "cmip6:realization_index": 13, "cmip6:forcing_index": 1, "cmip6:tracking_id": "hdl:21.14100/9e4f804b-c161-44fa-acd1-c2e94e220c95", "cmip6:version": "v20190429", "cmip6:product": "model-output", "cmip6:license": "CMIP6 model data produced by The Government of 
Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.", "cmip6:grid": "ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m", "cmip6:mip_era": "CMIP6"}, "geometry": {"type": "Polygon", "coordinates": [[[0.049800001084804535, -78.39350128173828], [0.049800001084804535, 89.74176788330078], [359.99493408203125, 89.74176788330078], [359.99493408203125, -78.39350128173828], [0.049800001084804535, -78.39350128173828]]]}, "links": [{"rel": "source", "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/x-netcdf", "title": "birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc"}], "assets": {"HTTPServer": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/x-netcdf", "roles": ["data"]}, "OpenDAP": {"href": 
"https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "text/html", "roles": ["data"]}, "NcML": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/xml", "roles": ["metadata"]}, "UDDC": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/uddc/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "text/html", "roles": ["metadata"]}, "ISO": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/iso/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/xml", "roles": ["metadata"]}, "WCS": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wcs/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/xml", "roles": ["data"]}, "WMS": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wms/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/xml", "roles": ["visual"]}, "NetcdfSubset": {"href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncss/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", "type": "application/x-netcdf", "roles": ["data"]}}, "bbox": [0.049800001084804535, -78.39350128173828, 359.99493408203125, 89.74176788330078], "stac_extensions": []}
Loading

0 comments on commit 658e4b4

Please sign in to comment.