diff --git a/Makefile b/Makefile index e9e1f6f..439f93e 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,12 @@ -IMP_DIR = /Users/dchandan/DACCS/Codes/stac-populator/implementations +IMP_DIR = STACpopulator/implementations STAC_HOST = http://localhost:8880/stac testcmip6: - python $(IMP_DIR)/CMIP6-UofT/add_CMIP6.py $(STAC_HOST) https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html $(IMP_DIR)/CMIP6-UofT/CMIP6.yml + python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/catalog/birdhouse/testdata/xclim/cmip6/catalog.html +delcmip6: + curl --location --request DELETE '$(STAC_HOST)/collections/CMIP6_UofT' + @echo "" starthost: docker compose up diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index fdd9a65..35b0dc2 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -1,7 +1,7 @@ import logging import os from typing import Any, Optional -from urllib.parse import urljoin + import requests from colorlog import ColoredFormatter @@ -79,17 +79,16 @@ def post_stac_item( """ item_id = json_data["id"] - r = requests.post(urljoin(stac_host, f"collections/{collection_id}/items"), json=json_data) + r = requests.post(os.path.join(stac_host, f"collections/{collection_id}/items"), json=json_data) if r.status_code == 200: LOGGER.info(f"Item {item_name} successfully added") elif r.status_code == 409: if update: LOGGER.info(f"Item {item_id} already exists. Updating.") - r = requests.put(urljoin(stac_host, f"collections/{collection_id}/items/{item_id}"), json=json_data) + r = requests.put(os.path.join(stac_host, f"collections/{collection_id}/items/{item_id}"), json=json_data) r.raise_for_status() else: LOGGER.info(f"Item {item_id} already exists.") else: r.raise_for_status() - diff --git a/STACpopulator/extensions/cmip6.py b/STACpopulator/extensions/cmip6.py deleted file mode 100644 index 2e75020..0000000 --- a/STACpopulator/extensions/cmip6.py +++ /dev/null @@ -1,179 +0,0 @@ -"""CMIP6 extension based on https://stac-extensions.github.io/cmip6/v1.0.0/schema.json""" - -import json -from typing import Generic, TypeVar, Union, cast - -import pystac -from pystac.extensions.base import ExtensionManagementMixin, PropertiesExtension -from pystac.extensions.hooks import ExtensionHooks - -from datetime import date, datetime -from typing import Any, Dict, List, Literal -import pyessv -from pydantic import ( - AnyHttpUrl, - FieldValidationInfo, - field_validator, - model_serializer, -) -from pydantic.networks import Url - - -from STACpopulator.stac_utils import ItemProperties -from STACpopulator.stac_utils import collection2literal - -T = TypeVar("T", pystac.Collection, pystac.Item, pystac.Asset) - -SCHEMA_URI = "https://stac-extensions.github.io/cmip6/v1.0.0/schema.json" - - -# CMIP6 controlled vocabulary (CV) -CV = pyessv.WCRP.CMIP6 - -# Enum classes built from the pyessv' CV -ActivityID = collection2literal(CV.activity_id) -ExperimentID = collection2literal(CV.experiment_id) -Frequency = collection2literal(CV.frequency) -GridLabel = collection2literal(CV.grid_label) -InstitutionID = collection2literal(CV.institution_id) -NominalResolution = collection2literal(CV.nominal_resolution) -Realm = collection2literal(CV.realm) -SourceID = collection2literal(CV.source_id) -SourceType = collection2literal(CV.source_type) -SubExperimentID = collection2literal(CV.sub_experiment_id) -TableID = collection2literal(CV.table_id) - - -class Properties(ItemProperties, validate_assignment=True): - """Data model for CMIP6 Controlled Vocabulary.""" - - Conventions: str - activity_id: ActivityID - creation_date: datetime - data_specs_version: str - experiment: str - experiment_id: ExperimentID - frequency: Frequency - further_info_url: AnyHttpUrl - grid_label: GridLabel - institution: str - institution_id: InstitutionID - nominal_resolution: NominalResolution - realm: List[Realm] - source: str - source_id: SourceID - source_type: List[SourceType] - sub_experiment: Union[str, Literal["none"]] - sub_experiment_id: Union[SubExperimentID, Literal["none"]] - table_id: TableID - variable_id: str - variant_label: str - initialization_index: int - physics_index: int - realization_index: int - forcing_index: int - tracking_id: str - version: str - product: str - license: str - grid: str - mip_era: str - - - @field_validator("initialization_index", "physics_index", "realization_index", "forcing_index", mode="before") - @classmethod - def first_item(cls, v: list, info: FieldValidationInfo): - """Pick single item from list.""" - assert len(v) == 1, f"{info.field_name} must have one item only." - return v[0] - - @field_validator("realm", "source_type", mode="before") - @classmethod - def split(cls, v: str, info: FieldValidationInfo): - """Split string into list.""" - return v.split(" ") - - @field_validator("version") - @classmethod - def validate_version(cls, v: str, info: FieldValidationInfo): - assert v[0] == "v", "Version string should begin with a lower case 'v'" - assert v[1:].isdigit(), "All characters in version string, except first, should be digits" - return v - - -class CMIP6Extension(Generic[T], ExtensionManagementMixin[pystac.Item], PropertiesExtension): - """An abstract class that can be used to extend the properties of a - :class:`~pystac.Item` with properties from the :stac-ext:`CMIP6 Extension `. - - To create an instance of :class:`CMIP6Extension`, use the :meth:`CMIP6Extension.ext` method. - """ - prefix: str = "cmip6:" - - def apply(self, attrs: Dict[str, Any]) -> None: - """Applies Datacube Extension properties to the extended - :class:`~pystac.Collection`, :class:`~pystac.Item` or :class:`~pystac.Asset`. - - Args: - dimensions : Dictionary mapping dimension name to :class:`Dimension` - objects. - variables : Dictionary mapping variable name to a :class:`Variable` - object. - """ - import json - - p = Properties(**attrs) - - # Add prefix - objs = {self.prefix + k: v for (k, v) in json.loads(p.model_dump_json()).items()} - - # Update item properties - self.properties.update(**objs) - - @classmethod - def get_schema_uri(cls) -> str: - return SCHEMA_URI - - @classmethod - def ext(cls, obj: T, add_if_missing: bool = False): - """Extends the given STAC Object with properties from the :stac-ext:`CMIP6 - Extension `. - - This extension can be applied to instances of :class:`~pystac.Item`. - - Raises: - pystac.ExtensionTypeError : If an invalid object type is passed. - """ - if isinstance(obj, pystac.Item): - cls.validate_has_extension(obj, add_if_missing) - return cast(CMIP6Extension[T], ItemCMIP6Extension(obj)) - else: - raise pystac.ExtensionTypeError(cls._ext_error_message(obj)) - - -class ItemCMIP6Extension(CMIP6Extension[pystac.Item]): - """A concrete implementation of :class:`DatacubeExtension` on an - :class:`~pystac.Item` that extends the properties of the Item to include properties - defined in the :stac-ext:`Datacube Extension `. - - This class should generally not be instantiated directly. Instead, call - :meth:`DatacubeExtension.ext` on an :class:`~pystac.Item` to extend it. - """ - - item: pystac.Item - properties: Dict[str, Any] - - def __init__(self, item: pystac.Item): - self.item = item - self.properties = item.properties - - def __repr__(self) -> str: - return "".format(self.item.id) - - -class CMIP6ExtensionHooks(ExtensionHooks): - schema_uri: str = SCHEMA_URI - prev_extension_ids = {"cmip6"} - stac_object_types = {pystac.STACObjectType.ITEM} - - -CMIP6_EXTENSION_HOOKS: ExtensionHooks = CMIP6ExtensionHooks() diff --git a/STACpopulator/extensions/__init__.py b/STACpopulator/implementations/CMIP6_UofT/__init__.py similarity index 100% rename from STACpopulator/extensions/__init__.py rename to STACpopulator/implementations/CMIP6_UofT/__init__.py diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py new file mode 100644 index 0000000..6d6fedb --- /dev/null +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -0,0 +1,191 @@ +import argparse +import json +import logging +from datetime import datetime +from typing import Any, List, Literal, MutableMapping, Optional + +import pydantic_core +import pyessv +from colorlog import ColoredFormatter +from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator +from pystac.extensions.datacube import DatacubeExtension + +from STACpopulator import STACpopulatorBase +from STACpopulator.implementations.CMIP6_UofT.extensions import DataCubeHelper +from STACpopulator.input import GenericLoader, THREDDSLoader +from STACpopulator.models import GeoJSONPolygon, STACItemProperties +from STACpopulator.stac_utils import STAC_item_from_metadata, collection2literal + +LOGGER = logging.getLogger(__name__) +LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" +formatter = ColoredFormatter(LOGFORMAT) +stream = logging.StreamHandler() +stream.setFormatter(formatter) +LOGGER.addHandler(stream) +LOGGER.setLevel(logging.INFO) +LOGGER.propagate = False + +# CMIP6 controlled vocabulary (CV) +CV = pyessv.WCRP.CMIP6 + +# Enum classes built from the pyessv' CV +ActivityID = collection2literal(CV.activity_id) +ExperimentID = collection2literal(CV.experiment_id) +Frequency = collection2literal(CV.frequency) +GridLabel = collection2literal(CV.grid_label) +InstitutionID = collection2literal(CV.institution_id) +NominalResolution = collection2literal(CV.nominal_resolution) +Realm = collection2literal(CV.realm) +SourceID = collection2literal(CV.source_id) +SourceType = collection2literal(CV.source_type) +SubExperimentID = collection2literal(CV.sub_experiment_id) +TableID = collection2literal(CV.table_id) + + +def add_cmip6_prefix(name: str) -> str: + return "cmip6:" + name if "datetime" not in name else name + + +class CMIP6ItemProperties(STACItemProperties, validate_assignment=True): + """Data model for CMIP6 Controlled Vocabulary.""" + + Conventions: str + activity_id: ActivityID + creation_date: datetime + data_specs_version: str + experiment: str + experiment_id: ExperimentID + frequency: Frequency + further_info_url: AnyHttpUrl + grid_label: GridLabel + institution: str + institution_id: InstitutionID + nominal_resolution: NominalResolution + realm: List[Realm] + source: str + source_id: SourceID + source_type: List[SourceType] + sub_experiment: str | Literal["none"] + sub_experiment_id: SubExperimentID | Literal["none"] + table_id: TableID + variable_id: str + variant_label: str + initialization_index: int + physics_index: int + realization_index: int + forcing_index: int + tracking_id: str = "" + version: str = Field("") + product: str + license: str + grid: str + mip_era: str + + model_config = ConfigDict(alias_generator=add_cmip6_prefix, populate_by_name=True) + + @field_validator("initialization_index", "physics_index", "realization_index", "forcing_index", mode="before") + @classmethod + def only_item(cls, v: list[int], info: FieldValidationInfo): + """Pick single item from list.""" + assert len(v) == 1, f"{info.field_name} must have one item only." + return v[0] + + @field_validator("realm", "source_type", mode="before") + @classmethod + def split(cls, v: str, info: FieldValidationInfo): + """Split string into list.""" + return v.split(" ") + + @field_validator("version") + @classmethod + def validate_version(cls, v: str, info: FieldValidationInfo): + assert v[0] == "v", "Version string should begin with a lower case 'v'" + assert v[1:].isdigit(), "All characters in version string, except first, should be digits" + return v + + +class CMIP6populator(STACpopulatorBase): + item_properties_model = CMIP6ItemProperties + item_geometry_model = GeoJSONPolygon + + def __init__(self, stac_host: str, data_loader: GenericLoader, update: Optional[bool] = False) -> None: + """Constructor + + :param stac_host: URL to the STAC API + :type stac_host: str + :param thredds_catalog_url: the URL to the THREDDS catalog to ingest + :type thredds_catalog_url: str + """ + super().__init__(stac_host, data_loader, update) + + @staticmethod + def make_cmip6_item_id(attrs: MutableMapping[str, Any]) -> str: + """Return a unique ID for CMIP6 data item.""" + keys = [ + "activity_id", + "institution_id", + "source_id", + "experiment_id", + "variant_label", + "table_id", + "variable_id", + "grid_label", + ] + name = "_".join(attrs[k] for k in keys) + return name + + def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + """Creates the STAC item. + + :param item_name: name of the STAC item. Interpretation of name is left to the input loader implementation + :type item_name: str + :param item_data: dictionary like representation of all information on the item + :type item_data: MutableMapping[str, Any] + :return: _description_ + :rtype: MutableMapping[str, Any] + """ + iid = self.make_cmip6_item_id(item_data["attributes"]) + + try: + item = STAC_item_from_metadata(iid, item_data, self.item_properties_model, self.item_geometry_model) + except pydantic_core._pydantic_core.ValidationError: + print(f"ERROR: ValidationError for {iid}") + return -1 + + # Add the CMIP6 STAC extension + item.stac_extensions.append( + "https://raw.githubusercontent.com/TomAugspurger/cmip6/main/json-schema/schema.json" + ) + + # Add datacube extension + try: + dchelper = DataCubeHelper(item_data) + dc_ext = DatacubeExtension.ext(item, add_if_missing=True) + dc_ext.apply(dimensions=dchelper.dimensions, variables=dchelper.variables) + except: + LOGGER.warning(f"Failed to add Datacube extension to item {item_name}") + + # print(json.dumps(item.to_dict())) + return json.loads(json.dumps(item.to_dict())) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(prog="CMIP6 STAC populator") + parser.add_argument("stac_host", type=str, help="STAC API address") + parser.add_argument("thredds_catalog_URL", type=str, help="URL to the CMIP6 THREDDS catalog") + parser.add_argument("--update", action="store_true", help="Update collection and its items") + + args = parser.parse_args() + + LOGGER.info(f"Arguments to call: {args}") + + mode = "full" + + if mode == "full": + data_loader = THREDDSLoader(args.thredds_catalog_URL) + else: + # To be implemented + data_loader = ErrorLoader(args.error_file) + + c = CMIP6populator(args.stac_host, data_loader, args.update) + c.ingest() diff --git a/implementations/CMIP6-UofT/CMIP6.yml b/STACpopulator/implementations/CMIP6_UofT/collection_config.yml similarity index 93% rename from implementations/CMIP6-UofT/CMIP6.yml rename to STACpopulator/implementations/CMIP6_UofT/collection_config.yml index a57875b..0f43c78 100644 --- a/implementations/CMIP6-UofT/CMIP6.yml +++ b/STACpopulator/implementations/CMIP6_UofT/collection_config.yml @@ -1,4 +1,5 @@ title: CMIP6 +id: CMIP6_UofT description: Coupled Model Intercomparison Project phase 6 keywords: ['CMIP', 'CMIP6', 'WCRP', 'Climate Change'] license: "CC-BY-4.0" diff --git a/STACpopulator/implementations/CMIP6_UofT/extensions.py b/STACpopulator/implementations/CMIP6_UofT/extensions.py new file mode 100644 index 0000000..31450a6 --- /dev/null +++ b/STACpopulator/implementations/CMIP6_UofT/extensions.py @@ -0,0 +1,209 @@ +import functools + +from pystac.extensions.datacube import Dimension, DimensionType, Variable, VariableType + +from STACpopulator.stac_utils import ncattrs_to_bbox + + +class DataCubeHelper: + """Return STAC Item from CF JSON metadata, as provided by `xncml.Dataset.to_cf_dict`.""" + + axis = {"X": "x", "Y": "y", "Z": "z", "T": "t", "longitude": "x", "latitude": "y", "vertical": "z", "time": "t"} + + def __init__(self, attrs: dict): + """ + Create STAC Item from CF JSON metadata. + + Parameters + ---------- + iid : str + Unique item ID. + attrs: dict + CF JSON metadata returned by `xncml.Dataset.to_cf_dict`. + datamodel : pydantic.BaseModel, optional + Data model for validating global attributes. + """ + self.attrs = attrs + + # From CF-Xarray + self.coordinate_criteria = { + "latitude": { + "standard_name": ("latitude",), + "units": ("degree_north", "degree_N", "degreeN", "degrees_north", "degrees_N", "degreesN"), + "_CoordinateAxisType": ("Lat",), + "long_name": ("latitude",), + }, + "longitude": { + "standard_name": ("longitude",), + "units": ("degree_east", "degree_E", "degreeE", "degrees_east", "degrees_E", "degreesE"), + "_CoordinateAxisType": ("Lon",), + "long_name": ("longitude",), + }, + "Z": { + "standard_name": ( + "model_level_number", + "atmosphere_ln_pressure_coordinate", + "atmosphere_sigma_coordinate", + "atmosphere_hybrid_sigma_pressure_coordinate", + "atmosphere_hybrid_height_coordinate", + "atmosphere_sleve_coordinate", + "ocean_sigma_coordinate", + "ocean_s_coordinate", + "ocean_s_coordinate_g1", + "ocean_s_coordinate_g2", + "ocean_sigma_z_coordinate", + "ocean_double_sigma_coordinate", + ), + "_CoordinateAxisType": ("GeoZ", "Height", "Pressure"), + "axis": ("Z",), + "cartesian_axis": ("Z",), + "grads_dim": ("z",), + "long_name": ( + "model_level_number", + "atmosphere_ln_pressure_coordinate", + "atmosphere_sigma_coordinate", + "atmosphere_hybrid_sigma_pressure_coordinate", + "atmosphere_hybrid_height_coordinate", + "atmosphere_sleve_coordinate", + "ocean_sigma_coordinate", + "ocean_s_coordinate", + "ocean_s_coordinate_g1", + "ocean_s_coordinate_g2", + "ocean_sigma_z_coordinate", + "ocean_double_sigma_coordinate", + ), + }, + "vertical": { + "standard_name": ( + "air_pressure", + "height", + "depth", + "geopotential_height", + "altitude", + "height_above_geopotential_datum", + "height_above_reference_ellipsoid", + "height_above_mean_sea_level", + ), + "positive": ("up", "down"), + "long_name": ( + "air_pressure", + "height", + "depth", + "geopotential_height", + "altitude", + "height_above_geopotential_datum", + "height_above_reference_ellipsoid", + "height_above_mean_sea_level", + ), + }, + "X": { + "standard_name": ("projection_x_coordinate", "grid_longitude", "projection_x_angular_coordinate"), + "_CoordinateAxisType": ("GeoX",), + "axis": ("X",), + "cartesian_axis": ("X",), + "grads_dim": ("x",), + "long_name": ( + "projection_x_coordinate", + "grid_longitude", + "projection_x_angular_coordinate", + "cell index along first dimension", + ), + }, + "Y": { + "standard_name": ("projection_y_coordinate", "grid_latitude", "projection_y_angular_coordinate"), + "_CoordinateAxisType": ("GeoY",), + "axis": ("Y",), + "cartesian_axis": ("Y",), + "grads_dim": ("y",), + "long_name": ( + "projection_y_coordinate", + "grid_latitude", + "projection_y_angular_coordinate", + "cell index along second dimension", + ), + }, + "T": { + "standard_name": ("time",), + "_CoordinateAxisType": ("Time",), + "axis": ("T",), + "cartesian_axis": ("T",), + "grads_dim": ("t",), + "long_name": ("time",), + }, + "time": { + "standard_name": ("time",), + "_CoordinateAxisType": ("Time",), + "axis": ("T",), + "cartesian_axis": ("T",), + "grads_dim": ("t",), + "long_name": ("time",), + }, + } + + @property + @functools.cache + def dimensions(self) -> dict: + """Return Dimension objects required for Datacube extension.""" + + dims = {} + for name, length in self.attrs["dimensions"].items(): + v = self.attrs["variables"].get(name) + if v: + bbox = ncattrs_to_bbox(self.attrs) + for key, criteria in self.coordinate_criteria.items(): + for criterion, expected in criteria.items(): + if v["attributes"].get(criterion, None) in expected: + axis = self.axis[key] + type_ = DimensionType.SPATIAL if axis in ["x", "y", "z"] else DimensionType.TEMPORAL + + if v["type"] == "int": + extent = [0, int(length)] + else: # Not clear the logic is sound + if key == "X": + extent = bbox[0], bbox[2] + elif key == "Y": + extent = bbox[1], bbox[3] + else: + extent = None + + dims[name] = Dimension( + properties=dict( + axis=axis, + type=type_, + extent=extent, + description=v.get("description", v.get("long_name", criteria["standard_name"])), + ) + ) + + return dims + + @property + @functools.cache + def variables(self) -> dict: + """Return Variable objects required for Datacube extension.""" + variables = {} + + for name, meta in self.attrs["variables"].items(): + if name in self.attrs["dimensions"]: + continue + + attrs = meta["attributes"] + variables[name] = Variable( + properties=dict( + dimensions=meta["shape"], + type=VariableType.AUXILIARY.value if self.is_coordinate(attrs) else VariableType.DATA.value, + description=attrs.get("description", attrs.get("long_name")), + unit=attrs.get("units", None), + ) + ) + return variables + + # @property + # @functools.cache + def is_coordinate(self, attrs: dict) -> bool: + """Return whether variable is a coordinate.""" + for key, criteria in self.coordinate_criteria.items(): + for criterion, expected in criteria.items(): + if attrs.get(criterion, None) in expected: + return True + return False diff --git a/implementations/NEX-GDDP-UofT/add_NEX-GDDP.py b/STACpopulator/implementations/NEX_GDDP_UofT/__init__.py similarity index 100% rename from implementations/NEX-GDDP-UofT/add_NEX-GDDP.py rename to STACpopulator/implementations/NEX_GDDP_UofT/__init__.py diff --git a/STACpopulator/implementations/NEX_GDDP_UofT/add_NEX-GDDP.py b/STACpopulator/implementations/NEX_GDDP_UofT/add_NEX-GDDP.py new file mode 100644 index 0000000..e69de29 diff --git a/STACpopulator/implementations/__init__.py b/STACpopulator/implementations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 088051f..25750c0 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -2,13 +2,15 @@ from abc import ABC, abstractmethod from typing import Any, Iterator, MutableMapping, Optional, Tuple +import pystac import requests import siphon import xncml from colorlog import ColoredFormatter -from numpy import extract from siphon.catalog import TDSCatalog +from STACpopulator.stac_utils import numpy_to_python_datatypes, url_validate + LOGGER = logging.getLogger(__name__) LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" formatter = ColoredFormatter(LOGFORMAT) @@ -21,7 +23,7 @@ class GenericLoader(ABC): def __init__(self) -> None: - pass + self.links = [] @abstractmethod def __iter__(self): @@ -37,10 +39,6 @@ def reset(self): pass - - - - class THREDDSLoader(GenericLoader): def __init__(self, thredds_catalog_url: str, depth: Optional[int] = None) -> None: """Constructor @@ -54,13 +52,41 @@ def __init__(self, thredds_catalog_url: str, depth: Optional[int] = None) -> Non super().__init__() self._depth = depth if depth is not None else 1000 - if thredds_catalog_url.endswith(".html"): - thredds_catalog_url = thredds_catalog_url.replace(".html", ".xml") - LOGGER.info("Converting catalog URL from html to xml") + self.thredds_catalog_URL = self.validate_catalog_url(thredds_catalog_url) - self.thredds_catalog_URL = thredds_catalog_url self.catalog = TDSCatalog(self.thredds_catalog_URL) self.catalog_head = self.catalog + self.links.append(self.magpie_collection_link()) + + def validate_catalog_url(self, url: str) -> str: + """Validate the user-provided catalog URL. + + :param url: URL to the THREDDS catalog + :type url: str + :raises RuntimeError: if URL is invalid or contains query parameters. + :return: a valid URL + :rtype: str + """ + if url_validate(url): + if "?" in url: + raise RuntimeError("THREDDS catalog URL should not contain query parameter") + else: + raise RuntimeError("Invalid URL") + + return url.replace(".html", ".xml") if url.endswith(".html") else url + + def magpie_collection_link(self) -> pystac.Link: + """Creates a PySTAC Link for the collection that is used by Cowbird and Magpie. + + :return: A PySTAC Link + :rtype: pystac.Link + """ + url = self.thredds_catalog_URL + parts = url.split("/") + i = parts.index("catalog") + # service = parts[i - 1] + path = "/".join(parts[i + 1 : -1]) + return pystac.Link(rel="source", target=url, media_type="text/xml", title=path) def reset(self): """Reset the generator.""" @@ -79,18 +105,17 @@ def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: self._depth -= 1 yield from self - def extract_metadata(self, ds: siphon.catalog.Dataset) -> MutableMapping[str, Any]: - # Get URL for NCML service - url = ds.access_urls["NCML"] + def __getitem__(self, dataset): + return self.catalog.datasets[dataset] + def extract_metadata(self, ds: siphon.catalog.Dataset) -> MutableMapping[str, Any]: LOGGER.info("Requesting NcML dataset description") + url = ds.access_urls["NCML"] r = requests.get(url) - # Convert NcML to CF-compliant dictionary attrs = xncml.Dataset.from_text(r.content).to_cf_dict() - + attrs["attributes"] = numpy_to_python_datatypes(attrs["attributes"]) attrs["access_urls"] = ds.access_urls - return attrs diff --git a/STACpopulator/metadata_parsers.py b/STACpopulator/metadata_parsers.py deleted file mode 100644 index 84636f8..0000000 --- a/STACpopulator/metadata_parsers.py +++ /dev/null @@ -1,61 +0,0 @@ -import lxml.etree -import requests - - -def nc_attrs_from_ncml(url): - """Extract attributes from NcML file. - - Parameters - ---------- - url : str - Link to NcML service of THREDDS server for a dataset. - - Returns - ------- - dict - Global attribute values keyed by facet names, with variable attributes in `__variable__` nested dict, and - additional specialized attributes in `__group__` nested dict. - """ - parser = lxml.etree.XMLParser(encoding="UTF-8") - - ns = {"ncml": "http://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2"} - - # Parse XML content - UTF-8 encoded documents need to be read as bytes - xml = requests.get(url).content - doc = lxml.etree.fromstring(xml, parser=parser) - nc = doc.xpath("/ncml:netcdf", namespaces=ns)[0] - - # Extract global attributes - out = _attrib_to_dict(nc.xpath("ncml:attribute", namespaces=ns)) - - # Extract group attributes - gr = {} - for group in nc.xpath("ncml:group", namespaces=ns): - gr[group.attrib["name"]] = _attrib_to_dict(group.xpath("ncml:attribute", namespaces=ns)) - - # Extract variable attributes - va = {} - for variable in nc.xpath("ncml:variable", namespaces=ns): - if "_CoordinateAxisType" in variable.xpath("ncml:attribute/@name", namespaces=ns): - continue - va[variable.attrib["name"]] = _attrib_to_dict(variable.xpath("ncml:attribute", namespaces=ns)) - - out["__group__"] = gr - out["__variable__"] = va - - return out - - -def _attrib_to_dict(elems): - """Convert element attributes to dictionary. - - Ignore attributes with names starting with _ - """ - hidden_prefix = "_" - out = {} - for e in elems: - a = e.attrib - if a["name"].startswith(hidden_prefix): - continue - out[a["name"]] = a["value"] - return out diff --git a/STACpopulator/models.py b/STACpopulator/models.py new file mode 100644 index 0000000..f91dab5 --- /dev/null +++ b/STACpopulator/models.py @@ -0,0 +1,106 @@ +import datetime as dt +from typing import Any, Dict, List, Literal, Optional, Union + +from pydantic import ( + AnyHttpUrl, + AnyUrl, + BaseModel, + Field, + SerializeAsAny, + field_validator, +) + + +class Geometry(BaseModel): + type: str + coordinates: List + + +class GeoJSONPoint(Geometry): + type: Literal["Point"] + coordinates: List[float] + + +class GeoJSONMultiPoint(Geometry): + type: Literal["MultiPoint"] + coordinates: List[List[float]] + + +class GeoJSONPolygon(Geometry): + type: Literal["Polygon"] + coordinates: List[List[List[float]]] + + +class GeoJSONMultiPolygon(Geometry): + type: Literal["MultiPolygon"] + coordinates: List[List[List[List[float]]]] + + +class Asset(BaseModel): + href: AnyHttpUrl + media_type: Optional[str] = None + title: Optional[str] = None + description: Optional[str] = None + roles: Optional[List[str]] = None + + +class STACItemProperties(BaseModel): + """Base STAC Item properties data model. In concrete implementations, users would want to define a new + data model that inherits from this base model and extends it with properties tailored to the data they are + ingesting.""" + + start_datetime: Optional[dt.datetime] = None + end_datetime: Optional[dt.datetime] = None + datetime: Optional[dt.datetime] = None + + @field_validator("datetime", mode="before") + @classmethod + def validate_datetime(cls, v: Union[dt.datetime, str], values: Dict[str, Any]) -> dt: + if v == "null": + if not values["start_datetime"] and not values["end_datetime"]: + raise ValueError("start_datetime and end_datetime must be specified when datetime is null") + + +# class Link(BaseModel): +# """ +# https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#link-object +# """ + +# href: str = Field(..., alias="href", min_length=1) +# rel: str = Field(..., alias="rel", min_length=1) +# type: Optional[str] = None +# title: Optional[str] = None +# # Label extension +# label: Optional[str] = Field(None, alias="label:assets") +# model_config = ConfigDict(use_enum_values=True) + +# def resolve(self, base_url: str) -> None: +# """resolve a link to the given base URL""" +# self.href = urljoin(base_url, self.href) + + +# class PaginationLink(Link): +# """ +# https://github.com/radiantearth/stac-api-spec/blob/master/api-spec.md#paging-extension +# """ + +# rel: Literal["next", "previous"] +# method: Literal["GET", "POST"] +# body: Optional[Dict[Any, Any]] = None +# merge: bool = False + + +# Links = RootModel[List[Union[PaginationLink, Link]]] + + +class STACItem(BaseModel): + """STAC Item data model.""" + + id: str = Field(..., alias="id", min_length=1) + geometry: Optional[SerializeAsAny[Geometry]] = None + bbox: Optional[List[float]] = None + properties: Optional[SerializeAsAny[STACItemProperties]] = None + assets: Dict[str, Asset] = None + stac_extensions: Optional[List[AnyUrl]] = [] + collection: Optional[str] = None + datetime: Optional[dt.datetime] = None # Not in the spec, but needed by pystac.Item. diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index 2541fe7..f8ccb1c 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -1,21 +1,18 @@ -import hashlib import logging from abc import ABC, abstractmethod from datetime import datetime -from typing import Any, MutableMapping +from typing import Any, MutableMapping, Optional import pystac -import yaml from colorlog import ColoredFormatter from STACpopulator.api_requests import ( post_stac_collection, post_stac_item, - stac_collection_exists, stac_host_reachable, ) from STACpopulator.input import GenericLoader -from STACpopulator.stac_utils import url_validate +from STACpopulator.stac_utils import load_collection_configuration, url_validate LOGGER = logging.getLogger(__name__) LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" @@ -32,7 +29,7 @@ def __init__( self, stac_host: str, data_loader: GenericLoader, - collection_info_filename: str, + update: Optional[bool] = False, ) -> None: """Constructor @@ -40,25 +37,16 @@ def __init__( :type stac_host: str :param data_loader: A concrete implementation of the GenericLoader abstract base class :type data_loader: GenericLoader - :param collection_info_filename: Yaml file containing the information about the collection to populate - :type collection_info_filename: str :raises RuntimeError: Raised if one of the required definitions is not found in the collection info filename """ super().__init__() - with open(collection_info_filename) as f: - self._collection_info = yaml.load(f, yaml.Loader) - - req_definitions = ["title", "description", "keywords", "license"] - for req in req_definitions: - if req not in self._collection_info.keys(): - LOGGER.error(f"'{req}' is required in the configuration file") - raise RuntimeError(f"'{req}' is required in the configuration file") + self._collection_info = load_collection_configuration() self._ingest_pipeline = data_loader self._stac_host = self.validate_host(stac_host) + self.update = update - #self._collection_id = hashlib.md5(self.collection_name.encode("utf-8")).hexdigest() self._collection_id = self.collection_name LOGGER.info("Initialization complete") LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}") @@ -74,62 +62,62 @@ def stac_host(self) -> str: @property def collection_id(self) -> str: - return self._collection_id + return self._collection_info["id"] + + @property + @abstractmethod + def item_properties_model(self): + """In derived classes, this property should be defined as a pydantic data model that derives from + models.STACItemProperties.""" + raise NotImplementedError + + @property + @abstractmethod + def item_geometry_model(self): + """In derived classes, this property should be defined as a pydantic data model that derives from + models.STACItemProperties.""" + raise NotImplementedError + + @abstractmethod + def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + raise NotImplementedError def validate_host(self, stac_host: str) -> str: if not url_validate(stac_host): raise ValueError("stac_host URL is not appropriately formatted") if not stac_host_reachable(stac_host): - raise ValueError("stac_host is not reachable") + raise RuntimeError("stac_host is not reachable") return stac_host - def create_stac_collection(self): + def create_stac_collection(self) -> None: """ Create a basic STAC collection. Returns the collection. """ - if stac_collection_exists(self.stac_host, self.collection_id): - LOGGER.info(f"Collection '{self.collection_name}' already exists") - else: - LOGGER.info(f"Creating collection '{self.collection_name}'") - sp_extent = pystac.SpatialExtent([self._collection_info.pop("spatialextent")]) - tmp = self._collection_info.pop("temporalextent") - tmp_extent = pystac.TemporalExtent( + LOGGER.info(f"Creating collection '{self.collection_name}'") + sp_extent = pystac.SpatialExtent([self._collection_info.pop("spatialextent")]) + tmp = self._collection_info.pop("temporalextent") + tmp_extent = pystac.TemporalExtent( + [ [ - [ - datetime.strptime(tmp[0], "%Y-%m-%d") if tmp[0] is not None else None, - datetime.strptime(tmp[1], "%Y-%m-%d") if tmp[1] is not None else None, - ] + datetime.strptime(tmp[0], "%Y-%m-%d") if tmp[0] is not None else None, + datetime.strptime(tmp[1], "%Y-%m-%d") if tmp[1] is not None else None, ] - ) - self._collection_info["extent"] = pystac.Extent(sp_extent, tmp_extent) - self._collection_info["summaries"] = pystac.Summaries({"needs_summaries_update": ["true"]}) + ] + ) + self._collection_info["extent"] = pystac.Extent(sp_extent, tmp_extent) + self._collection_info["summaries"] = pystac.Summaries({"needs_summaries_update": ["true"]}) + collection = pystac.Collection(**self._collection_info) + + collection.add_links(self._ingest_pipeline.links) - collection = pystac.Collection(id=self.collection_id, **self._collection_info) - post_stac_collection(self.stac_host, collection.to_dict()) + post_stac_collection(self.stac_host, collection.to_dict(), self.update) def ingest(self) -> None: LOGGER.info("Data ingestion") for item_name, item_data in self._ingest_pipeline: LOGGER.info(f"Creating STAC representation for {item_name}") stac_item = self.create_stac_item(item_name, item_data) - post_stac_item(self.stac_host, self.collection_id, item_name, stac_item) - try: - pass - except Exception: - LOGGER.error(f"Failed adding STAC item {item_name}") - self.handle_ingestion_error("Posting Error", item_name, item_data) - - @abstractmethod - def handle_ingestion_error(self, error: str, item_name: str, item_data: MutableMapping[str, Any]): - pass - - @abstractmethod - def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: - pass - - @abstractmethod - def validate_stac_item_cv(self, data: MutableMapping[str, Any]) -> bool: - pass + post_stac_item(self.stac_host, self.collection_id, item_name, stac_item, self.update) diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index 24efb07..c245ed1 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -1,27 +1,27 @@ -import re +import datetime import json -import datetime as dt -from enum import Enum, auto -from typing import Any, Iterator, MutableMapping, Optional, Tuple, Union -from typing import Any, Dict, List, Literal, MutableMapping -from typing_extensions import TypedDict -import pystac -from pystac.extensions.datacube import Dimension, DimensionType, VariableType, Variable, DatacubeExtension -from pydantic import AnyHttpUrl, BaseModel, field_validator, Field, ConfigDict, RootModel, AnyUrl -from urllib.parse import urljoin - - -import pyessv +import logging +import os +import re +import sys +from typing import Any, Literal, MutableMapping +import numpy as np +import pystac +import yaml +from colorlog import ColoredFormatter -try: - from enum import EnumType as enumtype -except ImportError: - # < Python 3.11 - from enum import EnumMeta as enumtype +from STACpopulator.models import STACItem +LOGGER = logging.getLogger(__name__) +LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" +formatter = ColoredFormatter(LOGFORMAT) +stream = logging.StreamHandler() +stream.setFormatter(formatter) +LOGGER.addHandler(stream) +LOGGER.setLevel(logging.INFO) +LOGGER.propagate = False -STAC_VERSION = "1.0.0" def url_validate(target: str) -> bool: """Validate whether a supplied URL is reliably written. @@ -47,395 +47,183 @@ def url_validate(target: str) -> bool: return True if re.match(url_regex, target) else False -def collection2enum(collection: pyessv.model.collection.Collection) -> enumtype: - """Create Enum based on terms from pyessv collection. - - Parameters - ---------- - collection : pyessv.model.collection.Collection - pyessv collection of terms. +def load_collection_configuration() -> MutableMapping[str, Any]: + """Reads details of the STAC Collection to be created from a configuration file. the + code expects a "collection_config.yml" file to be present in the app directory. - Returns - ------- - Enum - Enum storing terms and their labels from collection. + :raises RuntimeError: If the configuration file is not present + :raises RuntimeError: If required values are not present in the configuration file + :return: A python dictionary describing the details of the Collection + :rtype: MutableMapping[str, Any] """ - mp = {term.name: term.label for term in collection} - return Enum(collection.raw_name.capitalize(), mp, module="base") + collection_info_filename = "collection_config.yml" + app_directory = os.path.dirname(sys.argv[0]) + + if not os.path.exists(os.path.join(app_directory, collection_info_filename)): + raise RuntimeError(f"Missing {collection_info_filename} file for this implementation") + + with open(os.path.join(app_directory, collection_info_filename)) as f: + collection_info = yaml.load(f, yaml.Loader) + + req_definitions = ["title", "id", "description", "keywords", "license"] + for req in req_definitions: + if req not in collection_info.keys(): + LOGGER.error(f"'{req}' is required in the configuration file") + raise RuntimeError(f"'{req}' is required in the configuration file") + + return collection_info def collection2literal(collection): - import typing terms = tuple(term.label for term in collection) - return typing.Literal[terms] - - -class AutoValueEnum(Enum): - def _generate_next_value_( # type: ignore - name: str, start: int, count: int, last_values: List[Any] - ) -> Any: - return name - - -# DH: There is a question here whether we want to use pystac.Item or not. -# pystac.Item takes datetime, start_datetime and end_datetime as optional parameters, and then copies them into -# properties. -# If we use pystac.Item, we don't have to put start_datetime and end_datetime into Properties, we can let pystac do -# that. -class ItemProperties(BaseModel): - start_datetime: Optional[dt.datetime] = None - end_datetime: Optional[dt.datetime] = None - datetime: Optional[dt.datetime] = None - - @field_validator("datetime", mode="before") - def validate_datetime(cls, v: Union[dt.datetime, str], values: Dict[str, Any]) -> dt: - if v == "null": - if not values["start_datetime"] and not values["end_datetime"]: - raise ValueError( - "start_datetime and end_datetime must be specified when datetime is null" - ) - - -class Geometry(TypedDict): - type: str - coordinates: List[List[List[float]]] - -class Asset(BaseModel): - href: AnyHttpUrl - media_type: Optional[str] = None - title: Optional[str] = None - description: Optional[str] = None - roles: Optional[List[str]] = None - -class Link(BaseModel): - """ - https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#link-object - """ + return Literal[terms] - href: str = Field(..., alias="href", min_length=1) - rel: str = Field(..., alias="rel", min_length=1) - type: Optional[str] = None - title: Optional[str] = None - # Label extension - label: Optional[str] = Field(None, alias="label:assets") - model_config = ConfigDict(use_enum_values=True) - def resolve(self, base_url: str) -> None: - """resolve a link to the given base URL""" - self.href = urljoin(base_url, self.href) +def ncattrs_to_geometry(attrs: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + """Create Polygon geometry from CFMetadata.""" + attrs = attrs["groups"]["CFMetadata"]["attributes"] + return { + "type": "Polygon", + "coordinates": [ + [ + [ + float(attrs["geospatial_lon_min"][0]), + float(attrs["geospatial_lat_min"][0]), + ], + [ + float(attrs["geospatial_lon_min"][0]), + float(attrs["geospatial_lat_max"][0]), + ], + [ + float(attrs["geospatial_lon_max"][0]), + float(attrs["geospatial_lat_max"][0]), + ], + [ + float(attrs["geospatial_lon_max"][0]), + float(attrs["geospatial_lat_min"][0]), + ], + [ + float(attrs["geospatial_lon_min"][0]), + float(attrs["geospatial_lat_min"][0]), + ], + ] + ], + } + + +def ncattrs_to_bbox(attrs: MutableMapping[str, Any]) -> list[float]: + """Create BBOX from CFMetadata.""" + attrs = attrs["groups"]["CFMetadata"]["attributes"] + return [ + float(attrs["geospatial_lon_min"][0]), + float(attrs["geospatial_lat_min"][0]), + float(attrs["geospatial_lon_max"][0]), + float(attrs["geospatial_lat_max"][0]), + ] + + +def numpy_to_python_datatypes(data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: + # Converting numpy datatypes to python standard datatypes + for key, value in data.items(): + if isinstance(value, list): + newlist = [] + for item in value: + if issubclass(type(item), np.integer): + newlist.append(int(item)) + elif issubclass(type(item), np.floating): + newlist.append(float(item)) + else: + newlist.append(item) + data[key] = newlist + elif isinstance(type(value), np.integer): + data[key] = int(value) + + return data + + +def magpie_resource_link(url: str) -> pystac.Link: + """Creates a link that will be used by Cowbird to create a resource in Magpie + associated with the STAC item. + + :param url: HTTPServer access URL for a STAC item + :type url: str + :return: A PySTAC Link + :rtype: pystac.Link + """ + url_ = url.replace("fileServer", "*") + i = url_.find("*") + title = url_[i + 2 :] + link = pystac.Link(rel="source", title=title, target=url, media_type="application/x-netcdf") + return link -class PaginationLink(Link): +def STAC_item_from_metadata(iid: str, attrs: MutableMapping[str, Any], item_props_datamodel, item_geometry_model): """ - https://github.com/radiantearth/stac-api-spec/blob/master/api-spec.md#paging-extension + Create STAC Item from CF JSON metadata. + + Parameters + ---------- + iid : str + Unique item ID. + attrs: dict + CF JSON metadata returned by `xncml.Dataset.to_cf_dict`. + item_props_datamodel : pydantic.BaseModel + Data model describing the properties of the STAC item. + item_geometry_model : pydantic.BaseModel + Data model describing the geometry of the STAC item. """ - rel: Literal["next", "previous"] - method: Literal["GET", "POST"] - body: Optional[Dict[Any, Any]] = None - merge: bool = False - -Links = RootModel[List[Union[PaginationLink, Link]]] - - -class Item(BaseModel): - id: str = Field(..., alias="id", min_length=1) - geometry: Optional[Geometry] = None - bbox: Optional[List[float]] = None - properties: Optional[ItemProperties] = None - assets: Dict[str, Asset] = None - stac_extensions: Optional[List[AnyUrl]] = [] - collection: Optional[str] = None - datetime: Optional[dt.datetime] = None # Not in the spec, but needed by pystac.Item. - - - -class CFJsonItem: - """Return STAC Item from CF JSON metadata, as provided by `xncml.Dataset.to_cf_dict`.""" - axis = {"X": "x", "Y": "y", "Z": "z", "T": "t", "longitude": "x", "latitude": "y", "vertical": "z", "time": "t"} - - def __init__(self, iid: str, attrs: dict, datamodel=None): - """ - Create STAC Item from CF JSON metadata. - - Parameters - ---------- - iid : str - Unique item ID. - attrs: dict - CF JSON metadata returned by `xncml.Dataset.to_cf_dict`. - datamodel : pydantic.BaseModel, optional - Data model for validating global attributes. - """ - self.attrs = attrs - cfmeta = attrs["groups"]["CFMetadata"]["attributes"] - - # Global attributes - gattrs = {"start_datetime": cfmeta["time_coverage_start"], - "end_datetime": cfmeta["time_coverage_end"], - **attrs["attributes"], - } - - # Validate using pydantic data model if given - datamodel = datamodel or dict - - class MySTACItem(Item): - properties: datamodel - - # Create STAC item - item = MySTACItem( - id=iid, - geometry=self.ncattrs_to_geometry(), - bbox=self.ncattrs_to_bbox(), - properties=gattrs, - datetime=None, - ) - - item = pystac.Item(**json.loads(item.model_dump_json(by_alias=True))) - - # Add assets - if "access_urls" in attrs: - root = attrs["access_urls"] - elif 'THREDDSMetadata' in attrs["groups"]: - root = attrs["groups"]['THREDDSMetadata']['groups']['services']['attributes'] - else: - root = {} - - for name, url in root.items(): - asset = pystac.Asset(href=url, media_type=media_types.get(name), roles=asset_roles.get(name)) - item.add_asset(name, asset) - - self.item = item - - def to_json(self) -> str: - self.item.model_dump_json() - - def ncattrs_to_geometry(self) -> MutableMapping[str, Any]: - """Create Polygon geometry from CFMetadata.""" - attrs = self.attrs["groups"]["CFMetadata"]["attributes"] - return { - "type": "Polygon", - "coordinates": [ - [ - [ - float(attrs["geospatial_lon_min"][0]), - float(attrs["geospatial_lat_min"][0]), - ], - [ - float(attrs["geospatial_lon_min"][0]), - float(attrs["geospatial_lat_max"][0]), - ], - [ - float(attrs["geospatial_lon_max"][0]), - float(attrs["geospatial_lat_max"][0]), - ], - [ - float(attrs["geospatial_lon_max"][0]), - float(attrs["geospatial_lat_min"][0]), - ], - [ - float(attrs["geospatial_lon_min"][0]), - float(attrs["geospatial_lat_min"][0]), - ], - ] - ], - } - - def ncattrs_to_bbox(self) -> list: - """Create BBOX from CFMetadata.""" - attrs = self.attrs["groups"]["CFMetadata"]["attributes"] - return [ - float(attrs["geospatial_lon_min"][0]), - float(attrs["geospatial_lat_min"][0]), - float(attrs["geospatial_lon_max"][0]), - float(attrs["geospatial_lat_max"][0]), - ] - - def dimensions(self) -> dict: - """Return Dimension objects required for Datacube extension.""" - - dims = {} - for name, length in self.attrs["dimensions"].items(): - v = self.attrs["variables"].get(name) - if v: - bbox = self.obj.ncattrs_to_bbox() - for key, criteria in coordinate_criteria.items(): - for criterion, expected in criteria.items(): - if v['attributes'].get(criterion, None) in expected: - axis = self.axis[key] - type_ = DimensionType.SPATIAL if axis in ['x', 'y', 'z'] else DimensionType.TEMPORAL - - if v['type'] == 'int': - extent = [0, int(length)] - else: # Not clear the logic is sound - if key == 'X': - extent = bbox[0], bbox[2] - elif key == "Y": - extent = bbox[1], bbox[3] - else: - extent = None - - dims[name] = Dimension(properties=dict( - axis = axis, - type = type_, - extent = extent, - description=v.get("description", v.get("long_name", criteria["standard_name"])) - ) - ) - - return dims - - def variables(self)->dict: - """Return Variable objects required for Datacube extension.""" - variables = {} - - for name, meta in self.attrs["variables"].items(): - if name in self.attrs["dimensions"]: - continue - - attrs = meta['attributes'] - variables[name] = Variable(properties=dict( - dimensions=meta["shape"], - type = VariableType.AUXILIARY.value if self.is_coordinate(attrs) else - VariableType.DATA.value, - description=attrs.get("description", attrs.get("long_name")), - unit=attrs.get("units", None) - )) - return variables - - def is_coordinate(self, attrs: dict)-> bool: - """Return whether variable is a coordinate.""" - for key, criteria in coordinate_criteria.items(): - for criterion, expected in criteria.items(): - if attrs.get(criterion, None) in expected: - return True - return False - - -# From CF-Xarray -coordinate_criteria = { - 'latitude': {'standard_name': ('latitude',), - 'units': ('degree_north', - 'degree_N', - 'degreeN', - 'degrees_north', - 'degrees_N', - 'degreesN'), - '_CoordinateAxisType': ('Lat',), - 'long_name': ('latitude',)}, - 'longitude': {'standard_name': ('longitude',), - 'units': ('degree_east', - 'degree_E', - 'degreeE', - 'degrees_east', - 'degrees_E', - 'degreesE'), - '_CoordinateAxisType': ('Lon',), - 'long_name': ('longitude',)}, - 'Z': {'standard_name': ('model_level_number', - 'atmosphere_ln_pressure_coordinate', - 'atmosphere_sigma_coordinate', - 'atmosphere_hybrid_sigma_pressure_coordinate', - 'atmosphere_hybrid_height_coordinate', - 'atmosphere_sleve_coordinate', - 'ocean_sigma_coordinate', - 'ocean_s_coordinate', - 'ocean_s_coordinate_g1', - 'ocean_s_coordinate_g2', - 'ocean_sigma_z_coordinate', - 'ocean_double_sigma_coordinate'), - '_CoordinateAxisType': ('GeoZ', 'Height', 'Pressure'), - 'axis': ('Z',), - 'cartesian_axis': ('Z',), - 'grads_dim': ('z',), - 'long_name': ('model_level_number', - 'atmosphere_ln_pressure_coordinate', - 'atmosphere_sigma_coordinate', - 'atmosphere_hybrid_sigma_pressure_coordinate', - 'atmosphere_hybrid_height_coordinate', - 'atmosphere_sleve_coordinate', - 'ocean_sigma_coordinate', - 'ocean_s_coordinate', - 'ocean_s_coordinate_g1', - 'ocean_s_coordinate_g2', - 'ocean_sigma_z_coordinate', - 'ocean_double_sigma_coordinate')}, - 'vertical': {'standard_name': ('air_pressure', - 'height', - 'depth', - 'geopotential_height', - 'altitude', - 'height_above_geopotential_datum', - 'height_above_reference_ellipsoid', - 'height_above_mean_sea_level'), - 'positive': ('up', 'down'), - 'long_name': ('air_pressure', - 'height', - 'depth', - 'geopotential_height', - 'altitude', - 'height_above_geopotential_datum', - 'height_above_reference_ellipsoid', - 'height_above_mean_sea_level')}, - 'X': {'standard_name': ('projection_x_coordinate', - 'grid_longitude', - 'projection_x_angular_coordinate'), - '_CoordinateAxisType': ('GeoX',), - 'axis': ('X',), - 'cartesian_axis': ('X',), - 'grads_dim': ('x',), - 'long_name': ('projection_x_coordinate', - 'grid_longitude', - 'projection_x_angular_coordinate', - 'cell index along first dimension')}, - 'Y': {'standard_name': ('projection_y_coordinate', - 'grid_latitude', - 'projection_y_angular_coordinate'), - '_CoordinateAxisType': ('GeoY',), - 'axis': ('Y',), - 'cartesian_axis': ('Y',), - 'grads_dim': ('y',), - 'long_name': ('projection_y_coordinate', - 'grid_latitude', - 'projection_y_angular_coordinate', - 'cell index along second dimension')}, - 'T': {'standard_name': ('time',), - '_CoordinateAxisType': ('Time',), - 'axis': ('T',), - 'cartesian_axis': ('T',), - 'grads_dim': ('t',), - 'long_name': ('time',)}, - 'time': {'standard_name': ('time',), - '_CoordinateAxisType': ('Time',), - 'axis': ('T',), - 'cartesian_axis': ('T',), - 'grads_dim': ('t',), - 'long_name': ('time',)}} - - -media_types = {"httpserver_service": "application/x-netcdf", - "opendap_service": pystac.MediaType.HTML, - "wcs_service": pystac.MediaType.XML, - "wms_service": pystac.MediaType.XML, - "nccs_service": "application/x-netcdf", - "HTTPServer": "application/x-netcdf", - "OPENDAP": pystac.MediaType.HTML, - "NCML": pystac.MediaType.XML, - "WCS": pystac.MediaType.XML, - "ISO": pystac.MediaType.XML, - "WMS": pystac.MediaType.XML, - "NetcdfSubset": "application/x-netcdf", - } - -asset_roles = {"httpserver_service": ["data"], - "opendap_service": ["data"], - "wcs_service": ["data"], - "wms_service": ["visual"], - "nccs_service": ["data"], - "HTTPServer": ["data"], - "OPENDAP": ["data"], - "NCML": ["metadata"], - "WCS": ["data"], - "ISO": ["metadata"], - "WMS": ["visual"], - "NetcdfSubset": ["data"],} + cfmeta = attrs["groups"]["CFMetadata"]["attributes"] + + # Create pydantic STAC item + item = STACItem( + id=iid, + geometry=item_geometry_model(**ncattrs_to_geometry(attrs)), + bbox=ncattrs_to_bbox(attrs), + properties=item_props_datamodel( + start_datetime=cfmeta["time_coverage_start"], + end_datetime=cfmeta["time_coverage_end"], + **attrs["attributes"], + ), + datetime=None, + ) + + # Convert pydantic STAC item to a PySTAC Item + item = pystac.Item(**json.loads(item.model_dump_json(by_alias=True))) + + root = attrs["access_urls"] + + for name, url in root.items(): + name = str(name) # converting name from siphon.catalog.CaseInsensitiveStr to str + asset = pystac.Asset(href=url, media_type=media_types.get(name), roles=asset_roles.get(name)) + + item.add_asset(name, asset) + + item.add_link(magpie_resource_link(root["HTTPServer"])) + + return item + + +asset_name_remaps = { + "httpserver_service": "HTTPServer", + "opendap_service": "OPENDAP", + "wcs_service": "WCS", + "wms_service": "WMS", + "nccs_service": "NetcdfSubset", +} + +media_types = { + "HTTPServer": "application/x-netcdf", + "OPENDAP": pystac.MediaType.HTML, + "WCS": pystac.MediaType.XML, + "WMS": pystac.MediaType.XML, + "NetcdfSubset": "application/x-netcdf", +} + +asset_roles = { + "HTTPServer": ["data"], + "OPENDAP": ["data"], + "WCS": ["data"], + "WMS": ["visual"], + "NetcdfSubset": ["data"], +} diff --git a/implementations/CMIP6-UofT/add_CMIP6.py b/implementations/CMIP6-UofT/add_CMIP6.py deleted file mode 100644 index 4fdea40..0000000 --- a/implementations/CMIP6-UofT/add_CMIP6.py +++ /dev/null @@ -1,182 +0,0 @@ -import logging -import hashlib -from datetime import datetime -from typing import Any, Dict, List, Literal, MutableMapping -from colorlog import ColoredFormatter -import argparse -import pyessv -from pydantic import AnyHttpUrl, BaseModel, Field, FieldValidationInfo, field_validator -from pystac.extensions.datacube import DatacubeExtension - -from STACpopulator import STACpopulatorBase -from STACpopulator.extensions import cmip6 -from STACpopulator.input import THREDDSLoader -from STACpopulator.stac_utils import ItemProperties -from STACpopulator.stac_utils import collection2literal, CFJsonItem - - -LOGGER = logging.getLogger(__name__) -LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" -formatter = ColoredFormatter(LOGFORMAT) -stream = logging.StreamHandler() -stream.setFormatter(formatter) -LOGGER.addHandler(stream) -LOGGER.setLevel(logging.INFO) -LOGGER.propagate = False - -# CMIP6 controlled vocabulary (CV) -CV = pyessv.WCRP.CMIP6 - -# Enum classes built from the pyessv' CV -ActivityID = collection2literal(CV.activity_id) -ExperimentID = collection2literal(CV.experiment_id) -Frequency = collection2literal(CV.frequency) -GridLabel = collection2literal(CV.grid_label) -InstitutionID = collection2literal(CV.institution_id) -# Member = collection2literal(CV.member_id) # This is empty -NominalResolution = collection2literal(CV.nominal_resolution) -Realm = collection2literal(CV.realm) -SourceID = collection2literal(CV.source_id) -SourceType = collection2literal(CV.source_type) -SubExperimentID = collection2literal(CV.sub_experiment_id) -TableID = collection2literal(CV.table_id) -# Variable = collection2literal(CV.variable_id) # This is empty - - -class Properties(ItemProperties, validate_assignment=True): - """Data model for CMIP6 Controlled Vocabulary.""" - - Conventions: str = Field(..., serialization_alias="cmip6:Conventions") - activity_id: ActivityID = Field(..., serialization_alias="cmip6:activity_id") - creation_date: datetime = Field(..., serialization_alias="cmip6:creation_date") - data_specs_version: str = Field(..., serialization_alias="cmip6:data_specs_version") - experiment: str = Field(..., serialization_alias="cmip6:experiment") - experiment_id: ExperimentID = Field(..., serialization_alias="cmip6:experiment_id") - frequency: Frequency = Field(..., serialization_alias="cmip6:frequency") - further_info_url: AnyHttpUrl = Field(..., serialization_alias="cmip6:further_info_url") - grid_label: GridLabel = Field(..., serialization_alias="cmip6:grid_label") - institution: str = Field(..., serialization_alias="cmip6:institution") - institution_id: InstitutionID = Field(..., serialization_alias="cmip6:institution_id") - nominal_resolution: NominalResolution = Field(..., serialization_alias="cmip6:nominal_resolution") - realm: List[Realm] = Field(..., serialization_alias="cmip6:realm") - source: str = Field(..., serialization_alias="cmip6:source") - source_id: SourceID = Field(..., serialization_alias="cmip6:source_id") - source_type: List[SourceType] = Field(..., serialization_alias="cmip6:source_type") - sub_experiment: str | Literal["none"] = Field(..., serialization_alias="cmip6:sub_experiment") - sub_experiment_id: SubExperimentID | Literal["none"] = Field(..., serialization_alias="cmip6:sub_experiment_id") - table_id: TableID = Field(..., serialization_alias="cmip6:table_id") - variable_id: str = Field(..., serialization_alias="cmip6:variable_id") - variant_label: str = Field(..., serialization_alias="cmip6:variant_label") - initialization_index: int = Field(..., serialization_alias="cmip6:initialization_index") - physics_index: int = Field(..., serialization_alias="cmip6:physics_index") - realization_index: int = Field(..., serialization_alias="cmip6:realization_index") - forcing_index: int = Field(..., serialization_alias="cmip6:forcing_index") - tracking_id: str = Field(..., serialization_alias="cmip6:tracking_id") - version: str = Field("", serialization_alias="cmip6:version") - product: str = Field(..., serialization_alias="cmip6:product") - license: str = Field(..., serialization_alias="cmip6:license") - grid: str = Field(..., serialization_alias="cmip6:grid") - mip_era: str = Field(..., serialization_alias="cmip6:mip_era") - - @field_validator("initialization_index", "physics_index", "realization_index", "forcing_index", mode="before") - @classmethod - def first_item(cls, v: list, info: FieldValidationInfo): - """Pick single item from list.""" - assert len(v) == 1, f"{info.field_name} must have one item only." - return v[0] - - @field_validator("realm", "source_type", mode="before") - @classmethod - def split(cls, v: str, info: FieldValidationInfo): - """Split string into list.""" - return v.split(" ") - - @field_validator("version") - @classmethod - def validate_version(cls, v: str, info: FieldValidationInfo): - assert v[0] == "v", "Version string should begin with a lower case 'v'" - assert v[1:].isdigit(), "All characters in version string, except first, should be digits" - return v - - -def make_cmip6_item_id(attrs: MutableMapping[str, Any]) -> str: - """Return a unique ID for CMIP6 data item.""" - keys = [ - "activity_id", - "institution_id", - "source_id", - "experiment_id", - "variant_label", - "table_id", - "variable_id", - "grid_label", - ] - name = "_".join(attrs[k] for k in keys) - return name - return hashlib.md5(name.encode("utf-8")).hexdigest() - - -class CMIP6populator(STACpopulatorBase): - def __init__(self, stac_host: str, thredds_catalog_url: str, config_filename: str) -> None: - """Constructor - - :param stac_host: URL to the STAC API - :type stac_host: str - :param thredds_catalog_url: the URL to the THREDDS catalog to ingest - :type thredds_catalog_url: str - :param config_filename: Yaml file containing the information about the collection to populate - :type config_filename: str - """ - - data_loader = THREDDSLoader(thredds_catalog_url) - self.props_model = Properties - super().__init__(stac_host, data_loader, config_filename) - - def handle_ingestion_error(self, error: str, item_name: str, item_data: MutableMapping[str, Any]): - pass - - def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: - """Creates the STAC item. - - :param item_name: name of the STAC item. Interpretation of name is left to the input loader implementation - :type item_name: str - :param item_data: dictionary like representation of all information on the item - :type item_data: MutableMapping[str, Any] - :return: _description_ - :rtype: MutableMapping[str, Any] - """ - iid = make_cmip6_item_id(item_data["attributes"]) - - obj = CFJsonItem(iid, item_data, self.props_model) - - # Add CMIP6 extension - try: - cmip6_ext = cmip6.CMIP6Extension.ext(obj.item, add_if_missing=True) - cmip6_ext.apply(item_data["attributes"]) - except: - LOGGER.warning(f"Failed to add CMIP6 extension to item {item_name}") - - # Add datacube extension - try: - dc_ext = DatacubeExtension.ext(obj.item, add_if_missing=True) - dc_ext.apply(dimensions=obj.dimensions(), variables=obj.variables()) - except: - LOGGER.warning(f"Failed to add Datacube extension to item {item_name}") - - return obj.item.to_dict() - - def validate_stac_item_cv(self, data: MutableMapping[str, Any]) -> bool: - # Validation is done at the item creating stage, using the Properties class. - return True - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog="CMIP6 STAC populator") - parser.add_argument("stac_host", type=str, help="STAC API address") - parser.add_argument("thredds_catalog_URL", type=str, help="URL to the CMIP6 THREDDS catalog") - parser.add_argument("config_file", type=str, help="Name of the configuration file") - - args = parser.parse_args() - LOGGER.info(f"Arguments to call: {args}") - c = CMIP6populator(args.stac_host, args.thredds_catalog_URL, args.config_file) - c.ingest() diff --git a/tests/ref.json b/tests/ref.json new file mode 100644 index 0000000..f3b8c23 --- /dev/null +++ b/tests/ref.json @@ -0,0 +1,124 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "ScenarioMIP_CCCma_CanESM5_ssp245_r13i1p2f1_SImon_siconc_gn", + "properties": { + "start_datetime": "2019-12-06T12:00:00Z", + "end_datetime": "2020-11-04T12:00:00Z", + "datetime": null, + "cmip6:Conventions": "CF-1.7 CMIP-6.2", + "cmip6:activity_id": "ScenarioMIP", + "cmip6:creation_date": "2019-09-25T23:01:33Z", + "cmip6:data_specs_version": "01.00.30", + "cmip6:experiment": "update of RCP4.5 based on SSP2", + "cmip6:experiment_id": "ssp245", + "cmip6:frequency": "mon", + "cmip6:further_info_url": "https://furtherinfo.es-doc.org/CMIP6.CCCma.CanESM5.ssp245.none.r13i1p2f1", + "cmip6:grid_label": "gn", + "cmip6:institution": "Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada, Victoria, BC V8P 5C2, Canada", + "cmip6:institution_id": "CCCma", + "cmip6:nominal_resolution": "100 km", + "cmip6:realm": [ + "seaIce" + ], + "cmip6:source": "CanESM5 (2019): \naerosol: interactive\natmos: CanAM5 (T63L49 native atmosphere, T63 Linear Gaussian Grid; 128 x 64 longitude/latitude; 49 levels; top level 1 hPa)\natmosChem: specified oxidants for aerosols\nland: CLASS3.6/CTEM1.2\nlandIce: specified ice sheets\nocean: NEMO3.4.1 (ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m)\nocnBgchem: Canadian Model of Ocean Carbon (CMOC); NPZD ecosystem with OMIP prescribed carbonate chemistry\nseaIce: LIM2", + "cmip6:source_id": "CanESM5", + "cmip6:source_type": [ + "AOGCM" + ], + "cmip6:sub_experiment": "none", + "cmip6:sub_experiment_id": "none", + "cmip6:table_id": "SImon", + "cmip6:variable_id": "siconc", + "cmip6:variant_label": "r13i1p2f1", + "cmip6:initialization_index": 1, + "cmip6:physics_index": 2, + "cmip6:realization_index": 13, + "cmip6:forcing_index": 1, + "cmip6:tracking_id": "hdl:21.14100/9e4f804b-c161-44fa-acd1-c2e94e220c95", + "cmip6:version": "v20190429", + "cmip6:product": "model-output", + "cmip6:license": "CMIP6 model data produced by The Government of Canada (Canadian Centre for Climate Modelling and Analysis, Environment and Climate Change Canada) is licensed under a Creative Commons Attribution ShareAlike 4.0 International License (https://creativecommons.org/licenses). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing CMIP6 output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded as a global attribute in this file) and at https:///pcmdi.llnl.gov/. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.", + "cmip6:grid": "ORCA1 tripolar grid, 1 deg with refinement to 1/3 deg within 20 degrees of the equator; 361 x 290 longitude/latitude; 45 vertical levels; top grid cell 0-6.19 m", + "cmip6:mip_era": "CMIP6" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 0.049800001084804535, + -78.39350128173828 + ], + [ + 0.049800001084804535, + 89.74176788330078 + ], + [ + 359.99493408203125, + 89.74176788330078 + ], + [ + 359.99493408203125, + -78.39350128173828 + ], + [ + 0.049800001084804535, + -78.39350128173828 + ] + ] + ] + }, + "links": [ + { + "rel": "source", + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", + "type": "application/x-netcdf", + "title": "birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + } + ], + "assets": { + "HTTPServer": { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/fileServer/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "OPENDAP": { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/dodsC/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc", + "type": "text/html", + "roles": [ + "data" + ] + }, + "WCS": { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wcs/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WCS&version=1.0.0&request=GetCapabilities", + "type": "application/xml", + "roles": [ + "data" + ] + }, + "WMS": { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/wms/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc?service=WMS&version=1.3.0&request=GetCapabilities", + "type": "application/xml", + "roles": [ + "visual" + ] + }, + "NetcdfSubset": { + "href": "https://pavics.ouranos.ca/twitcher/ows/proxy/thredds/ncss/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc/dataset.html", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + } + }, + "bbox": [ + 0.049800001084804535, + -78.39350128173828, + 359.99493408203125, + 89.74176788330078 + ], + "stac_extensions": [] +} \ No newline at end of file diff --git a/tests/test_client.py b/tests/test_client.py deleted file mode 100644 index b35f9ac..0000000 --- a/tests/test_client.py +++ /dev/null @@ -1,5 +0,0 @@ -from pystac_client import Client - -def test_cmip6(): - """Assume some CMIP6 has been ingested.""" - c = Client.open("http://localhost:8880/stac") diff --git a/tests/test_cmip6_extension.py b/tests/test_cmip6_extension.py deleted file mode 100644 index f899a33..0000000 --- a/tests/test_cmip6_extension.py +++ /dev/null @@ -1,20 +0,0 @@ -from STACpopulator.extensions import cmip6 -from STACpopulator.stac_utils import CFJsonItem -import xncml -from pathlib import Path -from pystac import Item, validation - -TEST_DATA = Path(__file__).parent / "data" - -def test_extension(): - ds = xncml.Dataset(TEST_DATA / "o3_Amon_GFDL-ESM4_historical_r1i1p1f1_gr1_185001-194912.xml") - attrs = ds.to_cf_dict() - - item = CFJsonItem("test", attrs).item - validation.validate(item) - - ext = cmip6.CMIP6Extension.ext(item, add_if_missing=True) - ext.apply(attrs["attributes"]) - assert "cmip6:realm" in item.properties - - diff --git a/tests/test_standalone_stac_item.py b/tests/test_standalone_stac_item.py new file mode 100644 index 0000000..d7239a8 --- /dev/null +++ b/tests/test_standalone_stac_item.py @@ -0,0 +1,30 @@ +import json + +import requests +import xncml + +from STACpopulator.implementations.CMIP6_UofT.add_CMIP6 import ( + CMIP6ItemProperties, + make_cmip6_item_id, +) +from STACpopulator.models import GeoJSONPolygon +from STACpopulator.stac_utils import STAC_item_from_metadata + + +def test_standalone_stac_item(): + url = ( + "https://pavics.ouranos.ca/twitcher/ows/proxy/" + "thredds/ncml/birdhouse/testdata/xclim/cmip6/sic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + "?catalog=https%3A%2F%2Fpavics.ouranos.ca%2Ftwitcher%2Fows%2Fproxy%2F" + "thredds%2Fcatalog%2Fbirdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fcatalog.html" + "&dataset=birdhouse%2Ftestdata%2Fxclim%2Fcmip6%2Fsic_SImon_CCCma-CanESM5_ssp245_r13i1p2f1_2020.nc" + ) + + attrs = xncml.Dataset.from_text(requests.get(url).content).to_cf_dict() + stac_item_id = make_cmip6_item_id(attrs["attributes"]) + stac_item = STAC_item_from_metadata(stac_item_id, attrs, CMIP6ItemProperties, GeoJSONPolygon) + + with open("tests/ref.json", "r") as ff: + reference = json.load(ff) + + assert stac_item.to_dict() == reference