Skip to content

Commit

Permalink
added apply method to extension helpers
Browse files Browse the repository at this point in the history
  • Loading branch information
huard committed Oct 15, 2024
1 parent 8b583bb commit 8db0847
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 36 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ docker-build:
docker build "$(APP_ROOT)" -f "$(APP_ROOT)/docker/Dockerfile" -t "$(DOCKER_TAG)"

del_docker_volume: stophost
docker volume rm stac-populator_stac-db
docker volume rm docker_stac-db

resethost: del_docker_volume starthost

Expand Down
73 changes: 43 additions & 30 deletions STACpopulator/extensions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,15 @@
ncattrs_to_geometry,
)
import types
from pystac.extensions.datacube import DatacubeExtension
from STACpopulator.extensions.datacube import DataCubeHelper
from STACpopulator.extensions.thredds import THREDDSExtension, THREDDSHelper
from STACpopulator.extensions.thredds import THREDDSHelper

T = TypeVar("T", pystac.Collection, pystac.Item, pystac.Asset, item_assets.AssetDefinition)

LOGGER = logging.getLogger(__name__)


class DataModel(BaseModel):
class DataModelHelper(BaseModel):
"""Base class for dataset properties going into the catalog.
Subclass this with attributes.
Expand Down Expand Up @@ -94,6 +93,15 @@ def validate_jsonschema(cls, data):

return data

def apply(self, item, add_if_missing=False):
    """Write this model's properties onto a STAC item through its extension.

    The extension class is not declared statically: it is produced by
    `metacls_extension` from this model's `_prefix` and `_schema_uri`.
    `add_if_missing` is forwarded unchanged to the extension's `ext()`
    accessor. The `item` that was passed in is returned.
    """
    prefix, schema_uri = self._prefix, str(self._schema_uri)
    extension_cls = metacls_extension(prefix, schema_uri=schema_uri)
    extension = extension_cls.ext(item, add_if_missing=add_if_missing)

    # Serialize in JSON mode, keyed by field alias rather than attribute name.
    payload = self.model_dump(mode="json", by_alias=True)
    extension.apply(payload)
    return item


class THREDDSCatalogDataModel(BaseModel):
"""Base class ingesting attributes loaded by `THREDDSLoader` and creating a STAC item.
Expand All @@ -111,11 +119,16 @@ class THREDDSCatalogDataModel(BaseModel):
start_datetime: datetime
end_datetime: datetime

# Data from loader
data: dict

# Extensions classes
properties: DataModel
properties: DataModelHelper
datacube: DataCubeHelper
thredds: THREDDSHelper

extensions: list = ["properties", "datacube", "thredds"]

model_config = ConfigDict(populate_by_name=True, extra="ignore", arbitrary_types_allowed=True)

@classmethod
Expand All @@ -124,15 +137,34 @@ def from_data(cls, data):
"""
# This is where we match the Loader's output to the STAC item and extensions inputs. If we had multiple
# loaders, that's probably the only thing that would be different between them.
return cls(start_datetime=data["groups"]["CFMetadata"]["attributes"]["time_coverage_start"],
return cls(data=data,
start_datetime=data["groups"]["CFMetadata"]["attributes"]["time_coverage_start"],
end_datetime=data["groups"]["CFMetadata"]["attributes"]["time_coverage_end"],
geometry=ncattrs_to_geometry(data),
bbox=ncattrs_to_bbox(data),
properties=data["attributes"],
datacube=DataCubeHelper(data), # A bit clunky to avoid breaking CMIP6
thredds=THREDDSHelper(data["access_urls"])
)

@model_validator(mode="before")
@classmethod
def properties_helper(cls, data):
    """Populate the `properties` input from the loader output.

    Registered as a "before" validator. It copies `data["data"]["attributes"]`
    into `data["properties"]`, overwriting any value already stored under that
    key, and returns the same mapping.
    """
    loader_output = data["data"]
    data["properties"] = loader_output["attributes"]
    return data

@model_validator(mode="before")
@classmethod
def datacube_helper(cls, data):
    """Populate the `datacube` input with a `DataCubeHelper`.

    Registered as a "before" validator. The helper is built from the whole
    loader output found under `data["data"]` and stored under
    `data["datacube"]`, overwriting any value already there.
    """
    loader_output = data["data"]
    helper = DataCubeHelper(loader_output)
    data["datacube"] = helper
    return data

@model_validator(mode="before")
@classmethod
def thredds_helper(cls, data):
    """Populate the `thredds` input with a `THREDDSHelper`.

    Registered as a "before" validator. The helper is built from
    `data["data"]["access_urls"]` and stored under `data["thredds"]`,
    overwriting any value already there.
    """
    access_urls = data["data"]["access_urls"]
    data["thredds"] = THREDDSHelper(access_urls)
    return data

@property
def uid(self) -> str:
"""Return a unique ID. When subclassing, use a combination of properties uniquely identifying a dataset."""
Expand All @@ -153,9 +185,9 @@ def stac_item(self) -> "pystac.Item":
datetime=None,
)

self.metadata_extension(item)
self.datacube_extension(item)
self.thredds_extension(item)
# Add extensions
for ext in self.extensions:
getattr(self, ext).apply(item)

try:
item.validate()
Expand All @@ -164,25 +196,6 @@ def stac_item(self) -> "pystac.Item":

return json.loads(json.dumps(item.to_dict()))

def metadata_extension(self, item):
    """Write the dataset properties onto the STAC item through an extension.

    The extension class is produced by `metacls_extension` from the
    `_prefix` and `_schema_uri` of `self.properties`. `ext()` is called
    with `add_if_missing=False`. The `item` that was passed in is returned.
    """
    props = self.properties
    extension_cls = metacls_extension(props._prefix, schema_uri=str(props._schema_uri))
    extension = extension_cls.ext(item, add_if_missing=False)

    # Serialize in JSON mode, keyed by field alias rather than attribute name.
    payload = self.properties.model_dump(mode="json", by_alias=True)
    extension.apply(payload)
    return item

def datacube_extension(self, item):
    """Attach the datacube extension to the STAC item.

    `DatacubeExtension.ext()` is called with `add_if_missing=True`, then the
    extension is filled with the dimensions and variables held by
    `self.datacube`. Nothing is returned.
    """
    extension = DatacubeExtension.ext(item, add_if_missing=True)
    cube = self.datacube
    extension.apply(dimensions=cube.dimensions, variables=cube.variables)

def thredds_extension(self, item):
    """Attach the THREDDS extension to the STAC item.

    `THREDDSExtension.ext()` is called with `add_if_missing=False`, then the
    extension is filled with the services and links held by `self.thredds`,
    passed positionally in that order. Nothing is returned.
    """
    extension = THREDDSExtension.ext(item, add_if_missing=False)
    helper = self.thredds
    extension.apply(helper.services, helper.links)


def metacls_extension(name, schema_uri):
"""Create an extension class dynamically from the properties."""
Expand Down
10 changes: 6 additions & 4 deletions STACpopulator/extensions/cordex6.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from importlib import reload
import STACpopulator.extensions.base
reload(STACpopulator.extensions.base)
from STACpopulator.extensions.base import THREDDSCatalogDataModel, DataModel
from STACpopulator.extensions.base import THREDDSCatalogDataModel, DataModelHelper


# This is generated using datamodel-codegen + manual edits
class CordexCmip6(DataModel):
class CordexCmip6(DataModelHelper):
# Fields from schema
activity_id: str = Field(..., alias='cordex6:activity_id')
contact: str = Field(..., alias='cordex6:contact')
Expand Down Expand Up @@ -65,8 +65,10 @@ def uid(self) -> str:
"variable_id",
"domain_id",
]
name = "_".join(getattr(self.properties, k) for k in keys)
return name
values = [getattr(self.properties, k) for k in keys]
values.append(self.start_datetime.strftime("%Y%m%d"))
values.append(self.end_datetime.strftime("%Y%m%d"))
return "_".join(values)



9 changes: 8 additions & 1 deletion STACpopulator/extensions/datacube.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import functools
from typing import Any, MutableMapping, MutableSequence

from pystac.extensions.datacube import Dimension, DimensionType, Variable, VariableType
from pystac.extensions.datacube import Dimension, DimensionType, Variable, VariableType, DatacubeExtension

from STACpopulator.stac_utils import ncattrs_to_bbox

Expand Down Expand Up @@ -248,3 +248,10 @@ def temporal_extent(self) -> MutableSequence[str]:
start_datetime = cfmeta["time_coverage_start"]
end_datetime = cfmeta["time_coverage_end"]
return [start_datetime, end_datetime]

def apply(self, item, add_if_missing: bool = True):
    """Attach the Datacube extension to `item` and fill it from this helper.

    `add_if_missing` is forwarded unchanged to `DatacubeExtension.ext()`.
    The extension receives this helper's `dimensions` and `variables`.
    The `item` that was passed in is returned.
    """
    datacube = DatacubeExtension.ext(item, add_if_missing=add_if_missing)
    dims = self.dimensions
    variables = self.variables
    datacube.apply(dimensions=dims, variables=variables)
    return item

6 changes: 6 additions & 0 deletions STACpopulator/extensions/thredds.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ def links(self) -> list[pystac.Link]:
link = magpie_resource_link(url)
return [link]

def apply(self, item, add_if_missing: bool = False):
    """Attach the THREDDS extension to `item` and fill it from this helper.

    `add_if_missing` is forwarded unchanged to `THREDDSExtension.ext()`.
    The extension receives this helper's `services` and `links`.
    The `item` that was passed in is returned.
    """
    thredds = THREDDSExtension.ext(item, add_if_missing=add_if_missing)
    services = self.services
    links = self.links
    thredds.apply(services=services, links=links)
    return item


# TODO: Validate services links exist ?
# @field_validator("access_urls")
Expand Down

0 comments on commit 8db0847

Please sign in to comment.