From febdecaf5a188075fa87cc33b790b0b80f1c3709 Mon Sep 17 00:00:00 2001
From: Stefaan Lippens
Date: Tue, 8 Aug 2023 22:52:18 +0200
Subject: [PATCH] Issue #457 basic metadata handling in VectorCube

---
 openeo/metadata.py                     | 16 ++++++++--
 openeo/rest/connection.py              |  3 +-
 openeo/rest/datacube.py                |  3 +-
 openeo/rest/vectorcube.py              | 44 +++++++++++++++++---------
 tests/rest/datacube/test_vectorcube.py | 18 +++++++++--
 5 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/openeo/metadata.py b/openeo/metadata.py
index dd1e294d9..4e9dbf36b 100644
--- a/openeo/metadata.py
+++ b/openeo/metadata.py
@@ -1,3 +1,4 @@
+import logging
 import warnings
 from collections import namedtuple
 from typing import List, Union, Tuple, Callable, Any
@@ -6,6 +7,9 @@
 
 from openeo.internal.jupyter import render_component
 
+_log = logging.getLogger(__name__)
+
+
 class MetadataException(Exception):
     pass
 
@@ -191,6 +195,10 @@ class CollectionMetadata:
 
     """
 
+    # TODO: "CollectionMetadata" is also used as "cube metadata" where the link to original collection
+    #       might be lost (if any). Better separation between rich EO raster collection metadata and
+    #       essential cube metadata? E.g.: also think of vector cubes.
+
     def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
         # Original collection metadata (actual cube metadata might be altered through processes)
         self._orig_metadata = metadata
@@ -317,11 +325,15 @@ def extent(self) -> dict:
     def dimension_names(self) -> List[str]:
         return list(d.name for d in self._dimensions)
 
-    def assert_valid_dimension(self, dimension: str) -> str:
+    def assert_valid_dimension(self, dimension: str, just_warn: bool = False) -> str:
         """Make sure given dimension name is valid."""
         names = self.dimension_names()
         if dimension not in names:
-            raise ValueError("Invalid dimension {d!r}. Should be one of {n}".format(d=dimension, n=names))
+            msg = f"Invalid dimension {dimension!r}. Should be one of {names}"
+            if just_warn:
+                _log.warning(msg)
+            else:
+                raise ValueError(msg)
         return dimension
 
     def has_band_dimension(self) -> bool:
diff --git a/openeo/rest/connection.py b/openeo/rest/connection.py
index 535212764..4b9ae6b11 100644
--- a/openeo/rest/connection.py
+++ b/openeo/rest/connection.py
@@ -1050,10 +1050,12 @@ def vectorcube_from_paths(
 
         .. versionadded:: 0.14.0
         """
+        # TODO #457 deprecate this in favor of `load_url` and standard support for `load_uploaded_files`
         graph = PGNode(
             "load_uploaded_files",
             arguments=dict(paths=paths, format=format, options=options),
         )
+        # TODO: load_uploaded_files might also return a raster data cube. Determine this based on format?
         return VectorCube(graph=graph, connection=self)
 
     def datacube_from_process(self, process_id: str, namespace: Optional[str] = None, **kwargs) -> DataCube:
@@ -1336,7 +1338,6 @@ def load_geojson(
 
         .. versionadded:: 0.22.0
         """
-
         return VectorCube.load_geojson(connection=self, data=data, properties=properties)
 
     @openeo_process
diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py
index dda00ee8c..e234e0b4a 100644
--- a/openeo/rest/datacube.py
+++ b/openeo/rest/datacube.py
@@ -907,7 +907,7 @@ def aggregate_spatial(
                 ),
             ),
             connection=self._connection,
-            # TODO: metadata?
+            # TODO: metadata? And correct dimension of created vector cube? #457
         )
 
     @openeo_process
@@ -1723,6 +1723,7 @@ def raster_to_vector(self) -> VectorCube:
         :return: a :py:class:`~openeo.rest.vectorcube.VectorCube`
         """
         pg_node = PGNode(process_id="raster_to_vector", arguments={"data": self})
+        # TODO: properly update metadata (e.g. "geometry" dimension) related to #457
        return VectorCube(pg_node, connection=self._connection, metadata=self.metadata)
 
     ####VIEW methods #######
diff --git a/openeo/rest/vectorcube.py b/openeo/rest/vectorcube.py
index 49c780fb2..c90f8a662 100644
--- a/openeo/rest/vectorcube.py
+++ b/openeo/rest/vectorcube.py
@@ -10,7 +10,7 @@
 from openeo.internal.documentation import openeo_process
 from openeo.internal.graph_building import PGNode
 from openeo.internal.warnings import legacy_alias
-from openeo.metadata import CollectionMetadata
+from openeo.metadata import CollectionMetadata, Dimension
 from openeo.rest._datacube import THIS, UDF, _ProcessGraphAbstraction, build_child_callback
 from openeo.rest.job import BatchJob
 from openeo.rest.mlmodel import MlModel
@@ -32,16 +32,26 @@ class VectorCube(_ProcessGraphAbstraction):
 
     def __init__(self, graph: PGNode, connection: 'Connection', metadata: CollectionMetadata = None):
         super().__init__(pgnode=graph, connection=connection)
-        # TODO: does VectorCube need CollectionMetadata?
-        self.metadata = metadata
+        self.metadata = metadata or self._build_metadata()
+
+    @classmethod
+    def _build_metadata(cls, add_properties: bool = False) -> CollectionMetadata:
+        """Helper to build a (minimal) `CollectionMetadata` object."""
+        # Vector cubes have at least a "geometry" dimension
+        dimensions = [Dimension(name="geometry", type="geometry")]
+        if add_properties:
+            dimensions.append(Dimension(name="properties", type="other"))
+        # TODO: use a more generic metadata container than "collection" metadata
+        return CollectionMetadata(metadata={}, dimensions=dimensions)
 
     def process(
-            self,
-            process_id: str,
-            arguments: dict = None,
-            metadata: Optional[CollectionMetadata] = None,
-            namespace: Optional[str] = None,
-            **kwargs) -> 'VectorCube':
+        self,
+        process_id: str,
+        arguments: dict = None,
+        metadata: Optional[CollectionMetadata] = None,
+        namespace: Optional[str] = None,
+        **kwargs,
+    ) -> "VectorCube":
         """
         Generic helper to create a new DataCube by applying a process.
 
@@ -79,7 +89,7 @@ def load_geojson(
         .. versionadded:: 0.22.0
         """
         # TODO: unify with `DataCube._get_geometry_argument`
-        # TODO: also support client side fetching of GeoJSON from URL?
+        # TODO #457 also support client side fetching of GeoJSON from URL?
         if isinstance(data, str) and data.strip().startswith("{"):
             # Assume JSON dump
             geometry = json.loads(data)
@@ -96,10 +106,12 @@ def load_geojson(
             geometry = data
         else:
             raise ValueError(data)
+        # TODO #457 client side verification of GeoJSON construct: valid type, valid structure, presence of CRS, ...?
         pg = PGNode(process_id="load_geojson", data=geometry, properties=properties or [])
-        # TODO #424 add basic metadata
-        return cls(graph=pg, connection=connection)
+        # TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
+        metadata = cls._build_metadata(add_properties=True)
+        return cls(graph=pg, connection=connection, metadata=metadata)
 
     @classmethod
     @openeo_process
@@ -121,8 +133,9 @@ def load_url(
         .. versionadded:: 0.22.0
         """
         pg = PGNode(process_id="load_url", arguments=dict_no_none(url=url, format=format, options=options))
-        # TODO #424 add basic metadata
-        return cls(graph=pg, connection=connection)
+        # TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
+        metadata = cls._build_metadata(add_properties=True)
+        return cls(graph=pg, connection=connection, metadata=metadata)
 
     @openeo_process
     def run_udf(
@@ -446,7 +459,8 @@ def apply_dimension(
             {
                 "data": THIS,
                 "process": process,
-                "dimension": dimension,  # TODO #424: self.metadata.assert_valid_dimension(dimension)
+                # TODO: drop `just_warn`?
+                "dimension": self.metadata.assert_valid_dimension(dimension, just_warn=True),
                 "target_dimension": target_dimension,
                 "context": context,
             }
diff --git a/tests/rest/datacube/test_vectorcube.py b/tests/rest/datacube/test_vectorcube.py
index 5bf3496fb..ec2a552c5 100644
--- a/tests/rest/datacube/test_vectorcube.py
+++ b/tests/rest/datacube/test_vectorcube.py
@@ -255,9 +255,17 @@ def test_load_url(con100, dummy_backend):
     }
 
 
-def test_apply_dimension(con100, dummy_backend):
+@pytest.mark.parametrize(
+    ["dimension", "expect_warning"],
+    [
+        ("geometry", False),
+        ("geometries", True),
+        ("wibbles", True),
+    ],
+)
+def test_apply_dimension(con100, dummy_backend, dimension, expect_warning, caplog):
     vc = con100.load_geojson({"type": "Point", "coordinates": [1, 2]})
-    result = vc.apply_dimension("sort", dimension="geometries")
+    result = vc.apply_dimension("sort", dimension=dimension)
     result.execute()
     assert dummy_backend.get_pg() == {
         "loadgeojson1": {
@@ -268,7 +276,7 @@ def test_apply_dimension(con100, dummy_backend):
             "process_id": "apply_dimension",
             "arguments": {
                 "data": {"from_node": "loadgeojson1"},
-                "dimension": "geometries",
+                "dimension": dimension,
                 "process": {
                     "process_graph": {
                         "sort1": {
@@ -282,3 +290,7 @@ def test_apply_dimension(con100, dummy_backend):
             "result": True,
         },
     }
+
+    assert (
+        f"Invalid dimension {dimension!r}. Should be one of ['geometry', 'properties']" in caplog.messages
+    ) == expect_warning