Skip to content

Commit

Permalink
Issue #457 basic metadata handling in VectoCube
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Aug 8, 2023
1 parent 3c50154 commit febdeca
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 22 deletions.
16 changes: 14 additions & 2 deletions openeo/metadata.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import warnings
from collections import namedtuple
from typing import List, Union, Tuple, Callable, Any
Expand All @@ -6,6 +7,9 @@
from openeo.internal.jupyter import render_component


_log = logging.getLogger(__name__)


class MetadataException(Exception):
pass

Expand Down Expand Up @@ -191,6 +195,10 @@ class CollectionMetadata:
"""

# TODO: "CollectionMetadata" is also used as "cube metadata" where the link to original collection
# might be lost (if any). Better separation between rich EO raster collection metadata and
# essential cube metadata? E.g.: also thing of vector cubes.

def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
# Original collection metadata (actual cube metadata might be altered through processes)
self._orig_metadata = metadata
Expand Down Expand Up @@ -317,11 +325,15 @@ def extent(self) -> dict:
def dimension_names(self) -> List[str]:
return list(d.name for d in self._dimensions)

def assert_valid_dimension(self, dimension: str) -> str:
def assert_valid_dimension(self, dimension: str, just_warn: bool = False) -> str:
"""Make sure given dimension name is valid."""
names = self.dimension_names()
if dimension not in names:
raise ValueError("Invalid dimension {d!r}. Should be one of {n}".format(d=dimension, n=names))
msg = f"Invalid dimension {dimension!r}. Should be one of {names}"
if just_warn:
_log.warning(msg)
else:
raise ValueError(msg)
return dimension

def has_band_dimension(self) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion openeo/rest/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1050,10 +1050,12 @@ def vectorcube_from_paths(
.. versionadded:: 0.14.0
"""
# TODO #457 deprecate this in favor of `load_url` and standard support for `load_uploaded_files`
graph = PGNode(
"load_uploaded_files",
arguments=dict(paths=paths, format=format, options=options),
)
# TODO: load_uploaded_files might also return a raster data cube. Determine this based on format?
return VectorCube(graph=graph, connection=self)

def datacube_from_process(self, process_id: str, namespace: Optional[str] = None, **kwargs) -> DataCube:
Expand Down Expand Up @@ -1336,7 +1338,6 @@ def load_geojson(
.. versionadded:: 0.22.0
"""

return VectorCube.load_geojson(connection=self, data=data, properties=properties)

@openeo_process
Expand Down
3 changes: 2 additions & 1 deletion openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -907,7 +907,7 @@ def aggregate_spatial(
),
),
connection=self._connection,
# TODO: metadata?
# TODO: metadata? And correct dimension of created vector cube? #457
)

@openeo_process
Expand Down Expand Up @@ -1723,6 +1723,7 @@ def raster_to_vector(self) -> VectorCube:
:return: a :py:class:`~openeo.rest.vectorcube.VectorCube`
"""
pg_node = PGNode(process_id="raster_to_vector", arguments={"data": self})
# TODO: properly update metadata (e.g. "geometry" dimension) related to #457
return VectorCube(pg_node, connection=self._connection, metadata=self.metadata)

####VIEW methods #######
Expand Down
44 changes: 29 additions & 15 deletions openeo/rest/vectorcube.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from openeo.internal.documentation import openeo_process
from openeo.internal.graph_building import PGNode
from openeo.internal.warnings import legacy_alias
from openeo.metadata import CollectionMetadata
from openeo.metadata import CollectionMetadata, Dimension
from openeo.rest._datacube import THIS, UDF, _ProcessGraphAbstraction, build_child_callback
from openeo.rest.job import BatchJob
from openeo.rest.mlmodel import MlModel
Expand All @@ -32,16 +32,26 @@ class VectorCube(_ProcessGraphAbstraction):

def __init__(self, graph: PGNode, connection: 'Connection', metadata: CollectionMetadata = None):
super().__init__(pgnode=graph, connection=connection)
# TODO: does VectorCube need CollectionMetadata?
self.metadata = metadata
self.metadata = metadata or self._build_metadata()

@classmethod
def _build_metadata(cls, add_properties: bool = False) -> CollectionMetadata:
"""Helper to build a (minimal) `CollectionMetadata` object."""
# Vector cubes have at least a "geometry" dimension
dimensions = [Dimension(name="geometry", type="geometry")]
if add_properties:
dimensions.append(Dimension(name="properties", type="other"))
# TODO: use a more generic metadata container than "collection" metadata
return CollectionMetadata(metadata={}, dimensions=dimensions)

def process(
self,
process_id: str,
arguments: dict = None,
metadata: Optional[CollectionMetadata] = None,
namespace: Optional[str] = None,
**kwargs) -> 'VectorCube':
self,
process_id: str,
arguments: dict = None,
metadata: Optional[CollectionMetadata] = None,
namespace: Optional[str] = None,
**kwargs,
) -> "VectorCube":
"""
Generic helper to create a new DataCube by applying a process.
Expand Down Expand Up @@ -79,7 +89,7 @@ def load_geojson(
.. versionadded:: 0.22.0
"""
# TODO: unify with `DataCube._get_geometry_argument`
# TODO: also support client side fetching of GeoJSON from URL?
# TODO #457 also support client side fetching of GeoJSON from URL?
if isinstance(data, str) and data.strip().startswith("{"):
# Assume JSON dump
geometry = json.loads(data)
Expand All @@ -96,10 +106,12 @@ def load_geojson(
geometry = data
else:
raise ValueError(data)
# TODO #457 client side verification of GeoJSON construct: valid type, valid structure, presence of CRS, ...?

pg = PGNode(process_id="load_geojson", data=geometry, properties=properties or [])
# TODO #424 add basic metadata
return cls(graph=pg, connection=connection)
# TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
metadata = cls._build_metadata(add_properties=True)
return cls(graph=pg, connection=connection, metadata=metadata)

@classmethod
@openeo_process
Expand All @@ -121,8 +133,9 @@ def load_url(
.. versionadded:: 0.22.0
"""
pg = PGNode(process_id="load_url", arguments=dict_no_none(url=url, format=format, options=options))
# TODO #424 add basic metadata
return cls(graph=pg, connection=connection)
# TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
metadata = cls._build_metadata(add_properties=True)
return cls(graph=pg, connection=connection, metadata=metadata)

@openeo_process
def run_udf(
Expand Down Expand Up @@ -446,7 +459,8 @@ def apply_dimension(
{
"data": THIS,
"process": process,
"dimension": dimension, # TODO #424: self.metadata.assert_valid_dimension(dimension)
# TODO: drop `just_warn`?
"dimension": self.metadata.assert_valid_dimension(dimension, just_warn=True),
"target_dimension": target_dimension,
"context": context,
}
Expand Down
18 changes: 15 additions & 3 deletions tests/rest/datacube/test_vectorcube.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,17 @@ def test_load_url(con100, dummy_backend):
}


def test_apply_dimension(con100, dummy_backend):
@pytest.mark.parametrize(
["dimension", "expect_warning"],
[
("geometry", False),
("geometries", True),
("wibbles", True),
],
)
def test_apply_dimension(con100, dummy_backend, dimension, expect_warning, caplog):
vc = con100.load_geojson({"type": "Point", "coordinates": [1, 2]})
result = vc.apply_dimension("sort", dimension="geometries")
result = vc.apply_dimension("sort", dimension=dimension)
result.execute()
assert dummy_backend.get_pg() == {
"loadgeojson1": {
Expand All @@ -268,7 +276,7 @@ def test_apply_dimension(con100, dummy_backend):
"process_id": "apply_dimension",
"arguments": {
"data": {"from_node": "loadgeojson1"},
"dimension": "geometries",
"dimension": dimension,
"process": {
"process_graph": {
"sort1": {
Expand All @@ -282,3 +290,7 @@ def test_apply_dimension(con100, dummy_backend):
"result": True,
},
}

assert (
f"Invalid dimension {dimension!r}. Should be one of ['geometry', 'properties']" in caplog.messages
) == expect_warning

0 comments on commit febdeca

Please sign in to comment.