Skip to content

Commit

Permalink
EP-3981 #114 add more DriverVectorCube tests
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Apr 20, 2022
1 parent 1b0902e commit 9d17693
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 18 deletions.
37 changes: 26 additions & 11 deletions openeo_driver/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,18 @@ class DriverVectorCube:
These components are "joined" on the GeoPandas dataframe's index and DataArray first dimension
"""
DIM_GEOMETRIES = "geometries"

def __init__(self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray] = None):
FLATTEN_PREFIX = "vc"

def __init__(
self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray] = None,
flatten_prefix: str = FLATTEN_PREFIX
):
"""
:param geometries:
:param cube:
:param flatten_prefix: prefix for column/field/property names when flattening the cube
"""
# TODO #114 EP-3981: lazy loading (like DelayedVector)?
if cube is not None:
if cube.dims[0] != self.DIM_GEOMETRIES:
Expand All @@ -166,11 +176,12 @@ def __init__(self, geometries: gpd.GeoDataFrame, cube: Optional[xarray.DataArray
raise VectorCubeError("Incompatible vector cube components")
self._geometries = geometries
self._cube = cube
self._flatten_prefix = flatten_prefix

def with_cube(self, cube: xarray.DataArray) -> "DriverVectorCube":
def with_cube(self, cube: xarray.DataArray, flatten_prefix: str = FLATTEN_PREFIX) -> "DriverVectorCube":
"""Create new vector cube with same geometries but new cube"""
log.info(f"Creating vector cube with new cube {cube.name!r}")
return DriverVectorCube(geometries=self._geometries, cube=cube)
return DriverVectorCube(geometries=self._geometries, cube=cube, flatten_prefix=flatten_prefix)

@classmethod
def from_fiona(cls, paths: List[str], driver: str, options: dict):
Expand All @@ -189,23 +200,24 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame:
assert self._cube.dims[0] == self.DIM_GEOMETRIES
# TODO: better way to combine cube with geometries
# Flatten multiple (non-geometry) dimensions from cube to new properties in geopandas dataframe
prefix = self._cube.attrs.get("prefix", "cube")
if self._cube.dims[1:]:
stacked = self._cube.stack(prop=self._cube.dims[1:])
log.info(f"Flattened cube component of vector cube to {stacked.shape[1]} properties")
for p in stacked.indexes["prop"]:
name = "~".join(str(x) for x in [prefix] + list(p))
name = "~".join(str(x) for x in [self._flatten_prefix] + list(p))
# TODO: avoid column collisions?
df[name] = stacked.sel(prop=p)
else:
df[prefix] = self._cube
df[self._flatten_prefix] = self._cube

return df

def to_geojson(self):
    """
    Export the vector cube as a GeoJSON-style mapping.

    The export is built from the dataframe produced by `_as_geopandas_df()`,
    which is passed to `shapely.geometry.mapping`.
    """
    flattened = self._as_geopandas_df()
    return shapely.geometry.mapping(flattened)

def write_assets(self, directory: Union[str, Path], format: str, options: Optional[dict] = None) -> Dict[str, StacAsset]:
def write_assets(
self, directory: Union[str, Path], format: str, options: Optional[dict] = None
) -> Dict[str, StacAsset]:
directory = ensure_dir(directory)
format_info = IOFORMATS.get(format)
# TODO: check if format can be used for vector data?
Expand Down Expand Up @@ -243,13 +255,16 @@ def to_multipolygon(self) -> shapely.geometry.MultiPolygon:
return shapely.ops.unary_union(self._geometries.geometry)

def get_bounding_box(self) -> Tuple[float, float, float, float]:
return self._geometries.total_bounds
return tuple(self._geometries.total_bounds)

def get_geometries(self) -> Sequence[shapely.geometry.base.BaseGeometry]:
    """Return the geometries of this vector cube (the `geometry` attribute of the GeoPandas dataframe)."""
    geometry_series = self._geometries.geometry
    return geometry_series

def get_geometries_index(self) -> pd.Index:
return self._geometries.index
def get_xarray_cube_basics(self) -> Tuple[tuple, dict]:
    """
    Build the starting dimensions and coordinates for constructing an xarray DataArray.

    :return: tuple `(dims, coords)`: `dims` is a 1-tuple holding the geometries dimension name,
        and `coords` maps that name to the list of index values of the geometries dataframe.
    """
    geometry_dim = self.DIM_GEOMETRIES
    index_values = self._geometries.index.to_list()
    return (geometry_dim,), {geometry_dim: index_values}


class DriverMlModel:
Expand Down
10 changes: 3 additions & 7 deletions openeo_driver/dummy/dummy_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,7 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]):
# TODO #114 EP-3981 normalize to vector cube and preserve original properties
if isinstance(geometries, DriverVectorCube):
# Build dummy aggregation data cube
dims = (DriverVectorCube.DIM_GEOMETRIES,)
# TODO: use something else than the geopandas dataframe's index?
coords = {DriverVectorCube.DIM_GEOMETRIES: geometries.get_geometries_index().to_list()}
dims, coords = geometries.get_xarray_cube_basics()
if self.metadata.has_temporal_dimension():
dims += (self.metadata.temporal_dimension.name,)
coords[self.metadata.temporal_dimension.name] = ["2015-07-06T00:00:00", "2015-08-22T00:00:00"]
Expand All @@ -233,10 +231,8 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]):
coords[self.metadata.band_dimension.name] = self.metadata.band_names
shape = [len(coords[d]) for d in dims]
data = numpy.arange(numpy.prod(shape)).reshape(shape)
cube = xarray.DataArray(
data=data, dims=dims, coords=coords, name="aggregate_spatial", attrs={"prefix": "agg"}
)
return geometries.with_cube(cube=cube)
cube = xarray.DataArray(data=data, dims=dims, coords=coords, name="aggregate_spatial")
return geometries.with_cube(cube=cube, flatten_prefix="agg")
elif isinstance(geometries, str):
geometries = [geometry for geometry in DelayedVector(geometries).geometries]
assert_polygon_sequence(geometries)
Expand Down
99 changes: 99 additions & 0 deletions tests/test_vectorcube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import geopandas as gpd
import pytest
import xarray
from shapely.geometry import Polygon, MultiPolygon

from openeo_driver.datacube import DriverVectorCube
from openeo_driver.testing import DictSubSet
from .data import get_path


class TestDriverVectorCube:
    """Tests for the basic `DriverVectorCube` API, using a small GeoJSON feature collection fixture."""

    @pytest.fixture
    def gdf(self) -> gpd.GeoDataFrame:
        """Fixture for a simple GeoPandas DataFrame from file"""
        path = str(get_path("geojson/FeatureCollection02.json"))
        df = gpd.read_file(path)
        return df

    def test_basic(self, gdf):
        """Bounding box of a geometries-only vector cube."""
        vc = DriverVectorCube(gdf)
        assert vc.get_bounding_box() == (1, 1, 5, 4)

    def test_to_multipolygon(self, gdf):
        """All geometries are merged into a single MultiPolygon."""
        vc = DriverVectorCube(gdf)
        mp = vc.to_multipolygon()
        assert isinstance(mp, MultiPolygon)
        # Count parts through `.geoms`: multi-part geometries are no longer sequences
        # in Shapely 2.0 (`len(mp)` is deprecated in 1.8 and raises TypeError in 2.0).
        assert len(mp.geoms) == 2
        assert mp.equals(MultiPolygon([
            Polygon([(1, 1), (2, 3), (3, 1), (1, 1)]),
            Polygon([(4, 2), (5, 4), (3, 4), (4, 2)]),
        ]))

    def test_get_geometries(self, gdf):
        """Individual geometries are returned in the order of the source features."""
        vc = DriverVectorCube(gdf)
        geometries = vc.get_geometries()
        assert len(geometries) == 2
        expected_geometries = [
            Polygon([(1, 1), (2, 3), (3, 1), (1, 1)]),
            Polygon([(4, 2), (5, 4), (3, 4), (4, 2)]),
        ]
        for geometry, expected in zip(geometries, expected_geometries):
            assert geometry.equals(expected)

    def test_to_geojson(self, gdf):
        """GeoJSON export of a geometries-only vector cube keeps the original properties."""
        vc = DriverVectorCube(gdf)
        assert vc.to_geojson() == DictSubSet({
            "type": "FeatureCollection",
            "features": [
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((1, 1), (3, 1), (2, 3), (1, 1)),)},
                    "properties": {"id": "first", "pop": 1234},
                }),
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((4, 2), (5, 4), (3, 4), (4, 2)),)},
                    "properties": {"id": "second", "pop": 5678},
                }),
            ]
        })

    def test_with_cube_to_geojson(self, gdf):
        """`with_cube` returns a new vector cube whose GeoJSON export has the flattened cube values."""
        vc1 = DriverVectorCube(gdf)
        dims, coords = vc1.get_xarray_cube_basics()
        dims += ("bands",)
        coords["bands"] = ["red", "green"]
        cube = xarray.DataArray(data=[[1, 2], [3, 4]], dims=dims, coords=coords)
        vc2 = vc1.with_cube(cube, flatten_prefix="bandz")
        # The original vector cube must be left untouched by `with_cube`.
        assert vc1.to_geojson() == DictSubSet({
            "type": "FeatureCollection",
            "features": [
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((1, 1), (3, 1), (2, 3), (1, 1)),)},
                    "properties": {"id": "first", "pop": 1234},
                }),
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((4, 2), (5, 4), (3, 4), (4, 2)),)},
                    "properties": {"id": "second", "pop": 5678},
                }),
            ]
        })
        # The new vector cube exposes one "<prefix>~<band>" property per band.
        assert vc2.to_geojson() == DictSubSet({
            "type": "FeatureCollection",
            "features": [
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((1, 1), (3, 1), (2, 3), (1, 1)),)},
                    "properties": {"id": "first", "pop": 1234, "bandz~red": 1, "bandz~green": 2},
                }),
                DictSubSet({
                    "type": "Feature",
                    "geometry": {"type": "Polygon", "coordinates": (((4, 2), (5, 4), (3, 4), (4, 2)),)},
                    "properties": {"id": "second", "pop": 5678, "bandz~red": 3, "bandz~green": 4},
                }),
            ]
        })

0 comments on commit 9d17693

Please sign in to comment.