Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/xarray refactoring #61

Closed
wants to merge 39 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
cd20268
remove unnecessary file
mathleur Jul 12, 2023
98c0db9
Merge branch 'feature/octahedral_grid_support' of github.com:ecmwf/po…
mathleur Jul 12, 2023
528d7fb
start FDB datacube
mathleur Jul 12, 2023
eb10f57
add FDB_datacube
mathleur Jul 13, 2023
6a0bf09
make FDB datacube work with tests glue functions
mathleur Jul 13, 2023
2bae21b
add the list_axes fdb function
mathleur Jul 27, 2023
1a19464
add FDB function to FDB datacube
mathleur Jul 28, 2023
3530301
make fdb datacube work with the pyfdb list_axes method
mathleur Aug 1, 2023
6d76e5a
black
mathleur Aug 1, 2023
96554a8
make FDB datacube work with mappers
mathleur Aug 2, 2023
dfdf999
add transformations class to handle merging of axes
mathleur Aug 2, 2023
dc524fb
avoid circular imports
mathleur Aug 2, 2023
07698a8
black
mathleur Aug 2, 2023
ecc3217
finish the merge transformation
mathleur Aug 2, 2023
7376ea8
trnasformations general structure
mathleur Aug 2, 2023
9bee5a6
fix factory for datacube transformation class
mathleur Aug 2, 2023
fd79088
restructure folders
mathleur Aug 3, 2023
fcb71ad
restructure datacube folder and finish generic transformation class a…
mathleur Aug 3, 2023
e10c45c
try to make generic transformations class work
mathleur Aug 3, 2023
7d241bc
finish creating a generic transformation class, but only works for da…
mathleur Aug 4, 2023
e9d7b98
remove merger transformation
mathleur Aug 4, 2023
3665e95
black
mathleur Aug 4, 2023
3de9482
add framework for cyclic transformation
mathleur Aug 4, 2023
2c16165
make merge transformation work
mathleur Aug 4, 2023
6c6a16b
add merge transformation without tests
mathleur Aug 4, 2023
0e5833a
add test
mathleur Aug 4, 2023
19d4382
make merge transformation work
mathleur Aug 4, 2023
6b5efb3
add reverse transformation and tests
mathleur Aug 7, 2023
664353b
make merge work for xarray without modifying the actual datacube
mathleur Aug 7, 2023
9504777
create new branch for reverse transformation
mathleur Aug 7, 2023
e28ac02
black
mathleur Aug 7, 2023
e3329d5
refactor the xarray backend a bit
mathleur Aug 7, 2023
2f9fbb0
add TODO
mathleur Aug 7, 2023
2d5b7a2
make cyclic axes a decorator
mathleur Aug 8, 2023
29ea2ca
add specific transformation functions within transformation classes
mathleur Aug 8, 2023
7eeb5e0
separate datacube backend and transformation more
mathleur Aug 8, 2023
9976933
separate transformations and datacube backends
mathleur Aug 9, 2023
548bf5f
add explanation
mathleur Aug 14, 2023
b26a469
add explanation
mathleur Aug 14, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/4D_flight_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from earthkit import data
from PIL import Image

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, Path, Select
Expand Down
2 changes: 1 addition & 1 deletion examples/country_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from earthkit import data
from shapely.geometry import shape

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Polygon, Select, Union
Expand Down
2 changes: 1 addition & 1 deletion examples/cyclic_route_around_earth.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
from earthkit import data

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, PathSegment, Select
Expand Down
2 changes: 1 addition & 1 deletion examples/octahedral_grid_box_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from eccodes import codes_grib_find_nearest, codes_grib_new_from_file
from matplotlib import markers

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, Select
Expand Down
2 changes: 1 addition & 1 deletion examples/octahedral_grid_country_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from matplotlib import markers
from shapely.geometry import shape

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Polygon, Select, Union
Expand Down
2 changes: 1 addition & 1 deletion examples/slicing_all_ecmwf_countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from earthkit import data
from shapely.geometry import shape

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Polygon, Select, Union
Expand Down
2 changes: 1 addition & 1 deletion examples/timeseries_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from earthkit import data
from shapely.geometry import shape

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Polygon, Select, Union
Expand Down
2 changes: 1 addition & 1 deletion examples/wind_farms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from osgeo import gdal
from shapely.geometry import shape

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Polygon, Select, Union
Expand Down
2 changes: 1 addition & 1 deletion performance/scalability_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import xarray as xr

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, Disk, Ellipsoid, Select
Expand Down
2 changes: 1 addition & 1 deletion performance/scalability_test_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import xarray as xr

from polytope.datacube.xarray import XArrayDatacube
from polytope.datacube.backends.xarray import XArrayDatacube
from polytope.engine.hullslicer import HullSlicer
from polytope.polytope import Polytope, Request
from polytope.shapes import Box, Select, Union
Expand Down
2 changes: 1 addition & 1 deletion polytope/datacube/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .datacube import *
from .backends.datacube import *
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

import xarray as xr

from .datacube_axis import DatacubeAxis
from .index_tree import DatacubePath, IndexTree
from ..datacube_axis import DatacubeAxis
from ..index_tree import DatacubePath, IndexTree


class Datacube(ABC):
Expand Down Expand Up @@ -47,14 +47,29 @@ def create(datacube, axis_options: dict):

xadatacube = XArrayDatacube(datacube, axis_options=axis_options)
return xadatacube
else:
return datacube

@abstractmethod
def ax_vals(self, name: str) -> List:
pass

@abstractmethod
def _find_indexes_between(self, axis, indexes, low, up):
pass

# TODO: need to add transformation properties like the datacube.transformations dico


def configure_datacube_axis(options, name, values, datacube):
if options == {}:
DatacubeAxis.create_standard(name, values, datacube)
if "mapper" in options.keys():
from .datacube_mappers import DatacubeMapper

DatacubeMapper.create_mapper(options, name, datacube)
if "cyclic" in options.keys():
DatacubeAxis.create_cyclic(options, name, values, datacube)
# First need to check we are not initialising an axis which should not be initialised
if name not in datacube.blocked_axes:
# Now look at the options passed in
if options == {}:
DatacubeAxis.create_axis(name, values, datacube)
else:
from ..transformations.datacube_transformations import (
DatacubeAxisTransformation,
)

DatacubeAxisTransformation.create_transformation(options, name, values, datacube)
10 changes: 8 additions & 2 deletions polytope/datacube/mock.py → polytope/datacube/backends/mock.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import math
from copy import deepcopy

from ..utility.combinatorics import validate_axes
from ...utility.combinatorics import validate_axes
from ..datacube_axis import IntDatacubeAxis
from .datacube import Datacube, DatacubePath, IndexTree
from .datacube_axis import IntDatacubeAxis


class MockDatacube(Datacube):
Expand Down Expand Up @@ -60,3 +60,9 @@ def axes(self):

def validate(self, axes):
return validate_axes(self.axes, axes)

def ax_vals(self, name):
return []

def _find_indexes_between(self, axis, indexes, low, up):
pass
188 changes: 188 additions & 0 deletions polytope/datacube/backends/xarray.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
import math

import xarray as xr

from ...utility.combinatorics import unique, validate_axes
from .datacube import Datacube, DatacubePath, IndexTree, configure_datacube_axis


class XArrayDatacube(Datacube):
"""Xarray arrays are labelled, axes can be defined as strings or integers (e.g. "time" or 0)."""

def __init__(self, dataarray: xr.DataArray, axis_options={}):
self.axis_options = axis_options
self.grid_mapper = None
self.axis_counter = 0
self._axes = {}
self.dataarray = dataarray
treated_axes = []
self.complete_axes = []
self.blocked_axes = []
self.transformation = {}
for name, values in dataarray.coords.variables.items():
if name in dataarray.dims:
options = axis_options.get(name, {})
configure_datacube_axis(options, name, values, self)
treated_axes.append(name)
self.complete_axes.append(name)
else:
if self.dataarray[name].dims == ():
options = axis_options.get(name, {})
configure_datacube_axis(options, name, values, self)
treated_axes.append(name)
for name in dataarray.dims:
if name not in treated_axes:
options = axis_options.get(name, {})
val = dataarray[name].values[0]
configure_datacube_axis(options, name, val, self)

def get(self, requests: IndexTree):
for r in requests.leaves:
path = r.flatten()
if len(path.items()) == self.axis_counter:
# first, find the grid mapper transform
unmap_path = {}
considered_axes = []
for key in path.keys():
(path, first_val, considered_axes, unmap_path) = self.fit_path_to_datacube(
key, path, considered_axes, unmap_path
)
considered_axes.append(key)
subxarray = self.dataarray.sel(path, method="nearest")
subxarray = subxarray.sel(unmap_path)
value = subxarray.item()
key = subxarray.name
r.result = (key, value)
else:
r.remove_branch()

def get_mapper(self, axis):
return self._axes[axis]

# TODO: should this be in DatacubePath?
def remap_path(self, path: DatacubePath):
for key in path:
value = path[key]
path[key] = self._axes[key].remap_val_to_axis_range(value)
return path

def _find_indexes_between(self, axis, indexes, low, up):
if axis.name in self.complete_axes:
# Find the range of indexes between lower and upper
# https://pandas.pydata.org/docs/reference/api/pandas.Index.searchsorted.html
# Assumes the indexes are already sorted (could sort to be sure) and monotonically increasing
start = indexes.searchsorted(low, "left")
end = indexes.searchsorted(up, "right")
indexes_between = indexes[start:end].to_list()
else:
indexes_between = [i for i in indexes if low <= i <= up]
return indexes_between

def _look_up_datacube(self, search_ranges, search_ranges_offset, indexes, axis, first_val):
idx_between = []
for i in range(len(search_ranges)):
r = search_ranges[i]
offset = search_ranges_offset[i]
low = r[0]
up = r[1]
if axis.name in self.transformation.keys():
axis_transforms = self.transformation[axis.name]
for transform in axis_transforms:
indexes_between = transform._find_transformed_indices_between(
axis, self, indexes, low, up, first_val
)
else:
indexes_between = self._find_indexes_between(axis, indexes, low, up)
# Now the indexes_between are values on the cyclic range so need to remap them to their original
# values before returning them
for j in range(len(indexes_between)):
if offset is None:
indexes_between[j] = indexes_between[j]
else:
indexes_between[j] = round(indexes_between[j] + offset, int(-math.log10(axis.tol)))
idx_between.append(indexes_between[j])
return idx_between

def datacube_natural_indexes(self, axis, subarray):
if axis.name in self.complete_axes:
indexes = next(iter(subarray.xindexes.values())).to_pandas_index()
else:
indexes = [subarray[axis.name].values]
return indexes

def fit_path_to_datacube(self, axis_name, path, considered_axes=[], unmap_path={}):
# TODO: how to make this also work for the get method?
path = self.remap_path(path)
for key in path.keys():
if self.dataarray[key].dims == ():
path.pop(key)
first_val = None
if axis_name in self.transformation.keys():
axis_transforms = self.transformation[axis_name]
for transform in axis_transforms:
(path, first_val, considered_axes, unmap_path) = transform._adjust_path(
path, considered_axes, unmap_path
)
return (path, first_val, considered_axes, unmap_path)

def get_indices(self, path: DatacubePath, axis, lower, upper):
(path, first_val, considered_axes, unmap_path) = self.fit_path_to_datacube(axis.name, path)
# Open a view on the subset identified by the path
subarray = self.dataarray.sel(path, method="nearest")
# Get the indexes of the axis we want to query
# XArray does not support branching, so no need to use label, we just take the next axis
if axis.name in self.transformation.keys():
axis_transforms = self.transformation[axis.name]
for transform in axis_transforms:
# TODO: put this within the transform as transformed_indices method
indexes = transform._find_transformed_axis_indices(self, axis, subarray)
else:
indexes = self.datacube_natural_indexes(axis, subarray)
# Here, we do a cyclic remapping so we look up on the right existing values in the cyclic range on the datacube
search_ranges = axis.remap([lower, upper])
original_search_ranges = axis.to_intervals([lower, upper])
# Find the offsets for each interval in the requested range, which we will need later
search_ranges_offset = []
for r in original_search_ranges:
offset = axis.offset(r)
search_ranges_offset.append(offset)
# Look up the values in the datacube for each cyclic interval range
idx_between = self._look_up_datacube(search_ranges, search_ranges_offset, indexes, axis, first_val)
# Remove duplicates even if difference of the order of the axis tolerance
if offset is not None:
# Note that we can only do unique if not dealing with time values
idx_between = unique(idx_between)
return idx_between

def has_index(self, path: DatacubePath, axis, index):
# when we want to obtain the value of an unsliceable axis, need to check the values does exist in the datacube
path = self.fit_path_to_datacube(axis, path)[0]

# Open a view on the subset identified by the path
subarray = self.dataarray.sel(path, method="nearest")
if axis.name in self.transformation.keys():
axis_transforms = self.transformation[axis.name]
for transform in axis_transforms:
indexes = transform._find_transformed_axis_indices(self, axis, subarray)
# return index in subarray_vals
else:
indexes = self.datacube_natural_indexes(axis, subarray)
return index in indexes

@property
def axes(self):
return self._axes

def validate(self, axes):
return validate_axes(list(self.axes.keys()), axes)

def ax_vals(self, name):
treated_axes = []
for _name, values in self.dataarray.coords.variables.items():
treated_axes.append(_name)
if _name == name:
return values.values
for _name in self.dataarray.dims:
if _name not in treated_axes:
if _name == name:
return self.dataarray[name].values[0]
Loading