-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes to support subsetting datasets by constraining coordinate values. #494
base: develop
Are you sure you want to change the base?
Changes from 4 commits
89da9f6
4d76558
21cbbb7
54de8b3
e3dab1e
ef682f9
61af083
ecb776e
bcd86ba
78f5a32
dab7110
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from dataclasses import dataclass | ||
from numpy import number | ||
from numbers import Number | ||
from typing import Union, Optional | ||
|
||
|
||
# A single coordinate value: an instance of numbers.Number or numpy.number,
# or None when no value is given for that axis.
Numeric = Union[Number, number, None]
|
||
|
||
@dataclass
class Coordinates2D:
    """Hold an X and a Y coordinate value, either of which may be None.

    Both fields are required constructor arguments (in the order x, y);
    pass None explicitly for an axis that has no value.
    """

    # Value along the X axis, or None if unset.
    x: Numeric
    # Value along the Y axis, or None if unset.
    y: Numeric
|
||
|
||
@dataclass
class Coordinates3D(Coordinates2D):
    """Extend Coordinates2D with a Z coordinate value, which may also be None.

    Constructor arguments are taken in the order x, y, z.
    """

    # Value along the Z axis, or None if unset.
    z: Numeric
|
||
|
||
@dataclass
class Coordinates4D(Coordinates3D):
    """Extend Coordinates3D with a T (time) coordinate value, which may also be None.

    Constructor arguments are taken in the order x, y, z, t.
    """

    # Value along the T axis, or None if unset.
    # TODO Should this be a datetime or is it likely to be something like a Unix timestamp?
    t: Numeric
|
||
MCazaly marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
# Any of the supported coordinate containers, from two up to four fields.
Coordinates = Union[
    Coordinates2D,
    Coordinates3D,
    Coordinates4D,
]
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
from dask.distributed import Client | ||
import copy | ||
from .._utils.logging_util import get_slug, debug, info, warn, warning | ||
from .._utils.coordinates import Coordinates, Coordinates3D, Coordinates4D, Numeric | ||
from .opendap import OpendapInfo | ||
|
||
|
||
|
@@ -467,5 +468,165 @@ def plot_cartopy(self, var: str, plot_var: array, params, time_counter: int = 0) | |
info("Displaying plot!") | ||
plt.show() | ||
|
||
def set_constraint(self, start: Coordinates, end: Coordinates, drop: bool = True) -> None:
    """Constrain the underlying dataset in place to an arbitrarily sized orthotope.

    The result of ``self.constrain`` is stored back on ``self.dataset``.

    Args:
        start: The start coordinates of the shape to define.
        end: The end coordinates of the shape to define.
        drop: Whether values should be dropped from the constrained dataset (if False, they will be NaNed).
    """
    constrained = self.constrain(start, end, drop=drop)
    self.dataset = constrained
|
||
def constrain(self, start: Coordinates, end: Coordinates, drop: bool = True) -> xr.Dataset:
    """Return a constrained version of the underlying dataset without storing it.

    Delegates to the module-level ``constrain`` function, passing ``self.dataset``.

    Args:
        start: The start coordinates of the shape to define.
        end: The end coordinates of the shape to define.
        drop: Whether values should be dropped from the constrained dataset (if False, they will be NaNed).

    Returns:
        The underlying dataset with values constrained to within the defined selection.
    """
    source = self.dataset
    return constrain(source, start, end, drop=drop)
|
||
@property
def x_dim(self) -> xr.DataArray:
    """Coordinate array associated with the X dimension of the underlying dataset."""
    source = self.dataset
    return x_dim(source)
|
||
@property
def y_dim(self) -> xr.DataArray:
    """Coordinate array associated with the Y dimension of the underlying dataset."""
    source = self.dataset
    return y_dim(source)
|
||
@property
def z_dim(self) -> xr.DataArray:
    """Coordinate array associated with the Z dimension of the underlying dataset."""
    source = self.dataset
    return z_dim(source)
|
||
@property
def t_dim(self) -> xr.DataArray:
    """Coordinate array associated with the T (time) dimension of the underlying dataset."""
    source = self.dataset
    return t_dim(source)
|
||
def get_coord(self, dim: str) -> xr.DataArray:
    """Look up the coordinate array for one dimension of the underlying dataset.

    Delegates to the module-level ``get_coord`` function, passing ``self.dataset``.

    Args:
        dim: The name of the dimension (i.e. "x", "y", "z", or "t").

    Returns:
        The corresponding coordinate array from the underlying dataset.
    """
    source = self.dataset
    return get_coord(source, dim)
|
||
def plot_movie(self):
    """Placeholder for movie plotting; always raises NotImplementedError."""
    raise NotImplementedError()
|
||
|
||
def create_constraint(start: Numeric, end: Numeric, dim: xr.DataArray) -> np.typing.NDArray[bool]:
    """Build a boolean mask selecting the values of ``dim`` that lie in ``[start, end]``.

    Both bounds are inclusive.

    Args:
        start: The start of the range of values to constrain within.
        end: The end of the range of values to constrain within.
        dim: The coordinate array to constrain values from.

    Returns:
        A mask that can be applied to dim to exclude unwanted values.
    """
    at_or_above_start = dim >= start
    at_or_below_end = dim <= end
    return np.logical_and(at_or_above_start, at_or_below_end)
MCazaly marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
def get_coord(dataset: xr.Dataset, dim: str) -> xr.DataArray:
    """Look up the coordinate array attached to one dimension of a dataset.

    The variable named ``"<dim>_dim"`` (with ``dim`` lower-cased) is read from
    the dataset, and the first name listed in its ``coords`` is used to index
    the dataset again.

    Args:
        dataset: The dataset to interrogate.
        dim: The name of the dimension (i.e. "x", "y", "z", or "t").

    Returns:
        The corresponding coordinate array from the provided dataset.
    """
    # TODO Really not a fan of this, is there an easier way to get the mapping?
    dim_variable = dataset[f"{dim.lower()}_dim"]
    coord_names = list(dim_variable.coords)
    return dataset[coord_names[0]]
|
||
|
||
def x_dim(dataset: xr.Dataset) -> xr.DataArray:
    """Get the coordinate array for the X dimension of a dataset.

    Shorthand for ``get_coord(dataset, "x")``.

    Args:
        dataset: The dataset to interrogate.

    Returns:
        The corresponding coordinate array from the provided dataset.
    """
    return get_coord(dataset, dim="x")
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be clear, a coordinate longitude can be 2 dimensional. E.g. if you want a box that is 1000 km x 1000 km, then near the north pole it would vary with latitude and longitude. Or is the terminology for "coordinates" referring to indices in an array, which are 1-dimensional? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That's right, I was basing the name off There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. If I load your copernicus example and inspect the
So the horizontal coords are have names Indices are generally referred to as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. However, from data in the example files:
Gives a floating point latitude, which will vary with both indices.
In summary indices are more basic than coordinates because coordinates are a function of indices. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #494 (comment) |
||
|
||
|
||
def y_dim(dataset: xr.Dataset) -> xr.DataArray:
    """Get the coordinate array for the Y dimension of a dataset.

    Shorthand for ``get_coord(dataset, "y")``.

    Args:
        dataset: The dataset to interrogate.

    Returns:
        The corresponding coordinate array from the provided dataset.
    """
    return get_coord(dataset, dim="y")
|
||
|
||
def z_dim(dataset: xr.Dataset) -> xr.DataArray:
    """Get the coordinate array for the Z dimension of a dataset.

    Shorthand for ``get_coord(dataset, "z")``.

    Args:
        dataset: The dataset to interrogate.

    Returns:
        The corresponding coordinate array from the provided dataset.
    """
    return get_coord(dataset, dim="z")
|
||
|
||
def t_dim(dataset: xr.Dataset) -> xr.DataArray:
    """Get the coordinate array for the T (time) dimension of a dataset.

    Shorthand for ``get_coord(dataset, "t")``.

    Args:
        dataset: The dataset to interrogate.

    Returns:
        The corresponding coordinate array from the provided dataset.
    """
    return get_coord(dataset, dim="t")
|
||
|
||
def constrain(dataset: xr.Dataset, start: Coordinates, end: Coordinates, drop: bool = True) -> xr.Dataset:
    """Constrain values within a dataset to an arbitrarily sized orthotope.

    Each axis is handled independently and in the order X, Y, Z, T. Z is only
    considered when ``start`` is a ``Coordinates3D`` (or subclass), and T only
    when it is a ``Coordinates4D``. For an axis where both ``start`` and
    ``end`` carry a value, the dataset is filtered with ``Dataset.where``
    using an inclusive ``[start, end]`` mask built by ``create_constraint``.
    An axis where both values are None is left unconstrained.

    Args:
        dataset: The dataset to constrain values from.
        start: The start coordinates of the shape to define.
        end: The end coordinates of the shape to define.
        drop: Whether values should be dropped from the constrained dataset (if False, they will be NaNed).

    Returns:
        The provided dataset with values constrained to within the defined selection.

    Raises:
        AssertionError: If ``start`` and ``end`` are not of the same type, or
            if an axis has a value on one of them but None on the other.
    """
    assert type(start) == type(end), "Coordinates must be of the same dimensionality!"

    # Pair each axis name with the helper that finds its coordinate array.
    axes = [("x", x_dim), ("y", y_dim)]
    if isinstance(start, Coordinates3D):
        axes.append(("z", z_dim))
    if isinstance(start, Coordinates4D):
        axes.append(("t", t_dim))

    constrained = dataset
    for axis, coord_of in axes:
        lower = getattr(start, axis)
        upper = getattr(end, axis)
        # Check the pairing before deciding whether to constrain, so that a
        # half-specified axis is reported rather than silently skipped.
        assert (lower is None) == (upper is None), (
            f"Tried to constrain on {axis.upper()} with a missing paired value!"
        )
        if lower is None:
            continue
        # Look the coordinate array up on the already-constrained dataset so
        # the mask matches its current shape after earlier drops.
        mask = create_constraint(lower, upper, coord_of(constrained))
        constrained = constrained.where(mask, drop=drop)
    return constrained
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Coordinates2D is an example of an object where the user needs to know the ordering convention: Coordinates2D(45, 50) is obviously a different place to Coordinates2D(50, 45), but which is which?
Convention would have x preceding y, but also that latitude precedes longitude...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As the parameters are named "x" and "y", what would be your preferred way of indicating that?