Skip to content

Commit

Permalink
Merge pull request #655 from British-Oceanographic-Data-Centre/featur…
Browse files Browse the repository at this point in the history
…e/zarr-files

Add ability to open Zarr files
  • Loading branch information
soutobias authored Dec 6, 2023
2 parents 308ffd7 + 5c9cecc commit ff55000
Show file tree
Hide file tree
Showing 10 changed files with 10,450 additions and 108 deletions.
2 changes: 1 addition & 1 deletion .pylint-score
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.81
5.99
3 changes: 3 additions & 0 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ authors:
- family-names: "Cazaly"
given-names: "Matthew"
orcid: "https://orcid.org/0000-0002-0679-4054"
- family-names: "Ferreira"
given-names: "Tobias"
orcid: "https://orcid.org/0000-0002-0888-9751"
- family-names: "Hearn"
given-names: "Malcolm"
- family-names: "Jennings"
Expand Down
175 changes: 111 additions & 64 deletions coast/data/coast.py

Large diffs are not rendered by default.

95 changes: 55 additions & 40 deletions coast/data/gridded.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
# import graphviz
import gsw
import numpy as np
import pandas as pd
import xarray as xr

from .._utils import general_utils, stats_util
from .._utils.logging_util import debug, error, get_slug, info, warn, warning
from .coast import Coast
from .config_parser import ConfigParser
from .._utils.logging_util import get_slug, debug, info, warn, error, warning
import pandas as pd


class Gridded(Coast): # TODO Complete this docstring
Expand Down Expand Up @@ -49,7 +49,9 @@ def __init__(
self._setup_grid_obj(self.config.chunks, multiple, **kwargs)
else:
self._setup_grid_obj(None, multiple, **kwargs)
else: # allow for usage without config file, this will be limited and doesn't bring the full COAST features
# allow for usage without config file, this will be limited and doesn't
# bring the full COAST features
else:
debug("Config file expected. Limited functionality without config file")
if self.fn_data is not None:
self.load(self.fn_data, None, multiple)
Expand All @@ -62,11 +64,15 @@ def _setup_grid_obj(self, chunks, multiple, **kwargs):
"""This is a helper method to reduce the size of def __init__
Args:
chunks: This is a setting for xarray as to whether dask (parallel processing) should be on and how it works
chunks: This is a setting for xarray as to whether dask (parallel
processing) should be on and how it works
multiple: flag to tell if we are loading one or more files
**kwargs: pass direct to loaded xarray dataset
lims = [x_dim index 1, x_dim_index 2, y_dim index 1, y_dim_index 2] - subset region defined from
lower left to upper right corners
lims = [x_dim index 1,
x_dim_index 2,
y_dim index 1,
y_dim_index 2] - subset region defined from lower left to
upper right corners
calculate_bathymetry [boolean]: default-False
"""
self.set_grid_vars()
Expand All @@ -78,7 +84,8 @@ def _setup_grid_obj(self, chunks, multiple, **kwargs):

self.set_dimension_names(self.config.dataset.dimension_map)
self.set_variable_names(self.config.dataset.variable_map)
self.dataset = self.spatial_subset(self.dataset, lims) # Trim data size if indices specified
# Trim data size if indices specified
self.dataset = self.spatial_subset(self.dataset, lims)

if self.fn_domain is None:
self.filename_domain = "" # empty store for domain filename
Expand All @@ -92,12 +99,14 @@ def _setup_grid_obj(self, chunks, multiple, **kwargs):
for key, value in kwargs.items():
dataset_domain[key] = value

dataset_domain = self.spatial_subset(dataset_domain, lims) # Trim domain size if indices specified
# Trim domain size if indices specified
dataset_domain = self.spatial_subset(dataset_domain, lims)

# Trim domain size if self.data is smaller
if self.fn_data is not None:
dataset_domain = self.trim_domain_size(dataset_domain) # Trim domain size if self.data is smaller
self.set_timezero_depths(
dataset_domain, **kwargs
) # THIS ADDS TO dataset_domain. Should it be 'return'ed (as in trim_domain_size) or is implicit OK?
dataset_domain = self.trim_domain_size(dataset_domain)
self.set_timezero_depths(dataset_domain, **kwargs) # THIS ADDS TO dataset_domain. Should it be 'return'ed
# (as in trim_domain_size) or is implicit OK?
self.merge_domain_into_dataset(dataset_domain)
debug(f"Initialised {get_slug(self)}")

Expand Down Expand Up @@ -127,7 +136,10 @@ def load_domain(self, fn_domain, chunks): # TODO Do something with this unused
"""Loads domain file and renames dimensions with dim_mapping_domain"""
# Load xarray dataset
info(f'Loading domain: "{fn_domain}"')
dataset_domain = xr.open_dataset(fn_domain)
if isinstance(fn_domain, xr.core.dataset.Dataset):
dataset_domain = fn_domain
else:
dataset_domain = xr.open_dataset(fn_domain)
self.domain_loaded = True
# Rename dimensions
for key, value in self.config.domain.dimension_map.items():
Expand All @@ -136,7 +148,8 @@ def load_domain(self, fn_domain, chunks): # TODO Do something with this unused
dataset_domain = dataset_domain.rename_dims(mapping)
except ValueError as err:
warning(
f"{get_slug(self)}: Problem renaming domain dimension from {get_slug(self.dataset)}: {key} -> {value}."
f"{get_slug(self)}: Problem renaming domain dimension from "
f"{get_slug(self.dataset)}: {key} -> {value}."
f"{chr(10)}Error message of '{err}'"
)
# Rename domain variables.
Expand All @@ -146,7 +159,8 @@ def load_domain(self, fn_domain, chunks): # TODO Do something with this unused
dataset_domain = dataset_domain.rename_vars(mapping)
except ValueError as err:
warning(
f"{get_slug(self)}: Problem renaming domain variable from {get_slug(self.dataset)}: {key} -> {value}."
f"{get_slug(self)}: Problem renaming domain variable from "
f"{get_slug(self.dataset)}: {key} -> {value}."
f"{chr(10)}Error message of '{err}'"
)
return dataset_domain
Expand Down Expand Up @@ -415,7 +429,7 @@ def find_j_i_domain(self, *, lat: float, lon: float, dataset_domain: xr.DataArra
:param dataset_domain: dataset domain
:return: the y and x coordinates for the grid_ref variable within the domain file
"""
debug(f"Finding j,i domain for {lat},{lon} from {get_slug(self)} using {get_slug(dataset_domain)}")
debug(f"Finding j,i domain for {lat},{lon} from {get_slug(self)} using " f"{get_slug(dataset_domain)}")
internal_lat = dataset_domain["latitude"] # [f"gphi{self.grid_ref.replace('-grid','')}"]
internal_lon = dataset_domain["longitude"] # [f"glam{self.grid_ref.replace('-grid','')}"]
dist2 = np.square(internal_lat - lat) + np.square(internal_lon - lon)
Expand Down Expand Up @@ -509,7 +523,8 @@ def interpolate_in_time(model_array, new_times, interp_method="nearest", extrapo
)
else:
interpolated = interpolated.interp(time=new_times, method=interp_method)
# interpolated = interpolated.swap_dims({'time':'t_dim'}) # TODO Do something with this or delete it
# TODO Do something with this or delete it
# interpolated = interpolated.swap_dims({'time':'t_dim'})

return interpolated

Expand Down Expand Up @@ -543,9 +558,9 @@ def construct_density(
DESCRIPTION. The default is 'False'.
pot_dens :Calculation at zero pressure
DESCRIPTION. The default is 'False'.
Tbar and Sbar : If rhobar is True then these can be switched to False to allow one component to
remain depth varying. So Tbar=False gives temperature component, Sbar=False gives salinity component
DESCRIPTION. The default is 'True'.
Tbar and Sbar : If rhobar is True then these can be switched to False to allow one
component to remain depth varying. So Tbar=False gives temperature component,
Sbar=False gives salinity component DESCRIPTION. The default is 'True'.
Returns
-------
Expand Down Expand Up @@ -682,7 +697,9 @@ def construct_density(

def spatial_subset(self, dataset, lims):
"""
Specify indices to subset the data. Subset region defined as a 2D box from lower left to upper right corners
Specify indices to subset the data. Subset region defined as a 2D box from
lower left to upper right corners
lims = [x_dim index_1, x_dim_index_2, y_dim index_1, y_dim_index_2] -
Modifies self.dataset
"""
Expand Down Expand Up @@ -713,13 +730,9 @@ def trim_domain_size(self, dataset_domain):
self.dataset["y_dim"].size != dataset_domain["y_dim"].size
):
info(
"The domain and dataset objects are different sizes:"
" [{},{}] cf [{},{}]. Trim domain.".format(
dataset_domain["x_dim"].size,
dataset_domain["y_dim"].size,
self.dataset["x_dim"].size,
self.dataset["y_dim"].size,
)
f"The domain and dataset objects are different sizes:"
f" [{dataset_domain['x_dim'].size},{dataset_domain['y_dim'].size}]"
f" cf [{self.dataset['x_dim'].size},{self.dataset['y_dim'].size}]. Trim domain."
)

# Find the corners of the cut out domain.
Expand All @@ -730,7 +743,7 @@ def trim_domain_size(self, dataset_domain):
[j1, i1] = self.find_j_i_domain(
lat=self.dataset.latitude[-1, -1], lon=self.dataset.longitude[-1, -1], dataset_domain=dataset_domain
)
debug(f"trim_domain_size(): USED dataset.longitude")
debug("trim_domain_size(): USED dataset.longitude")
except: # if called before variables are re-mapped. Not very pretty...
[j0, i0] = self.find_j_i_domain(
lat=self.dataset.nav_lat[0, 0], lon=self.dataset.nav_lon[0, 0], dataset_domain=dataset_domain
Expand All @@ -742,15 +755,14 @@ def trim_domain_size(self, dataset_domain):

dataset_subdomain = dataset_domain.isel(y_dim=slice(j0, j1 + 1), x_dim=slice(i0, i1 + 1))
return dataset_subdomain
else:
return dataset_domain
return dataset_domain

def copy_domain_vars_to_dataset(self, dataset_domain, grid_vars):
"""
Map the domain coordinates and metric variables to the dataset object.
Expects the source and target DataArrays to be same sizes.
"""
debug(f"Copying domain vars from {get_slug(dataset_domain)}/{get_slug(grid_vars)} to {get_slug(self)}")
debug(f"Copying domain vars from {get_slug(dataset_domain)}/{get_slug(grid_vars)} " f"to {get_slug(self)}")
for var in grid_vars:
try:
new_name = self.config.domain.variable_map[var]
Expand All @@ -762,7 +774,7 @@ def copy_domain_vars_to_dataset(self, dataset_domain, grid_vars):
else:
self.dataset[new_name] = dataset_domain[new_name].squeeze()

debug("map: {} --> {}".format(var, new_name))
debug(f"map: {var} --> {new_name}")
except: # FIXME Catch specific exception(s)
pass # TODO Should we log something here?

Expand Down Expand Up @@ -799,11 +811,13 @@ def differentiate(self, in_var_str, config_path=None, dim="z_dim", out_var_str=N
nemo_w_1 = nemo_t.differentiate( 'temperature', dim='z_dim' )
# For f(z)=-z. Compute df/dz = -1. Surface value is set to zero
nemo_t.dataset['depth4D'],_ = xr.broadcast( nemo_t.dataset['depth_0'], nemo_t.dataset['temperature'] )
nemo_t.dataset['depth4D'],_ = xr.broadcast( nemo_t.dataset['depth_0'],
nemo_t.dataset['temperature'] )
nemo_w_4 = nemo_t.differentiate( 'depth4D', dim='z_dim', out_var_str='dzdz' )
Provide an existing target NEMO object and target variable name:
nemo_w_1 = nemo_t.differentiate( 'temperature', dim='z_dim', out_var_str='dTdz', out_obj=nemo_w_1 )
nemo_w_1 = nemo_t.differentiate( 'temperature', dim='z_dim',
out_var_str='dTdz', out_obj=nemo_w_1 )
Parameters
Expand All @@ -815,8 +829,6 @@ def differentiate(self, in_var_str, config_path=None, dim="z_dim", out_var_str=N
out_obj : exiting NEMO obj to store xr.DataArray (optional)
"""
import xarray as xr

new_units = ""

# Check in_var_str exists in self.
Expand Down Expand Up @@ -851,7 +863,8 @@ def differentiate(self, in_var_str, config_path=None, dim="z_dim", out_var_str=N

# Create new DataArray with the same dimensions as the parent
# Crucially have a coordinate value that is appropriate to the target location.
blank = xr.zeros_like(var.isel(z_dim=[0])) # Using "z_dim=[0]" as a list preserves z-dimension
# Using "z_dim=[0]" as a list preserves z-dimension
blank = xr.zeros_like(var.isel(z_dim=[0]))
blank.coords["depth_0"] -= blank.coords["depth_0"] # reset coord vals to zero
# Add blank slice to the 'surface'. Concat over the 'dim' coords
diff = xr.concat([blank, var.diff(dim)], dim)
Expand All @@ -868,7 +881,8 @@ def differentiate(self, in_var_str, config_path=None, dim="z_dim", out_var_str=N
return out_obj

else:
warn("Not ready for that combination of grid ({}) and " "derivative ({})".format(self.grid_ref, dim))
warning_message = f"Not ready for that combination of grid ({self.grid_ref}) " f"and derivative ({dim})"
warn(warning_message)
return None
else:
warn(f"{in_var_str} does not exist in {get_slug(self)} dataset")
Expand Down Expand Up @@ -1011,7 +1025,8 @@ def get_e3_from_ssh(nemo_t, e3t=True, e3u=False, e3v=False, e3f=False, e3w=False
e1e2f = ds_dom.e1f * ds_dom.e2f
e3u_dt = e3u_new - ds_dom.e3u_0
e3f_temp = (
(0.5 / e1e2f[:-1, :]) * ((e1e2u[:-1, :] * e3u_dt[:, :, :-1, :]) + (e1e2u[1:, :] * e3u_dt[:, :, 1:, :]))
(0.5 / e1e2f[:-1, :])
* ((e1e2u[:-1, :] * e3u_dt[:, :, :-1, :]) + ((e1e2u[1:, :] * e3u_dt[:, :, 1:, :])))
).transpose("t_dim", "z_dim", "y_dim", "x_dim")
e3f_temp = e3f_temp.where(e3u_dt[:, :, 1:, :] != 0, 0)
e3f_temp = e3f_temp.where(e3f_temp.z_dim < ds_dom.bottom_level[:-1, :], 0)
Expand Down
94 changes: 94 additions & 0 deletions config/example_nemo_monthly_climate.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
{
"type": "gridded",
"dimensionality": 3,
"chunks": {"time_counter":2},
"grid_ref": {
"t-grid": [
"glamt",
"gphit",
"e1t",
"e2t",
"e3t_0",
"deptht_0",
"tmask",
"bottom_level",
"mbathy",
"hbatt"
]
},
"dataset": {
"dimension_map": {
"time_counter": "t_dim",
"deptht": "z_dim",
"nav_lev": "z_dim",
"y": "y_dim",
"x": "x_dim",
"x_grid_T": "x_dim",
"y_grid_T": "y_dim"
},
"variable_map": {
"time_counter": "time",
"votemper": "temperature",
"thetao": "temperature",
"temp": "temperature",
"toce": "temperature",
"thetao_con": "temperature",
"so": "salinity",
"vosaline": "salinity",
"soce": "salinity",
"so_abs": "salinity",
"sossheig": "ssh",
"zos": "ssh"
},
"coord_vars": [
"longitude",
"latitude",
"time",
"depth_0"
]
},
"domain": {
"dimension_map": {
"t": "t_dim0",
"x": "x_dim",
"y": "y_dim",
"z": "z_dim",
"nav_lev": "z_dim"
},
"variable_map": {
"time_counter": "time0",
"glamt": "longitude",
"gphit": "latitude",
"e1t": "e1",
"e2t": "e2",
"e3t_0": "e3_0",
"e3w_0": "e3w_0",
"tmask":"mask",
"deptht_0": "depth_0",
"bottom_level": "bottom_level",
"mbathy":"bottom_level",
"hbatt":"bathymetry"
}
},
"static_variables": {
"not_grid_vars": [
"jpiglo",
"jpjglo",
"jpkglo",
"jperio",
"ln_zco",
"ln_zps",
"ln_sco",
"ln_isfcav"
],
"delete_vars": [
"nav_lat",
"nav_lon",
"deptht"
]
},
"processing_flags": [
"example_flag1",
"example_flag2"
]
}
Loading

0 comments on commit ff55000

Please sign in to comment.