Skip to content

Commit

Permalink
Cache regridding weights if possible (#2344)
Browse files Browse the repository at this point in the history
Co-authored-by: Valeriu Predoi <[email protected]>
  • Loading branch information
schlunma and valeriupredoi authored Apr 16, 2024
1 parent 6cf32c7 commit bbd307d
Show file tree
Hide file tree
Showing 6 changed files with 383 additions and 181 deletions.
40 changes: 32 additions & 8 deletions doc/recipe/preprocessor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -780,10 +780,6 @@ regridding is based on the horizontal grid of another cube (the reference
grid). If the horizontal grids of a cube and its reference grid are sufficiently
similar, regridding is automatically and silently skipped for performance reasons.

The underlying regridding mechanism in ESMValCore uses
:obj:`iris.cube.Cube.regrid`
from Iris.

The use of the horizontal regridding functionality is flexible depending on
what type of reference grid and what interpolation scheme is preferred. Below
we show a few examples.
Expand Down Expand Up @@ -821,7 +817,7 @@ cell specification is oftentimes used when operating on localized data.
target_grid: 2.5x2.5
scheme: nearest
In this case the ``NearestNeighbour`` interpolation scheme is used (see below
In this case the nearest-neighbor interpolation scheme is used (see below
for scheme definitions).

When using a ``MxN`` type of grid it is possible to offset the grid cell
Expand Down Expand Up @@ -917,9 +913,6 @@ Built-in regridding schemes
:class:`~esmvalcore.preprocessor.regrid_schemes.ESMPyAreaWeighted`.
Source data on an unstructured grid is not yet supported.

See also :func:`esmvalcore.preprocessor.regrid`


.. _generic regridding schemes:

Generic regridding schemes
Expand Down Expand Up @@ -1017,6 +1010,37 @@ scheme available in :doc:`iris-esmf-regrid:index`:
reference: esmf_regrid.schemes:regrid_rectilinear_to_rectilinear
mdtol: 0.7
.. _caching_regridding_weights:

Reusing regridding weights
--------------------------

If desired, regridding weights can be cached to reduce run times (see `here
<https://scitools-iris.readthedocs.io/en/latest/userguide/interpolation_and_regridding.html#caching-a-regridder>`__
for technical details on this).
This can massively speed up the regridding of different datasets with similar
source and target grids, but may take up a lot of memory for extremely
high-resolution data.
By default, this feature is disabled; to enable it, use the option
``cache_weights: true`` in the preprocessor definition:

.. code-block:: yaml
preprocessors:
regrid_preprocessor:
regrid:
target_grid: 0.1x0.1
scheme: linear
cache_weights: true
Not all regridding schemes support weights caching. An overview of those that
do is given `here
<https://scitools-iris.readthedocs.io/en/latest/further_topics/which_regridder_to_use.html#which-regridder-to-use>`__
and in the docstrings :ref:`here <regridding_schemes>`.

See also :func:`esmvalcore.preprocessor.regrid`


.. _ensemble statistics:

Ensemble statistics
Expand Down
97 changes: 92 additions & 5 deletions esmvalcore/preprocessor/_regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,8 +500,7 @@ def _get_target_grid_cube(
elif isinstance(target_grid, (str, Path)) and os.path.isfile(target_grid):
target_grid_cube = iris.load_cube(target_grid)
elif isinstance(target_grid, str):
# Generate a target grid from the provided cell-specification,
# and cache the resulting stock cube for later use.
# Generate a target grid from the provided cell-specification
target_grid_cube = _global_stock_cube(
target_grid, lat_offset, lon_offset
)
Expand Down Expand Up @@ -639,12 +638,82 @@ def _load_generic_scheme(scheme: dict):
return loaded_scheme


# Cache of regridders that have already been built. The outer key is the
# tuple returned by `_get_name_and_shape_key` (scheme name and coordinate
# shapes); the inner dict maps the tuple returned by `_get_coord_key`
# (source/target latitude and longitude coordinates) to the regridder.
_CACHED_REGRIDDERS: dict[tuple, dict] = {}


def _get_regridder(
    src_cube: Cube,
    tgt_cube: Cube,
    scheme: str | dict,
    cache_weights: bool,
):
    """Return the regridder that maps `src_cube` onto `tgt_cube`.

    Parameters
    ----------
    src_cube:
        Cube providing the source grid.
    tgt_cube:
        Cube providing the target grid.
    scheme:
        Regridding scheme, passed on to ``_load_scheme``.
    cache_weights:
        If ``True``, look up the regridder in the module-level cache and
        store a newly built one there; if ``False``, always build a new
        regridder and leave the cache untouched.

    Returns
    -------
    The regridder created by the loaded scheme (or an equivalent cached
    one).

    Note
    ----
    Reusing an existing regridder reduces runtime (see also
    https://scitools-iris.readthedocs.io/en/latest/userguide/
    interpolation_and_regridding.html#caching-a-regridder.)

    """
    # Caching disabled: build a fresh regridder on every call.
    if not cache_weights:
        return _load_scheme(src_cube, scheme).regridder(src_cube, tgt_cube)

    # Two-stage lookup: the cheap key (scheme name and coordinate shapes)
    # selects a bucket; only entries in that bucket are compared via the
    # much more expensive coordinate equality.
    coords = _get_coord_key(src_cube, tgt_cube)
    bucket_key = _get_name_and_shape_key(src_cube, tgt_cube, scheme)

    # A plain `coords in bucket` test is not possible here: the hash() of a
    # coordinate is simply its id(), so coordinates loaded from two
    # different files would never be found. Compare with `==` instead.
    bucket = _CACHED_REGRIDDERS.get(bucket_key, {})
    for cached_coords, cached_regridder in bucket.items():
        if cached_coords == coords:
            return cached_regridder

    # No matching regridder in the cache -> create a new one and store it.
    new_regridder = _load_scheme(src_cube, scheme).regridder(
        src_cube, tgt_cube
    )
    _CACHED_REGRIDDERS.setdefault(bucket_key, {})[coords] = new_regridder
    return new_regridder


def _get_coord_key(src_cube: Cube, tgt_cube: Cube) -> tuple:
    """Build the coordinate part of the regridder cache key.

    Returns
    -------
    tuple
        ``(source latitude, source longitude, target latitude, target
        longitude)``, each obtained via ``cube.coord(...)``.

    """
    return tuple(
        cube.coord(axis_name)
        for cube in (src_cube, tgt_cube)
        for axis_name in ('latitude', 'longitude')
    )


def _get_name_and_shape_key(
    src_cube: Cube,
    tgt_cube: Cube,
    scheme: str | dict,
) -> tuple:
    """Build the cheap pre-selection part of the regridder cache key.

    Returns
    -------
    tuple
        The string representation of `scheme`, followed by the shapes of
        the source/target latitude and longitude coordinates (in the order
        given by ``_get_coord_key``).

    """
    scheme_name = str(scheme)
    coord_shapes = tuple(
        coord.shape for coord in _get_coord_key(src_cube, tgt_cube)
    )
    return (scheme_name,) + coord_shapes


def regrid(
cube: Cube,
target_grid: Cube | Dataset | Path | str | dict,
scheme: str | dict,
lat_offset: bool = True,
lon_offset: bool = True,
cache_weights: bool = False,
) -> Cube:
"""Perform horizontal regridding.
Expand Down Expand Up @@ -691,6 +760,14 @@ def regrid(
Offset the grid centers of the longitude coordinate w.r.t. Greenwich
meridian by half a grid step. This argument is ignored if
`target_grid` is a cube or file.
cache_weights:
If ``True``, cache regridding weights for later usage. This can speed
up the regridding of different datasets with similar source and target
grids massively, but may take up a lot of memory for extremely
high-resolution data. This option is ignored for schemes that do not
support weights caching. More details on this are given in the section
on :ref:`caching_regridding_weights`. To clear the cache, use
:func:`esmvalcore.preprocessor.regrid.cache_clear`.
Returns
-------
Expand Down Expand Up @@ -757,16 +834,26 @@ def regrid(
)
return cube

# Load scheme, rechunk and regrid
# Load scheme and reuse existing regridder if possible
if isinstance(scheme, str):
scheme = scheme.lower()
loaded_scheme = _load_scheme(cube, scheme)
regridder = _get_regridder(cube, target_grid_cube, scheme, cache_weights)

# Rechunk and actually perform the regridding
cube = _rechunk(cube, target_grid_cube)
cube = cube.regrid(target_grid_cube, loaded_scheme)
cube = regridder(cube)

return cube


def _cache_clear():
    """Remove all regridders stored in the regridding weights cache."""
    _CACHED_REGRIDDERS.clear()


# Attach the cache reset to the public function so that it can be called
# as `regrid.cache_clear()`.
regrid.cache_clear = _cache_clear  # type: ignore


def _rechunk(cube: Cube, target_grid: Cube) -> Cube:
"""Re-chunk cube with optimal chunk sizes for target grid."""
if not cube.has_lazy_data() or cube.ndim < 3:
Expand Down
2 changes: 2 additions & 0 deletions esmvalcore/preprocessor/_regrid_esmpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
class ESMPyRegridder:
"""General ESMPy regridder.
Does not support lazy regridding nor weights caching.
Parameters
----------
src_cube:
Expand Down
3 changes: 3 additions & 0 deletions esmvalcore/preprocessor/regrid_schemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
class GenericRegridder:
r"""Generic function regridder.
Does support lazy regridding if `func` does. Does not support weights
caching.
Parameters
----------
src_cube:
Expand Down
Loading

0 comments on commit bbd307d

Please sign in to comment.