From 8e2ef15ac26c03cfd1dcc56c2830b8cc5c781830 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Tue, 4 Jun 2024 15:03:02 +0200 Subject: [PATCH] Make volume calculation in preprocessor function `volume_statistics` lazy (#2436) Co-authored-by: Manuel Schlund <32543114+schlunma@users.noreply.github.com> --- esmvalcore/preprocessor/_volume.py | 12 ++++--- .../unit/preprocessor/_volume/test_volume.py | 34 +++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 3d04f23500..c59fe82936 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -16,6 +16,7 @@ from iris.cube import Cube from ._shared import ( + broadcast_to_shape, get_iris_aggregator, get_normalized_cube, preserve_float_dtype, @@ -152,6 +153,8 @@ def calculate_volume(cube: Cube) -> da.core.Array: # Calculate Z-direction thickness thickness = depth.core_bounds()[..., 1] - depth.core_bounds()[..., 0] + if cube.has_lazy_data(): + thickness = da.array(thickness) # Get or calculate the horizontal areas of the cube has_cell_measure = bool(cube.cell_measures('cell_area')) @@ -166,10 +169,11 @@ def calculate_volume(cube: Cube) -> da.core.Array: if not has_cell_measure: cube.remove_cell_measure('cell_area') - area_arr = iris.util.broadcast_to_shape( - area.core_data(), cube.shape, area_dim) - thickness_arr = iris.util.broadcast_to_shape( - thickness, cube.shape, z_dim) + chunks = cube.core_data().chunks if cube.has_lazy_data() else None + area_arr = broadcast_to_shape( + area.core_data(), cube.shape, area_dim, chunks=chunks) + thickness_arr = broadcast_to_shape( + thickness, cube.shape, z_dim, chunks=chunks) grid_volume = area_arr * thickness_arr return grid_volume diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index e962e062dd..f055d2f7b2 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -2,6 +2,7 @@ import unittest +import dask.array as da import iris import iris.fileformats import numpy as np @@ -118,6 +119,10 @@ def setUp(self): units='kg m-3', ) + self.grid_4d_lazy = self.grid_4d.copy() + self.grid_4d_lazy.data = self.grid_4d_lazy.lazy_data().rechunk( + (1, 2, None, None)) + coords_spec4_sigma = [(time, 0), (scoord, 1), (lats2, 2), (lons2, 3)] self.grid_4d_sigma_space = iris.cube.Cube( data2, @@ -442,6 +447,18 @@ def test_volume_nolevbounds(self): self.assertFalse(self.grid_4d.cell_measures('ocean_volume')) self.assertFalse(result.cell_measures('ocean_volume')) + def test_calculate_volume_lazy(self): + """Test that calculate_volume returns a lazy volume + + The volume chunks should match those of the input cube for + computational efficiency. + """ + chunks = self.grid_4d_lazy.core_data().chunks + volume = calculate_volume(self.grid_4d_lazy) + assert self.grid_4d_lazy.has_lazy_data() + assert isinstance(volume, da.Array) + assert volume.chunks == chunks + def test_volume_statistics_cell_measure(self): """Test to take the volume weighted average of a (2,3,2,2) cube. @@ -458,6 +475,23 @@ def test_volume_statistics_cell_measure(self): self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-3') + def test_volume_statistics_cell_measure_lazy(self): + """Test to take the volume weighted average of a lazy (2,3,2,2) cube. + + The volume measure is pre-loaded in the cube. + """ + grid_volume = calculate_volume(self.grid_4d_lazy) + measure = iris.coords.CellMeasure(grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid_4d_lazy.add_cell_measure(measure, range(0, measure.ndim)) + result = volume_statistics(self.grid_4d_lazy, 'mean') + assert result.has_lazy_data() + expected = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') + def test_volume_statistics_long(self): """Test to take the volume weighted average of a (4,3,2,2) cube.