Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid copying input data in CMIP6 CESM2 air pressure level fixes #2454

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies:
- jinja2
- libnetcdf !=4.9.1 # to avoid hdf5 warnings
- nc-time-axis
- ncdata
- nested-lookup
- netcdf4
- numpy !=1.24.3,<2.0.0 # avoid pulling 2.0.0rcX
Expand Down
96 changes: 33 additions & 63 deletions esmvalcore/cmor/_fixes/cmip6/cesm2.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Fixes for CESM2 model."""

from shutil import copyfile

import ncdata
import ncdata.iris
import ncdata.netcdf4
import numpy as np
from netCDF4 import Dataset

from ..common import SiconcFixScalarCoord
from ..fix import Fix
Expand All @@ -19,24 +19,16 @@
class Cl(Fix):
"""Fixes for ``cl``."""

def _fix_formula_terms(
self,
filepath,
output_dir,
add_unique_suffix=False,
):
@staticmethod
def _fix_formula_terms(dataset: ncdata.NcData) -> None:
"""Fix ``formula_terms`` attribute."""
new_path = self.get_fixed_filepath(
output_dir, filepath, add_unique_suffix=add_unique_suffix
lev = dataset.variables["lev"]
lev.set_attrval("formula_terms", "p0: p0 a: a b: b ps: ps")
lev.set_attrval(
"standard_name", "atmosphere_hybrid_sigma_pressure_coordinate"
)
copyfile(filepath, new_path)
dataset = Dataset(new_path, mode="a")
dataset.variables["lev"].formula_terms = "p0: p0 a: a b: b ps: ps"
dataset.variables[
"lev"
].standard_name = "atmosphere_hybrid_sigma_pressure_coordinate"
dataset.close()
return new_path
lev.set_attrval("units", "1")
dataset.variables["lev_bnds"].attributes.pop("units")

def fix_file(self, filepath, output_dir, add_unique_suffix=False):
"""Fix hybrid pressure coordinate.
Expand All @@ -63,45 +55,33 @@ def fix_file(self, filepath, output_dir, add_unique_suffix=False):
-------
iris.cube.CubeList
Cubes created from the fixed in-memory dataset.

"""
new_path = self._fix_formula_terms(
filepath, output_dir, add_unique_suffix=add_unique_suffix
dataset = ncdata.netcdf4.from_nc4(filepath)
self._fix_formula_terms(dataset)

# Correct order of bounds data
a_bnds = dataset.variables["a_bnds"]
a_bnds.data = a_bnds.data[::-1, :]
b_bnds = dataset.variables["b_bnds"]
b_bnds.data = b_bnds.data[::-1, :]

# Correct lev and lev_bnds data
lev = dataset.variables["lev"]
lev.data = dataset.variables["a"].data + dataset.variables["b"].data
lev_bnds = dataset.variables["lev_bnds"]
lev_bnds.data = (
dataset.variables["a_bnds"].data + dataset.variables["b_bnds"].data
)
dataset = Dataset(new_path, mode="a")
dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][::-1, :]
dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][::-1, :]
dataset.close()
return new_path

def fix_metadata(self, cubes):
"""Fix ``atmosphere_hybrid_sigma_pressure_coordinate``.

See discussion in #882 for more details on that.

Parameters
----------
cubes : iris.cube.CubeList
Input cubes.

Returns
-------
iris.cube.CubeList
# Remove 'title' attribute that duplicates long name
for var_name in dataset.variables:
dataset.variables[var_name].attributes.pop("title", None)

"""
cube = self.get_cube_from_list(cubes)
lev_coord = cube.coord(var_name="lev")
a_coord = cube.coord(var_name="a")
b_coord = cube.coord(var_name="b")
lev_coord.points = a_coord.core_points() + b_coord.core_points()
lev_coord.bounds = a_coord.core_bounds() + b_coord.core_bounds()
lev_coord.units = "1"
return cubes
return self.ncdata_to_iris(dataset, filepath)


Cli = Cl


Clw = Cl


Expand All @@ -119,7 +99,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
cube = self.get_cube_from_list(cubes)
add_scalar_depth_coord(cube)
Expand All @@ -130,8 +109,7 @@ class Prw(Fix):
"""Fixes for tas."""

def fix_metadata(self, cubes):
"""
Fix latitude_bounds and longitude_bounds data type and round to 4 d.p.
"""Fix latitude_bounds and longitude_bounds dtype and round to 4 d.p.

Parameters
----------
Expand All @@ -141,7 +119,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
for cube in cubes:
for coord_name in ["latitude", "longitude"]:
Expand All @@ -159,8 +136,7 @@ class Tas(Prw):
"""Fixes for tas."""

def fix_metadata(self, cubes):
"""
Add height (2m) coordinate.
"""Add height (2m) coordinate.

Fix also done for prw.
Fix latitude_bounds and longitude_bounds data type and round to 4 d.p.
Expand All @@ -173,7 +149,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
super().fix_metadata(cubes)
# Specific code for tas
Expand All @@ -197,7 +172,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
cube = self.get_cube_from_list(cubes)
add_scalar_typeland_coord(cube)
Expand All @@ -218,7 +192,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
cube = self.get_cube_from_list(cubes)
add_scalar_typesea_coord(cube)
Expand All @@ -232,8 +205,7 @@ class Tos(Fix):
"""Fixes for tos."""

def fix_metadata(self, cubes):
"""
Round times to 1 d.p. for monthly means.
"""Round times to 1 d.p. for monthly means.

Required to get hist-GHG and ssp245-GHG Omon tos to concatenate.

Expand All @@ -245,7 +217,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
cube = self.get_cube_from_list(cubes)

Expand All @@ -271,7 +242,6 @@ def fix_metadata(self, cubes):
Returns
-------
iris.cube.CubeList

"""
for cube in cubes:
if cube.coords(axis="Z"):
Expand Down
33 changes: 17 additions & 16 deletions esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Fixes for CESM2-WACCM model."""

from netCDF4 import Dataset
import ncdata.iris
import ncdata.netcdf4

from ..common import SiconcFixScalarCoord
from .cesm2 import Cl as BaseCl
Expand All @@ -10,7 +11,7 @@


class Cl(BaseCl):
"""Fixes for cl."""
"""Fixes for ``cl``."""

def fix_file(self, filepath, output_dir, add_unique_suffix=False):
"""Fix hybrid pressure coordinate.
Expand All @@ -37,31 +38,31 @@ def fix_file(self, filepath, output_dir, add_unique_suffix=False):
-------
iris.cube.CubeList
Cubes created from the fixed in-memory dataset.

"""
new_path = self._fix_formula_terms(
filepath, output_dir, add_unique_suffix=add_unique_suffix
)
dataset = Dataset(new_path, mode="a")
dataset.variables["a_bnds"][:] = dataset.variables["a_bnds"][:, ::-1]
dataset.variables["b_bnds"][:] = dataset.variables["b_bnds"][:, ::-1]
dataset.close()
return new_path
dataset = ncdata.netcdf4.from_nc4(filepath)
self._fix_formula_terms(dataset)

# Correct order of bounds data
a_bnds = dataset.variables["a_bnds"]
a_bnds.data = a_bnds.data[:, ::-1]
b_bnds = dataset.variables["b_bnds"]
b_bnds.data = b_bnds.data[:, ::-1]

Cli = Cl
# Remove 'title' attribute that duplicates long name
for var_name in dataset.variables:
dataset.variables[var_name].attributes.pop("title", None)

return self.ncdata_to_iris(dataset, filepath)

Clw = Cl

Cli = Cl

Fgco2 = BaseFgco2
Clw = Cl

Fgco2 = BaseFgco2

Omon = BaseOmon


Siconc = SiconcFixScalarCoord


Tas = BaseTas
61 changes: 61 additions & 0 deletions esmvalcore/cmor/_fixes/fix.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any, Optional
from warnings import catch_warnings, filterwarnings

import dask
import iris
import ncdata.iris
import ncdata.threadlock_sharing
import numpy as np
from cf_units import Unit
from iris.coords import Coord, CoordExtent
Expand All @@ -36,6 +40,9 @@
logger = logging.getLogger(__name__)
generic_fix_logger = logging.getLogger(f"{__name__}.genericfix")

# Enable lock sharing between Iris and ncdata
ncdata.threadlock_sharing.enable_lockshare(iris=True)


class Fix:
"""Base class for dataset fixes."""
Expand Down Expand Up @@ -157,6 +164,60 @@ def get_cube_from_list(
return cube
raise ValueError(f'Cube for variable "{short_name}" not found')

def ncdata_to_iris(
    self,
    dataset: ncdata.NcData,
    filepath: Path,
) -> CubeList:
    """Convert an :obj:`~ncdata.NcData` object to an Iris cubelist.

    This function mimics the behaviour of
    :func:`esmvalcore.preprocessor.load`.

    Parameters
    ----------
    dataset:
        The :obj:`~ncdata.NcData` object to convert.
    filepath:
        The path that the dataset was loaded from. It is only recorded in
        the ``source_file`` attribute of the returned cube; the file is not
        read again here.

    Returns
    -------
    iris.cube.CubeList
        :obj:`iris.cube.CubeList` containing only the cube selected by
        :meth:`get_cube_from_list`; any other cubes produced by the
        conversion are dropped.

    Raises
    ------
    ValueError
        Propagated from :meth:`get_cube_from_list` if the converted dataset
        does not contain the requested cube.

    """
    with catch_warnings():
        # Ignore warnings about missing cell measures that are stored in
        # a separate file for CMIP data.
        filterwarnings(
            message="Missing CF-netCDF measure variable .*",
            category=UserWarning,
            module="iris",
            action="ignore",
        )
        cubes = ncdata.iris.to_iris(dataset)

    cube = self.get_cube_from_list(cubes)

    # Restore the lat/lon coordinate units that iris changes to degrees
    for coord_name in ["latitude", "longitude"]:
        try:
            coord = cube.coord(coord_name)
        except iris.exceptions.CoordinateNotFoundError:
            # The cube has no such coordinate, so there is nothing to restore.
            continue
        if coord.var_name not in dataset.variables:
            continue
        nc_coord = dataset.variables[coord.var_name]
        # A coordinate variable without a ``units`` attribute has no
        # original units to restore; keep whatever iris assigned instead of
        # failing with a KeyError.
        if "units" in nc_coord.attributes:
            coord.units = nc_coord.attributes["units"].value

    # Add the source file as an attribute to support grouping by file
    # when calling fix_metadata.
    cube.attributes["source_file"] = str(filepath)

    return iris.cube.CubeList([cube])

def fix_data(self, cube: Cube) -> Cube:
"""Apply fixes to the data of the cube.

Expand Down
Loading