Merge branch 'main' into unstructured_linear_regrid
schlunma authored Apr 16, 2024
2 parents 4b8574a + 6cf32c7 commit 8999de5
Showing 63 changed files with 1,598 additions and 648 deletions.
2 changes: 1 addition & 1 deletion .circleci/config.yml
@@ -2,7 +2,7 @@
version: 2.1

orbs:
coverage-reporter: codacy/coverage-reporter@13.13.7
coverage-reporter: codacy/coverage-reporter@13.16.5
codecov: codecov/[email protected]

commands:
1 change: 1 addition & 0 deletions .github/workflows/run-tests-monitor.yml
@@ -75,6 +75,7 @@ jobs:
- run: python -V 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/python_version.txt
- run: pip install pytest-monitor
- run: pip install -e .[develop] 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/install.txt
- run: conda list
- run: pytest -n 2 -m "not installation" --db ../.pymon 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/test_report.txt
- run: python tests/parse_pymon.py
- name: Upload artifacts
2 changes: 2 additions & 0 deletions .github/workflows/run-tests.yml
@@ -58,6 +58,7 @@ jobs:
- run: conda --version 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/conda_version.txt
- run: python -V 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/python_version.txt
- run: pip install -e .[develop] 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/install.txt
- run: conda list
- run: flake8
- run: pytest -n 2 -m "not installation" 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/test_report.txt
- name: Upload artifacts
@@ -90,6 +91,7 @@ jobs:
- run: conda --version 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/conda_version.txt
- run: python -V 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/python_version.txt
- run: pip install -e .[develop] 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/install.txt
- run: conda list
- run: flake8
- run: pytest -n 2 -m "not installation" 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/test_report.txt
- name: Upload artifacts
4 changes: 4 additions & 0 deletions .zenodo.json
@@ -200,6 +200,10 @@
"affiliation": "BSC, Spain",
"name": "Martin-Martinez, Eneko",
"orcid": "0000-0002-9213-7818"
},
{
"affiliation": "DLR, Germany",
"name": "Cammarano, Diego"
}
],
"description": "ESMValCore: A community tool for pre-processing data from Earth system models in CMIP and running analysis scripts.",
4 changes: 4 additions & 0 deletions CITATION.cff
@@ -204,6 +204,10 @@ authors:
family-names: Martin-Martinez
given-names: Eneko
orcid: "https://orcid.org/0000-0002-9213-7818"
-
affiliation: "DLR, Germany"
family-names: Cammarano
given-names: Diego

cff-version: 1.2.0
date-released: 2023-12-19
2 changes: 1 addition & 1 deletion README.md
@@ -5,7 +5,7 @@
[![Chat on Matrix](https://matrix.to/img/matrix-badge.svg)](https://matrix.to/#/#ESMValGroup_Lobby:gitter.im)
[![CircleCI](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/main.svg?style=svg)](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/main)
[![codecov](https://codecov.io/gh/ESMValGroup/ESMValCore/branch/main/graph/badge.svg?token=wQnDzguwq6)](https://codecov.io/gh/ESMValGroup/ESMValCore)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/5d496dea9ef64ec68e448a6df5a65783)](https://www.codacy.com/gh/ESMValGroup/ESMValCore?utm_source=github.com&utm_medium=referral&utm_content=ESMValGroup/ESMValCore&utm_campaign=Badge_Grade)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/5d496dea9ef64ec68e448a6df5a65783)](https://app.codacy.com/gh/ESMValGroup/ESMValCore/dashboard?utm_source=gh&utm_medium=referral&utm_content=&utm_campaign=Badge_grade)
[![Docker Build Status](https://img.shields.io/docker/cloud/build/esmvalgroup/esmvalcore)](https://hub.docker.com/r/esmvalgroup/esmvalcore/)
[![Anaconda-Server Badge](https://img.shields.io/conda/vn/conda-forge/ESMValCore?color=blue&label=conda-forge&logo=conda-forge&logoColor=white)](https://anaconda.org/conda-forge/esmvalcore)
[![Github Actions Test](https://github.com/ESMValGroup/ESMValCore/actions/workflows/run-tests.yml/badge.svg)](https://github.com/ESMValGroup/ESMValCore/actions/workflows/run-tests.yml)
297 changes: 150 additions & 147 deletions conda-linux-64.lock

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions doc/quickstart/find_data.rst
@@ -388,7 +388,9 @@ This grid file can either be specified as absolute or relative (to
with the facet ``horizontal_grid`` in the recipe or the extra facets (see
below), or retrieved automatically from the `grid_file_uri` attribute of the
input files.
In the latter case, the file is downloaded once and then cached.
In the latter case, ESMValCore first searches the input directories specified
for ICON for a grid file with that name and, if none is found, tries to
download the file and cache it.
The cached file is valid for 7 days.

ESMValCore can automatically make native ICON data `UGRID
@@ -467,7 +469,7 @@ Key Description Default value if not specif
=================== ================================ ===================================
``horizontal_grid`` Absolute or relative (to If not given, use file attribute
``auxiliary_data_dir`` defined ``grid_file_uri`` to retrieve ICON
in the grid file
in the grid file (see details above)
:ref:`user configuration file`)
path to the ICON grid file
``latitude`` Standard name of the latitude ``latitude``
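The grid-file lookup documented above is driven by the ``horizontal_grid`` facet. A minimal recipe sketch (the file name ``icon_grid.nc`` is a hypothetical placeholder, not taken from this diff):

```yaml
datasets:
  - project: ICON
    dataset: ICON
    # Relative to auxiliary_data_dir (or an absolute path); if omitted,
    # ESMValCore falls back to the grid_file_uri file attribute and the
    # input-directory search described above.
    horizontal_grid: icon_grid.nc
```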
20 changes: 12 additions & 8 deletions doc/recipe/preprocessor.rst
@@ -177,12 +177,12 @@ Calculate the global non-weighted root mean square:
    global_mean:
      area_statistics:
        operator: rms
        weighted: false
        weights: false

.. warning::

   The disabling of weights by specifying the keyword argument ``weights:
   False`` needs to be used with great care; from a scientific standpoint, we
   false`` needs to be used with great care; from a scientific standpoint, we
   strongly recommend to **not** use it!


@@ -307,7 +307,7 @@ Preprocessor Variable short na
:ref:`area_statistics<area_statistics>` ``areacella``, ``areacello`` cell_area
:ref:`mask_landsea<land/sea/ice masking>` ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
:ref:`mask_landseaice<ice masking>` ``sftgif`` land_ice_area_fraction
:ref:`volume_statistics<volume_statistics>` ``volcello`` ocean_volume
:ref:`volume_statistics<volume_statistics>` ``volcello``, ``areacello`` ocean_volume, cell_area
:ref:`weighting_landsea_fraction<land/sea fraction weighting>` ``sftlf``, ``sftof`` land_area_fraction, sea_area_fraction
============================================================== ============================== =====================================

@@ -2135,12 +2135,16 @@ but maintains the time dimension.
By default, the `mean` operation is weighted by the grid cell volumes.

For weighted statistics, this function requires a cell volume `cell measure`_,
unless the coordinates of the input data are regular 1D latitude and longitude
coordinates so the cell volumes can be computed internally.
The required supplementary variable ``volcello`` can be attached to the main
dataset as described in :ref:`supplementary_variables`.
unless it has a cell_area `cell measure`_ or the coordinates of the input data
are regular 1D latitude and longitude coordinates so the cell volumes can be
computed internally.
The required supplementary variable ``volcello``, or ``areacello`` in its
absence, can be attached to the main dataset as described in
:ref:`supplementary_variables`.

No depth coordinate is required as this is determined by Iris.
No depth coordinate is required as this is determined by Iris. However, to
compute the volume automatically when ``volcello`` is not provided, the depth
coordinate units should be convertible to meters.

Parameters:
* `operator`: Operation to apply.
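As a sketch of the behaviour documented above — ``volcello`` preferred, ``areacello`` used in its absence — a hypothetical recipe fragment (the diagnostic, variable, and mip names are illustrative only):

```yaml
preprocessors:
  global_volume_mean:
    volume_statistics:
      operator: mean

diagnostics:
  example:
    variables:
      thetao:
        preprocessor: global_volume_mean
        supplementary_variables:
          - short_name: volcello   # or areacello if volcello is unavailable
            mip: Ofx
```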
2 changes: 1 addition & 1 deletion environment.yml
@@ -30,7 +30,7 @@ dependencies:
- netcdf4
- numpy !=1.24.3
- packaging
- pandas !=2.2.0,!=2.2.1 # github.com/ESMValGroup/ESMValCore/pull/2305 and #2349
- pandas !=2.2.0,!=2.2.1,!=2.2.2 # github.com/ESMValGroup/ESMValCore/pull/2305 and #2349
- pillow
- pip !=21.3
- prov
8 changes: 4 additions & 4 deletions esmvalcore/_recipe/recipe.py
@@ -50,7 +50,7 @@
from .to_datasets import (
_derive_needed,
_get_input_datasets,
_representative_dataset,
_representative_datasets,
)

logger = logging.getLogger(__name__)
@@ -116,7 +116,7 @@ def _update_target_levels(dataset, datasets, settings):
del settings['extract_levels']
else:
target_ds = _select_dataset(dataset_name, datasets)
representative_ds = _representative_dataset(target_ds)
representative_ds = _representative_datasets(target_ds)[0]
check.data_availability(representative_ds)
settings['extract_levels']['levels'] = get_reference_levels(
representative_ds)
@@ -133,8 +133,8 @@ def _update_target_grid(dataset, datasets, settings):
if dataset.facets['dataset'] == grid:
del settings['regrid']
elif any(grid == d.facets['dataset'] for d in datasets):
representative_ds = _representative_dataset(
_select_dataset(grid, datasets))
representative_ds = _representative_datasets(
_select_dataset(grid, datasets))[0]
check.data_availability(representative_ds)
settings['regrid']['target_grid'] = representative_ds
else:
139 changes: 92 additions & 47 deletions esmvalcore/_recipe/to_datasets.py
@@ -404,60 +404,105 @@ def datasets_from_recipe(
def _dataset_from_files(dataset: Dataset) -> list[Dataset]:
"""Replace facet values of '*' based on available files."""
result: list[Dataset] = []
errors = []
errors: list[str] = []

if any(_isglob(f) for f in dataset.facets.values()):
logger.debug(
"Expanding dataset globs for dataset %s, "
"this may take a while..", dataset.summary(shorten=True))

repr_dataset = _representative_dataset(dataset)
for repr_ds in repr_dataset.from_files():
updated_facets = {}
failed = {}
for key, value in dataset.facets.items():
if _isglob(value):
if key in repr_ds.facets and not _isglob(repr_ds[key]):
updated_facets[key] = repr_ds.facets[key]
else:
failed[key] = value

if failed:
msg = ("Unable to replace " +
", ".join(f"{k}={v}" for k, v in failed.items()) +
f" by a value for\n{dataset}")
# Set supplementaries to [] to avoid searching for supplementary
# files.
repr_ds.supplementaries = []
if repr_ds.files:
paths_msg = "paths to " if any(
isinstance(f, LocalFile) for f in repr_ds.files) else ""
msg = (f"{msg}\nDo the {paths_msg}the files:\n" +
"\n".join(f"{f} with facets: {f.facets}"
for f in repr_ds.files) +
"\nprovide the missing facet values?")
else:
timerange = repr_ds.facets.get('timerange')
patterns = repr_ds._file_globs
msg = (
f"{msg}\nNo files found matching:\n" +
"\n".join(str(p) for p in patterns) + # type:ignore
(f"\nwithin the requested timerange {timerange}."
if timerange else ""))
errors.append(msg)
continue

new_ds = dataset.copy()
new_ds.facets.update(updated_facets)
new_ds.supplementaries = repr_ds.supplementaries
result.append(new_ds)
representative_datasets = _representative_datasets(dataset)

# For derived variables, representative_datasets might contain more than
# one element
all_datasets: list[list[tuple[dict, Dataset]]] = []
for representative_dataset in representative_datasets:
all_datasets.append([])
for expanded_ds in representative_dataset.from_files():
updated_facets = {}
unexpanded_globs = {}
for key, value in dataset.facets.items():
if _isglob(value):
if (key in expanded_ds.facets and
not _isglob(expanded_ds[key])):
updated_facets[key] = expanded_ds.facets[key]
else:
unexpanded_globs[key] = value

if unexpanded_globs:
msg = _report_unexpanded_globs(
dataset, expanded_ds, unexpanded_globs
)
errors.append(msg)
continue

new_ds = dataset.copy()
new_ds.facets.update(updated_facets)
new_ds.supplementaries = expanded_ds.supplementaries

all_datasets[-1].append((updated_facets, new_ds))

# If globs have been expanded, only consider those datasets that contain
# all necessary input variables if derivation is necessary
for (updated_facets, new_ds) in all_datasets[0]:
other_facets = [[d[0] for d in ds] for ds in all_datasets[1:]]
if all(updated_facets in facets for facets in other_facets):
result.append(new_ds)
else:
logger.debug(
"Not all necessary input variables to derive '%s' are "
"available for dataset %s",
dataset['short_name'],
updated_facets,
)

if errors:
raise RecipeError("\n".join(errors))

return result


def _report_unexpanded_globs(
unexpanded_ds: Dataset,
expanded_ds: Dataset,
unexpanded_globs: dict,
) -> str:
"""Get error message for unexpanded globs."""
msg = (
"Unable to replace " +
", ".join(f"{k}={v}" for k, v in unexpanded_globs.items()) +
f" by a value for\n{unexpanded_ds}"
)

# Set supplementaries to [] to avoid searching for supplementary files
expanded_ds.supplementaries = []

if expanded_ds.files:
if any(isinstance(f, LocalFile) for f in expanded_ds.files):
paths_msg = "paths to the "
else:
paths_msg = ""
msg = (
f"{msg}\nDo the {paths_msg}files:\n" +
"\n".join(
f"{f} with facets: {f.facets}" for f in expanded_ds.files
) +
"\nprovide the missing facet values?"
)
else:
timerange = expanded_ds.facets.get('timerange')
patterns = expanded_ds._file_globs
msg = (
f"{msg}\nNo files found matching:\n" +
"\n".join(str(p) for p in patterns) + ( # type:ignore
f"\nwithin the requested timerange {timerange}."
if timerange else ""
)
)

return msg


def _derive_needed(dataset: Dataset) -> bool:
"""Check if dataset needs to be derived from other datasets."""
if not dataset.facets.get('derive'):
@@ -512,11 +557,11 @@ def _get_input_datasets(dataset: Dataset) -> list[Dataset]:
return datasets


def _representative_dataset(dataset: Dataset) -> Dataset:
"""Find a representative dataset that has files available."""
def _representative_datasets(dataset: Dataset) -> list[Dataset]:
"""Find representative datasets for all input variables."""
copy = dataset.copy()
copy.supplementaries = []
datasets = _get_input_datasets(copy)
representative_dataset = datasets[0]
representative_dataset.supplementaries = dataset.supplementaries
return representative_dataset
representative_datasets = _get_input_datasets(copy)
for representative_dataset in representative_datasets:
representative_dataset.supplementaries = dataset.supplementaries
return representative_datasets
8 changes: 4 additions & 4 deletions esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py
Expand Up @@ -76,10 +76,6 @@ class Tasmax(Tas):
"""Fixes for tasmax."""


class SfcWind(Tas):
"""Fixes for sfcWind."""


class Hurs(Tas):
"""Fixes for hurs."""

Expand Down Expand Up @@ -110,6 +106,10 @@ class Vas(Uas):
"""Fixes for vas."""


class SfcWind(Uas):
"""Fixes for sfcWind."""


Omon = BaseOmon


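The change above re-bases ``SfcWind`` from ``Tas`` onto ``Uas``. The likely rationale (an assumption, not stated in this diff) is the height of the scalar coordinate each fix attaches: near-surface temperature variables sit at 2 m, near-surface winds at 10 m. Schematically:

```python
# Schematic stand-ins for the fix classes, not the ESMValCore API.
class Tas:
    height_m = 2.0   # tas-like fixes add a 2 m scalar height coordinate


class Uas:
    height_m = 10.0  # uas/vas-like fixes add a 10 m scalar height coordinate


class SfcWind(Uas):  # after this commit, sfcWind inherits the 10 m fix
    pass


print(SfcWind.height_m)  # 10.0
```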
36 changes: 36 additions & 0 deletions esmvalcore/cmor/_fixes/cordex/cnrm_cerfacs_cnrm_cm5/wrf381p.py
@@ -0,0 +1,36 @@
"""Fixes for rcm WRF381P driven by CNRM-CERFACS-CNRM-CM5."""
from esmvalcore.cmor._fixes.shared import add_scalar_height_coord
from esmvalcore.cmor.fix import Fix


class Tas(Fix):
    """Fixes for tas."""

    def fix_metadata(self, cubes):
        """Add height (2m) coordinate.

        Parameters
        ----------
        cubes : iris.cube.CubeList
            Input cubes.

        Returns
        -------
        iris.cube.CubeList
        """
        cube = self.get_cube_from_list(cubes)
        add_scalar_height_coord(cube)

        return cubes


Tasmin = Tas


Tasmax = Tas


Hurs = Tas


Huss = Tas