Merge pull request #232 from cerfacs-globc/enh/convert_mm_to_mmday_when_possible

Enh/convert mm to mmday when possible
cerfacs-globc · Oct 20, 2022 · 471f1b3 · 471f1b3
2 parents 19f50b0 + 1159ddd
commit 471f1b3
Show file tree

Hide file tree

Showing 13 changed files with 60 additions and 118 deletions.
diff --git a/doc/source/_static/logo_icclim_colored__displayed.svg b/doc/source/_static/logo_icclim_colored__displayed.svg
diff --git a/doc/source/_static/logo_icclim_grey__displayed.svg b/doc/source/_static/logo_icclim_grey__displayed.svg
diff --git a/doc/source/_static/logo_icclim_white__displayed.svg b/doc/source/_static/logo_icclim_white__displayed.svg
diff --git a/doc/source/references/ecad_functions_api.rst b/doc/source/references/ecad_functions_api.rst
@@ -78,7 +78,6 @@ Generated API
         cw
         wd
         ww
-        ddnorth
         custom_index
 
         .. Generated API comment:End
diff --git a/doc/source/references/release_notes.rst b/doc/source/references/release_notes.rst
@@ -41,6 +41,8 @@ Release history
   Instances of BoundedThreshold are created with the `build_threshold` factory function, E.G. `build_threshold(">= -20 degree AND <= 20 degree ")`
 * [enh] Make it possible to compute multiple percentiles at once.
 * [maint] Update coverage computation. Now tests files are ignored when calculating the code coverage, thus it dropped a little (by 3%).
+* [enh] Convert input data that are recognized as a precipitation amount into precipitation rate.
+  This is necessary to handle e-obs precipitation datasets.
 
 5.4.0
 -----

diff --git a/icclim/_generated_api.py b/icclim/_generated_api.py
@@ -71,7 +71,6 @@
     "cw",
     "wd",
     "ww",
-    "ddnorth",
     "custom_index",
 ]
 
@@ -4828,88 +4827,6 @@ def ww(
     )
 
 
-def ddnorth(
-    in_files: InFileLike,
-    var_name: str | Sequence[str] | None = None,
-    slice_mode: FrequencyLike | Frequency = "year",
-    time_range: Sequence[datetime | str] | None = None,
-    out_file: str | None = None,
-    ignore_Feb29th: bool = False,
-    netcdf_version: str | NetcdfVersion = "NETCDF4",
-    logs_verbosity: Verbosity | str = "LOW",
-    date_event: bool = False,
-) -> Dataset:
-    """
-    DDnorth: Days with northerly winds (-45 degree < DD ≤ 45 degree)
-
-    Source: ECA&D, Algorithm Theoretical Basis Document (ATBD) v11.
-
-    Parameters
-    ----------
-
-    in_files: str | list[str] | Dataset | DataArray | InputDictionary
-        Absolute path(s) to NetCDF dataset(s), including OPeNDAP URLs,
-        or path to zarr store, or xarray.Dataset or xarray.DataArray.
-    var_name: str | list[str] | None
-        ``optional`` Target variable name to process corresponding to ``in_files``.
-        If None (default) on ECA&D index, the variable is guessed based on the climate
-        index wanted.
-        Mandatory for a user index.
-    slice_mode: SliceMode
-        Type of temporal aggregation:
-        The possibles values are ``{"year", "month", "DJF", "MAM", "JJA", "SON",
-        "ONDJFM" or "AMJJAS", ("season", [1,2,3]), ("month", [1,2,3,])}``
-        (where season and month lists can be customized) or any valid pandas frequency.
-        A season can also be defined between two exact dates:
-        ``("season", ("19 july", "14 august"))``.
-        Default is "year".
-        See :ref:`slice_mode` for details.
-    time_range: list[datetime ] | list[str]  | tuple[str, str] | None
-        ``optional`` Temporal range: upper and lower bounds for temporal subsetting.
-        If ``None``, whole period of input files will be processed.
-        The dates can either be given as instance of datetime.datetime or as string
-        values. For strings, many format are accepted.
-        Default is ``None``.
-    out_file: str | None
-        Output NetCDF file name (default: "icclim_out.nc" in the current directory).
-        Default is "icclim_out.nc".
-        If the input ``in_files`` is a ``Dataset``, ``out_file`` field is ignored.
-        Use the function returned value instead to retrieve the computed value.
-        If ``out_file`` already exists, icclim will overwrite it!
-    ignore_Feb29th: bool
-        ``optional`` Ignoring or not February 29th (default: False).
-    netcdf_version: str | NetcdfVersion
-        ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC").
-    logs_verbosity: str | Verbosity
-        ``optional`` Configure how verbose icclim is.
-        Possible values: ``{"LOW", "HIGH", "SILENT"}`` (default: "LOW")
-    date_event: bool
-        When True the date of the event (such as when a maximum is reached) will be
-        stored in coordinates variables.
-        **warning** This option may significantly slow down computation.
-    Notes
-    -----
-    This function has been auto-generated.
-
-    """
-    return icclim.index(
-        index_name="DDNORTH",
-        in_files=in_files,
-        var_name=var_name,
-        slice_mode=slice_mode,
-        time_range=time_range,
-        out_file=out_file,
-        ignore_Feb29th=ignore_Feb29th,
-        netcdf_version=netcdf_version,
-        logs_verbosity=logs_verbosity,
-        date_event=date_event,
-        threshold=build_threshold(
-            query="> -45 degree AND <= 45 degree",
-        ),
-        out_unit="day",
-    )
-
-
 def custom_index(
     user_index: UserIndexDict,
     in_files: InFileLike,

diff --git a/icclim/ecad/ecad_indices.py b/icclim/ecad/ecad_indices.py
@@ -645,15 +645,3 @@ def list(cls: EcadIndexRegistry) -> list[str]:
         qualifiers=[QUANTILE_BASED],
         doy_window_width=5,
     )
-    # WIND
-    DDNORTH = StandardIndex(
-        reference=ECAD_REFERENCE,
-        generic_indicator=GenericIndicatorRegistry.CountOccurrences,
-        threshold="> -45 degree AND <= 45 degree",
-        output_unit="day",
-        definition="Days with northerly winds (-45 degree < DD ≤ 45 degree)",
-        source=ECAD_ATBD,
-        short_name="DDnorth",
-        group=IndexGroupRegistry.WIND,
-        input_variables=[StandardVariableRegistry.WIND_TO_DIRECTION],
-    )
diff --git a/icclim/icclim_types.py b/icclim/icclim_types.py
@@ -4,7 +4,7 @@
 
 from xarray import DataArray, Dataset
 
-InFileBaseType = Union[str, List[str], Dataset, DataArray]
+InFileBaseType = Union[str, Sequence[str], Dataset, DataArray]
 ThresholdedDict = Dict[str, Union[Dict]]  # Dict === InFileDictionary
 InFileLike = Union[ThresholdedDict, InFileBaseType, Dict[str, InFileBaseType]]
 

diff --git a/icclim/models/constants.py b/icclim/models/constants.py
@@ -3,7 +3,7 @@
 # fmt: off
 # flake8: noqa
 
-ICCLIM_VERSION = "5.4.0"
+ICCLIM_VERSION = "6.0.0"
 
 # placeholders for user_index
 PERCENTILE_THRESHOLD_STAMP = "p"

diff --git a/icclim/pre_processing/input_parsing.py b/icclim/pre_processing/input_parsing.py
@@ -21,7 +21,7 @@
 from icclim.models.cf_calendar import CfCalendarRegistry
 from icclim.models.constants import UNITS_KEY, VALID_PERCENTILE_DIMENSION
 from icclim.models.standard_index import StandardIndex
-from icclim.utils import get_date_to_iso_format
+from icclim.utils import get_date_to_iso_format, is_precipitation_amount
 
 DEFAULT_INPUT_FREQUENCY = "days"
 
@@ -215,17 +215,18 @@ def build_studied_data(
         check_time_range_post_validity(da, original_da, "time_range", time_range)
         if len(da.time) == 0:
             raise InvalidIcclimArgumentError(
-                f"The given `time_range` {time_range} "
-                f"is out of the dataset time period: "
-                f"{original_da.time.min().dt.floor('D').values} "
-                f"- {original_da.time.max().dt.floor('D').values}."
+                f"The given `time_range` {time_range} is out of the dataset time"
+                f" period: {original_da.time.min().dt.floor('D').values}"
+                f" - {original_da.time.max().dt.floor('D').values}."
             )
     else:
         da = original_da
     if ignore_Feb29th:
         da = xclim.core.calendar.convert_calendar(da, CfCalendarRegistry.NO_LEAP.name)
     if da.attrs.get(UNITS_KEY, None) is None and standard_var is not None:
         da.attrs[UNITS_KEY] = standard_var.default_units
+    if is_precipitation_amount(da):
+        da = xclim.core.units.amount2rate(da)
     da = da.chunk("auto")
     return da
 
@@ -258,7 +259,7 @@ def _is_alias_valid(ds, alias) -> bool:
 def _get_actual_name(ds, alias) -> str:
     for ds_var in ds.data_vars:
         if str(ds_var).upper() == alias.upper():
-            return ds_var
+            return str(ds_var)
     raise KeyError(f"Could not find {alias} in dataset.")
 
 

diff --git a/icclim/tests/test_main.py b/icclim/tests/test_main.py
@@ -7,6 +7,7 @@
 import cftime
 import numpy as np
 import pandas as pd
+import pint
 import pytest
 import xarray as xr
 
@@ -42,7 +43,7 @@ def test_deprecated_indice(log_mock: MagicMock, index_mock: MagicMock):
 @pytest.mark.slow
 class Test_Integration:
     """
-    Simple integration test.
+    Integration tests.
     We are not testing here the actual indices results, they are already tested in
     `test_ecad_indices.py` as well as in xclim directly.
     The goal it to make sure every the whole app can run smoothly
@@ -830,21 +831,36 @@ def test_rr_with_slice_mode__4_weeks(self):
         np.testing.assert_almost_equal(rr.RR.isel(time=0), 5.3)
         np.testing.assert_almost_equal(rr.RR.isel(time=1), 0)
 
-    def test_ddnorth(self):
+    def test_mm_to_mmday(self):
         # GIVEN
         time_range = xr.DataArray(
             pd.date_range("2000", periods=365, freq="D"), dims=["time"]
         )
-        dd = xr.DataArray(
-            np.zeros(365),
+        precip = xr.DataArray(
+            np.ones(365),
             coords={"time": time_range, "lat": 1, "lon": 1},
             dims="time",
-            attrs={"units": "degree"},
+            attrs={"units": "mm", "standard_name": "thickness_of_rainfall_amount"},
         )
-        dd.loc[{"time": slice("2000-01-01", "2000-01-05")}] = 50
-        dd.loc[{"time": slice("2000-03-01", "2000-03-02")}] = -50
+        precip.loc[{"time": slice("2000-01-01", "2000-01-05")}] = 50
         # WHEN
-        ddnorth = icclim.ddnorth(in_files=dd, slice_mode="month")
+        r10mm = icclim.r10mm(in_files=precip, slice_mode="month")
+        # THEN
+        assert r10mm.isel(time=0) == 5
+
+    def test_mm_to_mmday__error_bas_standard_name(self):
+        # GIVEN
+        time_range = xr.DataArray(
+            pd.date_range("2000", periods=365, freq="D"), dims=["time"]
+        )
+        precip = xr.DataArray(
+            np.ones(365),
+            coords={"time": time_range, "lat": 1, "lon": 1},
+            dims="time",
+            attrs={"units": "mm", "standard_name": "HeHoCacao"},
+        )
+        precip.loc[{"time": slice("2000-01-01", "2000-01-05")}] = 50
         # THEN
-        assert ddnorth.isel(time=0) == 26
-        assert ddnorth.isel(time=3) == 29
+        with pytest.raises(pint.DimensionalityError):
+            # WHEN
+            icclim.r10mm(in_files=precip)
diff --git a/icclim/utils.py b/icclim/utils.py
@@ -3,9 +3,14 @@
 from datetime import datetime
 
 import dateparser
+import pint
+import xarray as xr
+import xclim
 
 from icclim.icclim_exceptions import InvalidIcclimArgumentError
 
+PR_AMOUNT_STANDARD_NAME = "thickness_of_rainfall_amount"
+
 
 def read_date(in_date: str | datetime) -> datetime:
     if isinstance(in_date, datetime):
@@ -30,3 +35,17 @@ def is_number_sequence(values) -> bool:
     return isinstance(values, (tuple, list)) and all(
         map(lambda x: isinstance(x, (float, int)), values)
     )
+
+
+def _is_rate(u: pint.Unit) -> bool:
+    return u.dimensionality.get("[time]") == -1
+
+
+def _is_amount(u: pint.Unit) -> bool:
+    return not _is_rate(u)
+
+
+def is_precipitation_amount(source: xr.DataArray) -> bool:
+    # Compare the standard name first so units are parsed only for precipitation.
+    is_pr = source.attrs.get("standard_name", None) == PR_AMOUNT_STANDARD_NAME
+    return is_pr and _is_amount(xclim.core.units.units2pint(source))
diff --git a/setup.py b/setup.py
@@ -20,7 +20,7 @@
 
 setup(
     name="icclim",
-    version="5.4.0",
+    version="6.0.0",
     packages=find_packages(),
     author="Christian P.",
     author_email="[email protected]",