diff --git a/.github/CONTRIBUTING.rst b/.github/CONTRIBUTING.rst index b2cbb834..0821b660 100644 --- a/.github/CONTRIBUTING.rst +++ b/.github/CONTRIBUTING.rst @@ -13,7 +13,7 @@ Contents: Getting started ---------------- +=============== Let's first see how to get icclim up and running. icclim sources are stored in `github `_. @@ -51,7 +51,7 @@ You can now: See `numpy's guide `_ for additional keywords. Documentation contribution --------------------------- +========================== One of the most important aspect of any open source project is its documentation. It's both the entry point for most new users and the gallery of the project. @@ -83,7 +83,7 @@ By default it will serve the documentation on ``localhost://8000``. Add new standard indices ------------------------- +======================== Existing index in xclim ~~~~~~~~~~~~~~~~~~~~~~~ @@ -142,7 +142,7 @@ This process is described in section `Existing index in xclim`_ above. Add new operators for user indices ----------------------------------- +================================== icclim provides a convenient way to quickly write simple index, we call this features "user indices". A few operators are already available but if you think icclim could benefit from adding new ones, your contribution is most welcomed. @@ -168,7 +168,7 @@ For the unit tests, you can add them in ``tests/unit_tests/test_user_indices.py` For the documentation, you should add an explanation of the operator behavior in section ``user_index`` of ``doc/references/icclim_index_api.rst``. Improve icclim API ------------------- +================== icclim features pre-processing and post-processing steps over the index computation: This includes: @@ -194,7 +194,7 @@ For post-processing steps: Other contributions -------------------- +=================== If you would like to see any other change in icclim not listed here, you can always open an issue on `icclim's github `_ and we will work with you on how to implement it. 
diff --git a/README.rst b/README.rst index a8ced923..db77ee48 100644 --- a/README.rst +++ b/README.rst @@ -63,7 +63,7 @@ For a detailed description of each ECA&D index, please visit: https://www.ecad.e .. Pytest Coverage Comment:Begin -.. |coverage| image:: https://img.shields.io/badge/Coverage-90%25-brightgreen.svg +.. |coverage| image:: https://img.shields.io/badge/Coverage-92%25-brightgreen.svg :target: https://github.com/cerfacs-globc/icclim/blob/master/README.rst#code-coverage :alt: Code coverage diff --git a/doc/source/dev/ci.rst b/doc/source/dev/ci.rst index 47bfc1c9..a7580b5f 100644 --- a/doc/source/dev/ci.rst +++ b/doc/source/dev/ci.rst @@ -1,16 +1,15 @@ Continuous integration ====================== -icclim continuous integration aims to assist development by: -- Avoiding introducing bugs in the code base . -- Ensuring all new code follow the same code style. -- Measuring how much icclim code base is tested by automated unit tests. This is known as code coverage. -- Making sure the documentation generation is functioning well. +icclim continuous integration (CI) aims to assist development by: + - Avoiding introducing bugs in the code base. + - Ensuring all new code follow the same code style. + - Measuring how much icclim code base is tested by automated unit tests. This is known as code coverage. + - Making sure the documentation generation is functioning well. These goals are reached using multiple tools: -- pre-commit CI enforce the code style (Black + flake8 + isort) is followed by -committing changes directly on new pull request and blocking merge if necessary. -The relevant file is `.pre-commit-config.yaml`. -- readthedocs, which serve our documentation is also configured to run the documentation generation on -each new pull request. -- github actions are used to run unit tests and report the results in each pull request. 
+ - pre-commit CI ensures the code style (Black + flake8 + isort) is followed by + committing changes directly on new pull request and blocking merge if necessary. + The relevant file is `.pre-commit-config.yaml`. + - readthedocs, which serves our documentation, is also configured to run the documentation generation on each new pull request. + - github actions are used to run unit tests and report the results in each pull request. diff --git a/doc/source/dev/release_process.rst b/doc/source/dev/release_process.rst index 5db086e4..05da9fb6 100644 --- a/doc/source/dev/release_process.rst +++ b/doc/source/dev/release_process.rst @@ -31,11 +31,11 @@ Release process python3 -m pip install --index-url https://test.pypi.org/simple/ icclim -.. note:: + .. note:: - It may fail due to missing dependencies in test.pypi. - In that case, create the environment from icclim environment.yml file to - pull all needed dependencies from conda. + It may fail due to missing dependencies in test.pypi. + In that case, create the environment from icclim environment.yml file to + pull all needed dependencies from conda. #. Upload to pypi for real. diff --git a/doc/source/references/release_notes.rst b/doc/source/references/release_notes.rst index bbbd5b2c..c17a1043 100644 --- a/doc/source/references/release_notes.rst +++ b/doc/source/references/release_notes.rst @@ -1,8 +1,17 @@ Release history =============== -5.2.1-dev ---------- +5.2.1 +----- +[maint] Made Frequency part of SliceMode union. +[fix] slice_mode seasonal samplings were giving wrong results for quite a few indices. This has been fixed and the performances should also be improved by the fix. +However, now seasonal slice_mode does not allow using xclim missing values mechanisms. +[fix] user_index ExtremeMode config was not properly parsed when a string was used. +[fix] user_index Anomaly operator was not properly using the `ref_time_range` to set up a reference period as it should. 
+[fix] user_index Sum and Mean operators were broken due to a previous refactoring and a lack of unit tests; it is now fixed and tested. +[maint] Changed how `rechunker` dependency is pinned to add flexibility. We want a version above '0.3' but not the '0.4'. +[maint] For the newly generated API, on `custom_index` function, the parameter `user_index` is now mandatory. + 5.2.0 ----- diff --git a/environment.yml b/environment.yml index c1ec3a10..e10d070a 100644 --- a/environment.yml +++ b/environment.yml @@ -13,7 +13,7 @@ dependencies: - netCDF4>=1.5.7 - cftime>=1.5.0 - pyyaml>=6.0 - - rechunker>=0.5 + - rechunker>=0.3.3 - psutil - zarr - fsspec diff --git a/icclim/__init__.py b/icclim/__init__.py index ce09ed2d..0feea378 100644 --- a/icclim/__init__.py +++ b/icclim/__init__.py @@ -3,4 +3,4 @@ from .main import index, indice, indices # noqa from .pre_processing.rechunk import create_optimized_zarr_store # noqa -__version__ = "5.2.0" +__version__ = "5.2.1" diff --git a/icclim/_generated_api.py b/icclim/_generated_api.py index 3c57bbdb..fbfaca1c 100644 --- a/icclim/_generated_api.py +++ b/icclim/_generated_api.py @@ -4012,6 +4012,7 @@ def ww( def custom_index( + user_index: UserIndexDict, in_files: str | list[str] | Dataset | DataArray, var_name: str | list[str] | None = None, slice_mode: SliceMode = Frequency.YEAR, @@ -4022,7 +4023,6 @@ def custom_index( ignore_Feb29th: bool = False, out_unit: str | None = None, netcdf_version: str | NetcdfVersion = NetcdfVersion.NETCDF4, - user_index: UserIndexDict = None, save_percentile: bool = False, logs_verbosity: Verbosity | str = Verbosity.LOW, ) -> Dataset: @@ -4074,10 +4074,6 @@ ``optional`` Output unit for certain indices: "days" or "%" (default: "days"). netcdf_version : str | icclim.models.netcdf_version.NetcdfVersion ``optional`` NetCDF version to create (default: "NETCDF3_CLASSIC"). - user_index : UserIndexDict - ``optional`` A dictionary with parameters for user defined index. 
- See :ref:`Custom indices`. - Ignored for ECA&D indices. save_percentile : bool ``optional`` True if the percentiles should be saved within the resulting netcdf file (default: False). @@ -4090,6 +4086,7 @@ def custom_index( This function has been auto-generated. """ return icclim.index( + user_index=user_index, in_files=in_files, var_name=var_name, slice_mode=slice_mode, @@ -4100,7 +4097,6 @@ def custom_index( ignore_Feb29th=ignore_Feb29th, out_unit=out_unit, netcdf_version=netcdf_version, - user_index=user_index, save_percentile=save_percentile, logs_verbosity=logs_verbosity, ) diff --git a/icclim/main.py b/icclim/main.py index 0d10938e..a2ade48c 100644 --- a/icclim/main.py +++ b/icclim/main.py @@ -28,7 +28,7 @@ from icclim.models.user_index_config import UserIndexConfig from icclim.models.user_index_dict import UserIndexDict from icclim.pre_processing.input_parsing import read_dataset, update_to_standard_coords -from icclim.user_indices.dispatcher import compute_user_index +from icclim.user_indices.calc_operation import CalcOperation, compute_user_index log: IcclimLogger = IcclimLogger.get_instance(Verbosity.LOW) @@ -221,7 +221,7 @@ def index( DEPRECATED, use user_index instead. 
""" - _setup(callback, callback_percentage_start_value, logs_verbosity) + _setup(callback, callback_percentage_start_value, logs_verbosity, slice_mode) index_name, user_index = _handle_deprecated_params( index_name, indice_name, transfer_limit_Mbytes, user_index, user_indice ) @@ -312,14 +312,19 @@ def _handle_deprecated_params( return index_name, user_index -def _setup(callback, callback_percentage_start_value, logs_verbosity): +def _setup(callback, callback_start_value, logs_verbosity, slice_mode): # make xclim input daily check a warning instead of an error + # TODO: it might be safer to feed a context manager which will setup + # and teardown these confs xclim.set_options(data_validation="warn") + if Frequency.is_seasonal(slice_mode): + # for now seasonal slice_modes missing values cannot be checked + xclim.set_options(check_missing="skip") # keep attributes through xarray operations xr.set_options(keep_attrs=True) log.set_verbosity(logs_verbosity) log.start_message() - callback(callback_percentage_start_value) + callback(callback_start_value) def _compute_ecad_index_dataset( @@ -359,12 +364,13 @@ def _compute_user_index_dataset( ) user_indice_da = compute_user_index(user_indice_config) user_indice_da.attrs["units"] = _get_unit(config.out_unit, user_indice_da) - if config.freq.post_processing is not None: - user_indice_da, time_bounds = config.freq.post_processing(user_indice_da) - result_ds[user_indice_config.index_name] = user_indice_da - result_ds["time_bounds"] = time_bounds - else: + if user_indice_config.calc_operation is CalcOperation.ANOMALY: + # with anomaly time axis disappear result_ds[user_indice_config.index_name] = user_indice_da + return result_ds + user_indice_da, time_bounds = config.freq.post_processing(user_indice_da) + result_ds[user_indice_config.index_name] = user_indice_da + result_ds["time_bounds"] = time_bounds return result_ds diff --git a/icclim/models/constants.py b/icclim/models/constants.py index 7c80903c..a8808eca 100644 --- 
a/icclim/models/constants.py +++ b/icclim/models/constants.py @@ -26,3 +26,14 @@ MODIFIABLE_UNIT = "MODIFIABLE_UNIT" # fields: out_unit MODIFIABLE_THRESHOLD = "MODIFIABLE_THRESHOLD" # fields: threshold MODIFIABLE_QUANTILE_WINDOW = "MODIFIABLE_QUANTILE_WINDOW" # fields: window_width + +# Map of months index to their short name, used to get a pandas frequency anchor +MONTHS_MAP = {1:"JAN", 2:"FEB", 3:"MAR", 4:"APR", 5:"MAY", 6:"JUN", 7:"JUL", 8:"AUG", 9:"SEP", 10:"OCT", 11:"NOV", 12:"DEC" } + +# Season defined by their month numbers +AMJJAS_MONTHS = [*range(4, 9)] +ONDJFM_MONTHS = [10, 11, 12, 1, 2, 3] +DJF_MONTHS = [12, 1, 2] +MAM_MONTHS = [*range(3, 6)] +JJA_MONTHS = [*range(6, 9)] +SON_MONTHS = [*range(9, 12)] diff --git a/icclim/models/frequency.py b/icclim/models/frequency.py index 6de9bfc8..6be3331d 100644 --- a/icclim/models/frequency.py +++ b/icclim/models/frequency.py @@ -16,79 +16,88 @@ from xarray.core.dataarray import DataArray from icclim.icclim_exceptions import InvalidIcclimArgumentError +from icclim.models.constants import ( + AMJJAS_MONTHS, + DJF_MONTHS, + JJA_MONTHS, + MAM_MONTHS, + MONTHS_MAP, + ONDJFM_MONTHS, + SON_MONTHS, +) -def seasons_resampler( - month_list: list[int], +def get_month_filter(month_list: list[int]) -> Callable: + return lambda da: filter_months(da, month_list) + + +def filter_months(da: DataArray, month_list: list[int]) -> DataArray: + return da.sel(time=da.time.dt.month.isin(month_list)) + + +def get_seasonal_time_updater( + start_month: int, + end_month: int, ) -> Callable[[DataArray], tuple[DataArray, DataArray]]: - """ - Seasonal resampling method generator. - Returns a callable of DataArray which will resample the data to - the a season composed of the given month. - It also attached the corresponding time_bounds. + """Seasonal time updater and time bounds creator method generator. + Returns a callable of DataArray which will rewrite the time dimension to + the season composed of the given month. 
The data must have been computed on this + season beforehand. + It also create the corresponding time_bounds. Parameters ---------- - month_list : List[int] - List of month identified by `{1..12}`. + start_month: int + The season starting month, it must be between 1 and 12. + end_month: int + The season ending month, it must be between 1 and 12. Returns ------- function: Callable[[DataArray], DataArray] function resampling the input da to the wanted season. """ - def resampler(da: DataArray) -> tuple[DataArray, DataArray]: + def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: da_years = np.unique(da.time.dt.year) - seasons_acc: list[DataArray] = [] time_bounds = [] new_time_axis = [] - start_month = month_list[0] - end_month = month_list[-1] - filtered_da = month_filter(da, month_list) - # TODO, maybe raise a warning if the month_list is not made of consecutive month - # (case of user error) + first_time = da.time.values[0] for year in da_years: if start_month > end_month: - int_year = year - 1 + year_of_season_end = year + 1 else: - int_year = year - first_time = filtered_da.time.values[0] + year_of_season_end = year if isinstance(first_time, cftime.datetime): start = cftime.datetime( year, start_month, 1, calendar=first_time.calendar ) end = cftime.datetime( - year, end_month + 1, 1, calendar=first_time.calendar + year_of_season_end, end_month + 1, 1, calendar=first_time.calendar ) else: - start = pd.to_datetime(f"{int_year}-{start_month}") - end = pd.to_datetime(f"{year}-{end_month + 1}") + start = pd.to_datetime(f"{year}-{start_month}") + end = pd.to_datetime(f"{year_of_season_end}-{end_month + 1}") end = end - datetime.timedelta(days=1) - season = filtered_da.sel(time=slice(start, end)).sum("time") new_time_axis.append(start + (end - start) / 2) time_bounds.append([start, end]) - seasons_acc.append(season) - seasons = xr.concat(seasons_acc, "time") - seasons.coords["time"] = ("time", new_time_axis) + da.coords["time"] = ("time", new_time_axis) 
time_bounds_da = DataArray( data=time_bounds, dims=["time", "bounds"], - coords=[("time", seasons.time.values), ("bounds", [0, 1])], + coords=[("time", da.time.values), ("bounds", [0, 1])], ) - return seasons, time_bounds_da - - return resampler - + return da, time_bounds_da -def month_filter(da: DataArray, month_list: list[int]) -> DataArray: - return da.sel(time=da.time.dt.month.isin(month_list)) + return add_time_bounds -def _add_time_bounds(freq: str) -> Callable[[DataArray], tuple[DataArray, DataArray]]: - def add_bounds(da: DataArray) -> tuple[DataArray, DataArray]: +def _get_time_bounds_updater( + freq: str, +) -> Callable[[DataArray], tuple[DataArray, DataArray]]: + def add_time_bounds(da: DataArray) -> tuple[DataArray, DataArray]: # da should already be resampled to freq if isinstance(da.indexes.get("time"), xr.CFTimeIndex): offset = xr.coding.cftime_offsets.to_offset(freq) - start = np.array( + starts = np.array( [ cftime.datetime( date.year, @@ -102,81 +111,155 @@ def add_bounds(da: DataArray) -> tuple[DataArray, DataArray]: for date in da.indexes.get("time") ] ) - end = start + offset - end = end - datetime.timedelta(days=1) + ends = starts + offset + ends = ends - datetime.timedelta(days=1) else: offset = pd.tseries.frequencies.to_offset(freq) - start = pd.to_datetime(da.time.dt.floor("D")) - end = start + offset - end = end - pd.Timedelta(days=1) - da["time"] = start + (end - start) / 2 + starts = pd.to_datetime(da.time.dt.floor("D")) + ends = starts + offset + ends = ends - pd.Timedelta(days=1) + # make time axis values be in the middle of the bounds + da["time"] = starts + (ends - starts) / 2 time_bounds_da = DataArray( - data=list(zip(start, end)), + data=list(zip(starts, ends)), dims=["time", "bounds"], coords=[("time", da.time.values), ("bounds", [0, 1])], ) return da, time_bounds_da - return add_bounds + return add_time_bounds -class Frequency(Enum): - """ - The sampling frequency of the resulting dataset. 
+class _Freq: + """Internal class to ease writing and maintaining the enum. + Without it, in the instanciation of enum values we would have to write tuples + would not be able to use kwargs, which make the code less readable. """ - MONTH = ("MS", ["month", "MS"], "monthly time series", _add_time_bounds("MS")) + def __init__( + self, + panda_freq: str, + accepted_values: list[str], + description: str, + post_processing: Callable[[DataArray], tuple[DataArray, DataArray]], + pre_processing: Callable[[DataArray], DataArray], + ): + self.panda_freq: str = panda_freq + self.accepted_values: list[str] = accepted_values + self.description = description + self.post_processing = post_processing + self.pre_processing = pre_processing + + +class Frequency(Enum): + """The sampling frequency of the resulting dataset.""" + + MONTH = _Freq( + panda_freq="MS", + accepted_values=["month", "MS"], + description="monthly time series", + post_processing=_get_time_bounds_updater("MS"), + pre_processing=lambda x: x, + ) """ Resample to monthly values""" - AMJJAS = ( - "MS", - ["AMJJAS"], - "summer half-year time series", - seasons_resampler([*range(4, 9)]), + AMJJAS = _Freq( + panda_freq="AS-APR", + accepted_values=["AMJJAS"], + description="summer half-year time series", + post_processing=get_seasonal_time_updater(AMJJAS_MONTHS[0], AMJJAS_MONTHS[-1]), + pre_processing=get_month_filter(AMJJAS_MONTHS), ) """ Resample to summer half-year, from April to September included.""" - ONDJFM = ( - "MS", - ["ONDJFM"], - "winter half-year time series", - seasons_resampler([10, 11, 12, 1, 2, 3]), + ONDJFM = _Freq( + panda_freq="AS-OCT", + accepted_values=["ONDJFM"], + description="winter half-year time series", + post_processing=get_seasonal_time_updater(ONDJFM_MONTHS[0], ONDJFM_MONTHS[-1]), + pre_processing=get_month_filter(ONDJFM_MONTHS), ) """ Resample to winter half-year, from October to March included.""" - DJF = ("MS", ["DJF"], "winter time series", seasons_resampler([12, 1, 2])) + DJF = _Freq( 
+ panda_freq="AS-DEC", + accepted_values=["DJF"], + description="winter time series", + post_processing=get_seasonal_time_updater(DJF_MONTHS[0], DJF_MONTHS[-1]), + pre_processing=get_month_filter(DJF_MONTHS), + ) """ Resample to winter season, from December to February included.""" - MAM = ("MS", ["MAM"], "spring time series", seasons_resampler([*range(3, 6)])) + MAM = _Freq( + panda_freq="AS-MAR", + accepted_values=["MAM"], + description="spring time series", + post_processing=get_seasonal_time_updater(MAM_MONTHS[0], MAM_MONTHS[-1]), + pre_processing=get_month_filter(MAM_MONTHS), + ) """ Resample to spring season, from March to May included.""" - JJA = ("MS", ["JJA"], "summer time series", seasons_resampler([*range(6, 9)])) + JJA = _Freq( + panda_freq="AS-JUN", + accepted_values=["JJA"], + description="summer time series", + post_processing=get_seasonal_time_updater(JJA_MONTHS[0], JJA_MONTHS[-1]), + pre_processing=get_month_filter(JJA_MONTHS), + ) """ Resample to summer season, from June to Agust included.""" - SON = ("MS", ["SON"], "autumn time series", seasons_resampler([*range(9, 12)])) + SON = _Freq( + panda_freq="AS-SEP", + accepted_values=["SON"], + description="autumn time series", + post_processing=get_seasonal_time_updater(SON_MONTHS[0], SON_MONTHS[-1]), + pre_processing=get_month_filter(SON_MONTHS), + ) """ Resample to fall season, from September to November included.""" - CUSTOM = ("MS", [], None, None) - """ Resample to custom values. Do not use as is, use `slice_mode` with month or season - keywords instead. + CUSTOM = _Freq( + panda_freq="MS", + accepted_values=[], + description="", + post_processing=lambda x: x, + pre_processing=lambda x: x, + ) + """ Placeholder instance for custom sampling frequencies. + Do not use as is, use `slice_mode` with "month", "season" or "dates" keywords + instead. 
""" - YEAR = ("YS", ["year", "YS"], "annual time series", _add_time_bounds("YS")) + YEAR = _Freq( + panda_freq="YS", + accepted_values=["year", "YS"], + description="annual time series", + post_processing=_get_time_bounds_updater("YS"), + pre_processing=lambda x: x, + ) """ Resample to yearly values.""" - def __init__( - self, - panda_time: str, - accepted_values: list[str], - description: str | None = None, - post_processing: ( - Callable[[DataArray], tuple[DataArray, DataArray]] | None - ) = None, - ): - self.panda_freq: str = panda_time - self.accepted_values: list[str] = accepted_values - self.description = description - self.post_processing = post_processing + def __init__(self, freq: _Freq): + self._freq = freq + + @property + def panda_freq(self): + return self._freq.panda_freq + + @property + def accepted_values(self): + return self._freq.accepted_values + + @property + def description(self): + return self._freq.description + + @property + def post_processing(self): + return self._freq.post_processing + + @property + def pre_processing(self): + return self._freq.pre_processing @staticmethod def lookup(slice_mode: SliceMode) -> Frequency: @@ -191,6 +274,18 @@ def lookup(slice_mode: SliceMode) -> Frequency: f"Use a Frequency from {[f for f in Frequency]}" ) + @staticmethod + def is_seasonal(slice_mode: SliceMode) -> bool: + return Frequency.lookup(slice_mode) in [ + Frequency.CUSTOM, + Frequency.ONDJFM, + Frequency.AMJJAS, + Frequency.MAM, + Frequency.JJA, + Frequency.SON, + Frequency.DJF, + ] + def _get_frequency_from_string(slice_mode: str) -> Frequency: for freq in Frequency: @@ -198,9 +293,22 @@ def _get_frequency_from_string(slice_mode: str) -> Frequency: str.upper, freq.accepted_values ): return freq + # TODO: we could add a compatibility to other pandas freq if we detect + # something like WS, 4MS, etc. 
In which case we would use FREQUENCY.CUSTOM raise InvalidIcclimArgumentError(f"Unknown frequency {slice_mode}.") +def _is_season_valid(months): + is_valid = True + for i in range(0, len(months) - 1): + is_valid = is_valid and months[i] > 0 and months[i] < 13 + if months[i] > months[i + 1]: + is_valid = is_valid and months[i + 1] == 1 and months[i] == 12 + else: + is_valid = is_valid and (months[i + 1] - months[i] == 1) + return is_valid + + def _get_frequency_from_list(slice_mode_list: list) -> Frequency: if len(slice_mode_list) < 2: raise InvalidIcclimArgumentError( @@ -209,25 +317,42 @@ def _get_frequency_from_list(slice_mode_list: list) -> Frequency: f" The maximum length here is 2." ) sampling_freq = slice_mode_list[0] - months = slice_mode_list[1] custom_freq = Frequency.CUSTOM - if sampling_freq == "month": - custom_freq.post_processing = lambda da: month_filter(da, months) - custom_freq.description = f"monthly time series (months: {months})" + if sampling_freq in ["month", "months"]: + months = slice_mode_list[1] + + def month_list_post_processing(da): + res, bounds = _get_time_bounds_updater("MS")(da) + res = get_month_filter(months)(res) + return res, bounds + + custom_freq._freq = _Freq( + pre_processing=get_month_filter(months), + post_processing=month_list_post_processing, + panda_freq="MS", + description=f"monthly time series (months: {months})", + accepted_values=[], + ) elif sampling_freq == "season": - if months is Tuple: - rearranged_months = months[1] + months[0] - custom_freq.post_processing = seasons_resampler(rearranged_months) - custom_freq.description = ( - f"seasonal time series (season: {rearranged_months})" + months = slice_mode_list[1] + if isinstance(months, Tuple): + months = months[0] + months[1] # concat in case of ([12], [1, 2]) + if not _is_season_valid(months): + raise InvalidIcclimArgumentError( + f"A season created using `slice_mode` must be made of consecutive" + f" months. It was {months}." 
) - else: - custom_freq.post_processing = seasons_resampler(months) - custom_freq.description = f"seasonal time series (season: {months})" + custom_freq._freq = _Freq( + pre_processing=get_month_filter(months), + post_processing=get_seasonal_time_updater(months[0], months[-1]), + panda_freq=f"AS-{MONTHS_MAP[months[0]]}", + description=f"seasonal time series (season: {months})", + accepted_values=[], + ) else: raise InvalidIcclimArgumentError( - f"Unknown frequency {slice_mode_list}. " - "The sampling frequency must be one of {'season', 'month'}" + f"Unknown frequency {slice_mode_list}." + " The sampling frequency must be one of {'season', 'month'}" ) return custom_freq diff --git a/icclim/models/index_config.py b/icclim/models/index_config.py index c4e60ba1..7c976abc 100644 --- a/icclim/models/index_config.py +++ b/icclim/models/index_config.py @@ -29,7 +29,6 @@ class CfVariable: The variable studied limited to the in base period. """ - # TODO: seems unnecessary abstraction between ds and da. Replace by a Dataset ? 
name: str study_da: DataArray reference_da: DataArray @@ -118,6 +117,7 @@ def __init__( base_period_time_range=base_period_time_range, only_leap_years=only_leap_years, chunk_it=chunk_it, + pre_processing=self.freq.pre_processing, ) for var_name in var_names ] @@ -181,14 +181,20 @@ def _build_cf_variable( base_period_time_range: list[str] | None, only_leap_years: bool, chunk_it: bool, + pre_processing: Callable, ) -> CfVariable: if chunk_it: da = da.chunk("auto") # noqa - typing fixed in futur xarray version study_da = _build_study_da(da, time_range, ignore_Feb29th) + study_da = pre_processing(study_da) if base_period_time_range is not None: reference_da = _build_reference_da(da, base_period_time_range, only_leap_years) + reference_da = pre_processing(reference_da) else: reference_da = study_da + # TODO: all these operations should probably be added in history metadata + # it could be a property in CfVariable which will be reused when we update the + # metadata of the index, at the end. return CfVariable(name, study_da, reference_da) diff --git a/icclim/models/user_index_config.py b/icclim/models/user_index_config.py index 102c7e61..109fabf9 100644 --- a/icclim/models/user_index_config.py +++ b/icclim/models/user_index_config.py @@ -53,11 +53,11 @@ class ExtremeMode(Enum): @staticmethod def lookup(query: str) -> ExtremeMode: for mode in ExtremeMode: - if query.upper == mode.value.upper(): + if query.upper() == mode.value.upper(): return mode raise InvalidIcclimArgumentError( - f"Unknown extreme mode {query}." - f"Use one of {[mode.value for mode in ExtremeMode]}." + f"Unknown extreme_mode {query}." + f" Use one of {[mode.value for mode in ExtremeMode]}." 
) @@ -113,7 +113,6 @@ class UserIndexConfig: window_width: int | None = None coef: float | None = None var_type: str | None = None - da_ref: DataArray | None = None nb_event_config: NbEventConfig | None = None save_percentile: bool = False @@ -134,6 +133,7 @@ def __init__( var_type=None, is_percent=False, save_percentile=False, + ref_time_range: list[str] = None, ) -> None: self.index_name = index_name self.calc_operation = calc_operation @@ -155,6 +155,11 @@ def __init__( logical_operation, link_logical_operations, thresh, cf_vars ) self.save_percentile = save_percentile + self.ref_time_range = ref_time_range + if (rtr := ref_time_range) is not None: + rtr = [x.strftime("%Y-%m-%d") for x in rtr] + for cf_var in cf_vars: + cf_var.reference_da = cf_var.study_da.sel(time=slice(rtr[0], rtr[1])) def get_nb_event_conf( diff --git a/icclim/models/user_index_dict.py b/icclim/models/user_index_dict.py index 983944af..a14a912a 100644 --- a/icclim/models/user_index_dict.py +++ b/icclim/models/user_index_dict.py @@ -4,20 +4,20 @@ from typing import Literal, TypedDict from icclim.models.user_index_config import LogicalOperationLiteral -from icclim.user_indices.dispatcher import CalcOperationLiteral +from icclim.user_indices.calc_operation import CalcOperation, CalcOperationLiteral class UserIndexDict(TypedDict, total=False): index_name: str - calc_operation: CalcOperationLiteral - logical_operation: LogicalOperationLiteral - thresh: str | float - link_logical_operations: Literal["and", "or"] - extreme_mode: Literal["min", "max"] - window_width: int - coef: float - date_event: bool - var_type: Literal["t", "p"] - ref_time_range: list[datetime.datetime] # length of 2 + calc_operation: CalcOperationLiteral | CalcOperation + logical_operation: LogicalOperationLiteral | None + thresh: str | float | None + link_logical_operations: Literal["and", "or"] | None + extreme_mode: Literal["min", "max"] | None + window_width: int | None + coef: float | None + date_event: bool | None + 
var_type: Literal["t", "p"] | None + ref_time_range: list[datetime.datetime] | None # length of 2 # deprecated - indice_name: str + indice_name: str | None diff --git a/icclim/pre_processing/input_parsing.py b/icclim/pre_processing/input_parsing.py index f309a4bb..b1028f3d 100644 --- a/icclim/pre_processing/input_parsing.py +++ b/icclim/pre_processing/input_parsing.py @@ -26,12 +26,15 @@ def read_dataset( raise InvalidIcclimArgumentError( "When the input is a DataArray, var_names must be a string." ) + if isinstance(var_names, list): + var_names = var_names[0] data_name = var_names else: if len(index.variables) > 1: raise InvalidIcclimArgumentError( - f"Index {index.name} need {len(index.variables)} variables." - f"Please provide them with an xarray.Dataset or a netCDF file." + f"Index {index.name} needs {len(index.variables)} variables." + f" Please provide them with an xarray.Dataset, a netCDF file or a" + f" zarr store." ) data_name = index.variables[0][0] # first alias of the unique variable input_dataset = data.to_dataset(name=data_name, promote_attrs=True) diff --git a/icclim/tests/test_dispatcher.py b/icclim/tests/test_calc_operation.py similarity index 78% rename from icclim/tests/test_dispatcher.py rename to icclim/tests/test_calc_operation.py index 47f849c6..29698f5c 100644 --- a/icclim/tests/test_dispatcher.py +++ b/icclim/tests/test_calc_operation.py @@ -9,8 +9,16 @@ from icclim.models.index_config import CfVariable from icclim.models.user_index_config import LogicalOperation from icclim.tests.test_utils import stub_pr, stub_tas, stub_user_index -from icclim.user_indices import dispatcher -from icclim.user_indices.dispatcher import CalcOperation +from icclim.user_indices import calc_operation +from icclim.user_indices.calc_operation import ( + CalcOperation, + anomaly, + compute_user_index, + count_events, + max_consecutive_event_count, + run_mean, + run_sum, +) class Test_compute: @@ -22,7 +30,7 @@ def test_error_bad_operation(self): user_index.freq = 
Frequency.MONTH # WHEN with pytest.raises(InvalidIcclimArgumentError): - dispatcher.compute_user_index(user_index) + compute_user_index(user_index) def test_simple(self): # GIVEN @@ -31,7 +39,7 @@ def test_simple(self): user_index.calc_operation = "max" user_index.freq = Frequency.MONTH # WHEN - result = dispatcher.compute_user_index(user_index) + result = compute_user_index(user_index) # THEN assert result.data[0] == 1 @@ -50,7 +58,7 @@ def test_simple_percentile_pr(self): user_index.var_type = PRECIPITATION user_index.freq = Frequency.YEAR # WHEN - result = dispatcher.compute_user_index(user_index) + result = compute_user_index(user_index) # THEN assert result.data[0] == 5 @@ -67,91 +75,104 @@ def test_simple_percentile_temp(self): user_index.var_type = TEMPERATURE user_index.freq = Frequency.MONTH # WHEN - result = dispatcher.compute_user_index(user_index) + result = compute_user_index(user_index) # THEN assert result.data[0] == 1 assert result.data[1] == 5 @patch("icclim.models.user_index_config.UserIndexConfig") - def test_error_anomaly(self, config_mock: MagicMock): - config_mock.da_ref = None + @patch("icclim.models.index_config.CfVariable") + def test_error_anomaly(self, config_mock: MagicMock, cf_var_mock: MagicMock): + config_mock.cf_vars = [cf_var_mock] + cf_var_mock.reference_da = None with pytest.raises(MissingIcclimInputError): - dispatcher.anomaly(config_mock) + anomaly(config_mock) - @patch("icclim.user_indices.operators.anomaly") @patch("icclim.models.user_index_config.UserIndexConfig") - def test_success_anomaly(self, config_mock: MagicMock, op_mock: MagicMock): - dispatcher.anomaly(config_mock) + @patch("icclim.user_indices.operators.anomaly") + @patch("icclim.models.index_config.CfVariable") + def test_success_anomaly( + self, config_mock: MagicMock, op_mock: MagicMock, cf_var_mock: MagicMock + ): + config_mock.cf_vars = [cf_var_mock] + cf_var_mock.reference_da = [1, 2, 3] # no-op, just need to mock a valid length + anomaly(config_mock) 
op_mock.assert_called_once() @patch("icclim.models.user_index_config.UserIndexConfig") def test_error_run_sum(self, config_mock: MagicMock): config_mock.extreme_mode = None with pytest.raises(MissingIcclimInputError): - dispatcher.run_sum(config_mock) + run_sum(config_mock) config_mock.extreme_mode = {} config_mock.window_width = None with pytest.raises(MissingIcclimInputError): - dispatcher.run_sum(config_mock) + run_sum(config_mock) @patch("icclim.user_indices.operators.run_sum") @patch("icclim.models.user_index_config.UserIndexConfig") def test_success_run_sum(self, config_mock: MagicMock, op_mock: MagicMock): - dispatcher.run_sum(config_mock) + run_sum(config_mock) op_mock.assert_called_once() @patch("icclim.models.user_index_config.UserIndexConfig") def test_error_run_mean(self, config_mock: MagicMock): config_mock.extreme_mode = None with pytest.raises(MissingIcclimInputError): - dispatcher.run_mean(config_mock) + run_mean(config_mock) config_mock.extreme_mode = {} config_mock.window_width = None with pytest.raises(MissingIcclimInputError): - dispatcher.run_mean(config_mock) + run_mean(config_mock) @patch("icclim.user_indices.operators.run_mean") @patch("icclim.models.user_index_config.UserIndexConfig") def test_success_run_mean(self, config_mock: MagicMock, op_mock: MagicMock): - dispatcher.run_mean(config_mock) + run_mean(config_mock) op_mock.assert_called_once() @patch("icclim.models.user_index_config.UserIndexConfig") def test_error_max_consecutive_event_count(self, config_mock: MagicMock): config_mock.logical_operation = None with pytest.raises(MissingIcclimInputError): - dispatcher.max_consecutive_event_count(config_mock) + max_consecutive_event_count(config_mock) config_mock.logical_operation = {} config_mock.thresh = None with pytest.raises(MissingIcclimInputError): - dispatcher.max_consecutive_event_count(config_mock) + max_consecutive_event_count(config_mock) config_mock.logical_operation = {} config_mock.thresh = [] with 
pytest.raises(InvalidIcclimArgumentError): - dispatcher.max_consecutive_event_count(config_mock) + max_consecutive_event_count(config_mock) @patch("icclim.user_indices.operators.max_consecutive_event_count") @patch("icclim.models.user_index_config.UserIndexConfig") def test_success_max_consecutive_event_count( self, config_mock: MagicMock, op_mock: MagicMock ): - dispatcher.max_consecutive_event_count(config_mock) + max_consecutive_event_count(config_mock) op_mock.assert_called_once() @patch("icclim.models.user_index_config.UserIndexConfig") def test_error_count_events(self, config_mock: MagicMock): config_mock.nb_event_config = None with pytest.raises(MissingIcclimInputError): - dispatcher.count_events(config_mock) + count_events(config_mock) @patch("icclim.user_indices.operators.count_events") @patch("icclim.models.user_index_config.UserIndexConfig") def test_success_count_events(self, config_mock: MagicMock, op_mock: MagicMock): - dispatcher.count_events(config_mock) + count_events(config_mock) op_mock.assert_called_once() @pytest.mark.parametrize( - "reducer", [dispatcher.sum, dispatcher.mean, dispatcher.min, dispatcher.max] + "reducer", + [ + calc_operation.sum, + calc_operation.mean, + calc_operation.min, + calc_operation.max, + ], ) @patch("icclim.models.user_index_config.UserIndexConfig") def test_error_simple_reducer(self, config_mock: MagicMock, reducer: Callable): @@ -170,5 +191,5 @@ def test_success_simple_reducer(self, config_mock: MagicMock, reducer: str): config_mock.cf_vars = [MagicMock()] config_mock.thresh = 42 with patch("icclim.user_indices.operators." 
+ reducer) as op_mock: - dispatcher.compute_user_index(config_mock) + compute_user_index(config_mock) op_mock.assert_called_once() diff --git a/icclim/tests/test_ecad_indices.py b/icclim/tests/test_ecad_indices.py index 868af2fa..cf6b5baa 100644 --- a/icclim/tests/test_ecad_indices.py +++ b/icclim/tests/test_ecad_indices.py @@ -72,7 +72,7 @@ class Test_SU: @pytest.mark.parametrize("use_dask", [True, False]) def test_su_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[:5] = 0 conf = IndexConfig( ds=ds, @@ -107,7 +107,7 @@ class Test_TR: @pytest.mark.parametrize("use_dask", [True, False]) def test_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[:5] = 0 conf = IndexConfig( ds=ds, @@ -160,7 +160,7 @@ class Test_csu: @pytest.mark.parametrize("use_dask", [True, False]) def test_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[10:15] = 0 conf = IndexConfig( ds=ds, @@ -196,7 +196,7 @@ class Test_gd4: @pytest.mark.parametrize("use_dask", [True, False]) def test_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[5:15] = 0 conf = IndexConfig( ds=ds, @@ -213,7 +213,7 @@ def test_default_threshold(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_custom_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[5:15] = 0 conf = IndexConfig( ds=ds, @@ -233,7 +233,7 @@ class Test_cfd: @pytest.mark.parametrize("use_dask", [True, False]) def 
test_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[5:15] = 0 conf = IndexConfig( ds=ds, @@ -249,7 +249,7 @@ def test_default_threshold(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_custom_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[5:10] = 0 ds.tas[10:15] = 4 conf = IndexConfig( @@ -269,7 +269,7 @@ class Test_fd: @pytest.mark.parametrize("use_dask", [True, False]) def test_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[5:15] = 0 ds.tas[20:25] = 0 conf = IndexConfig( @@ -286,7 +286,7 @@ def test_default_threshold(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_custom_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=26 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=26 + K2C, use_dask=use_dask) ds.tas[5:10] = 0 ds.tas[10:15] = 4 conf = IndexConfig( @@ -306,7 +306,7 @@ class Test_hd17: @pytest.mark.parametrize("use_dask", [True, False]) def test_default_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) ds.tas[5:10] = 0 conf = IndexConfig( ds=ds, @@ -322,7 +322,7 @@ def test_default_threshold(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_custom_threshold(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) ds.tas[5:10] = 0 conf = IndexConfig( ds=ds, @@ -341,7 +341,7 @@ class TestTx90p: @pytest.mark.parametrize("use_dask", [True, False]) def 
test_no_bootstrap_no_overlap(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) ds.tas[5:10] = 0 conf = IndexConfig( ds=ds, @@ -361,7 +361,7 @@ def test_no_bootstrap_no_overlap(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_no_bootstrap_1_year_base(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) conf = IndexConfig( ds=ds, slice_mode=Frequency.MONTH, @@ -380,7 +380,7 @@ def test_no_bootstrap_1_year_base(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_bootstrap_2_years(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) conf = IndexConfig( ds=ds, slice_mode=Frequency.MONTH, @@ -401,7 +401,7 @@ class TestWsdi: @pytest.mark.parametrize("use_dask", [True, False]) def test_wsdi_bootstrap_2_years(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) conf = IndexConfig( ds=ds, slice_mode=Frequency.MONTH, @@ -422,7 +422,7 @@ class TestCsdi: @pytest.mark.parametrize("use_dask", [True, False]) def test_csdi_bootstrap_2_years(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) conf = IndexConfig( ds=ds, slice_mode=Frequency.MONTH, @@ -443,7 +443,7 @@ def test_csdi_bootstrap_2_years(self, use_dask): @pytest.mark.parametrize("use_dask", [True, False]) def test_csdi_custom_thresh(self, use_dask): ds = Dataset() - ds["tas"] = stub_tas(value=27 + K2C, use_dask=use_dask) + ds["tas"] = stub_tas(tas_value=27 + K2C, use_dask=use_dask) conf = IndexConfig( ds=ds, slice_mode=Frequency.MONTH, diff --git a/icclim/tests/test_frequency.py 
b/icclim/tests/test_frequency.py index 6b383d33..d1458335 100644 --- a/icclim/tests/test_frequency.py +++ b/icclim/tests/test_frequency.py @@ -3,7 +3,7 @@ import pytest from icclim.icclim_exceptions import InvalidIcclimArgumentError -from icclim.models.frequency import Frequency, month_filter, seasons_resampler +from icclim.models.frequency import Frequency, filter_months, get_seasonal_time_updater from icclim.tests.test_utils import stub_tas @@ -38,22 +38,29 @@ def test_month(self): def test_season(self): freq = Frequency.lookup(["season", [1, 2, 3, 4]]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "MS" + assert freq.panda_freq == "AS-JAN" assert freq.accepted_values == [] assert freq.post_processing is not None - def test_winter_deprecated(self): - # deprecated way - freq = Frequency.lookup(["season", ([11, 12], [3, 4])]) + def test_winter__deprecated_tuple(self): + freq = Frequency.lookup(["season", ([11, 12], [1, 2, 3, 4])]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "MS" + assert freq.panda_freq == "AS-NOV" assert freq.accepted_values == [] assert freq.post_processing is not None + def test_error__non_consecutive_season(self): + with pytest.raises(InvalidIcclimArgumentError): + Frequency.lookup(["season", ([12, 3])]) + + def test_error__weird_months(self): + with pytest.raises(InvalidIcclimArgumentError): + Frequency.lookup(["season", ([42, 0])]) + def test_winter(self): freq = Frequency.lookup(["season", [11, 12, 1, 2]]) assert freq == Frequency.CUSTOM - assert freq.panda_freq == "MS" + assert freq.panda_freq == "AS-NOV" assert freq.accepted_values == [] assert freq.post_processing is not None @@ -61,7 +68,7 @@ def test_winter(self): class Test_filter_months: def test_simple(self): # WHEN - da = month_filter(stub_tas(), [1, 2, 7]) + da = filter_months(stub_tas(), [1, 2, 7]) # THEN months = np.unique(da.time.dt.month) assert len(months) == 3 @@ -73,9 +80,10 @@ def test_simple(self): class Test_seasons_resampler: def 
test_simple(self): # WHEN - da_res, time_bds_res = seasons_resampler([4, 5, 6])(stub_tas()) + test_da = filter_months(stub_tas(), [4, 5, 6]).resample(time="YS").mean() + da_res, time_bds_res = get_seasonal_time_updater(4, 6)(test_da) # THEN - assert da_res[0] == 91 + np.testing.assert_array_equal(1, da_res) assert time_bds_res[0].data[0] == pd.to_datetime("2042-04") assert ( time_bds_res[0].data[1] @@ -84,28 +92,17 @@ def test_simple(self): def test_winter(self): # WHEN - da_res, time_bds_res = seasons_resampler([11, 12, 1])(stub_tas()) + test_da = filter_months(stub_tas(), [11, 12, 1]).resample(time="AS-NOV").mean() + da_res, time_bds_res = get_seasonal_time_updater(11, 1)(test_da) # THEN - assert da_res[0] == 31 + np.testing.assert_array_equal(1, da_res) assert time_bds_res[0].data[0] == pd.to_datetime("2041-11") assert ( time_bds_res[0].data[1] == pd.to_datetime("2042-02") - pd.tseries.offsets.Day() ) - assert da_res[1] == 92 assert time_bds_res[1].data[0] == pd.to_datetime("2042-11") assert ( time_bds_res[1].data[1] == pd.to_datetime("2043-02") - pd.tseries.offsets.Day() ) - - def test_season_with_holes(self): - # WHEN - da_res, time_bds_res = seasons_resampler([1, 3, 4])(stub_tas()) - # THEN - assert da_res[0] == 92 - assert time_bds_res[0].data[0] == pd.to_datetime("2042-01") - assert ( - time_bds_res[0].data[1] - == pd.to_datetime("2042-05") - pd.tseries.offsets.Day() - ) diff --git a/icclim/tests/test_generated_api.py b/icclim/tests/test_generated_api.py index cdb96fed..fd8d83dc 100644 --- a/icclim/tests/test_generated_api.py +++ b/icclim/tests/test_generated_api.py @@ -1,6 +1,10 @@ +from datetime import datetime from unittest.mock import MagicMock, patch -import icclim # noqa (used in eval) +import numpy as np +import pytest + +import icclim from icclim.icclim_logger import Verbosity from icclim.models.constants import ( MODIFIABLE_QUANTILE_WINDOW, @@ -12,6 +16,8 @@ from icclim.models.frequency import Frequency from icclim.models.netcdf_version import 
NetcdfVersion from icclim.models.quantile_interpolation import QuantileInterpolation +from icclim.tests.test_utils import stub_tas +from icclim.user_indices.calc_operation import CalcOperation DEFAULT_ARGS = dict( in_files="pouet.nc", @@ -58,3 +64,128 @@ def test_generated_api(generic_index_fun_mock: MagicMock): # THEN expected_call_args = build_expected_args(i) generic_index_fun_mock.assert_called_with(**expected_call_args) + + +@patch("icclim.index") +def test_custom_index(index_fun_mock: MagicMock): + user_index_args = dict( + in_files="pouet_file.nc", + var_name=None, + slice_mode=Frequency.YEAR, + time_range=None, + out_file=None, + base_period_time_range=None, + only_leap_years=False, + ignore_Feb29th=False, + out_unit=None, + netcdf_version=NetcdfVersion.NETCDF4, + save_percentile=False, + logs_verbosity=Verbosity.LOW, + user_index={ + "index_name": "pouet", + "calc_operation": "nb_events", + "logical_operation": "gt", + "thresh": 0, + "date_event": True, + }, + ) + icclim.custom_index(**user_index_args) + index_fun_mock.assert_called_with(**user_index_args) + + +# integration test +def test_txx__season_slice_mode(): + tas = stub_tas() + tas.loc[{"time": "2042-02-02"}] = 295 + tas.loc[{"time": "2042-01-01"}] = 303.15 # 30ºC 273.15 + res = icclim.txx(tas, slice_mode=["season", [11, 12, 1, 2]]).compute() + np.testing.assert_array_equal(res.TXx.isel(time=0), 30) + np.testing.assert_array_equal( + res.time_bounds.isel(time=0), + [np.datetime64("2041-11-01"), np.datetime64("2042-02-28")], + ) + + +def test_txx__months_slice_mode(): + tas = stub_tas() + tas.loc[{"time": "2042-11-02"}] = 295 + tas.loc[{"time": "2042-01-01"}] = 303.15 # 30ºC 273.15 + res = icclim.txx(tas, slice_mode=["months", [11, 1]]).compute() + np.testing.assert_array_equal(res.TXx.isel(time=0), 30) + np.testing.assert_almost_equal(res.TXx.isel(time=1), 21.85) + np.testing.assert_array_equal( + res.time_bounds.isel(time=0), + [np.datetime64("2042-01-01"), np.datetime64("2042-01-31")], + ) + + +# 
integration test +@pytest.mark.parametrize( + "operator, exp_y1, exp_y2", + [ + (CalcOperation.MIN, 303.15, 280.15), + (CalcOperation.MAX, 303.15, 280.15), + (CalcOperation.SUM, 303.15, 280.15), # values below 275 are filtered out + (CalcOperation.MEAN, 303.15, 280.15), + (CalcOperation.EVENT_COUNT, 1, 1), + (CalcOperation.MAX_NUMBER_OF_CONSECUTIVE_EVENTS, 1, 1), + ], +) +def test_custom_index__season_slice_mode(operator, exp_y1, exp_y2): + tas = stub_tas(2.0) + tas.loc[{"time": "2042-01-01"}] = 303.15 + tas.loc[{"time": "2042-12-01"}] = 280.15 + res = icclim.custom_index( + in_files=tas, + slice_mode=["season", [12, 1]], + var_name="a_name", + user_index={ + "index_name": "pouet", + "calc_operation": operator, + "logical_operation": "gt", + "thresh": 275, + }, + ).compute() + np.testing.assert_almost_equal(res.pouet.isel(time=0), exp_y1) + np.testing.assert_almost_equal(res.pouet.isel(time=1), exp_y2) + + +# integration test +@pytest.mark.parametrize( + "operator, exp_y1, exp_y2", + [ + (CalcOperation.RUN_MEAN, 2, 2), + (CalcOperation.RUN_SUM, 14, 14), + ], +) +def test_custom_index_run_algos__season_slice_mode(operator, exp_y1, exp_y2): + tas = stub_tas(2.0) + res = icclim.custom_index( + in_files=tas, + slice_mode=["season", [12, 1]], + var_name="a_name", + user_index={ + "index_name": "pouet", + "calc_operation": operator, + "extreme_mode": "max", + "window_width": 7, + }, + ).compute() + np.testing.assert_almost_equal(res.pouet.isel(time=0), exp_y1) + np.testing.assert_almost_equal(res.pouet.isel(time=1), exp_y2) + + +def test_custom_index_anomaly__season_slice_mode(): + tas = stub_tas(2.0) + tas.loc[{"time": "2045-01-01"}] = 300 + res = icclim.custom_index( + in_files=tas, + slice_mode=["season", [12, 1]], + var_name="a_name", + user_index={ + "index_name": "anomaly", + "calc_operation": CalcOperation.ANOMALY, + "ref_time_range": [datetime(2042, 1, 1), datetime(2044, 12, 31)], + }, + ).compute() + np.testing.assert_almost_equal(res.anomaly, 0.96129032) diff 
--git a/icclim/tests/test_main.py b/icclim/tests/test_main.py index 67a77a30..760b8bca 100644 --- a/icclim/tests/test_main.py +++ b/icclim/tests/test_main.py @@ -1,6 +1,7 @@ import os from unittest.mock import MagicMock, patch +import cftime import numpy as np import pandas as pd import pytest @@ -40,7 +41,8 @@ class Test_Integration: """ OUTPUT_FILE = "out.nc" - TIME_RANGE = pd.date_range(start="2042-01-01", end="2045-12-31", freq="D") + CF_TIME_RANGE = pd.date_range(start="2042-01-01", end="2045-12-31", freq="D") + TIME_RANGE = xr.cftime_range("2042-01-01", end="2045-12-31", freq="D") data = xr.DataArray( data=(np.full(len(TIME_RANGE), 20).reshape((len(TIME_RANGE), 1, 1))), dims=["time", "lat", "lon"], @@ -52,6 +54,17 @@ class Test_Integration: attrs={"units": "degC"}, ) + data_cf_time = xr.DataArray( + data=(np.full(len(TIME_RANGE), 20).reshape((len(TIME_RANGE), 1, 1))), + dims=["time", "lat", "lon"], + coords=dict( + lat=[42], + lon=[42], + time=CF_TIME_RANGE, + ), + attrs={"units": "degC"}, + ) + @pytest.fixture(autouse=True) def cleanup(self): # setup @@ -81,6 +94,44 @@ def test_index_SU__monthy_sampled(self): len(np.unique(self.TIME_RANGE.year)) * 12, len(res.time) ) + def test_index_SU__monthy_sampled_cf_time(self): + res = icclim.index( + indice_name="SU", + in_files=self.data, + out_file=self.OUTPUT_FILE, + slice_mode=Frequency.MONTH, + ) + np.testing.assert_array_equal(0, res.SU) + res.time_bounds.isel(time=0) + np.testing.assert_array_equal( + len(np.unique(self.TIME_RANGE.year)) * 12, len(res.time) + ) + assert res.time_bounds.sel(time=res.time[0])[0] == cftime.DatetimeGregorian( + 2042, 1, 1, 0, 0, 0, 0 + ) + assert res.time_bounds.sel(time=res.time[0])[1] == cftime.DatetimeGregorian( + 2042, 1, 31, 0, 0, 0, 0 + ) + + def test_index_SU__DJF_cf_time(self): + res = icclim.index( + indice_name="SU", + in_files=self.data, + out_file=self.OUTPUT_FILE, + slice_mode=Frequency.DJF, + ) + np.testing.assert_array_equal(0, res.SU) + # 1 more year as DJF sampling 
create a months withs nans before + np.testing.assert_array_equal( + len(np.unique(self.TIME_RANGE.year)) + 1, len(res.time) + ) + assert res.time_bounds.sel(time=res.time[0])[0] == cftime.DatetimeGregorian( + 2041, 12, 1, 0, 0, 0, 0 + ) + assert res.time_bounds.sel(time=res.time[0])[1] == cftime.DatetimeGregorian( + 2042, 2, 28, 0, 0, 0, 0 + ) + def test_indices_from_DataArray(self): res = icclim.indices( index_group=IndexGroup.HEAT, in_files=self.data, out_file=self.OUTPUT_FILE diff --git a/icclim/tests/test_utils.py b/icclim/tests/test_utils.py index c908c422..b961c05b 100644 --- a/icclim/tests/test_utils.py +++ b/icclim/tests/test_utils.py @@ -23,9 +23,9 @@ def stub_user_index(cf_vars: List[CfVariable]): ) -def stub_tas(value: float = 1, use_dask=False): +def stub_tas(tas_value: float = 1.0, use_dask=False): da = xarray.DataArray( - data=(np.full(VALUE_COUNT, value).reshape((VALUE_COUNT, 1, 1))), + data=(np.full(VALUE_COUNT, tas_value).reshape((VALUE_COUNT, 1, 1))), dims=["time", "lat", "lon"], coords=COORDS, attrs={"units": "K"}, diff --git a/icclim/user_indices/dispatcher.py b/icclim/user_indices/calc_operation.py similarity index 84% rename from icclim/user_indices/dispatcher.py rename to icclim/user_indices/calc_operation.py index 08772e9d..67f141a4 100644 --- a/icclim/user_indices/dispatcher.py +++ b/icclim/user_indices/calc_operation.py @@ -28,13 +28,18 @@ def compute_user_index(config: UserIndexConfig) -> DataArray: def anomaly(config: UserIndexConfig): - if config.da_ref is None: + if ( + config.cf_vars[0].reference_da is None + or len(config.cf_vars[0].reference_da) == 0 + ): raise MissingIcclimInputError( - f"You must provide a in base to compute {CalcOperation.ANOMALY.value}." + f"You must provide a `ref_time_range` in user_index dictionary to compute" + f" {CalcOperation.ANOMALY.value}." + f" To be valid, it must be within the dataset time range." 
) return operators.anomaly( da=config.cf_vars[0].study_da, - da_ref=config.da_ref, + da_ref=config.cf_vars[0].reference_da, percent=config.is_percent, ) @@ -42,7 +47,7 @@ def anomaly(config: UserIndexConfig): def run_sum(config: UserIndexConfig): if config.extreme_mode is None or config.window_width is None: raise MissingIcclimInputError( - "Please provide a extreme mode and a window width." + "Please provide an extreme_mode and a window_width to user_index." ) return operators.run_sum( da=config.cf_vars[0].study_da, @@ -109,11 +114,25 @@ def count_events(config: UserIndexConfig): def sum(config: UserIndexConfig): - return _simple_reducer(operators.sum, config) + return operators.sum( + da=_check_and_get_da(config), + in_base_da=_check_and_get_in_base_da(config), + coef=config.coef, + logical_operation=config.logical_operation, + threshold=_check_and_get_simple_threshold(config.thresh), + freq=config.freq.panda_freq, + ) def mean(config: UserIndexConfig): - return _simple_reducer(operators.mean, config) + return operators.mean( + da=_check_and_get_da(config), + in_base_da=_check_and_get_in_base_da(config), + coef=config.coef, + logical_operation=config.logical_operation, + threshold=_check_and_get_simple_threshold(config.thresh), + freq=config.freq.panda_freq, + ) def min(config: UserIndexConfig): diff --git a/requirements.txt b/requirements.txt index 1cb7d033..a9046d25 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ numpy pandas pytest pyyaml -rechunker~=0.5 +rechunker>=0.3.3 setuptools xarray xclim~=0.34.0 diff --git a/requirements_dev.txt b/requirements_dev.txt index 41c72f51..9576427e 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -13,7 +13,7 @@ pydata-sphinx-theme pylint pytest pytest-cov -rechunker~=0.5 +rechunker>=0.3.3 setuptools>=49.6.0 sphinx twine diff --git a/setup.py b/setup.py index 3b827931..8b77a00b 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import find_packages, setup 
MINIMAL_REQUIREMENTS = [ - # todo: Unpin numpy 1.22 once numba work with it (numba comes with xclim) + # todo: Unpin numpy 1.22 once numba works with it (numba comes with xclim) # https://github.com/numba/numba/issues/7754 "numpy>=1.16,<1.22", "xarray>=0.17", @@ -15,14 +15,13 @@ "pyyaml", "psutil", "zarr", - # rechunker 0.4 is broken, enforce at least 0.5 - "rechunker>=0.5", + "rechunker>=0.3, !=0.4", "fsspec", ] setup( name="icclim", - version="5.2.0", + version="5.2.1", packages=find_packages(), author="Christian P.", author_email="christian.page@cerfacs.fr", diff --git a/tools/extract-icclim-funs.py b/tools/extract-icclim-funs.py index 695060b0..e300e803 100644 --- a/tools/extract-icclim-funs.py +++ b/tools/extract-icclim-funs.py @@ -111,10 +111,17 @@ def get_user_index_declaration() -> str: pop_args.append("window_width") # Pop not implemented yet pop_args.append("interpolation") + # Pop manually added arg + pop_args.append("user_index") # for `custom_index`, user_index is mandatory for pop_arg in pop_args: icclim_index_args.pop(pop_arg) fun_signature_args = build_fun_signature_args(icclim_index_args) - fun_signature = f"\n\ndef custom_index({fun_signature_args},\n) -> Dataset:\n" + fun_signature = ( + f"\n\ndef custom_index(\n" + f"user_index: UserIndexDict," + f"{fun_signature_args},\n" + f") -> Dataset:\n" + ) args_docs = get_params_docstring( list(icclim_index_args.keys()), icclim.index.__doc__ ) @@ -130,12 +137,17 @@ def get_user_index_declaration() -> str: f'"""\n' ) fun_call_args = f",\n{TAB}{TAB}".join([a + "=" + a for a in icclim_index_args]) - fun_call = f"{TAB}return icclim.index(\n{TAB}{TAB}{fun_call_args},\n{TAB})\n" + fun_call = ( + f"{TAB}return icclim.index(\n" + f"{TAB}{TAB}user_index=user_index,\n" + f"{TAB}{TAB}{fun_call_args}," + f"\n{TAB})\n" + ) return f"{fun_signature}{docstring}{fun_call}" def build_fun_signature_args(args) -> str: - return f"\n{TAB}" + f",\n{TAB}".join(map(get_arg, args.values())) + return f"\n{TAB}" + 
f",\n{TAB}".join(map(get_parameter_declaration, args.values())) def get_ecad_index_declaration(index: EcadIndex) -> str: @@ -187,16 +199,16 @@ def get_ecad_index_declaration(index: EcadIndex) -> str: return f"{fun_signature}{docstring}{fun_call}" -def get_arg(a: inspect.Parameter) -> str: - annotation = a.annotation +def get_parameter_declaration(param: inspect.Parameter) -> str: + annotation = param.annotation if type(annotation) is type: annotation = annotation.__name__ annotation = annotation.__str__().replace("NoneType", "None") annotation = annotation.__str__().replace("xarray.core.dataset.Dataset", "Dataset") - prefix = f"{a.name}: {annotation}" - if a.default is inspect._empty: + prefix = f"{param.name}: {annotation}" + if param.default is inspect._empty: return prefix - default = a.default + default = param.default if type(default) is str: default = f'"{default.__str__()}"' return f"{prefix} = {default}"