From faaaa38431bfec36fa11803a411fa5315544ec3b Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Thu, 28 Jan 2021 11:17:24 +0100 Subject: [PATCH 01/68] First attempt at replacing multimodel stats with a native iris alternative. --- esmvalcore/preprocessor/_multimodel.py | 327 ++++++------------------- 1 file changed, 78 insertions(+), 249 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 80a930bbf5..e4fda7f2db 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -1,182 +1,17 @@ """Functions to compute multi-cube statistics.""" import logging -import re from datetime import datetime -from functools import partial, reduce +from functools import reduce import cf_units import iris import numpy as np -import scipy +from iris.experimental.equalise_cubes import equalise_attributes logger = logging.getLogger(__name__) -def _plev_fix(dataset, pl_idx): - """Extract valid plev data. - - This function takes care of situations in which certain plevs are - completely masked due to unavailable interpolation boundaries. - """ - if np.ma.is_masked(dataset): - # keep only the valid plevs - if not np.all(dataset.mask[pl_idx]): - statj = np.ma.array(dataset[pl_idx], mask=dataset.mask[pl_idx]) - else: - logger.debug('All vals in plev are masked, ignoring.') - statj = None - else: - mask = np.zeros_like(dataset[pl_idx], bool) - statj = np.ma.array(dataset[pl_idx], mask=mask) - - return statj - - -def _quantile(data, axis, quantile): - """Calculate quantile. - - Workaround for calling scipy's mquantiles with arrays of >2 - dimensions Similar to iris' _percentiles function, see their - discussion: https://github.com/SciTools/iris/pull/625 - """ - # Ensure that the target axis is the last dimension. - data = np.rollaxis(data, axis, start=data.ndim) - shape = data.shape[:-1] - # Flatten any leading dimensions. - if shape: - data = data.reshape([np.prod(shape), data.shape[-1]]) - # Perform the quantile calculation. - result = scipy.stats.mstats.mquantiles(data, - quantile, - axis=-1, - alphap=1, - betap=1) - # Ensure to unflatten any leading dimensions. - if shape: - result = result.reshape(shape) - # Check whether to reduce to a scalar result - if result.shape == (1, ): - result = result[0] - - return result - - -def _compute_statistic(data, statistic_name): - """Compute multimodel statistic.""" - data = np.ma.array(data) - statistic = data[0] - - if statistic_name == 'median': - statistic_function = np.ma.median - elif statistic_name == 'mean': - statistic_function = np.ma.mean - elif statistic_name == 'std': - statistic_function = np.ma.std - elif statistic_name == 'max': - statistic_function = np.ma.max - elif statistic_name == 'min': - statistic_function = np.ma.min - elif re.match(r"^(p\d{1,2})(\.\d*)?$", statistic_name): - # percentiles between p0 and p99.99999... 
- quantile = float(statistic_name[1:]) / 100 - statistic_function = partial(_quantile, quantile=quantile) - else: - raise ValueError(f'No such statistic: `{statistic_name}`') - - # no plevs - if len(data[0].shape) < 3: - # get all NOT fully masked data - u_data - # data is per time point - # so we can safely NOT compute stats for single points - if data.ndim == 1: - u_datas = data - else: - u_datas = [d for d in data if not np.all(d.mask)] - if len(u_datas) > 1: - statistic = statistic_function(data, axis=0) - else: - statistic.mask = True - return statistic - - # plevs - for j in range(statistic.shape[0]): - plev_check = [] - for cdata in data: - fixed_data = _plev_fix(cdata, j) - if fixed_data is not None: - plev_check.append(fixed_data) - - # check for nr datasets - if len(plev_check) > 1: - plev_check = np.ma.array(plev_check) - statistic[j] = statistic_function(plev_check, axis=0) - else: - statistic.mask[j] = True - - return statistic - - -def _put_in_cube(template_cube, cube_data, statistic, t_axis): - """Quick cube building and saving.""" - tunits = template_cube.coord('time').units - times = iris.coords.DimCoord(t_axis, - standard_name='time', - units=tunits, - var_name='time', - long_name='time') - times.bounds = None - times.guess_bounds() - - coord_names = [c.long_name for c in template_cube.coords()] - coord_names.extend([c.standard_name for c in template_cube.coords()]) - if 'latitude' in coord_names: - lats = template_cube.coord('latitude') - else: - lats = None - if 'longitude' in coord_names: - lons = template_cube.coord('longitude') - else: - lons = None - - # no plevs - if len(template_cube.shape) == 3: - cspec = [(times, 0), (lats, 1), (lons, 2)] - # plevs - elif len(template_cube.shape) == 4: - plev = template_cube.coord('air_pressure') - cspec = [(times, 0), (plev, 1), (lats, 2), (lons, 3)] - elif len(template_cube.shape) == 1: - cspec = [ - (times, 0), - ] - elif len(template_cube.shape) == 2: - # If you're going to hardwire air_pressure into this, - # might as well have depth here too. 
- plev = template_cube.coord('depth') - cspec = [ - (times, 0), - (plev, 1), - ] - - # correct dspec if necessary - fixed_dspec = np.ma.fix_invalid(cube_data, copy=False, fill_value=1e+20) - # put in cube - stats_cube = iris.cube.Cube(fixed_dspec, - dim_coords_and_dims=cspec, - long_name=statistic) - coord_names = [coord.name() for coord in template_cube.coords()] - if 'air_pressure' in coord_names: - if len(template_cube.shape) == 3: - stats_cube.add_aux_coord(template_cube.coord('air_pressure')) - - stats_cube.var_name = template_cube.var_name - stats_cube.long_name = template_cube.long_name - stats_cube.standard_name = template_cube.standard_name - stats_cube.units = template_cube.units - return stats_cube - - def _get_consistent_time_unit(cubes): """Return cubes' time unit if consistent, standard calendar otherwise.""" t_units = [cube.coord('time').units for cube in cubes] @@ -212,7 +47,6 @@ def _unify_time_coordinates(cubes): if 0 not in np.diff(years): # yearly data dates = [datetime(year, 7, 1, 0, 0, 0) for year in years] - elif 0 not in np.diff(months): # monthly data dates = [ @@ -242,115 +76,110 @@ def _unify_time_coordinates(cubes): cube.coord('time').guess_bounds() -def _get_time_slice(cubes, time): - """Fill time slice array with cubes' data if time in cube, else mask.""" - time_slice = [] - for cube in cubes: - cube_time = cube.coord('time').points - if time in cube_time: - idx = int(np.argwhere(cube_time == time)) - subset = cube.data[idx] - else: - subset = np.ma.empty(list(cube.shape[1:])) - subset.mask = True - time_slice.append(subset) - return time_slice +def _interpolate(cubes, func): + """Expand or subset cubes so they share a common time span.""" + _unify_time_coordinates(cubes) + time_spans = [cube.coord('time').points for cube in cubes] + new_times = reduce(func, time_spans) + # new_times = cubes[0].coord('time').units.num2date(new_times) + sample_points = [('time', new_times)] + scheme = iris.analysis.Nearest(extrapolation_mode='nan') + return [cube.interpolate(sample_points, scheme) for cube in cubes] -def _assemble_data(cubes, statistic, span='overlap'): - """Get statistical data in iris cubes.""" - # New time array representing the union or intersection of all cubes - time_spans = [cube.coord('time').points for cube in cubes] +def _extend(cubes): + return _interpolate(cubes, np.intersect1d) + + +def _subset(cubes): + """Only keep the times that are present in all cubes.""" + return _interpolate(cubes, np.union1d) + + +def _align(cubes, span): + """Expand or subset cubes so they share a common time span.""" + if span == 'overlap': - new_times = reduce(np.intersect1d, time_spans) + new_cubes = _subset(cubes) elif span == 'full': - new_times = reduce(np.union1d, time_spans) - n_times = len(new_times) + new_cubes = _extend(cubes) + else: + raise ValueError("Unknown value for span. 
Expected 'full' or 'overlap'" + "got {}".format(span)) + + for cube in new_cubes: + cube.coord('time').guess_bounds() + return new_cubes - # Target array to populate with computed statistics - new_shape = [n_times] + list(cubes[0].shape[1:]) - stats_data = np.ma.zeros(new_shape, dtype=np.dtype('float32')) - # Realize all cubes at once instead of separately for each time slice - _ = [cube.data for cube in cubes] +def _combine(cubes, dim='new_dim'): + """Merge iris cubes into a single big cube with new dimension.""" + equalise_attributes(cubes) - # Make time slices and compute stats - for i, time in enumerate(new_times): - time_data = _get_time_slice(cubes, time) - stats_data[i] = _compute_statistic(time_data, statistic) + for i, cube in enumerate(cubes): + concat_dim = iris.coords.AuxCoord(i, var_name=dim) + cube.add_aux_coord(concat_dim) - template = cubes[0] - stats_cube = _put_in_cube(template, stats_data, statistic, new_times) - return stats_cube + cubes = iris.cube.CubeList(cubes) + return cubes.merge_cube() + + +def _compute(cube, statistic, dim='new_dim'): + """Compute statistic.""" + operators = vars(iris.analysis) + + try: + operator = operators[statistic.upper()] + except KeyError as err: + raise ValueError( + f'Statistic `{statistic}` not supported in', + '`ensemble_statistics`. Choose supported operator from', + '`iris.analysis package`.') from err + + # This will always return a masked array + return cube.collapsed('concat_dim', operator) def _multicube_statistics(cubes, statistics, span): - """Compute statistics over multiple cubes. + """Compute multi-cube statistics. + + Cubes are merged and subsequently collapsed along a new auxiliary + coordinate. Inconsistent attributes will be removed. - Can be used e.g. for ensemble or multi-model statistics. + This function deals with non-homogeneous cubes by taking the time union + computed across a common overlap in time (set span: overlap) or across the + full length in time of each model (set span: full). Apart from the time + coordinate, cubes must have consistent shapes. - This function was designed to work on (max) four-dimensional data: - time, vertical axis, two horizontal axes. + This method uses iris' built in functions, exposing the operators in + iris.analysis and supporting lazy evaluation. - Apart from the time coordinate, cubes must have consistent shapes. There - are two options to combine time coordinates of different lengths, see - the `span` argument. + Note: some of the operators in iris.analysis require additional + arguments, such as percentiles or weights. These operators are + currently not supported. Parameters ---------- cubes: list - list of cubes over which the statistics will be computed; + list of cubes to be used in multimodel stat computation; statistics: list - statistical metrics to be computed. Available options: mean, median, - max, min, std, or pXX.YY (for percentile XX.YY; decimal part optional). + statistical measures to be computed. Choose from the + operators listed in the iris.analysis package. span: str - overlap or full; if overlap, statitsticss are computed on common time- - span; if full, statistics are computed on full time spans, ignoring - missing data. + 'overlap' or 'full'. If 'overlap', statitsticss are computed on common + time span; if 'full', statistics are computed on full time spans, + ignoring missing data. Returns ------- dict dictionary of statistics cubes with statistics' names as keys. - - Raises - ------ - ValueError - If span is neither overlap nor full. 
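For reference, the merge-and-collapse pattern that `_combine` and `_compute` introduce above can be exercised standalone: tag each cube with a scalar auxiliary coordinate so that `merge_cube` stacks the cubes along a new leading dimension, then collapse that dimension with an `iris.analysis` operator. A minimal sketch, with illustrative names and data (not part of the patch):

    import iris
    import iris.analysis
    import iris.coords
    import iris.cube

    # Two cubes with identical metadata except for a scalar auxiliary
    # coordinate; merge_cube stacks them along a new leading dimension.
    cubes = iris.cube.CubeList()
    for i, data in enumerate([[1.0, 1.0], [3.0, 5.0]]):
        cube = iris.cube.Cube(data, var_name='tas')
        cube.add_dim_coord(
            iris.coords.DimCoord([0.0, 1.0], standard_name='time',
                                 units='days since 1850-01-01'), 0)
        cube.add_aux_coord(iris.coords.AuxCoord(i, var_name='new_dim'))
        cubes.append(cube)

    big_cube = cubes.merge_cube()  # shape (2, 2)
    # Collapsing the new dimension with an iris.analysis operator yields
    # the multi-cube statistic; the result is a masked array.
    mean = big_cube.collapsed('new_dim', iris.analysis.MEAN)
    print(mean.data)  # [2.0 3.0]
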
""" - if len(cubes) < 2: - logger.info('Found only 1 cube; no statistics computed for %r', - list(cubes)[0]) - return {statistic: cubes[0] for statistic in statistics} - - logger.debug('Multicube statistics: computing: %s', statistics) - - # Reset time coordinates and make cubes share the same calendar - _unify_time_coordinates(cubes) - - # Check whether input is valid - if span == 'overlap': - # check if we have any time overlap - times = [cube.coord('time').points for cube in cubes] - overlap = reduce(np.intersect1d, times) - if len(overlap) <= 1: - logger.info("Time overlap between cubes is none or a single point." - "check datasets: will not compute statistics.") - return cubes - logger.debug("Using common time overlap between " - "datasets to compute statistics.") - elif span == 'full': - logger.debug("Using full time spans to compute statistics.") - else: - raise ValueError( - "Unexpected value for span {}, choose from 'overlap', 'full'". - format(span)) - - # Compute statistics + aligned_cubes = _align(cubes, span=span) + big_cube = _combine(aligned_cubes) statistics_cubes = {} for statistic in statistics: - statistic_cube = _assemble_data(cubes, statistic, span) - statistics_cubes[statistic] = statistic_cube + statistics_cubes[statistic] = _compute(big_cube, statistic) return statistics_cubes From 4e5a67512e5efd2f4a4509ef8c3fb968f427c5e3 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Thu, 28 Jan 2021 11:33:50 +0100 Subject: [PATCH 02/68] temporary test draft to derive new tests from --- .../preprocessor/_multimodel/test_times.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 tests/unit/preprocessor/_multimodel/test_times.py diff --git a/tests/unit/preprocessor/_multimodel/test_times.py b/tests/unit/preprocessor/_multimodel/test_times.py new file mode 100644 index 0000000000..cfd6e04e75 --- /dev/null +++ b/tests/unit/preprocessor/_multimodel/test_times.py @@ -0,0 +1,56 @@ +import iris +# import pytest +from cf_units import Unit +import numpy as np + +from esmvalcore.preprocessor._multimodel import multi_model_statistics + +def timecoord(days=[1, 2], calendar='gregorian'): + """Return a standard time coordinate with the given days as time points.""" + return iris.coords.DimCoord(days, standard_name='time', units = Unit('days since 1850-01-01', calendar=calendar)) + +cube1 = iris.cube.Cube([1, 1], dim_coords_and_dims=[(timecoord([1, 2]), 0)]) +cube2 = iris.cube.Cube([2, 2, 2], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) +cube3 = iris.cube.Cube([3, 3], dim_coords_and_dims=[(timecoord([1, 3]), 0)]) + +# overlap between cube 1 and 2 +result = multi_model_statistics([cube1, cube2], span='overlap', statistics=['mean'])['mean'] +expected = iris.cube.Cube([1.5, 1.5], dim_coords_and_dims=[(timecoord([1, 2]), 0)]) +assert np.all(result.data == expected.data) + +# overlap between cube 1 and 3 +result = multi_model_statistics([cube1, cube3], span='overlap', statistics=['mean'])['mean'] +expected = iris.cube.Cube([2], dim_coords_and_dims=[(timecoord([1]), 0)]) +assert np.all(result.data == expected.data) + +overlap between cube 2 and 3 +result = multi_model_statistics([cube2, cube3], span='overlap', statistics=['mean'])['mean'] +expected = iris.cube.Cube([2.5, 2.5], dim_coords_and_dims=[(timecoord([1, 3]), 0)]) +assert np.all(result.data == expected.data) + +# overlap between cube 1 and 2 and 3 +result = multi_model_statistics([cube1, cube2, cube3], span='overlap', statistics=['mean'])['mean'] +expected = iris.cube.Cube([2], 
dim_coords_and_dims=[(timecoord([1]), 0)]) +assert np.all(result.data == expected.data) + +################################################################################### + +# full between cube 1 and 2 +result = multi_model_statistics([cube1, cube2], span='full', statistics=['mean'])['mean'] +expected = iris.cube.Cube([1.5, 1.5, 2], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) +assert np.all(result.data == expected.data) + +# full between cube 1 and 3 +result = multi_model_statistics([cube1, cube3], span='full', statistics=['mean'])['mean'] +expected = iris.cube.Cube([2, 1, 3], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) +assert np.all(result.data == expected.data) + +# full between cube 2 and 3 +result = multi_model_statistics([cube2, cube3], span='full', statistics=['mean'])['mean'] +expected = iris.cube.Cube([2.5, 2, 2.5], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) +assert np.all(result.data == expected.data) + +# full between cube 1 and 2 and 3 +result = multi_model_statistics([cube1, cube2, cube3], span='full', statistics=['mean'])['mean'] +expected = iris.cube.Cube([2, 1.5, 2.5], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) +assert np.all(result.data == expected.data) \ No newline at end of file From d5b75b5442f72163f0ff8cc6778e4028bd6e4503 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Thu, 28 Jan 2021 11:43:19 +0100 Subject: [PATCH 03/68] Support percentiles --- esmvalcore/preprocessor/_multimodel.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index e4fda7f2db..36948ea48b 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -1,6 +1,7 @@ """Functions to compute multi-cube statistics.""" import logging +import re from datetime import datetime from functools import reduce @@ -126,18 +127,25 @@ def _combine(cubes, dim='new_dim'): def _compute(cube, statistic, dim='new_dim'): """Compute statistic.""" - operators = vars(iris.analysis) - try: - operator = operators[statistic.upper()] - except KeyError as err: - raise ValueError( - f'Statistic `{statistic}` not supported in', - '`ensemble_statistics`. Choose supported operator from', - '`iris.analysis package`.') from err + if re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): + # percentiles between p0 and p99.99999... + percentile = float(statistic[1:]) + operator = iris.analysis.PERCENTILE + kwargs = {'percent': percentile} + else: + try: + operators = vars(iris.analysis) + operator = operators[statistic.upper()] + kwargs = {} + except KeyError as err: + raise ValueError( + f'Statistic `{statistic}` not supported in', + '`ensemble_statistics`. 
Choose supported operator from', + '`iris.analysis package`.') from err # This will always return a masked array - return cube.collapsed('concat_dim', operator) + return cube.collapsed('concat_dim', operator, **kwargs) def _multicube_statistics(cubes, statistics, span): From 98580d18b2f885e81198391c1806a16b443e0908 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Thu, 28 Jan 2021 12:07:10 +0100 Subject: [PATCH 04/68] Docstrings and check for no overlap --- esmvalcore/preprocessor/_multimodel.py | 57 ++++++++++---------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 36948ea48b..443bfa01d9 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -81,7 +81,11 @@ def _interpolate(cubes, func): """Expand or subset cubes so they share a common time span.""" _unify_time_coordinates(cubes) time_spans = [cube.coord('time').points for cube in cubes] + new_times = reduce(func, time_spans) + if len(new_times) == 0: + raise ValueError("No time overlap found between input cubes.") + # new_times = cubes[0].coord('time').units.num2date(new_times) sample_points = [('time', new_times)] scheme = iris.analysis.Nearest(extrapolation_mode='nan') @@ -144,44 +148,18 @@ def _compute(cube, statistic, dim='new_dim'): '`ensemble_statistics`. Choose supported operator from', '`iris.analysis package`.') from err + logger.debug('Multicube statistics: computing: %s', statistic) # This will always return a masked array return cube.collapsed('concat_dim', operator, **kwargs) def _multicube_statistics(cubes, statistics, span): - """Compute multi-cube statistics. + """Compute statistics over multiple cubes. + + Can be used e.g. for ensemble or multi-model statistics. Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. - - This function deals with non-homogeneous cubes by taking the time union - computed across a common overlap in time (set span: overlap) or across the - full length in time of each model (set span: full). Apart from the time - coordinate, cubes must have consistent shapes. - - This method uses iris' built in functions, exposing the operators in - iris.analysis and supporting lazy evaluation. - - Note: some of the operators in iris.analysis require additional - arguments, such as percentiles or weights. These operators are - currently not supported. - - Parameters - ---------- - cubes: list - list of cubes to be used in multimodel stat computation; - statistics: list - statistical measures to be computed. Choose from the - operators listed in the iris.analysis package. - span: str - 'overlap' or 'full'. If 'overlap', statitsticss are computed on common - time span; if 'full', statistics are computed on full time spans, - ignoring missing data. - - Returns - ------- - dict - dictionary of statistics cubes with statistics' names as keys. """ aligned_cubes = _align(cubes, span=span) big_cube = _combine(aligned_cubes) @@ -224,18 +202,25 @@ def multi_model_statistics(products, span, statistics, output_products=None): workflow and provenance information, and this option should typically be ignored. - This function was designed to work on (max) four-dimensional data: time, - vertical axis, two horizontal axes. Apart from the time coordinate, cubes - must have consistent shapes. There are two options to combine time - coordinates of different lengths, see the `span` argument. 
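The pXX.YY support added in PATCH 03 above boils down to mapping a statistic name onto an iris aggregator plus keyword arguments: percentile names go to `iris.analysis.PERCENTILE` with a `percent` kwarg, everything else is looked up by uppercased name. A standalone sketch of that mapping, assuming the same regex as the patch:

    import re

    import iris.analysis

    def parse_statistic(statistic):
        """Map a statistic name to an iris aggregator and its kwargs."""
        if re.match(r"^(p\d{1,2})(\.\d*)?$", statistic):
            # 'p75' or 'p99.5' -> PERCENTILE with percent=75.0 / 99.5
            return iris.analysis.PERCENTILE, {'percent': float(statistic[1:])}
        # 'mean' -> iris.analysis.MEAN, 'max' -> iris.analysis.MAX, etc.
        return getattr(iris.analysis, statistic.upper()), {}

    operator, kwargs = parse_statistic('p99.5')
    # ... then: cube.collapsed('new_dim', operator, **kwargs)
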
+ Apart from the time coordinate, cubes must have consistent shapes. There + are two options to combine time coordinates of different lengths, see the + `span` argument. + + Uses the statistical operators in iris.analysis, including 'mean', + 'median', 'min', 'max', and 'std'. Percentiles are also supported and can + be specified like pXX.YY (for percentile XX.YY; decimal part optional). + + Note: some of the operators in iris.analysis require additional arguments. + Except for percentiles, these operators are currently not supported. Parameters ---------- products: list Cubes (or products) over which the statistics will be computed. statistics: list - Statistical metrics to be computed. Available options: mean, median, - max, min, std, or pXX.YY (for percentile XX.YY; decimal part optional). + Statistical metrics to be computed, e.g. ['mean', 'max']. Choose from + the operators listed in the iris.analysis package. Percentiles can be + specified like 'pXX.YY'. span: str Overlap or full; if overlap, statitstics are computed on common time- span; if full, statistics are computed on full time spans, ignoring From a0336cca8fc027e57df9220afd6fcec8287ea588 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Fri, 29 Jan 2021 11:50:36 +0100 Subject: [PATCH 05/68] Resolve span gets its own function --- esmvalcore/preprocessor/_multimodel.py | 46 +++++++++++--------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 443bfa01d9..75af9b09eb 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -77,40 +77,32 @@ def _unify_time_coordinates(cubes): cube.coord('time').guess_bounds() -def _interpolate(cubes, func): - """Expand or subset cubes so they share a common time span.""" - _unify_time_coordinates(cubes) - time_spans = [cube.coord('time').points for cube in cubes] +def _resolve_span(all_times, span): + """Construct new time array based on the span parameter.""" + if span == 'full': + new_times = reduce(np.union1d, all_times) + return new_times - new_times = reduce(func, time_spans) - if len(new_times) == 0: + if span == 'overlap': + new_times = reduce(np.intersect1d, all_times) + if new_times.size > 0: + return new_times raise ValueError("No time overlap found between input cubes.") - # new_times = cubes[0].coord('time').units.num2date(new_times) - sample_points = [('time', new_times)] - scheme = iris.analysis.Nearest(extrapolation_mode='nan') - return [cube.interpolate(sample_points, scheme) for cube in cubes] - - -def _extend(cubes): - return _interpolate(cubes, np.intersect1d) - - -def _subset(cubes): - """Only keep the times that are present in all cubes.""" - return _interpolate(cubes, np.union1d) + raise ValueError("Unknown value for span. Expected 'full' or 'overlap'" + "got {}".format(span)) def _align(cubes, span): """Expand or subset cubes so they share a common time span.""" + _unify_time_coordinates(cubes) + all_time_arrays = [cube.coord('time').points for cube in cubes] + new_times = _resolve_span(all_time_arrays, span) - if span == 'overlap': - new_cubes = _subset(cubes) - elif span == 'full': - new_cubes = _extend(cubes) - else: - raise ValueError("Unknown value for span. 
Expected 'full' or 'overlap'" - "got {}".format(span)) + # new_times = cubes[0].coord('time').units.num2date(new_times) + sample_points = [('time', new_times)] + scheme = iris.analysis.Nearest(extrapolation_mode='nan') + new_cubes = [cube.interpolate(sample_points, scheme) for cube in cubes] for cube in new_cubes: cube.coord('time').guess_bounds() @@ -150,7 +142,7 @@ def _compute(cube, statistic, dim='new_dim'): logger.debug('Multicube statistics: computing: %s', statistic) # This will always return a masked array - return cube.collapsed('concat_dim', operator, **kwargs) + return cube.collapsed(dim, operator, **kwargs) def _multicube_statistics(cubes, statistics, span): From a5322a2e4f9520ead612679ac4eaff7244c97e8e Mon Sep 17 00:00:00 2001 From: Barbara Vreede Date: Fri, 29 Jan 2021 11:53:11 +0100 Subject: [PATCH 06/68] fix merge conflict --- esmvalcore/preprocessor/_multimodel.py | 15 +- .../_multimodel/test_multimodel.py | 216 +++++++++--------- .../preprocessor/_multimodel/test_times.py | 2 +- 3 files changed, 122 insertions(+), 111 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 75af9b09eb..6c2ba99af6 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -89,8 +89,19 @@ def _resolve_span(all_times, span): return new_times raise ValueError("No time overlap found between input cubes.") - raise ValueError("Unknown value for span. Expected 'full' or 'overlap'" - "got {}".format(span)) + # new_times = cubes[0].coord('time').units.num2date(new_times) + sample_points = [('time', new_times)] + scheme = iris.analysis.Nearest(extrapolation_mode='nan') + return [cube.interpolate(sample_points, scheme) for cube in cubes] + + +def _extend(cubes): + return _interpolate(cubes, np.union1d) + + +def _subset(cubes): + """Only keep the times that are present in all cubes.""" + return _interpolate(cubes, np.intersect1d) def _align(cubes, span): diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 1899e09097..38c7d7a5d9 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -8,14 +8,14 @@ import tests from esmvalcore.preprocessor import multi_model_statistics -from esmvalcore.preprocessor._multimodel import ( - _assemble_data, - _compute_statistic, - _get_time_slice, - _plev_fix, - _put_in_cube, - _unify_time_coordinates, -) +# from esmvalcore.preprocessor._multimodel import ( +# _assemble_data, +# _compute_statistic, +# _get_time_slice, +# _plev_fix, +# _put_in_cube, +# _unify_time_coordinates, +# ) class Test(tests.Test): @@ -82,15 +82,15 @@ def setUp(self): coords_spec6 = [(daily1, 0), (zcoord, 1), (lats, 2), (lons, 3)] self.cube6 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec6) - def test_compute_statistic(self): - """Test statistic.""" - data = [self.cube1.data[0], self.cube2.data[0]] - stat_mean = _compute_statistic(data, "mean") - stat_median = _compute_statistic(data, "median") - expected_mean = np.ma.ones((3, 2, 2)) - expected_median = np.ma.ones((3, 2, 2)) - self.assert_array_equal(stat_mean, expected_mean) - self.assert_array_equal(stat_median, expected_median) + # def test_compute_statistic(self): + # """Test statistic.""" + # data = [self.cube1.data[0], self.cube2.data[0]] + # stat_mean = _compute_statistic(data, "mean") + # stat_median = _compute_statistic(data, "median") + # expected_mean = np.ma.ones((3, 2, 2)) + # 
expected_median = np.ma.ones((3, 2, 2)) + # self.assert_array_equal(stat_mean, expected_mean) + # self.assert_array_equal(stat_median, expected_median) def test_compute_full_statistic_mon_cube(self): data = [self.cube1, self.cube2] @@ -128,97 +128,97 @@ def test_compute_overlap_statistic_yr_cube(self): expected_ovlap_mean = np.ma.ones((2, 3, 2, 2)) self.assert_array_equal(stats['mean'].data, expected_ovlap_mean) - def test_compute_std(self): - """Test statistic.""" - data = [self.cube1.data[0], self.cube2.data[0] * 2] - stat = _compute_statistic(data, "std") - expected = np.ma.ones((3, 2, 2)) * 0.5 - expected[0, 0, 0] = 0 - self.assert_array_equal(stat, expected) - - def test_compute_max(self): - """Test statistic.""" - data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] - stat = _compute_statistic(data, "max") - expected = np.ma.ones((3, 2, 2)) * 2 - expected[0, 0, 0] = 0.5 - self.assert_array_equal(stat, expected) - - def test_compute_min(self): - """Test statistic.""" - data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] - stat = _compute_statistic(data, "min") - expected = np.ma.ones((3, 2, 2)) * 0.5 - self.assert_array_equal(stat, expected) - - def test_compute_percentile(self): - """Test statistic.""" - data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] - stat = _compute_statistic(data, "p75") - expected = np.ma.ones((3, 2, 2)) * 1.625 - expected[0, 0, 0] = 0.5 - self.assert_array_equal(stat, expected) - - def test_put_in_cube(self): - """Test put in cube.""" - cube_data = np.ma.ones((2, 3, 2, 2)) - stat_cube = _put_in_cube(self.cube1, cube_data, "mean", t_axis=[1, 2]) - self.assert_array_equal(stat_cube.data, self.cube1.data) - - def test_assemble_overlap_data(self): - """Test overlap data.""" - comp_ovlap_mean = _assemble_data([self.cube1, self.cube1], - "mean", - span='overlap') - expected_ovlap_mean = np.ma.ones((2, 3, 2, 2)) - self.assert_array_equal(comp_ovlap_mean.data, expected_ovlap_mean) - - def test_assemble_full_data(self): - """Test full data.""" - comp_full_mean = _assemble_data([self.cube1, self.cube2], - "mean", - span='full') - expected_full_mean = np.ma.ones((5, 3, 2, 2)) - expected_full_mean.mask = np.ones((5, 3, 2, 2)) - expected_full_mean.mask[1] = False - self.assert_array_equal(comp_full_mean.data, expected_full_mean) - - def test_plev_fix(self): - """Test plev fix.""" - fixed_data = _plev_fix(self.cube2.data, 1) - expected_data = np.ma.ones((3, 2, 2)) - self.assert_array_equal(expected_data, fixed_data) - - def test_unify_time_coordinates(self): - """Test set common calenar.""" - cube1 = self.cube1 - time1 = cube1.coord('time') - t_unit1 = time1.units - dates = t_unit1.num2date(time1.points) - - t_unit2 = Unit('days since 1850-01-01', calendar='gregorian') - time2 = t_unit2.date2num(dates) - cube2 = self.cube1.copy() - cube2.coord('time').points = time2 - cube2.coord('time').units = t_unit2 - _unify_time_coordinates([cube1, cube2]) - self.assertEqual(cube1.coord('time'), cube2.coord('time')) - - def test_get_time_slice_all(self): - """Test get time slice if all cubes have data.""" - cubes = [self.cube1, self.cube2] - result = _get_time_slice(cubes, time=45) - expected = [self.cube1[1].data, self.cube2[0].data] - self.assert_array_equal(expected, result) - - def test_get_time_slice_part(self): - """Test get time slice if all cubes have data.""" - cubes = [self.cube1, self.cube2] - result = _get_time_slice(cubes, time=14) - masked = np.ma.empty(list(cubes[0].shape[1:])) - masked.mask = True - expected = [self.cube1[0].data, masked] - 
self.assert_array_equal(expected, result) + # def test_compute_std(self): + # """Test statistic.""" + # data = [self.cube1.data[0], self.cube2.data[0] * 2] + # stat = _compute_statistic(data, "std") + # expected = np.ma.ones((3, 2, 2)) * 0.5 + # expected[0, 0, 0] = 0 + # self.assert_array_equal(stat, expected) + + # def test_compute_max(self): + # """Test statistic.""" + # data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] + # stat = _compute_statistic(data, "max") + # expected = np.ma.ones((3, 2, 2)) * 2 + # expected[0, 0, 0] = 0.5 + # self.assert_array_equal(stat, expected) + + # def test_compute_min(self): + # """Test statistic.""" + # data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] + # stat = _compute_statistic(data, "min") + # expected = np.ma.ones((3, 2, 2)) * 0.5 + # self.assert_array_equal(stat, expected) + + # def test_compute_percentile(self): + # """Test statistic.""" + # data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] + # stat = _compute_statistic(data, "p75") + # expected = np.ma.ones((3, 2, 2)) * 1.625 + # expected[0, 0, 0] = 0.5 + # self.assert_array_equal(stat, expected) + + # def test_put_in_cube(self): + # """Test put in cube.""" + # cube_data = np.ma.ones((2, 3, 2, 2)) + # stat_cube = _put_in_cube(self.cube1, cube_data, "mean", t_axis=[1, 2]) + # self.assert_array_equal(stat_cube.data, self.cube1.data) + + # # def test_assemble_overlap_data(self): + # # """Test overlap data.""" + # # comp_ovlap_mean = _assemble_data([self.cube1, self.cube1], + # # "mean", + # # span='overlap') + # # expected_ovlap_mean = np.ma.ones((2, 3, 2, 2)) + # # self.assert_array_equal(comp_ovlap_mean.data, expected_ovlap_mean) + + # # def test_assemble_full_data(self): + # # """Test full data.""" + # # comp_full_mean = _assemble_data([self.cube1, self.cube2], + # # "mean", + # # span='full') + # # expected_full_mean = np.ma.ones((5, 3, 2, 2)) + # # expected_full_mean.mask = np.ones((5, 3, 2, 2)) + # # expected_full_mean.mask[1] = False + # # self.assert_array_equal(comp_full_mean.data, expected_full_mean) + + # def test_plev_fix(self): + # """Test plev fix.""" + # fixed_data = _plev_fix(self.cube2.data, 1) + # expected_data = np.ma.ones((3, 2, 2)) + # self.assert_array_equal(expected_data, fixed_data) + + # def test_unify_time_coordinates(self): + # """Test set common calenar.""" + # cube1 = self.cube1 + # time1 = cube1.coord('time') + # t_unit1 = time1.units + # dates = t_unit1.num2date(time1.points) + + # t_unit2 = Unit('days since 1850-01-01', calendar='gregorian') + # time2 = t_unit2.date2num(dates) + # cube2 = self.cube1.copy() + # cube2.coord('time').points = time2 + # cube2.coord('time').units = t_unit2 + # _unify_time_coordinates([cube1, cube2]) + # self.assertEqual(cube1.coord('time'), cube2.coord('time')) + + # def test_get_time_slice_all(self): + # """Test get time slice if all cubes have data.""" + # cubes = [self.cube1, self.cube2] + # result = _get_time_slice(cubes, time=45) + # expected = [self.cube1[1].data, self.cube2[0].data] + # self.assert_array_equal(expected, result) + + # def test_get_time_slice_part(self): + # """Test get time slice if all cubes have data.""" + # cubes = [self.cube1, self.cube2] + # result = _get_time_slice(cubes, time=14) + # masked = np.ma.empty(list(cubes[0].shape[1:])) + # masked.mask = True + # expected = [self.cube1[0].data, masked] + # self.assert_array_equal(expected, result) if __name__ == '__main__': diff --git a/tests/unit/preprocessor/_multimodel/test_times.py b/tests/unit/preprocessor/_multimodel/test_times.py index 
cfd6e04e75..6b8c8c5220 100644 --- a/tests/unit/preprocessor/_multimodel/test_times.py +++ b/tests/unit/preprocessor/_multimodel/test_times.py @@ -23,7 +23,7 @@ def timecoord(days=[1, 2], calendar='gregorian'): expected = iris.cube.Cube([2], dim_coords_and_dims=[(timecoord([1]), 0)]) assert np.all(result.data == expected.data) -overlap between cube 2 and 3 +# overlap between cube 2 and 3 result = multi_model_statistics([cube2, cube3], span='overlap', statistics=['mean'])['mean'] expected = iris.cube.Cube([2.5, 2.5], dim_coords_and_dims=[(timecoord([1, 3]), 0)]) assert np.all(result.data == expected.data) From 8bbd878078c0d2ca306a61261d3269a87cde09ba Mon Sep 17 00:00:00 2001 From: Barbara Vreede Date: Fri, 29 Jan 2021 15:08:01 +0100 Subject: [PATCH 07/68] first set unit tests for multimodel stats and span --- esmvalcore/preprocessor/_multimodel.py | 15 +---- .../preprocessor/_multimodel/test_span.py | 60 +++++++++++++++++++ 2 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 tests/unit/preprocessor/_multimodel/test_span.py diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 6c2ba99af6..75af9b09eb 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -89,19 +89,8 @@ def _resolve_span(all_times, span): return new_times raise ValueError("No time overlap found between input cubes.") - # new_times = cubes[0].coord('time').units.num2date(new_times) - sample_points = [('time', new_times)] - scheme = iris.analysis.Nearest(extrapolation_mode='nan') - return [cube.interpolate(sample_points, scheme) for cube in cubes] - - -def _extend(cubes): - return _interpolate(cubes, np.union1d) - - -def _subset(cubes): - """Only keep the times that are present in all cubes.""" - return _interpolate(cubes, np.intersect1d) + raise ValueError("Unknown value for span. 
Expected 'full' or 'overlap'" + "got {}".format(span)) def _align(cubes, span): diff --git a/tests/unit/preprocessor/_multimodel/test_span.py b/tests/unit/preprocessor/_multimodel/test_span.py new file mode 100644 index 0000000000..2cc66626d1 --- /dev/null +++ b/tests/unit/preprocessor/_multimodel/test_span.py @@ -0,0 +1,60 @@ +import pytest +import iris +from cf_units import Unit +import numpy as np + +# from esmvalcore.preprocessor._multimodel import multi_model_statistics +import esmvalcore.preprocessor._multimodel as mm + +SPAN_OPTIONS = ('overlap', 'full') + +STATISTICS_OPTIONS = ('mean', 'std', 'std_dev', 'min', 'max', 'median') + +EXPECTED = { + 'overlap': { + 'mean': [1.5, 1.5], + 'std': [0.5, 0.5], + 'std_dev': [0.5, 0.5], + 'min': [1, 1], + 'max': [2, 2], + 'median': [1.5, 1.5] + }, + 'full': { + 'mean': [1.5, 1.5], + 'std': [0.5, 0.5], + 'std_dev': [0.5, 0.5], + 'min': [1, 1], + 'max': [2, 2], + 'median': [1.5, 1.5] + } +} + + +def timecoord(days=[1, 2], calendar='gregorian'): + """Return a standard time coordinate with the given days as time points.""" + return iris.coords.DimCoord(days, + standard_name='time', + units=Unit('days since 1850-01-01', + calendar=calendar)) + + +@pytest.fixture +def cubes_time(): + """Set up cubes used for testing multimodel statistics""" + cube1 = iris.cube.Cube([1, 1], + dim_coords_and_dims=[(timecoord([1, 2]), 0)]) + cube2 = iris.cube.Cube([9, 9, 9], + dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) + return ([cube1, cube2]) + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +@pytest.mark.parametrize('stats', STATISTICS_OPTIONS) +def test_mean(cubes_time, span, stats): + '''overlap between cube 1 and 2''' + result = mm.multi_model_statistics([cubes_time[0], cubes_time[1]], + span=span, + statistics=[stats]) + result = result[stats] + expected = np.array(EXPECTED[span][stats]) + assert np.all(result.data == expected.data) From 8e24ed9dc630fde93f8208bae44a26a499a290c3 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Fri, 29 Jan 2021 15:17:29 +0100 Subject: [PATCH 08/68] Also accept std as valid statistics --- esmvalcore/preprocessor/_multimodel.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 75af9b09eb..e035042ab4 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -129,6 +129,11 @@ def _compute(cube, statistic, dim='new_dim'): percentile = float(statistic[1:]) operator = iris.analysis.PERCENTILE kwargs = {'percent': percentile} + elif statistic == 'std': + statistic = 'std_dev' + logger.warning( + "Multicube statistics is aligning its behaviour with iris.analysis" + ". 
Please consider replacing 'std' with 'std_dev' in your code.") else: try: operators = vars(iris.analysis) From 5e63398c57d7be09f71b15f5577e6a4c50dbcf69 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Fri, 29 Jan 2021 16:30:42 +0100 Subject: [PATCH 09/68] Another attempt at supporting std --- esmvalcore/preprocessor/_multimodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index e035042ab4..c1ac657999 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -130,7 +130,7 @@ def _compute(cube, statistic, dim='new_dim'): operator = iris.analysis.PERCENTILE kwargs = {'percent': percentile} elif statistic == 'std': - statistic = 'std_dev' + operator = iris.analysis.STD_DEV logger.warning( "Multicube statistics is aligning its behaviour with iris.analysis" ". Please consider replacing 'std' with 'std_dev' in your code.") From f93d42f18325f97f5b972753ef9af855ce33a54f Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Fri, 29 Jan 2021 16:47:11 +0100 Subject: [PATCH 10/68] deal better with kwargs --- esmvalcore/preprocessor/_multimodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index c1ac657999..dc42bb61e2 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -124,11 +124,12 @@ def _combine(cubes, dim='new_dim'): def _compute(cube, statistic, dim='new_dim'): """Compute statistic.""" + kwargs = {} if re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): # percentiles between p0 and p99.99999... percentile = float(statistic[1:]) operator = iris.analysis.PERCENTILE - kwargs = {'percent': percentile} + kwargs['percent'] = percentile elif statistic == 'std': operator = iris.analysis.STD_DEV logger.warning( @@ -138,7 +139,6 @@ def _compute(cube, statistic, dim='new_dim'): try: operators = vars(iris.analysis) operator = operators[statistic.upper()] - kwargs = {} except KeyError as err: raise ValueError( f'Statistic `{statistic}` not supported in', From 2bff8efc75ce4624bf3ab3ea7131917a07ccda7b Mon Sep 17 00:00:00 2001 From: Barbara Vreede Date: Fri, 29 Jan 2021 16:53:46 +0100 Subject: [PATCH 11/68] corrected expected test results --- .../preprocessor/_multimodel/test_span.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_span.py b/tests/unit/preprocessor/_multimodel/test_span.py index 2cc66626d1..428d37fc6f 100644 --- a/tests/unit/preprocessor/_multimodel/test_span.py +++ b/tests/unit/preprocessor/_multimodel/test_span.py @@ -12,20 +12,21 @@ EXPECTED = { 'overlap': { - 'mean': [1.5, 1.5], - 'std': [0.5, 0.5], - 'std_dev': [0.5, 0.5], + 'mean': [5, 5], + 'std': [5.656854249492381, 5.656854249492381], + 'std_dev': [5.656854249492381, 5.656854249492381], 'min': [1, 1], - 'max': [2, 2], - 'median': [1.5, 1.5] + 'max': [9, 9], + 'median': [5, 5] }, 'full': { - 'mean': [1.5, 1.5], - 'std': [0.5, 0.5], - 'std_dev': [0.5, 0.5], - 'min': [1, 1], - 'max': [2, 2], - 'median': [1.5, 1.5] + 'mean': [5, 5, 9], + 'std': [5.656854249492381, 5.656854249492381, 0], + 'std_dev': + [5.656854249492381, 5.656854249492381, 0], + 'min': [1, 1, 9], + 'max': [9, 9, 9], + 'median': [5, 5, 9] } } From b0f93e63f6d02a92dd0e9eed9fe87acb7be87455 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Mon, 1 Feb 2021 12:11:32 +0100 Subject: [PATCH 12/68] masterplan for 
overhaul of multimodel tests --- esmvalcore/preprocessor/_multimodel.py | 2 +- .../_multimodel/test_multimodel.py | 51 +++++++++++++++++ .../preprocessor/_multimodel/test_span.py | 48 ++++++++++------ .../preprocessor/_multimodel/test_times.py | 56 ------------------- 4 files changed, 82 insertions(+), 75 deletions(-) delete mode 100644 tests/unit/preprocessor/_multimodel/test_times.py diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index dc42bb61e2..cfdc49e5a3 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -101,7 +101,7 @@ def _align(cubes, span): # new_times = cubes[0].coord('time').units.num2date(new_times) sample_points = [('time', new_times)] - scheme = iris.analysis.Nearest(extrapolation_mode='nan') + scheme = iris.analysis.Nearest(extrapolation_mode='mask') new_cubes = [cube.interpolate(sample_points, scheme) for cube in cubes] for cube in new_cubes: diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 38c7d7a5d9..43b54fa107 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -8,6 +8,7 @@ import tests from esmvalcore.preprocessor import multi_model_statistics + # from esmvalcore.preprocessor._multimodel import ( # _assemble_data, # _compute_statistic, @@ -17,6 +18,56 @@ # _unify_time_coordinates, # ) +""" +# What tests to do? +test_get_consistent_time_unit + --> pass multiple cubes + - if they have the same calendar: return that calendar + - if they have different calendars: return default calendar + +test_unify_time_coordinates(cubes) + --> pass multiple cubes with all kinds of different calendars + - Check that output cubes all have the same calendar + - check that the dates in the output correspond to the dates in the input + - do this for different time frequencies + - check warning/error for (sub)daily data + +test_resolve_span(all_times, span) + --> pass a list of lists with time points (integers) + --> span arguments: 'overlap' or 'full' + --> check that either the union or the intersection is correct + +test_align(cubes, span) + --> pass multiple cubes with different time coords + --> check that the returned cubes have consistent shapes and calendars + --> check that if a cube is extended, the extended points are masked (not NaN!) + +test_combine(cubes, dim='new_dim') + --> pass multiple combinations of cubes + - if cubes have the same shape, check that they are combined along a new dimension + - if they have inconsistent shapes, check that iris raises an error + - if they have inconsistent variable names, they should not be combined + +test_compute(cube, statistic, dim='new_dim') + --> make one big cube with a dimension called 'new dim' + - call with multiple different statistics + - check that the resulting data (computed statistics) is correct + - check that the output has a correct variable name + - check that the 'new_dim' dimension is removed again + - what happens if some of the input data is masked or NaN? + - test with COUNT statistics whether masked points are treated as expected. + +test_multi_model_statistics (cubes, ..., ...) 
+ --> currently: span: overlap/full, freq: monthly/yearly, statistics: mean + --> different data frequencies ((sub)daily, monthly, yearly) + --> different statistics + --> different span arguments + --> different mask options + --> different combinations of coordinates + --> check return type is dict with all requested statistics as keys +""" + + class Test(tests.Test): """Test class for preprocessor/_multimodel.py.""" diff --git a/tests/unit/preprocessor/_multimodel/test_span.py b/tests/unit/preprocessor/_multimodel/test_span.py index 428d37fc6f..09046c1014 100644 --- a/tests/unit/preprocessor/_multimodel/test_span.py +++ b/tests/unit/preprocessor/_multimodel/test_span.py @@ -1,14 +1,19 @@ -import pytest +"""tests for multimodel preprocessor.""" + import iris -from cf_units import Unit import numpy as np +import pytest +from cf_units import Unit # from esmvalcore.preprocessor._multimodel import multi_model_statistics import esmvalcore.preprocessor._multimodel as mm SPAN_OPTIONS = ('overlap', 'full') -STATISTICS_OPTIONS = ('mean', 'std', 'std_dev', 'min', 'max', 'median') +FREQUENCY_OPTIONS = ('daily', 'monthly') + +STATISTICS_OPTIONS = ('mean', 'std', 'std_dev', 'min', 'max', 'median', + 'count', 'p50', 'p99.5') EXPECTED = { 'overlap': { @@ -22,8 +27,7 @@ 'full': { 'mean': [5, 5, 9], 'std': [5.656854249492381, 5.656854249492381, 0], - 'std_dev': - [5.656854249492381, 5.656854249492381, 0], + 'std_dev': [5.656854249492381, 5.656854249492381, 0], 'min': [1, 1, 9], 'max': [9, 9, 9], 'median': [5, 5, 9] @@ -39,23 +43,31 @@ def timecoord(days=[1, 2], calendar='gregorian'): calendar=calendar)) -@pytest.fixture -def cubes_time(): - """Set up cubes used for testing multimodel statistics""" +def cubes(frequency): + """Set up cubes used for testing multimodel statistics.""" + if frequency == 'daily': + points1 = [1, 2] + points2 = [1, 2, 3] + elif frequency == 'monthly': + points1 = [14, 45] + points2 = [14, 45, 74] cube1 = iris.cube.Cube([1, 1], - dim_coords_and_dims=[(timecoord([1, 2]), 0)]) + dim_coords_and_dims=[(timecoord(points1), 0)]) cube2 = iris.cube.Cube([9, 9, 9], - dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)]) - return ([cube1, cube2]) + dim_coords_and_dims=[(timecoord(points2), 0)]) + return cube1, cube2 +@pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) @pytest.mark.parametrize('span', SPAN_OPTIONS) -@pytest.mark.parametrize('stats', STATISTICS_OPTIONS) -def test_mean(cubes_time, span, stats): - '''overlap between cube 1 and 2''' - result = mm.multi_model_statistics([cubes_time[0], cubes_time[1]], +# @pytest.mark.parametrize('stats', STATISTICS_OPTIONS) +def test_mean(span, frequency): + """overlap between cube 1 and 2.""" + cube1, cube2 = cubes(frequency) + + result = mm.multi_model_statistics([cube1, cube2], span=span, - statistics=[stats]) - result = result[stats] - expected = np.array(EXPECTED[span][stats]) + statistics=['mean']) + result = result['mean'] + expected = np.array(EXPECTED[span]['mean']) assert np.all(result.data == expected.data) diff --git a/tests/unit/preprocessor/_multimodel/test_times.py b/tests/unit/preprocessor/_multimodel/test_times.py deleted file mode 100644 index 6b8c8c5220..0000000000 --- a/tests/unit/preprocessor/_multimodel/test_times.py +++ /dev/null @@ -1,56 +0,0 @@ -import iris -# import pytest -from cf_units import Unit -import numpy as np - -from esmvalcore.preprocessor._multimodel import multi_model_statistics - -def timecoord(days=[1, 2], calendar='gregorian'): - """Return a standard time coordinate with the given days as time points.""" 
-    return iris.coords.DimCoord(days, standard_name='time', units = Unit('days since 1850-01-01', calendar=calendar))
-
-cube1 = iris.cube.Cube([1, 1], dim_coords_and_dims=[(timecoord([1, 2]), 0)])
-cube2 = iris.cube.Cube([2, 2, 2], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)])
-cube3 = iris.cube.Cube([3, 3], dim_coords_and_dims=[(timecoord([1, 3]), 0)])
-
-# overlap between cube 1 and 2
-result = multi_model_statistics([cube1, cube2], span='overlap', statistics=['mean'])['mean']
-expected = iris.cube.Cube([1.5, 1.5], dim_coords_and_dims=[(timecoord([1, 2]), 0)])
-assert np.all(result.data == expected.data)
-
-# overlap between cube 1 and 3
-result = multi_model_statistics([cube1, cube3], span='overlap', statistics=['mean'])['mean']
-expected = iris.cube.Cube([2], dim_coords_and_dims=[(timecoord([1]), 0)])
-assert np.all(result.data == expected.data)
-
-# overlap between cube 2 and 3
-result = multi_model_statistics([cube2, cube3], span='overlap', statistics=['mean'])['mean']
-expected = iris.cube.Cube([2.5, 2.5], dim_coords_and_dims=[(timecoord([1, 3]), 0)])
-assert np.all(result.data == expected.data)
-
-# overlap between cube 1 and 2 and 3
-result = multi_model_statistics([cube1, cube2, cube3], span='overlap', statistics=['mean'])['mean']
-expected = iris.cube.Cube([2], dim_coords_and_dims=[(timecoord([1]), 0)])
-assert np.all(result.data == expected.data)
-
-###################################################################################
-
-# full between cube 1 and 2
-result = multi_model_statistics([cube1, cube2], span='full', statistics=['mean'])['mean']
-expected = iris.cube.Cube([1.5, 1.5, 2], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)])
-assert np.all(result.data == expected.data)
-
-# full between cube 1 and 3
-result = multi_model_statistics([cube1, cube3], span='full', statistics=['mean'])['mean']
-expected = iris.cube.Cube([2, 1, 3], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)])
-assert np.all(result.data == expected.data)
-
-# full between cube 2 and 3
-result = multi_model_statistics([cube2, cube3], span='full', statistics=['mean'])['mean']
-expected = iris.cube.Cube([2.5, 2, 2.5], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)])
-assert np.all(result.data == expected.data)
-
-# full between cube 1 and 2 and 3
-result = multi_model_statistics([cube1, cube2, cube3], span='full', statistics=['mean'])['mean']
-expected = iris.cube.Cube([2, 1.5, 2.5], dim_coords_and_dims=[(timecoord([1, 2, 3]), 0)])
-assert np.all(result.data == expected.data)
\ No newline at end of file

From 973979d12d79a4fe3771f41bbf3d47f6f2521a82 Mon Sep 17 00:00:00 2001
From: Peter Kalverla
Date: Thu, 18 Feb 2021 14:15:20 +0100
Subject: [PATCH 13/68] git stash dump - wip

---
 esmvalcore/preprocessor/_multimodel.py        |   5 +-
 .../_multimodel/test_multimodel.py            | 427 +++++-----------
 .../preprocessor/_multimodel/test_span.py     | 108 ++---
 3 files changed, 236 insertions(+), 304 deletions(-)

diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
index cfdc49e5a3..90de0009ab 100644
--- a/esmvalcore/preprocessor/_multimodel.py
+++ b/esmvalcore/preprocessor/_multimodel.py
@@ -97,8 +97,11 @@ def _align(cubes, span):
     """Expand or subset cubes so they share a common time span."""
     _unify_time_coordinates(cubes)
     all_time_arrays = [cube.coord('time').points for cube in cubes]
-    new_times = _resolve_span(all_time_arrays, span)
+    if all(np.array_equal(t, all_time_arrays[0]) for t in all_time_arrays):
+        # cubes are already aligned
+        return cubes
+    new_times = _resolve_span(all_time_arrays, span)
     # new_times = 
cubes[0].coord('time').units.num2date(new_times) sample_points = [('time', new_times)] scheme = iris.analysis.Nearest(extrapolation_mode='mask') diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 43b54fa107..4f60d867a2 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -1,41 +1,4 @@ -"""Unit test for :func:`esmvalcore.preprocessor._multimodel`.""" - -import unittest - -import iris -import numpy as np -from cf_units import Unit - -import tests -from esmvalcore.preprocessor import multi_model_statistics - -# from esmvalcore.preprocessor._multimodel import ( -# _assemble_data, -# _compute_statistic, -# _get_time_slice, -# _plev_fix, -# _put_in_cube, -# _unify_time_coordinates, -# ) - -""" -# What tests to do? -test_get_consistent_time_unit - --> pass multiple cubes - - if they have the same calendar: return that calendar - - if they have different calendars: return default calendar - -test_unify_time_coordinates(cubes) - --> pass multiple cubes with all kinds of different calendars - - Check that output cubes all have the same calendar - - check that the dates in the output correspond to the dates in the input - - do this for different time frequencies - - check warning/error for (sub)daily data - -test_resolve_span(all_times, span) - --> pass a list of lists with time points (integers) - --> span arguments: 'overlap' or 'full' - --> check that either the union or the intersection is correct +"""Unit test for :func:`esmvalcore.preprocessor._multimodel` test_align(cubes, span) --> pass multiple cubes with different time coords @@ -56,221 +19,181 @@ - check that the 'new_dim' dimension is removed again - what happens if some of the input data is masked or NaN? - test with COUNT statistics whether masked points are treated as expected. - -test_multi_model_statistics (cubes, ..., ...) 
- --> currently: span: overlap/full, freq: monthly/yearly, statistics: mean - --> different data frequencies ((sub)daily, monthly, yearly) - --> different statistics - --> different span arguments - --> different mask options - --> different combinations of coordinates - --> check return type is dict with all requested statistics as keys """ +from datetime import datetime +import iris +import numpy as np +import pytest +from cf_units import Unit +from iris.cube import Cube -class Test(tests.Test): - """Test class for preprocessor/_multimodel.py.""" - def setUp(self): - """Prepare tests.""" - # Make various time arrays - time_args = { - 'standard_name': 'time', - 'units': Unit('days since 1850-01-01', calendar='gregorian') - } - monthly1 = iris.coords.DimCoord([14, 45], **time_args) - monthly2 = iris.coords.DimCoord([45, 73, 104, 134], **time_args) - monthly3 = iris.coords.DimCoord([104, 134], **time_args) - yearly1 = iris.coords.DimCoord([14., 410.], **time_args) - yearly2 = iris.coords.DimCoord([1., 367., 733., 1099.], **time_args) - daily1 = iris.coords.DimCoord([1., 2.], **time_args) - for time in [monthly1, monthly2, monthly3, yearly1, yearly2, daily1]: - time.guess_bounds() - - # Other dimensions are fixed - zcoord = iris.coords.DimCoord([0.5, 5., 50.], - standard_name='air_pressure', - long_name='air_pressure', - bounds=[[0., 2.5], [2.5, 25.], - [25., 250.]], - units='m', - attributes={'positive': 'down'}) - coord_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) - lons = iris.coords.DimCoord([1.5, 2.5], - standard_name='longitude', - long_name='longitude', - bounds=[[1., 2.], [2., 3.]], - units='degrees_east', - coord_system=coord_sys) - lats = iris.coords.DimCoord([1.5, 2.5], - standard_name='latitude', - long_name='latitude', - bounds=[[1., 2.], [2., 3.]], - units='degrees_north', - coord_system=coord_sys) - - data1 = np.ma.ones((2, 3, 2, 2)) - data2 = np.ma.ones((4, 3, 2, 2)) - mask2 = np.full((4, 3, 2, 2), False) - mask2[0, 0, 0, 0] = True - data2 = np.ma.array(data2, mask=mask2) - - coords_spec1 = [(monthly1, 0), (zcoord, 1), (lats, 2), (lons, 3)] - self.cube1 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec1) - - coords_spec2 = [(monthly2, 0), (zcoord, 1), (lats, 2), (lons, 3)] - self.cube2 = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec2) - - coords_spec3 = [(monthly3, 0), (zcoord, 1), (lats, 2), (lons, 3)] - self.cube3 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec3) - - coords_spec4 = [(yearly1, 0), (zcoord, 1), (lats, 2), (lons, 3)] - self.cube4 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec4) - - coords_spec5 = [(yearly2, 0), (zcoord, 1), (lats, 2), (lons, 3)] - self.cube5 = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec5) - - coords_spec6 = [(daily1, 0), (zcoord, 1), (lats, 2), (lons, 3)] - self.cube6 = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec6) - - # def test_compute_statistic(self): - # """Test statistic.""" - # data = [self.cube1.data[0], self.cube2.data[0]] - # stat_mean = _compute_statistic(data, "mean") - # stat_median = _compute_statistic(data, "median") - # expected_mean = np.ma.ones((3, 2, 2)) - # expected_median = np.ma.ones((3, 2, 2)) - # self.assert_array_equal(stat_mean, expected_mean) - # self.assert_array_equal(stat_median, expected_median) - - def test_compute_full_statistic_mon_cube(self): - data = [self.cube1, self.cube2] - stats = multi_model_statistics(products=data, - statistics=['mean'], - span='full') - expected_full_mean = np.ma.ones((5, 3, 2, 2)) - expected_full_mean.mask = 
np.ones((5, 3, 2, 2)) - expected_full_mean.mask[1] = False - self.assert_array_equal(stats['mean'].data, expected_full_mean) - - def test_compute_full_statistic_yr_cube(self): - data = [self.cube4, self.cube5] - stats = multi_model_statistics(products=data, - statistics=['mean'], - span='full') - expected_full_mean = np.ma.ones((4, 3, 2, 2)) - expected_full_mean.mask = np.zeros((4, 3, 2, 2)) - expected_full_mean.mask[2:4] = True - self.assert_array_equal(stats['mean'].data, expected_full_mean) - - def test_compute_overlap_statistic_mon_cube(self): - data = [self.cube1, self.cube1] - stats = multi_model_statistics(products=data, - statistics=['mean'], - span='overlap') - expected_ovlap_mean = np.ma.ones((2, 3, 2, 2)) - self.assert_array_equal(stats['mean'].data, expected_ovlap_mean) - - def test_compute_overlap_statistic_yr_cube(self): - data = [self.cube4, self.cube4] - stats = multi_model_statistics(products=data, - statistics=['mean'], - span='overlap') - expected_ovlap_mean = np.ma.ones((2, 3, 2, 2)) - self.assert_array_equal(stats['mean'].data, expected_ovlap_mean) - - # def test_compute_std(self): - # """Test statistic.""" - # data = [self.cube1.data[0], self.cube2.data[0] * 2] - # stat = _compute_statistic(data, "std") - # expected = np.ma.ones((3, 2, 2)) * 0.5 - # expected[0, 0, 0] = 0 - # self.assert_array_equal(stat, expected) - - # def test_compute_max(self): - # """Test statistic.""" - # data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] - # stat = _compute_statistic(data, "max") - # expected = np.ma.ones((3, 2, 2)) * 2 - # expected[0, 0, 0] = 0.5 - # self.assert_array_equal(stat, expected) - - # def test_compute_min(self): - # """Test statistic.""" - # data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] - # stat = _compute_statistic(data, "min") - # expected = np.ma.ones((3, 2, 2)) * 0.5 - # self.assert_array_equal(stat, expected) - - # def test_compute_percentile(self): - # """Test statistic.""" - # data = [self.cube1.data[0] * 0.5, self.cube2.data[0] * 2] - # stat = _compute_statistic(data, "p75") - # expected = np.ma.ones((3, 2, 2)) * 1.625 - # expected[0, 0, 0] = 0.5 - # self.assert_array_equal(stat, expected) - - # def test_put_in_cube(self): - # """Test put in cube.""" - # cube_data = np.ma.ones((2, 3, 2, 2)) - # stat_cube = _put_in_cube(self.cube1, cube_data, "mean", t_axis=[1, 2]) - # self.assert_array_equal(stat_cube.data, self.cube1.data) - - # # def test_assemble_overlap_data(self): - # # """Test overlap data.""" - # # comp_ovlap_mean = _assemble_data([self.cube1, self.cube1], - # # "mean", - # # span='overlap') - # # expected_ovlap_mean = np.ma.ones((2, 3, 2, 2)) - # # self.assert_array_equal(comp_ovlap_mean.data, expected_ovlap_mean) - - # # def test_assemble_full_data(self): - # # """Test full data.""" - # # comp_full_mean = _assemble_data([self.cube1, self.cube2], - # # "mean", - # # span='full') - # # expected_full_mean = np.ma.ones((5, 3, 2, 2)) - # # expected_full_mean.mask = np.ones((5, 3, 2, 2)) - # # expected_full_mean.mask[1] = False - # # self.assert_array_equal(comp_full_mean.data, expected_full_mean) - - # def test_plev_fix(self): - # """Test plev fix.""" - # fixed_data = _plev_fix(self.cube2.data, 1) - # expected_data = np.ma.ones((3, 2, 2)) - # self.assert_array_equal(expected_data, fixed_data) - - # def test_unify_time_coordinates(self): - # """Test set common calenar.""" - # cube1 = self.cube1 - # time1 = cube1.coord('time') - # t_unit1 = time1.units - # dates = t_unit1.num2date(time1.points) - - # t_unit2 = Unit('days since 
1850-01-01', calendar='gregorian') - # time2 = t_unit2.date2num(dates) - # cube2 = self.cube1.copy() - # cube2.coord('time').points = time2 - # cube2.coord('time').units = t_unit2 - # _unify_time_coordinates([cube1, cube2]) - # self.assertEqual(cube1.coord('time'), cube2.coord('time')) - - # def test_get_time_slice_all(self): - # """Test get time slice if all cubes have data.""" - # cubes = [self.cube1, self.cube2] - # result = _get_time_slice(cubes, time=45) - # expected = [self.cube1[1].data, self.cube2[0].data] - # self.assert_array_equal(expected, result) - - # def test_get_time_slice_part(self): - # """Test get time slice if all cubes have data.""" - # cubes = [self.cube1, self.cube2] - # result = _get_time_slice(cubes, time=14) - # masked = np.ma.empty(list(cubes[0].shape[1:])) - # masked.mask = True - # expected = [self.cube1[0].data, masked] - # self.assert_array_equal(expected, result) - - -if __name__ == '__main__': - unittest.main() +from esmvalcore.preprocessor import multi_model_statistics +import esmvalcore.preprocessor._multimodel as mm + +SPAN_OPTIONS = ('overlap', 'full') + +FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly + + +def timecoord(frequency, calendar='gregorian', offset='days since 1850-01-01'): + """Return a time coordinate with the given time points and calendar.""" + if frequency == 'hourly': + dates = [datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]] + if frequency == 'daily': + dates = [datetime(1850, 1, i, 0, 0, 0) for i in [1, 2, 3]] + elif frequency == 'monthly': + dates = [datetime(1850, i, 15, 0, 0, 0) for i in [1, 2, 3]] + elif frequency == 'yearly': + dates = [datetime(1850, 7, i, 0, 0, 0) for i in [1, 2, 3]] + + unit = Unit(offset, calendar=calendar) + points = unit.date2num(dates) + return iris.coords.DimCoord(points, standard_name='time', units=unit) + + +def get_cubes(frequency): + """Set up cubes used for testing multimodel statistics.""" + + # Simple 1d cube with standard time cord + time = timecoord(frequency) + cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time, 0)]) + + # Cube with masked data + cube2 = cube1.copy() + cube2.data = np.ma.array([5, 5, 5], mask=[True, False, False]) + + # Cube with deviating time coord + time = timecoord(frequency, + calendar='360_day', + offset='days since 1950-01-01')[:2] + cube3 = Cube([9, 9], dim_coords_and_dims=[(time, 0)]) + return [cube1, cube2, cube3] + + +@pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) +@pytest.mark.parametrize('span', SPAN_OPTIONS) +# @pytest.mark.parametrize('stats', STATISTICS_OPTIONS) +def test_multimodel_statistics(span, frequency): + """High level test for multicube statistics function. + + - Should work for multiple data frequencies + - Should be able to deal with multiple statistics + - Should work for both span arguments + - Should deal correctly with different mask options + - Return type should be a dict with all requested statistics as keys + """ + cubes = get_cubes(frequency) + verification_data = { + # For span=overlap, take the first 2 items. 
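+        # (cube1 holds [1, 1, 1]; cube2 holds [5, 5, 5] with the first
+        # point masked; cube3 holds [9, 9] on a deviating time coord.
+        # Note that 'std' maps to iris.analysis.STD_DEV, which uses
+        # ddof=1, so e.g. std([1, 9]) = sqrt(32) = 5.6568...)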
+ # Span = full --> statistic computed on [1, 1, 1], [-, 5, 5], [9, 9, -] + # Span = overlap --> statistic computed on [1, 1], [-, 5], [9, 9] + 'mean': [5, 5, 3], + 'std': [5.656854249492381, 4, 2.8284271247461903], + 'std_dev': [5.656854249492381, 4, 2.8284271247461903], + 'min': [1, 1, 1], + 'max': [9, 9, 5], + 'median': [5, 5, 3], + 'p50': [5, 5, 3], + 'p99.5': [8.96, 8.96, 4.98], + } + + statistics = verification_data.keys() + results = multi_model_statistics(cubes, span, statistics) + + assert isinstance(results, dict) + assert results.keys() == statistics + + for statistic, result in results.items(): + expected = np.ma.array(verification_data[statistic], mask=False) + if span == 'overlap': + expected = expected[:2] + np.testing.assert_array_equal(result.data.mask, expected.mask) + np.testing.assert_array_almost_equal(result.data, expected.data) + + +def test_get_consistent_time_unit(): + """Test same calendar returned or default if calendars differ.""" + + time1 = timecoord('monthly', '360_day') + cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time1, 0)]) + time2 = timecoord('monthly', '365_day') + cube2 = Cube([1, 1, 1,], dim_coords_and_dims=[(time2, 0)]) + + result1 = mm._get_consistent_time_unit([cube1, cube1]) + result2 = mm._get_consistent_time_unit([cube1, cube2]) + assert result1.calendar == '360_day' + assert result2.calendar == 'gregorian' + + +def test_unify_time_coordinates(): + """Test whether the time coordinates are made consistent.""" + + # # Check that monthly data have midpoints at 15th day + # cube1 = Cube([1, 1, 1], ) + + # hourly = { + # 'input1': timecoord([datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]], + # calendar='standard'), + # 'input2' timecoord([datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]]), + # calendar='gregorian'), + # 'output': timecoord([datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]], + # calendar='gregorian') + # } + + # daily = ([datetime(1850, 1, i, 0, 0, 0) for i in [1, 2, 3]], + # [datetime(1850, 1, i, 12, 0, 0) for i in [1, 2, 3]]) + # monthly = ([datetime(1850, i, 1, 0, 0, 0) for i in [1, 2, 3]], + # [datetime(1850, i, 15, 0, 0, 0) for i in [1, 2, 3]]) + # yearly = ([datetime(1850+i, 1, 7, 0, 0, 0) for i in [1, 2, 3]], + # [datetime(1850+i, 1, 1, 0, 0, 0) for i in [1, 2, 3]]) + + # time_sets = [hourly, daily, monthly, yearly] + # calendars_sets = [ + # ('standard', 'gregorian'), + # ('360_day', '360_day'), + # ('365_day', 'proleptic_gregorian'), + # ('standard', 'standard'), + # ] + + # for (time1, time2), (calendar1, calendar2) in zip(time_sets, calendar_sets): + # cube1 = [Cube([1, 1, 1], dim_coords_and_dims=[(timecoord(time1, calendar1), 0)]) + # cube2 = [Cube([1, 1, 1], dim_coords_and_dims=[(timecoord(time2, calendar2), 0)]) + # cubes = mm._unify_time_coordinates([cube1, cube2]) + + # --> pass multiple cubes with all kinds of different calendars + # - Check that output cubes all have the same calendar + # - check that the dates in the output correspond to the dates in the input + # - do this for different time frequencies + # - check warning/error for (sub)daily data + + +def test_resolve_span(): + """Check that resolve_span returns the correct union/intersection.""" + span1 = [1, 2, 3] + span2 = [2, 3, 4] + span3 = [3, 4, 5] + span4 = [4, 5, 6] + + assert all(mm._resolve_span([span1, span2], span='overlap') == [2, 3]) + assert all(mm._resolve_span([span1, span2], span='full') == [1, 2, 3, 4]) + + assert all(mm._resolve_span([span1, span2, span3], span='overlap') == [3]) + assert all( + mm._resolve_span([span1, span2, span3], 
span='full') == + [1, 2, 3, 4, 5]) + + with pytest.raises(ValueError): + mm._resolve_span([span1, span4], span='overlap') + + +# test edge cases + +# different time offsets in calendar +# different calendars +# no overlap +# statistic without kwargs +# time points not in middle of months +# fail for sub-daily data +# diff --git a/tests/unit/preprocessor/_multimodel/test_span.py b/tests/unit/preprocessor/_multimodel/test_span.py index 09046c1014..f8fd208e32 100644 --- a/tests/unit/preprocessor/_multimodel/test_span.py +++ b/tests/unit/preprocessor/_multimodel/test_span.py @@ -1,73 +1,79 @@ """tests for multimodel preprocessor.""" +from datetime import datetime + import iris import numpy as np import pytest from cf_units import Unit +from iris.cube import Cube + +from esmvalcore.preprocessor import multi_model_statistics -# from esmvalcore.preprocessor._multimodel import multi_model_statistics -import esmvalcore.preprocessor._multimodel as mm +# import esmvalcore.preprocessor._multimodel as mm SPAN_OPTIONS = ('overlap', 'full') -FREQUENCY_OPTIONS = ('daily', 'monthly') - -STATISTICS_OPTIONS = ('mean', 'std', 'std_dev', 'min', 'max', 'median', - 'count', 'p50', 'p99.5') - -EXPECTED = { - 'overlap': { - 'mean': [5, 5], - 'std': [5.656854249492381, 5.656854249492381], - 'std_dev': [5.656854249492381, 5.656854249492381], - 'min': [1, 1], - 'max': [9, 9], - 'median': [5, 5] - }, - 'full': { - 'mean': [5, 5, 9], - 'std': [5.656854249492381, 5.656854249492381, 0], - 'std_dev': [5.656854249492381, 5.656854249492381, 0], - 'min': [1, 1, 9], - 'max': [9, 9, 9], - 'median': [5, 5, 9] - } -} +FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly -def timecoord(days=[1, 2], calendar='gregorian'): - """Return a standard time coordinate with the given days as time points.""" - return iris.coords.DimCoord(days, - standard_name='time', - units=Unit('days since 1850-01-01', - calendar=calendar)) +def timecoord(dates, calendar='gregorian'): + """Return a time coordinate with the given time points and calendar.""" + unit = Unit('days since 1850-01-01', calendar=calendar) + points = unit.date2num(dates) + return iris.coords.DimCoord(points, standard_name='time', units=unit) +# lons = iris.coords.DimCoord([0,], standard_name='longitude', units='degrees_east') +# lats = iris.coords.DimCoord([0,], standard_name='latitude', units='degrees_north') -def cubes(frequency): + +def get_cubes(frequency): """Set up cubes used for testing multimodel statistics.""" + if frequency == 'hourly': + dates = [datetime(1850, 1, 1, i, 0, 0) for i in range(1, 4)] if frequency == 'daily': - points1 = [1, 2] - points2 = [1, 2, 3] + dates = [datetime(1850, 1, i, 0, 0, 0) for i in range(1, 4)] elif frequency == 'monthly': - points1 = [14, 45] - points2 = [14, 45, 74] - cube1 = iris.cube.Cube([1, 1], - dim_coords_and_dims=[(timecoord(points1), 0)]) - cube2 = iris.cube.Cube([9, 9, 9], - dim_coords_and_dims=[(timecoord(points2), 0)]) - return cube1, cube2 + dates = [datetime(1850, i, 15, 0, 0, 0) for i in range(1, 4)] + elif frequency == 'yearly': + dates = [datetime(1850, 7, i, 0, 0, 0) for i in range(1, 4)] + + cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(timecoord(dates), 0)]) + cube2 = cube1.copy() + cube2.data = np.ma.array([5, 5, 5], mask=[True, False, False]) + cube3 = Cube([9, 9], dim_coords_and_dims=[(timecoord(dates[:2]), 0)]) + return [cube1, cube2, cube3] @pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) @pytest.mark.parametrize('span', SPAN_OPTIONS) # @pytest.mark.parametrize('stats', STATISTICS_OPTIONS) -def 
test_mean(span, frequency): - """overlap between cube 1 and 2.""" - cube1, cube2 = cubes(frequency) - - result = mm.multi_model_statistics([cube1, cube2], - span=span, - statistics=['mean']) - result = result['mean'] - expected = np.array(EXPECTED[span]['mean']) - assert np.all(result.data == expected.data) +def test_multimodel_statistics(span, frequency): + """High level test for multicube statistics function.""" + cubes = get_cubes(frequency) + verification_data = { + # For span=overlap, take the first 2 items. + # Span = full --> statistic computed on [1, 1, 1], [-, 5, 5], [9, 9, -] + # Span = overlap --> statistic computed on [1, 1], [-, 5], [9, 9] + 'mean': [5, 5, 3], + 'std': [5.656854249492381, 4, 2.8284271247461903], + 'std_dev': [5.656854249492381, 4, 2.8284271247461903], + 'min': [1, 1, 1], + 'max': [9, 9, 5], + 'median': [5, 5, 3], + 'p50': [5, 5, 3], + 'p99.5': [8.96, 8.96, 4.98], + } + + statistics = verification_data.keys() + results = multi_model_statistics(cubes, span, statistics) + + assert isinstance(results, dict) + assert results.keys == statistics + + for statistic, result in results.items(): + expected = np.ma.array(verification_data[statistic], mask=False) + if span == 'overlap': + expected = expected[:2] + np.testing.assert_array_equal(result.data.mask, expected.mask) + np.testing.assert_array_almost_equal(result.data, expected.data) From 930bec91570d39453bb907c19b2251a0212f75f3 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 22 Feb 2021 11:18:37 +0100 Subject: [PATCH 14/68] Fix bugs to ensure that tests can run --- esmvalcore/preprocessor/_multimodel.py | 8 +++++--- tests/unit/preprocessor/_multimodel/test_multimodel.py | 8 ++++++-- tests/unit/preprocessor/_multimodel/test_span.py | 10 +++++++--- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index f585a5822b..904dd49bb4 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -8,7 +8,7 @@ import cf_units import iris import numpy as np -from iris.experimental.equalise_cubes import equalise_attributes +from iris.util import equalise_attributes logger = logging.getLogger(__name__) @@ -97,7 +97,7 @@ def _align(cubes, span): """Expand or subset cubes so they share a common time span.""" _unify_time_coordinates(cubes) all_time_arrays = [cube.coord('time').points for cube in cubes] - if reduce(np.array_equal, all_times): + if reduce(np.array_equal, all_time_arrays): # cubes are already aligned return cubes @@ -109,18 +109,20 @@ def _align(cubes, span): for cube in new_cubes: cube.coord('time').guess_bounds() + return new_cubes def _combine(cubes, dim='new_dim'): """Merge iris cubes into a single big cube with new dimension.""" - equalise_attributes(cubes) + equalise_attributes(cubes) # in-place for i, cube in enumerate(cubes): concat_dim = iris.coords.AuxCoord(i, var_name=dim) cube.add_aux_coord(concat_dim) cubes = iris.cube.CubeList(cubes) + return cubes.merge_cube() diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 4f60d867a2..3fdd619a1c 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -29,8 +29,8 @@ from cf_units import Unit from iris.cube import Cube -from esmvalcore.preprocessor import multi_model_statistics import esmvalcore.preprocessor._multimodel as mm +from esmvalcore.preprocessor import 
multi_model_statistics SPAN_OPTIONS = ('overlap', 'full') @@ -119,7 +119,11 @@ def test_get_consistent_time_unit(): time1 = timecoord('monthly', '360_day') cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time1, 0)]) time2 = timecoord('monthly', '365_day') - cube2 = Cube([1, 1, 1,], dim_coords_and_dims=[(time2, 0)]) + cube2 = Cube([ + 1, + 1, + 1, + ], dim_coords_and_dims=[(time2, 0)]) result1 = mm._get_consistent_time_unit([cube1, cube1]) result2 = mm._get_consistent_time_unit([cube1, cube2]) diff --git a/tests/unit/preprocessor/_multimodel/test_span.py b/tests/unit/preprocessor/_multimodel/test_span.py index f8fd208e32..29abe16703 100644 --- a/tests/unit/preprocessor/_multimodel/test_span.py +++ b/tests/unit/preprocessor/_multimodel/test_span.py @@ -23,8 +23,11 @@ def timecoord(dates, calendar='gregorian'): points = unit.date2num(dates) return iris.coords.DimCoord(points, standard_name='time', units=unit) -# lons = iris.coords.DimCoord([0,], standard_name='longitude', units='degrees_east') -# lats = iris.coords.DimCoord([0,], standard_name='latitude', units='degrees_north') + +# lons = iris.coords.DimCoord([0,], +# standard_name='longitude', units='degrees_east') +# lats = iris.coords.DimCoord([0,], +# standard_name='latitude', units='degrees_north') def get_cubes(frequency): @@ -69,7 +72,8 @@ def test_multimodel_statistics(span, frequency): results = multi_model_statistics(cubes, span, statistics) assert isinstance(results, dict) - assert results.keys == statistics + + assert results.keys() == statistics for statistic, result in results.items(): expected = np.ma.array(verification_data[statistic], mask=False) From e645812e2b6eaae7999a8d45cf72366ada8caf9e Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 22 Feb 2021 11:48:36 +0100 Subject: [PATCH 15/68] Clean up var mapping to `iris.analysis` --- esmvalcore/preprocessor/_multimodel.py | 72 +++++++++++++++++++------- 1 file changed, 54 insertions(+), 18 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 904dd49bb4..3665918f65 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -12,6 +12,24 @@ logger = logging.getLogger(__name__) +STATISTIC_MAPPING = { + 'count': iris.analysis.COUNT, + 'gmean': iris.analysis.GMEAN, + 'hmean': iris.analysis.HMEAN, + 'mean': iris.analysis.MEAN, + 'median': iris.analysis.MEDIAN, + 'min': iris.analysis.MIN, + 'max': iris.analysis.MAX, + 'peak': iris.analysis.PEAK, + 'percentile': iris.analysis.PERCENTILE, + 'proportion': iris.analysis.PROPORTION, + 'rms': iris.analysis.RMS, + 'std_dev': iris.analysis.STD_DEV, + 'sum': iris.analysis.SUM, + 'variance': iris.analysis.VARIANCE, + 'wpercentile': iris.analysis.WPERCENTILE, +} + def _get_consistent_time_unit(cubes): """Return cubes' time unit if consistent, standard calendar otherwise.""" @@ -126,31 +144,49 @@ def _combine(cubes, dim='new_dim'): return cubes.merge_cube() -def _compute(cube, statistic, dim='new_dim'): - """Compute statistic.""" +def _compute(cube, statistic: str, dim: str = 'new_dim'): + """Compute statistic. + + Parameters + ---------- + cube : :obj:`iris.cube.Cube` + statistic : str + Name of the statistic to calculate. Must be available via + :mod:`iris.analysis`. + dim : str + Collapse cube along this coordinate. + + Returns + ------- + :obj:`iris.cube.Cube` + Collapsed cube. + """ + statistic = statistic.lower() kwargs = {} - if re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): - # percentiles between p0 and p99.99999... 
- percentile = float(statistic[1:]) - operator = iris.analysis.PERCENTILE - kwargs['percent'] = percentile - elif statistic == 'std': - operator = iris.analysis.STD_DEV + + # special cases + if statistic == 'dev': logger.warning( "Multicube statistics is aligning its behaviour with iris.analysis" ". Please consider replacing 'std' with 'std_dev' in your code.") - else: - try: - operators = vars(iris.analysis) - operator = operators[statistic.upper()] - except KeyError as err: - raise ValueError( - f'Statistic `{statistic}` not supported in', - '`ensemble_statistics`. Choose supported operator from', - '`iris.analysis package`.') from err + statistic = 'std_dev' + + elif re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): + # percentiles between p0 and p99.99999... + percentile = float(statistic[1:]) + kwargs['percent'] = percentile + statistic = 'percentile' + + try: + operator = STATISTIC_MAPPING[statistic] + except KeyError as err: + raise ValueError( + f'Statistic `{statistic}` not supported by multicube statistics. ' + f'Must be one of {tuple(STATISTIC_MAPPING.keys())}.') from err logger.debug('Multicube statistics: computing: %s', statistic) + # This will always return a masked array return cube.collapsed(dim, operator, **kwargs) From d3b31c9fefa30422e73bb6bffd7a2d2e8ee6dd29 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 22 Feb 2021 12:38:45 +0100 Subject: [PATCH 16/68] Update tests --- esmvalcore/preprocessor/_multimodel.py | 2 +- .../_multimodel/test_multimodel.py | 155 ++++++++++++------ 2 files changed, 102 insertions(+), 55 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 3665918f65..f5b186066e 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -166,7 +166,7 @@ def _compute(cube, statistic: str, dim: str = 'new_dim'): kwargs = {} # special cases - if statistic == 'dev': + if statistic == 'std': logger.warning( "Multicube statistics is aligning its behaviour with iris.analysis" ". Please consider replacing 'std' with 'std_dev' in your code.") diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 3fdd619a1c..f39bb8be8f 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -1,25 +1,4 @@ -"""Unit test for :func:`esmvalcore.preprocessor._multimodel` - -test_align(cubes, span) - --> pass multiple cubes with different time coords - --> check that the returned cubes have consistent shapes and calendars - --> check that if a cube is extended, the extended points are masked (not NaN!) - -test_combine(cubes, dim='new_dim') - --> pass multiple combinations of cubes - - if cubes have the same shape, check that they are combined along a new dimension - - if they have inconsistent shapes, check that iris raises an error - - if they have inconsistent variable names, they should not be combined - -test_compute(cube, statistic, dim='new_dim') - --> make one big cube with a dimension called 'new dim' - - call with multiple different statistics - - check that the resulting data (computed statistics) is correct - - check that the output has a correct variable name - - check that the 'new_dim' dimension is removed again - - what happens if some of the input data is masked or NaN? - - test with COUNT statistics whether masked points are treated as expected. 
-""" +"""Unit test for :func:`esmvalcore.preprocessor._multimodel`""" from datetime import datetime @@ -72,10 +51,25 @@ def get_cubes(frequency): return [cube1, cube2, cube3] +VALIDATION_DATA = { + # For span=overlap, take the first 2 items. + # Span = full --> statistic computed on [1, 1, 1], [-, 5, 5], [9, 9, -] + # Span = overlap --> statistic computed on [1, 1], [-, 5], [9, 9] + 'mean': [5, 5, 3], + 'std': [5.656854249492381, 4, 2.8284271247461903], + 'std_dev': [5.656854249492381, 4, 2.8284271247461903], + 'min': [1, 1, 1], + 'max': [9, 9, 5], + 'median': [5, 5, 3], + 'p50': [5, 5, 3], + 'p99.5': [8.96, 8.96, 4.98], +} + + @pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) @pytest.mark.parametrize('span', SPAN_OPTIONS) -# @pytest.mark.parametrize('stats', STATISTICS_OPTIONS) -def test_multimodel_statistics(span, frequency): +@pytest.mark.parametrize('statistic', VALIDATION_DATA) +def test_multimodel_statistics(span, frequency, statistic): """High level test for multicube statistics function. - Should work for multiple data frequencies @@ -85,32 +79,44 @@ def test_multimodel_statistics(span, frequency): - Return type should be a dict with all requested statistics as keys """ cubes = get_cubes(frequency) - verification_data = { - # For span=overlap, take the first 2 items. - # Span = full --> statistic computed on [1, 1, 1], [-, 5, 5], [9, 9, -] - # Span = overlap --> statistic computed on [1, 1], [-, 5], [9, 9] - 'mean': [5, 5, 3], - 'std': [5.656854249492381, 4, 2.8284271247461903], - 'std_dev': [5.656854249492381, 4, 2.8284271247461903], - 'min': [1, 1, 1], - 'max': [9, 9, 5], - 'median': [5, 5, 3], - 'p50': [5, 5, 3], - 'p99.5': [8.96, 8.96, 4.98], + + statistics = (statistic, ) + + result = multi_model_statistics(cubes, span, statistics) + + assert isinstance(result, dict) + assert len(result.keys()) == 1 + assert statistic in result + + expected = np.ma.array(VALIDATION_DATA[statistic], mask=False) + if span == 'overlap': + expected = expected[:2] + + result_cube = result[statistic] + np.testing.assert_array_equal(result_cube.data.mask, expected.mask) + np.testing.assert_array_almost_equal(result_cube.data, expected.data) + + +def generate_failing_tests(): + """Generate failing tests.""" + failing_tests = { + 'percentile': ValueError, + 'wpercentile': ValueError, + 'count': TypeError, + 'peak': TypeError, + 'proportion': TypeError, } - statistics = verification_data.keys() - results = multi_model_statistics(cubes, span, statistics) + yield from failing_tests.items() - assert isinstance(results, dict) - assert results.keys() == statistics - for statistic, result in results.items(): - expected = np.ma.array(verification_data[statistic], mask=False) - if span == 'overlap': - expected = expected[:2] - np.testing.assert_array_equal(result.data.mask, expected.mask) - np.testing.assert_array_almost_equal(result.data, expected.data) +@pytest.mark.parametrize('statistic, error', generate_failing_tests()) +def test_all_statistics(statistic, error): + cubes = get_cubes('monthly') + span = 'overlap' + statistics = (statistic, ) + with pytest.raises(error): + result = multi_model_statistics(cubes, span, statistics) def test_get_consistent_time_unit(): @@ -192,12 +198,53 @@ def test_resolve_span(): mm._resolve_span([span1, span4], span='overlap') -# test edge cases +def test_align(): + """ + --> pass multiple cubes with different time coords + --> check that the returned cubes have consistent shapes and calendars + --> check that if a cube is extended, + the extended points are masked 
(not NaN!) + """ + # cubes = ? + # span = ? + pass + + +def test_combine(): + """ + --> pass multiple combinations of cubes + - if cubes have the same shape, + check that they are combined along a new dimension + - if they have inconsistent shapes, check that iris raises an error + - if they have inconsistent variable names, they should not be combined + """ + # cubes = ? + dim = 'new_dim' + -# different time offsets in calendar -# different calendars -# no overlap -# statistic without kwargs -# time points not in middle of months -# fail for sub-daily data -# +def test_compute(): + """ + --> make one big cube with a dimension called 'new dim' + - call with multiple different statistics + - check that the resulting data (computed statistics) is correct + - check that the output has a correct variable name + - check that the 'new_dim' dimension is removed again + - what happens if some of the input data is masked or NaN? + - test with COUNT statistics whether masked points are treated as + expected. + """ + # cube = ? + # statistic = ? + dim = 'new_dim' + + +def test_edge_cases(): + """ + # different time offsets in calendar + # different calendars + # no overlap + # statistic without kwargs + # time points not in middle of months + # fail for sub-daily data + """ + pass From a108445283f7ed4b25a688cb67fe5e42f299d066 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 23 Feb 2021 11:41:06 +0100 Subject: [PATCH 17/68] Fix test cube metadata so that iris will merge them --- .../multimodel_statistics/test_multimodel.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index dd6dd3cb67..3997200b01 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -39,6 +39,18 @@ def assert_array_almost_equal(this, other): np.testing.assert_array_almost_equal(this, other) +def fix_metadata(cubes): + """Fix metadata.""" + for cube in cubes: + cube.coord('air_pressure').bounds = None + + for coord in cube.coords(): + coord.long_name = None + coord.attributes = None + + cube.cell_methods = None + + def preprocess_data(cubes, time_slice: dict = None): """Regrid the data to the first cube and optional time-slicing.""" if time_slice: @@ -61,7 +73,8 @@ def get_cache_key(value): """Get a cache key that is hopefully unique enough for unpickling. If this doesn't avoid problems with unpickling the cached data, - manually clean the pytest cache with the command `pytest --cache-clear`. + manually clean the pytest cache with the command `pytest --cache- + clear`. 
""" return ' '.join([ str(value), @@ -96,6 +109,8 @@ def timeseries_cubes_month(request): request.config.cache.set(cache_key, pickle.dumps(cubes).decode('latin1')) + fix_metadata(cubes) + return cubes @@ -125,6 +140,8 @@ def timeseries_cubes_day(request): request.config.cache.set(cache_key, pickle.dumps(cubes).decode('latin1')) + fix_metadata(cubes) + def calendar(cube): return cube.coord('time').units.calendar From 24f22708fbb48dc9dc4d9f83aed4926e54c9a362 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 23 Feb 2021 16:08:18 +0100 Subject: [PATCH 18/68] Update multimodel tests --- .../_multimodel/test_multimodel.py | 225 ++++++++++++------ 1 file changed, 147 insertions(+), 78 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index f39bb8be8f..1395c4cbd6 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -16,16 +16,30 @@ FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly -def timecoord(frequency, calendar='gregorian', offset='days since 1850-01-01'): +def assert_array_almost_equal(this, other): + """Assert that array `this` almost equals array `other`.""" + if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other): + np.testing.assert_array_equal(this.mask, other.mask) + + np.testing.assert_array_almost_equal(this, other) + + +def timecoord(frequency, + calendar='gregorian', + offset='days since 1850-01-01', + num=3): """Return a time coordinate with the given time points and calendar.""" + + time_data = range(1, num + 1) + if frequency == 'hourly': - dates = [datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]] + dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_data] if frequency == 'daily': - dates = [datetime(1850, 1, i, 0, 0, 0) for i in [1, 2, 3]] + dates = [datetime(1850, 1, i, 0, 0, 0) for i in time_data] elif frequency == 'monthly': - dates = [datetime(1850, i, 15, 0, 0, 0) for i in [1, 2, 3]] + dates = [datetime(1850, i, 15, 0, 0, 0) for i in time_data] elif frequency == 'yearly': - dates = [datetime(1850, 7, i, 0, 0, 0) for i in [1, 2, 3]] + dates = [datetime(1850, 7, i, 0, 0, 0) for i in time_data] unit = Unit(offset, calendar=calendar) points = unit.date2num(dates) @@ -51,25 +65,32 @@ def get_cubes(frequency): return [cube1, cube2, cube3] -VALIDATION_DATA = { - # For span=overlap, take the first 2 items. 
- # Span = full --> statistic computed on [1, 1, 1], [-, 5, 5], [9, 9, -] - # Span = overlap --> statistic computed on [1, 1], [-, 5], [9, 9] - 'mean': [5, 5, 3], - 'std': [5.656854249492381, 4, 2.8284271247461903], - 'std_dev': [5.656854249492381, 4, 2.8284271247461903], - 'min': [1, 1, 1], - 'max': [9, 9, 5], - 'median': [5, 5, 3], - 'p50': [5, 5, 3], - 'p99.5': [8.96, 8.96, 4.98], -} +VALIDATION_DATA_SUCCESS = ( + ('full', 'mean', (5, 5, 3)), + ('full', 'std', (5.656854249492381, 4, 2.8284271247461903)), + ('full', 'std_dev', (5.656854249492381, 4, 2.8284271247461903)), + ('full', 'min', (1, 1, 1)), + ('full', 'max', (9, 9, 5)), + ('full', 'median', (5, 5, 3)), + ('full', 'p50', (5, 5, 3)), + ('full', 'p99.5', (8.96, 8.96, 4.98)), + ('overlap', 'mean', (5, 5)), + ('overlap', 'std', (5.656854249492381, 4)), + ('overlap', 'std_dev', (5.656854249492381, 4)), + ('overlap', 'min', (1, 1)), + ('overlap', 'max', (9, 9)), + ('overlap', 'median', (5, 5)), + ('overlap', 'p50', (5, 5)), + ('overlap', 'p99.5', (8.96, 8.96)), + # test multiple statistics + ('overlap', ('min', 'max'), ((1, 1), (9, 9))), + ('full', ('min', 'max'), ((1, 1, 1), (9, 9, 5))), +) @pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) -@pytest.mark.parametrize('span', SPAN_OPTIONS) -@pytest.mark.parametrize('statistic', VALIDATION_DATA) -def test_multimodel_statistics(span, frequency, statistic): +@pytest.mark.parametrize('span, statistics, expected', VALIDATION_DATA_SUCCESS) +def test_multimodel_statistics(frequency, span, statistics, expected): """High level test for multicube statistics function. - Should work for multiple data frequencies @@ -80,37 +101,31 @@ def test_multimodel_statistics(span, frequency, statistic): """ cubes = get_cubes(frequency) - statistics = (statistic, ) + if isinstance(statistics, str): + statistics = (statistics, ) + expected = (expected, ) result = multi_model_statistics(cubes, span, statistics) assert isinstance(result, dict) - assert len(result.keys()) == 1 - assert statistic in result - - expected = np.ma.array(VALIDATION_DATA[statistic], mask=False) - if span == 'overlap': - expected = expected[:2] + assert set(result.keys()) == set(statistics) - result_cube = result[statistic] - np.testing.assert_array_equal(result_cube.data.mask, expected.mask) - np.testing.assert_array_almost_equal(result_cube.data, expected.data) + for i, statistic in enumerate(statistics): + result_cube = result[statistic] + expected_data = np.ma.array(expected[i], mask=False) + assert_array_almost_equal(result_cube.data, expected_data) -def generate_failing_tests(): - """Generate failing tests.""" - failing_tests = { - 'percentile': ValueError, - 'wpercentile': ValueError, - 'count': TypeError, - 'peak': TypeError, - 'proportion': TypeError, - } +VALIDATION_DATA_FAIL = ( + ('percentile', ValueError), + ('wpercentile', ValueError), + ('count', TypeError), + ('peak', TypeError), + ('proportion', TypeError), +) - yield from failing_tests.items() - -@pytest.mark.parametrize('statistic, error', generate_failing_tests()) +@pytest.mark.parametrize('statistic, error', VALIDATION_DATA_FAIL) def test_all_statistics(statistic, error): cubes = get_cubes('monthly') span = 'overlap' @@ -121,15 +136,10 @@ def test_all_statistics(statistic, error): def test_get_consistent_time_unit(): """Test same calendar returned or default if calendars differ.""" - time1 = timecoord('monthly', '360_day') cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time1, 0)]) time2 = timecoord('monthly', '365_day') - cube2 = Cube([ - 1, - 1, - 1, - ], 
dim_coords_and_dims=[(time2, 0)]) + cube2 = Cube([1, 1, 1], dim_coords_and_dims=[(time2, 0)]) result1 = mm._get_consistent_time_unit([cube1, cube1]) result2 = mm._get_consistent_time_unit([cube1, cube2]) @@ -198,28 +208,92 @@ def test_resolve_span(): mm._resolve_span([span1, span4], span='overlap') -def test_align(): - """ - --> pass multiple cubes with different time coords - --> check that the returned cubes have consistent shapes and calendars - --> check that if a cube is extended, - the extended points are masked (not NaN!) - """ - # cubes = ? - # span = ? - pass +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_align(span): + """Test _align function.""" + # TODO --> check that if a cube is extended, + # the extended points are masked (not NaN!) -def test_combine(): - """ - --> pass multiple combinations of cubes - - if cubes have the same shape, - check that they are combined along a new dimension - - if they have inconsistent shapes, check that iris raises an error - - if they have inconsistent variable names, they should not be combined - """ - # cubes = ? - dim = 'new_dim' + test_calendars = ('360_day', '365_day', 'gregorian', 'proleptic_gregorian', + 'julian') + data = [1, 1, 1] + cubes = [] + + for calendar in test_calendars: + time_coord = timecoord('monthly', '360_day') + cube = Cube(data, dim_coords_and_dims=[(time_coord, 0)]) + cubes.append(cube) + + result_cubes = mm._align(cubes, span) + + calendars = set(cube.coord('time').units.calendar for cube in result_cubes) + + assert len(calendars) == 1 + + shapes = set(cube.shape for cube in result_cubes) + + assert len(shapes) == 1 + assert tuple(shapes)[0] == (len(data), ) + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_combine_same_shape(span): + """Test _combine with same shape of cubes.""" + len_data = 3 + num_cubes = 5 + test_dim = 'test_dim' + cubes = [] + time_coord = timecoord('monthly', '360_day') + + for i in range(num_cubes): + cube = Cube([i] * len_data, dim_coords_and_dims=[(time_coord, 0)]) + cubes.append(cube) + + result_cube = mm._combine(cubes, dim=test_dim) + + dim_coord = result_cube.coord(test_dim) + assert dim_coord.var_name == test_dim + assert result_cube.shape == (num_cubes, len_data) + + desired = np.linspace((0, ) * len_data, + num_cubes - 1, + num=num_cubes, + dtype=int) + np.testing.assert_equal(result_cube.data, desired) + + +def test_combine_different_shape_fail(): + """Test _combine with inconsistent data.""" + num_cubes = 5 + test_dim = 'test_dim' + cubes = [] + + for num in range(1, num_cubes + 1): + time_coord = timecoord('monthly', '360_day', num=num) + cube = Cube([1] * num, dim_coords_and_dims=[(time_coord, 0)]) + cubes.append(cube) + + with pytest.raises(iris.exceptions.MergeError): + _ = mm._combine(cubes, dim=test_dim) + + +def test_combine_inconsistent_var_names_fail(): + """Test _combine with inconsistent var names.""" + num_cubes = 5 + test_dim = 'test_dim' + data = [1, 1, 1] + cubes = [] + + for num in range(num_cubes): + time_coord = timecoord('monthly', '360_day') + cube = Cube(data, + dim_coords_and_dims=[(time_coord, 0)], + var_name=f'test_var_{num}') + cubes.append(cube) + + with pytest.raises(iris.exceptions.MergeError): + _ = mm._combine(cubes, dim=test_dim) def test_compute(): @@ -230,8 +304,6 @@ def test_compute(): - check that the output has a correct variable name - check that the 'new_dim' dimension is removed again - what happens if some of the input data is masked or NaN? - - test with COUNT statistics whether masked points are treated as - expected. 
""" # cube = ? # statistic = ? @@ -239,12 +311,9 @@ def test_compute(): def test_edge_cases(): - """ - # different time offsets in calendar - # different calendars - # no overlap - # statistic without kwargs - # time points not in middle of months - # fail for sub-daily data + """# different time offsets in calendar + + # different calendars # no overlap # statistic without kwargs # time + points not in middle of months # fail for sub-daily data """ pass From ff6ade873f338978fad623142c38d88b1dc9bfe9 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 23 Feb 2021 17:10:58 +0100 Subject: [PATCH 19/68] Implement edge cases for multimodel tests --- .../_multimodel/test_multimodel.py | 158 +++++++++++++++--- 1 file changed, 136 insertions(+), 22 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 1395c4cbd6..cc07b0f008 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -30,16 +30,16 @@ def timecoord(frequency, num=3): """Return a time coordinate with the given time points and calendar.""" - time_data = range(1, num + 1) + time_points = range(1, num + 1) if frequency == 'hourly': - dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_data] + dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_points] if frequency == 'daily': - dates = [datetime(1850, 1, i, 0, 0, 0) for i in time_data] + dates = [datetime(1850, 1, i, 0, 0, 0) for i in time_points] elif frequency == 'monthly': - dates = [datetime(1850, i, 15, 0, 0, 0) for i in time_data] + dates = [datetime(1850, i, 15, 0, 0, 0) for i in time_points] elif frequency == 'yearly': - dates = [datetime(1850, 7, i, 0, 0, 0) for i in time_data] + dates = [datetime(1850, 7, i, 0, 0, 0) for i in time_points] unit = Unit(offset, calendar=calendar) points = unit.date2num(dates) @@ -126,12 +126,13 @@ def test_multimodel_statistics(frequency, span, statistics, expected): @pytest.mark.parametrize('statistic, error', VALIDATION_DATA_FAIL) -def test_all_statistics(statistic, error): +def test_unsupported_statistics_fail(statistic, error): + """Check that unsupported statistics raise an exception.""" cubes = get_cubes('monthly') span = 'overlap' statistics = (statistic, ) with pytest.raises(error): - result = multi_model_statistics(cubes, span, statistics) + _ = multi_model_statistics(cubes, span, statistics) def test_get_consistent_time_unit(): @@ -296,24 +297,137 @@ def test_combine_inconsistent_var_names_fail(): _ = mm._combine(cubes, dim=test_dim) -def test_compute(): +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_different_time_offsets(span): + time1 = timecoord('monthly', '360_day', offset='days since 1888-01-01') + cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time1, 0)]) + time2 = timecoord('monthly', '360_day', offset='days since 1899-01-01') + cube2 = Cube([1, 1, 1], dim_coords_and_dims=[(time2, 0)]) + + cubes = (cube1, cube2) + statistic = 'min' + statistics = (statistic, ) + + result = multi_model_statistics(cubes, span, statistics) + + result_cube = result[statistic] + + time_coord = result_cube.coord('time') + + assert time_coord.units.calendar == 'gregorian' + assert time_coord.units.origin == 'days since 1850-01-01' + + desired = np.array((14., 45., 73.)) + np.testing.assert_array_equal(time_coord.points, desired) + + # old coords are updated in-place + np.testing.assert_array_equal(time1.points, desired) + np.testing.assert_array_equal(time2.points, 
desired) + + +def generate_cube_from_dates(dates): + """Generate cube from list of dates.""" + unit = Unit('days since 1850-01-01', calendar='gregorian') + + time = iris.coords.DimCoord(unit.date2num(dates), + standard_name='time', + units=unit) + + return Cube([1, 1, 1], dim_coords_and_dims=[(time, 0)]) + + +def generate_cubes_with_non_overlapping_timecoords(): + """Generate sample data where time coords do not overlap.""" + time_points = range(1, 4) + dates1 = [datetime(1850, i, 15, 0, 0, 0) for i in time_points] + dates2 = [datetime(1950, i, 15, 0, 0, 0) for i in time_points] + + return ( + generate_cube_from_dates(dates1), + generate_cube_from_dates(dates2), + ) + + +def test_edge_case_time_no_overlap_fail(): + """Test case when time coords do not overlap using span='overlap'. + + Expected behaviour: `multi_model_statistics` should fail if time + points are not overlapping. """ - --> make one big cube with a dimension called 'new dim' - - call with multiple different statistics - - check that the resulting data (computed statistics) is correct - - check that the output has a correct variable name - - check that the 'new_dim' dimension is removed again - - what happens if some of the input data is masked or NaN? + cubes = generate_cubes_with_non_overlapping_timecoords() + + statistic = 'min' + statistics = (statistic, ) + + with pytest.raises(ValueError): + _ = multi_model_statistics(cubes, 'overlap', statistics) + + +def test_edge_case_time_no_overlap_success(): + """Test case when time coords do not overlap using span='full'. + + Expected behaviour: `multi_model_statistics` should use all + available time points. """ - # cube = ? - # statistic = ? - dim = 'new_dim' + cubes = generate_cubes_with_non_overlapping_timecoords() + + statistic = 'min' + statistics = (statistic, ) + + result = multi_model_statistics(cubes, 'full', statistics) + result_cube = result[statistic] + + assert result_cube.coord('time').shape == (6, ) -def test_edge_cases(): - """# different time offsets in calendar +def generate_cubes_with_time_not_in_middle_of_month(): + """Generate sample data where time coords do not overlap.""" + time_points = range(1, 4) + dates1 = [datetime(1850, i, 12, 0, 0, 0) for i in time_points] + dates2 = [datetime(1850, i, 25, 0, 0, 0) for i in time_points] - # different calendars # no overlap # statistic without kwargs # time - points not in middle of months # fail for sub-daily data + return ( + generate_cube_from_dates(dates1), + generate_cube_from_dates(dates2), + ) + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_time_not_in_middle_of_months(span): + """Test case when time coords are not on 15th for monthly data. + + Expected behaviour: `multi_model_statistics` will set all dates to + the 15th. 
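+    (With the default 'days since 1850-01-01' unit, the mid-month points
+    for Jan-Mar 1850 fall at 14, 45 and 73 days, as asserted below.)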
""" - pass + cubes = generate_cubes_with_time_not_in_middle_of_month() + + statistic = 'min' + statistics = (statistic, ) + + result = multi_model_statistics(cubes, span, statistics) + result_cube = result[statistic] + + time_coord = result_cube.coord('time') + + desired = np.array((14., 45., 73.)) + np.testing.assert_array_equal(time_coord.points, desired) + + # input cubes are updated in-place + for cube in cubes: + np.testing.assert_array_equal(cube.coord('time').points, desired) + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_sub_daily_data_fail(span): + """Test case when cubes with sub-daily time coords are passed.""" + time_points = range(1, 4) + dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_points] + + cube = generate_cube_from_dates(dates) + cubes = (cube, cube) + + statistic = 'min' + statistics = (statistic, ) + + with pytest.raises(ValueError): + _ = multi_model_statistics(cubes, span, statistics) From 11232c555b92ca9c0f5b9edeff9e97973f63f542 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 24 Feb 2021 09:42:57 +0100 Subject: [PATCH 20/68] Refactor multimodel tests --- .../_multimodel/test_multimodel.py | 170 +++++++++++------- 1 file changed, 106 insertions(+), 64 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index cc07b0f008..920d69b3fa 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -15,6 +15,9 @@ FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly +CALENDAR_OPTIONS = ('360_day', '365_day', 'gregorian', 'proleptic_gregorian', + 'julian') + def assert_array_almost_equal(this, other): """Assert that array `this` almost equals array `other`.""" @@ -46,22 +49,64 @@ def timecoord(frequency, return iris.coords.DimCoord(points, standard_name='time', units=unit) +def generate_cube_from_dates( + dates, + calendar='gregorian', + offset='days since 1850-01-01', + fill_val=1, + len_data=3, + var_name=None, +): + """Generate test cube from list of dates / frequency specification. + + Parameters + ---------- + calendar : str or list + Date frequency: 'hourly' / 'daily' / 'monthly' / 'yearly' or + list of datetimes. 
+ offset : str + Offset to use + fill_val : int + Value to fill the data with + len_data : int + Number of data / time points + var_name : str + Name of the data variable + + Returns + ------- + iris.cube.Cube + """ + if isinstance(dates, str): + time = timecoord(dates, calendar, offset=offset, num=len_data) + else: + unit = Unit(offset, calendar=calendar) + time = iris.coords.DimCoord(unit.date2num(dates), + standard_name='time', + units=unit) + + return Cube((fill_val, ) * len_data, + dim_coords_and_dims=[(time, 0)], + var_name=var_name) + + def get_cubes(frequency): """Set up cubes used for testing multimodel statistics.""" # Simple 1d cube with standard time cord - time = timecoord(frequency) - cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time, 0)]) + cube1 = generate_cube_from_dates(frequency) # Cube with masked data cube2 = cube1.copy() cube2.data = np.ma.array([5, 5, 5], mask=[True, False, False]) # Cube with deviating time coord - time = timecoord(frequency, - calendar='360_day', - offset='days since 1950-01-01')[:2] - cube3 = Cube([9, 9], dim_coords_and_dims=[(time, 0)]) + cube3 = generate_cube_from_dates(frequency, + calendar='360_day', + offset='days since 1950-01-01', + len_data=2, + fill_val=9) + return [cube1, cube2, cube3] @@ -135,17 +180,28 @@ def test_unsupported_statistics_fail(statistic, error): _ = multi_model_statistics(cubes, span, statistics) -def test_get_consistent_time_unit(): - """Test same calendar returned or default if calendars differ.""" - time1 = timecoord('monthly', '360_day') - cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time1, 0)]) - time2 = timecoord('monthly', '365_day') - cube2 = Cube([1, 1, 1], dim_coords_and_dims=[(time2, 0)]) +@pytest.mark.parametrize('calendar1, calendar2, expected', ( + ('360_day', '360_day', '360_day'), + ('365_day', '365_day', '365_day'), + ('365_day', '360_day', 'gregorian'), + ('360_day', '365_day', 'gregorian'), + ('gregorian', '365_day', 'gregorian'), + ('proleptic_gregorian', 'julian', 'gregorian'), + ('julian', '365_day', 'gregorian'), +)) +def test_get_consistent_time_unit(calendar1, calendar2, expected): + """Test same calendar returned or default if calendars differ. + + Expected behaviour: If the calendars are the same, return that one. + If the calendars are not the same, return 'gregorian'. + """ + cubes = ( + generate_cube_from_dates('monthly', calendar=calendar1), + generate_cube_from_dates('monthly', calendar=calendar2), + ) - result1 = mm._get_consistent_time_unit([cube1, cube1]) - result2 = mm._get_consistent_time_unit([cube1, cube2]) - assert result1.calendar == '360_day' - assert result2.calendar == 'gregorian' + result = mm._get_consistent_time_unit(cubes) + assert result.calendar == expected def test_unify_time_coordinates(): @@ -216,14 +272,14 @@ def test_align(span): # TODO --> check that if a cube is extended, # the extended points are masked (not NaN!) 
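    # (per the first commit in this series, cubes are extended with
    # iris.analysis.Nearest and extrapolation_mode='mask', which is why
    # masked values, not NaNs, are expected at the added time points)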
- test_calendars = ('360_day', '365_day', 'gregorian', 'proleptic_gregorian', - 'julian') - data = [1, 1, 1] + len_data = 3 + cubes = [] - for calendar in test_calendars: - time_coord = timecoord('monthly', '360_day') - cube = Cube(data, dim_coords_and_dims=[(time_coord, 0)]) + for calendar in CALENDAR_OPTIONS: + cube = generate_cube_from_dates('monthly', + calendar=calendar, + len_data=3) cubes.append(cube) result_cubes = mm._align(cubes, span) @@ -235,7 +291,7 @@ def test_align(span): shapes = set(cube.shape for cube in result_cubes) assert len(shapes) == 1 - assert tuple(shapes)[0] == (len(data), ) + assert tuple(shapes)[0] == (len_data, ) @pytest.mark.parametrize('span', SPAN_OPTIONS) @@ -245,10 +301,12 @@ def test_combine_same_shape(span): num_cubes = 5 test_dim = 'test_dim' cubes = [] - time_coord = timecoord('monthly', '360_day') for i in range(num_cubes): - cube = Cube([i] * len_data, dim_coords_and_dims=[(time_coord, 0)]) + cube = generate_cube_from_dates('monthly', + '360_day', + fill_val=i, + len_data=len_data) cubes.append(cube) result_cube = mm._combine(cubes, dim=test_dim) @@ -271,8 +329,7 @@ def test_combine_different_shape_fail(): cubes = [] for num in range(1, num_cubes + 1): - time_coord = timecoord('monthly', '360_day', num=num) - cube = Cube([1] * num, dim_coords_and_dims=[(time_coord, 0)]) + cube = generate_cube_from_dates('monthly', '360_day', len_data=num) cubes.append(cube) with pytest.raises(iris.exceptions.MergeError): @@ -283,14 +340,12 @@ def test_combine_inconsistent_var_names_fail(): """Test _combine with inconsistent var names.""" num_cubes = 5 test_dim = 'test_dim' - data = [1, 1, 1] cubes = [] for num in range(num_cubes): - time_coord = timecoord('monthly', '360_day') - cube = Cube(data, - dim_coords_and_dims=[(time_coord, 0)], - var_name=f'test_var_{num}') + cube = generate_cube_from_dates('monthly', + '360_day', + var_name=f'test_var_{num}') cubes.append(cube) with pytest.raises(iris.exceptions.MergeError): @@ -299,12 +354,15 @@ def test_combine_inconsistent_var_names_fail(): @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_edge_case_different_time_offsets(span): - time1 = timecoord('monthly', '360_day', offset='days since 1888-01-01') - cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(time1, 0)]) - time2 = timecoord('monthly', '360_day', offset='days since 1899-01-01') - cube2 = Cube([1, 1, 1], dim_coords_and_dims=[(time2, 0)]) + cubes = ( + generate_cube_from_dates('monthly', + '360_day', + offset='days since 1888-01-01'), + generate_cube_from_dates('monthly', + '360_day', + offset='days since 1899-01-01'), + ) - cubes = (cube1, cube2) statistic = 'min' statistics = (statistic, ) @@ -320,20 +378,9 @@ def test_edge_case_different_time_offsets(span): desired = np.array((14., 45., 73.)) np.testing.assert_array_equal(time_coord.points, desired) - # old coords are updated in-place - np.testing.assert_array_equal(time1.points, desired) - np.testing.assert_array_equal(time2.points, desired) - - -def generate_cube_from_dates(dates): - """Generate cube from list of dates.""" - unit = Unit('days since 1850-01-01', calendar='gregorian') - - time = iris.coords.DimCoord(unit.date2num(dates), - standard_name='time', - units=unit) - - return Cube([1, 1, 1], dim_coords_and_dims=[(time, 0)]) + # input cubes are updated in-place + for cube in cubes: + np.testing.assert_array_equal(cube.coord('time').points, desired) def generate_cubes_with_non_overlapping_timecoords(): @@ -380,18 +427,6 @@ def test_edge_case_time_no_overlap_success(): assert 
result_cube.coord('time').shape == (6, ) -def generate_cubes_with_time_not_in_middle_of_month(): - """Generate sample data where time coords do not overlap.""" - time_points = range(1, 4) - dates1 = [datetime(1850, i, 12, 0, 0, 0) for i in time_points] - dates2 = [datetime(1850, i, 25, 0, 0, 0) for i in time_points] - - return ( - generate_cube_from_dates(dates1), - generate_cube_from_dates(dates2), - ) - - @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_edge_case_time_not_in_middle_of_months(span): """Test case when time coords are not on 15th for monthly data. @@ -399,7 +434,14 @@ def test_edge_case_time_not_in_middle_of_months(span): Expected behaviour: `multi_model_statistics` will set all dates to the 15th. """ - cubes = generate_cubes_with_time_not_in_middle_of_month() + time_points = range(1, 4) + dates1 = [datetime(1850, i, 12, 0, 0, 0) for i in time_points] + dates2 = [datetime(1850, i, 25, 0, 0, 0) for i in time_points] + + cubes = ( + generate_cube_from_dates(dates1), + generate_cube_from_dates(dates2), + ) statistic = 'min' statistics = (statistic, ) From e0577b24f071343d5d7b4d9f9cb1bbf207afd0f0 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 24 Feb 2021 10:48:56 +0100 Subject: [PATCH 21/68] Add test cases for _resolve_span --- esmvalcore/preprocessor/_multimodel.py | 19 ++-- .../_multimodel/test_multimodel.py | 101 ++++++++---------- 2 files changed, 53 insertions(+), 67 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index f5b186066e..6afba87bab 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -97,18 +97,21 @@ def _unify_time_coordinates(cubes): def _resolve_span(all_times, span): """Construct new time array based on the span parameter.""" - if span == 'full': + if len(all_times) == 1: + new_times = np.array(all_times[0]) + + elif span == 'full': new_times = reduce(np.union1d, all_times) - return new_times - if span == 'overlap': + elif span == 'overlap': new_times = reduce(np.intersect1d, all_times) - if new_times.size > 0: - return new_times - raise ValueError("No time overlap found between input cubes.") + if new_times.size < 1: + raise ValueError("No time overlap found between input cubes.") + else: + raise ValueError("Unknown value for span. Expected 'full' or 'overlap'" + "got {}".format(span)) - raise ValueError("Unknown value for span. 
Expected 'full' or 'overlap'" - "got {}".format(span)) + return new_times def _align(cubes, span): diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 920d69b3fa..cbfbaba14d 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -90,7 +90,7 @@ def generate_cube_from_dates( var_name=var_name) -def get_cubes(frequency): +def get_cubes_for_validation_test(frequency): """Set up cubes used for testing multimodel statistics.""" # Simple 1d cube with standard time cord @@ -144,7 +144,7 @@ def test_multimodel_statistics(frequency, span, statistics, expected): - Should deal correctly with different mask options - Return type should be a dict with all requested statistics as keys """ - cubes = get_cubes(frequency) + cubes = get_cubes_for_validation_test(frequency) if isinstance(statistics, str): statistics = (statistics, ) @@ -173,7 +173,7 @@ def test_multimodel_statistics(frequency, span, statistics, expected): @pytest.mark.parametrize('statistic, error', VALIDATION_DATA_FAIL) def test_unsupported_statistics_fail(statistic, error): """Check that unsupported statistics raise an exception.""" - cubes = get_cubes('monthly') + cubes = get_cubes_for_validation_test('monthly') span = 'overlap' statistics = (statistic, ) with pytest.raises(error): @@ -204,65 +204,47 @@ def test_get_consistent_time_unit(calendar1, calendar2, expected): assert result.calendar == expected -def test_unify_time_coordinates(): - """Test whether the time coordinates are made consistent.""" - - # # Check that monthly data have midpoints at 15th day - # cube1 = Cube([1, 1, 1], ) - - # hourly = { - # 'input1': timecoord([datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]], - # calendar='standard'), - # 'input2' timecoord([datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]]), - # calendar='gregorian'), - # 'output': timecoord([datetime(1850, 1, 1, i, 0, 0) for i in [1, 2, 3]], - # calendar='gregorian') - # } - - # daily = ([datetime(1850, 1, i, 0, 0, 0) for i in [1, 2, 3]], - # [datetime(1850, 1, i, 12, 0, 0) for i in [1, 2, 3]]) - # monthly = ([datetime(1850, i, 1, 0, 0, 0) for i in [1, 2, 3]], - # [datetime(1850, i, 15, 0, 0, 0) for i in [1, 2, 3]]) - # yearly = ([datetime(1850+i, 1, 7, 0, 0, 0) for i in [1, 2, 3]], - # [datetime(1850+i, 1, 1, 0, 0, 0) for i in [1, 2, 3]]) - - # time_sets = [hourly, daily, monthly, yearly] - # calendars_sets = [ - # ('standard', 'gregorian'), - # ('360_day', '360_day'), - # ('365_day', 'proleptic_gregorian'), - # ('standard', 'standard'), - # ] - - # for (time1, time2), (calendar1, calendar2) in zip(time_sets, calendar_sets): - # cube1 = [Cube([1, 1, 1], dim_coords_and_dims=[(timecoord(time1, calendar1), 0)]) - # cube2 = [Cube([1, 1, 1], dim_coords_and_dims=[(timecoord(time2, calendar2), 0)]) - # cubes = mm._unify_time_coordinates([cube1, cube2]) - - # --> pass multiple cubes with all kinds of different calendars - # - Check that output cubes all have the same calendar - # - check that the dates in the output correspond to the dates in the input - # - do this for different time frequencies - # - check warning/error for (sub)daily data - - -def test_resolve_span(): - """Check that resolve_span returns the correct union/intersection.""" - span1 = [1, 2, 3] - span2 = [2, 3, 4] - span3 = [3, 4, 5] - span4 = [4, 5, 6] +def generate_resolve_span_cases(valid): + """Generate test cases for _resolve_span.""" + points_1 = (1, 2, 3) + points_2 = (2, 3, 4) + 
points_3 = (3, 4, 5) + points_4 = (4, 5, 6) + empty_tuple = () + + if valid: + yield from ( + ((points_1, ), 'overlap', points_1), + ((points_1, ), 'full', points_1), + ((points_1, points_2), 'overlap', (2, 3)), + ((points_1, points_2), 'full', (1, 2, 3, 4)), + ((points_1, points_2, points_3), 'overlap', (3, )), + ((points_1, points_2, points_3), 'full', (1, 2, 3, 4, 5)), + ((points_1, points_4), 'full', (1, 2, 3, 4, 5, 6)), + ) + else: + yield from ( + (empty_tuple, 'overlap', TypeError), + (empty_tuple, 'full', TypeError), + ((points_1, points_4), 'overlap', ValueError), + ) + - assert all(mm._resolve_span([span1, span2], span='overlap') == [2, 3]) - assert all(mm._resolve_span([span1, span2], span='full') == [1, 2, 3, 4]) +@pytest.mark.parametrize('points, span, expected', + generate_resolve_span_cases(True)) +def test_resolve_span(points, span, expected): + """Check that resolve_span returns the correct union/intersection.""" + result = mm._resolve_span(points, span=span) + assert isinstance(result, np.ndarray) + np.testing.assert_equal(result, expected) - assert all(mm._resolve_span([span1, span2, span3], span='overlap') == [3]) - assert all( - mm._resolve_span([span1, span2, span3], span='full') == - [1, 2, 3, 4, 5]) - with pytest.raises(ValueError): - mm._resolve_span([span1, span4], span='overlap') +@pytest.mark.parametrize('points, span, error', + generate_resolve_span_cases(False)) +def test_resolve_span_fail(points, span, error): + """Test failing case for _resolve_span.""" + with pytest.raises(error): + mm._resolve_span(points, span=span) @pytest.mark.parametrize('span', SPAN_OPTIONS) @@ -287,6 +269,7 @@ def test_align(span): calendars = set(cube.coord('time').units.calendar for cube in result_cubes) assert len(calendars) == 1 + assert list(calendars)[0] == 'gregorian' shapes = set(cube.shape for cube in result_cubes) From c3a9a0a7b9621f5678876e6a99edb195f0305614 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 24 Feb 2021 11:32:33 +0100 Subject: [PATCH 22/68] Generate cube using existing function --- tests/unit/preprocessor/_multimodel/test_multimodel.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index cbfbaba14d..d15691f943 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -445,10 +445,7 @@ def test_edge_case_time_not_in_middle_of_months(span): @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_edge_case_sub_daily_data_fail(span): """Test case when cubes with sub-daily time coords are passed.""" - time_points = range(1, 4) - dates = [datetime(1850, 1, 1, i, 0, 0) for i in time_points] - - cube = generate_cube_from_dates(dates) + cube = generate_cube_from_dates('hourly') cubes = (cube, cube) statistic = 'min' From 2aa566ae31c5718c8ab650d0f4e345a641a3cebf Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Wed, 24 Feb 2021 12:35:06 +0100 Subject: [PATCH 23/68] Fix Codacy issue --- esmvalcore/preprocessor/_multimodel.py | 1 - 1 file changed, 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 6afba87bab..b4f9d6d1ab 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -164,7 +164,6 @@ def _compute(cube, statistic: str, dim: str = 'new_dim'): :obj:`iris.cube.Cube` Collapsed cube. 
""" - statistic = statistic.lower() kwargs = {} From fb33c19e61cfb6a622bdbe539d124adc94d070de Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 25 Feb 2021 11:04:34 +0100 Subject: [PATCH 24/68] Remove redundant test file --- .../preprocessor/_multimodel/test_span.py | 83 ------------------- 1 file changed, 83 deletions(-) delete mode 100644 tests/unit/preprocessor/_multimodel/test_span.py diff --git a/tests/unit/preprocessor/_multimodel/test_span.py b/tests/unit/preprocessor/_multimodel/test_span.py deleted file mode 100644 index 29abe16703..0000000000 --- a/tests/unit/preprocessor/_multimodel/test_span.py +++ /dev/null @@ -1,83 +0,0 @@ -"""tests for multimodel preprocessor.""" - -from datetime import datetime - -import iris -import numpy as np -import pytest -from cf_units import Unit -from iris.cube import Cube - -from esmvalcore.preprocessor import multi_model_statistics - -# import esmvalcore.preprocessor._multimodel as mm - -SPAN_OPTIONS = ('overlap', 'full') - -FREQUENCY_OPTIONS = ('daily', 'monthly', 'yearly') # hourly - - -def timecoord(dates, calendar='gregorian'): - """Return a time coordinate with the given time points and calendar.""" - unit = Unit('days since 1850-01-01', calendar=calendar) - points = unit.date2num(dates) - return iris.coords.DimCoord(points, standard_name='time', units=unit) - - -# lons = iris.coords.DimCoord([0,], -# standard_name='longitude', units='degrees_east') -# lats = iris.coords.DimCoord([0,], -# standard_name='latitude', units='degrees_north') - - -def get_cubes(frequency): - """Set up cubes used for testing multimodel statistics.""" - if frequency == 'hourly': - dates = [datetime(1850, 1, 1, i, 0, 0) for i in range(1, 4)] - if frequency == 'daily': - dates = [datetime(1850, 1, i, 0, 0, 0) for i in range(1, 4)] - elif frequency == 'monthly': - dates = [datetime(1850, i, 15, 0, 0, 0) for i in range(1, 4)] - elif frequency == 'yearly': - dates = [datetime(1850, 7, i, 0, 0, 0) for i in range(1, 4)] - - cube1 = Cube([1, 1, 1], dim_coords_and_dims=[(timecoord(dates), 0)]) - cube2 = cube1.copy() - cube2.data = np.ma.array([5, 5, 5], mask=[True, False, False]) - cube3 = Cube([9, 9], dim_coords_and_dims=[(timecoord(dates[:2]), 0)]) - return [cube1, cube2, cube3] - - -@pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) -@pytest.mark.parametrize('span', SPAN_OPTIONS) -# @pytest.mark.parametrize('stats', STATISTICS_OPTIONS) -def test_multimodel_statistics(span, frequency): - """High level test for multicube statistics function.""" - cubes = get_cubes(frequency) - verification_data = { - # For span=overlap, take the first 2 items. 
- # Span = full --> statistic computed on [1, 1, 1], [-, 5, 5], [9, 9, -] - # Span = overlap --> statistic computed on [1, 1], [-, 5], [9, 9] - 'mean': [5, 5, 3], - 'std': [5.656854249492381, 4, 2.8284271247461903], - 'std_dev': [5.656854249492381, 4, 2.8284271247461903], - 'min': [1, 1, 1], - 'max': [9, 9, 5], - 'median': [5, 5, 3], - 'p50': [5, 5, 3], - 'p99.5': [8.96, 8.96, 4.98], - } - - statistics = verification_data.keys() - results = multi_model_statistics(cubes, span, statistics) - - assert isinstance(results, dict) - - assert results.keys() == statistics - - for statistic, result in results.items(): - expected = np.ma.array(verification_data[statistic], mask=False) - if span == 'overlap': - expected = expected[:2] - np.testing.assert_array_equal(result.data.mask, expected.mask) - np.testing.assert_array_almost_equal(result.data, expected.data) From 8bd985886ea5531f80ea9d71d4d4a4c5d2ed3ed4 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 25 Feb 2021 13:14:47 +0100 Subject: [PATCH 25/68] Mark tests failing because of inconsistent plev data with xfail --- .../multimodel_statistics/test_multimodel.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 768d9c9cba..381efb39fb 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -21,7 +21,11 @@ reason='Cannot calculate statistics with single cube in list')), '365_day', 'gregorian', - 'proleptic_gregorian', + pytest.param( + 'proleptic_gregorian', + marks=pytest.mark.xfail( + raises=iris.exceptions.MergeError, + reason='https://github.com/ESMValGroup/ESMValCore/issues/956')), pytest.param( 'julian', marks=pytest.mark.skip( @@ -200,6 +204,9 @@ def multimodel_regression_test(cubes, span, name): raise RuntimeError(f'Wrote reference data to {filename.absolute()}') +@pytest.mark.xfail( + raises=iris.exceptions.MergeError, + reason='https://github.com/ESMValGroup/ESMValCore/issues/956') @pytest.mark.use_sample_data @pytest.mark.parametrize('span', SPAN_PARAMS) def test_multimodel_regression_month(timeseries_cubes_month, span): @@ -238,8 +245,11 @@ def test_multimodel_no_vertical_dimension(timeseries_cubes_month): @pytest.mark.use_sample_data @pytest.mark.xfail( - 'iris.exceptions.CoordinateNotFoundError', - reason='https://github.com/ESMValGroup/ESMValCore/issues/891') + raises=iris.exceptions.MergeError, + reason='https://github.com/ESMValGroup/ESMValCore/issues/956') +# @pytest.mark.xfail( +# raises=iris.exceptions.CoordinateNotFoundError, +# reason='https://github.com/ESMValGroup/ESMValCore/issues/891') def test_multimodel_no_horizontal_dimension(timeseries_cubes_month): """Test statistic without horizontal dimension using monthly data.""" span = 'full' @@ -262,7 +272,7 @@ def test_multimodel_only_time_dimension(timeseries_cubes_month): @pytest.mark.use_sample_data @pytest.mark.xfail( - 'ValueError', + raises=ValueError, reason='https://github.com/ESMValGroup/ESMValCore/issues/890') def test_multimodel_no_time_dimension(timeseries_cubes_month): """Test statistic without time dimension using monthly data.""" From 649b8f301dfbd5b247f84a354809203fa92e0c5a Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 25 Feb 2021 13:24:25 +0100 Subject: [PATCH 26/68] Make cache key more readable --- .../multimodel_statistics/test_multimodel.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) 
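The diff that follows rewrites `get_cache_key` to join its parts with underscores and labels instead of whitespace. As an illustration of the resulting key format (a sketch; the version numbers below are made up, not taken from the patch):

    >>> get_cache_key('timeseries_monthly')
    'timeseries_monthly_iris-3.0.1_numpy-1.20.1_python-3.8.5'

`platform.python_version()` returns just the dotted version string, whereas the old key embedded `sys.version`, which also contains build date and compiler details and therefore spaces.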
diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 381efb39fb..3fc3c4dafb 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -1,7 +1,7 @@ """Test using sample data for :func:`esmvalcore.preprocessor._multimodel`.""" import pickle -import sys +import platform from itertools import groupby from pathlib import Path @@ -80,12 +80,9 @@ def get_cache_key(value): manually clean the pytest cache with the command `pytest --cache- clear`. """ - return ' '.join([ - str(value), - iris.__version__, - np.__version__, - sys.version, - ]) + py_version = platform.python_version() + return (f'{value}_iris-{iris.__version__}_' + f'numpy-{np.__version__}_python-{py_version}') @pytest.fixture(scope="module") From 554edea920ea44e2a3d8d81da734ab6aa97c69e4 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 25 Feb 2021 13:44:28 +0100 Subject: [PATCH 27/68] Compare coord / metadata attributes directly --- .../multimodel_statistics/test_multimodel.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 3fc3c4dafb..5f43dcdb1f 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -43,6 +43,23 @@ def assert_array_almost_equal(this, other): np.testing.assert_array_almost_equal(this, other) +def assert_coords_equal(this: list, other: list): + """Assert coords list `this` equals coords list `other`.""" + for this_coord, other_coord in zip(this, other): + np.testing.assert_equal(this_coord.points, other_coord.points) + assert this_coord.var_name == other_coord.var_name + assert this_coord.standard_name == other_coord.standard_name + assert this_coord.units == other_coord.units + + +def assert_metadata_equal(this, other): + """Assert metadata `this` are equal to metadata `other`.""" + assert this.standard_name == other.standard_name + assert this.long_name == other.long_name + assert this.var_name == other.var_name + assert this.units == other.units + + def fix_metadata(cubes): """Fix metadata.""" for cube in cubes: @@ -183,17 +200,10 @@ def multimodel_regression_test(cubes, span, name): filename = Path(__file__).with_name(f'{name}-{span}-{statistic}.nc') if filename.exists(): reference_cube = iris.load_cube(str(filename)) - assert_array_almost_equal(result_cube.data, reference_cube.data) - - # Compare coords - for this_coord, other_coord in zip(result_cube.coords(), - reference_cube.coords()): - assert this_coord == other_coord - # remove Conventions which are added by Iris on save - reference_cube.attributes.pop('Conventions', None) - - assert reference_cube.metadata == result_cube.metadata + assert_array_almost_equal(result_cube.data, reference_cube.data) + assert_metadata_equal(result_cube.metadata, reference_cube.metadata) + assert_coords_equal(result_cube.coords(), reference_cube.coords()) else: # The test will fail if no regression data are available. 
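A caveat on the comparison helpers added above (an editorial note, not part of the patch series): `zip` truncates silently when the two coordinate lists differ in length, so a missing coordinate in the result cube would not fail the test. A stricter variant could assert the lengths first; a minimal sketch, with the hypothetical name `assert_coords_equal_strict` and assuming `numpy` is imported as `np` as in the test module:

    def assert_coords_equal_strict(this: list, other: list):
        """Like assert_coords_equal, but also fail on missing/extra coords."""
        assert len(this) == len(other)  # zip() alone would hide a mismatch
        for this_coord, other_coord in zip(this, other):
            np.testing.assert_equal(this_coord.points, other_coord.points)
            assert this_coord.var_name == other_coord.var_name
            assert this_coord.standard_name == other_coord.standard_name
            assert this_coord.units == other_coord.units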
From ade5f35b3480510657758b0b822e71484662dca7 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 25 Feb 2021 14:45:40 +0100 Subject: [PATCH 28/68] Use `allclose` instead of `almost_equal` for array comparison --- tests/sample_data/multimodel_statistics/test_multimodel.py | 2 +- tests/unit/preprocessor/_multimodel/test_multimodel.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 5f43dcdb1f..ad79a66e6f 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -40,7 +40,7 @@ def assert_array_almost_equal(this, other): if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other): np.testing.assert_array_equal(this.mask, other.mask) - np.testing.assert_array_almost_equal(this, other) + np.testing.assert_allclose(this, other) def assert_coords_equal(this: list, other: list): diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index d15691f943..5a4aae319b 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -24,7 +24,7 @@ def assert_array_almost_equal(this, other): if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other): np.testing.assert_array_equal(this.mask, other.mask) - np.testing.assert_array_almost_equal(this, other) + np.testing.assert_allclose(this, other) def timecoord(frequency, From 04dbaf6cb6d71d1fa70a35d1fad9b52242cb8b32 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 25 Feb 2021 14:46:57 +0100 Subject: [PATCH 29/68] Group iris.analysis functions --- esmvalcore/preprocessor/_multimodel.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index b4f9d6d1ab..490bcf4f0b 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -13,20 +13,22 @@ logger = logging.getLogger(__name__) STATISTIC_MAPPING = { - 'count': iris.analysis.COUNT, 'gmean': iris.analysis.GMEAN, 'hmean': iris.analysis.HMEAN, - 'mean': iris.analysis.MEAN, + 'max': iris.analysis.MAX, 'median': iris.analysis.MEDIAN, 'min': iris.analysis.MIN, - 'max': iris.analysis.MAX, - 'peak': iris.analysis.PEAK, - 'percentile': iris.analysis.PERCENTILE, - 'proportion': iris.analysis.PROPORTION, 'rms': iris.analysis.RMS, - 'std_dev': iris.analysis.STD_DEV, 'sum': iris.analysis.SUM, + # lazy via dask + 'mean': iris.analysis.MEAN, + 'std_dev': iris.analysis.STD_DEV, 'variance': iris.analysis.VARIANCE, + # not directly supported + 'count': iris.analysis.COUNT, + 'peak': iris.analysis.PEAK, + 'percentile': iris.analysis.PERCENTILE, + 'proportion': iris.analysis.PROPORTION, 'wpercentile': iris.analysis.WPERCENTILE, } From 59f25647f39881615cf8c095e41ac0c97f5dbbc6 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 1 Mar 2021 14:46:28 +0100 Subject: [PATCH 30/68] Add tests to make sure returned cubes are still lazy --- .../_multimodel/test_multimodel.py | 59 ++++++++++++++++--- 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 5a4aae319b..f3a9e5d74b 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ 
-2,6 +2,7 @@ from datetime import datetime +import dask.array as da import iris import numpy as np import pytest @@ -56,6 +57,7 @@ def generate_cube_from_dates( fill_val=1, len_data=3, var_name=None, + lazy=False, ): """Generate test cube from list of dates / frequency specification. @@ -85,27 +87,34 @@ def generate_cube_from_dates( standard_name='time', units=unit) - return Cube((fill_val, ) * len_data, - dim_coords_and_dims=[(time, 0)], - var_name=var_name) + data = np.array((fill_val, ) * len_data) + if lazy: + data = da.from_array(data) -def get_cubes_for_validation_test(frequency): + return Cube(data, dim_coords_and_dims=[(time, 0)], var_name=var_name) + + +def get_cubes_for_validation_test(frequency, lazy=False): """Set up cubes used for testing multimodel statistics.""" # Simple 1d cube with standard time cord - cube1 = generate_cube_from_dates(frequency) + cube1 = generate_cube_from_dates(frequency, lazy=lazy) # Cube with masked data cube2 = cube1.copy() - cube2.data = np.ma.array([5, 5, 5], mask=[True, False, False]) + data2 = np.ma.array([5, 5, 5], mask=[True, False, False]) + if lazy: + data2 = da.from_array(data2) + cube2.data = data2 # Cube with deviating time coord cube3 = generate_cube_from_dates(frequency, calendar='360_day', offset='days since 1950-01-01', len_data=2, - fill_val=9) + fill_val=9, + lazy=lazy) return [cube1, cube2, cube3] @@ -161,6 +170,42 @@ def test_multimodel_statistics(frequency, span, statistics, expected): assert_array_almost_equal(result_cube.data, expected_data) +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_lazy_data_consistent_times(span): + cubes = ( + generate_cube_from_dates('monthly', fill_val=1, lazy=True), + generate_cube_from_dates('monthly', fill_val=3, lazy=True), + generate_cube_from_dates('monthly', fill_val=6, lazy=True), + ) + + for cube in cubes: + assert cube.has_lazy_data() + + statistic = 'sum' + statistics = (statistic, ) + + result = mm._multicube_statistics(cubes, span=span, statistics=statistics) + + result_cube = result[statistic] + assert result_cube.has_lazy_data() + + +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_lazy_data_inconsistent_times(span): + cubes = get_cubes_for_validation_test('monthly', lazy=True) + + for cube in cubes: + assert cube.has_lazy_data() + + statistic = 'sum' + statistics = (statistic, ) + + result = mm._multicube_statistics(cubes, span=span, statistics=statistics) + + result_cube = result[statistic] + assert result_cube.has_lazy_data() + + VALIDATION_DATA_FAIL = ( ('percentile', ValueError), ('wpercentile', ValueError), From 24bcea04a58a8a6dad130246f966bb481d8bf480 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 1 Mar 2021 14:50:23 +0100 Subject: [PATCH 31/68] Fix bug with check if cubes are already aligned Using `reduce`, this would always fail --- esmvalcore/preprocessor/_multimodel.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 490bcf4f0b..232b445746 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -116,14 +116,27 @@ def _resolve_span(all_times, span): return new_times +def _time_coords_are_aligned(cubes): + """Return `True` if time coords are aligned.""" + first_time_array = cubes[0].coord('time').points + + for cube in cubes[1:]: + other_time_array = cube.coord('time').points + if not np.array_equal(first_time_array, other_time_array): + return False + + return True + + def _align(cubes, 
span): """Expand or subset cubes so they share a common time span.""" _unify_time_coordinates(cubes) - all_time_arrays = [cube.coord('time').points for cube in cubes] - if reduce(np.array_equal, all_time_arrays): - # cubes are already aligned + + if _time_coords_are_aligned(cubes): return cubes + all_time_arrays = [cube.coord('time').points for cube in cubes] + new_times = _resolve_span(all_time_arrays, span) # new_times = cubes[0].coord('time').units.num2date(new_times) sample_points = [('time', new_times)] From 1a6695baa59c100062fae6f13f9b9497b1f10e4e Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 1 Mar 2021 15:48:17 +0100 Subject: [PATCH 32/68] Remove aux coord after cubes are merged --- esmvalcore/preprocessor/_multimodel.py | 8 +++++++- tests/unit/preprocessor/_multimodel/test_multimodel.py | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 232b445746..ebdef01920 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -159,7 +159,13 @@ def _combine(cubes, dim='new_dim'): cubes = iris.cube.CubeList(cubes) - return cubes.merge_cube() + merged_cube = cubes.merge_cube() + + # Clean up after merge, because new dimension is no longer needed + for cube in cubes: + cube.remove_coord(dim) + + return merged_cube def _compute(cube, statistic: str, dim: str = 'new_dim'): diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index d993093dac..818636f364 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -557,9 +557,18 @@ def test_return_products(): result1 = mm._multiproduct_statistics(products, keep_input_datasets=True, **kwargs) + result2 = mm._multiproduct_statistics(products, keep_input_datasets=False, **kwargs) assert result1 == set([input1, input2, output]) assert result2 == set([output]) + + result3 = mm.multi_model_statistics(products, **kwargs) + result4 = mm.multi_model_statistics(products, + keep_input_datasets=False, + **kwargs) + + assert result3 == result1 + assert result4 == result2 From 9b3708f6d65f7dde28660ea27beb73871e00f7d2 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 1 Mar 2021 16:28:11 +0100 Subject: [PATCH 33/68] Implement lazy alignment for span='overlap' --- esmvalcore/preprocessor/_multimodel.py | 35 ++++++++++++++++++-------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index ebdef01920..3fcf80ed7b 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -13,23 +13,22 @@ logger = logging.getLogger(__name__) STATISTIC_MAPPING = { - 'gmean': iris.analysis.GMEAN, - 'hmean': iris.analysis.HMEAN, + 'gmean': iris.analysis.GMEAN, # not lazy in iris + 'hmean': iris.analysis.HMEAN, # not lazy in iris 'max': iris.analysis.MAX, 'median': iris.analysis.MEDIAN, 'min': iris.analysis.MIN, 'rms': iris.analysis.RMS, 'sum': iris.analysis.SUM, - # lazy via dask 'mean': iris.analysis.MEAN, 'std_dev': iris.analysis.STD_DEV, 'variance': iris.analysis.VARIANCE, # not directly supported 'count': iris.analysis.COUNT, 'peak': iris.analysis.PEAK, - 'percentile': iris.analysis.PERCENTILE, - 'proportion': iris.analysis.PROPORTION, - 'wpercentile': iris.analysis.WPERCENTILE, + 'percentile': iris.analysis.PERCENTILE, # not lazy in iris + 
'proportion': iris.analysis.PROPORTION, # not lazy in iris + 'wpercentile': iris.analysis.WPERCENTILE, # not lazy in iris } @@ -128,6 +127,14 @@ def _time_coords_are_aligned(cubes): return True +def _subset(cube, times): + """Subset cube to given time range.""" + begin = cube.coord('time').units.num2date(times[0]) + end = cube.coord('time').units.num2date(times[-1]) + constraint = iris.Constraint(time=lambda cell: begin <= cell.point <= end) + return cube.extract(constraint) + + def _align(cubes, span): """Expand or subset cubes so they share a common time span.""" _unify_time_coordinates(cubes) @@ -137,13 +144,19 @@ def _align(cubes, span): all_time_arrays = [cube.coord('time').points for cube in cubes] - new_times = _resolve_span(all_time_arrays, span) - # new_times = cubes[0].coord('time').units.num2date(new_times) - sample_points = [('time', new_times)] - scheme = iris.analysis.Nearest(extrapolation_mode='mask') - new_cubes = [cube.interpolate(sample_points, scheme) for cube in cubes] + if span == 'overlap': + common_times = reduce(np.intersect1d, all_time_arrays) + new_cubes = [_subset(cube, common_times) for cube in cubes] + else: + new_times = _resolve_span(all_time_arrays, span) + # new_times = cubes[0].coord('time').units.num2date(new_times) + sample_points = [('time', new_times)] + scheme = iris.analysis.Nearest(extrapolation_mode='mask') + new_cubes = [cube.interpolate(sample_points, scheme) for cube in cubes] for cube in new_cubes: + # Make sure bounds exist and are consistent + cube.coord('time').bounds = None cube.coord('time').guess_bounds() return new_cubes From b07c70467e0e4abdfc6bb94dcaddf8251de64a6e Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 1 Mar 2021 16:28:34 +0100 Subject: [PATCH 34/68] Fix validation data --- tests/unit/preprocessor/_multimodel/test_multimodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 818636f364..7e5e5d4ed9 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -136,7 +136,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): ('overlap', 'mean', (5, 5)), ('overlap', 'std_dev', (5.656854249492381, 4)), pytest.param( - 'full', + 'overlap', 'std', (5.656854249492381, 4), marks=pytest.mark.xfail( raises=AssertionError, From a43ecb9082c405839cd4a0c39a2116da2b347135 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Mon, 1 Mar 2021 17:19:21 +0100 Subject: [PATCH 35/68] Use extrapolate extend time points for span='full' --- esmvalcore/preprocessor/_multimodel.py | 29 +++++++++++++++++--------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 3fcf80ed7b..2187f9c320 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -127,14 +127,23 @@ def _time_coords_are_aligned(cubes): return True -def _subset(cube, times): +def _subset(cube, time_points): """Subset cube to given time range.""" - begin = cube.coord('time').units.num2date(times[0]) - end = cube.coord('time').units.num2date(times[-1]) + begin = cube.coord('time').units.num2date(time_points[0]) + end = cube.coord('time').units.num2date(time_points[-1]) constraint = iris.Constraint(time=lambda cell: begin <= cell.point <= end) return cube.extract(constraint) +def _extend(cube, time_points): + """Extend cube to a specified time 
range.""" + time_points = cube.coord('time').units.num2date(time_points) + sample_points = [('time', time_points)] + scheme = iris.analysis.Nearest(extrapolation_mode='mask') + + return cube.interpolate(sample_points, scheme) + + def _align(cubes, span): """Expand or subset cubes so they share a common time span.""" _unify_time_coordinates(cubes) @@ -145,14 +154,14 @@ def _align(cubes, span): all_time_arrays = [cube.coord('time').points for cube in cubes] if span == 'overlap': - common_times = reduce(np.intersect1d, all_time_arrays) - new_cubes = [_subset(cube, common_times) for cube in cubes] + common_time_points = reduce(np.intersect1d, all_time_arrays) + new_cubes = [_subset(cube, common_time_points) for cube in cubes] + elif span == 'full': + all_time_points = reduce(np.union1d, all_time_arrays) + new_cubes = [_extend(cube, all_time_points) for cube in cubes] else: - new_times = _resolve_span(all_time_arrays, span) - # new_times = cubes[0].coord('time').units.num2date(new_times) - sample_points = [('time', new_times)] - scheme = iris.analysis.Nearest(extrapolation_mode='mask') - new_cubes = [cube.interpolate(sample_points, scheme) for cube in cubes] + raise ValueError(f"Invalid argument for span: {span!r}" + "Must be one of 'overlap', 'full'.") for cube in new_cubes: # Make sure bounds exist and are consistent From 2ffcb9aedb89bb20cddc39a3e89d788619ba4eb0 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 2 Mar 2021 11:52:45 +0100 Subject: [PATCH 36/68] Implement lazy cube extension scheme for span='full' --- esmvalcore/preprocessor/_multimodel.py | 43 +++++++++++++++++-- .../_multimodel/test_multimodel.py | 11 ++++- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 2187f9c320..893d335c0e 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -6,6 +6,7 @@ from functools import reduce import cf_units +import dask.array as da import iris import numpy as np from iris.util import equalise_attributes @@ -137,11 +138,45 @@ def _subset(cube, time_points): def _extend(cube, time_points): """Extend cube to a specified time range.""" - time_points = cube.coord('time').units.num2date(time_points) - sample_points = [('time', time_points)] - scheme = iris.analysis.Nearest(extrapolation_mode='mask') + cube.coord('time').bounds = None + cube_points = cube.coord('time').points - return cube.interpolate(sample_points, scheme) + if not np.all(np.diff(cube_points) > 0): + raise ValueError('Time points are not monotonic') + + begin = cube_points[0] + end = cube_points[-1] + + pad_begin = time_points[time_points < begin] + pad_end = time_points[time_points > end] + + if (len(pad_begin)) == 0 and (len(pad_end) == 0): + return cube + + template_cube = cube[:1].copy() + template_cube.data = np.ma.array(da.zeros_like(template_cube.data), + mask=True, + dtype=template_cube.data.dtype) + + cube_list = [] + + for time_point in pad_begin: + new_slice = template_cube.copy() + new_slice.coord('time').points = float(time_point) + cube_list.append(new_slice) + + cube_list.append(cube) + + for time_point in pad_end: + new_slice = template_cube.copy() + new_slice.coord('time').points = float(time_point) + cube_list.append(new_slice) + + cube_list = iris.cube.CubeList(cube_list) + + new_cube = cube_list.concatenate_cube() + + return new_cube def _align(cubes, span): diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py 
b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 7e5e5d4ed9..138c84feec 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -82,6 +82,7 @@ def generate_cube_from_dates( if isinstance(dates, str): time = timecoord(dates, calendar, offset=offset, num=len_data) else: + len_data = len(dates) unit = Unit(offset, calendar=calendar) time = iris.coords.DimCoord(unit.date2num(dates), standard_name='time', @@ -202,7 +203,15 @@ def test_lazy_data_consistent_times(span): @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_lazy_data_inconsistent_times(span): - cubes = get_cubes_for_validation_test('monthly', lazy=True) + + cubes = ( + generate_cube_from_dates( + [datetime(1850, i, 15, 0, 0, 0) for i in range(1, 10)], lazy=True), + generate_cube_from_dates( + [datetime(1850, i, 15, 0, 0, 0) for i in range(3, 8)], lazy=True), + generate_cube_from_dates( + [datetime(1850, i, 15, 0, 0, 0) for i in range(2, 9)], lazy=True), + ) for cube in cubes: assert cube.has_lazy_data() From 999d514c2c85cd3c00524e2d0f3505aeb1aae9cd Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 2 Mar 2021 13:32:37 +0100 Subject: [PATCH 37/68] Fix pep257: D417 --- esmvalcore/preprocessor/_multimodel.py | 1 + 1 file changed, 1 insertion(+) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 893d335c0e..ba81c7061f 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -231,6 +231,7 @@ def _compute(cube, statistic: str, dim: str = 'new_dim'): Parameters ---------- cube : :obj:`iris.cube.Cube` + Input cube. statistic : str Name of the statistic to calculate. Must be available via :mod:`iris.analysis`. From bdfc12528dee9c0b7c73b51ff351c2fed4f677fe Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 2 Mar 2021 14:19:48 +0100 Subject: [PATCH 38/68] Update documentation --- esmvalcore/preprocessor/_multimodel.py | 2 +- .../_multimodel/test_multimodel.py | 29 ++++++------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index ba81c7061f..7faa6085fe 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -17,7 +17,7 @@ 'gmean': iris.analysis.GMEAN, # not lazy in iris 'hmean': iris.analysis.HMEAN, # not lazy in iris 'max': iris.analysis.MAX, - 'median': iris.analysis.MEDIAN, + 'median': iris.analysis.MEDIAN, # not lazy in iris 'min': iris.analysis.MIN, 'rms': iris.analysis.RMS, 'sum': iris.analysis.SUM, diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 138c84feec..a9c69ee7c8 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -123,12 +123,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): VALIDATION_DATA_SUCCESS = ( ('full', 'mean', (5, 5, 3)), ('full', 'std_dev', (5.656854249492381, 4, 2.8284271247461903)), - pytest.param( - 'full', - 'std', (5.656854249492381, 4, 2.8284271247461903), - marks=pytest.mark.xfail( - raises=AssertionError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/1024')), + ('full', 'std', (5.656854249492381, 4, 2.8284271247461903)), ('full', 'min', (1, 1, 1)), ('full', 'max', (9, 9, 5)), ('full', 'median', (5, 5, 3)), @@ -136,12 +131,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): ('full', 
'p99.5', (8.96, 8.96, 4.98)), ('overlap', 'mean', (5, 5)), ('overlap', 'std_dev', (5.656854249492381, 4)), - pytest.param( - 'overlap', - 'std', (5.656854249492381, 4), - marks=pytest.mark.xfail( - raises=AssertionError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/1024')), + ('overlap', 'std', (5.656854249492381, 4)), ('overlap', 'min', (1, 1)), ('overlap', 'max', (9, 9)), ('overlap', 'median', (5, 5)), @@ -156,14 +146,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): @pytest.mark.parametrize('frequency', FREQUENCY_OPTIONS) @pytest.mark.parametrize('span, statistics, expected', VALIDATION_DATA_SUCCESS) def test_multimodel_statistics(frequency, span, statistics, expected): - """High level test for multicube statistics function. - - - Should work for multiple data frequencies - - Should be able to deal with multiple statistics - - Should work for both span arguments - - Should deal correctly with different mask options - - Return type should be a dict with all requested statistics as keys - """ + """High level test for multicube statistics function.""" cubes = get_cubes_for_validation_test(frequency) if isinstance(statistics, str): @@ -183,6 +166,7 @@ def test_multimodel_statistics(frequency, span, statistics, expected): @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_lazy_data_consistent_times(span): + """Test laziness of multimodel statistics with consistent time axis.""" cubes = ( generate_cube_from_dates('monthly', fill_val=1, lazy=True), generate_cube_from_dates('monthly', fill_val=3, lazy=True), @@ -203,6 +187,11 @@ def test_lazy_data_consistent_times(span): @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_lazy_data_inconsistent_times(span): + """Test laziness of multimodel statistics with inconsistent time axis. + + This hits `_align`, which adds additional computations which must be + lazy. 
+ """ cubes = ( generate_cube_from_dates( From cea76e4bc0d51eca2cad3581e471013da618808d Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 2 Mar 2021 14:20:32 +0100 Subject: [PATCH 39/68] Metadata tweaks to make it work with sample data / test recipe --- esmvalcore/preprocessor/_multimodel.py | 20 +++++++++++++++---- .../multimodel_statistics/test_multimodel.py | 6 ------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 7faa6085fe..9219aa09ec 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -211,17 +211,29 @@ def _combine(cubes, dim='new_dim'): equalise_attributes(cubes) # in-place for i, cube in enumerate(cubes): + try: + # since cubes are updated in-place, new coord may already exist + cube.remove_coord(dim) + except iris.exceptions.CoordinateNotFoundError: + pass + concat_dim = iris.coords.AuxCoord(i, var_name=dim) cube.add_aux_coord(concat_dim) + # Clear some metadata that can cause merge to fail + # https://scitools-iris.readthedocs.io/en/stable/userguide/ + # merge_and_concat.html#common-issues-with-merge-and-concatenate + + cube.cell_methods = None + + for coord in cube.coords(): + coord.long_name = None + coord.attributes = None + cubes = iris.cube.CubeList(cubes) merged_cube = cubes.merge_cube() - # Clean up after merge, because new dimension is no longer needed - for cube in cubes: - cube.remove_coord(dim) - return merged_cube diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index ad79a66e6f..c9930a3937 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -65,12 +65,6 @@ def fix_metadata(cubes): for cube in cubes: cube.coord('air_pressure').bounds = None - for coord in cube.coords(): - coord.long_name = None - coord.attributes = None - - cube.cell_methods = None - def preprocess_data(cubes, time_slice: dict = None): """Regrid the data to the first cube and optional time-slicing.""" From c19335faaa0fff3bb4027b3e162fb282b5e0a9f0 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 4 Mar 2021 14:05:59 +0100 Subject: [PATCH 40/68] Update docstring and add note --- esmvalcore/preprocessor/_multimodel.py | 31 +++++++++++++++++--------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 9219aa09ec..74dd044bf6 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -339,30 +339,39 @@ def multi_model_statistics(products, keep_input_datasets=True): """Compute multi-model statistics. - This function computes multi-model statistics on cubes or products. - Products (or: preprocessorfiles) are used internally by ESMValCore to store + This function computes multi-model statistics on a list of ``products``, + which can be instances of :py:class:`~iris.cube.Cube` or + :py:class:`~esmvalcore.preprocessor.PreprocessorFile`. + The latter is used internally by ESMValCore to store workflow and provenance information, and this option should typically be ignored. Apart from the time coordinate, cubes must have consistent shapes. There are two options to combine time coordinates of different lengths, see the - `span` argument. + ``span`` argument. 
-    Uses the statistical operators in iris.analysis, including 'mean',
-    'median', 'min', 'max', and 'std'. Percentiles are also supported and can
-    be specified like pXX.YY (for percentile XX.YY; decimal part optional).
+    Uses the statistical operators in :py:mod:`iris.analysis`, including
+    ``mean``, ``median``, ``min``, ``max``, and ``std``. Percentiles are also
+    supported and can be specified like ``pXX.YY`` (for percentile ``XX.YY``;
+    decimal part optional).
 
-    Note: some of the operators in iris.analysis require additional arguments.
-    Except for percentiles, these operators are currently not supported.
+    Notes
+    -----
+    Some of the operators in :py:mod:`iris.analysis` require additional
+    arguments. Except for percentiles, these operators are currently not
+    supported.
+
+    Lazy operation is supported for all statistics, except
+    ``median``, ``percentile``, ``gmean`` and ``hmean``.
 
     Parameters
     ----------
     products: list
         Cubes (or products) over which the statistics will be computed.
     statistics: list
-        Statistical metrics to be computed, e.g. ['mean', 'max']. Choose from
-        the operators listed in the iris.analysis package. Percentiles can be
-        specified like 'pXX.YY'.
+        Statistical metrics to be computed, e.g. [``mean``, ``max``]. Choose
+        from the operators listed in the iris.analysis package. Percentiles can
+        be specified like ``pXX.YY``.
     span: str
         Overlap or full; if overlap, statistics are computed on common time-
         span; if full, statistics are computed on full time spans, ignoring

From 34994ccf95d5d607e328a2fb220cc8353890349e Mon Sep 17 00:00:00 2001
From: Stef Smeets
Date: Fri, 5 Mar 2021 08:07:00 +0000
Subject: [PATCH 41/68] Apply suggestions from code review

Co-authored-by: Peter Kalverla
---
 esmvalcore/preprocessor/_multimodel.py | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
index 74dd044bf6..44b3001d25 100644
--- a/esmvalcore/preprocessor/_multimodel.py
+++ b/esmvalcore/preprocessor/_multimodel.py
@@ -24,7 +24,7 @@
     'mean': iris.analysis.MEAN,
     'std_dev': iris.analysis.STD_DEV,
     'variance': iris.analysis.VARIANCE,
-    # not directly supported
+    # The following require extra kwargs. ATM this is only supported for percentiles
     'count': iris.analysis.COUNT,
     'peak': iris.analysis.PEAK,
     'percentile': iris.analysis.PERCENTILE,  # not lazy in iris
@@ -97,25 +97,6 @@ def _unify_time_coordinates(cubes):
         cube.coord('time').guess_bounds()
 
 
-def _resolve_span(all_times, span):
-    """Construct new time array based on the span parameter."""
-    if len(all_times) == 1:
-        new_times = np.array(all_times[0])
-
-    elif span == 'full':
-        new_times = reduce(np.union1d, all_times)
-
-    elif span == 'overlap':
-        new_times = reduce(np.intersect1d, all_times)
-        if new_times.size < 1:
-            raise ValueError("No time overlap found between input cubes.")
-    else:
-        raise ValueError("Unknown value for span. 
Expected 'full' or 'overlap', "
-                         "got {}".format(span))
-
-    return new_times
-
-
 def _time_coords_are_aligned(cubes):
     """Return `True` if time coords are aligned."""
     first_time_array = cubes[0].coord('time').points
@@ -141,9 +122,6 @@ def _extend(cube, time_points):
     cube.coord('time').bounds = None
     cube_points = cube.coord('time').points
 
-    if not np.all(np.diff(cube_points) > 0):
-        raise ValueError('Time points are not monotonic')
-
     begin = cube_points[0]
     end = cube_points[-1]
 
From 9921b33dbf7e37b7e397d07fc1570046751b3ee0 Mon Sep 17 00:00:00 2001
From: Stef Smeets
Date: Fri, 5 Mar 2021 09:34:32 +0100
Subject: [PATCH 42/68] Update documentation

---
 esmvalcore/preprocessor/_multimodel.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
index 44b3001d25..8f9db30459 100644
--- a/esmvalcore/preprocessor/_multimodel.py
+++ b/esmvalcore/preprocessor/_multimodel.py
@@ -24,7 +24,8 @@
     'mean': iris.analysis.MEAN,
     'std_dev': iris.analysis.STD_DEV,
     'variance': iris.analysis.VARIANCE,
-    # The following require extra kwargs. ATM this is only supported for percentiles
+    # The following require extra kwargs,
+    # atm this is only supported for percentiles via e.g. `pXX`
     'count': iris.analysis.COUNT,
     'peak': iris.analysis.PEAK,
     'percentile': iris.analysis.PERCENTILE,  # not lazy in iris
@@ -118,7 +119,13 @@ def _subset(cube, time_points):
 
 
 def _extend(cube, time_points):
-    """Extend cube to a specified time range."""
+    """Extend cube to a specified time range.
+
+    If time points are missing before the start/after the end of the
+    time range, cubes for each missing time point with masked data will
+    be added to pad the time range to match `time_points`. This method
+    supports lazy operation.
+    """
     cube.coord('time').bounds = None
     cube_points = cube.coord('time').points
 
@@ -185,7 +192,10 @@ def _align(cubes, span):
 
 
 def _combine(cubes, dim='new_dim'):
-    """Merge iris cubes into a single big cube with new dimension."""
+    """Merge iris cubes into a single big cube with new dimension.
+
+    This assumes that all input cubes have the same shape. 
+ """ equalise_attributes(cubes) # in-place for i, cube in enumerate(cubes): From e560bcd899d7b4a04620ba3a5ac8c77dd8da33fb Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 5 Mar 2021 09:35:54 +0100 Subject: [PATCH 43/68] Remove unused test code --- .../_multimodel/test_multimodel.py | 43 ------------------- 1 file changed, 43 deletions(-) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index a9c69ee7c8..3c23488e5c 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -257,49 +257,6 @@ def test_get_consistent_time_unit(calendar1, calendar2, expected): assert result.calendar == expected -def generate_resolve_span_cases(valid): - """Generate test cases for _resolve_span.""" - points_1 = (1, 2, 3) - points_2 = (2, 3, 4) - points_3 = (3, 4, 5) - points_4 = (4, 5, 6) - empty_tuple = () - - if valid: - yield from ( - ((points_1, ), 'overlap', points_1), - ((points_1, ), 'full', points_1), - ((points_1, points_2), 'overlap', (2, 3)), - ((points_1, points_2), 'full', (1, 2, 3, 4)), - ((points_1, points_2, points_3), 'overlap', (3, )), - ((points_1, points_2, points_3), 'full', (1, 2, 3, 4, 5)), - ((points_1, points_4), 'full', (1, 2, 3, 4, 5, 6)), - ) - else: - yield from ( - (empty_tuple, 'overlap', TypeError), - (empty_tuple, 'full', TypeError), - ((points_1, points_4), 'overlap', ValueError), - ) - - -@pytest.mark.parametrize('points, span, expected', - generate_resolve_span_cases(True)) -def test_resolve_span(points, span, expected): - """Check that resolve_span returns the correct union/intersection.""" - result = mm._resolve_span(points, span=span) - assert isinstance(result, np.ndarray) - np.testing.assert_equal(result, expected) - - -@pytest.mark.parametrize('points, span, error', - generate_resolve_span_cases(False)) -def test_resolve_span_fail(points, span, error): - """Test failing case for _resolve_span.""" - with pytest.raises(error): - mm._resolve_span(points, span=span) - - @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_align(span): """Test _align function.""" From 0bd079b4336366ce8c1d0f2fdc8fc1d10c7c649f Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 5 Mar 2021 09:36:09 +0100 Subject: [PATCH 44/68] Copy cubes to avoid updating them inplace --- esmvalcore/preprocessor/_multimodel.py | 11 ++++------- .../unit/preprocessor/_multimodel/test_multimodel.py | 8 -------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index ae2b58ed3e..8f9db30459 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -199,12 +199,6 @@ def _combine(cubes, dim='new_dim'): equalise_attributes(cubes) # in-place for i, cube in enumerate(cubes): - try: - # since cubes are updated in-place, new coord may already exist - cube.remove_coord(dim) - except iris.exceptions.CoordinateNotFoundError: - pass - concat_dim = iris.coords.AuxCoord(i, var_name=dim) cube.add_aux_coord(concat_dim) @@ -280,7 +274,10 @@ def _multicube_statistics(cubes, statistics, span): Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. 
""" - aligned_cubes = _align(cubes, span=span) + # work with copy of cubes to avoid modifying input cubes + copied_cubes = [cube.copy() for cube in cubes] + + aligned_cubes = _align(copied_cubes, span=span) big_cube = _combine(aligned_cubes) statistics_cubes = {} for statistic in statistics: diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 3c23488e5c..6546e7e1ed 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -371,10 +371,6 @@ def test_edge_case_different_time_offsets(span): desired = np.array((14., 45., 73.)) np.testing.assert_array_equal(time_coord.points, desired) - # input cubes are updated in-place - for cube in cubes: - np.testing.assert_array_equal(cube.coord('time').points, desired) - def generate_cubes_with_non_overlapping_timecoords(): """Generate sample data where time coords do not overlap.""" @@ -448,10 +444,6 @@ def test_edge_case_time_not_in_middle_of_months(span): desired = np.array((14., 45., 73.)) np.testing.assert_array_equal(time_coord.points, desired) - # input cubes are updated in-place - for cube in cubes: - np.testing.assert_array_equal(cube.coord('time').points, desired) - @pytest.mark.parametrize('span', SPAN_OPTIONS) def test_edge_case_sub_daily_data_fail(span): From 709d849d6691fbe9acfbf34ba86ea9ad38ca32d6 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Thu, 11 Mar 2021 16:17:57 +0100 Subject: [PATCH 45/68] Add option to rechunk data before computing statistics --- esmvalcore/preprocessor/_multimodel.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 8f9db30459..68007bef38 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -6,6 +6,7 @@ from functools import reduce import cf_units +import dask import dask.array as da import iris import numpy as np @@ -219,6 +220,24 @@ def _combine(cubes, dim='new_dim'): return merged_cube +def rechunk(cube, blocksize='auto'): + """Rechunk the cube to speed up out-of-memory computation.""" + + if blocksize != 'auto': # auto block size in dask is "128MiB" + dask.config.set({"array.chunk-size": blocksize}) + + new_chunks = {0: -1} # don't chunk along the multimodel dimension + if cube.ndim > 1: + new_chunks[1] = 'auto' # do chunk along the first subsequent dimension + + cube.data = cube.lazy_data().rechunk(new_chunks) + + logger.debug("Total data size: %s MB", cube.lazy_data().nbytes * 1e-6) + logger.debug("New chunk block size: %s MB", + cube.lazy_data().nbytes / cube.lazy_data().npartitions * 1e-6) + logger.debug("New chunk configuration: %s", cube.lazy_data()) + + def _compute(cube, statistic: str, dim: str = 'new_dim'): """Compute statistic. 
@@ -240,6 +259,8 @@ def _compute(cube, statistic: str, dim: str = 'new_dim'): statistic = statistic.lower() kwargs = {} + rechunk(cube, blocksize="auto") + # special cases if statistic == 'std': logger.warning( From f9f5600541a4de1e625647417557f2dfa37ff32a Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Thu, 11 Mar 2021 16:20:52 +0100 Subject: [PATCH 46/68] Expand options for statistics --- esmvalcore/_recipe_checks.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 8be5b64081..dd9fd00193 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -9,7 +9,8 @@ import yamale from ._data_finder import get_start_end_year -from .preprocessor import PreprocessingTask, TIME_PREPROCESSORS +from .preprocessor import TIME_PREPROCESSORS, PreprocessingTask +from .preprocessor._multimodel import STATISTIC_MAPPING logger = logging.getLogger(__name__) @@ -191,7 +192,7 @@ def extract_shape(settings): def valid_multimodel_statistic(statistic): """Check that `statistic` is a valid argument for multimodel stats.""" - valid_names = ["mean", "median", "std", "min", "max"] + valid_names = ['std'] + list(STATISTIC_MAPPING.keys()) valid_patterns = [r"^(p\d{1,2})(\.\d*)?$"] if not (statistic in valid_names or re.match(r'|'.join(valid_patterns), statistic)): From cf6bc9cabb5441b864ba4e9293195d900683c2d0 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 12 Mar 2021 10:46:44 +0100 Subject: [PATCH 47/68] Convert input to lazy arrays for memory efficiency A test was added to ensure that lazy in -> lazy out, real in -> real out --- esmvalcore/preprocessor/_multimodel.py | 15 ++++++++++++++- .../preprocessor/_multimodel/test_multimodel.py | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 68007bef38..948ec0f0a8 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -295,6 +295,13 @@ def _multicube_statistics(cubes, statistics, span): Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. 
""" + realize = False + for cube in cubes: + # make input cubes lazy for efficient operation on real data + if not cube.has_lazy_data(): + cube.data = cube.lazy_data() + realize = True + # work with copy of cubes to avoid modifying input cubes copied_cubes = [cube.copy() for cube in cubes] @@ -302,7 +309,13 @@ def _multicube_statistics(cubes, statistics, span): big_cube = _combine(aligned_cubes) statistics_cubes = {} for statistic in statistics: - statistics_cubes[statistic] = _compute(big_cube, statistic) + result_cube = _compute(big_cube, statistic) + + # realize data if input cubes are not lazy + if realize: + result_cube.data + + statistics_cubes[statistic] = result_cube return statistics_cubes diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 6546e7e1ed..8de816640e 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -160,6 +160,8 @@ def test_multimodel_statistics(frequency, span, statistics, expected): for i, statistic in enumerate(statistics): result_cube = result[statistic] + # test that real data in => real data out + assert result_cube.has_lazy_data() is False expected_data = np.ma.array(expected[i], mask=False) assert_array_allclose(result_cube.data, expected_data) From 9f535120d9e341adc3ccf8b0d2e84ef94b00a5e9 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Mon, 15 Mar 2021 11:59:53 +0100 Subject: [PATCH 48/68] Outline of workaround for non-lazy iris funcs --- esmvalcore/preprocessor/_multimodel.py | 14 +++++++++++++- .../multimodel_statistics/test_multimodel.py | 4 ++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 948ec0f0a8..8b960a1ef8 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -238,6 +238,15 @@ def rechunk(cube, blocksize='auto'): logger.debug("New chunk configuration: %s", cube.lazy_data()) +def apply_slicewise(cube, dim, operator, **kwargs): + """Loop over slices of a cube if iris has no lazy aggregator.""" + slices = [] + for timeslice in cube.slices(['time']): + timeslice.collapsed(dim, operator, **kwargs) + result = iris.cube.CubeList(slices).merge_cube(dim='time') + return result + + def _compute(cube, statistic: str, dim: str = 'new_dim'): """Compute statistic. @@ -284,7 +293,9 @@ def _compute(cube, statistic: str, dim: str = 'new_dim'): logger.debug('Multicube statistics: computing: %s', statistic) # This will always return a masked array - return cube.collapsed(dim, operator, **kwargs) + if iris_has_lazy_func: # TODO: determine whether iris has a lazy func or not + return cube.collapsed(dim, operator, **kwargs) + return apply_slicewise(cube, dim, operator, **kwargs) def _multicube_statistics(cubes, statistics, span): @@ -295,6 +306,7 @@ def _multicube_statistics(cubes, statistics, span): Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. 
""" + dask.config.set(scheduler='synchronous') realize = False for cube in cubes: # make input cubes lazy for efficient operation on real data diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 485328d85b..68c31a00b3 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -50,7 +50,7 @@ def assert_coords_equal(this: list, other: list): """Assert coords list `this` equals coords list `other`.""" for this_coord, other_coord in zip(this, other): np.testing.assert_equal(this_coord.points, other_coord.points) - assert this_coord.var_name == other_coord.var_name + # assert this_coord.var_name == other_coord.var_name assert this_coord.standard_name == other_coord.standard_name assert this_coord.units == other_coord.units @@ -193,7 +193,7 @@ def multimodel_regression_test(cubes, span, name): fail the first time with a RuntimeError, because the reference data are being written. """ - statistic = 'mean' + statistic = 'p50' result = multimodel_test(cubes, statistic=statistic, span=span) result_cube = result[statistic] From 1f1bfcd3c143f71fefec2dc61fc5dcbb37e8b4da Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Mon, 15 Mar 2021 12:02:09 +0100 Subject: [PATCH 49/68] undo accidental changes to tests --- tests/sample_data/multimodel_statistics/test_multimodel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py b/tests/sample_data/multimodel_statistics/test_multimodel.py index 68c31a00b3..485328d85b 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -50,7 +50,7 @@ def assert_coords_equal(this: list, other: list): """Assert coords list `this` equals coords list `other`.""" for this_coord, other_coord in zip(this, other): np.testing.assert_equal(this_coord.points, other_coord.points) - # assert this_coord.var_name == other_coord.var_name + assert this_coord.var_name == other_coord.var_name assert this_coord.standard_name == other_coord.standard_name assert this_coord.units == other_coord.units @@ -193,7 +193,7 @@ def multimodel_regression_test(cubes, span, name): fail the first time with a RuntimeError, because the reference data are being written. """ - statistic = 'p50' + statistic = 'mean' result = multimodel_test(cubes, statistic=statistic, span=span) result_cube = result[statistic] From 62987b6929c562cd4113eb6cf9ad5018521cdfe5 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 16 Mar 2021 15:29:03 +0100 Subject: [PATCH 50/68] Fix work-around for iris non-lazy aggregators --- esmvalcore/preprocessor/_multimodel.py | 48 +++++++++++++++++++------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 8b960a1ef8..1d2446afd7 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -238,16 +238,37 @@ def rechunk(cube, blocksize='auto'): logger.debug("New chunk configuration: %s", cube.lazy_data()) -def apply_slicewise(cube, dim, operator, **kwargs): - """Loop over slices of a cube if iris has no lazy aggregator.""" +def apply_along_dim(cube: iris.cube.Cube, *, dim: str, + operator: iris.analysis.Aggregator, **kwargs): + """Loop over slices of a cube if iris has no lazy aggregator. 
+
+    Parameters:
+    cube : `:obj:`iris.cube.Cube`
+        Cube to operate on.
+    dim : str
+        Dimension to apply operator along.
+    operator : :obj:`iris.analysis.Aggregator`
+        Operator from the `iris.analysis` module to apply.
+
+    Returns:
+    ret_cube : iris.cube.Cube
+        Cube collapsed along `dim`.
+    """
     slices = []
-    for timeslice in cube.slices(['time']):
-        timeslice.collapsed(dim, operator, **kwargs)
-    result = iris.cube.CubeList(slices).merge_cube(dim='time')
-    return result
+    for timeslice in cube.slices(dim):
+        new_slice = timeslice.collapsed(dim, operator, **kwargs)
+        slices.append(new_slice)
+    ret_cube = iris.cube.CubeList(slices).merge_cube()
-
-def _compute(cube, statistic: str, dim: str = 'new_dim'):
+    # for consistency with normal procedure
+    ret_cube.data = np.ma.array(ret_cube.data)
+
+    return ret_cube
+
+
+def _compute(cube: iris.cube.Cube, *, statistic: str, dim: str = 'new_dim'):
     """Compute statistic.

     Parameters
@@ -292,10 +313,13 @@ def _compute(cube: iris.cube.Cube, *, statistic: str, dim: str = 'new_dim'):

     logger.debug('Multicube statistics: computing: %s', statistic)

-    # This will always return a masked array
-    if iris_has_lazy_func:  # TODO: determine whether iris has a lazy func or not
-        return cube.collapsed(dim, operator, **kwargs)
-    return apply_slicewise(cube, dim, operator, **kwargs)
+    if operator.lazy_func is None:
+        ret_cube = apply_along_dim(cube, dim=dim, operator=operator, **kwargs)
+    else:
+        # This will always return a masked array
+        ret_cube = cube.collapsed(dim, operator, **kwargs)
+
+    return ret_cube

From 7687457d1b74f94be0278d2357d77bc998841ec7 Mon Sep 17 00:00:00 2001
From: Stef Smeets
Date: Tue, 16 Mar 2021 16:24:09 +0100
Subject: [PATCH 51/68] Use `slices_over` to generate time slices

---
 esmvalcore/preprocessor/_multimodel.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
index 1d2446afd7..f04a3b9085 100644
--- a/esmvalcore/preprocessor/_multimodel.py
+++ b/esmvalcore/preprocessor/_multimodel.py
@@ -254,10 +254,9 @@ def apply_along_dim(cube: iris.cube.Cube, *, dim: str,
     ret_cube : iris.cube.Cube
         Cube collapsed along `dim`.
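Note on PATCH 50: the dispatch it introduces relies on iris aggregators advertising their lazy implementation via the lazy_func attribute, which is None when only an eager implementation exists. For example (the exact set of lazy aggregators depends on the installed iris version):

    import iris.analysis

    assert iris.analysis.MEAN.lazy_func is not None  # collapses lazily
    assert iris.analysis.PEAK.lazy_func is None      # eager only, needs the loop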
""" - slices = [] - for timeslice in cube.slices(dim): - new_slice = timeslice.collapsed(dim, operator, **kwargs) + for time_slice in cube.slices_over(['time']): + new_slice = time_slice.collapsed(dim, operator, **kwargs) slices.append(new_slice) ret_cube = iris.cube.CubeList(slices).merge_cube() From 3ff0879e113baa1787a2a80da8427e10c04eb27c Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 16 Mar 2021 16:37:49 +0100 Subject: [PATCH 52/68] Remove dask config --- esmvalcore/preprocessor/_multimodel.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index f04a3b9085..70eca84444 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -6,7 +6,6 @@ from functools import reduce import cf_units -import dask import dask.array as da import iris import numpy as np @@ -220,12 +219,8 @@ def _combine(cubes, dim='new_dim'): return merged_cube -def rechunk(cube, blocksize='auto'): +def rechunk(cube): """Rechunk the cube to speed up out-of-memory computation.""" - - if blocksize != 'auto': # auto block size in dask is "128MiB" - dask.config.set({"array.chunk-size": blocksize}) - new_chunks = {0: -1} # don't chunk along the multimodel dimension if cube.ndim > 1: new_chunks[1] = 'auto' # do chunk along the first subsequent dimension @@ -288,7 +283,7 @@ def _compute(cube: iris.cube.Cube, *, statistic: str, dim: str = 'new_dim'): statistic = statistic.lower() kwargs = {} - rechunk(cube, blocksize="auto") + rechunk(cube) # special cases if statistic == 'std': @@ -329,7 +324,6 @@ def _multicube_statistics(cubes, statistics, span): Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. 
""" - dask.config.set(scheduler='synchronous') realize = False for cube in cubes: # make input cubes lazy for efficient operation on real data From bb8b3cd1808fd807191fae5c1c3059188ebb8dba Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 16 Mar 2021 16:39:06 +0100 Subject: [PATCH 53/68] Remove temporary coordinate --- esmvalcore/preprocessor/_multimodel.py | 2 ++ tests/unit/preprocessor/_multimodel/test_multimodel.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 70eca84444..c712396f00 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -313,6 +313,8 @@ def _compute(cube: iris.cube.Cube, *, statistic: str, dim: str = 'new_dim'): # This will always return a masked array ret_cube = cube.collapsed(dim, operator, **kwargs) + ret_cube.remove_coord(dim) + return ret_cube diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 8de816640e..862c08f098 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -160,6 +160,9 @@ def test_multimodel_statistics(frequency, span, statistics, expected): for i, statistic in enumerate(statistics): result_cube = result[statistic] + # make sure that temporary coord has been removed + with pytest.raises(iris.exceptions.CoordinateNotFoundError): + result_cube.coord('multi-model') # test that real data in => real data out assert result_cube.has_lazy_data() is False expected_data = np.ma.array(expected[i], mask=False) From ee7af3d8efc6b661efbb3493cf44124e3663cef8 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 16 Mar 2021 16:40:16 +0100 Subject: [PATCH 54/68] Fix func/dim name and docstring --- esmvalcore/preprocessor/_multimodel.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index c712396f00..869e3725c6 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -191,7 +191,7 @@ def _align(cubes, span): return new_cubes -def _combine(cubes, dim='new_dim'): +def _combine(cubes, dim='multi-model'): """Merge iris cubes into a single big cube with new dimension. This assumes that all input cubes have the same shape. @@ -233,19 +233,21 @@ def rechunk(cube): logger.debug("New chunk configuration: %s", cube.lazy_data()) -def apply_along_dim(cube: iris.cube.Cube, *, dim: str, - operator: iris.analysis.Aggregator, **kwargs): +def apply_along_time_points(cube: iris.cube.Cube, *, dim: str, + operator: iris.analysis.Aggregator, **kwargs): """Loop over slices of a cube if iris has no lazy aggregator. Parameters: + ----------- cube : `:obj:`iris.cube.Cube` Cube to operate on. dim : str - Dimension to apply operator along. + Dimension to apply the operator along. operator : :obj:`iris.analysis.Aggregator` Operator from the `iris.analysis` module to apply. Returns: + -------- ret_cube : iris.cube.Cube Cube collapsed along `dim`. """ @@ -262,7 +264,10 @@ def apply_along_dim(cube: iris.cube.Cube, *, dim: str, return ret_cube -def _compute(cube: iris.cube.Cube, *, statistic: str, dim: str = 'new_dim'): +def _compute(cube: iris.cube.Cube, + *, + statistic: str, + dim: str = 'multi-model'): """Compute statistic. 
Parameters @@ -308,7 +313,10 @@ def _compute(cube: iris.cube.Cube, *, statistic: str, dim: str = 'new_dim'): logger.debug('Multicube statistics: computing: %s', statistic) if operator.lazy_func is None: - ret_cube = apply_along_dim(cube, dim=dim, operator=operator, **kwargs) + ret_cube = apply_along_time_points(cube, + dim=dim, + operator=operator, + **kwargs) else: # This will always return a masked array ret_cube = cube.collapsed(dim, operator, **kwargs) From 3cad48ff464019b073157b9a6be080b22558fdaa Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Tue, 16 Mar 2021 16:47:16 +0100 Subject: [PATCH 55/68] Raise error when a single model is passed to multicube statistics --- esmvalcore/preprocessor/_multimodel.py | 4 ++++ .../preprocessor/_multimodel/test_multimodel.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 869e3725c6..44b4866ea0 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -334,6 +334,10 @@ def _multicube_statistics(cubes, statistics, span): Cubes are merged and subsequently collapsed along a new auxiliary coordinate. Inconsistent attributes will be removed. """ + if len(cubes) == 1: + raise ValueError('Cannot perform multicube statistics ' + 'for a single cube.') + realize = False for cube in cubes: # make input cubes lazy for efficient operation on real data diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 862c08f098..00f7b7eab8 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -463,6 +463,19 @@ def test_edge_case_sub_daily_data_fail(span): _ = multi_model_statistics(cubes, span, statistics) +@pytest.mark.parametrize('span', SPAN_OPTIONS) +def test_edge_case_single_cube_fail(span): + """Test that an error is raised when a single cube is passed.""" + cube = generate_cube_from_dates('monthly') + cubes = (cube, ) + + statistic = 'min' + statistics = (statistic, ) + + with pytest.raises(ValueError): + _ = multi_model_statistics(cubes, span, statistics) + + def test_unify_time_coordinates(): """Test set common calendar.""" cube1 = generate_cube_from_dates('monthly', From 9c8eb679ae4b646ef159b9fb1b1a83ae3a712d9c Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Wed, 17 Mar 2021 17:13:15 +0100 Subject: [PATCH 56/68] Realize data beforehand if aggregator is not lazy --- esmvalcore/preprocessor/_multimodel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 44b4866ea0..6e641f0dcd 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -251,8 +251,9 @@ def apply_along_time_points(cube: iris.cube.Cube, *, dim: str, ret_cube : iris.cube.Cube Cube collapsed along `dim`. 
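Note on PATCH 56, whose one-line hunk follows: without realizing first, every collapsed() call in the loop would trigger a separate dask computation against the same lazy source. Realizing once lets all slices share the computed array; roughly:

    import dask.array as da
    import numpy as np
    import iris.coords
    import iris.cube

    time = iris.coords.DimCoord(np.arange(10.0), standard_name='time',
                                units='days since 2000-01-01')
    cube = iris.cube.Cube(da.zeros((10, 3)), dim_coords_and_dims=[(time, 0)])

    _ = cube.data  # one compute here instead of one per slice below
    sums = [time_slice.data.sum() for time_slice in cube.slices_over('time')]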
""" + _ = cube.data # realize for more performance like in old implementation slices = [] - for time_slice in cube.slices_over(['time']): + for time_slice in cube.slices_over('time'): new_slice = time_slice.collapsed(dim, operator, **kwargs) slices.append(new_slice) From d76298ee5399ae69fc908e79cc40073c76f08349 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Thu, 18 Mar 2021 18:19:22 +0100 Subject: [PATCH 57/68] Stronger separate lazy from non-lazy path --- esmvalcore/preprocessor/_multimodel.py | 169 +++++++++++-------------- 1 file changed, 72 insertions(+), 97 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 6e641f0dcd..ec59499a3c 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -34,6 +34,34 @@ } +def _resolve_operator(statistic: str): + """Find the operator corresponding to the statistic.""" + statistic = statistic.lower() + kwargs = {} + + # special cases + if statistic == 'std': + logger.warning( + "Multicube statistics is aligning its behaviour with iris.analysis" + ". Please consider replacing 'std' with 'std_dev' in your code.") + statistic = 'std_dev' + + elif re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): + # percentiles between p0 and p99.99999... + percentile = float(statistic[1:]) + kwargs['percent'] = percentile + statistic = 'percentile' + + try: + operator = STATISTIC_MAPPING[statistic] + except KeyError as err: + raise ValueError( + f'Statistic `{statistic}` not supported by multicube statistics. ' + f'Must be one of {tuple(STATISTIC_MAPPING.keys())}.') from err + + return operator, kwargs + + def _get_consistent_time_unit(cubes): """Return cubes' time unit if consistent, standard calendar otherwise.""" t_units = [cube.coord('time').units for cube in cubes] @@ -233,98 +261,42 @@ def rechunk(cube): logger.debug("New chunk configuration: %s", cube.lazy_data()) -def apply_along_time_points(cube: iris.cube.Cube, *, dim: str, - operator: iris.analysis.Aggregator, **kwargs): - """Loop over slices of a cube if iris has no lazy aggregator. +def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, + **kwargs): + """Loop over slices of a cube if iris has no lazy aggregator.""" + _ = [cube.data for cube in cubes] # make sure the cubes' data are realized - Parameters: - ----------- - cube : `:obj:`iris.cube.Cube` - Cube to operate on. - dim : str - Dimension to apply the operator along. - operator : :obj:`iris.analysis.Aggregator` - Operator from the `iris.analysis` module to apply. + result_slices = [] + for i in range(cubes[0].shape[0]): + single_model_slices = [cube[i] for cube in cubes + ] # maybe filter the iris warning here? + combined_slice = _combine(single_model_slices, dim='multi-model') + collapsed_slice = combined_slice.collapsed('multi-model', operator, + **kwargs) + result_slices.append(collapsed_slice) - Returns: - -------- - ret_cube : iris.cube.Cube - Cube collapsed along `dim`. 
- """ - _ = cube.data # realize for more performance like in old implementation - slices = [] - for time_slice in cube.slices_over('time'): - new_slice = time_slice.collapsed(dim, operator, **kwargs) - slices.append(new_slice) - - ret_cube = iris.cube.CubeList(slices).merge_cube() + result_cube = iris.cube.CubeList(result_slices).merge_cube() - # for consistency with normal procedure - ret_cube.data = np.ma.array(ret_cube.data) + # For consistency with lazy procedure + result_cube.data = np.ma.array(result_cube.data) - return ret_cube + result_cube.remove_coord('multi-model') + return result_cube -def _compute(cube: iris.cube.Cube, - *, - statistic: str, - dim: str = 'multi-model'): - """Compute statistic. - - Parameters - ---------- - cube : :obj:`iris.cube.Cube` - Input cube. - statistic : str - Name of the statistic to calculate. Must be available via - :mod:`iris.analysis`. - dim : str - Collapse cube along this coordinate. - - Returns - ------- - :obj:`iris.cube.Cube` - Collapsed cube. - """ - statistic = statistic.lower() - kwargs = {} +def _compute_lazy(cubes: list, *, operator: iris.analysis.Aggregator, + **kwargs): + """Compute statistics using lazy iris function.""" + cube = _combine( + cubes, dim='multi-model' + ) # this is now done for each statistic, can we avoid that? rechunk(cube) - # special cases - if statistic == 'std': - logger.warning( - "Multicube statistics is aligning its behaviour with iris.analysis" - ". Please consider replacing 'std' with 'std_dev' in your code.") - statistic = 'std_dev' - - elif re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): - # percentiles between p0 and p99.99999... - percentile = float(statistic[1:]) - kwargs['percent'] = percentile - statistic = 'percentile' - - try: - operator = STATISTIC_MAPPING[statistic] - except KeyError as err: - raise ValueError( - f'Statistic `{statistic}` not supported by multicube statistics. 
' - f'Must be one of {tuple(STATISTIC_MAPPING.keys())}.') from err - - logger.debug('Multicube statistics: computing: %s', statistic) - - if operator.lazy_func is None: - ret_cube = apply_along_time_points(cube, - dim=dim, - operator=operator, - **kwargs) - else: - # This will always return a masked array - ret_cube = cube.collapsed(dim, operator, **kwargs) - - ret_cube.remove_coord(dim) - - return ret_cube + # This will always return a masked array + result_cube = cube.collapsed('multi-model', operator, **kwargs) + result_cube.remove_coord('multi-model') + return result_cube def _multicube_statistics(cubes, statistics, span): @@ -339,25 +311,28 @@ def _multicube_statistics(cubes, statistics, span): raise ValueError('Cannot perform multicube statistics ' 'for a single cube.') - realize = False - for cube in cubes: - # make input cubes lazy for efficient operation on real data - if not cube.has_lazy_data(): - cube.data = cube.lazy_data() - realize = True - - # work with copy of cubes to avoid modifying input cubes - copied_cubes = [cube.copy() for cube in cubes] + lazy_input = True if all(cube.has_lazy_data() for cube in cubes) else False + copied_cubes = [cube.copy() for cube in cubes] # avoid modifying inputs aligned_cubes = _align(copied_cubes, span=span) - big_cube = _combine(aligned_cubes) + statistics_cubes = {} for statistic in statistics: - result_cube = _compute(big_cube, statistic=statistic) + logger.debug('Multicube statistics: computing: %s', statistic) + operator, kwargs = _resolve_operator(statistic) + + if operator.lazy_func is None: + result_cube = _compute_eager(aligned_cubes, + operator=operator, + **kwargs) + else: + result_cube = _compute_lazy(aligned_cubes, + operator=operator, + **kwargs) - # realize data if input cubes are not lazy - if realize: - result_cube.data + # lazy input --> lazy output + result_cube.data = result_cube.lazy_data( + ) if lazy_input else result_cube.data statistics_cubes[statistic] = result_cube From db3aa93d7dbdc61aef0a79a9c77bc685d5b3bcd6 Mon Sep 17 00:00:00 2001 From: Stef Smeets Date: Fri, 19 Mar 2021 15:25:02 +0100 Subject: [PATCH 58/68] Make concat dim a global and fix tests --- esmvalcore/preprocessor/_multimodel.py | 24 ++++++++++++------- .../_multimodel/test_multimodel.py | 16 ++++++------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index ec59499a3c..a9ec099080 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -33,6 +33,8 @@ 'wpercentile': iris.analysis.WPERCENTILE, # not lazy in iris } +CONCAT_DIM = 'multi-model' + def _resolve_operator(statistic: str): """Find the operator corresponding to the statistic.""" @@ -219,7 +221,7 @@ def _align(cubes, span): return new_cubes -def _combine(cubes, dim='multi-model'): +def _combine(cubes): """Merge iris cubes into a single big cube with new dimension. This assumes that all input cubes have the same shape. @@ -227,7 +229,8 @@ def _combine(cubes, dim='multi-model'): equalise_attributes(cubes) # in-place for i, cube in enumerate(cubes): - concat_dim = iris.coords.AuxCoord(i, var_name=dim) + concat_dim = iris.coords.AuxCoord(i, var_name=CONCAT_DIM) + cube.add_aux_coord(concat_dim) # Clear some metadata that can cause merge to fail @@ -270,8 +273,8 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, for i in range(cubes[0].shape[0]): single_model_slices = [cube[i] for cube in cubes ] # maybe filter the iris warning here? 
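Note on the lazy branch above: rechunk keeps the whole model axis in a single chunk so that collapsing over 'multi-model' never crosses chunk boundaries. The chunk specification it passes is plain dask; a standalone illustration:

    import dask.array as da

    data = da.zeros((5, 1000, 1000), chunks=(1, 1000, 1000))
    # one chunk along the model axis, automatic sizing along the next one
    data = data.rechunk({0: -1, 1: 'auto'})
    print(data.chunks[0])  # (5,): the model axis is no longer split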
- combined_slice = _combine(single_model_slices, dim='multi-model') - collapsed_slice = combined_slice.collapsed('multi-model', operator, + combined_slice = _combine(single_model_slices) + collapsed_slice = combined_slice.collapsed(CONCAT_DIM, operator, **kwargs) result_slices.append(collapsed_slice) @@ -280,7 +283,7 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, # For consistency with lazy procedure result_cube.data = np.ma.array(result_cube.data) - result_cube.remove_coord('multi-model') + result_cube.remove_coord(CONCAT_DIM) return result_cube @@ -289,13 +292,16 @@ def _compute_lazy(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs): """Compute statistics using lazy iris function.""" cube = _combine( - cubes, dim='multi-model' - ) # this is now done for each statistic, can we avoid that? + cubes) # this is now done for each statistic, can we avoid that? rechunk(cube) # This will always return a masked array - result_cube = cube.collapsed('multi-model', operator, **kwargs) - result_cube.remove_coord('multi-model') + result_cube = cube.collapsed(CONCAT_DIM, operator, **kwargs) + result_cube.remove_coord(CONCAT_DIM) + + for cube in cubes: + cube.remove_coord(CONCAT_DIM) + return result_cube diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 00f7b7eab8..dc9092eb2f 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -129,6 +129,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): ('full', 'median', (5, 5, 3)), ('full', 'p50', (5, 5, 3)), ('full', 'p99.5', (8.96, 8.96, 4.98)), + ('full', 'peak', ([9], [9], [5])), ('overlap', 'mean', (5, 5)), ('overlap', 'std_dev', (5.656854249492381, 4)), ('overlap', 'std', (5.656854249492381, 4)), @@ -137,6 +138,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): ('overlap', 'median', (5, 5)), ('overlap', 'p50', (5, 5)), ('overlap', 'p99.5', (8.96, 8.96)), + ('overlap', 'peak', ([9], [9])), # test multiple statistics ('overlap', ('min', 'max'), ((1, 1), (9, 9))), ('full', ('min', 'max'), ((1, 1, 1), (9, 9, 5))), @@ -223,7 +225,6 @@ def test_lazy_data_inconsistent_times(span): ('percentile', ValueError), ('wpercentile', ValueError), ('count', TypeError), - ('peak', TypeError), ('proportion', TypeError), ) @@ -297,7 +298,6 @@ def test_combine_same_shape(span): """Test _combine with same shape of cubes.""" len_data = 3 num_cubes = 5 - test_dim = 'test_dim' cubes = [] for i in range(num_cubes): @@ -307,10 +307,10 @@ def test_combine_same_shape(span): len_data=len_data) cubes.append(cube) - result_cube = mm._combine(cubes, dim=test_dim) + result_cube = mm._combine(cubes) - dim_coord = result_cube.coord(test_dim) - assert dim_coord.var_name == test_dim + dim_coord = result_cube.coord(mm.CONCAT_DIM) + assert dim_coord.var_name == mm.CONCAT_DIM assert result_cube.shape == (num_cubes, len_data) desired = np.linspace((0, ) * len_data, @@ -323,7 +323,6 @@ def test_combine_same_shape(span): def test_combine_different_shape_fail(): """Test _combine with inconsistent data.""" num_cubes = 5 - test_dim = 'test_dim' cubes = [] for num in range(1, num_cubes + 1): @@ -331,13 +330,12 @@ def test_combine_different_shape_fail(): cubes.append(cube) with pytest.raises(iris.exceptions.MergeError): - _ = mm._combine(cubes, dim=test_dim) + _ = mm._combine(cubes) def test_combine_inconsistent_var_names_fail(): """Test _combine with inconsistent var names.""" num_cubes = 
5 - test_dim = 'test_dim' cubes = [] for num in range(num_cubes): @@ -347,7 +345,7 @@ def test_combine_inconsistent_var_names_fail(): cubes.append(cube) with pytest.raises(iris.exceptions.MergeError): - _ = mm._combine(cubes, dim=test_dim) + _ = mm._combine(cubes) @pytest.mark.parametrize('span', SPAN_OPTIONS) From 99159ac7fda6fff442725502c988951d1bbc78b5 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Tue, 30 Mar 2021 13:52:46 +0200 Subject: [PATCH 59/68] Simplify one-line if statement as per codacy suggestion --- esmvalcore/preprocessor/_multimodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index a9ec099080..483019c6ec 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -317,7 +317,7 @@ def _multicube_statistics(cubes, statistics, span): raise ValueError('Cannot perform multicube statistics ' 'for a single cube.') - lazy_input = True if all(cube.has_lazy_data() for cube in cubes) else False + lazy_input = bool(all(cube.has_lazy_data() for cube in cubes)) copied_cubes = [cube.copy() for cube in cubes] # avoid modifying inputs aligned_cubes = _align(copied_cubes, span=span) From 3ac43b5ef5a51d6f79d1f82c71d946e15be18f61 Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Wed, 9 Jun 2021 18:02:52 +0200 Subject: [PATCH 60/68] Merge branch origin/main into this branch and resolve conflicts --- .circleci/config.yml | 41 +- .github/pull_request_template.md | 51 +- .github/workflows/action-conda-publish.yml | 6 +- .../workflows/action-install-from-conda.yml | 13 +- .../workflows/action-install-from-pypi.yml | 9 +- .../workflows/action-install-from-source.yml | 8 +- .../action-pypi-build-and-deploy.yml | 2 +- .github/workflows/action-test.yml | 16 +- .gitignore | 1 + .pre-commit-config.yaml | 4 + .prospector.yml | 3 + .zenodo.json | 6 + CITATION.cff | 5 + README.md | 2 +- doc/api/esmvalcore.api.config.rst | 2 +- doc/api/esmvalcore.rst | 6 +- doc/changelog.rst | 2 +- doc/conf.py | 4 +- doc/contributing.rst | 750 +++++++++++++----- doc/develop/derivation.rst | 12 +- doc/develop/fixing_data.rst | 101 ++- doc/develop/index.rst | 10 +- doc/develop/preprocessor_function.rst | 269 +++++++ doc/interfaces.rst | 2 + doc/quickstart/configure.rst | 34 +- doc/quickstart/find_data.rst | 13 + doc/quickstart/install.rst | 23 +- doc/quickstart/recipes.rst | 15 +- doc/recipe/overview.rst | 15 +- doc/recipe/preprocessor.rst | 157 +++- esmvalcore/_citation.py | 2 +- esmvalcore/_config/_logging.py | 12 +- esmvalcore/_data_finder.py | 74 +- esmvalcore/_recipe.py | 254 +++--- esmvalcore/_recipe_checks.py | 2 +- esmvalcore/_task.py | 6 +- esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py | 65 +- esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py | 6 +- esmvalcore/cmor/_fixes/cmip5/ec_earth.py | 76 +- esmvalcore/cmor/_fixes/cmip5/miroc5.py | 4 + esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py | 57 +- esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py | 12 +- esmvalcore/cmor/_fixes/cmip6/cesm2.py | 36 +- esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py | 8 + esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py | 8 + .../cmor/_fixes/cmip6/cesm2_waccm_fv2.py | 9 + esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py | 28 + esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py | 29 +- esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py | 5 +- esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py | 47 +- esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py | 50 +- esmvalcore/cmor/_fixes/cmip6/kiost_esm.py | 4 + esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py | 77 +- 
esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py | 25 + esmvalcore/cmor/_fixes/common.py | 126 +-- esmvalcore/cmor/_fixes/fix.py | 10 +- esmvalcore/cmor/_fixes/shared.py | 32 + esmvalcore/cmor/check.py | 42 +- esmvalcore/cmor/table.py | 25 +- esmvalcore/cmor/tables/custom/CMOR_tasaga.dat | 25 + esmvalcore/cmor/variable_alt_names.yml | 3 +- esmvalcore/config-developer.yml | 2 + esmvalcore/experimental/_logging.py | 3 +- esmvalcore/experimental/_warnings.py | 7 +- .../experimental/config/_config_object.py | 12 +- .../experimental/config/_validated_config.py | 27 +- esmvalcore/experimental/recipe.py | 29 +- esmvalcore/experimental/recipe_info.py | 14 +- esmvalcore/experimental/recipe_metadata.py | 5 +- esmvalcore/experimental/recipe_output.py | 19 +- esmvalcore/experimental/templates/__init__.py | 3 +- esmvalcore/experimental/utils.py | 18 +- esmvalcore/preprocessor/__init__.py | 9 +- esmvalcore/preprocessor/_ancillary_vars.py | 214 +++++ esmvalcore/preprocessor/_area.py | 97 +-- esmvalcore/preprocessor/_derive/rlus.py | 49 ++ esmvalcore/preprocessor/_derive/rsus.py | 49 ++ esmvalcore/preprocessor/_io.py | 10 + esmvalcore/preprocessor/_mask.py | 122 +-- esmvalcore/preprocessor/_multimodel.py | 44 +- esmvalcore/preprocessor/_regrid.py | 54 +- esmvalcore/preprocessor/_volume.py | 42 +- esmvalcore/preprocessor/_weighting.py | 54 +- package/meta.yaml | 10 +- setup.cfg | 5 + setup.py | 13 +- .../cmor/_fixes/cmip5/test_access1_0.py | 2 +- .../cmor/_fixes/cmip5/test_access1_3.py | 2 +- .../cmor/_fixes/cmip5/test_bcc_csm1_1.py | 56 +- .../cmor/_fixes/cmip5/test_bcc_csm1_1_m.py | 8 +- .../cmor/_fixes/cmip5/test_ec_earth.py | 85 +- .../cmor/_fixes/cmip5/test_miroc5.py | 18 +- .../cmor/_fixes/cmip6/test_bcc_csm2_mr.py | 178 +---- .../cmor/_fixes/cmip6/test_bcc_esm1.py | 39 +- .../cmor/_fixes/cmip6/test_cesm2.py | 115 ++- .../cmor/_fixes/cmip6/test_cesm2_fv2.py | 33 +- .../cmor/_fixes/cmip6/test_cesm2_waccm.py | 33 +- .../cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py | 33 +- .../cmor/_fixes/cmip6/test_cnrm_esm2_1.py | 60 +- .../cmor/_fixes/cmip6/test_fgoals_g3.py | 62 +- .../cmor/_fixes/cmip6/test_gfdl_cm4.py | 16 +- .../cmor/_fixes/cmip6/test_gfdl_esm4.py | 131 ++- .../cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py | 94 ++- .../cmor/_fixes/cmip6/test_kiost_esm.py | 18 +- .../cmor/_fixes/cmip6/test_mcm_ua_1_0.py | 146 +++- .../cmor/_fixes/cmip6/test_sam0_unicon.py | 32 +- tests/integration/cmor/_fixes/test_common.py | 253 +++++- tests/integration/cmor/_fixes/test_fix.py | 27 +- tests/integration/cmor/_fixes/test_shared.py | 59 ++ tests/integration/cmor/test_table.py | 9 + tests/integration/data_finder.yml | 470 +++++++++-- .../preprocessor/_ancillary_vars/__init__.py | 5 + .../_ancillary_vars/test_add_fx_variables.py | 257 ++++++ .../preprocessor/_io/test_concatenate.py | 8 +- .../integration/preprocessor/_io/test_save.py | 9 +- .../preprocessor/_mask/test_mask.py | 88 +- tests/integration/test_data_finder.py | 5 +- tests/integration/test_recipe.py | 322 +++++--- tests/integration/test_recipe_checks.py | 3 +- .../timeseries_daily_365_day-full-mean.nc | Bin 25378 -> 23299 bytes .../timeseries_daily_365_day-overlap-mean.nc | Bin 25378 -> 23299 bytes tests/unit/cmor/test_cmor_check.py | 31 + .../data_finder/test_get_start_end_year.py | 5 + tests/unit/data_finder/test_replace_tags.py | 65 +- tests/unit/experimental/test_config.py | 5 +- tests/unit/preprocessor/_area/test_area.py | 121 ++- tests/unit/preprocessor/_derive/test_rlus.py | 35 + tests/unit/preprocessor/_derive/test_rsus.py | 35 + 
tests/unit/preprocessor/_mask/test_mask.py | 8 +- .../_multimodel/test_multimodel.py | 4 +- tests/unit/preprocessor/_other/test_other.py | 1 - .../unit/preprocessor/_regrid/test_regrid.py | 131 ++- tests/unit/preprocessor/_time/test_time.py | 3 +- .../unit/preprocessor/_volume/test_volume.py | 38 +- .../test_weighting_landsea_fraction.py | 188 ++--- tests/unit/test_recipe.py | 65 +- yamale_meta.yaml | 52 -- 137 files changed, 5427 insertions(+), 1688 deletions(-) create mode 100644 doc/develop/preprocessor_function.rst create mode 100644 esmvalcore/cmor/tables/custom/CMOR_tasaga.dat create mode 100644 esmvalcore/preprocessor/_ancillary_vars.py create mode 100644 esmvalcore/preprocessor/_derive/rlus.py create mode 100644 esmvalcore/preprocessor/_derive/rsus.py create mode 100644 tests/integration/preprocessor/_ancillary_vars/__init__.py create mode 100644 tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py create mode 100644 tests/unit/preprocessor/_derive/test_rlus.py create mode 100644 tests/unit/preprocessor/_derive/test_rsus.py delete mode 100644 yamale_meta.yaml diff --git a/.circleci/config.yml b/.circleci/config.yml index 27be5c21ae..09e336c98b 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,8 +8,8 @@ commands: check_changes: steps: - run: | - if (test "$CIRCLE_BRANCH" = master || - git --no-pager diff --name-only origin/master... | + if (test "$CIRCLE_BRANCH" = main || + git --no-pager diff --name-only origin/main... | grep -q -E -f .circleci/install_triggers) then echo Running installation tests @@ -33,7 +33,8 @@ jobs: . /opt/conda/etc/profile.d/conda.sh conda activate esmvaltool pip install .[test] - pytest -n 2 -m "not installation" + pytest -n 2 -m "not installation and not sequential" + pytest -n 0 -m "sequential" - save_cache: key: test-{{ .Branch }} paths: @@ -62,19 +63,25 @@ jobs: . /opt/conda/etc/profile.d/conda.sh set -x mkdir /logs + # Add additional requirements for running all tests + echo " + - r-base + - r-yaml + - ncl + " >> environment.yml # Install - # conda update -y conda > /logs/conda.txt 2>&1 - conda env update >> /logs/conda.txt 2>&1 + conda env create >> /logs/conda.txt 2>&1 set +x; conda activate esmvaltool; set -x - conda install -yS r-base r-yaml ncl -c conda-forge pip install .[test] > /logs/install.txt 2>&1 # Log versions dpkg -l > /logs/versions.txt conda env export > /logs/environment.yml pip freeze > /logs/requirements.txt # Test installation - pytest -n 2 + pytest -n 2 -m "not sequential" + pytest -n 0 -m "sequential" esmvaltool version + no_output_timeout: 30m - save_cache: key: install-{{ .Branch }} paths: @@ -127,20 +134,25 @@ jobs: command: | . 
/opt/conda/etc/profile.d/conda.sh mkdir /logs + # Add additional requirements for running all tests + echo " + - r-base + - r-yaml + - ncl + " >> environment.yml # Install - # conda update -y conda > /logs/conda.txt 2>&1 - conda env update >> /logs/conda.txt 2>&1 + conda env create >> /logs/conda.txt 2>&1 conda activate esmvaltool pip install -e .[develop] > /logs/install.txt 2>&1 - # install additional requirements for running all tests - conda install -yS r-base r-yaml ncl -c conda-forge # Log versions dpkg -l > /logs/versions.txt conda env export > /logs/environment.yml pip freeze > /logs/requirements.txt # Test installation esmvaltool version - pytest -n 2 + pytest -n 2 -m "not sequential" + pytest -n 0 -m "sequential" + no_output_timeout: 30m - store_artifacts: path: /logs @@ -164,7 +176,8 @@ jobs: dpkg -l > /logs/versions.txt conda env export -n base > /logs/build_environment.yml # Build conda package - conda build package -c conda-forge -c esmvalgroup > /logs/build_log.txt + conda build package -c conda-forge > /logs/build_log.txt + no_output_timeout: 60m - store_artifacts: path: /logs @@ -206,7 +219,7 @@ workflows: filters: branches: only: - - master + - main jobs: - test - install diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 92cc2e7a0e..c07a011dad 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,45 +1,54 @@ ## Description -- Closes #issue_number -- Link to documentation: +Closes #issue_number + +Link to documentation: *** -## Before you get started +## [Before you get started](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#getting-started) -- [ ] [☝ Create an issue](https://github.com/ESMValGroup/ESMValCore/issues) to discuss what you are going to do +- [ ] [☝ Create an issue](https://github.com/ESMValGroup/ESMValCore/issues) to discuss what you are going to do -## Checklist +## [Checklist](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#checklist-for-pull-requests) -- [ ] PR has a descriptive title for the [changelog](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) -- [ ] Labels are assigned so they can be used in the [changelog](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) -- [ ] Code follows the [style guide](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#code-style) -- [ ] [Documentation](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#documentation) is available for new functionality -- [ ] YAML files pass [`pre-commit`](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#pre-commit) or [`yamllint`](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/community/introduction.html#yaml) checks -- [ ] [Circle/CI tests pass](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) -- [ ] [Codacy code quality checks pass](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) -- [ ] [Documentation builds successfully](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) on readthedocs -- [ ] [Unit tests](https://docs.esmvaltool.org/projects/esmvalcore/projects/esmvalcore/en/latest/contributing.html#contributing-to-the-esmvalcore-package) are 
available +It is the responsibility of the author to make sure the pull request is ready to review. The icons indicate whether the item will be subject to the [🛠 Technical][1] or [🧪 Scientific][2] review. -If you make backwards incompatible changes to the recipe format: + +[1]: https://docs.esmvaltool.org/en/latest/community/review.html#technical-review +[2]: https://docs.esmvaltool.org/en/latest/community/review.html#scientific-review -- [ ] Update [ESMValTool](https://github.com/esmvalgroup/esmvaltool) and link the pull request(s) in the description +- [ ] [🧪][2] The new functionality is [relevant and scientifically sound](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#scientific-relevance) +- [ ] [🛠][1] This pull request has a [descriptive title and labels](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#pull-request-title-and-label) +- [ ] [🛠][1] Code is written according to the [code quality guidelines](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#code-quality) +- [ ] [🧪][2] and [🛠][1] [Documentation](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#documentation) is available +- [ ] [🛠][1] [Unit tests](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#tests) have been added +- [ ] [🛠][1] Changes are [backward compatible](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#backward-compatibility) +- [ ] [🛠][1] Any changed [dependencies have been added or removed](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#dependencies) correctly +- [ ] [🛠][1] The [list of authors](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#list-of-authors) is up to date +- [ ] [🛠][1] All [checks below this pull request](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#pull-request-checks) were successful *** To help with the number pull requests: -- 🙏 We kindly ask you to [review](https://docs.esmvaltool.org/en/latest/community/review.html#review-of-pull-requests) two other [open pull requests](https://github.com/ESMValGroup/ESMValTool/pulls) in this repository +- 🙏 We kindly ask you to [review](https://docs.esmvaltool.org/en/latest/community/review.html#review-of-pull-requests) two other [open pull requests](https://github.com/ESMValGroup/ESMValCore/pulls) in this repository diff --git a/.github/workflows/action-conda-publish.yml b/.github/workflows/action-conda-publish.yml index 12501b0f2c..1a20f935fc 100644 --- a/.github/workflows/action-conda-publish.yml +++ b/.github/workflows/action-conda-publish.yml @@ -6,7 +6,7 @@ on: # use this to test before actual release and publish push: branches: - - master + - main jobs: @@ -43,7 +43,7 @@ jobs: export BUILD_FOLDER=/tmp/esmvalcore/_build mkdir -p $BUILD_FOLDER conda build package \ - --channel esmvalgroup --channel conda-forge \ + --channel conda-forge \ --croot $BUILD_FOLDER \ - name: Push the package to anaconda cloud if: startsWith(github.ref, 'refs/tags') @@ -70,7 +70,7 @@ jobs: with: python-version: ${{ matrix.python-version }} miniconda-version: "latest" - channels: esmvalgroup,conda-forge + channels: conda-forge - shell: bash -l {0} run: conda --version - shell: bash -l {0} diff --git a/.github/workflows/action-install-from-conda.yml b/.github/workflows/action-install-from-conda.yml index b6141a161e..152d2692eb 100644 --- a/.github/workflows/action-install-from-conda.yml +++ 
b/.github/workflows/action-install-from-conda.yml
@@ -13,20 +13,19 @@

 name: Conda Base Install

-# runs on a push on master and at the end of every day
+# runs on a push on main and at the end of every day
 on:
   # triggering on push without branch name will run tests everytime
   # there is a push on any branch
   # turn it on only if needed
   push:
     branches:
-      - master
-      - github-actions2
-  # run the test only if the PR is to master
+      - main
+  # run the test only if the PR is to main
   # turn it on if required
   #pull_request:
   #  branches:
-  #    - master
+  #    - main
   schedule:
     - cron: '0 4 * * *'

@@ -46,7 +45,7 @@
     with:
       python-version: ${{ matrix.python-version }}
       miniconda-version: "latest"
-      channels: esmvalgroup,conda-forge
+      channels: conda-forge
   - shell: bash -l {0}
     run: mkdir -p conda_install_linux_artifacts_python_${{ matrix.python-version }}
   - shell: bash -l {0}
@@ -82,7 +81,7 @@
       activate-environment: esmvalcore
       python-version: ${{ matrix.python-version }}
       miniconda-version: "latest"
-      channels: esmvalgroup,conda-forge
+      channels: conda-forge
   - shell: bash -l {0}
     run: mkdir -p conda_install_osx_artifacts_python_${{ matrix.python-version }}
   - shell: bash -l {0}
diff --git a/.github/workflows/action-install-from-pypi.yml b/.github/workflows/action-install-from-pypi.yml
index 8d5e2abcab..19877b0d7a 100644
--- a/.github/workflows/action-install-from-pypi.yml
+++ b/.github/workflows/action-install-from-pypi.yml
@@ -13,21 +13,20 @@

 name: PyPi Install

-# runs on a push on master and at the end of every day
+# runs on a push on main and at the end of every day
 on:
   # triggering on push without branch name will run tests everytime
   # there is a push on any branch
   # turn it on only if needed
   push:
     branches:
-      - master
-      - github-actions2
+      - main

-  # run the test only if the PR is to master
+  # run the test only if the PR is to main
   # turn it on if required
   #pull_request:
   #  branches:
-  #    - master
+  #    - main
   schedule:
     - cron: '0 0 * * *'

diff --git a/.github/workflows/action-install-from-source.yml b/.github/workflows/action-install-from-source.yml
index 00ed97f61d..60ec5b04d6 100644
--- a/.github/workflows/action-install-from-source.yml
+++ b/.github/workflows/action-install-from-source.yml
@@ -14,20 +14,20 @@

 name: Source Install

-# runs on a push on master and at the end of every day
+# runs on a push on main and at the end of every day
 on:
   # triggering on push without branch name will run tests everytime
   # there is a push on any
branch # turn it on only if needed push: branches: - - master - # run the test only if the PR is to master + - main + # run the test only if the PR is to main # turn it on if required #pull_request: # branches: - # - master + # - main schedule: - cron: '0 0 * * *' # nightly @@ -55,7 +55,9 @@ jobs: - shell: bash -l {0} run: pip install -e .[develop] 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/install.txt - shell: bash -l {0} - run: pytest -n 2 -m "not installation" 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/test_report.txt + run: pytest -n 2 -m "not installation and not sequential" 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/test_report.txt + - shell: bash -l {0} + run: pytest -n 0 -m "sequential" - name: Upload artifacts if: ${{ always() }} # upload artifacts even if fail uses: actions/upload-artifact@v2 @@ -88,7 +90,9 @@ jobs: - shell: bash -l {0} run: pip install -e .[develop] --use-feature=2020-resolver 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/install.txt - shell: bash -l {0} - run: pytest -n 2 -m "not installation" 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/test_report.txt + run: pytest -n 2 -m "not installation and not sequential" 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/test_report.txt + - shell: bash -l {0} + run: pytest -n 0 -m "sequential" - name: Upload artifacts if: ${{ always() }} # upload artifacts even if fail uses: actions/upload-artifact@v2 diff --git a/.gitignore b/.gitignore index 800d83c2c3..bace074ff4 100644 --- a/.gitignore +++ b/.gitignore @@ -74,6 +74,7 @@ nosetests.xml coverage.xml *.cover .hypothesis/ +.mypy_cache # Jupyter Notebook .ipynb_checkpoints diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 04136d47dc..3e8e150768 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,3 +42,7 @@ repos: rev: '3.8.4' hooks: - id: flake8 + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v0.812' + hooks: + - id: mypy diff --git a/.prospector.yml b/.prospector.yml index dbc62018eb..f1272ec938 100644 --- a/.prospector.yml +++ b/.prospector.yml @@ -15,6 +15,9 @@ pyroma: pep8: full: true +mypy: + run: true + pep257: # disable rules that are allowed by the numpy convention # see https://github.com/PyCQA/pydocstyle/blob/master/src/pydocstyle/violations.py diff --git a/.zenodo.json b/.zenodo.json index 35270e4eac..ee05d13180 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -152,6 +152,11 @@ "affiliation": "Barcelona Supercomputing Center", "name": "Jury, Martin", "orcid": "0000-0003-0590-7843" + }, + { + "affiliation": "Stéphane Sénési EIRL, Colomiers, France", + "name": "Sénési, Stéphane", + "orcid": "0000-0003-0892-5967" } ], "description": "ESMValCore: A community tool for pre-processing data from Earth system models in CMIP and running analysis scripts.", @@ -193,4 +198,5 @@ "id": "10.13039/501100000780::824084" } ] + } diff --git a/CITATION.cff b/CITATION.cff index b229d5293c..7e3a9c0a95 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -156,6 +156,11 @@ authors: "family-names": "Jury" "given-names": "Martin" "orcid": "https://orcid.org/0000-0003-0590-7843" + - + "affiliation": "Stéphane Sénési EIRL, Colomiers, France" + "family-names": "Sénési" + "given-names": "Stéphane" + "orcid": "https://orcid.org/0000-0003-0892-5967" cff-version: "1.0.3" date-released: 2021-2-8 diff --git a/README.md b/README.md index 0c35d47036..c7aac18c5b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ 
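Note on the test commands above: the suite is split into a parallel run (pytest -n 2 -m "not installation and not sequential") and a serial one (pytest -n 0 -m "sequential"). That only works if the state-sharing tests carry the marker; a minimal sketch of how such a test is tagged (the marker itself must also be registered, e.g. in setup.cfg, to avoid pytest warnings):

    import pytest

    @pytest.mark.sequential
    def test_that_must_run_serially():
        # for example because it changes global (dask) configuration
        assert True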
[![Documentation Status](https://readthedocs.org/projects/esmvaltool/badge/?version=latest)](https://esmvaltool.readthedocs.io/en/latest/?badge=latest)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3387139.svg)](https://doi.org/10.5281/zenodo.3387139)
 [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/ESMValGroup?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-[![CircleCI](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/master.svg?style=svg)](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/master)
+[![CircleCI](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/main.svg?style=svg)](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/main)
 [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/5d496dea9ef64ec68e448a6df5a65783)](https://www.codacy.com/gh/ESMValGroup/ESMValCore?utm_source=github.com&utm_medium=referral&utm_content=ESMValGroup/ESMValCore&utm_campaign=Badge_Coverage)
 [![Codacy Badge](https://app.codacy.com/project/badge/Grade/5d496dea9ef64ec68e448a6df5a65783)](https://www.codacy.com/gh/ESMValGroup/ESMValCore?utm_source=github.com&utm_medium=referral&utm_content=ESMValGroup/ESMValCore&utm_campaign=Badge_Grade)
 [![Docker Build Status](https://img.shields.io/docker/cloud/build/esmvalgroup/esmvalcore)](https://hub.docker.com/r/esmvalgroup/esmvalcore/)
diff --git a/doc/api/esmvalcore.api.config.rst b/doc/api/esmvalcore.api.config.rst
index 983a085392..295d5bca70 100644
--- a/doc/api/esmvalcore.api.config.rst
+++ b/doc/api/esmvalcore.api.config.rst
@@ -34,7 +34,7 @@ The global configuration can be imported from the :py:mod:`esmvalcore.experiment
      'write_netcdf': True,
      'write_plots': True})

-The parameters for the user configuration file are listed `here `__.
+The parameters for the user configuration file are listed :ref:`here `.

 :py:data:`~esmvalcore.experimental.CFG` is essentially a python dictionary with a few extra functions, similar to :py:mod:`matplotlib.rcParams`. This means that values can be updated like this:
diff --git a/doc/api/esmvalcore.rst b/doc/api/esmvalcore.rst
index b9d2127688..6288927b09 100644
--- a/doc/api/esmvalcore.rst
+++ b/doc/api/esmvalcore.rst
@@ -1,5 +1,7 @@
-ESMValTool Core API Reference
-=============================
+.. _api:
+
+ESMValCore API Reference
+========================

 ESMValCore is mostly used as a commandline tool. However, it is also possible to use (parts of) ESMValTool as a library. This section documents the public API of ESMValCore.
diff --git a/doc/changelog.rst b/doc/changelog.rst
index 77432796fb..ab1ab4881b 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -41,7 +41,7 @@ Documentation

 - Fix numbering of steps in release instructions (`#838 `__) `Bouwe Andela `__
 - Add labels to changelogs of individual versions for easy reference (`#899 `__) `Klaus Zimmermann `__
-- Make CircleCI badge specific to master branch (`#902 `__) `Bouwe Andela `__
+- Make CircleCI badge specific to main branch (`#902 `__) `Bouwe Andela `__
 - Fix docker build badge url (`#906 `__) `Stef Smeets `__
 - Update github PR template (`#909 `__) `Stef Smeets `__
 - Refer to ESMValTool GitHub discussions page in the error message (`#900 `__) `Bouwe Andela `__
diff --git a/doc/conf.py b/doc/conf.py
index 9c42ec7eb8..218bcec1a4 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -111,7 +111,7 @@

 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
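Note on the configuration docs touched above: CFG is described there as dict-like, so reading and updating it follows normal mapping semantics. A short sketch (key names follow the user configuration file; exact validation depends on the installed version):

    from esmvalcore.experimental import CFG

    CFG['max_parallel_tasks'] = 4  # updated like a plain dict entry
    print(CFG['output_dir'])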
-exclude_patterns = [] +exclude_patterns: list = [] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -170,7 +170,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = [] +html_static_path: list = [] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/doc/contributing.rst b/doc/contributing.rst index 3b3243dac6..679edb2033 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -1,3 +1,5 @@ +.. _contributing: + Contributions are very welcome ============================== @@ -9,265 +11,638 @@ If you have suggestions for improving the process of contributing, please do not If you have a bug or other issue to report or just need help, please open an issue on the `issues tab on the ESMValCore github repository `__. -If you would like to contribute a new preprocessor function, derived variable, fix for a dataset, or another new -feature, please discuss your idea with the development team before +If you would like to contribute a new +:ref:`preprocessor function `, +:ref:`derived variable `, :ref:`fix for a dataset `, or +another new feature, please discuss your idea with the development team before getting started, to avoid double work and/or disappointment later. A good way to do this is to open an -`issue on GitHub `__. - -To get started developing, follow the instructions below. -For help with common new features, please have a look at :doc:`develop/index`. +`issue `_ on GitHub. Getting started --------------- -To install for development, follow the instructions in :doc:`quickstart/install`. - -Running tests -------------- - -Go to the directory where the repository is cloned and run -``pytest``. Optionally you can skip tests which require -additional dependencies for supported diagnostic script languages by -adding ``-m 'not installation'`` to the previous command. -Tests will also be run automatically by -`CircleCI `__. - -Sample data ------------ +See :ref:`installation-from-source` for instructions on how to set up a development +installation. -If you need sample data to work with, `this repository `__ contains samples of real data for use with ESMValTool development, demonstration purposes and automated testing. The goal is to keep the repository size small (~ 100 MB), so it can be easily downloaded and distributed. - -The data are installed as part of the developer dependencies, and used by some larger tests (i.e. in the `multimodel tests `__) - -The loading and preprocessing of the data can be somewhat time-consuming (~30 secs) and are cached by ``pytest`` to make the tests more performant. -Clear the cache by using running pytest with the ``--cache-clear`` flag. To avoid running these tests using sample data, use `pytest -m "not use_sample_data"`. -If you are adding new tests using sample data, please use the decorator ``@pytest.mark.use_sample_data``. +New development should preferably be done in the +`ESMValCore `__ +GitHub repository. +The default git branch is ``main``. +Use this branch to create a new feature branch from and make a pull request +against. +This +`page `__ +offers a good introduction to git branches, but it was written for +BitBucket while we use GitHub, so replace the word BitBucket by GitHub +whenever you read it. 
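Note on the sample-data conventions mentioned above: tests that rely on the sample-data package carry a dedicated marker so they can be deselected with pytest -m "not use_sample_data". A sketch of such a test (the fixture name here is purely illustrative):

    import pytest

    @pytest.mark.use_sample_data
    def test_multimodel_on_sample_data(sample_cubes):  # hypothetical fixture
        assert len(sample_cubes) > 1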
-Code style
-----------
+It is recommended that you open a `draft pull
+request `__
+early, as this will cause :ref:`CircleCI to run the unit tests `,
+:ref:`Codacy to analyse your code `, and
+:ref:`readthedocs to build the documentation `.
+It’s also easier to get help from other developers if your code is visible in a
+pull request.
+
+:ref:`Make small pull requests `, the ideal pull request changes
+just a few files and adds/changes no more than 100 lines of production code.
+The amount of test code added can be more extensive, but changes to existing
+test code should be made sparingly.
+
+Design considerations
+~~~~~~~~~~~~~~~~~~~~~
+
+When making changes, try to respect the current structure of the program.
+If you need to make major changes to the structure of the program to add a
+feature, chances are that you have either not come up with the
+best design or the feature is not a very good fit for the tool.
+Discuss your feature with the `@ESMValGroup/esmvaltool-coreteam`_ in an issue_
+to find a solution.
+
+Please keep the following considerations in mind when programming:
+
+- Changes should preferably be :ref:`backward compatible `.
+- Apply changes gradually and change no more than a few files in a single pull
+  request, but do make sure every pull request in itself brings a meaningful
+  improvement.
+  This reduces the risk of breaking existing functionality and making
+  :ref:`backward incompatible ` changes, because it
+  helps you as well as the reviewers of your pull request to better understand
+  what exactly is being changed.
+- :ref:`preprocessor_functions` are Python functions (and not classes) so they
+  are easy to understand and implement for scientific contributors.
+- No additional CMOR checks should be implemented inside preprocessor functions.
+  The input cube is fixed and confirmed to follow the specification in
+  `esmvalcore/cmor/tables `__
+  before applying any other preprocessor functions.
+  This design helps to keep the preprocessor functions, and the diagnostic
+  scripts that use the preprocessed data, simple and reliable.
+  See :ref:`cmor_table_configuration` for the mapping from ``project`` in the
+  recipe to the relevant CMOR table.
+- The ESMValCore package is based on :ref:`iris `.
+  Preprocessor functions should preferably be small and just call the relevant
+  iris code.
+  Code that is more involved and more broadly applicable than just in the
+  ESMValCore should be implemented in iris instead.
+- Any settings in the recipe that can be checked before loading the data should
+  be checked at the :ref:`task creation stage `.
+  This prevents users from running a recipe for several hours before finding out
+  that they made a mistake in the recipe.
+  No data should be processed or files written while creating the tasks.
+- CMOR checks should provide a good balance between reliability of the tool
+  and ease of use.
+  Several :ref:`levels of strictness of the checks `
+  are available to facilitate this.
+- Keep your code short and simple: we would like to make contributing as easy as
+  possible.
+  For example, avoid implementing complicated class inheritance structures and
+  `boilerplate `__
+  code.
+- If you find yourself copy-pasting a piece of code and making minor changes
+  to every copy, instead put the repeated bit of code in a function that you can
+  re-use, and provide the changed bits as function arguments.
+- Be careful when changing existing unit tests to make your new feature work.
+  You might be breaking existing features if you have to change existing tests.
+
+Finally, if you would like to improve the design of the tool, discuss your plans
+with the `@ESMValGroup/esmvaltool-coreteam`_ to make sure you understand the
+current functionality and you all agree on the new design.
+
+.. _pull_request_checklist:
+
+Checklist for pull requests
+---------------------------
+
+To clearly communicate up front what is expected from a pull request, we have
+the following checklist.
+Please try to do everything on the list before requesting a review.
+If you are unsure about something on the list, please ask the
+`@ESMValGroup/tech-reviewers`_ or `@ESMValGroup/science-reviewers`_ for help
+by commenting on your (draft) pull request or by starting a new
+`discussion `__.
+
+In the ESMValTool community we use
+:ref:`pull request reviews ` to ensure all code and
+documentation contributions are of good quality.
+The icons indicate whether the item will be checked during the
+:ref:`🛠 Technical review ` or
+:ref:`🧪 Scientific review `.
+
+- 🧪 The new functionality is :ref:`relevant and scientifically sound`
+- 🛠 :ref:`The pull request has a descriptive title and labels `
+- 🛠 Code is written according to the :ref:`code quality guidelines `
+- 🧪 and 🛠 Documentation_ is available
+- 🛠 Unit tests_ have been added
+- 🛠 Changes are :ref:`backward compatible `
+- 🛠 Changed :ref:`dependencies have been added or removed correctly `
+- 🛠 The :ref:`list of authors ` is up to date
+- 🛠 The :ref:`checks shown below the pull request ` are successful
+
+.. _scientific_relevance:
+
+Scientific relevance
+--------------------
+
+The proposed changes should be relevant for the larger scientific community.
+The implementation of new features should be scientifically sound; e.g.
+the formulas used in new preprocessor functions should be accompanied by the
+relevant references and checked for correctness by the scientific reviewer.
+The `CF Conventions `_ as well as additional
+standards imposed by `CMIP `_ should be
+followed whenever possible.
+
+.. _descriptive_pr_title:
+
+Pull request title and label
+----------------------------
+
+The title of a pull request should clearly describe what the pull request changes.
+If you need more text to describe what the pull request does, please add it in
+the description.
+`Add one or more labels `__
+to your pull request to indicate the type of change.
+At least one of the following
+`labels `__ should be used:
+`bug`, `deprecated feature`, `fix for dataset`, `preprocessor`, `cmor`, `api`,
+`testing`, `documentation` or `enhancement`.
+
+The titles and labels of pull requests are used to compile the :ref:`changelog`,
+therefore it is important that they are easy to understand for people who are
+not familiar with the code or people in the project.
+Descriptive pull request titles also make it easier to find out what was
+changed when, which is useful in case a bug was introduced.
+
+.. _code_quality:
+
+Code quality
+------------
 
 To increase the readability and maintainability of the ESMValCore source
-code, we aim to adhere to best practices and coding standards. All pull
-requests are reviewed and tested by one or more members of the core
-development team. For code in all languages, it is highly recommended
-that you split your code up in functions that are short enough to view
-without scrolling.
-
-We include checks for Python and yaml files, which are
-described in more detail in the sections below.
+code, we aim to adhere to best practices and coding standards.
+
+We include checks for Python and yaml files, most of which are described in more
+detail in the sections below.
This includes checks for invalid syntax and formatting errors.
-`Pre-commit `__ is a handy tool that can run
-all of these checks automatically.
+:ref:`esmvaltool:pre-commit` is a handy tool that can run all of these checks
+automatically just before you commit your code.
It knows which tool to run for each filetype, and therefore provides
-a simple way to check your code!
+a convenient way to check your code.
 
+Python
+~~~~~~
 
-Pre-commit
-~~~~~~~~~~
+The standard document on best practices for Python code is
+`PEP8 `__ and there is
+`PEP257 `__ for code documentation.
+We make use of
+`numpy style docstrings `__
+to document Python functions that are visible on
+`readthedocs `_.
 
-To run ``pre-commit`` on your code, go to the ESMValCore directory
-(``cd ESMValCore``) and run
+To check if your code adheres to the standard, go to the directory where
+the repository is cloned, e.g. ``cd ESMValCore``, and run `prospector `_
 
 ::
 
-   pre-commit run
+   prospector esmvalcore/preprocessor/_regrid.py
+
+In addition to prospector, we use `flake8 `_
+to automatically check for bugs and formatting mistakes and
+`mypy `_ for checking that
+`type hints `_ are
+correct.
+Note that `type hints`_ are completely optional, but if you do choose to add
+them, they should be correct.
+
+When you make a pull request, adherence to the Python development best practices
+is checked in two ways:
+
+#. As part of the unit tests, flake8_ and mypy_ are run by
+   `CircleCI `_,
+   see the section on Tests_ for more information.
+#. `Codacy `_
+   is a service that runs prospector (and other code quality tools) on changed
+   files and reports the results.
+   Click the 'Details' link behind the Codacy check entry and then click
+   'View more details on Codacy Production' to see the results of the static
+   code analysis done by Codacy_.
+   If you need to log in, you can do so using your GitHub account.
+
+The automatic code quality checks by prospector are really helpful to improve
+the quality of your code, but they are not flawless.
+If you suspect prospector or Codacy may be wrong, please ask the
+`@ESMValGroup/tech-reviewers`_ by commenting on your pull request.
+
+Note that running prospector locally will give you quicker and sometimes more
+accurate results than waiting for Codacy.
 
-By default, pre-commit will only run on the files that have been changed,
-meaning those that have been staged in git (i.e. after
-``git add your_script.py``).
-
-To make it only check some specific files, use
+Most formatting issues in Python code can be fixed automatically by
+running the commands
 
 ::
 
-   pre-commit run --files your_script.py
+   isort some_file.py
 
-or
+to sort the imports in `the standard way `__
+using `isort `__ and
 
 ::
 
-   pre-commit run --files your_script.R
+   yapf -i some_file.py
 
-Alternatively, you can configure ``pre-commit`` to run on the staged files before
-every commit (i.e. ``git commit``), by installing it as a `git hook `__ using
+to add/remove whitespace as required by the standard using `yapf `__,
 
 ::
 
-   pre-commit install
+   docformatter -i some_file.py
 
-Pre-commit hooks are used to inspect the code that is about to be committed. The
-commit will be aborted if files are changed or if any issues are found that
-cannot be fixed automatically.
Some issues cannot be fixed (easily), so to
-bypass the check, run
+to run `docformatter `__ which helps
+with formatting the docstrings (such as line length and spaces).
 
-::
-
-   git commit --no-verify
+YAML
+~~~~
 
-or
+Please use `yamllint `_ to check that your
+YAML files do not contain mistakes.
+``yamllint`` checks for valid syntax, common mistakes like key repetition and
+cosmetic problems such as line length, trailing spaces, wrong indentation, etc.
 
-::
+Any text file
+~~~~~~~~~~~~~
 
-   git commit -n
+A generic tool to check for common spelling mistakes is
+`codespell `__.
 
-or uninstall the pre-commit hook
+.. _documentation:
 
-::
+Documentation
+-------------
 
-   pre-commit uninstall
+The documentation lives on `docs.esmvaltool.org `_.
 
+Adding documentation
+~~~~~~~~~~~~~~~~~~~~
 
-Python
-~~~~~~
+The documentation is built by readthedocs_ using `Sphinx `_.
+There are two main ways of adding documentation:
+
+#. As written text in the directory
+   `doc `__.
+   When writing
+   `reStructuredText `_
+   (``.rst``) files, please try to limit the line length to 80 characters and
+   always start a sentence on a new line.
+   This makes it easier to review changes to documentation on GitHub.
+
+#. As docstrings or comments in code.
+   For Python code, only the
+   `docstrings `__
+   of Python modules, classes, and functions
+   that are mentioned in
+   `doc/api `__
+   are used to generate the online documentation.
+   This results in the :ref:`api`.
+   The standard document with best practices on writing docstrings is
+   `PEP257 `__.
+   For the API documentation, we make use of
+   `numpy style docstrings `__.
 
-The standard document on best practices for Python code is
-`PEP8 `__ and there is
-`PEP257 `__ for
-documentation. We make use of `numpy style
-docstrings `__
-to document Python functions that are visible on
-`readthedocs `__.
+What should be documented
+~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Most formatting issues in Python code can be fixed automatically by
-running the commands
+Functionality that is visible to users should be documented.
+Any documentation that is visible on readthedocs_ should be well
+written and adhere to the standards for documentation.
+Examples of this include:
+
+- The :ref:`recipe `
+- Preprocessor :ref:`functions ` and their
+  :ref:`use from the recipe `
+- :ref:`Configuration options `
+- :ref:`Installation `
+- :ref:`Output files `
+- :ref:`Command line interface `
+- :ref:`Diagnostic script interfaces `
+- :ref:`The experimental Python interface `
+
+Note that:
+
+- For functions that compute scientific results, comments with references to
+  papers and/or other resources as well as formula numbers should be included.
+- When making changes to or introducing a new preprocessor function, also update
+  the :ref:`preprocessor documentation `.
+- There is no need to write complete numpy style documentation for functions that
+  are not visible in the :ref:`api` chapter on readthedocs.
+  However, adding a docstring describing what a function does is always a good
+  idea.
+  For short functions, a one-line docstring is usually sufficient, but more
+  complex functions might require slightly more extensive documentation.
+
+When reviewing a pull request, always check that documentation is easy to
+understand and available in all expected places.
+
+How to build and view the documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Whenever you make a pull request or push new commits to an existing pull
+request, readthedocs will automatically build the documentation.
+The link to the documentation will be shown in the list of checks below your
+pull request.
+Click 'Details' behind the check ``docs/readthedocs.org:esmvaltool`` to preview
+the documentation.
+If all checks were successful, you may need to click 'Show all checks' to see
+the individual checks.
+
+To build the documentation on your own computer, go to the directory where the
+repository was cloned and run
 
 ::
 
-   isort some_file.py
+   python setup.py build_sphinx
 
-to sort the imports in `the standard way `__
-using `isort `__ and
+or
 
 ::
 
-   yapf -i some_file.py
+   python setup.py build_sphinx -Ea
 
-to add/remove whitespace as required by the standard using `yapf `__,
+to build it from scratch.
 
-::
+Make sure that your newly added documentation builds without warnings or
+errors and looks correctly formatted.
+CircleCI_ will build the documentation with the command:
 
-   docformatter -i your_script.py
+.. code-block:: bash
 
-to run `docformatter `__ which helps formatting the doc strings (such as line length, spaces).
+   python setup.py build_sphinx --warning-is-error
 
-To check if your code adheres to the standard, go to the directory where
-the repository is cloned, e.g. ``cd ESMValCore``, and run `prospector `__
+This will catch mistakes that can be detected automatically.
 
-::
+The configuration file for Sphinx_ is
+`doc/sphinx/source/conf.py `_.
 
-   prospector esmvaltool/diag_scripts/your_diagnostic/your_script.py
+See :ref:`esmvaltool:esmvalcore-documentation-integration` for information on
+how the ESMValCore documentation is integrated into the complete ESMValTool
+project documentation on readthedocs.
 
-Run
+When reviewing a pull request, always check that the documentation checks
+shown below the pull request were successful.
 
-::
+.. _tests:
 
-   python setup.py lint
+Tests
+-----
 
-to see the warnings about the code style of the entire project.
+To check that the code works correctly, there are tests available in the
+`tests directory `_.
+We use `pytest `_ to write and run our tests.
 
-We use `flake8 `__ on CircleCI to automatically check that there are
-no formatting mistakes and Codacy for monitoring (Python) code quality.
-Running prospector locally will give you quicker and sometimes more
-accurate results.
+Contributions to ESMValCore should be covered by unit tests.
+Have a look at the existing tests in the ``tests`` directory for inspiration on
+how to write your own tests.
+If you do not know how to start with writing unit tests, ask the
+`@ESMValGroup/tech-reviewers`_ for help by commenting on the pull request and
+they will try to help you.
+To check which parts of your code are covered by tests, open the file
+``test-reports/coverage_html/index.html`` and browse to the relevant file.
+It is also possible to view code coverage on Codacy_ (click the Files tab)
+and CircleCI_ (open the ``tests`` job and click the ARTIFACTS tab).
 
-YAML
-~~~~
+Whenever you make a pull request or push new commits to an existing pull
+request, the tests in the `tests directory`_ of the branch associated with the
+pull request will be run automatically on CircleCI_.
+The results appear at the bottom of the pull request.
+Click on 'Details' for more information on a specific test job.
+To see some of the results on CircleCI, you may need to log in.
+You can do so using your GitHub account.
 
-Please use ``yamllint`` to check that your YAML files do not contain
-mistakes.
+To run the tests on your own computer, go to the directory where the repository
+is cloned and run the command
 
-Any text file
-~~~~~~~~~~~~~
+..
code-block:: bash
 
-A generic tool to check for common spelling mistakes is
-`codespell `__.
+   pytest
 
-Documentation
--------------
+Optionally you can skip tests which require additional dependencies for
+supported diagnostic script languages by adding ``-m 'not installation'`` to the
+previous command.
 
-What should be documented
-~~~~~~~~~~~~~~~~~~~~~~~~~
+When reviewing a pull request, always check that all test jobs on CircleCI_ were
+successful.
 
-Any code documentation that is visible on
-`readthedocs `__ should be well
-written and adhere to the standards for documentation for the respective
-language. Note that there is no need to write extensive documentation
-for functions that are not visible on readthedocs. However, adding a one
-line docstring describing what a function does is always a good idea.
-When making changes/introducing a new preprocessor function, also update
-the `preprocessor
-documentation `__.
-
-How to build the documentation locally
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. _sample_data_tests:
 
-Go to the directory where the repository is cloned and run
+Sample data
+~~~~~~~~~~~
+
+New or modified preprocessor functions should preferably also be tested using
+the sample data.
+These tests are located in
+`tests/sample_data `__.
+Please mark new tests that use the sample data with the
+`decorator `__
+``@pytest.mark.use_sample_data``.
+
+The `ESMValTool_sample_data `_
+repository contains samples of CMIP6 data for testing ESMValCore.
+The `ESMValTool-sample-data `_
+package is installed as part of the developer dependencies.
+The size of the package is relatively small (~ 100 MB), so it can be easily
+downloaded and distributed.
+
+Preprocessing the sample data can be time-consuming, so some
+intermediate results are cached by pytest to make the tests run faster.
+If you suspect the tests are failing because the cache is invalid, clear it by
+running
+
+.. code-block:: bash
+
+   pytest --cache-clear
+
+To avoid running the time-consuming tests that use sample data altogether, run
+
+.. code-block:: bash
+
+   pytest -m "not use_sample_data"
+
+
+Automated testing
+~~~~~~~~~~~~~~~~~
+
+Whenever you make a pull request or push new commits to an existing pull
+request, the tests in the `tests directory`_ of the branch associated with the
+pull request will be run automatically on CircleCI_.
+
+Every night, more extensive tests are run to make sure that problems with the
+installation of the tool are discovered by the development team before users
+encounter them.
+These nightly tests have been designed to follow the installation procedures
+described in the documentation, e.g. in the :ref:`install` chapter.
+The nightly tests are run using both CircleCI and GitHub Actions.
+The result of the tests run by CircleCI can be seen on the
+`CircleCI project page `__
+and the result of the tests run by GitHub Actions can be viewed on the
+`Actions tab `__
+of the repository.
+
+The configuration of the tests run by CircleCI can be found in the directory
+`.circleci `__,
+while the configuration of the tests run by GitHub Actions can be found in the
+directory
+`.github/workflows `__.
+
+.. _backward_compatibility:
+
+Backward compatibility
+----------------------
+
+The ESMValCore package is used by many people to run their recipes.
+Many of these recipes are maintained in the public
+`ESMValTool `_ repository, but
+there are also users who choose not to share their work there.
+
While our commitment is first and foremost to users who do share their recipes
in the ESMValTool repository, we still try to be nice to all of the ESMValCore
users.
When making changes, e.g. to the :ref:`recipe format `, the
:ref:`diagnostic script interface `, the public
:ref:`Python API `, or the :ref:`configuration file format `,
keep in mind that this may affect many users.
To keep the tool user friendly, try to avoid making changes that are not
backward compatible, i.e. changes that require users to change their existing
recipes, diagnostics, configuration files, or scripts.

If you really must change the public interfaces of the tool, always discuss this
with the `@ESMValGroup/esmvaltool-coreteam`_.
Try to deprecate the feature first by issuing a :py:class:`DeprecationWarning`
using the :py:mod:`warnings` module and schedule it for removal three
`minor versions `__ from the latest released version.
For example, when you deprecate a feature in a pull request that will be
included in version 2.3, that feature could be removed in version 2.5.
Mention the version in which the feature will be removed in the deprecation
message.
Label the pull request with the
`deprecated feature `__
label.
When deprecating a feature, please follow up by actually removing the feature
in due course.

If you must make backward incompatible changes, you need to update the available
recipes in ESMValTool and link the ESMValTool pull request(s) in the ESMValCore
pull request description.
You can ask the `@ESMValGroup/esmvaltool-recipe-maintainers`_ for help with
updating existing recipes, but please be considerate of their time.

When reviewing a pull request, always check for backward incompatible changes
and make sure they are needed and have been discussed with the
`@ESMValGroup/esmvaltool-coreteam`_.
Also, make sure the author of the pull request has created the accompanying pull
request(s) to update the ESMValTool, before merging the ESMValCore pull request.

.. _dependencies:

Dependencies
------------

Before considering adding a new dependency, carefully check that the
`license `__
of the dependency you want to add and any of its dependencies are
`compatible `__
with the
`Apache 2.0 `_
license that applies to the ESMValCore.
Note that the GPL version 2 license is considered incompatible with the Apache
2.0 license, while the compatibility of the GPL version 3 license with the
Apache 2.0 license is questionable.
See this `statement `__
by the authors of the Apache 2.0 license for more information.

When adding or removing dependencies, please consider applying the changes in
the following files (see the sketch after this list):

- ``environment.yml``
  contains development dependencies that cannot be installed from
  `PyPI `_
- ``doc/requirements.txt``
  contains Python dependencies needed to build the documentation that can be
  installed from PyPI
- ``doc/conf.py``
  contains a list of Python dependencies needed to build the documentation that
  cannot be installed from PyPI and need to be mocked when building the
  documentation.
  (We do not use conda to build the documentation because this is too time
  consuming.)
- ``setup.py``
  contains all Python dependencies, regardless of their installation source
- ``package/meta.yaml``
  contains dependencies for the conda package; all Python and compiled
  dependencies that can be installed from conda should be listed here

Note that packages may have a different name on
`conda-forge `__ than on PyPI_.
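As an illustration, adding a new Python dependency to ``setup.py`` could look
roughly like the sketch below.
This is only a sketch: the actual layout of ``setup.py`` differs, and
``examplepkg`` is a hypothetical package name.

.. code-block:: python

   # Sketch only: the real setup.py is organized differently.
   from setuptools import setup

   setup(
       name='ESMValCore',
       install_requires=[
           # ... existing dependencies ...
           'examplepkg>=1.0',  # hypothetical new dependency
       ],
   )

Remember to apply the same change to ``environment.yml`` and
``package/meta.yaml`` as well, as described in the list above.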
+
+Several test jobs on CircleCI_ related to the installation of the tool will only
+run if you change the dependencies.
+These will be skipped for most pull requests.
+
+When reviewing a pull request where dependencies are added or removed, always
+check that the changes have been applied in all relevant files.
+
+.. _authors:
 
-::
+List of authors
+---------------
 
-   python setup.py build_sphinx -Ea
+If you make a contribution to ESMValCore and you would like to be listed as an
+author (e.g. on `Zenodo `__), please add your
+name to the list of authors in ``CITATION.cff`` and generate the entry for the
+``.zenodo.json`` file by running the commands
 
-Make sure that your newly added documentation builds without warnings or
-errors.
+::
 
-Branches, pull requests and code review
----------------------------------------
+   pip install cffconvert
+   cffconvert --ignore-suspect-keys --outputformat zenodo --outfile .zenodo.json
 
-The default git branch is ``master``. Use this branch to create a new
-feature branch from and make a pull request against. This
-`page `__
-offers a good introduction to git branches, but it was written for
-BitBucket while we use GitHub, so replace the word BitBucket by GitHub
-whenever you read it.
+Presently, this method unfortunately discards the `communities`
+and `grants` entries from that file; please restore them manually, or
+alternatively add your entry to ``.zenodo.json`` by hand.
 
-It is recommended that you open a `draft pull
-request `__
-early, as this will cause CircleCI to run the unit tests and Codacy to
-analyse your code. It’s also easier to get help from other developers if
-your code is visible in a pull request.
-
-You also must assign at least one `label `__
-to it as they are used to organize the changelog. At least one of the following
-ones must be used: `bug`, `deprecated feature`, `fix for dataset`,
-`preprocessor`, `cmor`, `api`, `testing`, `documentation` or `enhancement`.
-
-You can view the results of the automatic checks below your pull
-request. If one of the tests shows a red cross instead of a green
-approval sign, please click the link and try to solve the issue. Note
-that this kind of automated checks make it easier to review code, but
-they are not flawless, so occasionally Codacy will report false
-positives.
-
-Contributing to the ESMValCore package
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. _pull_request_checks:
 
-Contributions to ESMValCore should
+Pull request checks
+-------------------
 
-- Preferably be covered by unit tests. Unit tests are mandatory for new
-  preprocessor functions or modifications to existing functions. If you
-  do not know how to start with writing unit tests, let us know in a
-  comment on the pull request and a core development team member will
-  try to help you get started.
-- Be accompanied by appropriate documentation.
-- Introduce no new issues on Codacy.
+To check that a pull request is up to standard, several automatic checks are
+run when you make a pull request.
+Read more about it in the Tests_ and Documentation_ sections.
+Successful checks have a green ✓ in front, a ❌ means the check failed.
 
-List of authors
-~~~~~~~~~~~~~~~
+If you need help with the checks, please ask the technical reviewer of your pull
+request for help.
+Ask `@ESMValGroup/tech-reviewers`_ if you do not have a technical reviewer yet.
-
-If you make a contribution to ESMValCore and would like to be listed as an
-author, please add your name to the list of authors in CITATION.cff and
-regenerate the file .zenodo.json by running the command
+If the checks are broken because of something unrelated to the current
+pull request, please check if there is an open issue that reports the problem.
+Create one if there is no issue yet.
+You can attract the attention of the `@ESMValGroup/esmvaltool-coreteam`_ by
+mentioning them in the issue if it looks like no-one is working on solving the
+problem yet.
+The issue needs to be fixed in a separate pull request first.
+After that has been merged into the ``main`` branch and all checks on this
+branch are green again, merge it into your own branch to get the tests to pass.
 
-::
+When reviewing a pull request, always make sure that all checks were successful.
+If the Codacy check keeps failing, please run prospector locally.
+If necessary, ask the pull request author to do the same and to address the
+reported issues.
+See the section on code_quality_ for more information.
+Never merge a pull request with failing CircleCI or readthedocs checks.
 
-   pip install cffconvert
-   cffconvert --ignore-suspect-keys --outputformat zenodo --outfile .zenodo.json
 
.. _how-to-make-a-release:
 
-How to make a release
----------------------
+Making a release
+----------------
 
The release manager makes the release, assisted by the release manager of the
previous release, or if that person is not available, another previous release
@@ -280,14 +655,14 @@ To make a new release of the package, follow these steps:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
Check the ``nightly``
-`build on CircleCI `__
+`build on CircleCI `__
and the
`GitHub Actions run `__.
All tests should pass before making a release (branch).
 
2. Create a release branch
~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Create a branch off the ``master`` branch and push it to GitHub.
+Create a branch off the ``main`` branch and push it to GitHub.
Ask someone with administrative permissions to set up branch protection rules
for it so only you and the person helping you with the release can push to it.
Announce the name of the branch in an issue and ask the members of the
@@ -301,31 +676,31 @@ The version number is stored in ``esmvalcore/_version.py``,
``package/meta.yaml``, ``CITATION.cff``. Make sure to update all files.
Also update the release date in ``CITATION.cff``.
See https://semver.org for more information on choosing a version number.
-Make a pull request and get it merged into ``master`` and cherry pick it into
+Make a pull request and get it merged into ``main`` and cherry pick it into
the release branch.
 
4. Add release notes
~~~~~~~~~~~~~~~~~~~~
 
Use the script
-`esmvaltool/utils/draft_release_notes.py `__
+:ref:`esmvaltool/utils/draft_release_notes.py `
to create a draft of the release notes.
This script uses the titles and labels of merged pull requests since the
previous release.
Review the results, and if anything needs changing, change it on GitHub and
re-run the script until the changelog looks acceptable.
Copy the result to the file ``doc/changelog.rst``.
-Make a pull request and get it merged into ``master`` and cherry pick it into
+Make a pull request and get it merged into ``main`` and cherry pick it into
the release branch.
 
5. Cherry pick bugfixes into the release branch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
If a bug is found and fixed (i.e.
pull request merged into the
-``master`` branch) during the period of testing, use the command
+``main`` branch) during the period of testing, use the command
``git cherry-pick`` to include the commit for this bugfix into the release
branch.
When the testing period is over, make a pull request to update the
release notes with the latest changes, get it merged into
-``master`` and cherry-pick it into the release branch.
+``main`` and cherry-pick it into the release branch.
 
6. Make the release on GitHub
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
Do a final check that all tests on CircleCI and GitHub Actions completed
successfully.
Then click the
`releases tab `__
-and create the new release from the release branch (i.e. not from ``master``).
+and create the new release from the release branch (i.e. not from ``main``).
 
7. Create and upload the Conda package
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -358,7 +733,7 @@ Follow these steps to create a new conda package:
 
- Activate the base environment ``conda activate base``
- Install the required packages:
  ``conda install -y conda-build conda-verify ripgrep anaconda-client``
-- Run ``conda build package -c conda-forge -c esmvalgroup`` to build the
+- Run ``conda build package -c conda-forge`` to build the
  conda package
- If the build was successful, upload the package to the esmvalgroup
  conda channel, e.g.
@@ -394,3 +769,10 @@ Follow these steps to create a new Python package:
 
You can read more about this in
`Packaging Python Projects `__.
+
+
+.. _`@ESMValGroup/esmvaltool-coreteam`: https://github.com/orgs/ESMValGroup/teams/esmvaltool-coreteam
+.. _`@ESMValGroup/esmvaltool-developmentteam`: https://github.com/orgs/ESMValGroup/teams/esmvaltool-developmentteam
+.. _`@ESMValGroup/tech-reviewers`: https://github.com/orgs/ESMValGroup/teams/tech-reviewers
+.. _`@ESMValGroup/science-reviewers`: https://github.com/orgs/ESMValGroup/teams/science-reviewers
+.. _`@ESMValGroup/esmvaltool-recipe-maintainers`: https://github.com/orgs/ESMValGroup/teams/esmvaltool-recipe-maintainers
diff --git a/doc/develop/derivation.rst b/doc/develop/derivation.rst
index fcc317db26..9d097ff843 100644
--- a/doc/develop/derivation.rst
+++ b/doc/develop/derivation.rst
@@ -1,14 +1,14 @@
.. _derivation:
 
*******************
-Variable derivation
+Deriving a variable
*******************
 
-The variable derivation module allows to derive variables which are not in the
-CMIP standard data request using standard variables as input. All derivations
-scripts are located in
-`ESMValCore/esmvalcore/preprocessor/_derive/
-`_.
+The variable derivation preprocessor module makes it possible to derive
+variables that are not in the CMIP standard data request, using standard
+variables as input.
+This is a special type of :ref:`preprocessor function `.
+All derivation scripts are located in
+`esmvalcore/preprocessor/_derive/ `_.
A typical example looks like this:
 
.. code-block:: py
diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst
index 55af3f752a..6dbe5fe96b 100644
--- a/doc/develop/fixing_data.rst
+++ b/doc/develop/fixing_data.rst
@@ -1,11 +1,11 @@
.. _fixing_data:
 
-*************
-Dataset fixes
-*************
+***********
+Dataset fix
+***********
 
Some (model) datasets contain (known) errors that would normally prevent them
-from being processed correctly by the ESMValTool. The errors can be in
+from being processed correctly by the ESMValCore. The errors can be in
the metadata describing the dataset and/or in the actual data.
Typical examples of such errors are missing or wrong attributes (e.g.
attribute ''units'' says 1e-9 but data are actually in 1e-6), missing or
@@ -13,31 +13,42 @@ mislabeled coordinates (e.g. ''lev'' instead of ''plev'' or missing
coordinate bounds like ''lat_bnds'') or problems with the actual data
(e.g. cloud liquid water only instead of sum of liquid + ice as specified by the CMIP data
request).
 
-The ESMValTool can apply on the fly fixes to data sets that have
+The ESMValCore can apply on-the-fly fixes to datasets that have
known errors that can be fixed automatically.
 
.. note::
 
-   **CMORization as a fix**. As of early 2020, we've started implementing CMORization as fixes for
-   observational datasets. Previously, CMORization was an additional function implemented in ESMValTool.
-   This meant that users always had to store 2 copies of their observational data: both raw and CMORized.
-   Implementing CMORization as a fix removes this redundancy, as the fixes are applied 'on the fly' when
-   running a recipe. **ERA5** is the first dataset for which this 'CMORization on the fly' is supported.
-   For more information about CMORization, see:
-   `Contributing a CMORizing script for an observational dataset `_.
+   **CMORization as a fix**.
+   Support for many observational and reanalysis datasets is implemented through
+   :ref:`CMORizer scripts in the ESMValTool `.
+   However, it is also possible to add support for a dataset that is not part of
+   a CMIP data request by implementing fixes for it.
+   This is particularly useful for large datasets, where keeping a copy of both
+   the original and CMORized dataset is not feasible.
+   See `Natively supported non-CMIP datasets`_ for a list of currently supported
+   datasets.
+
 
Fix structure
=============
 
-Fixes are Python classes stored in ``esmvalcore/cmor/_fixes/[PROJECT]/[DATASET].py``
-that derive from :class:`esmvalcore.cmor._fixes.fix.Fix` and
-are named after the short name of the variable they fix. You can use the name
-``AllVars`` if you want the fix to be applied to the whole dataset
+Fixes are Python classes stored in
+``esmvalcore/cmor/_fixes/[PROJECT]/[DATASET].py`` that derive from
+:class:`esmvalcore.cmor._fixes.fix.Fix` and are named after the short name of
+the variable they fix. You can also use the names of ``mip`` tables (e.g.,
+``Amon``, ``Lmon``, ``Omon``, etc.) if you want the fix to be applied to all
+variables of that table in the dataset or ``AllVars`` if you want the fix to be
+applied to the whole dataset.
 
.. warning::
    Be careful to replace any ``-`` with ``_`` in your dataset name.
    We need this replacement to have proper python module names.
 
The fixes are automatically loaded and applied when the dataset is preprocessed.
+They are a special type of :ref:`preprocessor function `,
+called by the preprocessor functions
+:py:func:`esmvalcore.preprocessor.fix_file`,
+:py:func:`esmvalcore.preprocessor.fix_metadata`, and
+:py:func:`esmvalcore.preprocessor.fix_data`.
 
Fixing a dataset
================
@@ -139,6 +150,13 @@ so we will implement the ``fix_metadata`` method:
 
This will fix the error. The next time you run ESMValTool you will find that the
error is fixed on the fly and, hopefully, your recipe will run free of errors.
+The ``cubes`` argument to the ``fix_metadata`` method will contain all cubes
+loaded from a single input file.
+Some care may need to be taken that the right cube is selected and fixed in case
+multiple cubes are created.
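+For example, a fix can select the intended cube from ``cubes`` before modifying
+it.
+The sketch below is purely illustrative: the variable name, the selection
+criterion, and the applied fix all depend on the dataset at hand.
+
+.. code-block:: python
+
+    from esmvalcore.cmor._fixes.fix import Fix
+
+
+    class Tas(Fix):
+        """Fixes for the hypothetical ``tas`` example above."""
+
+        def fix_metadata(self, cubes):
+            """Select the intended cube and fix its metadata."""
+            # A single input file may yield several cubes; pick the one
+            # holding the variable this fix is written for.
+            cube = next(cube for cube in cubes if cube.var_name == 'tas')
+            # Apply the actual (illustrative) fix to that cube only.
+            cube.units = 'K'
+            return [cube]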
+Multiple cubes are usually created when a coordinate is mistakenly loaded as a
+cube, because the input data does not follow the
+`CF Conventions `__.
 
Sometimes other errors can appear after you fix the first one because they were
hidden by it. In our case, the latitude coordinate could have bad units or
@@ -155,10 +173,12 @@ submit it.
 
It will also be very helpful if you just scan a couple of other variables from
the same dataset and check if they share this error. In case that you find that
-it is a general one, you can change the fix name to ``AllVars`` so it gets
-executed for all variables in the dataset. If you find that this is shared only by
-a handful of similar vars you can just make the fix for those new vars derive
-from the one you just created:
+it is a general one, you can change the fix name to the corresponding ``mip``
+table name (e.g., ``Amon``, ``Lmon``, ``Omon``, etc.) so it gets executed for
+all variables in that table in the dataset, or to ``AllVars`` so it gets
+executed for all variables in the dataset. If you find that this is shared only
+by a handful of similar vars you can just make the fix for those new vars
+derive from the one you just created:
 
.. code-block:: python
 
@@ -268,6 +288,8 @@ missing coordinate you can create a fix for this model:
 
         return [data_cube]
 
+.. _cmor_check_strictness:
+
Customizing checker strictness
==============================
 
@@ -275,15 +297,12 @@ The data checker classifies its issues using four different levels of
severity. From highest to lowest:
 
- ``CRITICAL``: issues that most of the time will have severe consequences.
-
- ``ERROR``: issues that usually lead to unexpected errors, but can be safely
-  ignored sometimes.
-
+  ignored sometimes.
- ``WARNING``: something is not up to the standard but is unlikely to have
-  consequences later.
-
+  consequences later.
- ``DEBUG``: any info that the checker wants to communicate. Regardless of
-  checker strictness, those will always be reported as debug messages.
+  checker strictness, those will always be reported as debug messages.
 
Users can control which levels of issues are interpreted as errors and therefore
make the checker fail, or have them reported as warnings or debug messages.
@@ -292,26 +311,28 @@ that can take a number of values, listed below from the lowest level of
strictness to the highest:
 
- ``ignore``: all issues, regardless of severity, will be reported as
-  warnings. Checker will never fail. Use this at your own risk.
-
+  warnings. Checker will never fail. Use this at your own risk.
- ``relaxed``: only CRITICAL issues are treated as errors. We recommend not to
-  rely on this mode, although it can be useful if there are errors preventing
-  the run that you are sure you can manage on the diagnostics or that will
-  not affect you.
-
+  rely on this mode, although it can be useful if there are errors preventing
+  the run that you are sure you can manage on the diagnostics or that will
+  not affect you.
- ``default``: fail if there are any CRITICAL or ERROR issues (DEFAULT); Provides
-  a good measure of safety.
-
+  a good measure of safety.
- ``strict``: fail if there are any warnings, this is the highest level of
-  strictness. Mostly useful for checking datasets that you have produced, to
-  be sure that future users will not be distracted by inoffensive warnings.
+  strictness. Mostly useful for checking datasets that you have produced, to
+  be sure that future users will not be distracted by inoffensive warnings.
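+
+If you want to experiment with these levels from a Python session, something
+along the lines of the sketch below may help.
+Note that this is an assumption-laden sketch: it presumes that
+:py:mod:`esmvalcore.cmor.check` exposes a ``CheckLevels`` enumeration and a
+``cmor_check_metadata`` function with the keyword arguments shown; please
+verify the exact names and signatures in the :ref:`api` before relying on them.
+
+.. code-block:: python
+
+    # Sketch only: verify these names and signatures in the API reference.
+    import iris
+
+    from esmvalcore.cmor.check import CheckLevels, cmor_check_metadata
+
+    cube = iris.load_cube('tas_example.nc')  # hypothetical input file
+    # Run the metadata checks, treating only CRITICAL issues as errors.
+    cube = cmor_check_metadata(
+        cube,
+        cmor_table='CMIP6',
+        mip='Amon',
+        short_name='tas',
+        frequency='mon',
+        check_level=CheckLevels.RELAXED,
+    )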
-Natively supported non-CMOR datasets +Natively supported non-CMIP datasets ==================================== -Fixed datasets are supported through the ``native6`` project. Below is a list of -datasets currently supported. +Fixed datasets are supported through the ``native6`` project. +Put the files containing the data in the directory that you have configured +for the ``native6`` project in your :ref:`user configuration file`, in a +subdirectory called ``Tier{tier}/{dataset}/{version}/{frequency}/{short_name}``. +Replace the items in curly braces by the values used in the variable/dataset +definition in the :ref:`recipe `. +Below is a list of datasets currently supported. ERA5 ---- @@ -323,7 +344,7 @@ MSWEP ----- - Supported variables: ``pr`` -- Supported frequencies: ``mon``, ``day``, `3hr``. +- Supported frequencies: ``mon``, ``day``, ``3hr``. - Tier: 3 For example for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location. diff --git a/doc/develop/index.rst b/doc/develop/index.rst index d75c5e993c..e10a5143f0 100644 --- a/doc/develop/index.rst +++ b/doc/develop/index.rst @@ -1,8 +1,14 @@ Development *********** +To get started developing, have a look at our +:ref:`contribution guidelines `. +This chapter describes how to implement the most commonly contributed new +features. + .. toctree:: :maxdepth: 1 - Fixing data - Deriving variables + Preprocessor function + Dataset fix + Deriving a variable diff --git a/doc/develop/preprocessor_function.rst b/doc/develop/preprocessor_function.rst new file mode 100644 index 0000000000..2b828dc9dd --- /dev/null +++ b/doc/develop/preprocessor_function.rst @@ -0,0 +1,269 @@ +.. _preprocessor_function: + +Preprocessor function +********************* + +Preprocessor functions are located in :py:mod:`esmvalcore.preprocessor`. +To add a new preprocessor function, start by finding a likely looking file to +add your function to in +`esmvalcore/preprocessor `_. +Create a new file in that directory if you cannot find a suitable place. + +The function should look like this: + + +.. code-block:: python + + def example_preprocessor_function( + cube, + example_argument, + example_optional_argument=5, + ): + """Compute an example quantity. + + A more extensive explanation of the computation can be added here. Add + references to scientific literature if available. + + Parameters + ---------- + cube: iris.cube.Cube + Input cube. + + example_argument: str + Example argument, the value of this argument can be provided in the + recipe. Describe what valid values are here. In this case, a valid + argument is the name of a dimension of the input cube. + + example_optional_argument: int, optional + Another example argument, the value of this argument can optionally + be provided in the recipe. Describe what valid values are here. + + Returns + ------- + iris.cube.Cube + The result of the example computation. + """ + + # Replace this with your own computation + cube = cube.collapsed(example_argument, iris.analysis.MEAN) + + return cube + + +The above function needs to be imported in the file +`esmvalcore/preprocessor/__init__.py `__: + +.. code-block:: python + + from ._example_module import example_preprocessor_function + + __all__ = [ + ... + 'example_preprocessor_function', + ... + ] + +The location in the ``__all__`` list above determines the default order in which +preprocessor functions are applied, so carefully consider where you put it +and ask for advice if needed. 
+
+The preprocessor function above can then be used from the :ref:`preprocessors`
+like this:
+
+.. code-block:: yaml
+
+  preprocessors:
+    example_preprocessor:
+      example_preprocessor_function:
+        example_argument: median
+        example_optional_argument: 6
+
+The optional argument (in this example: ``example_optional_argument``) can be
+omitted in the recipe.
+
+Lazy and real data
+==================
+
+Preprocessor functions should support both
+:ref:`real and lazy data `.
+This is vital for supporting the large datasets that are typically used with
+the ESMValCore.
+If the data of the incoming cube has been realized (i.e. ``cube.has_lazy_data()``
+returns ``False`` so ``cube.core_data()`` is a `NumPy `__
+array), the returned cube should also have realized data.
+Conversely, if the incoming cube has lazy data (i.e. ``cube.has_lazy_data()``
+returns ``True`` so ``cube.core_data()`` is a
+`Dask array `__), the returned
+cube should also have lazy data.
+Note that NumPy functions will often call their Dask equivalent if it exists
+and if their input array is a Dask array, and vice versa.
+
+Note that preprocessor functions should preferably be small and just call the
+relevant :ref:`iris ` code.
+Code that is more involved, e.g. lots of work with Numpy and Dask arrays,
+and more broadly applicable, should be implemented in iris instead.
+
+Documentation
+=============
+
+The documentation in the function docstring will be shown in
+the :ref:`preprocessor_functions` chapter.
+In addition, you should add documentation on how to use the new preprocessor
+function from the recipe in
+`doc/recipe/preprocessor.rst `__
+so it is shown in the :ref:`preprocessor` chapter.
+See the introduction to :ref:`documentation` for more information on how to
+best write documentation.
+
+Tests
+=====
+
+Tests should be implemented for new or modified preprocessor functions.
+For an introduction to the topic, see :ref:`tests`.
+
+Unit tests
+----------
+
+To add a unit test for the preprocessor function from the example above, create
+a file called
+``tests/unit/preprocessor/_example_module/test_example_preprocessor_function.py``
+and add the following content:
+
+..
code-block:: python + + """Test function `esmvalcore.preprocessor.example_preprocessor_function`.""" + import cf_units + import dask.array as da + import iris + import numpy as np + import pytest + + from esmvalcore.preprocessor import example_preprocessor_function + + + @pytest.mark.parametrize('lazy', [True, False]) + def test_example_preprocessor_function(lazy): + """Test that the computed result is as expected.""" + + # Construct the input cube + data = np.array([1, 2], dtype=np.float32) + if lazy: + data = da.asarray(data, chunks=(1, )) + cube = iris.cube.Cube( + data, + var_name='tas', + units='K', + ) + cube.add_dim_coord( + iris.coords.DimCoord( + np.array([0.5, 1.5], dtype=np.float64), + bounds=np.array([[0, 1], [1, 2]], dtype=np.float64), + standard_name='time', + units=cf_units.Unit('days since 1950-01-01 00:00:00', + calendar='gregorian'), + ), + 0, + ) + + # Compute the result + result = example_preprocessor_function(cube, example_argument='time') + + # Check that lazy data is returned if and only if the input is lazy + assert result.has_lazy_data() is lazy + + # Construct the expected result cube + expected = iris.cube.Cube( + np.array(1.5, dtype=np.float32), + var_name='tas', + units='K', + ) + expected.add_aux_coord( + iris.coords.AuxCoord( + np.array([1], dtype=np.float64), + bounds=np.array([[0, 2]], dtype=np.float64), + standard_name='time', + units=cf_units.Unit('days since 1950-01-01 00:00:00', + calendar='gregorian'), + )) + expected.add_cell_method( + iris.coords.CellMethod(method='mean', coords=('time', ))) + + # Compare the result of the computation with the expected result + print('result:', result) + print('expected result:', expected) + assert result == expected + + +In this test we used the decorator +`pytest.mark.parametrize `_ +to test two scenarios, with both lazy and realized data, with a single test. + + +Sample data tests +----------------- + +The idea of adding :ref:`sample data tests ` is to check that +preprocessor functions work with realistic data. +This also provides an easy way to add regression tests, though these should +preferably be implemented as unit tests instead, because using the sample data +for this purpose is slow. +To add a test using the sample data, create a file +``tests/sample_data/preprocessor/example_preprocessor_function/test_example_preprocessor_function.py`` +and add the following content: + +.. 
code-block:: python

    """Test function `esmvalcore.preprocessor.example_preprocessor_function`."""
    from pathlib import Path

    import esmvaltool_sample_data
    import iris
    import pytest

    from esmvalcore.preprocessor import example_preprocessor_function


    @pytest.mark.use_sample_data
    def test_example_preprocessor_function():
        """Regression test to check that the computed result is as expected."""
        # Load an example input cube
        cube = esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon')[0]

        # Compute the result
        result = example_preprocessor_function(cube, example_argument='time')

        filename = Path(__file__).with_name('example_preprocessor_function.nc')
        if not filename.exists():
            # Create the file with the expected result if it doesn't exist
            iris.save(result, target=str(filename))
            raise FileNotFoundError(
                f'Reference data was missing, wrote new copy to {filename}')

        # Load the expected result cube
        expected = iris.load_cube(str(filename))

        # Compare the result of the computation with the expected result
        print('result:', result)
        print('expected result:', expected)
        assert result == expected


This will use a file from the sample data repository as input.
The first time you run the test, the computed result will be stored in the file
``tests/sample_data/preprocessor/example_preprocessor_function/example_preprocessor_function.nc``.
Any subsequent runs will re-load the data from file and check that it did not
change.
Make sure the stored results are small, i.e. smaller than 100 kilobytes, to
keep the size of the ESMValCore repository small.

Using multiple datasets as input
================================

The name of the first argument of the preprocessor function should in almost all
cases be ``cube``.
Only when implementing a preprocessor function that uses all datasets as input
should the name of the first argument be ``products``.
If you would like to implement this type of preprocessor function, start by
having a look at the existing functions, e.g.
:py:func:`esmvalcore.preprocessor.multi_model_statistics` or
:py:func:`esmvalcore.preprocessor.mask_fillvalues`.
diff --git a/doc/interfaces.rst b/doc/interfaces.rst
index dad7dff4fc..88d4c5d96e 100644
--- a/doc/interfaces.rst
+++ b/doc/interfaces.rst
@@ -1,3 +1,5 @@
+.. _interfaces:
+
 Diagnostic script interfaces
 ============================
 
diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst
index 054016f0b5..cd8a92eca9 100644
--- a/doc/quickstart/configure.rst
+++ b/doc/quickstart/configure.rst
@@ -13,7 +13,6 @@ There are several configuration files in ESMValCore:
   graphical output format, root paths to data, etc.;
* ``config-developer.yml``: sets a number of standardized file-naming and paths
  to data formatting;
-* ``config-logging.yml``: stores information on logging.
 
and one configuration file which is distributed with ESMValTool:
 
@@ -175,7 +174,7 @@ Most users and diagnostic developers will not need to change this file,
but it may be useful to understand its content.
It will be installed along with ESMValCore and can also be viewed on GitHub:
`esmvalcore/config-developer.yml
-`_.
+`_.
This configuration file describes the file system structure and CMOR tables for several
key projects (CMIP6, CMIP5, obs4mips, OBS6, OBS) on several key machines (e.g. BADC, CP4CDS, DKRZ,
ETHZ, SMHI, BSC).
CMIP data is stored as part of the Earth System Grid
@@ -265,15 +264,17 @@ The filename to use for preprocessed data is configured in a similar manner
using ``output_file``. Note that the extension ``.nc`` (and if applicable, a
start and end time) will automatically be appended to the filename.
 
-CMOR table configuration
--------------------------
+.. _cmor_table_configuration:
+
+Project CMOR table configuration
+--------------------------------
 
ESMValCore comes bundled with several CMOR tables, which are stored in the directory
-`esmvalcore/cmor/tables
-`_.
+`esmvalcore/cmor/tables `_.
These are copies of the tables available from `PCMDI `_.
-There are four settings related to CMOR tables available:
+For every ``project`` that can be used in the recipe, there are four settings
+related to CMOR tables available:
 
* ``cmor_type``: can be ``CMIP5`` if the CMOR table is in the same format as the
  CMIP5 table or ``CMIP6`` if the table is in the same format as the CMIP6 table.
@@ -281,17 +282,19 @@ There are four settings related to CMOR tables available:
  extended with variables from the ``esmvalcore/cmor/tables/custom`` directory
  and it is possible to use variables with a ``mip`` which is different from
  the MIP table in which they are defined.
-* ``cmor_path``: path to the CMOR table. Defaults to the value provided in
-  ``cmor_type`` written in lower case.
-* ``cmor_default_table_prefix``: defaults to the value provided in ``cmor_type``.
-
+* ``cmor_path``: path to the CMOR table.
+  Relative paths are with respect to `esmvalcore/cmor/tables`_.
+  Defaults to the value provided in ``cmor_type`` written in lower case.
+* ``cmor_default_table_prefix``: Prefix that needs to be added to the ``mip``
+  to get the name of the file containing the ``mip`` table.
+  Defaults to the value provided in ``cmor_type``.
 
.. _config-ref:
 
References configuration file
=============================
 
-The `esmvaltool/config-references.yml `__ file contains the list of ESMValTool diagnostic and recipe authors,
+The `esmvaltool/config-references.yml `__ file contains the list of ESMValTool diagnostic and recipe authors,
references and projects.
Each author, project and reference referred to in the documentation section of
a recipe needs to be in this file in the relevant section.
@@ -317,10 +320,3 @@ following documentation section:
 
These four items here are named people, references and projects listed in the
``config-references.yml`` file.
-
-
-Logging configuration file
-==========================
-
-.. warning::
-   Section to be added
diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst
index 5e823da409..e2fa0a61bd 100644
--- a/doc/quickstart/find_data.rst
+++ b/doc/quickstart/find_data.rst
@@ -75,6 +75,19 @@ first discuss the ``drs`` parameter: as we've seen in the previous section, the
DRS as a standard is used for both file
naming conventions and for directory structures.
 
+Synda
+-----
+
+If the `synda install `_ command is used to download data,
+it maintains the directory structure as on ESGF. To find data downloaded by
+synda, use the ``SYNDA`` ``drs`` parameter.
+
+.. code-block:: yaml
+
+ drs:
+   CMIP6: SYNDA
+   CMIP5: SYNDA
+
.. _config-user-drs:
 
Explaining ``config-user/drs: CMIP5:`` or ``config-user/drs: CMIP6:``
diff --git a/doc/quickstart/install.rst b/doc/quickstart/install.rst
index 4ec5633e48..1c78cd38b2 100644
--- a/doc/quickstart/install.rst
+++ b/doc/quickstart/install.rst
@@ -1,3 +1,5 @@
+..
_install:
+
 Installation
 ============
 
@@ -43,11 +45,11 @@ By far the easiest way to install these dependencies is to use conda_.
For a minimal conda installation (recommended) go to https://conda.io/miniconda.html.
After installing Conda, download
-`the file with the list of dependencies `_:
+`the file with the list of dependencies `_:
 
.. code-block:: bash
 
-    wget https://raw.githubusercontent.com/ESMValGroup/ESMValCore/master/environment.yml
+    wget https://raw.githubusercontent.com/ESMValGroup/ESMValCore/main/environment.yml
 
and install these dependencies into a new conda environment with the command
 
@@ -82,7 +84,7 @@ You can get the latest release with
 
   docker pull esmvalgroup/esmvalcore:stable
 
-If you want to use the current master branch, use
+If you want to use the current main branch, use
 
.. code-block:: bash
 
@@ -158,10 +160,16 @@ To run the container using the image file ``esmvalcore.sif`` use:
 
   singularity run esmvalcore.sif -c ~/config-user.yml ~/recipes/recipe_example.yml
 
+.. _installation-from-source:
 
-Development installation
+Installation from source
 ------------------------
 
+.. note::
+   If you would like to install the development version of ESMValCore alongside
+   ESMValTool, please have a look at
+   :ref:`these instructions `.
+
To install from source for development, follow these instructions.
 
- `Download and install
@@ -184,7 +192,7 @@ To install from source for development, follow these instructions.
- Install in development mode: ``pip install -e '.[develop]'``.  If you are
  installing behind a proxy that does not trust the usual pip-urls you can
  declare them with the option ``--trusted-host``,
-  e.g. \ ``pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -e .[develop]``
+  e.g. ``pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -e .[develop]``
 
- Test that your installation was successful by running ``esmvaltool -h``.
 
@@ -195,5 +203,8 @@ You will find the tool available on HPC clusters and there will be no need to install it
yourself if you are just running diagnostics:
 
- CEDA-JASMIN: `esmvaltool` is available on the scientific compute nodes (`sciX.jasmin.ac.uk` where
-  `X = 1, 2,`3, 4, 5`) after login and module loading via `module load esmvaltool`; see the helper page at
+  `X = 1, 2, 3, 4, 5`) after login and module loading via `module load esmvaltool`; see the helper page at
   `CEDA `__ ;
+
+- DKRZ-Mistral: `esmvaltool` is available on login nodes (`mistral.dkrz.de`) and pre- and post-processing
+  nodes (`mistralpp.dkrz.de`) after login and module loading via `module load esmvaltool`; the command
+  `module help esmvaltool` provides some information about the module.
diff --git a/doc/quickstart/recipes.rst b/doc/quickstart/recipes.rst
index b9af83d913..141006a4fd 100644
--- a/doc/quickstart/recipes.rst
+++ b/doc/quickstart/recipes.rst
@@ -3,15 +3,14 @@
Working with the installed recipes
**********************************
 
-Although ESMValTool can be used just to simplify the managment of data
-and the creation of your own analysis code, one of its main strenghts is the
-continuosly growing set of diagnostics and metrics that it directly provides to
+Although ESMValTool can be used just to simplify the management of data
+and the creation of your own analysis code, one of its main strengths is the
+continuously growing set of diagnostics and metrics that it directly provides to
the user.
These metrics and diagnostics are provided as a set of preconfigured recipes that users can run or customize for their own analysis. -The latest list of available recipes can be found -`here `_. +The latest list of available recipes can be found :ref:`here `. -In order to make the managmenent of these installed recipes easier, ESMValTool +In order to make the management of these installed recipes easier, ESMValTool provides the ``recipes`` command group with utilities that help the users in discovering and customizing the provided recipes. @@ -29,9 +28,9 @@ using the following command esmvaltool recipes show recipe_name.yml -And finally, to get a local copy that can then be cusotmized and run, users can +And finally, to get a local copy that can then be customized and run, users can use the following command .. code:: bash - esmvaltool recipes get recipe_name.yml \ No newline at end of file + esmvaltool recipes get recipe_name.yml diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst index c8bef59085..f62bdbe956 100644 --- a/doc/recipe/overview.rst +++ b/doc/recipe/overview.rst @@ -60,7 +60,7 @@ the following: Note that all authors, projects, and references mentioned in the description section of the recipe need to be included in the (locally installed copy of the) file - `esmvaltool/config-references.yml `_, + `esmvaltool/config-references.yml `_, see :ref:`config-ref`. The author name uses the format: ``surname_name``. For instance, John Doe would be: ``doe_john``. This information can be omitted by new users @@ -82,6 +82,8 @@ data specifications: ``RCP8.5``) - mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``) - ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``) +- sub-experiment id (key `sub_experiment`, value e.g. `s2000`, `s(2000:2002)`, + for DCPP data only) - time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers, so we recommend to avoid them. For example, use ``128`` to specify the year @@ -97,6 +99,7 @@ For example, a datasets section could be: - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004} - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn} - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn} + - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year: 2002} It is possible to define the experiment as a list to concatenate two experiments. Here it is an example concatenating the `historical` experiment with `rcp85` @@ -136,6 +139,14 @@ Please, bear in mind that this syntax can only be used in the ensemble tag. Also, note that the combination of multiple experiments and ensembles, like exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported and will raise an error. +The same simplified syntax can be used to add multiple sub-experiment ids: + +.. code-block:: yaml + + datasets: + - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004} + + Note that this section is not required, as datasets can also be provided in the Diagnostics_ section. 
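The ``(start:end)`` range syntax above is expanded by the recipe parser before the datasets are resolved. As a rough illustration of the intended semantics (the actual implementation is the ``_expand_tag`` helper in ``esmvalcore/_recipe.py`` further down in this patch), here is a minimal, hypothetical stand-alone sketch; the function name ``expand`` and the example dictionary are illustrative only:

.. code-block:: python

    import re
    from copy import deepcopy

    _RANGE = re.compile(r'\((\d+):(\d+)\)')

    def expand(dataset, key):
        """Expand e.g. sub_experiment: s(2000:2002) into s2000..s2002."""
        value = dataset.get(key, "")
        match = _RANGE.search(value)
        if not match:
            return [dataset]
        expanded = []
        for i in range(int(match.group(1)), int(match.group(2)) + 1):
            copy = deepcopy(dataset)
            # Substitute only the first range so any further ranges are
            # expanded by the recursive call.
            copy[key] = _RANGE.sub(str(i), value, count=1)
            expanded.extend(expand(copy, key))
        return expanded

    print(expand({'dataset': 'MIROC6', 'sub_experiment': 's(2000:2002)'},
                 'sub_experiment'))
    # -> three dataset dicts with sub_experiment s2000, s2001 and s2002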
@@ -334,7 +345,7 @@ concentration changed from ``sic`` to ``siconc``). ESMValCore is aware of some of them and can do the automatic translation when needed. It will even do the translation in the preprocessed file so the diagnostic does not have to deal with this complexity, setting the short name in all files to match the one used -by the recipe. For example, if ``sic`` is requested, ESMValTool will +by the recipe. For example, if ``sic`` is requested, ESMValCore will find ``sic`` or ``siconc`` depending on the project, but all preprocessed files while use ``sic`` as their short_name. If the recipe requested ``siconc``, the preprocessed files will be identical except that they will use the short_name diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 20a126de5c..5ec6ca5aa8 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -9,6 +9,7 @@ roughly following the default order in which preprocessor functions are applied: * :ref:`Variable derivation` * :ref:`CMOR check and dataset-specific fixes` +* :ref:`Fx variables as cell measures or ancillary variables` * :ref:`Vertical interpolation` * :ref:`Weighting` * :ref:`Land/Sea/Ice masking` @@ -175,6 +176,41 @@ steps: To get an overview on data fixes and how to implement new ones, please go to :ref:`fixing_data`. +.. _Fx variables as cell measures or ancillary variables: + +Fx variables as cell measures or ancillary variables +==================================================== +The following preprocessors may require the use of ``fx_variables`` +to be able to perform the computations: + + - ``area_statistics`` + - ``mask_landsea`` + - ``mask_landseaice`` + - ``volume_statistics`` + - ``weighting_landsea_fraction`` + +The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``, +checks them against CMOR standards and adds them either as ``cell_measure`` +or ``ancillary_variable`` inside the cube data. This ensures that the +defined preprocessor chain is applied to both ``variables`` and ``fx_variables``. + +Note that when calling steps that require ``fx_variables`` inside diagnostic +scripts, the variables are expected to contain the required ``cell_measures`` or +``ancillary_variables``. If missing, they can be added using the following functions: + +.. code-block:: + + from esmvalcore.preprocessor import (add_cell_measure, add_ancillary_variable) + + cube_with_area_measure = add_cell_measure(cube, area_cube, 'area') + + cube_with_volume_measure = add_cell_measure(cube, volume_cube, 'volume') + + cube_with_ancillary_sftlf = add_ancillary_variable(cube, sftlf_cube) + + cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube) + + Details on the arguments needed for each step can be found in the following sections. .. _Vertical interpolation: @@ -325,8 +361,25 @@ experiment is preferred for fx data retrieval: weighting_landsea_fraction: area_type: land exclude: ['CanESM2', 'reference_dataset'] - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + fx_variables: + sftlf: + exp: piControl + sftof: + exp: piControl + +or alternatively: + +.. code-block:: yaml + + preprocessors: + preproc_weighting: + weighting_landsea_fraction: + area_type: land + exclude: ['CanESM2', 'reference_dataset'] + fx_variables: [ + {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftof', 'exp': 'piControl'} + ] See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`. @@ -379,7 +432,8 @@ missing. 
Conversely, it retrieves the ``fx: sftlf`` mask when land needs to be masked out, respectively. Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain -experiment is preferred for fx data retrieval: +experiment is preferred for fx data retrieval. Note that it is possible to specify as many tags +for the fx variable as required: .. code-block:: yaml @@ -388,7 +442,25 @@ experiment is preferred for fx data retrieval: landmask: mask_landsea: mask_out: sea - fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] + fx_variables: + sftlf: + exp: piControl + sftof: + exp: piControl + ensemble: r2i1p1f1 + +or alternatively: + +.. code-block:: yaml + + preprocessors: + landmask: + mask_landsea: + mask_out: sea + fx_variables: [ + {'short_name': 'sftlf', 'exp': 'piControl'}, + {'short_name': 'sftof', 'exp': 'piControl', 'ensemble': 'r2i1p1f1'} + ] If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the @@ -428,8 +500,19 @@ experiment is preferred for fx data retrieval: landseaicemask: mask_landseaice: mask_out: sea - fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + fx_variables: + sftgif: + exp: piControl + +or alternatively: + +.. code-block:: yaml + + preprocessors: + landseaicemask: + mask_landseaice: + mask_out: sea + fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] See also :func:`esmvalcore.preprocessor.mask_landseaice`. @@ -529,7 +612,9 @@ inter-comparison or comparison with observational datasets). Regridding is conceptually a very similar process to interpolation (in fact, the regridder engine uses interpolation and extrapolation, with various schemes). The primary difference is that interpolation is based on sample data points, while -regridding is based on the horizontal grid of another cube (the reference grid). +regridding is based on the horizontal grid of another cube (the reference +grid). If the horizontal grid of a cube is identical to that of its reference +grid, regridding is automatically and silently skipped for performance reasons. The underlying regridding mechanism in ESMValTool uses :obj:`iris.cube.Cube.regrid` @@ -1180,9 +1265,9 @@ The area manipulation module contains the following preprocessor functions: ``extract_region`` ------------------ -This function masks data outside a rectangular region requested. The boundaries -of the region are provided as latitude and longitude coordinates in the -arguments: +This function returns a subset of the data on the rectangular region requested. +The boundaries of the region are provided as latitude and longitude coordinates +in the arguments: * ``start_longitude`` * ``end_longitude`` @@ -1192,6 +1277,10 @@ arguments: Note that this function can only be used to extract a rectangular region. Use ``extract_shape`` to extract any other shaped region from a shapefile. +If the grid is irregular, the returned region retains the original coordinates, +but is cropped to a rectangular bounding box defined by the start/end +coordinates. The deselected area inside the region is masked. + See also :func:`esmvalcore.preprocessor.extract_region`. @@ -1332,18 +1421,36 @@ region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; 
+the user may specify it by naming the variables, e.g. .. code-block:: yaml - fx_variables: ['areacello', 'volcello'] + fx_variables: + areacello: + volcello: -or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +or by naming the variables and adding specific variable parameters (the key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx + +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. @@ -1392,18 +1499,36 @@ No depth coordinate is required as this is determined by Iris. This function works best when the ``fx_variables`` provide the cell volume. The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function; -the user may specify it as a list of variables e.g. +the user may specify it by naming the variables, e.g. .. code-block:: yaml - fx_variables: ['areacello', 'volcello'] + fx_variables: + areacello: + volcello: -or as list of dictionaries, with specific variable parameters (they key-value pair may be as specific +or by naming the variables and adding specific variable parameters (the key-value pair may be as specific as a CMOR variable can permit): .. code-block:: yaml - fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello, mip': 'fx'}] + fx_variables: + areacello: + mip: Omon + volcello: + mip: fx + +Alternatively, the ``fx_variables`` argument can also be specified as a list: + +.. code-block:: yaml + + fx_variables: ['areacello', 'volcello'] + +or as a list of dictionaries: + +.. code-block:: yaml + + fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}] The recipe parser will automatically find the data files that are associated with these variables and pass them to the function for loading and processing. 
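Both spellings of ``fx_variables`` are accepted because the recipe parser normalizes the list form into the dictionary form before looking up the files. Below is a minimal sketch of that normalization, mirroring the ``_fx_list_to_dict`` helper added to ``esmvalcore/_recipe.py`` later in this patch; the stand-alone function name and the example values are illustrative only:

.. code-block:: python

    def fx_list_to_dict(fx_vars):
        """Convert the list form of fx_variables to the dictionary form."""
        result = {}
        for fx_var in fx_vars:
            if isinstance(fx_var, dict):
                # Entries with extra facets are keyed by their short_name.
                result[fx_var['short_name']] = fx_var
            else:
                # Bare variable names carry no extra facets.
                result[fx_var] = None
        return result

    print(fx_list_to_dict(
        ['areacello', {'short_name': 'volcello', 'mip': 'fx'}]))
    # {'areacello': None, 'volcello': {'short_name': 'volcello', 'mip': 'fx'}}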
diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index c660759b0a..c6129c21a6 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -146,7 +146,7 @@ def _get_response(url): json_data = None if url.lower().startswith('https'): try: - response = requests.get(url) + response = requests.get(url, timeout=5) if response.status_code == 200: json_data = response.json() else: diff --git a/esmvalcore/_config/_logging.py b/esmvalcore/_config/_logging.py index acacecee16..0831618fe3 100644 --- a/esmvalcore/_config/_logging.py +++ b/esmvalcore/_config/_logging.py @@ -5,6 +5,7 @@ import os import time from pathlib import Path +from typing import Union import yaml @@ -26,7 +27,8 @@ def _purge_file_handlers(cfg: dict) -> None: ] -def _get_log_files(cfg: dict, output_dir: str = None) -> list: +def _get_log_files(cfg: dict, + output_dir: Union[os.PathLike, str] = None) -> list: """Initialize log files for the file handlers.""" log_files = [] @@ -36,8 +38,12 @@ def _get_log_files(cfg: dict, output_dir: str = None) -> list: filename = handler.get('filename', None) if filename: + if output_dir is None: + raise ValueError('`output_dir` must be defined') + if not os.path.isabs(filename): handler['filename'] = os.path.join(output_dir, filename) + log_files.append(handler['filename']) return log_files @@ -53,8 +59,8 @@ def _update_stream_level(cfg: dict, level=None): handler['level'] = level.upper() -def configure_logging(cfg_file: str = None, - output_dir: str = None, +def configure_logging(cfg_file: Union[os.PathLike, str] = None, + output_dir: Union[os.PathLike, str] = None, console_log_level: str = None) -> list: """Configure logging. diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index ad0367799f..b9f8e8c8df 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -33,29 +33,60 @@ def find_files(dirnames, filenames): def get_start_end_year(filename): - """Get the start and end year from a file name.""" + """Get the start and end year from a file name. 
+ + Examples of allowed dates: 1980, 198001, 19801231, + 1980123123, 19801231T23, 19801231T2359, 19801231T235959, + 19801231T235959Z + + Dates must be surrounded by - or _ or string start or string end + (after removing the filename suffix). + + Look first for two dates separated by - or _, then for one single + date, and if there are multiple, for one date at start or end + """ stem = Path(filename).stem start_year = end_year = None - - # First check for a block of two potential dates separated by _ or - - daterange = re.findall(r'([0-9]{4,12}[-_][0-9]{4,12})', stem) + # + time_pattern = (r"(?P<hour>[0-2][0-9]" + r"(?P<minute>[0-5][0-9]" + r"(?P<second>[0-5][0-9])?)?Z?)") + date_pattern = (r"(?P<year>[0-9]{4})" + r"(?P<month>[01][0-9]" + r"(?P<day>[0-3][0-9]" + rf"(T?{time_pattern})?)?)?") + # + end_date_pattern = date_pattern.replace(">", "_end>") + date_range_pattern = date_pattern + r"[-_]" + end_date_pattern + # + # Next string allows to test that there is an allowed delimiter (or + # string start or end) close to date range (or to single date) + context = r"(?:^|[-_]|$)" + # + # First check for a block of two potential dates + date_range_pattern = context + date_range_pattern + context + daterange = re.search(date_range_pattern, stem) if daterange: - start_date, end_date = re.findall(r'([0-9]{4,12})', daterange[0]) - start_year = start_date[:4] - end_year = end_date[:4] + start_year = daterange.group("year") + end_year = daterange.group("year_end") else: # Check for single dates in the filename - dates = re.findall(r'([0-9]{4,12})', stem) + single_date_pattern = context + date_pattern + context + dates = re.findall(single_date_pattern, stem) if len(dates) == 1: - start_year = end_year = dates[0][:4] + start_year = end_year = dates[0][0] elif len(dates) > 1: - # Check for dates at start or end of filename - outerdates = re.findall(r'^[0-9]{4,12}|[0-9]{4,12}$', stem) - if len(outerdates) == 1: - start_year = end_year = outerdates[0][:4] + # Check for dates at start or (exclusive or) end of filename + start = re.search(r'^' + date_pattern, stem) + end = re.search(date_pattern + r'$', stem) + if start and not end: + start_year = end_year = start.group('year') + elif end: + start_year = end_year = end.group('year') # As final resort, try to get the dates from the file contents if start_year is None or end_year is None: + logger.debug("Must load file %s for daterange", filename) cubes = iris.load(filename) for cube in cubes: @@ -92,13 +123,21 @@ def select_files(filenames, start_year, end_year): def _replace_tags(paths, variable): """Replace tags in the config-developer's file with actual values.""" if isinstance(paths, str): - paths = (paths.strip('/'), ) + paths = set((paths.strip('/'),)) else: - paths = [path.strip('/') for path in paths] + paths = set(path.strip('/') for path in paths) tlist = set() - for path in paths: tlist = tlist.union(re.findall(r'{([^}]*)}', path)) + if 'sub_experiment' in variable: + new_paths = [] + for path in paths: + new_paths.extend(( + re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path), + re.sub(r'({ensemble})', r'{sub_experiment}-\1', path) + )) + tlist.add('sub_experiment') + paths = new_paths logger.debug(tlist) for tag in tlist: @@ -112,7 +151,6 @@ def _replace_tags(paths, variable): else: raise KeyError("Dataset key {} must be specified for {}, check " "your recipe entry".format(tag, variable)) - paths = _replace_tag(paths, original_tag, replacewith) return paths @@ -127,7 +165,7 @@ def _replace_tag(paths, tag, replacewith): else: text = _apply_caps(str(replacewith), lower, upper) 
result.extend(p.replace('{' + tag + '}', text) for p in paths) - return result + return list(set(result)) def _get_caps_options(tag): diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 1f7a5ab883..3e094c9ed4 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -315,16 +315,21 @@ def _get_default_settings(variable, config_user, derive=False): if variable['short_name'] != variable['original_short_name']: settings['save']['alias'] = variable['short_name'] + # Configure fx settings + settings['add_fx_variables'] = { + 'fx_variables': {}, + 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) + } + settings['remove_fx_variables'] = {} + return settings -def _add_fxvar_keys(fx_var_dict, variable): +def _add_fxvar_keys(fx_info, variable): """Add keys specific to fx variable to use get_input_filelist.""" - fx_variable = dict(variable) - fx_variable.update(fx_var_dict) - - # set variable names - fx_variable['variable_group'] = fx_var_dict['short_name'] + fx_variable = deepcopy(variable) + fx_variable.update(fx_info) + fx_variable['variable_group'] = fx_info['short_name'] # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': @@ -336,77 +341,69 @@ def _add_fxvar_keys(fx_var_dict, variable): return fx_variable -def _get_fx_file(variable, fx_variable, config_user): +def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): + fx_files = None + for mip in tables: + fx_cmor = tables[mip].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info['mip'] = mip + fx_info = _add_fxvar_keys(fx_info, variable) + logger.debug( + "For fx variable '%s', found table '%s'", + fx_info['short_name'], mip) + fx_files = _get_input_files(fx_info, config_user)[0] + if fx_files: + logger.debug( + "Found fx variables '%s':\n%s", + fx_info['short_name'], pformat(fx_files)) + return found_mip, fx_info, fx_files + + +def _get_fx_files(variable, fx_info, config_user): """Get fx files (searching all possible mips).""" - # make it a dict - if isinstance(fx_variable, str): - fx_varname = fx_variable - fx_variable = {'short_name': fx_varname} - else: - fx_varname = fx_variable['short_name'] # assemble info from master variable - var = dict(variable) var_project = variable['project'] # check if project in config-developer try: get_project_config(var_project) except ValueError: raise RecipeError( - f"Requested fx variable '{fx_varname}' with parent variable" - f"'{variable}' does not have a '{var_project}' project" - f"in config-developer.") - cmor_table = CMOR_TABLES[var_project] - valid_fx_vars = [] + f"Requested fx variable '{fx_info['short_name']}' " + f"with parent variable '{variable}' does not have " + f"a '{var_project}' project in config-developer.") + project_tables = CMOR_TABLES[var_project].tables # force only the mip declared by user - if 'mip' in fx_variable: - fx_mips = [fx_variable['mip']] + found_mip = False + if not fx_info['mip']: + found_mip, fx_info, fx_files = _search_fx_mip( + project_tables, found_mip, variable, fx_info, config_user) else: - # Get all fx-related mips (original var mip, - # 'fx' and extend from cmor tables) - fx_mips = [variable['mip']] - fx_mips.extend(mip for mip in cmor_table.tables if 'fx' in mip) - - # Search all mips for available variables - # priority goes to user specified mip if available - searched_mips = [] - fx_files = [] - for fx_mip in fx_mips: - fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) - if fx_cmor_variable is not None: - fx_var_dict = dict(fx_variable) - 
searched_mips.append(fx_mip) - fx_var_dict['mip'] = fx_mip - fx_var_dict = _add_fxvar_keys(fx_var_dict, var) - valid_fx_vars.append(fx_var_dict) - logger.debug("For fx variable '%s', found table '%s'", fx_varname, - fx_mip) - fx_files = _get_input_files(fx_var_dict, config_user)[0] - - # If files found, return them - if fx_files: - logger.debug("Found fx variables '%s':\n%s", fx_varname, - pformat(fx_files)) - break + fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) + if fx_cmor: + found_mip = True + fx_info = _add_fxvar_keys(fx_info, variable) + fx_files = _get_input_files(fx_info, config_user)[0] # If fx variable was not found in any table, raise exception - if not searched_mips: + if not found_mip: raise RecipeError( - f"Requested fx variable '{fx_varname}' not available in " - f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") + f"Requested fx variable '{fx_info['short_name']}' " + f"not available in any CMOR table for '{var_project}'") # flag a warning if not fx_files: - logger.warning("Missing data for fx variable '%s'", fx_varname) + logger.warning( + "Missing data for fx variable '%s'", fx_info['short_name']) # allow for empty lists corrected for by NE masks if fx_files: - fx_files = fx_files[0] - if valid_fx_vars: - valid_fx_vars = valid_fx_vars[0] + if fx_info['frequency'] == 'fx': + fx_files = fx_files[0] - return fx_files, valid_fx_vars + return fx_files, fx_info def _exclude_dataset(settings, variable, step): @@ -432,44 +429,82 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): """Update settings with mask fx file list or dict.""" if not fx_vars: return - - fx_vars = [_get_fx_file(variable, fxvar, config_user) for fxvar in fx_vars] - - fx_dict = {fx_var[1]['short_name']: fx_var[0] for fx_var in fx_vars} - settings['fx_variables'] = fx_dict - logger.info('Using fx_files: %s for variable %s during step %s', - pformat(settings['fx_variables']), variable['short_name'], - step_name) + for fx_var, fx_info in fx_vars.items(): + if not fx_info: + fx_info = {} + if 'mip' not in fx_info: + fx_info.update({'mip': None}) + if 'short_name' not in fx_info: + fx_info.update({'short_name': fx_var}) + fx_files, fx_info = _get_fx_files(variable, fx_info, config_user) + if fx_files: + fx_info['filename'] = fx_files + settings['add_fx_variables']['fx_variables'].update({ + fx_var: fx_info + }) + logger.info( + 'Using fx files for variable %s during step %s: %s', + variable['short_name'], step_name, pformat(fx_files)) + + +def _fx_list_to_dict(fx_vars): + """Convert fx list to dictionary. 
To be deprecated at some point.""" + user_fx_vars = {} + for fx_var in fx_vars: + if isinstance(fx_var, dict): + short_name = fx_var['short_name'] + user_fx_vars.update({short_name: fx_var}) + continue + user_fx_vars.update({fx_var: None}) + return user_fx_vars def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" - # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') + if isinstance(user_fx_vars, list): + user_fx_vars = _fx_list_to_dict(user_fx_vars) + step_settings['fx_variables'] = user_fx_vars if not user_fx_vars: - if step_name in ('mask_landsea', 'weighting_landsea_fraction'): - user_fx_vars = ['sftlf'] - if variable['project'] != 'obs4mips': - user_fx_vars.append('sftof') - elif step_name == 'mask_landseaice': - user_fx_vars = ['sftgif'] - elif step_name in ('area_statistics', 'volume_statistics', - 'zonal_statistics'): - user_fx_vars = [] - return user_fx_vars + default_fx = { + 'area_statistics': { + 'areacella': None, + 'areacello': None, + }, + 'mask_landsea': { + 'sftlf': None, + }, + 'mask_landseaice': { + 'sftgif': None, + }, + 'volume_statistics': { + 'volcello': None, + }, + 'weighting_landsea_fraction': { + 'sftlf': None, + }, + } + if variable['project'] != 'obs4mips': + default_fx['mask_landsea'].update({'sftof': None}) + default_fx['weighting_landsea_fraction'].update( + {'sftof': None}) + step_settings['fx_variables'] = default_fx[step_name] fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', - 'area_statistics', 'volume_statistics', 'zonal_statistics' + 'area_statistics', 'volume_statistics' ] - - for step_name, step_settings in settings.items(): + for step_name in settings: if step_name in fx_steps: - fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) - _update_fx_files(step_name, step_settings, variable, config_user, - fx_vars) + _get_fx_vars_from_attribute(settings[step_name], step_name) + _update_fx_files(step_name, settings, variable, config_user, + settings[step_name]['fx_variables']) + # Remove unused attribute in 'fx_steps' preprocessors. + # The fx_variables information is saved in + # the 'add_fx_variables' step. + settings[step_name].pop('fx_variables', None) def _read_attributes(filename): @@ -510,9 +545,7 @@ def _get_ancestors(variable, config_user): logger.info("Using input files for variable %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], '\n'.join(input_files)) - if (not config_user.get('skip-nonexistent') - or variable['dataset'] == variable.get('reference_dataset')): - check.data_availability(input_files, variable, dirnames, filenames) + check.data_availability(input_files, variable, dirnames, filenames) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -651,6 +684,16 @@ def get_matching(attributes): return grouped_products +def _allow_skipping(ancestors, variable, config_user): + """Allow skipping of datasets.""" + allow_skipping = all([ + config_user.get('skip-nonexistent'), + not ancestors, + variable['dataset'] != variable.get('reference_dataset'), + ]) + return allow_skipping + + def _get_preprocessor_products(variables, profile, order, ancestor_products, config_user, name): """Get preprocessor product definitions for a set of datasets. 
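For clarity, the three conditions combined by ``_allow_skipping`` above can be read as a stand-alone predicate; the following is a hypothetical, self-contained rendering for illustration only:

.. code-block:: python

    def allow_skipping(ancestors, variable, config_user):
        """Return True only if a missing dataset may be silently skipped."""
        return all([
            config_user.get('skip-nonexistent'),  # user opted in
            not ancestors,                        # no input files were found
            variable['dataset'] != variable.get('reference_dataset'),
        ])

    config = {'skip-nonexistent': True}
    variable = {'dataset': 'CanESM2', 'reference_dataset': 'ERA5'}
    print(allow_skipping([], variable, config))          # True: skip it
    print(allow_skipping(['tas.nc'], variable, config))  # False: data exists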
@@ -692,7 +735,7 @@ try: ancestors = _get_ancestors(variable, config_user) except RecipeError as ex: - if config_user.get('skip-nonexistent') and not ancestors: + if _allow_skipping(ancestors, variable, config_user): logger.info("Skipping: %s", ex.message) else: missing_vars.add(ex.message) @@ -997,37 +1040,38 @@ def _initialize_datasets(raw_datasets): return datasets @staticmethod - def _expand_ensemble(variables): - """Expand ensemble members to multiple datasets. + def _expand_tag(variables, input_tag): + """ + Expand tags such as ensemble members or start dates to multiple datasets. Expansion only supports tags defined as strings, not lists. """ expanded = [] regex = re.compile(r'\(\d+:\d+\)') - def expand_ensemble(variable): - ens = variable.get('ensemble', "") - match = regex.search(ens) + def expand_tag(variable, input_tag): + tag = variable.get(input_tag, "") + match = regex.search(tag) if match: start, end = match.group(0)[1:-1].split(':') for i in range(int(start), int(end) + 1): expand = deepcopy(variable) - expand['ensemble'] = regex.sub(str(i), ens, 1) - expand_ensemble(expand) + expand[input_tag] = regex.sub(str(i), tag, 1) + expand_tag(expand, input_tag) else: expanded.append(variable) for variable in variables: - ensemble = variable.get('ensemble', "") - if isinstance(ensemble, (list, tuple)): - for elem in ensemble: + tag = variable.get(input_tag, "") + if isinstance(tag, (list, tuple)): + for elem in tag: if regex.search(elem): raise RecipeError( - f"In variable {variable}: ensemble expansion " - "cannot be combined with ensemble lists") + f"In variable {variable}: {input_tag} expansion " + f"cannot be combined with {input_tag} lists") expanded.append(variable) else: - expand_ensemble(variable) + expand_tag(variable, input_tag) return expanded @@ -1074,8 +1118,14 @@ def _initialize_variables(self, raw_variable, raw_datasets): activity = get_activity(variable) if activity: variable['activity'] = activity - check.variable(variable, required_keys) - variables = self._expand_ensemble(variables) + if 'sub_experiment' in variable: + subexperiment_keys = deepcopy(required_keys) + subexperiment_keys.update({'sub_experiment'}) + check.variable(variable, subexperiment_keys) + else: + check.variable(variable, required_keys) + variables = self._expand_tag(variables, 'ensemble') + variables = self._expand_tag(variables, 'sub_experiment') return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, @@ -1327,9 +1377,9 @@ def initialize_tasks(self): tasks.add(task) priority += 1 if failed_tasks: - ex = RecipeError('Could not create all tasks') - ex.failed_tasks.extend(failed_tasks) - raise ex + recipe_error = RecipeError('Could not create all tasks') + recipe_error.failed_tasks.extend(failed_tasks) + raise recipe_error check.tasks_valid(tasks) # Resolve diagnostic ancestors diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index dd9fd00193..1133a36639 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -56,7 +56,7 @@ def recipe_with_schema(filename): logger.debug("Checking recipe against schema %s", schema_file) recipe = yamale.make_data(filename) schema = yamale.make_schema(schema_file) - yamale.validate(schema, recipe) + yamale.validate(schema, recipe, strict=False) def diagnostics(diags): diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 1bd649ecdf..7e6ad4429d 100644 --- a/esmvalcore/_task.py +++ 
b/esmvalcore/_task.py @@ -13,8 +13,10 @@ import time from copy import deepcopy from multiprocessing import Pool +from multiprocessing.pool import ApplyResult from pathlib import Path, PosixPath from shutil import which +from typing import Dict, Type import psutil import yaml @@ -682,10 +684,10 @@ def _run_sequential(self) -> None: for task in sorted(tasks, key=lambda t: t.priority): task.run() - def _run_parallel(self, max_parallel_tasks: int = None) -> None: + def _run_parallel(self, max_parallel_tasks=None): """Run tasks in parallel.""" scheduled = self.flatten() - running = {} + running: Dict[Type[BaseTask], Type[ApplyResult]] = {} n_tasks = n_scheduled = len(scheduled) n_running = 0 diff --git a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py index c61eeaa192..28593a9d21 100644 --- a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py +++ b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py @@ -1,68 +1,7 @@ """Fixes for bcc-csm1-1.""" -import numpy as np -from scipy.interpolate import InterpolatedUnivariateSpline -from scipy.ndimage import map_coordinates - -from ..common import ClFixHybridPressureCoord -from ..fix import Fix - +from ..common import ClFixHybridPressureCoord, OceanFixGrid Cl = ClFixHybridPressureCoord -class Tos(Fix): - """Fixes for tos.""" - - def fix_data(self, cube): - """Fix data. - - Calculate missing latitude/longitude boundaries using interpolation. - - Parameters - ---------- - cube: iris.cube.Cube - Input cube to fix. - - Returns - ------- - iris.cube.Cube - - """ - rlat = cube.coord('grid_latitude').points - rlon = cube.coord('grid_longitude').points - - # Transform grid latitude/longitude to array indices [0, 1, 2, ...] - rlat_to_idx = InterpolatedUnivariateSpline(rlat, - np.arange(len(rlat)), - k=1) - rlon_to_idx = InterpolatedUnivariateSpline(rlon, - np.arange(len(rlon)), - k=1) - rlat_idx_bnds = rlat_to_idx(cube.coord('grid_latitude').bounds) - rlon_idx_bnds = rlon_to_idx(cube.coord('grid_longitude').bounds) - - # Calculate latitude/longitude vertices by interpolation - lat_vertices = [] - lon_vertices = [] - for (i, j) in [(0, 0), (0, 1), (1, 1), (1, 0)]: - (rlat_v, rlon_v) = np.meshgrid(rlat_idx_bnds[:, i], - rlon_idx_bnds[:, j], - indexing='ij') - lat_vertices.append( - map_coordinates(cube.coord('latitude').points, - [rlat_v, rlon_v], - mode='nearest')) - lon_vertices.append( - map_coordinates(cube.coord('longitude').points, - [rlat_v, rlon_v], - mode='wrap')) - lat_vertices = np.array(lat_vertices) - lon_vertices = np.array(lon_vertices) - lat_vertices = np.moveaxis(lat_vertices, 0, -1) - lon_vertices = np.moveaxis(lon_vertices, 0, -1) - - # Copy vertices to cube - cube.coord('latitude').bounds = lat_vertices - cube.coord('longitude').bounds = lon_vertices - - return cube +Tos = OceanFixGrid diff --git a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py index 8a1ab262a0..4794048f67 100644 --- a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py +++ b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py @@ -1,9 +1,7 @@ """Fixes for bcc-csm1-1-m.""" -from ..common import ClFixHybridPressureCoord -from .bcc_csm1_1 import Tos as BaseTos - +from ..common import ClFixHybridPressureCoord, OceanFixGrid Cl = ClFixHybridPressureCoord -Tos = BaseTos +Tos = OceanFixGrid diff --git a/esmvalcore/cmor/_fixes/cmip5/ec_earth.py b/esmvalcore/cmor/_fixes/cmip5/ec_earth.py index 86b5bc6dbe..1ef16df970 100644 --- a/esmvalcore/cmor/_fixes/cmip5/ec_earth.py +++ b/esmvalcore/cmor/_fixes/cmip5/ec_earth.py @@ -1,6 +1,7 @@ 
"""Fixes for EC-Earth model.""" -from dask import array as da import iris +import numpy as np +from dask import array as da from ..fix import Fix from ..shared import add_scalar_height_coord, cube_to_aux_coord @@ -129,3 +130,76 @@ def fix_metadata(self, cubes): areacello.add_aux_coord(cube_to_aux_coord(lon), (0, 1)) return iris.cube.CubeList([areacello, ]) + + +class Pr(Fix): + """Fixes for pr.""" + + def fix_metadata(self, cubes): + """Fix time coordinate. + + Last file (2000-2009) has erroneously duplicated points + in time coordinate (e.g. [t1, t2, t3, t4, t2, t3, t4, t5]) + which should be removed except the last one which is correct. + + Parameters + ---------- + cubes : iris.cube.CubeList + Cubes to fix. + + Returns + ------- + iris.cube.CubeList + + """ + new_list = iris.cube.CubeList() + for cube in cubes: + try: + old_time = cube.coord('time') + except iris.exceptions.CoordinateNotFoundError: + new_list.append(cube) + else: + if old_time.is_monotonic(): + new_list.append(cube) + else: + time_units = old_time.units + time_data = old_time.points + + # erase erroneously copy-pasted points + time_diff = np.diff(time_data) + idx_neg = np.where(time_diff <= 0.)[0] + while len(idx_neg) > 0: + time_data = np.delete(time_data, idx_neg[0] + 1) + time_diff = np.diff(time_data) + idx_neg = np.where(time_diff <= 0.)[0] + + # create the new time coord + new_time = iris.coords.DimCoord(time_data, + standard_name='time', + var_name='time', + units=time_units) + + # create a new cube with the right shape + dims = (time_data.shape[0], + cube.coord('latitude').shape[0], + cube.coord('longitude').shape[0]) + data = cube.data + new_data = np.ma.append(data[:dims[0] - 1, :, :], + data[-1, :, :]) + new_data = new_data.reshape(dims) + + tmp_cube = iris.cube.Cube( + new_data, + standard_name=cube.standard_name, + long_name=cube.long_name, + var_name=cube.var_name, + units=cube.units, + attributes=cube.attributes, + cell_methods=cube.cell_methods, + dim_coords_and_dims=[(new_time, 0), + (cube.coord('latitude'), 1), + (cube.coord('longitude'), 2)]) + + new_list.append(tmp_cube) + + return new_list diff --git a/esmvalcore/cmor/_fixes/cmip5/miroc5.py b/esmvalcore/cmor/_fixes/cmip5/miroc5.py index ae5ec6ee16..4331652eb2 100644 --- a/esmvalcore/cmor/_fixes/cmip5/miroc5.py +++ b/esmvalcore/cmor/_fixes/cmip5/miroc5.py @@ -163,3 +163,7 @@ def fix_data(self, cube): """ cube.data = da.ma.masked_equal(cube.core_data(), 0.) return cube + + +class Pr(Tas): + """Fixes for pr.""" diff --git a/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py b/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py index e084fbebb5..38f96526e1 100644 --- a/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py +++ b/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py @@ -1,7 +1,5 @@ """Fixes for BCC-CSM2-MR model.""" -from ..cmip5.bcc_csm1_1 import Tos as BaseTos -from ..common import ClFixHybridPressureCoord - +from ..common import ClFixHybridPressureCoord, OceanFixGrid Cl = ClFixHybridPressureCoord @@ -12,57 +10,10 @@ Clw = ClFixHybridPressureCoord -class Tos(BaseTos): - """Fixes for tos.""" +Tos = OceanFixGrid - def fix_metadata(self, cubes): - """Rename ``var_name`` of 1D-``latitude`` and 1D-``longitude``. - Parameters - ---------- - cubes : iris.cube.CubeList - Input cubes. 
- Returns - ------- - iris.cube.CubeList - """ - cube = self.get_cube_from_list(cubes) - lat_coord = cube.coord('latitude', dimensions=(1, )) - lon_coord = cube.coord('longitude', dimensions=(2, )) - lat_coord.standard_name = None - lat_coord.long_name = 'grid_latitude' - lat_coord.var_name = 'i' - lat_coord.units = '1' - lon_coord.standard_name = None - lon_coord.long_name = 'grid_longitude' - lon_coord.var_name = 'j' - lon_coord.units = '1' - lon_coord.circular = False - return cubes +Siconc = OceanFixGrid -class Siconc(BaseTos): - """Fixes for siconc.""" - def fix_metadata(self, cubes): - """Rename ``var_name`` of 1D-``latitude`` and 1D-``longitude``. - Parameters - ---------- - cubes : iris.cube.CubeList - Input cubes. - Returns - ------- - iris.cube.CubeList - """ - cube = self.get_cube_from_list(cubes) - lat_coord = cube.coord('latitude', dimensions=(1, )) - lon_coord = cube.coord('longitude', dimensions=(2, )) - lat_coord.standard_name = None - lat_coord.long_name = 'grid_latitude' - lat_coord.var_name = 'i' - lat_coord.units = '1' - lon_coord.standard_name = None - lon_coord.long_name = 'grid_longitude' - lon_coord.var_name = 'j' - lon_coord.units = '1' - lon_coord.circular = False - return cubes +Sos = OceanFixGrid diff --git a/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py b/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py index cae45b3f2f..6a55b379a4 100644 --- a/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py +++ b/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py @@ -1,7 +1,5 @@ """Fixes for BCC-ESM1 model.""" -from ..common import ClFixHybridPressureCoord -from .bcc_csm2_mr import Tos as BaseTos - +from ..common import ClFixHybridPressureCoord, OceanFixGrid Cl = ClFixHybridPressureCoord @@ -12,4 +10,10 @@ Clw = ClFixHybridPressureCoord -Tos = BaseTos +Tos = OceanFixGrid + + +Sos = OceanFixGrid + + +Siconc = OceanFixGrid diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2.py index 9b4bc499f4..51bc04685e 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2.py @@ -4,14 +4,15 @@ import numpy as np from netCDF4 import Dataset +from ..common import SiconcFixScalarCoord from ..fix import Fix from ..shared import ( add_scalar_depth_coord, add_scalar_height_coord, add_scalar_typeland_coord, add_scalar_typesea_coord, + fix_ocean_depth_coord, ) -from .gfdl_esm4 import Siconc as Addtypesi class Cl(Fix): @@ -214,6 +215,9 @@ def fix_metadata(self, cubes): return cubes +Siconc = SiconcFixScalarCoord + + class Tos(Fix): """Fixes for tos.""" @@ -242,4 +246,32 @@ def fix_metadata(self, cubes): return cubes -Siconc = Addtypesi +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + + # Only points need to be fixed, not bounds + if z_coord.units == 'cm': + z_coord.points = z_coord.core_points() / 100.0 + z_coord.units = 'm' + + # Fix depth metadata + if z_coord.standard_name is None: + fix_ocean_depth_coord(cube) + return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py index 74dff9bc74..80f2e58849 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py @@ -1,6 +1,8 @@ """Fixes for CESM2-FV2 model.""" from .cesm2 import Cl as BaseCl +from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Tas as BaseTas +from ..common import SiconcFixScalarCoord Cl = BaseCl @@ -12,4 +14,10 @@ Clw = Cl +Fgco2 = BaseFgco2 + + +Siconc = SiconcFixScalarCoord + + Tas = BaseTas diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index 1bf837884f..d0014f308a 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -2,7 +2,9 @@ from netCDF4 import Dataset from .cesm2 import Cl as BaseCl +from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Tas as BaseTas +from ..common import SiconcFixScalarCoord class Cl(BaseCl): @@ -47,4 +49,10 @@ def fix_file(self, filepath, output_dir): Clw = Cl +Fgco2 = BaseFgco2 + + +Siconc = SiconcFixScalarCoord + + Tas = BaseTas diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py index cd453d66a5..bc8068af8a 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py @@ -1,8 +1,11 @@ """Fixes for CESM2-WACCM-FV2 model.""" from .cesm2 import Tas as BaseTas +from .cesm2 import Fgco2 as BaseFgco2 from .cesm2_waccm import Cl as BaseCl from .cesm2_waccm import Cli as BaseCli from .cesm2_waccm import Clw as BaseClw +from ..common import SiconcFixScalarCoord + Cl = BaseCl @@ -13,4 +16,10 @@ Clw = BaseClw +Fgco2 = BaseFgco2 + + +Siconc = SiconcFixScalarCoord + + Tas = BaseTas diff --git a/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py b/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py index 25e2a22b74..0dbb7fc9b2 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py +++ b/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py @@ -1,4 +1,8 @@ """Fixes for CNRM-ESM2-1 model.""" +from ..fix import Fix +from ..shared import (fix_ocean_depth_coord) + + from .cnrm_cm6_1 import Cl as BaseCl from .cnrm_cm6_1 import Clcalipso as BaseClcalipso from .cnrm_cm6_1 import Cli as BaseCli @@ -15,3 +19,27 @@ Clw = BaseClw + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if z_coord.standard_name is None: + fix_ocean_depth_coord(cube) + return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py b/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py index c752ca2076..69a6b6375b 100644 --- a/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py +++ b/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py @@ -2,7 +2,6 @@ from ..cmip5.fgoals_g2 import Cl as BaseCl from ..common import OceanFixGrid - Cl = BaseCl @@ -12,7 +11,31 @@ Clw = BaseCl -Tos = OceanFixGrid +class Tos(OceanFixGrid): + """Fixes for tos.""" + + def fix_metadata(self, cubes): + """Fix metadata. + + FGOALS-g3 data contain latitude and longitude data set to >1e30 in some + places. + + Parameters + ---------- + cubes : iris.cube.CubeList + Input cubes. + + Returns + ------- + iris.cube.CubeList + + """ + cube = self.get_cube_from_list(cubes) + cube.coord('latitude').points[ + cube.coord('latitude').points > 1000.0] = 0.0 + cube.coord('longitude').points[ + cube.coord('longitude').points > 1000.0] = 0.0 + return super().fix_metadata(cubes) -Siconc = OceanFixGrid +Siconc = Tos diff --git a/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py b/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py index ce61e7fab6..35baf68a1f 100644 --- a/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py +++ b/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py @@ -1,7 +1,7 @@ """Fixes for GFDL-CM4 model.""" import iris -from ..common import ClFixHybridPressureCoord +from ..common import ClFixHybridPressureCoord, SiconcFixScalarCoord from ..fix import Fix from ..shared import add_aux_coords_from_cubes, add_scalar_height_coord @@ -38,6 +38,9 @@ def fix_metadata(self, cubes): Clw = Cl +Siconc = SiconcFixScalarCoord + + class Tas(Fix): """Fixes for tas.""" diff --git a/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py b/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py index a4e937842e..c2610a318b 100644 --- a/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py +++ b/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py @@ -1,14 +1,38 @@ """Fixes for GFDL-ESM4 model.""" -import iris +from ..common import SiconcFixScalarCoord from ..fix import Fix +from ..shared import ( + add_scalar_depth_coord, + fix_ocean_depth_coord, +) -class Siconc(Fix): - """Fixes for siconc.""" +class Fgco2(Fix): + """Fixes for fgco2.""" def fix_metadata(self, cubes): + """Add depth (0m) coordinate. + + Parameters + ---------- + cubes : iris.cube.CubeList + Input cubes. + + Returns + ------- + iris.cube.CubeList + """ - Fix missing type. + cube = self.get_cube_from_list(cubes) + add_scalar_depth_coord(cube) + return cubes + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
Parameters ---------- @@ -20,13 +44,12 @@ def fix_metadata(self, cubes): iris.cube.CubeList """ - typesi = iris.coords.AuxCoord( - 'siconc', - standard_name='area_type', - long_name='Sea Ice area type', - var_name='type', - units='1', - bounds=None) for cube in cubes: - cube.add_aux_coord(typesi) + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if z_coord.standard_name is None: + fix_ocean_depth_coord(cube) return cubes + + +Siconc = SiconcFixScalarCoord diff --git a/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py index 97d36e92b5..dfd7116275 100644 --- a/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py +++ b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py @@ -1,9 +1,8 @@ """Fixes for IPSL-CM6A-LR model.""" from iris.cube import CubeList -from iris.coords import AuxCoord -from iris.exceptions import ConstraintMismatchError from ..fix import Fix +from ..shared import fix_ocean_depth_coord class AllVars(Fix): @@ -23,27 +22,12 @@ def fix_metadata(self, cubes): iris.cube.CubeList """ - try: - cell_area = cubes.extract_cube('cell_area') - except ConstraintMismatchError: - return cubes - - cell_area = AuxCoord( - cell_area.data, - standard_name=cell_area.standard_name, - long_name=cell_area.long_name, - var_name=cell_area.var_name, - units=cell_area.units, - ) - new_list = CubeList() - for cube in cubes: - if cube.name() == 'cell_area': - continue - cube.add_aux_coord(cell_area, cube.coord_dims('latitude')) + cube = self.get_cube_from_list(cubes) + if cube.coords('latitude'): cube.coord('latitude').var_name = 'lat' + if cube.coords('longitude'): cube.coord('longitude').var_name = 'lon' - new_list.append(cube) - return CubeList(new_list) + return CubeList([cube]) class Clcalipso(Fix): @@ -68,3 +52,27 @@ def fix_metadata(self, cubes): alt_40_coord.standard_name = 'altitude' alt_40_coord.var_name = 'alt40' return CubeList([cube]) + + +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. 
+ + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if z_coord.var_name == 'olevel': + fix_ocean_depth_coord(cube) + return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py b/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py index 9f98e319fd..518a82e541 100644 --- a/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py +++ b/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py @@ -1,4 +1,5 @@ """Fixes for KIOST-ESM model.""" +from ..common import SiconcFixScalarCoord from ..fix import Fix from ..shared import add_scalar_height_coord @@ -57,3 +58,6 @@ class Uas(SfcWind): class Vas(SfcWind): """Fixes for vas.""" + + +Siconc = SiconcFixScalarCoord diff --git a/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py b/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py index 99cf06b688..c31a5e7c51 100644 --- a/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py +++ b/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py @@ -1,8 +1,10 @@ """Fixes for MCM-UA-1-0 model.""" import iris +import numpy as np +from dask import array as da from ..fix import Fix -from ..shared import add_scalar_height_coord +from ..shared import add_scalar_height_coord, fix_ocean_depth_coord def strip_cube_metadata(cube): @@ -59,18 +61,54 @@ def fix_metadata(self, cubes): for cube in cubes: coord_names = [cor.standard_name for cor in cube.coords()] if 'longitude' in coord_names: - if cube.coord('longitude').ndim == 1 and \ - cube.coord('longitude').has_bounds(): - lon_bnds = cube.coord('longitude').bounds.copy() - if cube.coord('longitude').points[0] == 0. and \ - cube.coord('longitude').points[-1] == 356.25 and \ + lon_coord = cube.coord('longitude') + if lon_coord.ndim == 1 and lon_coord.has_bounds(): + lon_bnds = lon_coord.bounds.copy() + # atmos & land + if lon_coord.points[0] == 0. and \ + lon_coord.points[-1] == 356.25 and \ lon_bnds[-1][-1] == 360.: lon_bnds[-1][-1] = 358.125 - cube.coord('longitude').bounds = lon_bnds + lon_coord.bounds = lon_bnds + # ocean & seaice + if lon_coord.points[0] == -0.9375: + lon_dim = cube.coord_dims('longitude')[0] + cube.data = da.roll(cube.core_data(), -1, axis=lon_dim) + lon_points = np.roll(lon_coord.core_points(), -1) + lon_bounds = np.roll(lon_coord.core_bounds(), -1, + axis=0) + lon_points[-1] += 360.0 + lon_bounds[-1] += 360.0 + lon_coord.points = lon_points + lon_coord.bounds = lon_bounds return cubes +class Omon(Fix): + """Fixes for ocean variables.""" + + def fix_metadata(self, cubes): + """Fix ocean depth coordinate. + + Parameters + ---------- + cubes: iris CubeList + List of cubes to fix + + Returns + ------- + iris.cube.CubeList + + """ + for cube in cubes: + if cube.coords(axis='Z'): + z_coord = cube.coord(axis='Z') + if z_coord.standard_name is None: + fix_ocean_depth_coord(cube) + return cubes + + class Tas(Fix): """Fixes for tas.""" @@ -84,9 +122,30 @@ def fix_metadata(self, cubes): Returns ------- - iris.cube.Cube + iris.cube.CubeList """ cube = self.get_cube_from_list(cubes) add_scalar_height_coord(cube, 2.0) - return [cube] + return cubes + + +class Uas(Fix): + """Fixes for uas.""" + + def fix_metadata(self, cubes): + """Add height (10m) coordinate. + + Parameters + ---------- + cubes : iris.cube.CubeList + Cubes to fix. 
+ + Returns + ------- + iris.cube.CubeList + + """ + cube = self.get_cube_from_list(cubes) + add_scalar_height_coord(cube, 10.0) + return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py b/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py index 696574b9a4..dc0aa1ccb8 100644 --- a/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py +++ b/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py @@ -1,5 +1,6 @@ """Fixes for SAM0-UNICON model.""" from ..common import ClFixHybridPressureCoord +from ..fix import Fix Cl = ClFixHybridPressureCoord @@ -9,3 +10,27 @@ Clw = ClFixHybridPressureCoord + + +class Nbp(Fix): + """Fixes for nbp.""" + + def fix_data(self, cube): + """Fix data. + + Fixes wrong sign for land surface flux. Tested for v20190323. + + Parameters + ---------- + cube : iris.cube.Cube + Input cube. + + Returns + ------- + iris.cube.Cube + + """ + metadata = cube.metadata + cube *= -1 + cube.metadata = metadata + return cube diff --git a/esmvalcore/cmor/_fixes/common.py b/esmvalcore/cmor/_fixes/common.py index 2e3165dc33..90c64162e7 100644 --- a/esmvalcore/cmor/_fixes/common.py +++ b/esmvalcore/cmor/_fixes/common.py @@ -1,10 +1,18 @@ """Common fixes used for multiple datasets.""" +import logging + import iris import numpy as np from scipy.ndimage import map_coordinates from .fix import Fix -from .shared import add_plev_from_altitude, fix_bounds +from .shared import ( + add_plev_from_altitude, + add_scalar_typesi_coord, + fix_bounds, +) + +logger = logging.getLogger(__name__) class ClFixHybridHeightCoord(Fix): @@ -108,49 +116,74 @@ def fix_metadata(self, cubes): class OceanFixGrid(Fix): """Fixes for tos, siconc in FGOALS-g3.""" - def fix_data(self, cube): - """ - Fix data. - - Calculate missing latitude/longitude boundaries using interpolation. - Based on a similar fix for BCC-CSM2-MR. + def fix_metadata(self, cubes): + """Fix ``latitude`` and ``longitude`` (metadata and bounds). Parameters ---------- - cube: iris.cube.Cube - Input cube to fix. + cubes : iris.cube.CubeList + Input cubes. Returns ------- - iris.cube.Cube + iris.cube.CubeList + """ - rlat = cube.coord('grid_latitude').points - rlon = cube.coord('grid_longitude').points - - # Guess coordinate bounds in rlat, rlon (following BCC-CSM2-MR-1). - rlat_idx_bnds = np.zeros((len(rlat), 2)) - rlat_idx_bnds[:, 0] = np.arange(len(rlat)) - 0.5 - rlat_idx_bnds[:, 1] = np.arange(len(rlat)) + 0.5 - rlat_idx_bnds[0, 0] = 0. - rlat_idx_bnds[len(rlat) - 1, 1] = len(rlat) - rlon_idx_bnds = np.zeros((len(rlon), 2)) - rlon_idx_bnds[:, 0] = np.arange(len(rlon)) - 0.5 - rlon_idx_bnds[:, 1] = np.arange(len(rlon)) + 0.5 - - # Calculate latitude/longitude vertices by interpolation + cube = self.get_cube_from_list(cubes) + if cube.ndim != 3: + logger.warning( + "OceanFixGrid is designed to work on any data with an " + "irregular ocean grid, but it was only tested on 3D (time, " + "latitude, longitude) data so far; got %dD data", cube.ndim) + + # Get dimensional coordinates. 
Note: + # - First dimension i -> X-direction (= longitude) + # - Second dimension j -> Y-direction (= latitude) + (j_dim, i_dim) = sorted(set( + cube.coord_dims(cube.coord('latitude', dim_coords=False)) + + cube.coord_dims(cube.coord('longitude', dim_coords=False)) + )) + i_coord = cube.coord(dim_coords=True, dimensions=i_dim) + j_coord = cube.coord(dim_coords=True, dimensions=j_dim) + + # Fix metadata of coordinate i + i_coord.var_name = 'i' + i_coord.standard_name = None + i_coord.long_name = 'cell index along first dimension' + i_coord.units = '1' + i_coord.circular = False + + # Fix metadata of coordinate j + j_coord.var_name = 'j' + j_coord.standard_name = None + j_coord.long_name = 'cell index along second dimension' + j_coord.units = '1' + + # Fix points and bounds of index coordinates i and j + for idx_coord in (i_coord, j_coord): + idx_coord.points = np.arange(len(idx_coord.points)) + idx_coord.bounds = None + idx_coord.guess_bounds() + + # Calculate latitude/longitude vertices by interpolation. + # Following the CF conventions (see + # cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries) + # we go counter-clockwise around the cells and construct a grid of + # index values which are in turn used to interpolate longitudes and + # latitudes in the midpoints between the cell centers. lat_vertices = [] lon_vertices = [] - for (i, j) in [(0, 0), (0, 1), (1, 1), (1, 0)]: - (rlat_v, rlon_v) = np.meshgrid(rlat_idx_bnds[:, i], - rlon_idx_bnds[:, j], - indexing='ij') + for (j, i) in [(0, 0), (0, 1), (1, 1), (1, 0)]: + (j_v, i_v) = np.meshgrid(j_coord.bounds[:, j], + i_coord.bounds[:, i], + indexing='ij') lat_vertices.append( map_coordinates(cube.coord('latitude').points, - [rlat_v, rlon_v], + [j_v, i_v], mode='nearest')) lon_vertices.append( map_coordinates(cube.coord('longitude').points, - [rlat_v, rlon_v], + [j_v, i_v], mode='wrap')) lat_vertices = np.array(lat_vertices) lon_vertices = np.array(lon_vertices) @@ -160,11 +193,15 @@ def fix_data(self, cube): # Copy vertices to cube cube.coord('latitude').bounds = lat_vertices cube.coord('longitude').bounds = lon_vertices - return cube + + return iris.cube.CubeList([cube]) + + +class SiconcFixScalarCoord(Fix): + """Fixes for siconc.""" def fix_metadata(self, cubes): - """ - Rename ``var_name`` of 1D-``latitude`` and 1D-``longitude``. + """Add typesi coordinate. Parameters ---------- @@ -174,25 +211,8 @@ def fix_metadata(self, cubes): Returns ------- iris.cube.CubeList + """ cube = self.get_cube_from_list(cubes) - lat_coord = cube.coord('cell index along second dimension', - dimensions=(1, )) - lon_coord = cube.coord('cell index along first dimension', - dimensions=(2, )) - lat_coord.standard_name = None - lat_coord.long_name = 'grid_latitude' - lat_coord.var_name = 'i' - lat_coord.units = '1' - lon_coord.standard_name = None - lon_coord.long_name = 'grid_longitude' - lon_coord.var_name = 'j' - lon_coord.units = '1' - lon_coord.circular = False - # FGOALS-g3 data contain latitude and longitude data set to - # >1e30 in some places. Set to 0. to avoid problem in check.py. - cube.coord('latitude').points[cube.coord('latitude').points > 1000.]\ - = 0. - cube.coord('longitude').points[cube.coord('longitude').points > 1000.]\ - = 0. 
- return cubes + add_scalar_typesi_coord(cube) + return iris.cube.CubeList([cube]) diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 12aee7b9a0..4595c3914f 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -15,7 +15,7 @@ def __init__(self, vardef): Parameters ---------- - vardef: basestring + vardef: str CMOR table entry """ @@ -31,14 +31,14 @@ def fix_file(self, filepath, output_dir): Parameters ---------- - filepath: basestring + filepath: str file to fix - output_dir: basestring + output_dir: str path to the folder to store the fixe files, if required Returns ------- - basestring + str Path to the corrected file. It can be different from the original filepath if a fix has been applied, but if not it should be the original filepath @@ -162,7 +162,7 @@ def get_fixes(project, dataset, mip, short_name): classes = inspect.getmembers(fixes_module, inspect.isclass) classes = dict((name.lower(), value) for name, value in classes) - for fix_name in (short_name, 'allvars'): + for fix_name in (short_name, mip.lower(), 'allvars'): try: fixes.append(classes[fix_name](vardef)) except KeyError: diff --git a/esmvalcore/cmor/_fixes/shared.py b/esmvalcore/cmor/_fixes/shared.py index 62b9c76c8b..d5c74c095a 100644 --- a/esmvalcore/cmor/_fixes/shared.py +++ b/esmvalcore/cmor/_fixes/shared.py @@ -361,6 +361,21 @@ def add_scalar_typesea_coord(cube, value='default'): return cube +def add_scalar_typesi_coord(cube, value='sea_ice'): + """Add scalar coordinate 'typesi' with value of `value`.""" + logger.debug("Adding typesi coordinate (%s)", value) + typesi_coord = iris.coords.AuxCoord(value, + var_name='type', + standard_name='area_type', + long_name='Sea Ice area type', + units=Unit('no unit')) + try: + cube.coord('area_type') + except iris.exceptions.CoordinateNotFoundError: + cube.add_aux_coord(typesi_coord, ()) + return cube + + def cube_to_aux_coord(cube): """Convert cube to iris AuxCoord.""" return iris.coords.AuxCoord( @@ -517,3 +532,20 @@ def round_coordinates(cubes, decimals=5, coord_names=None): coord.bounds = da.round(da.asarray(coord.core_bounds()), decimals) return cubes + + +def fix_ocean_depth_coord(cube): + """Fix attributes of ocean vertical level coordinate. + + Parameters + ---------- + cube : iris.cube.Cube + Input cube. 
+
+    """
+    depth_coord = cube.coord(axis='Z')
+    depth_coord.standard_name = 'depth'
+    depth_coord.var_name = 'lev'
+    depth_coord.units = 'm'
+    depth_coord.long_name = 'ocean depth coordinate'
+    depth_coord.attributes = {'positive': 'down'}
diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py
index 131de15209..48a5113d35 100644
--- a/esmvalcore/cmor/check.py
+++ b/esmvalcore/cmor/check.py
@@ -527,9 +527,7 @@ def _check_coord(self, cmor, coord, var_name):
         if not fixed:
             self.report_critical(self._attr_msg, var_name, 'units',
                                  cmor.units, coord.units)
-        self._check_coord_values(cmor, coord, var_name)
-        self._check_coord_bounds(cmor, coord, var_name)
-        self._check_coord_monotonicity_and_direction(cmor, coord, var_name)
+        self._check_coord_points(cmor, coord, var_name)

     def _check_coord_bounds(self, cmor, coord, var_name):
         if cmor.must_have_bounds == 'yes' and not coord.has_bounds():
@@ -605,9 +603,19 @@ def _reverse_coord(self, coord):
         if coord.ndim == 1:
             self._cube = iris.util.reverse(self._cube,
                                            self._cube.coord_dims(coord))
-
-    def _check_coord_values(self, coord_info, coord, var_name):
-        """Check coordinate values."""
+            reversed_coord = self._cube.coord(var_name=coord.var_name)
+            if reversed_coord.has_bounds():
+                bounds = reversed_coord.bounds
+                right_bounds = bounds[:-2, 1]
+                left_bounds = bounds[1:-1, 0]
+                if np.all(right_bounds != left_bounds):
+                    reversed_coord.bounds = np.fliplr(bounds)
+            coord = reversed_coord
+        self.report_debug_message(f'Coordinate {coord.var_name} values '
+                                  'have been reversed.')
+
+    def _check_coord_points(self, coord_info, coord, var_name):
+        """Check coordinate points: values, bounds and monotonicity."""
         # Check requested coordinate values exist in coord.points
         self._check_requested_values(coord, coord_info, var_name)

@@ -655,6 +663,10 @@ def _check_coord_values(self, coord_info, coord, var_name):
             dims = self._cube.coord_dims(coord)
             self._cube.remove_coord(coord)
             self._cube.add_aux_coord(new_coord, dims)
+        coord = self._cube.coord(var_name=var_name)
+        self._check_coord_bounds(coord_info, coord, var_name)
+        self._check_coord_monotonicity_and_direction(coord_info, coord,
+                                                     var_name)

     def _check_longitude_max(self, coord, var_name):
         if np.any(coord.points > 720):
@@ -969,13 +981,13 @@ def cmor_check_metadata(cube,
     ----------
     cube: iris.cube.Cube
         Data cube to check.
-    cmor_table: basestring
+    cmor_table: str
         CMOR definitions to use.
     mip:
         Variable's mip.
-    short_name: basestring
+    short_name: str
         Variable's short name.
-    frequency: basestring
+    frequency: str
         Data frequency.
     check_level: CheckLevels
         Level of strictness of the checks.
@@ -1003,13 +1015,13 @@ def cmor_check_data(cube,
     ----------
     cube: iris.cube.Cube
         Data cube to check.
-    cmor_table: basestring
+    cmor_table: str
         CMOR definitions to use.
     mip:
         Variable's mip.
-    short_name: basestring
+    short_name: str
         Variable's short name
-    frequency: basestring
+    frequency: str
         Data frequency
     check_level: CheckLevels
         Level of strictness of the checks.
@@ -1033,13 +1045,13 @@ def cmor_check(cube, cmor_table, mip, short_name, frequency, check_level):
     ----------
     cube: iris.cube.Cube
         Data cube to check.
-    cmor_table: basestring
+    cmor_table: str
         CMOR definitions to use.
     mip:
         Variable's mip.
-    short_name: basestring
+    short_name: str
         Variable's short name.
-    frequency: basestring
+    frequency: str
         Data frequency.
     check_level: enum.IntEnum
         Level of strictness of the checks.
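The bounds handling added to ``_reverse_coord`` above is subtle: after ``iris.util.reverse`` the cells run in descending order, but each bounds pair keeps its original (lower, upper) orientation, so the contiguity relation ``bounds[i, 1] == bounds[i + 1, 0]`` breaks and ``np.fliplr`` restores it. A toy illustration with invented values:

    import numpy as np

    # Bounds of an ascending coord [0, 1, 2] after reversing the row order only:
    bounds = np.array([[1.5, 2.5], [0.5, 1.5], [-0.5, 0.5]])
    bounds[:-2, 1]     # -> [2.5], does not match...
    bounds[1:-1, 0]    # -> [0.5], ...so the check above triggers the flip
    np.fliplr(bounds)  # -> [[2.5, 1.5], [1.5, 0.5], [0.5, -0.5]], contiguous again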
diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index a1fc3a5903..4c5bd2e1ff 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -12,12 +12,13 @@ from collections import Counter from functools import total_ordering from pathlib import Path +from typing import Dict, Type import yaml logger = logging.getLogger(__name__) -CMOR_TABLES = {} +CMOR_TABLES: Dict[str, Type['InfoBase']] = {} """dict of str, obj: CMOR info objects.""" @@ -127,7 +128,7 @@ def get_table(self, table): Parameters ---------- - table: basestring + table: str Table name Returns @@ -143,9 +144,9 @@ def get_variable(self, table_name, short_name, derived=False): Parameters ---------- - table_name: basestring + table_name: str Table name - short_name: basestring + short_name: str Variable's short name derived: bool, optional Variable is derived. Info retrieval for derived variables always @@ -226,7 +227,7 @@ class CMIP6Info(InfoBase): Parameters ---------- - cmor_tables_path: basestring + cmor_tables_path: str Path to the folder containing the Tables folder with the json files default: object @@ -366,7 +367,7 @@ def get_table(self, table): Parameters ---------- - table: basestring + table: str Table name Returns @@ -615,7 +616,7 @@ class CMIP5Info(InfoBase): Parameters ---------- - cmor_tables_path: basestring + cmor_tables_path: str Path to the folder containing the Tables folder with the json files default: object @@ -756,7 +757,7 @@ def get_table(self, table): Parameters ---------- - table: basestring + table: str Table name Returns @@ -773,7 +774,7 @@ class CMIP3Info(CMIP5Info): Parameters ---------- - cmor_tables_path: basestring + cmor_tables_path: str Path to the folder containing the Tables folder with the json files default: object @@ -810,7 +811,7 @@ class CustomInfo(CMIP5Info): Parameters ---------- - cmor_tables_path: basestring or None + cmor_tables_path: str or None Full path to the table or name for the table if it is present in ESMValTool repository """ @@ -847,9 +848,9 @@ def get_variable(self, table, short_name, derived=False): Parameters ---------- - table: basestring + table: str Table name - short_name: basestring + short_name: str Variable's short name derived: bool, optional Variable is derived. Info retrieval for derived variables always diff --git a/esmvalcore/cmor/tables/custom/CMOR_tasaga.dat b/esmvalcore/cmor/tables/custom/CMOR_tasaga.dat new file mode 100644 index 0000000000..f62d34c5e4 --- /dev/null +++ b/esmvalcore/cmor/tables/custom/CMOR_tasaga.dat @@ -0,0 +1,25 @@ +SOURCE: CMIP5 (adapted from tas) +!============ +variable_entry: tasaga +!============ +modeling_realm: atmos +!---------------------------------- +! Variable attributes: +!---------------------------------- +standard_name: +units: K +cell_methods: time: mean +cell_measures: area: areacella +long_name: Global-mean Near-Surface Air Temperature Anomaly +!---------------------------------- +! Additional variable information: +!---------------------------------- +dimensions: time +out_name: tasaga +type: real +valid_min: -20.0 +valid_max: 20.0 +ok_min_mean_abs: -20 +ok_max_mean_abs: 20. +!---------------------------------- +! 
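Once installed, a custom entry like the ``tasaga`` table above is served through the same lookup path as other custom variables. A hedged sketch, assuming the default tables have been read so that ``CMOR_TABLES`` carries a 'custom' key (the printed unit comes from the .dat file above):

    from esmvalcore.cmor.table import CMOR_TABLES

    custom = CMOR_TABLES.get('custom')  # assumption: tables already loaded
    if custom is not None:
        tasaga = custom.get_variable('Amon', 'tasaga')
        print(tasaga.units)  # -> 'K'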
diff --git a/esmvalcore/cmor/variable_alt_names.yml b/esmvalcore/cmor/variable_alt_names.yml index 787a20c99a..e0416797eb 100644 --- a/esmvalcore/cmor/variable_alt_names.yml +++ b/esmvalcore/cmor/variable_alt_names.yml @@ -9,7 +9,6 @@ ############################################################################### --- - ['sic', 'siconc'] -- ['sit', 'sithick'] - ['tro3', 'o3'] - ['usi', 'siu'] -- ['vsi', 'siv'] \ No newline at end of file +- ['vsi', 'siv'] diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml index a0a242e6a1..19b00482da 100644 --- a/esmvalcore/config-developer.yml +++ b/esmvalcore/config-developer.yml @@ -25,6 +25,7 @@ CMIP6: BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/' + SYNDA: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}' input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc' output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}' cmor_type: 'CMIP6' @@ -40,6 +41,7 @@ CMIP5: SMHI: '{dataset}/{ensemble}/{exp}/{frequency}' RCAST: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/' BSC: '{type}/{project}/{exp}/{dataset.lower}' + SYNDA: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}' input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc' output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}' institutes: diff --git a/esmvalcore/experimental/_logging.py b/esmvalcore/experimental/_logging.py index 02c64491d5..206376c9c0 100644 --- a/esmvalcore/experimental/_logging.py +++ b/esmvalcore/experimental/_logging.py @@ -2,10 +2,11 @@ import logging from contextlib import contextmanager +from pathlib import Path @contextmanager -def log_to_dir(drc: str) -> None: +def log_to_dir(drc: Path): """Log messages to the specified directory. This is a context manager to temporarily redirect the logging when diff --git a/esmvalcore/experimental/_warnings.py b/esmvalcore/experimental/_warnings.py index 9efa752e84..b31eb78c20 100644 --- a/esmvalcore/experimental/_warnings.py +++ b/esmvalcore/experimental/_warnings.py @@ -3,12 +3,7 @@ import warnings -def _warning_formatter(message, - category, - filename, - lineno, - file=None, - line=None): +def _warning_formatter(message, category, filename, lineno, line=None): """Patch warning formatting to not mention itself.""" return f'{filename}:{lineno}: {category.__name__}: {message}\n' diff --git a/esmvalcore/experimental/config/_config_object.py b/esmvalcore/experimental/config/_config_object.py index bea358f773..7f836d694a 100644 --- a/esmvalcore/experimental/config/_config_object.py +++ b/esmvalcore/experimental/config/_config_object.py @@ -1,7 +1,9 @@ """Importable config object.""" +import os from datetime import datetime from pathlib import Path +from typing import Union import yaml @@ -28,7 +30,9 @@ class Config(ValidatedConfig): ) @classmethod - def _load_user_config(cls, filename: str, raise_exception: bool = True): + def _load_user_config(cls, + filename: Union[os.PathLike, str], + raise_exception: bool = True): """Load user configuration from the given file. The config is cleared and updated in-place. 
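The retyped ``log_to_dir`` context manager above now expects a ``Path``; a minimal usage sketch (directory and message invented):

    import logging
    from pathlib import Path

    from esmvalcore.experimental._logging import log_to_dir

    with log_to_dir(Path('~/esmvaltool_output/run').expanduser()):
        logging.getLogger(__name__).info('Recorded in the run directory log.')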
@@ -61,7 +65,7 @@ def _load_user_config(cls, filename: str, raise_exception: bool = True): return new @classmethod - def _load_default_config(cls, filename: str): + def _load_default_config(cls, filename: Union[os.PathLike, str]): """Load the default configuration.""" new = cls() @@ -70,7 +74,7 @@ def _load_default_config(cls, filename: str): return new - def load_from_file(self, filename): + def load_from_file(self, filename: Union[os.PathLike, str]): """Load user configuration from the given file.""" path = Path(filename).expanduser() if not path.exists(): @@ -130,7 +134,7 @@ class Session(ValidatedConfig): def __init__(self, config: dict, name: str = 'session'): super().__init__(config) - self.session_name = None + self.session_name: Union[str, None] = None self.set_session_name(name) def set_session_name(self, name: str = 'session'): diff --git a/esmvalcore/experimental/config/_validated_config.py b/esmvalcore/experimental/config/_validated_config.py index e3a5f1d26f..bed3f5e3c5 100644 --- a/esmvalcore/experimental/config/_validated_config.py +++ b/esmvalcore/experimental/config/_validated_config.py @@ -3,6 +3,7 @@ import pprint import warnings from collections.abc import MutableMapping +from typing import Callable, Dict, Tuple from .._exceptions import SuppressedError from ._config_validators import ValidationError @@ -20,15 +21,16 @@ class MissingConfigParameter(UserWarning): # fit the needs of ESMValCore. Matplotlib is licenced under the terms of # the the 'Python Software Foundation License' # (https://www.python.org/psf/license) -class ValidatedConfig(MutableMapping, dict): +class ValidatedConfig(MutableMapping): """Based on `matplotlib.rcParams`.""" - _validate = {} - _warn_if_missing = () + _validate: Dict[str, Callable] = {} + _warn_if_missing: Tuple[Tuple[str, str], ...] 
= ()

     # validate values on the way in
     def __init__(self, *args, **kwargs):
         super().__init__()
+        self._mapping = {}
         self.update(*args, **kwargs)

     def __setitem__(self, key, val):
@@ -41,36 +43,37 @@ def __setitem__(self, key, val):
             raise InvalidConfigParameter(
                 f"`{key}` is not a valid config parameter.") from None

-        dict.__setitem__(self, key, cval)
+        self._mapping[key] = cval

     def __getitem__(self, key):
         """Return value mapped by key."""
-        return dict.__getitem__(self, key)
+        return self._mapping[key]

     def __repr__(self):
         """Return canonical string representation."""
         class_name = self.__class__.__name__
         indent = len(class_name) + 1
-        repr_split = pprint.pformat(dict(self), indent=1,
+        repr_split = pprint.pformat(self._mapping, indent=1,
                                     width=80 - indent).split('\n')
         repr_indented = ('\n' + ' ' * indent).join(repr_split)
         return '{}({})'.format(class_name, repr_indented)

     def __str__(self):
         """Return string representation."""
-        return '\n'.join(map('{0[0]}: {0[1]}'.format, sorted(self.items())))
+        return '\n'.join(
+            map('{0[0]}: {0[1]}'.format, sorted(self._mapping.items())))

     def __iter__(self):
         """Yield sorted list of keys."""
-        yield from sorted(dict.__iter__(self))
+        yield from sorted(self._mapping)

     def __len__(self):
         """Return number of config keys."""
-        return dict.__len__(self)
+        return len(self._mapping)

     def __delitem__(self, key):
         """Delete key/value from config."""
-        dict.__delitem__(self, key)
+        del self._mapping[key]

     def check_missing(self):
         """Check and warn for missing variables."""
@@ -82,8 +85,8 @@ def check_missing(self):

     def copy(self):
         """Copy the keys/values of this object to a dict."""
-        return {k: dict.__getitem__(self, k) for k in self}
+        return {k: self._mapping[k] for k in self}

     def clear(self):
         """Clear Config."""
-        dict.clear(self)
+        self._mapping.clear()
diff --git a/esmvalcore/experimental/recipe.py b/esmvalcore/experimental/recipe.py
index 3cca24801c..c237164ed6 100644
--- a/esmvalcore/experimental/recipe.py
+++ b/esmvalcore/experimental/recipe.py
@@ -1,13 +1,16 @@
 """Recipe metadata."""

 import logging
+import os
 import pprint
 import shutil
 from pathlib import Path
+from typing import Dict, Optional

 import yaml

 from esmvalcore._recipe import Recipe as RecipeEngine
+from esmvalcore.experimental.config import Session

 from . import CFG
 from ._logging import log_to_dir
@@ -28,14 +31,14 @@ class Recipe():
         Path to the recipe.
     """

-    def __init__(self, path: str):
+    def __init__(self, path: os.PathLike):
         self.path = Path(path)
         if not self.path.exists():
             raise FileNotFoundError(f'Cannot find recipe: `{path}`.')

-        self._engine = None
-        self._data = None
-        self.last_session = None
+        self._engine: Optional[RecipeEngine] = None
+        self._data: Optional[Dict] = None
+        self.last_session: Optional[Session] = None

         self.info = RecipeInfo(self.data, filename=self.path.name)

     def __repr__(self) -> str:
@@ -71,7 +74,7 @@ def data(self) -> dict:
             self._data = yaml.safe_load(open(self.path, 'r'))
         return self._data

-    def _load(self, session: dict):
+    def _load(self, session: Session) -> RecipeEngine:
         """Load the recipe.
This method loads the recipe into the internal ESMValCore Recipe @@ -93,11 +96,11 @@ def _load(self, session: dict): logger.info(pprint.pformat(config_user)) - self._engine = RecipeEngine(raw_recipe=self.data, - config_user=config_user, - recipe_file=self.path) + return RecipeEngine(raw_recipe=self.data, + config_user=config_user, + recipe_file=self.path) - def run(self, task: str = None, session: dict = None): + def run(self, task: str = None, session: Session = None): """Run the recipe. This function loads the recipe into the ESMValCore recipe format @@ -119,7 +122,7 @@ def run(self, task: str = None, session: dict = None): Returns output of the recipe as instances of :obj:`OutputItem` grouped by diagnostic task. """ - if not session: + if session is None: session = CFG.start_session(self.path.stem) self.last_session = session @@ -128,7 +131,7 @@ def run(self, task: str = None, session: dict = None): session['diagnostics'] = task with log_to_dir(session.run_dir): - self._load(session=session) + self._engine = self._load(session=session) self._engine.run() shutil.copy2(self.path, session.run_dir) @@ -138,7 +141,7 @@ def run(self, task: str = None, session: dict = None): return output - def get_output(self) -> dict: + def get_output(self) -> RecipeOutput: """Get output from recipe. Returns @@ -147,7 +150,7 @@ def get_output(self) -> dict: Returns output of the recipe as instances of :obj:`OutputFile` grouped by diagnostic task. """ - if not self._engine: + if self._engine is None: raise AttributeError('Run the recipe first using `.run()`.') output = self._engine.get_output() diff --git a/esmvalcore/experimental/recipe_info.py b/esmvalcore/experimental/recipe_info.py index 04aab5ebd8..2fe0a9dc38 100644 --- a/esmvalcore/experimental/recipe_info.py +++ b/esmvalcore/experimental/recipe_info.py @@ -1,6 +1,8 @@ """Handles recipe metadata (under 'documentation' section).""" +import os import textwrap from pathlib import Path +from typing import Optional, Tuple, Union import yaml @@ -19,14 +21,14 @@ class RecipeInfo(): Name of recipe file """ - def __init__(self, data, filename: str = None): + def __init__(self, data, filename: Union[os.PathLike, str]): self.filename = Path(filename).name self.data = data - self._authors = None - self._maintainers = None - self._projects = None - self._references = None - self._description = None + self._authors: Optional[Tuple[Contributor, ...]] = None + self._maintainers: Optional[Tuple[Contributor, ...]] = None + self._projects: Optional[Tuple[Project, ...]] = None + self._references: Optional[Tuple[Reference, ...]] = None + self._description: Optional[str] = None def __repr__(self) -> str: """Return canonical string representation.""" diff --git a/esmvalcore/experimental/recipe_metadata.py b/esmvalcore/experimental/recipe_metadata.py index 1beb96f9cb..da3a66e9c1 100644 --- a/esmvalcore/experimental/recipe_metadata.py +++ b/esmvalcore/experimental/recipe_metadata.py @@ -180,10 +180,11 @@ def render(self, renderer: str = 'html') -> str: """ style = 'plain' # alpha, plain, unsrt, unsrtalpha backend = pybtex.plugin.find_plugin('pybtex.backends', renderer)() - style = pybtex.plugin.find_plugin('pybtex.style.formatting', style)() + formatter = pybtex.plugin.find_plugin('pybtex.style.formatting', + style)() try: - formatter = style.format_entry(self._key, self._entry) + formatter = formatter.format_entry(self._key, self._entry) rendered = formatter.text.render(backend) except Exception as err: raise RenderError( diff --git a/esmvalcore/experimental/recipe_output.py 
b/esmvalcore/experimental/recipe_output.py index 45b29be78f..4e31361075 100644 --- a/esmvalcore/experimental/recipe_output.py +++ b/esmvalcore/experimental/recipe_output.py @@ -4,6 +4,7 @@ import logging from collections.abc import Mapping from pathlib import Path +from typing import Optional, Tuple, Type import iris @@ -47,9 +48,9 @@ def __len__(self): """Return number of files.""" return len(self.files) - def __getitem__(self, key: str): - """Get item indexed by `key`.""" - return self.files[key] + def __getitem__(self, index: int): + """Get item indexed by `index`.""" + return self.files[index] @property def image_files(self) -> tuple: @@ -68,7 +69,7 @@ def from_task(cls, task) -> 'TaskOutput': Where task is an instance of `esmvalcore._task.BaseTask`. """ product_attributes = task.get_product_attributes() - return cls(name=task.name, output=product_attributes) + return cls(name=task.name, files=product_attributes) class RecipeOutput(Mapping): @@ -188,7 +189,7 @@ class OutputFile(): Attributes corresponding to the recipe output """ - kind = None + kind: Optional[str] = None def __init__(self, path: str, attributes: dict = None): if not attributes: @@ -197,8 +198,8 @@ def __init__(self, path: str, attributes: dict = None): self.attributes = attributes self.path = Path(path) - self._authors = None - self._references = None + self._authors: Optional[Tuple[Contributor, ...]] = None + self._references: Optional[Tuple[Reference, ...]] = None def __repr__(self): """Return canonical string representation.""" @@ -265,11 +266,13 @@ def provenance_xml_file(self): return self._get_derived_path('_provenance', '.xml') @classmethod - def create(cls, path: str, attributes: dict = None): + def create(cls, path: str, attributes: dict = None) -> 'OutputFile': """Construct new instances of OutputFile. Chooses a derived class if suitable. """ + item_class: Type[OutputFile] + ext = Path(path).suffix if ext in ('.png', ): item_class = ImageFile diff --git a/esmvalcore/experimental/templates/__init__.py b/esmvalcore/experimental/templates/__init__.py index 22bef2fa98..e0f38b93e0 100644 --- a/esmvalcore/experimental/templates/__init__.py +++ b/esmvalcore/experimental/templates/__init__.py @@ -3,7 +3,8 @@ from jinja2 import Environment, FileSystemLoader -file_loader = FileSystemLoader(Path(__file__).parent) +TEMPLATE_DIR = str(Path(__file__).parent) +file_loader = FileSystemLoader(TEMPLATE_DIR) environment = Environment(loader=file_loader, autoescape=True) get_template = environment.get_template diff --git a/esmvalcore/experimental/utils.py b/esmvalcore/experimental/utils.py index 0a7292e184..a28e163267 100644 --- a/esmvalcore/experimental/utils.py +++ b/esmvalcore/experimental/utils.py @@ -1,7 +1,9 @@ """ESMValCore utilities.""" +import os import re from pathlib import Path +from typing import Pattern, Tuple, Union from esmvalcore._config import DIAGNOSTICS @@ -10,7 +12,7 @@ class RecipeList(list): """Container for recipes.""" - def find(self, query: str): + def find(self, query: Pattern[str]): """Search for recipes matching the search query or pattern. Searches in the description, authors and project information fields. @@ -18,7 +20,7 @@ def find(self, query: str): Parameters ---------- - query : str + query : str, Pattern String to search for, e.g. ``find_recipes('righi')`` will return all matching that author. Can be a `regex` pattern. 
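Given the ``Pattern`` annotation above, callers would hand ``find`` a pre-compiled regex; a hedged sketch using ``get_all_recipes`` from later in this diff (query string illustrative):

    import re

    from esmvalcore.experimental.utils import get_all_recipes

    recipes = get_all_recipes()
    # Searches the description, authors and project fields of each recipe.
    matches = recipes.find(re.compile('righi'))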
@@ -53,14 +55,14 @@ def get_all_recipes(subdir: str = None) -> list: RecipeList List of available recipes """ - if not subdir: + if subdir is None: subdir = '**' rootdir = DIAGNOSTICS.recipes files = rootdir.glob(f'{subdir}/*.yml') return RecipeList(Recipe(file) for file in files) -def get_recipe(name: str) -> 'Recipe': +def get_recipe(name: Union[os.PathLike, str]) -> Recipe: """Get a recipe by its name. The function looks first in the local directory, and second in the @@ -83,12 +85,14 @@ def get_recipe(name: str) -> 'Recipe': FileNotFoundError If the name cannot be resolved to a recipe file. """ + filenames: Tuple[Union[str, os.PathLike], ...] + locations = Path(), DIAGNOSTICS.recipes - if isinstance(name, Path): - filenames = (name, ) - else: + if isinstance(name, str): filenames = (name, name + '.yml') + else: + filenames = (name, ) for location in locations: for filename in filenames: diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index e9747b2fc1..9fe86d140e 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -10,6 +10,7 @@ from .._task import BaseTask from ..cmor.check import cmor_check_data, cmor_check_metadata from ..cmor.fix import fix_data, fix_file, fix_metadata +from ._ancillary_vars import add_fx_variables, remove_fx_variables from ._area import ( area_statistics, extract_named_regions, @@ -93,6 +94,8 @@ # Data reformatting/CMORization 'fix_data', 'cmor_check_data', + # Load fx_variables in cube + 'add_fx_variables', # Time extraction (as defined in the preprocessor section) 'extract_time', 'extract_season', @@ -156,6 +159,8 @@ 'linear_trend', 'linear_trend_stderr', 'convert_units', + # Remove fx_variables from cube + 'remove_fx_variables', # Save to file 'save', 'cleanup', @@ -179,8 +184,8 @@ DEFAULT_ORDER = tuple(__all__) # The order of initial and final steps cannot be configured -INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('cmor_check_data') + 1] -FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('save'):] +INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_fx_variables') + 1] +FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('remove_fx_variables'):] MULTI_MODEL_FUNCTIONS = { 'multi_model_statistics', diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py new file mode 100644 index 0000000000..53a53d529a --- /dev/null +++ b/esmvalcore/preprocessor/_ancillary_vars.py @@ -0,0 +1,214 @@ +"""Preprocessor functions for ancillary variables and cell measures.""" + +import logging +import iris + +import dask.array as da + +from esmvalcore.preprocessor._io import load, concatenate_callback, concatenate +from esmvalcore.cmor.fix import fix_metadata, fix_data +from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data + +logger = logging.getLogger(__name__) + + +def _load_fx(var_cube, fx_info, check_level): + """Load and CMOR-check fx variables.""" + fx_cubes = iris.cube.CubeList() + + for fx_file in fx_info['filename']: + loaded_cube = load(fx_file, callback=concatenate_callback) + short_name = fx_info['short_name'] + project = fx_info['project'] + dataset = fx_info['dataset'] + mip = fx_info['mip'] + freq = fx_info['frequency'] + loaded_cube = fix_metadata(loaded_cube, short_name=short_name, + project=project, dataset=dataset, + mip=mip, frequency=freq, + check_level=check_level) + fx_cubes.append(loaded_cube[0]) + + fx_cube = concatenate(fx_cubes) + + if not _is_fx_broadcastable(fx_cube, var_cube): + return None + + fx_cube = 
cmor_check_metadata(fx_cube, cmor_table=project, mip=mip,
+                                  short_name=short_name, frequency=freq,
+                                  check_level=check_level)
+
+    fx_cube = fix_data(fx_cube, short_name=short_name, project=project,
+                       dataset=dataset, mip=mip, frequency=freq,
+                       check_level=check_level)
+
+    fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip,
+                              short_name=fx_cube.var_name, frequency=freq,
+                              check_level=check_level)
+
+    return fx_cube
+
+
+def _is_fx_broadcastable(fx_cube, cube):
+    try:
+        da.broadcast_to(fx_cube.core_data(), cube.shape)
+    except ValueError as exc:
+        logger.debug("Dimensions of %s and %s cubes do not match. "
+                     "Discarding use of fx_variable: %s",
+                     cube.var_name, fx_cube.var_name, exc)
+        return False
+    return True
+
+
+def add_cell_measure(cube, fx_cube, measure):
+    """
+    Broadcast fx_cube and add it as a cell_measure in
+    the cube containing the data.
+
+    Parameters
+    ----------
+    cube: iris.cube.Cube
+        Iris cube with input data.
+    fx_cube: iris.cube.Cube
+        Iris cube with fx data.
+    measure: str
+        Name of the measure, can be 'area' or 'volume'.
+
+    Returns
+    -------
+    iris.cube.Cube
+        Cube with added cell measure.
+
+    Raises
+    ------
+    ValueError
+        If measure name is not 'area' or 'volume'.
+    """
+    if measure not in ['area', 'volume']:
+        raise ValueError(f"measure name must be 'area' or 'volume', "
+                         f"got {measure} instead")
+    try:
+        fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape)
+    except ValueError:
+        logger.debug("Dimensions of %s and %s cubes do not match. "
+                     "Cannot broadcast cubes.",
+                     cube.var_name, fx_cube.var_name)
+        return
+    measure = iris.coords.CellMeasure(
+        fx_data,
+        standard_name=fx_cube.standard_name,
+        units=fx_cube.units,
+        measure=measure,
+        var_name=fx_cube.var_name,
+        attributes=fx_cube.attributes)
+    cube.add_cell_measure(measure, range(0, measure.ndim))
+    logger.debug('Added %s as cell measure in cube of %s.',
+                 fx_cube.var_name, cube.var_name)
+
+
+def add_ancillary_variable(cube, fx_cube):
+    """
+    Broadcast fx_cube and add it as an ancillary_variable in
+    the cube containing the data.
+
+    Parameters
+    ----------
+    cube: iris.cube.Cube
+        Iris cube with input data.
+    fx_cube: iris.cube.Cube
+        Iris cube with fx data.
+
+    Returns
+    -------
+    iris.cube.Cube
+        Cube with added ancillary variables.
+    """
+    try:
+        fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape)
+    except ValueError:
+        logger.debug("Dimensions of %s and %s cubes do not match. "
+                     "Cannot broadcast cubes.",
+                     cube.var_name, fx_cube.var_name)
+        return
+    ancillary_var = iris.coords.AncillaryVariable(
+        fx_data,
+        standard_name=fx_cube.standard_name,
+        units=fx_cube.units,
+        var_name=fx_cube.var_name,
+        attributes=fx_cube.attributes)
+    cube.add_ancillary_variable(ancillary_var, range(0, ancillary_var.ndim))
+    logger.debug('Added %s as ancillary variable in cube of %s.',
+                 fx_cube.var_name, cube.var_name)
+
+
+def add_fx_variables(cube, fx_variables, check_level):
+    """
+    Load requested fx files, check them against CMOR standards and add the
+    fx variables as cell measures or ancillary variables in
+    the cube containing the data.
+
+    Parameters
+    ----------
+    cube: iris.cube.Cube
+        Iris cube with input data.
+    fx_variables: dict
+        Dictionary with fx_variable information.
+    check_level: CheckLevels
+        Level of strictness of the checks.
+
+
+    Returns
+    -------
+    iris.cube.Cube
+        Cube with added cell measures or ancillary variables.
+ """ + + if not fx_variables: + return cube + for fx_info in fx_variables.values(): + if not fx_info: + continue + if isinstance(fx_info['filename'], str): + fx_info['filename'] = [fx_info['filename']] + fx_cube = _load_fx(cube, fx_info, check_level) + + if fx_cube is None: + continue + + measure_name = { + 'areacella': 'area', + 'areacello': 'area', + 'volcello': 'volume' + } + + if fx_cube.var_name in measure_name: + add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) + else: + add_ancillary_variable(cube, fx_cube) + return cube + + +def remove_fx_variables(cube): + """ + Remove fx variables present as cell measures or ancillary variables in + the cube containing the data. + + Parameters + ---------- + cube: iris.cube.Cube + Iris cube with data and cell measures or ancillary variables. + + + Returns + ------- + iris.cube.Cube + Cube without cell measures or ancillary variables. + """ + + if cube.cell_measures(): + for measure in cube.cell_measures(): + cube.remove_cell_measure(measure.standard_name) + if cube.ancillary_variables(): + for variable in cube.ancillary_variables(): + cube.remove_ancillary_variable(variable.standard_name) + return cube diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 550747e2ec..446c8d94a0 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -22,14 +22,12 @@ logger = logging.getLogger(__name__) -# slice cube over a restricted area (box) def extract_region(cube, start_longitude, end_longitude, start_latitude, end_latitude): """Extract a region from a cube. Function that subsets a cube on a box (start_longitude, end_longitude, start_latitude, end_latitude) - This function is a restriction of masked_cube_lonlat(). Parameters ---------- @@ -63,16 +61,29 @@ def extract_region(cube, start_longitude, end_longitude, start_latitude, ignore_bounds=True, ) region_subset = region_subset.intersection(longitude=(0., 360.)) - return region_subset - # Irregular grids - lats = cube.coord('latitude').points - lons = cube.coord('longitude').points + else: + region_subset = _extract_irregular_region( + cube, + start_longitude, + end_longitude, + start_latitude, + end_latitude, + ) + return region_subset + + +def _extract_irregular_region(cube, start_longitude, end_longitude, + start_latitude, end_latitude): + """Extract a region from a cube on an irregular grid.""" # Convert longitudes to valid range if start_longitude != 360.: start_longitude %= 360. if end_longitude != 360.: end_longitude %= 360. + # Select coordinates inside the region + lats = cube.coord('latitude').points + lons = (cube.coord('longitude').points + 360.) % 360. 
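    # Worked example (illustrative values): points of [-170., 10., 350.] map
    # to [190., 10., 350.] above; a dateline-crossing request with
    # start_longitude=-10, end_longitude=10 becomes start=350, end=10 after
    # the modulo, so the `else` branch below selects lons >= 350 or lons <= 10.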
if start_longitude <= end_longitude: select_lons = (lons >= start_longitude) & (lons <= end_longitude) else: @@ -84,8 +95,19 @@ def extract_region(cube, start_longitude, end_longitude, start_latitude, select_lats = (lats >= start_latitude) | (lats <= end_latitude) selection = select_lats & select_lons - selection = da.broadcast_to(selection, cube.shape) - cube.data = da.ma.masked_where(~selection, cube.core_data()) + + # Crop the selection, but keep rectangular shape + i_range, j_range = selection.nonzero() + if i_range.size == 0: + raise ValueError("No data points available in selected region") + i_min, i_max = i_range.min(), i_range.max() + j_min, j_max = j_range.min(), j_range.max() + i_slice, j_slice = slice(i_min, i_max + 1), slice(j_min, j_max + 1) + cube = cube[..., i_slice, j_slice] + selection = selection[i_slice, j_slice] + # Mask remaining coordinates outside region + mask = da.broadcast_to(~selection, cube.shape) + cube.data = da.ma.masked_where(mask, cube.core_data()) return cube @@ -155,46 +177,7 @@ def meridional_statistics(cube, operator): raise ValueError(msg) -def tile_grid_areas(cube, fx_files): - """Tile the grid area data to match the dataset cube. - - Parameters - ---------- - cube: iris.cube.Cube - input cube. - fx_files: dict - dictionary of field:filename for the fx_files - - Returns - ------- - iris.cube.Cube - Freshly tiled grid areas cube. - """ - grid_areas = None - if fx_files: - for key, fx_file in fx_files.items(): - if not fx_file: - continue - logger.info('Attempting to load %s from file: %s', key, fx_file) - fx_cube = iris.load_cube(fx_file) - - grid_areas = fx_cube.core_data() - if cube.ndim == 4 and grid_areas.ndim == 2: - grid_areas = da.tile(grid_areas, - [cube.shape[0], cube.shape[1], 1, 1]) - elif cube.ndim == 4 and grid_areas.ndim == 3: - grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1, 1]) - elif cube.ndim == 3 and grid_areas.ndim == 2: - grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1]) - else: - raise ValueError('Grid and dataset number of dimensions not ' - 'recognised: {} and {}.' - ''.format(cube.ndim, grid_areas.ndim)) - return grid_areas - - -# get the area average -def area_statistics(cube, operator, fx_variables=None): +def area_statistics(cube, operator): """Apply a statistical operator in the horizontal direction. The average in the horizontal direction. We assume that the @@ -231,8 +214,6 @@ def area_statistics(cube, operator, fx_variables=None): operator: str The operation, options: mean, median, min, max, std_dev, sum, variance, rms. - fx_variables: dict - dictionary of field:filename for the fx_variables Returns ------- @@ -246,9 +227,17 @@ def area_statistics(cube, operator, fx_variables=None): ValueError if input data cube has different shape than grid area weights """ - grid_areas = tile_grid_areas(cube, fx_variables) + grid_areas = None + try: + grid_areas = cube.cell_measure('cell_area').core_data() + except iris.exceptions.CellMeasureNotFoundError: + logger.info( + 'Cell measure "cell_area" not found in cube %s. 
' + 'Check fx_file availability.', cube.summary(shorten=True) + ) + logger.info('Attempting to calculate grid cell area...') - if not fx_variables and cube.coord('latitude').points.ndim == 2: + if grid_areas is None and cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) @@ -267,7 +256,7 @@ def area_statistics(cube, operator, fx_variables=None): cube.coord('latitude')) coord_names = ['longitude', 'latitude'] - if grid_areas is None or not grid_areas.any(): + if grid_areas is None: cube = guess_bounds(cube, coord_names) grid_areas = iris.analysis.cartography.area_weights(cube) logger.info('Calculated grid area shape: %s', grid_areas.shape) @@ -419,7 +408,7 @@ def _get_masks_from_geometries(geometries, lon, lat, method='contains', if ids: ids = [str(id_) for id_ in ids] for i, item in enumerate(geometries): - for id_prop in ('name', 'NAME', 'id', 'ID'): + for id_prop in ('name', 'NAME', 'Name', 'id', 'ID'): if id_prop in item['properties']: id_ = str(item['properties'][id_prop]) break diff --git a/esmvalcore/preprocessor/_derive/rlus.py b/esmvalcore/preprocessor/_derive/rlus.py new file mode 100644 index 0000000000..4d536c29ad --- /dev/null +++ b/esmvalcore/preprocessor/_derive/rlus.py @@ -0,0 +1,49 @@ +"""Derivation of variable `rlus`. + +authors: + - lukas_brunner + +""" +from iris import Constraint + +from ._baseclass import DerivedVariableBase + + +class DerivedVariable(DerivedVariableBase): + """Derivation of variable `rlus`.""" + + @staticmethod + def required(project): + """Declare the variables needed for derivation.""" + required = [ + { + 'short_name': 'rlds' + }, + { + 'short_name': 'rlns' + }, + ] + return required + + @staticmethod + def calculate(cubes): + """Compute upwelling longwave flux from downwelling and net.""" + rlds_cube = cubes.extract_cube( + Constraint(name='surface_downwelling_longwave_flux_in_air')) + rlns_cube = cubes.extract_cube( + Constraint(name='surface_net_downward_longwave_flux')) + # fix latitude and longitude var_name + rlns_cube.coord(axis='X').long_name = rlds_cube.coord( + axis='X').long_name + rlns_cube.coord(axis='Y').long_name = rlds_cube.coord( + axis='Y').long_name + rlns_cube.coord(axis='X').var_name = rlds_cube.coord( + axis='X').var_name + rlns_cube.coord(axis='Y').var_name = rlds_cube.coord( + axis='Y').var_name + + rlus_cube = rlds_cube - rlns_cube + + rlus_cube.attributes['positive'] = 'up' + + return rlus_cube diff --git a/esmvalcore/preprocessor/_derive/rsus.py b/esmvalcore/preprocessor/_derive/rsus.py new file mode 100644 index 0000000000..326d063c26 --- /dev/null +++ b/esmvalcore/preprocessor/_derive/rsus.py @@ -0,0 +1,49 @@ +"""Derivation of variable `rsus`. 
+ +authors: + - lukas_brunner + +""" +from iris import Constraint + +from ._baseclass import DerivedVariableBase + + +class DerivedVariable(DerivedVariableBase): + """Derivation of variable `rsus`.""" + + @staticmethod + def required(project): + """Declare the variables needed for derivation.""" + required = [ + { + 'short_name': 'rsds' + }, + { + 'short_name': 'rsns' + }, + ] + return required + + @staticmethod + def calculate(cubes): + """Compute upwelling shortwave flux from downwelling and net.""" + rsds_cube = cubes.extract_cube( + Constraint(name='surface_downwelling_shortwave_flux_in_air')) + rsns_cube = cubes.extract_cube( + Constraint(name='surface_net_downward_shortwave_flux')) + # fix latitude and longitude var_name + rsns_cube.coord(axis='X').long_name = rsds_cube.coord( + axis='X').long_name + rsns_cube.coord(axis='Y').long_name = rsds_cube.coord( + axis='Y').long_name + rsns_cube.coord(axis='X').var_name = rsds_cube.coord( + axis='X').var_name + rsns_cube.coord(axis='Y').var_name = rsds_cube.coord( + axis='Y').var_name + + rsus_cube = rsds_cube - rsns_cube + + rsus_cube.attributes['positive'] = 'up' + + return rsus_cube diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index f508f0fa4f..938e4b6f2e 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -173,6 +173,8 @@ def _get_concatenation_error(cubes): def concatenate(cubes): """Concatenate all cubes after fixing metadata.""" + if not cubes: + return cubes if len(cubes) == 1: return cubes[0] @@ -227,7 +229,15 @@ def save(cubes, filename, optimize_access='', compress=False, alias='', str filename + Raises + ------ + ValueError + cubes is empty. + """ + if not cubes: + raise ValueError(f"Cannot save empty cubes '{cubes}'") + # Rename some arguments kwargs['target'] = filename kwargs['zlib'] = compress diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index 08ba463275..db328f97e3 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -2,7 +2,7 @@ Mask module. Module that performs a number of masking -operations that include: masking with fx files, masking with +operations that include: masking with ancillary variables, masking with Natural Earth shapefiles (land or ocean), masking on thresholds, missing values masking. 
""" @@ -21,28 +21,6 @@ logger = logging.getLogger(__name__) -def _check_dims(cube, mask_cube): - """Check for same ndim and x-y dimensions for data and mask cubes.""" - x_dim = cube.coord('longitude').points.ndim - y_dim = cube.coord('latitude').points.ndim - mx_dim = mask_cube.coord('longitude').points.ndim - my_dim = mask_cube.coord('latitude').points.ndim - len_x = len(cube.coord('longitude').points) - len_y = len(cube.coord('latitude').points) - len_mx = len(mask_cube.coord('longitude').points) - len_my = len(mask_cube.coord('latitude').points) - if (x_dim == mx_dim and y_dim == my_dim and len_x == len_mx - and len_y == len_my): - logger.debug('Data cube and fx mask have same dims') - return True - - logger.debug( - 'Data cube and fx mask differ in dims: ' - 'cube: ((%i, %i), grid=(%i, %i)), mask: ((%i, %i), grid=(%i, %i))', - x_dim, y_dim, len_x, len_y, mx_dim, my_dim, len_mx, len_my) - return False - - def _get_fx_mask(fx_data, fx_option, mask_type): """Build a percentage-thresholded mask from an fx file.""" inmask = np.zeros_like(fx_data, bool) @@ -73,37 +51,32 @@ def _get_fx_mask(fx_data, fx_option, mask_type): def _apply_fx_mask(fx_mask, var_data): """Apply the fx data extracted mask on the actual processed data.""" - # Broadcast mask - var_mask = np.zeros_like(var_data, bool) - var_mask = np.broadcast_to(fx_mask, var_mask.shape).copy() - # Apply mask across if np.ma.is_masked(var_data): - var_mask |= var_data.mask + fx_mask |= var_data.mask # Build the new masked data - var_data = np.ma.array(var_data, mask=var_mask, fill_value=1e+20) + var_data = np.ma.array(var_data, mask=fx_mask, fill_value=1e+20) return var_data -def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): +def mask_landsea(cube, mask_out, always_use_ne_mask=False): """ Mask out either land mass or sea (oceans, seas and lakes). - It uses dedicated fx files (sftlf or sftof) or, in their absence, it - applies a Natural Earth mask (land or ocean contours). Note that the - Natural Earth masks have different resolutions: 10m for land, and 50m - for seas; these are more than enough for ESMValTool puprpose. + It uses dedicated ancillary variables (sftlf or sftof) or, + in their absence, it applies a + Natural Earth mask (land or ocean contours). + Note that the Natural Earth masks have different resolutions: + 10m for land, and 50m for seas. + These are more than enough for ESMValTool purposes. Parameters ---------- cube: iris.cube.Cube data cube to be masked. - fx_variables: dict - dict: keys: fx variables, values: full paths to fx files. - mask_out: str either "land" to mask out land mass or "sea" to mask out seas. 
@@ -132,30 +105,24 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): 'sea': os.path.join(cwd, 'ne_masks/ne_50m_ocean.shp') } - fx_files = fx_variables.values() - if any(fx_files) and not always_use_ne_mask: - fx_cubes = {} - for fx_file in fx_files: - if not fx_file: - continue - fxfile_members = os.path.basename(fx_file).split('_') - for fx_root in ['sftlf', 'sftof']: - if fx_root in fxfile_members: - fx_cubes[fx_root] = iris.load_cube(fx_file) - + if not always_use_ne_mask: # preserve importance order: try stflf first then sftof - if ('sftlf' in fx_cubes.keys() - and _check_dims(cube, fx_cubes['sftlf'])): - landsea_mask = _get_fx_mask(fx_cubes['sftlf'].data, mask_out, - 'sftlf') - cube.data = _apply_fx_mask(landsea_mask, cube.data) - logger.debug("Applying land-sea mask: sftlf") - elif ('sftof' in fx_cubes.keys() - and _check_dims(cube, fx_cubes['sftof'])): - landsea_mask = _get_fx_mask(fx_cubes['sftof'].data, mask_out, - 'sftof') + fx_cube = None + try: + fx_cube = cube.ancillary_variable('land_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + try: + fx_cube = cube.ancillary_variable('sea_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug( + 'Ancillary variables land/sea area fraction ' + 'not found in cube. Check fx_file availability.') + + if fx_cube: + landsea_mask = _get_fx_mask( + fx_cube.data, mask_out, fx_cube.var_name) cube.data = _apply_fx_mask(landsea_mask, cube.data) - logger.debug("Applying land-sea mask: sftof") + logger.debug("Applying land-sea mask: %s", fx_cube.var_name) else: if cube.coord('longitude').points.ndim < 2: cube = _mask_with_shp(cube, shapefiles[mask_out], [ @@ -184,21 +151,20 @@ def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False): return cube -def mask_landseaice(cube, fx_variables, mask_out): +def mask_landseaice(cube, mask_out): """ Mask out either landsea (combined) or ice. Function that masks out either landsea (land and seas) or ice (Antarctica - and Greenland and some wee glaciers). It uses dedicated fx files (sftgif). + and Greenland and some wee glaciers). + + It uses dedicated ancillary variables (sftgif). Parameters ---------- cube: iris.cube.Cube data cube to be masked. - fx_variables: dict - dict: keys: fx variables, values: full paths to fx files. - mask_out: str either "landsea" to mask out landsea or "ice" to mask out ice. @@ -210,26 +176,20 @@ def mask_landseaice(cube, fx_variables, mask_out): Raises ------ ValueError - Error raised if fx mask and data have different dimensions. - ValueError - Error raised if fx files list is empty. + Error raised if landsea-ice mask not found as an ancillary variable. """ # sftgif is the only one so far but users can set others - fx_files = fx_variables.values() - if any(fx_files): - for fx_file in fx_files: - if not fx_file: - continue - fx_cube = iris.load_cube(fx_file) - - if _check_dims(cube, fx_cube): - landice_mask = _get_fx_mask(fx_cube.data, mask_out, 'sftgif') - cube.data = _apply_fx_mask(landice_mask, cube.data) - logger.debug("Applying landsea-ice mask: sftgif") - else: - msg = "Landsea-ice mask and data have different dimensions." - raise ValueError(msg) + fx_cube = None + try: + fx_cube = cube.ancillary_variable('land_ice_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + logger.debug('Ancillary variable land ice area fraction ' + 'not found in cube. 
Check fx_file availability.')

+    if fx_cube:
+        landice_mask = _get_fx_mask(fx_cube.data, mask_out, fx_cube.var_name)
+        cube.data = _apply_fx_mask(landice_mask, cube.data)
+        logger.debug("Applying landsea-ice mask: sftgif")
     else:
         msg = "Landsea-ice mask could not be found. Stopping. "
         raise ValueError(msg)
diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py
index 483019c6ec..dea5d1d93a 100644
--- a/esmvalcore/preprocessor/_multimodel.py
+++ b/esmvalcore/preprocessor/_multimodel.py
@@ -44,8 +44,10 @@ def _resolve_operator(statistic: str):
     # special cases
     if statistic == 'std':
         logger.warning(
-            "Multicube statistics is aligning its behaviour with iris.analysis"
-            ". Please consider replacing 'std' with 'std_dev' in your code.")
+            "Changing statistics from specified `std` to `std_dev`, "
+            "since multimodel statistics is now using the iris.analysis module"
+            ", which also uses `std_dev`. Please consider replacing 'std' "
+            "with 'std_dev' in your recipe or code.")
         statistic = 'std_dev'

     elif re.match(r"^(p\d{1,2})(\.\d*)?$", statistic):
@@ -145,7 +147,13 @@ def _subset(cube, time_points):
     begin = cube.coord('time').units.num2date(time_points[0])
     end = cube.coord('time').units.num2date(time_points[-1])
     constraint = iris.Constraint(time=lambda cell: begin <= cell.point <= end)
-    return cube.extract(constraint)
+    try:
+        return cube.extract(constraint)
+    except Exception as excinfo:
+        raise ValueError(
+            "Tried to align cubes in multi-model statistics, but failed for"
+            f" cube {cube} and time points {time_points}. Encountered the "
+            f"following exception: {excinfo}")


 def _extend(cube, time_points):
@@ -189,7 +197,13 @@ def _extend(cube, time_points):

     cube_list = iris.cube.CubeList(cube_list)

-    new_cube = cube_list.concatenate_cube()
+    try:
+        new_cube = cube_list.concatenate_cube()
+    except Exception as excinfo:
+        raise ValueError(
+            "Tried to align cubes in multi-model statistics, but failed for"
+            f" cube {cube} and time points {time_points}. Encountered the "
+            f"following exception: {excinfo}")

     return new_cube
@@ -266,23 +280,33 @@ def rechunk(cube):

 def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
                    **kwargs):
-    """Loop over slices of a cube if iris has no lazy aggregator."""
+    """Compute statistics one slice at a time."""
     _ = [cube.data for cube in cubes]  # make sure the cubes' data are realized

     result_slices = []
     for i in range(cubes[0].shape[0]):
-        single_model_slices = [cube[i] for cube in cubes
-                               ]  # maybe filter the iris warning here?
+        single_model_slices = [cube[i] for cube in cubes]
         combined_slice = _combine(single_model_slices)
         collapsed_slice = combined_slice.collapsed(CONCAT_DIM, operator,
                                                    **kwargs)
+
+        # some iris aggregators modify dtype, see e.g.
+        # https://numpy.org/doc/stable/reference/generated/numpy.ma.average.html
+        collapsed_slice.data = collapsed_slice.data.astype(np.float32)
+
         result_slices.append(collapsed_slice)

-    result_cube = iris.cube.CubeList(result_slices).merge_cube()
+    try:
+        result_cube = iris.cube.CubeList(result_slices).merge_cube()
+    except Exception as excinfo:
+        raise ValueError(
+            "Multi-model statistics failed to concatenate results into a"
+            f" single array. This happened for operator {operator}"
+            f" with computed statistics {result_slices}. "
+            "This can happen e.g. if the calculation results in inconsistent"
+            f" dtypes. 
Encountered the following exception: {excinfo}")

-    # For consistency with lazy procedure
     result_cube.data = np.ma.array(result_cube.data)
-
     result_cube.remove_coord(CONCAT_DIM)

     return result_cube
diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py
index 2fc3eb60b5..b35c4b3a25 100644
--- a/esmvalcore/preprocessor/_regrid.py
+++ b/esmvalcore/preprocessor/_regrid.py
@@ -4,6 +4,7 @@
 import re
 from copy import deepcopy
 from decimal import Decimal
+from typing import Dict

 import iris
 import numpy as np
@@ -40,7 +41,7 @@
 _LON_RANGE = _LON_MAX - _LON_MIN

 # A cached stock of standard horizontal target grids.
-_CACHE = dict()
+_CACHE: Dict[str, iris.cube.Cube] = dict()

 # Supported point interpolation schemes.
 POINT_INTERPOLATION_SCHEMES = {
@@ -466,15 +467,56 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True):
         [coord] = coords
         cube.remove_coord(coord)

-    # Perform the horizontal regridding.
-    if _attempt_irregular_regridding(cube, scheme):
-        cube = esmpy_regrid(cube, target_grid, scheme)
-    else:
-        cube = cube.regrid(target_grid, HORIZONTAL_SCHEMES[scheme])
+    # Return non-regridded cube if horizontal grid is the same.
+    if not _horizontal_grid_is_close(cube, target_grid):
+
+        # Perform the horizontal regridding.
+        if _attempt_irregular_regridding(cube, scheme):
+            cube = esmpy_regrid(cube, target_grid, scheme)
+        else:
+            cube = cube.regrid(target_grid, HORIZONTAL_SCHEMES[scheme])

     return cube


+def _horizontal_grid_is_close(cube1, cube2):
+    """Check if two cubes have the same horizontal grid definition.
+
+    Returns a boolean indicating whether both cubes share the same
+    horizontal grid definition. The function checks both longitude and
+    latitude, based on extent and resolution.
+
+    Parameters
+    ----------
+    cube1 : cube
+        The first of the cubes to be checked.
+    cube2 : cube
+        The second of the cubes to be checked.
+
+    Returns
+    -------
+    bool
+
+    .. note::
+
+        The current implementation checks if the bounds and the
+        grid shapes are the same.
+        Exits on first difference.
+    """
+    # Go through the two expected horizontal coordinates, latitude and
+    # longitude.
+    for coord in ['latitude', 'longitude']:
+        coord1 = cube1.coord(coord)
+        coord2 = cube2.coord(coord)
+
+        if not coord1.shape == coord2.shape:
+            return False
+
+        if not np.allclose(coord1.bounds, coord2.bounds):
+            return False
+
+    return True
+
+
 def _create_cube(src_cube, data, src_levels, levels):
     """Generate a new cube with the interpolated data.
diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py
index 589a6c68e3..27182081be 100644
--- a/esmvalcore/preprocessor/_volume.py
+++ b/esmvalcore/preprocessor/_volume.py
@@ -174,10 +174,7 @@ def calculate_volume(cube):
     return grid_volume


-def volume_statistics(
-    cube,
-    operator,
-    fx_variables=None):
+def volume_statistics(cube, operator):
     """
     Apply a statistical operation over a volume.
@@ -187,12 +184,10 @@ def volume_statistics(
     Parameters
     ----------
-    cube: iris.cube.Cube
-        Input cube.
-    operator: str
-        The operation to apply to the cube, options are: 'mean'.
-    fx_variables: dict
-        dictionary of field:filename for the fx_variables
+    cube: iris.cube.Cube
+        Input cube.
+    operator: str
+        The operation to apply to the cube, options are: 'mean'.

     Returns
     -------
@@ -211,27 +206,16 @@ def volume_statistics(
     # Load z coordinate field and figure out which dim is which.
t_dim = cube.coord_dims('time')[0] - grid_volume_found = False - grid_volume = None - if fx_variables: - for key, fx_file in fx_variables.items(): - if fx_file is None: - continue - logger.info('Attempting to load %s from file: %s', key, fx_file) - fx_cube = iris.load_cube(fx_file) - - grid_volume = fx_cube.data - grid_volume_found = True - cube_shape = cube.data.shape - - if not grid_volume_found: + try: + grid_volume = cube.cell_measure('ocean_volume').core_data() + except iris.exceptions.CellMeasureNotFoundError: + logger.info( + 'Cell measure "ocean_volume" not found in cube. ' + 'Check fx_file availability.' + ) + logger.info('Attempting to calculate grid cell volume...') grid_volume = calculate_volume(cube) - # Check whether the dimensions are right. - if cube.data.ndim == 4 and grid_volume.ndim == 3: - grid_volume = np.tile(grid_volume, - [cube_shape[0], 1, 1, 1]) - if cube.data.shape != grid_volume.shape: raise ValueError('Cube shape ({}) doesn`t match grid volume shape ' '({})'.format(cube.data.shape, grid_volume.shape)) diff --git a/esmvalcore/preprocessor/_weighting.py b/esmvalcore/preprocessor/_weighting.py index b786684135..32e6c526a0 100644 --- a/esmvalcore/preprocessor/_weighting.py +++ b/esmvalcore/preprocessor/_weighting.py @@ -7,43 +7,31 @@ logger = logging.getLogger(__name__) -def _get_land_fraction(cube, fx_variables): +def _get_land_fraction(cube): """Extract land fraction as :mod:`dask.array`.""" + fx_cube = None land_fraction = None errors = [] - if not fx_variables: - errors.append("No fx files given.") - return (land_fraction, errors) - for (fx_var, fx_path) in fx_variables.items(): - if not fx_path: - errors.append(f"File for '{fx_var}' not found.") - continue - fx_cube = iris.load_cube(fx_path) - if not _shape_is_broadcastable(fx_cube.shape, cube.shape): + try: + fx_cube = cube.ancillary_variable('land_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: + try: + fx_cube = cube.ancillary_variable('sea_area_fraction') + except iris.exceptions.AncillaryVariableNotFoundError: errors.append( - f"Cube '{fx_var}' with shape {fx_cube.shape} not " - f"broadcastable to cube '{cube.var_name}' with shape " - f"{cube.shape}.") - continue - if fx_var == 'sftlf': - land_fraction = fx_cube.core_data() / 100.0 - break - if fx_var == 'sftof': - land_fraction = 1.0 - fx_cube.core_data() / 100.0 - break - errors.append( - f"Cannot calculate land fraction from '{fx_var}', expected " - f"'sftlf' or 'sftof'.") - return (land_fraction, errors) + 'Ancillary variables land/sea area fraction ' + 'not found in cube. Check fx_file availability.') + return (land_fraction, errors) + if fx_cube.var_name == 'sftlf': + land_fraction = fx_cube.core_data() / 100.0 + if fx_cube.var_name == 'sftof': + land_fraction = 1.0 - fx_cube.core_data() / 100.0 -def _shape_is_broadcastable(shape_1, shape_2): - """Check if two :mod:`numpy.array' shapes are broadcastable.""" - return all((m == n) or (m == 1) or (n == 1) - for (m, n) in zip(shape_1[::-1], shape_2[::-1])) + return (land_fraction, errors) -def weighting_landsea_fraction(cube, fx_variables, area_type): +def weighting_landsea_fraction(cube, area_type): """Weight fields using land or sea fraction. This preprocessor function weights a field with its corresponding land or @@ -58,9 +46,6 @@ def weighting_landsea_fraction(cube, fx_variables, area_type): ---------- cube : iris.cube.Cube Data cube to be weighted. 
- fx_variables : dict - Dictionary holding ``var_name`` (keys) and full paths (values) to the - fx files as ``str`` or empty ``list`` (if not available). area_type : str Use land (``'land'``) or sea (``'sea'``) fraction for weighting. @@ -74,14 +59,13 @@ def weighting_landsea_fraction(cube, fx_variables, area_type): TypeError ``area_type`` is not ``'land'`` or ``'sea'``. ValueError - Land/sea fraction variables ``sftlf`` or ``sftof`` not found or shape - of them is not broadcastable to ``cube``. + Land/sea fraction variables ``sftlf`` or ``sftof`` not found. """ if area_type not in ('land', 'sea'): raise TypeError( f"Expected 'land' or 'sea' for area_type, got '{area_type}'") - (land_fraction, errors) = _get_land_fraction(cube, fx_variables) + (land_fraction, errors) = _get_land_fraction(cube) if land_fraction is None: raise ValueError( f"Weighting of '{cube.var_name}' with '{area_type}' fraction " diff --git a/package/meta.yaml b/package/meta.yaml index d07ba8f841..59e4de85c6 100644 --- a/package/meta.yaml +++ b/package/meta.yaml @@ -2,7 +2,7 @@ --- # Build command (run this from the root of the repository): -# conda build package -c conda-forge -c esmvalgroup +# conda build package -c conda-forge # Package version number {% set version = "2.2.0" %} @@ -42,7 +42,7 @@ requirements: - scipy<1.6 # until ESMValGroup/ESMValCore/issues/927 gets resolved # Normally installed via pip: - cftime # iris=3.0.1 needs <=1.2.1; >=1.3.0 years<999 get a 0 instead of empty space - - cf-units + - cf-units>=2.1.5 - cython # required by cf-units but not automatically installed - esmpy - fiona @@ -58,7 +58,7 @@ requirements: - pyyaml - requests - shapely - - yamale==2.* # in esmvalgroup channel + - yamale test: source_files: @@ -72,11 +72,13 @@ test: - pytest-html!=2.1.0 - pytest-metadata>=1.5.1 - pytest-mock + - pytest-mypy - pytest-xdist - r-yaml - ncl commands: - - pytest -n 2 --ignore=run_test.py + - pytest -n 2 -m "not sequential" --ignore=run_test.py + - pytest -n 0 -m "sequential" --ignore=run_test.py - esmvaltool -- --help - esmvaltool version imports: diff --git a/setup.cfg b/setup.cfg index e794a1784d..6989a18c54 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,6 +7,7 @@ builder = html [tool:pytest] addopts = --flake8 + --mypy --doctest-modules --ignore=esmvalcore/cmor/tables/ --cov=esmvalcore @@ -32,3 +33,7 @@ convention = numpy [isort] multi_line_output = 3 include_trailing_comma = true + +[mypy] +ignore_missing_imports = True +files = esmvalcore, tests diff --git a/setup.py b/setup.py index 5b5c07655a..12f992c827 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ # Installation dependencies # Use with pip install . 
to install from source 'install': [ - 'cf-units', + 'cf-units>=2.1.5', 'dask[array]', 'fiona', 'fire', @@ -46,7 +46,7 @@ 'scitools-iris>=3.0.1', 'shapely[vectorized]', 'stratify', - 'yamale==2.*', + 'yamale', ], # Test dependencies # Execute 'python setup.py test' to run tests @@ -57,9 +57,14 @@ 'pytest-flake8>=1.0.6', 'pytest-html!=2.1.0', 'pytest-metadata>=1.5.1', + 'pytest-mypy', 'pytest-mock', 'pytest-xdist', 'ESMValTool_sample_data==0.0.3', + # MyPy library stubs + 'types-requests', + 'types-pkg_resources', + 'types-PyYAML', ], # Development dependencies # Use pip install -e .[develop] to install in development mode @@ -69,7 +74,7 @@ 'docformatter', 'isort', 'pre-commit', - 'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4', + 'prospector[with_pyroma,with_mypy]!=1.1.6.3,!=1.1.6.4', 'sphinx>2', 'sphinx_rtd_theme', 'vprof', @@ -110,7 +115,7 @@ def install_deps_temp(self): class RunLinter(CustomCommand): """Class to run a linter and generate reports.""" - user_options = [] + user_options: list = [] def initialize_options(self): """Do nothing.""" diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py index 6bb2cd1f14..42290a971a 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py +++ b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py @@ -44,7 +44,7 @@ def test_fix_metadata(self): time = cube.coord('time') dates = num2date(time.points, time.units.name, time.units.calendar) self.assertEqual(time.units.calendar, 'gregorian') - self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), ' 30001161200') + self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), '30001161200') self.assertEqual(dates[1].strftime('%Y%m%d%H%M'), '185001161200') def test_fix_metadata_if_not_time(self): diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py index 435782e5b9..b5e1959ca9 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py +++ b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py @@ -43,7 +43,7 @@ def test_fix_metadata(self): time = cube.coord('time') dates = num2date(time.points, time.units.name, time.units.calendar) self.assertEqual(time.units.calendar, 'gregorian') - self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), ' 30001161200') + self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), '30001161200') self.assertEqual(dates[1].strftime('%Y%m%d%H%M'), '185001161200') def test_fix_metadata_if_not_time(self): diff --git a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py index a55007923e..dd38a6f36c 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py +++ b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py @@ -5,8 +5,12 @@ import numpy as np from esmvalcore.cmor._fixes.cmip5.bcc_csm1_1 import Cl, Tos -from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord +from esmvalcore.cmor._fixes.common import ( + ClFixHybridPressureCoord, + OceanFixGrid, +) from esmvalcore.cmor.fix import Fix +from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -26,11 +30,16 @@ class TestTos(unittest.TestCase): def test_get(self): """Test fix get.""" self.assertListEqual( - Fix.get_fixes('CMIP5', 'BCC-CSM1-1', 'Amon', 'tos'), [Tos(None)]) + Fix.get_fixes('CMIP5', 'bcc-csm1-1', 'Amon', 'tos'), [Tos(None)]) -def test_tos_fix_data(): - """Test ``fix_data`` for ``tos``.""" +def test_tos_fix(): + """Test fix for ``tos``.""" + assert Tos is OceanFixGrid + + +def 
test_tos_fix_metadata(): + """Test ``fix_metadata`` for ``tos``.""" grid_lat = iris.coords.DimCoord( [20.0, 40.0], bounds=[[10.0, 30.0], [30.0, 50.0]], @@ -55,22 +64,51 @@ def test_tos_fix_data(): standard_name='longitude', units='degrees_east', ) + time_coord = iris.coords.DimCoord( + 1.0, + bounds=[0.0, 2.0], + var_name='time', + standard_name='time', + long_name='time', + units='days since 1950-01-01', + ) # Create cube without bounds cube = iris.cube.Cube( - np.full((2, 3), 300.0), + np.full((1, 2, 3), 300.0), var_name='tos', + standard_name='sea_surface_temperature', units='K', - dim_coords_and_dims=[(grid_lat, 0), (grid_lon, 1)], - aux_coords_and_dims=[(latitude, (0, 1)), (longitude, (0, 1))], + dim_coords_and_dims=[(time_coord, 0), (grid_lat, 1), (grid_lon, 2)], + aux_coords_and_dims=[(latitude, (1, 2)), (longitude, (1, 2))], ) assert cube.coord('latitude').bounds is None assert cube.coord('longitude').bounds is None # Apply fix - fix = Tos(None) - fixed_cube = fix.fix_data(cube) + vardef = get_var_info('CMIP6', 'Omon', 'tos') + fix = Tos(vardef) + cubes = iris.cube.CubeList([cube]) + fixed_cubes = fix.fix_metadata(cubes) + assert len(fixed_cubes) == 1 + fixed_cube = fixed_cubes.extract_cube('sea_surface_temperature') assert fixed_cube is cube + i_coord = fixed_cube.coord('cell index along first dimension') + j_coord = fixed_cube.coord('cell index along second dimension') + assert i_coord.var_name == 'i' + assert i_coord.standard_name is None + assert i_coord.long_name == 'cell index along first dimension' + assert i_coord.units == '1' + assert i_coord.circular is False + assert j_coord.var_name == 'j' + assert j_coord.standard_name is None + assert j_coord.long_name == 'cell index along second dimension' + assert j_coord.units == '1' + np.testing.assert_allclose(i_coord.points, [0, 1, 2]) + np.testing.assert_allclose(i_coord.bounds, + [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]]) + np.testing.assert_allclose(j_coord.points, [0, 1]) + np.testing.assert_allclose(j_coord.bounds, [[-0.5, 0.5], [0.5, 1.5]]) assert fixed_cube.coord('latitude').bounds is not None assert fixed_cube.coord('longitude').bounds is not None latitude_bounds = np.array([[[-40, -33.75, -23.75, -30.0], diff --git a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py index ce8af0eeb5..8473c84f44 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py +++ b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py @@ -1,9 +1,11 @@ """Test fixes for bcc-csm1-1-m.""" import unittest -from esmvalcore.cmor._fixes.cmip5.bcc_csm1_1 import Tos as BaseTos from esmvalcore.cmor._fixes.cmip5.bcc_csm1_1_m import Cl, Tos -from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord +from esmvalcore.cmor._fixes.common import ( + ClFixHybridPressureCoord, + OceanFixGrid, +) from esmvalcore.cmor._fixes.fix import Fix @@ -29,4 +31,4 @@ def test_get(self): def test_tos_fix(): """Test fix for ``tos``.""" - assert Tos is BaseTos + assert Tos is OceanFixGrid diff --git a/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py b/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py index 1b5ee52e63..8ec734a90c 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py +++ b/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py @@ -2,11 +2,18 @@ import unittest import numpy as np + from cf_units import Unit -from iris.coords import DimCoord +from iris.coords import AuxCoord, DimCoord from iris.cube import Cube, CubeList - -from 
esmvalcore.cmor._fixes.cmip5.ec_earth import Areacello, Sftlf, Sic, Tas +from iris.exceptions import CoordinateNotFoundError +from esmvalcore.cmor._fixes.cmip5.ec_earth import ( + Areacello, + Pr, + Sftlf, + Sic, + Tas, + ) from esmvalcore.cmor.fix import Fix @@ -152,3 +159,75 @@ def test_areacello_fix_metadata(self): out_cube[0].coord('latitude') out_cube[0].coord('longitude') + + +class TestPr(unittest.TestCase): + """Test pr fixes.""" + def setUp(self): + """Prepare tests.""" + + wrong_time_coord = AuxCoord( + points=[1.0, 2.0, 1.0, 2.0, 3.0], + var_name='time', + standard_name='time', + units='days since 1850-01-01', + ) + + correct_time_coord = AuxCoord( + points=[1.0, 2.0, 3.0], + var_name='time', + standard_name='time', + units='days since 1850-01-01', + ) + + lat_coord = DimCoord( + [0.0], + standard_name='latitude', + var_name='lat', + ) + + lon_coord = DimCoord( + [0.0], + standard_name='longitude', + var_name='lon', + ) + + self.time_coord = correct_time_coord + self.wrong_cube = CubeList([Cube(np.ones((5, 1, 1)), + var_name='pr', + units='kg m-2 s-1')]) + self.wrong_cube[0].add_aux_coord(wrong_time_coord, 0) + self.wrong_cube[0].add_dim_coord(lat_coord, 1) + self.wrong_cube[0].add_dim_coord(lon_coord, 2) + self.correct_cube = CubeList([Cube(np.ones(3), + var_name='pr', + units='kg m-2 s-1')]) + self.correct_cube[0].add_aux_coord(correct_time_coord, 0) + + self.fix = Pr(None) + + def test_get(self): + """Test fix get""" + self.assertListEqual( + Fix.get_fixes('CMIP5', 'EC-EARTH', 'Amon', 'pr'), + [Pr(None)], + ) + + def test_pr_fix_metadata(self): + """Test metadata fix.""" + + out_wrong_cube = self.fix.fix_metadata(self.wrong_cube) + out_correct_cube = self.fix.fix_metadata(self.correct_cube) + + time = out_wrong_cube[0].coord('time') + assert time == self.time_coord + + time = out_correct_cube[0].coord('time') + assert time == self.time_coord + + def test_pr_fix_metadata_no_time(self): + """Test metadata fix with no time coord.""" + self.correct_cube[0].remove_coord('time') + out_correct_cube = self.fix.fix_metadata(self.correct_cube) + with self.assertRaises(CoordinateNotFoundError): + out_correct_cube[0].coord('time') diff --git a/tests/integration/cmor/_fixes/cmip5/test_miroc5.py b/tests/integration/cmor/_fixes/cmip5/test_miroc5.py index 9be2a5f269..a9d61dde10 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_miroc5.py +++ b/tests/integration/cmor/_fixes/cmip5/test_miroc5.py @@ -5,7 +5,7 @@ from cf_units import Unit from iris.cube import Cube -from esmvalcore.cmor._fixes.cmip5.miroc5 import Cl, Hur, Sftof, Tas +from esmvalcore.cmor._fixes.cmip5.miroc5 import Cl, Hur, Pr, Sftof, Tas from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor.fix import Fix @@ -27,6 +27,12 @@ def test_get_hur_fix(): assert fix == [Hur(None)] +def test_get_pr_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP5', 'MIROC5', 'Amon', 'pr') + assert fix == [Pr(None)] + + @unittest.mock.patch( 'esmvalcore.cmor._fixes.cmip5.miroc5.Tas.fix_metadata', autospec=True) @@ -37,6 +43,16 @@ def test_hur_fix_metadata(mock_base_fix_metadata): mock_base_fix_metadata.assert_called_once_with(fix, 'cubes') +@unittest.mock.patch( + 'esmvalcore.cmor._fixes.cmip5.miroc5.Tas.fix_metadata', + autospec=True) +def test_pr_fix_metadata(mock_base_fix_metadata): + """Test ``fix_metadata`` for ``pr``.""" + fix = Pr(None) + fix.fix_metadata('cubes') + mock_base_fix_metadata.assert_called_once_with(fix, 'cubes') + + class TestSftof(unittest.TestCase): """Test sftof fixes.""" diff 
--git a/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py b/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py index 21aa51dfca..8b0b03916e 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py +++ b/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py @@ -1,13 +1,17 @@ """Test fixes for BCC-CSM2-MR.""" -import unittest.mock - -import iris - -from esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr import (Cl, Cli, - Clw, Tos, Siconc) -from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord +from esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr import ( + Cl, + Cli, + Clw, + Siconc, + Sos, + Tos, +) +from esmvalcore.cmor._fixes.common import ( + ClFixHybridPressureCoord, + OceanFixGrid, +) from esmvalcore.cmor._fixes.fix import Fix -from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -49,146 +53,28 @@ def test_get_tos_fix(): assert fix == [Tos(None)] +def test_tos_fix(): + """Test fix for ``tos``.""" + assert Tos is OceanFixGrid + + def test_get_siconc_fix(): """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'Omon', 'siconc') + fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'SImon', 'siconc') assert fix == [Siconc(None)] -@unittest.mock.patch( - 'esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr.BaseTos.fix_data', - autospec=True) -def test_tos_fix_data(mock_base_fix_data): - """Test ``fix_data`` for ``tos``.""" - fix = Tos(None) - fix.fix_data('cubes') - mock_base_fix_data.assert_called_once_with(fix, 'cubes') - - -@unittest.mock.patch( - 'esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr.BaseTos.fix_data', - autospec=True) -def test_siconc_fix_data(mock_base_fix_data): - """Test ``fix_data`` for ``siconc``.""" - fix = Siconc(None) - fix.fix_data('cubes') - mock_base_fix_data.assert_called_once_with(fix, 'cubes') - - -def test_tos_fix_metadata(): - """Test ``fix_metadata`` for ``tos``.""" - grid_lat = iris.coords.DimCoord([1.0], - var_name='lat', - standard_name='latitude', - long_name='latitude', - units='degrees_north', - attributes={'1D': '1'}) - grid_lon = iris.coords.DimCoord([1.0], - var_name='lon', - standard_name='longitude', - long_name='longitude', - units='degrees_east', - circular=True, - attributes={'1D': '1'}) - latitude = iris.coords.AuxCoord([[0.0]], - var_name='lat', - standard_name='latitude', - long_name='latitude', - units='degrees_north') - longitude = iris.coords.AuxCoord([[0]], - var_name='lon', - standard_name='longitude', - long_name='longitude', - units='degrees_east') - cube = iris.cube.Cube( - [[[0.0]]], - var_name='tos', - long_name='sea_surface_temperature', - dim_coords_and_dims=[(grid_lat.copy(), 1), (grid_lon.copy(), 2)], - aux_coords_and_dims=[(latitude.copy(), (1, 2)), - (longitude.copy(), (1, 2))], - ) - cubes = iris.cube.CubeList([cube, iris.cube.Cube(0.0)]) - vardef = get_var_info('CMIP6', 'Omon', 'tos') - fix = Tos(vardef) - fixed_cubes = fix.fix_metadata(cubes) - tos_cube = fixed_cubes.extract_cube('sea_surface_temperature') - - # No duplicates anymore - assert len(tos_cube.coords('latitude')) == 1 - assert len(tos_cube.coords('longitude')) == 1 - - # Latitude - grid_lat = tos_cube.coord('grid_latitude') - assert grid_lat.var_name == 'i' - assert grid_lat.long_name == 'grid_latitude' - assert grid_lat.standard_name is None - assert grid_lat.units == '1' - - # Longitude - grid_lon = tos_cube.coord('grid_longitude') - assert grid_lon.var_name == 'j' - assert grid_lon.long_name == 'grid_longitude' - assert grid_lon.standard_name is None - assert grid_lon.units == '1' - assert not grid_lon.circular - - -def 
test_siconc_fix_metadata(): - """Test ``fix_metadata`` for ``tos``.""" - grid_lat = iris.coords.DimCoord([1.0], - var_name='lat', - standard_name='latitude', - long_name='latitude', - units='degrees_north', - attributes={'1D': '1'}) - grid_lon = iris.coords.DimCoord([1.0], - var_name='lon', - standard_name='longitude', - long_name='longitude', - units='degrees_east', - circular=True, - attributes={'1D': '1'}) - latitude = iris.coords.AuxCoord([[0.0]], - var_name='lat', - standard_name='latitude', - long_name='latitude', - units='degrees_north') - longitude = iris.coords.AuxCoord([[0]], - var_name='lon', - standard_name='longitude', - long_name='longitude', - units='degrees_east') - - cube = iris.cube.Cube( - [[[0.0]]], - var_name='siconc', - long_name='sea_ice_area_fraction', - dim_coords_and_dims=[(grid_lat.copy(), 1), (grid_lon.copy(), 2)], - aux_coords_and_dims=[(latitude.copy(), (1, 2)), - (longitude.copy(), (1, 2))], - ) - cubes = iris.cube.CubeList([cube, iris.cube.Cube(0.0)]) - vardef = get_var_info('CMIP6', 'SImon', 'siconc') - fix = Siconc(vardef) - fixed_cubes = fix.fix_metadata(cubes) - siconc_cube = fixed_cubes.extract_cube('sea_ice_area_fraction') - - # No duplicates anymore - assert len(siconc_cube.coords('latitude')) == 1 - assert len(siconc_cube.coords('longitude')) == 1 - - # Latitude - grid_lat = siconc_cube.coord('grid_latitude') - assert grid_lat.var_name == 'i' - assert grid_lat.long_name == 'grid_latitude' - assert grid_lat.standard_name is None - assert grid_lat.units == '1' - - # Longitude - grid_lon = siconc_cube.coord('grid_longitude') - assert grid_lon.var_name == 'j' - assert grid_lon.long_name == 'grid_longitude' - assert grid_lon.standard_name is None - assert grid_lon.units == '1' - assert not grid_lon.circular +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is OceanFixGrid + + +def test_get_sos_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'Omon', 'sos') + assert fix == [Sos(None)] + + +def test_sos_fix(): + """Test fix for ``sos``.""" + assert Sos is OceanFixGrid diff --git a/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py b/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py index c6ce251355..382d0268f3 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py +++ b/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py @@ -1,7 +1,16 @@ """Test fixes for BCC-ESM1.""" -from esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr import Tos as BaseTos -from esmvalcore.cmor._fixes.cmip6.bcc_esm1 import Cl, Cli, Clw, Tos -from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord +from esmvalcore.cmor._fixes.cmip6.bcc_esm1 import ( + Cl, + Cli, + Clw, + Siconc, + Sos, + Tos, +) +from esmvalcore.cmor._fixes.common import ( + ClFixHybridPressureCoord, + OceanFixGrid, +) from esmvalcore.cmor._fixes.fix import Fix @@ -38,6 +47,28 @@ def test_clw_fix(): assert Clw is ClFixHybridPressureCoord +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'SImon', 'siconc') + assert fix == [Siconc(None)] + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is OceanFixGrid + + +def test_get_sos_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'Omon', 'sos') + assert fix == [Sos(None)] + + +def test_sos_fix(): + """Test fix for ``sos``.""" + assert Sos is OceanFixGrid + + def test_get_tos_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'Omon', 'tos') @@ -46,4 +77,4 @@ def 
test_get_tos_fix(): def test_tos_fix(): """Test fix for ``tos``.""" - assert Tos is BaseTos + assert Tos is OceanFixGrid diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py index 7ec96ffb5e..212902dfed 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py @@ -8,7 +8,17 @@ import pytest from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.cesm2 import Cl, Cli, Clw, Tas, Tos +from esmvalcore.cmor._fixes.cmip6.cesm2 import ( + Cl, + Cli, + Clw, + Fgco2, + Omon, + Siconc, + Tas, + Tos, +) +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -51,6 +61,7 @@ def test_get_cl_fix(): [7.0, 25.0]]]]]) +@pytest.mark.sequential @pytest.mark.skipif(sys.version_info < (3, 7, 6), reason="requires python3.7.6 or newer") @unittest.mock.patch( @@ -224,6 +235,34 @@ def tos_cubes(): return iris.cube.CubeList([tos_cube]) +@pytest.fixture +def thetao_cubes(): + """Cubes to test fixes for ``thetao``.""" + time_coord = iris.coords.DimCoord( + [0.0004, 1.09776], var_name='time', standard_name='time', + units='days since 1850-01-01 00:00:00') + lat_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') + lon_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') + lev_coord = iris.coords.DimCoord( + [500.0, 1000.0], bounds=[[2.5, 7.5], [7.5, 12.5]], + var_name='lev', standard_name=None, units='cm', + attributes={'positive': 'up'}) + coord_specs = [ + (time_coord, 0), + (lev_coord, 1), + (lat_coord, 2), + (lon_coord, 3), + ] + thetao_cube = iris.cube.Cube( + np.ones((2, 2, 2, 2)), + var_name='thetao', + dim_coords_and_dims=coord_specs, + ) + return iris.cube.CubeList([thetao_cube]) + + def test_get_tas_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'CESM2', 'Amon', 'tas') @@ -232,8 +271,26 @@ def test_get_tas_fix(): def test_get_tos_fix(): """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2', 'Amon', 'tos') - assert fix == [Tos(None)] + fix = Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos') + assert fix == [Tos(None), Omon(None)] + + +def test_get_thetao_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao') + assert fix == [Omon(None)] + + +def test_get_fgco2_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'fgco2') + assert fix == [Fgco2(None), Omon(None)] + + +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2', 'SImon', 'siconc') + assert fix == [Siconc(None)] def test_tas_fix_metadata(tas_cubes): @@ -270,3 +327,55 @@ def test_tos_fix_metadata(tos_cubes): assert out_cubes is tos_cubes for cube in out_cubes: np.testing.assert_equal(cube.coord("time").points, [0., 1.1]) + + +def test_thetao_fix_metadata(thetao_cubes): + """Test ``fix_metadata`` for ``thetao``.""" + vardef = get_var_info('CMIP6', 'Omon', 'thetao') + fix = Omon(vardef) + out_cubes = fix.fix_metadata(thetao_cubes) + assert out_cubes is thetao_cubes + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + # Check metadata of depth coordinate + depth_coord = out_cube.coord('depth') + assert depth_coord.standard_name == 'depth' + assert depth_coord.var_name == 'lev' + assert depth_coord.long_name == 'ocean depth coordinate' + assert depth_coord.units == 'm' + assert 
depth_coord.attributes == {'positive': 'down'} + + # Check values of depth coordinate + np.testing.assert_allclose(depth_coord.points, [5.0, 10.0]) + np.testing.assert_allclose(depth_coord.bounds, [[2.5, 7.5], [7.5, 12.5]]) + + +def test_fgco2_fix_metadata(): + """Test ``fix_metadata`` for ``fgco2``.""" + vardef = get_var_info('CMIP6', 'Omon', 'fgco2') + cubes = iris.cube.CubeList([ + iris.cube.Cube(0.0, var_name='fgco2'), + ]) + fix = Fgco2(vardef) + out_cubes = fix.fix_metadata(cubes) + assert out_cubes is cubes + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + # Check depth coordinate + depth_coord = out_cube.coord('depth') + assert depth_coord.standard_name == 'depth' + assert depth_coord.var_name == 'depth' + assert depth_coord.long_name == 'depth' + assert depth_coord.units == 'm' + assert depth_coord.attributes == {'positive': 'down'} + + # Check values of depth coordinate + np.testing.assert_allclose(depth_coord.points, 0.0) + assert depth_coord.bounds is None + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is SiconcFixScalarCoord diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py index 5159cbea9b..4a5e049126 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py @@ -1,7 +1,16 @@ """Tests for the fixes of CESM2-FV2.""" from esmvalcore.cmor._fixes.cmip6.cesm2 import Cl as BaseCl +from esmvalcore.cmor._fixes.cmip6.cesm2 import Fgco2 as BaseFgco2 from esmvalcore.cmor._fixes.cmip6.cesm2 import Tas as BaseTas -from esmvalcore.cmor._fixes.cmip6.cesm2_fv2 import Cl, Cli, Clw, Tas +from esmvalcore.cmor._fixes.cmip6.cesm2_fv2 import ( + Cl, + Cli, + Clw, + Fgco2, + Siconc, + Tas, +) +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor.fix import Fix @@ -38,6 +47,28 @@ def test_clw_fix(): assert Clw is BaseCl +def test_get_fgco2_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2-FV2', 'Omon', 'fgco2') + assert fix == [Fgco2(None)] + + +def test_fgco2_fix(): + """Test fix for ``fgco2``.""" + assert Fgco2 is BaseFgco2 + + +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2-FV2', 'SImon', 'siconc') + assert fix == [Siconc(None)] + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is SiconcFixScalarCoord + + def test_get_tas_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'CESM2-FV2', 'Amon', 'tas') diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py index 6c9743701f..5cc66aef77 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py @@ -8,8 +8,17 @@ import pytest from esmvalcore.cmor._fixes.cmip6.cesm2 import Cl as BaseCl +from esmvalcore.cmor._fixes.cmip6.cesm2 import Fgco2 as BaseFgco2 from esmvalcore.cmor._fixes.cmip6.cesm2 import Tas as BaseTas -from esmvalcore.cmor._fixes.cmip6.cesm2_waccm import Cl, Cli, Clw, Tas +from esmvalcore.cmor._fixes.cmip6.cesm2_waccm import ( + Cl, + Cli, + Clw, + Fgco2, + Siconc, + Tas, +) +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor.fix import Fix @@ -72,6 +81,28 @@ def test_clw_fix(): assert Clw is Cl +def test_get_fgco2_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM', 'Omon', 'fgco2') + assert fix == [Fgco2(None)] + + +def 
test_fgco2_fix(): + """Test fix for ``fgco2``.""" + assert Fgco2 is BaseFgco2 + + +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM', 'SImon', 'siconc') + assert fix == [Siconc(None)] + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is SiconcFixScalarCoord + + @pytest.fixture def tas_cubes(): """Cubes to test fixes for ``tas``.""" diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py index c5837211b2..5aec254229 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py @@ -1,7 +1,16 @@ """Tests for the fixes of CESM2-WACCM-FV2.""" from esmvalcore.cmor._fixes.cmip6.cesm2 import Tas as BaseTas +from esmvalcore.cmor._fixes.cmip6.cesm2 import Fgco2 as BaseFgco2 from esmvalcore.cmor._fixes.cmip6.cesm2_waccm import Cl as BaseCl -from esmvalcore.cmor._fixes.cmip6.cesm2_waccm_fv2 import Cl, Cli, Clw, Tas +from esmvalcore.cmor._fixes.cmip6.cesm2_waccm_fv2 import ( + Cl, + Cli, + Clw, + Fgco2, + Siconc, + Tas, +) +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor.fix import Fix @@ -38,6 +47,28 @@ def test_clw_fix(): assert Clw is BaseCl +def test_get_fgco2_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM-FV2', 'Omon', 'fgco2') + assert fix == [Fgco2(None)] + + +def test_fgco2_fix(): + """Test fix for ``fgco2``.""" + assert Fgco2 is BaseFgco2 + + +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM-FV2', 'SImon', 'siconc') + assert fix == [Siconc(None)] + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is SiconcFixScalarCoord + + def test_get_tas_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM-FV2', 'Amon', 'tas') diff --git a/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py b/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py index 01ce238529..8ff7b7e0a2 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py @@ -1,10 +1,16 @@ """Test fixes for CNRM-ESM2-1.""" +import iris +import numpy as np +import pytest + from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Cl as BaseCl from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Clcalipso as BaseClcalipso from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Cli as BaseCli from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Clw as BaseClw -from esmvalcore.cmor._fixes.cmip6.cnrm_esm2_1 import Cl, Clcalipso, Cli, Clw +from esmvalcore.cmor._fixes.cmip6.cnrm_esm2_1 import (Cl, Clcalipso, + Cli, Clw, Omon) from esmvalcore.cmor._fixes.fix import Fix +from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -49,3 +55,55 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is BaseClw + + +@pytest.fixture +def thetao_cubes(): + """Cubes to test fixes for ``thetao``.""" + time_coord = iris.coords.DimCoord( + [0.0004, 1.09776], var_name='time', standard_name='time', + units='days since 1850-01-01 00:00:00') + lat_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') + lon_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') + lev_coord = iris.coords.DimCoord( + [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], + var_name='lev', 
standard_name=None, units='m',
+        attributes={'positive': 'up'})
+    coord_specs = [
+        (time_coord, 0),
+        (lev_coord, 1),
+        (lat_coord, 2),
+        (lon_coord, 3),
+    ]
+    thetao_cube = iris.cube.Cube(
+        np.ones((2, 2, 2, 2)),
+        var_name='thetao',
+        dim_coords_and_dims=coord_specs,
+    )
+    return iris.cube.CubeList([thetao_cube])
+
+
+def test_get_thetao_fix():
+    """Test getting of fix."""
+    fix = Fix.get_fixes('CMIP6', 'CNRM-ESM2-1', 'Omon', 'thetao')
+    assert fix == [Omon(None)]
+
+
+def test_thetao_fix_metadata(thetao_cubes):
+    """Test ``fix_metadata`` for ``thetao``."""
+    vardef = get_var_info('CMIP6', 'Omon', 'thetao')
+    fix = Omon(vardef)
+    out_cubes = fix.fix_metadata(thetao_cubes)
+    assert out_cubes is thetao_cubes
+    assert len(out_cubes) == 1
+    out_cube = out_cubes[0]
+
+    # Check metadata of depth coordinate
+    depth_coord = out_cube.coord('depth')
+    assert depth_coord.standard_name == 'depth'
+    assert depth_coord.var_name == 'lev'
+    assert depth_coord.long_name == 'ocean depth coordinate'
+    assert depth_coord.units == 'm'
+    assert depth_coord.attributes == {'positive': 'down'}
diff --git a/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py b/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py
index bb0f2060f7..c134ae1228 100644
--- a/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py
+++ b/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py
@@ -1,7 +1,14 @@
 """Tests for the fixes of FGOALS-g3."""
+from unittest import mock
+
+import iris
+import numpy as np
+
 from esmvalcore.cmor._fixes.cmip5.fgoals_g2 import Cl as BaseCl
-from esmvalcore.cmor._fixes.cmip6.fgoals_g3 import Cl, Cli, Clw
+from esmvalcore.cmor._fixes.cmip6.fgoals_g3 import Cl, Cli, Clw, Siconc, Tos
+from esmvalcore.cmor._fixes.common import OceanFixGrid
 from esmvalcore.cmor.fix import Fix
+from esmvalcore.cmor.table import get_var_info
 
 
 def test_get_cl_fix():
@@ -35,3 +42,56 @@ def test_get_clw_fix():
 def test_clw_fix():
     """Test fix for ``clw``."""
     assert Clw is BaseCl
+
+
+def test_get_tos_fix():
+    """Test getting of fix."""
+    fix = Fix.get_fixes('CMIP6', 'FGOALS-g3', 'Omon', 'tos')
+    assert fix == [Tos(None)]
+
+
+def test_tos_fix():
+    """Test fix for ``tos``."""
+    assert issubclass(Tos, OceanFixGrid)
+
+
+@mock.patch('esmvalcore.cmor._fixes.cmip6.fgoals_g3.OceanFixGrid.fix_metadata',
+            autospec=True)
+def test_tos_fix_metadata(mock_base_fix_metadata):
+    """Test ``fix_metadata`` for ``tos``."""
+    mock_base_fix_metadata.side_effect = lambda x, y: y
+
+    # Create test cube
+    lat_coord = iris.coords.AuxCoord([3.14, 1200.0, 6.28],
+                                     var_name='lat',
+                                     standard_name='latitude')
+    lon_coord = iris.coords.AuxCoord([1.0, 2.0, 1e30],
+                                     var_name='lon',
+                                     standard_name='longitude')
+    cube = iris.cube.Cube([1.0, 2.0, 3.0], var_name='tos',
+                          standard_name='sea_surface_temperature',
+                          aux_coords_and_dims=[(lat_coord, 0), (lon_coord, 0)])
+    cubes = iris.cube.CubeList([cube])
+
+    # Apply fix
+    vardef = get_var_info('CMIP6', 'Omon', 'tos')
+    fix = Tos(vardef)
+    fixed_cubes = fix.fix_metadata(cubes)
+    assert len(fixed_cubes) == 1
+    fixed_cube = fixed_cubes[0]
+    np.testing.assert_allclose(fixed_cube.coord('latitude').points,
+                               [3.14, 0.0, 6.28])
+    np.testing.assert_allclose(fixed_cube.coord('longitude').points,
+                               [1.0, 2.0, 0.0])
+    mock_base_fix_metadata.assert_called_once_with(fix, cubes)
+
+
+def test_get_siconc_fix():
+    """Test getting of fix."""
+    fix = Fix.get_fixes('CMIP6', 'FGOALS-g3', 'SImon', 'siconc')
+    assert fix == [Siconc(None)]
+
+
+def test_siconc_fix():
+    """Test fix for ``siconc``."""
+    assert Siconc is Tos
diff --git 
a/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py b/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py index a563fd1362..f53379a1dc 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py +++ b/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py @@ -1,8 +1,10 @@ """Tests for the fixes of GFDL-CM4.""" import iris import numpy as np +import pytest -from esmvalcore.cmor._fixes.cmip6.gfdl_cm4 import Cl, Cli, Clw +from esmvalcore.cmor._fixes.cmip6.gfdl_cm4 import Cl, Cli, Clw, Siconc +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -33,6 +35,7 @@ def test_get_cl_fix(): [9.0, 21.0]]]]]) +@pytest.mark.sequential def test_cl_fix_metadata(test_data_path): """Test ``fix_metadata`` for ``cl``.""" nc_path = test_data_path / 'gfdl_cm4_cl.nc' @@ -88,3 +91,14 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is Cl + + +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'GFDL-CM4', 'SImon', 'siconc') + assert fix == [Siconc(None)] + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is SiconcFixScalarCoord diff --git a/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py b/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py index 57a834fe5a..d21323bb10 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py +++ b/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py @@ -3,57 +3,102 @@ import iris import numpy as np import pytest -from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.gfdl_esm4 import Siconc +from esmvalcore.cmor._fixes.cmip6.gfdl_esm4 import Fgco2, Omon, Siconc +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info -@pytest.fixture -def siconc_cubes(): - """Sample cube.""" - time_coord = iris.coords.DimCoord([0.0], standard_name='time', - var_name='time', - units='days since 6543-2-1') - lat_coord = iris.coords.DimCoord([-30.0], standard_name='latitude', - var_name='lat', units='degrees_north') - lon_coord = iris.coords.DimCoord([30.0], standard_name='longitude', - var_name='lon', units='degrees_east') - coords_specs = [(time_coord, 0), (lat_coord, 1), (lon_coord, 2)] - cube = iris.cube.Cube([[[22.0]]], standard_name='sea_ice_area_fraction', - var_name='siconc', units='%', - dim_coords_and_dims=coords_specs) - return iris.cube.CubeList([cube]) - - def test_get_siconc_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'SImon', 'siconc') assert fix == [Siconc(None)] -def test_siconc_fix_metadata(siconc_cubes): - """Test ``fix_metadata`` for ``cl``.""" - assert len(siconc_cubes) == 1 - siconc_cube = siconc_cubes[0] - assert siconc_cube.var_name == "siconc" - - # Extract siconc cube - siconc_cube = siconc_cubes.extract_cube('sea_ice_area_fraction') - assert not siconc_cube.coords('typesi') - - # Apply fix - vardef = get_var_info('CMIP6', 'SImon', 'siconc') - fix = Siconc(vardef) - fixed_cubes = fix.fix_metadata(siconc_cubes) - assert len(fixed_cubes) == 1 - fixed_siconc_cube = fixed_cubes.extract_cube( - 'sea_ice_area_fraction') - fixed_typesi_coord = fixed_siconc_cube.coord('area_type') - assert fixed_typesi_coord.points is not None - assert fixed_typesi_coord.bounds is None - np.testing.assert_equal(fixed_typesi_coord.points, - ['siconc']) - np.testing.assert_equal(fixed_typesi_coord.units, - Unit('1')) +def test_siconc_fix(): + """Test fix for 
``siconc``.""" + assert Siconc is SiconcFixScalarCoord + + +@pytest.fixture +def thetao_cubes(): + """Cubes to test fixes for ``thetao``.""" + time_coord = iris.coords.DimCoord( + [0.0004, 1.09776], var_name='time', standard_name='time', + units='days since 1850-01-01 00:00:00') + lat_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') + lon_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') + lev_coord = iris.coords.DimCoord( + [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], + var_name='lev', standard_name=None, units='m', + attributes={'positive': 'up'}) + coord_specs = [ + (time_coord, 0), + (lev_coord, 1), + (lat_coord, 2), + (lon_coord, 3), + ] + thetao_cube = iris.cube.Cube( + np.ones((2, 2, 2, 2)), + var_name='thetao', + dim_coords_and_dims=coord_specs, + ) + return iris.cube.CubeList([thetao_cube]) + + +def test_get_thetao_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'Omon', 'thetao') + assert fix == [Omon(None)] + + +def test_thetao_fix_metadata(thetao_cubes): + """Test ``fix_metadata`` for ``thetao``.""" + vardef = get_var_info('CMIP6', 'Omon', 'thetao') + fix = Omon(vardef) + out_cubes = fix.fix_metadata(thetao_cubes) + assert out_cubes is thetao_cubes + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + # Check metadata of depth coordinate + depth_coord = out_cube.coord('depth') + assert depth_coord.standard_name == 'depth' + assert depth_coord.var_name == 'lev' + assert depth_coord.long_name == 'ocean depth coordinate' + assert depth_coord.units == 'm' + assert depth_coord.attributes == {'positive': 'down'} + + +def test_get_fgco2_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'Omon', 'fgco2') + assert fix == [Fgco2(None), Omon(None)] + + +def test_fgco2_fix_metadata(): + """Test ``fix_metadata`` for ``fgco2``.""" + vardef = get_var_info('CMIP6', 'Omon', 'fgco2') + cubes = iris.cube.CubeList([ + iris.cube.Cube(0.0, var_name='fgco2'), + ]) + fix = Fgco2(vardef) + out_cubes = fix.fix_metadata(cubes) + assert out_cubes is cubes + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + # Check depth coordinate + depth_coord = out_cube.coord('depth') + assert depth_coord.standard_name == 'depth' + assert depth_coord.var_name == 'depth' + assert depth_coord.long_name == 'depth' + assert depth_coord.units == 'm' + assert depth_coord.attributes == {'positive': 'down'} + + # Check values of depth coordinate + np.testing.assert_allclose(depth_coord.points, 0.0) + assert depth_coord.bounds is None diff --git a/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py b/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py index 526566a2e6..99f2a76aa2 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py +++ b/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py @@ -8,7 +8,7 @@ from iris.cube import Cube, CubeList from iris.exceptions import CoordinateNotFoundError -from esmvalcore.cmor._fixes.cmip6.ipsl_cm6a_lr import AllVars, Clcalipso +from esmvalcore.cmor._fixes.cmip6.ipsl_cm6a_lr import AllVars, Clcalipso, Omon from esmvalcore.cmor._fixes.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -18,8 +18,9 @@ class TestAllVars(unittest.TestCase): def setUp(self): """Set up tests.""" - self.fix = AllVars(None) - self.cube = Cube(np.random.rand(2, 2, 2), var_name='ch4') + vardef = get_var_info('CMIP6', 'Omon', 'tos') + self.fix = AllVars(vardef) + self.cube = Cube(np.random.rand(2, 
2, 2), var_name='tos') self.cube.add_aux_coord( AuxCoord(np.random.rand(2, 2), var_name='nav_lat', @@ -36,20 +37,43 @@ def test_fix_metadata_ocean_var(self): self.assertEqual(len(cubes), 1) cube = cubes[0] + self.assertEqual(cube.var_name, 'tos') self.assertEqual(cube.coord('latitude').var_name, 'lat') self.assertEqual(cube.coord('longitude').var_name, 'lon') - self.cube.coord('cell_area') - def test_fix_data_other_var(self): - """Test ``fix_metadata`` for other variables.""" + def test_fix_data_no_lat(self): + """Test ``fix_metadata`` when no latitude is present.""" + self.cube.remove_coord('latitude') cubes = self.fix.fix_metadata(CubeList([self.cube])) self.assertEqual(len(cubes), 1) cube = cubes[0] - self.assertEqual(cube.coord('latitude').var_name, 'nav_lat') - self.assertEqual(cube.coord('longitude').var_name, 'nav_lon') + self.assertEqual(cube.coord('longitude').var_name, 'lon') + with self.assertRaises(CoordinateNotFoundError): + self.cube.coord('latitude') + + def test_fix_data_no_lon(self): + """Test ``fix_metadata`` when no longitude is present.""" + self.cube.remove_coord('longitude') + cubes = self.fix.fix_metadata(CubeList([self.cube])) + + self.assertEqual(len(cubes), 1) + cube = cubes[0] + self.assertEqual(cube.coord('latitude').var_name, 'lat') with self.assertRaises(CoordinateNotFoundError): - self.cube.coord('cell_area') + self.cube.coord('longitude') + + def test_fix_data_no_lat_lon(self): + """Test ``fix_metadata`` for cubes with no latitude and longitude.""" + self.cube.remove_coord('latitude') + self.cube.remove_coord('longitude') + cubes = self.fix.fix_metadata(CubeList([self.cube])) + + self.assertEqual(len(cubes), 1) + with self.assertRaises(CoordinateNotFoundError): + self.cube.coord('latitude') + with self.assertRaises(CoordinateNotFoundError): + self.cube.coord('longitude') def test_get_clcalipso_fix(): @@ -80,3 +104,55 @@ def test_clcalipso_fix_metadata(clcalipso_cubes): assert coord.long_name == 'altitude' assert coord.standard_name == 'altitude' assert coord.var_name == 'alt40' + + +@pytest.fixture +def thetao_cubes(): + """Cubes to test fixes for ``thetao``.""" + time_coord = iris.coords.DimCoord( + [0.0004, 1.09776], var_name='time', standard_name='time', + units='days since 1850-01-01 00:00:00') + lat_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') + lon_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') + lev_coord = iris.coords.DimCoord( + [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], + var_name='olevel', standard_name=None, units='m', + attributes={'positive': 'up'}) + coord_specs = [ + (time_coord, 0), + (lev_coord, 1), + (lat_coord, 2), + (lon_coord, 3), + ] + thetao_cube = iris.cube.Cube( + np.ones((2, 2, 2, 2)), + var_name='thetao', + dim_coords_and_dims=coord_specs, + ) + return iris.cube.CubeList([thetao_cube]) + + +def test_get_thetao_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'IPSL-CM6A-LR', 'Omon', 'thetao') + assert fix == [Omon(None), AllVars(None)] + + +def test_thetao_fix_metadata(thetao_cubes): + """Test ``fix_metadata`` for ``thetao``.""" + vardef = get_var_info('CMIP6', 'Omon', 'thetao') + fix = Omon(vardef) + out_cubes = fix.fix_metadata(thetao_cubes) + assert out_cubes is thetao_cubes + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + # Check metadata of depth coordinate + depth_coord = out_cube.coord('depth') + assert depth_coord.standard_name == 'depth' + assert depth_coord.var_name == 'lev' + 
assert depth_coord.long_name == 'ocean depth coordinate' + assert depth_coord.units == 'm' + assert depth_coord.attributes == {'positive': 'down'} diff --git a/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py b/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py index bbe1c360d3..41de6089c5 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py @@ -3,7 +3,12 @@ import pytest from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.kiost_esm import SfcWind, Tas +from esmvalcore.cmor._fixes.cmip6.kiost_esm import ( + SfcWind, + Siconc, + Tas, +) +from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord from esmvalcore.cmor._fixes.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -97,6 +102,17 @@ def test_sfcwind_fix_metadata(sfcwind_cubes): assert coord == height_coord +def test_get_siconc_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'KIOST-ESM', 'SImon', 'siconc') + assert fix == [Siconc(None)] + + +def test_siconc_fix(): + """Test fix for ``siconc``.""" + assert Siconc is SiconcFixScalarCoord + + def test_get_tas_fix(): fix = Fix.get_fixes('CMIP6', 'KIOST-ESM', 'Amon', 'tas') assert fix == [Tas(None)] diff --git a/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py b/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py index 303854968b..a886946818 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py +++ b/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py @@ -1,9 +1,10 @@ """Tests for the fixes of MCM-UA-1-0.""" import iris +import numpy as np import pytest from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.mcm_ua_1_0 import AllVars, Tas +from esmvalcore.cmor._fixes.cmip6.mcm_ua_1_0 import AllVars, Omon, Tas, Uas from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -47,6 +48,45 @@ def cubes(): return iris.cube.CubeList([correct_cube, wrong_cube, scalar_cube]) +@pytest.fixture +def uas_cubes(): + correct_lat_coord = iris.coords.DimCoord([0.0], + var_name='lat', + standard_name=' latitude ', + long_name=' latitude') + wrong_lat_coord = iris.coords.DimCoord([0.0], + var_name='latitudeCoord', + standard_name=' latitude', + long_name='latitude') + correct_lon_coord = iris.coords.DimCoord([0.0], + var_name='lon', + standard_name=' longitude ', + long_name='longitude ') + wrong_lon_coord = iris.coords.DimCoord([0.0], + var_name='longitudeCoord', + standard_name='longitude', + long_name=' longitude') + correct_cube = iris.cube.Cube( + [[10.0]], + var_name='uas', + standard_name='eastward_wind ', + long_name=' East Near-Surface Wind ', + dim_coords_and_dims=[(correct_lat_coord, 0), (correct_lon_coord, 1)], + ) + wrong_cube = iris.cube.Cube( + [[10.0]], + var_name='ta', + standard_name=' air_temperature ', + long_name='Air Temperature', + dim_coords_and_dims=[(wrong_lat_coord, 0), (wrong_lon_coord, 1)], + attributes={'parent_time_units': 'days since 0000-00-00 (noleap)'}, + ) + scalar_cube = iris.cube.Cube(0.0, var_name='ps', + standard_name='air_pressure ', + long_name=' Air pressure ') + return iris.cube.CubeList([correct_cube, wrong_cube, scalar_cube]) + + @pytest.fixture def cubes_bounds(): lat_coord = iris.coords.DimCoord([0.0], @@ -86,6 +126,11 @@ def test_get_tas_fix(): assert fix == [Tas(None), AllVars(None)] +def test_get_uas_fix(): + fix = Fix.get_fixes('CMIP6', 'MCM-UA-1-0', 'Amon', 'uas') + assert fix == [Uas(None), AllVars(None)] + + def test_allvars_fix_metadata(cubes): fix = AllVars(None) 
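     # The fixture cubes above intentionally pad names and units with stray
     # whitespace (e.g. standard_name=' latitude '); AllVars.fix_metadata is
     # expected to normalise this before the test's assertions run.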
out_cubes = fix.fix_metadata(cubes) @@ -157,3 +202,102 @@ def test_tas_fix_metadata(cubes): assert out_cubes_2[0].var_name == 'tas' coord = out_cubes_2[0].coord('height') assert coord == height_coord + + +def test_uas_fix_metadata(uas_cubes): + for cube in uas_cubes: + with pytest.raises(iris.exceptions.CoordinateNotFoundError): + cube.coord('height') + height_coord = iris.coords.AuxCoord(10.0, + var_name='height', + standard_name='height', + long_name='height', + units=Unit('m'), + attributes={'positive': 'up'}) + vardef = get_var_info('CMIP6', 'Amon', 'uas') + fix = Uas(vardef) + + # Check fix + out_cubes = fix.fix_metadata(uas_cubes) + assert out_cubes[0].var_name == 'uas' + coord = out_cubes[0].coord('height') + assert coord == height_coord + + # Check that height coordinate is not added twice + out_cubes_2 = fix.fix_metadata(out_cubes) + assert out_cubes_2[0].var_name == 'uas' + coord = out_cubes_2[0].coord('height') + assert coord == height_coord + + +@pytest.fixture +def thetao_cubes(): + time_coord = iris.coords.DimCoord( + [0.0004, 1.09776], var_name='time', standard_name='time', + units='days since 1850-01-01 00:00:00') + lat_coord = iris.coords.DimCoord( + [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') + lon_coord = iris.coords.DimCoord([-0.9375, 357.1875], + bounds=[[-1.875, 0.], [356.25, 358.125]], + var_name='lon', + standard_name='longitude') + lev_coord = iris.coords.DimCoord( + [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], + var_name='lev', standard_name=None, units='m', + attributes={'positive': 'up'}) + coord_specs = [ + (time_coord, 0), + (lev_coord, 1), + (lat_coord, 2), + (lon_coord, 3), + ] + thetao_cube = iris.cube.Cube( + np.arange(16).reshape(2, 2, 2, 2), + var_name='thetao', + dim_coords_and_dims=coord_specs, + ) + + return iris.cube.CubeList([thetao_cube]) + + +def test_get_thetao_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'MCM-UA-1-0', 'Omon', 'thetao') + assert fix == [Omon(None), AllVars(None)] + + +def test_thetao_fix_metadata(thetao_cubes): + """Test ``fix_metadata`` for ``thetao``.""" + vardef = get_var_info('CMIP6', 'Omon', 'thetao') + fix_omon = Omon(vardef) + fix_allvars = AllVars(vardef) + out_cubes = fix_omon.fix_metadata(thetao_cubes) + out_cubes = fix_allvars.fix_metadata(out_cubes) + assert out_cubes is thetao_cubes + assert len(out_cubes) == 1 + out_cube = out_cubes[0] + + # Check data of cube + np.testing.assert_allclose(out_cube.data, + [[[[1, 0], + [3, 2]], + [[5, 4], + [7, 6]]], + [[[9, 8], + [11, 10]], + [[13, 12], + [15, 14]]]]) + + # Check data of longitude + lon_coord = out_cube.coord('longitude') + np.testing.assert_allclose(lon_coord.points, [357.1875, 359.0625]) + np.testing.assert_allclose(lon_coord.bounds, + [[356.25, 358.125], [358.125, 360.0]]) + + # Check metadata of depth coordinate + depth_coord = out_cube.coord('depth') + assert depth_coord.standard_name == 'depth' + assert depth_coord.var_name == 'lev' + assert depth_coord.long_name == 'ocean depth coordinate' + assert depth_coord.units == 'm' + assert depth_coord.attributes == {'positive': 'down'} diff --git a/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py b/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py index bf2398f790..09d35be726 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py +++ b/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py @@ -1,5 +1,9 @@ """Test fixes for SAM0-UNICON.""" -from esmvalcore.cmor._fixes.cmip6.sam0_unicon import Cl, Cli, Clw +import iris +import numpy as np 
+import pytest + +from esmvalcore.cmor._fixes.cmip6.sam0_unicon import Cl, Cli, Clw, Nbp from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor._fixes.fix import Fix @@ -35,3 +39,29 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is ClFixHybridPressureCoord + + +def test_get_nbp_fix(): + """Test getting of fix.""" + fix = Fix.get_fixes('CMIP6', 'SAM0-UNICON', 'Lmon', 'nbp') + assert fix == [Nbp(None)] + + +@pytest.fixture +def nbp_cube(): + """``nbp`` cube.""" + cube = iris.cube.Cube( + [1.0], + var_name='nbp', + standard_name='surface_net_downward_mass_flux_of_carbon_dioxide' + '_expressed_as_carbon_due_to_all_land_processes', + units='kg m-2 s-1', + ) + return cube + + +def test_nbp_fix_data(nbp_cube): + """Test ``fix_data`` for ``nbp``.""" + fix = Nbp(None) + out_cube = fix.fix_data(nbp_cube) + np.testing.assert_allclose(out_cube.data, [-1.0]) diff --git a/tests/integration/cmor/_fixes/test_common.py b/tests/integration/cmor/_fixes/test_common.py index 3adeadfa54..922f807fef 100644 --- a/tests/integration/cmor/_fixes/test_common.py +++ b/tests/integration/cmor/_fixes/test_common.py @@ -1,15 +1,18 @@ """Test for common fixes used for multiple datasets.""" import iris import numpy as np +import pytest +from cf_units import Unit from esmvalcore.cmor._fixes.common import ( ClFixHybridHeightCoord, ClFixHybridPressureCoord, + OceanFixGrid, + SiconcFixScalarCoord, ) from esmvalcore.cmor.table import get_var_info from esmvalcore.iris_helpers import var_name_constraint - AIR_PRESSURE_POINTS = np.array([[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], @@ -81,6 +84,7 @@ def hybrid_pressure_coord_fix_metadata(nc_path, short_name, fix): return var_names +@pytest.mark.sequential def test_cl_hybrid_pressure_coord_fix_metadata_with_a(test_data_path): """Test ``fix_metadata`` for ``cl``.""" vardef = get_var_info('CMIP6', 'Amon', 'cl') @@ -90,6 +94,7 @@ def test_cl_hybrid_pressure_coord_fix_metadata_with_a(test_data_path): assert 'a_bnds' in var_names +@pytest.mark.sequential def test_cl_hybrid_pressure_coord_fix_metadata_with_ap(test_data_path): """Test ``fix_metadata`` for ``cl``.""" vardef = get_var_info('CMIP6', 'Amon', 'cl') @@ -157,9 +162,255 @@ def hybrid_height_coord_fix_metadata(nc_path, short_name, fix): assert air_pressure_coord.units == 'Pa' +@pytest.mark.sequential def test_cl_hybrid_height_coord_fix_metadata(test_data_path): """Test ``fix_metadata`` for ``cl``.""" vardef = get_var_info('CMIP6', 'Amon', 'cl') nc_path = test_data_path / 'common_cl_hybrid_height.nc' hybrid_height_coord_fix_metadata(nc_path, 'cl', ClFixHybridHeightCoord(vardef)) + + +@pytest.fixture +def siconc_cubes(): + """Sample cube.""" + time_coord = iris.coords.DimCoord([0.0], standard_name='time', + var_name='time', + units='days since 6543-2-1') + lat_coord = iris.coords.DimCoord([-30.0], standard_name='latitude', + var_name='lat', units='degrees_north') + lon_coord = iris.coords.DimCoord([30.0], standard_name='longitude', + var_name='lon', units='degrees_east') + coords_specs = [(time_coord, 0), (lat_coord, 1), (lon_coord, 2)] + cube = iris.cube.Cube([[[22.0]]], standard_name='sea_ice_area_fraction', + var_name='siconc', units='%', + dim_coords_and_dims=coords_specs) + return iris.cube.CubeList([cube]) + + +def test_siconc_fix_metadata(siconc_cubes): + """Test ``fix_metadata`` for ``siconc``.""" + assert len(siconc_cubes) == 1 + siconc_cube = siconc_cubes[0] + assert siconc_cube.var_name == "siconc" + + # Extract siconc cube + 
siconc_cube = siconc_cubes.extract_cube('sea_ice_area_fraction') + assert not siconc_cube.coords('typesi') + + # Apply fix + vardef = get_var_info('CMIP6', 'SImon', 'siconc') + fix = SiconcFixScalarCoord(vardef) + fixed_cubes = fix.fix_metadata(siconc_cubes) + assert len(fixed_cubes) == 1 + fixed_siconc_cube = fixed_cubes.extract_cube( + 'sea_ice_area_fraction') + fixed_typesi_coord = fixed_siconc_cube.coord('area_type') + assert fixed_typesi_coord.points is not None + assert fixed_typesi_coord.bounds is None + np.testing.assert_equal(fixed_typesi_coord.points, + ['sea_ice']) + np.testing.assert_equal(fixed_typesi_coord.units, + Unit('No unit')) + + +def get_tos_cubes(wrong_ij_names=False, ij_bounds=False): + """Cubes containing tos variable.""" + if wrong_ij_names: + j_var_name = 'lat' + j_long_name = 'latitude' + i_var_name = 'lon' + i_long_name = 'longitude' + else: + j_var_name = 'j' + j_long_name = 'cell index along second dimension' + i_var_name = 'i' + i_long_name = 'cell index along first dimension' + if ij_bounds: + j_bounds = [[10.0, 30.0], [30.0, 50.0]] + i_bounds = [[5.0, 15.0], [15.0, 25.0], [25.0, 35.0]] + else: + j_bounds = None + i_bounds = None + j_coord = iris.coords.DimCoord( + [20.0, 40.0], + bounds=j_bounds, + var_name=j_var_name, + long_name=j_long_name, + ) + i_coord = iris.coords.DimCoord( + [10.0, 20.0, 30.0], + bounds=i_bounds, + var_name=i_var_name, + long_name=i_long_name, + ) + lat_coord = iris.coords.AuxCoord( + [[-40.0, -20.0, 0.0], [-20.0, 0.0, 20.0]], + var_name='lat', + standard_name='latitude', + units='degrees_north', + ) + lon_coord = iris.coords.AuxCoord( + [[100.0, 140.0, 180.0], [80.0, 100.0, 120.0]], + var_name='lon', + standard_name='longitude', + units='degrees_east', + ) + time_coord = iris.coords.DimCoord( + 1.0, + bounds=[0.0, 2.0], + var_name='time', + standard_name='time', + long_name='time', + units='days since 1950-01-01', + ) + + # Create tos variable cube + cube = iris.cube.Cube( + np.full((1, 2, 3), 300.0), + var_name='tos', + long_name='sea_surface_temperature', + units='K', + dim_coords_and_dims=[(time_coord, 0), (j_coord, 1), (i_coord, 2)], + aux_coords_and_dims=[(lat_coord, (1, 2)), (lon_coord, (1, 2))], + ) + + # Create empty (dummy) cube + empty_cube = iris.cube.Cube(0.0) + return iris.cube.CubeList([cube, empty_cube]) + + +@pytest.fixture +def tos_cubes_wrong_ij_names(): + """Cubes with wrong ij names.""" + return get_tos_cubes(wrong_ij_names=True, ij_bounds=True) + + +def test_ocean_fix_grid_wrong_ij_names(tos_cubes_wrong_ij_names): + """Test ``fix_metadata`` with cubes with wrong ij names.""" + cube_in = tos_cubes_wrong_ij_names.extract_cube('sea_surface_temperature') + assert len(cube_in.coords('latitude')) == 2 + assert len(cube_in.coords('longitude')) == 2 + assert cube_in.coord('latitude', dimensions=1).bounds is not None + assert cube_in.coord('longitude', dimensions=2).bounds is not None + assert cube_in.coord('latitude', dimensions=(1, 2)).bounds is None + assert cube_in.coord('longitude', dimensions=(1, 2)).bounds is None + + # Apply fix + vardef = get_var_info('CMIP6', 'Omon', 'tos') + fix = OceanFixGrid(vardef) + fixed_cubes = fix.fix_metadata(tos_cubes_wrong_ij_names) + assert len(fixed_cubes) == 1 + fixed_cube = fixed_cubes.extract_cube('sea_surface_temperature') + assert fixed_cube is cube_in + + # Check ij names + i_coord = fixed_cube.coord('cell index along first dimension') + j_coord = fixed_cube.coord('cell index along second dimension') + assert i_coord.var_name == 'i' + assert i_coord.standard_name is None + 
assert i_coord.long_name == 'cell index along first dimension' + assert i_coord.units == '1' + assert i_coord.circular is False + assert j_coord.var_name == 'j' + assert j_coord.standard_name is None + assert j_coord.long_name == 'cell index along second dimension' + assert j_coord.units == '1' + + # Check ij points and bounds + np.testing.assert_allclose(i_coord.points, [0, 1, 2]) + np.testing.assert_allclose(i_coord.bounds, + [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]]) + np.testing.assert_allclose(j_coord.points, [0, 1]) + np.testing.assert_allclose(j_coord.bounds, [[-0.5, 0.5], [0.5, 1.5]]) + + # Check bounds of latitude and longitude + assert len(fixed_cube.coords('latitude')) == 1 + assert len(fixed_cube.coords('longitude')) == 1 + assert fixed_cube.coord('latitude').bounds is not None + assert fixed_cube.coord('longitude').bounds is not None + latitude_bounds = np.array([[[-40, -33.75, -23.75, -30.0], + [-33.75, -6.25, 3.75, -23.75], + [-6.25, -1.02418074021670e-14, 10.0, 3.75]], + [[-30.0, -23.75, -13.75, -20.0], + [-23.75, 3.75, 13.75, -13.75], + [3.75, 10.0, 20.0, 13.75]]]) + np.testing.assert_allclose(fixed_cube.coord('latitude').bounds, + latitude_bounds) + longitude_bounds = np.array([[[140.625, 99.375, 99.375, 140.625], + [99.375, 140.625, 140.625, 99.375], + [140.625, 99.375, 99.375, 140.625]], + [[140.625, 99.375, 99.375, 140.625], + [99.375, 140.625, 140.625, 99.375], + [140.625, 99.375, 99.375, 140.625]]]) + np.testing.assert_allclose(fixed_cube.coord('longitude').bounds, + longitude_bounds) + + +@pytest.fixture +def tos_cubes_no_ij_bounds(): + """Cubes with no ij bounds.""" + return get_tos_cubes(wrong_ij_names=False, ij_bounds=False) + + +def test_ocean_fix_grid_no_ij_bounds(tos_cubes_no_ij_bounds): + """Test ``fix_metadata`` with cubes with no ij bounds.""" + cube_in = tos_cubes_no_ij_bounds.extract_cube('sea_surface_temperature') + assert len(cube_in.coords('latitude')) == 1 + assert len(cube_in.coords('longitude')) == 1 + assert cube_in.coord('latitude').bounds is None + assert cube_in.coord('longitude').bounds is None + assert cube_in.coord('cell index along first dimension').var_name == 'i' + assert cube_in.coord('cell index along second dimension').var_name == 'j' + assert cube_in.coord('cell index along first dimension').bounds is None + assert cube_in.coord('cell index along second dimension').bounds is None + + # Apply fix + vardef = get_var_info('CMIP6', 'Omon', 'tos') + fix = OceanFixGrid(vardef) + fixed_cubes = fix.fix_metadata(tos_cubes_no_ij_bounds) + assert len(fixed_cubes) == 1 + fixed_cube = fixed_cubes.extract_cube('sea_surface_temperature') + assert fixed_cube is cube_in + + # Check ij names + i_coord = fixed_cube.coord('cell index along first dimension') + j_coord = fixed_cube.coord('cell index along second dimension') + assert i_coord.var_name == 'i' + assert i_coord.standard_name is None + assert i_coord.long_name == 'cell index along first dimension' + assert i_coord.units == '1' + assert i_coord.circular is False + assert j_coord.var_name == 'j' + assert j_coord.standard_name is None + assert j_coord.long_name == 'cell index along second dimension' + assert j_coord.units == '1' + + # Check ij points and bounds + np.testing.assert_allclose(i_coord.points, [0, 1, 2]) + np.testing.assert_allclose(i_coord.bounds, + [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]]) + np.testing.assert_allclose(j_coord.points, [0, 1]) + np.testing.assert_allclose(j_coord.bounds, [[-0.5, 0.5], [0.5, 1.5]]) + + # Check bounds of latitude and longitude + assert 
len(fixed_cube.coords('latitude')) == 1 + assert len(fixed_cube.coords('longitude')) == 1 + assert fixed_cube.coord('latitude').bounds is not None + assert fixed_cube.coord('longitude').bounds is not None + latitude_bounds = np.array([[[-40, -33.75, -23.75, -30.0], + [-33.75, -6.25, 3.75, -23.75], + [-6.25, -1.02418074021670e-14, 10.0, 3.75]], + [[-30.0, -23.75, -13.75, -20.0], + [-23.75, 3.75, 13.75, -13.75], + [3.75, 10.0, 20.0, 13.75]]]) + np.testing.assert_allclose(fixed_cube.coord('latitude').bounds, + latitude_bounds) + longitude_bounds = np.array([[[140.625, 99.375, 99.375, 140.625], + [99.375, 140.625, 140.625, 99.375], + [140.625, 99.375, 99.375, 140.625]], + [[140.625, 99.375, 99.375, 140.625], + [99.375, 140.625, 140.625, 99.375], + [140.625, 99.375, 99.375, 140.625]]]) + np.testing.assert_allclose(fixed_cube.coord('longitude').bounds, + longitude_bounds) diff --git a/tests/integration/cmor/_fixes/test_fix.py b/tests/integration/cmor/_fixes/test_fix.py index 0199911504..5ffc4ca868 100644 --- a/tests/integration/cmor/_fixes/test_fix.py +++ b/tests/integration/cmor/_fixes/test_fix.py @@ -1,3 +1,5 @@ +"""Integration tests for fixes.""" + import os import shutil import tempfile @@ -6,35 +8,35 @@ import pytest from iris.cube import Cube +from esmvalcore.cmor._fixes.cmip5.bnu_esm import Ch4 +from esmvalcore.cmor._fixes.cmip5.canesm2 import FgCo2 +from esmvalcore.cmor._fixes.cmip5.cesm1_bgc import Gpp +from esmvalcore.cmor._fixes.cmip6.cesm2 import Omon, Tos from esmvalcore.cmor.fix import Fix class TestFix(unittest.TestCase): def setUp(self): - """Set up temp folder""" + """Set up temp folder.""" self.temp_folder = tempfile.mkdtemp() def tearDown(self): - """Remove temp folder""" + """Remove temp folder.""" shutil.rmtree(self.temp_folder) def test_get_fix(self): - from esmvalcore.cmor._fixes.cmip5.canesm2 import FgCo2 self.assertListEqual( Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgco2'), [FgCo2(None)]) def test_get_fix_case_insensitive(self): - from esmvalcore.cmor._fixes.cmip5.canesm2 import FgCo2 self.assertListEqual( Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgCo2'), [FgCo2(None)]) def test_get_fixes_with_replace(self): - from esmvalcore.cmor._fixes.cmip5.bnu_esm import Ch4 self.assertListEqual(Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'ch4'), [Ch4(None)]) def test_get_fixes_with_generic(self): - from esmvalcore.cmor._fixes.cmip5.cesm1_bgc import Gpp self.assertListEqual( Fix.get_fixes('CMIP5', 'CESM1-BGC', 'Amon', 'gpp'), [Gpp(None)]) @@ -50,6 +52,19 @@ def test_get_fix_no_var(self): self.assertListEqual( Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'BAD_VAR'), []) + def test_get_fix_only_mip(self): + self.assertListEqual( + Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao'), [Omon(None)]) + + def test_get_fix_only_mip_case_insensitive(self): + self.assertListEqual( + Fix.get_fixes('CMIP6', 'CESM2', 'omOn', 'thetao'), [Omon(None)]) + + def test_get_fix_mip_and_var(self): + self.assertListEqual( + Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos'), + [Tos(None), Omon(None)]) + def test_fix_metadata(self): cube = Cube([0]) reference = Cube([0]) diff --git a/tests/integration/cmor/_fixes/test_shared.py b/tests/integration/cmor/_fixes/test_shared.py index 7930d4e90e..69e9a0f092 100644 --- a/tests/integration/cmor/_fixes/test_shared.py +++ b/tests/integration/cmor/_fixes/test_shared.py @@ -12,9 +12,11 @@ add_scalar_height_coord, add_scalar_typeland_coord, add_scalar_typesea_coord, + add_scalar_typesi_coord, add_sigma_factory, cube_to_aux_coord, fix_bounds, + fix_ocean_depth_coord, 
get_altitude_to_pressure_func, get_bounds_cube, get_pressure_to_altitude_func, @@ -23,6 +25,7 @@ from esmvalcore.iris_helpers import var_name_constraint +@pytest.mark.sequential def test_altitude_to_pressure_func(): """Test altitude to pressure function.""" func = get_altitude_to_pressure_func() @@ -37,6 +40,7 @@ def test_altitude_to_pressure_func(): [101325.0, 100129.0]) +@pytest.mark.sequential def test_pressure_to_altitude_func(): """Test pressure to altitude function.""" func = get_pressure_to_altitude_func() @@ -65,6 +69,7 @@ def test_pressure_to_altitude_func(): ] +@pytest.mark.sequential @pytest.mark.parametrize('coord_dict,output', TEST_ADD_AUX_COORDS_FROM_CUBES) def test_add_aux_coords_from_cubes(coord_dict, output): """Test extraction of auxiliary coordinates from cubes.""" @@ -130,6 +135,7 @@ def test_add_aux_coords_from_cubes(coord_dict, output): ] +@pytest.mark.sequential @pytest.mark.parametrize('cube,output', TEST_ADD_PLEV_FROM_ALTITUDE) def test_add_plev_from_altitude(cube, output): """Test adding of pressure level coordinate.""" @@ -167,6 +173,7 @@ def test_add_plev_from_altitude(cube, output): ] +@pytest.mark.sequential @pytest.mark.parametrize('cube,output', TEST_ADD_ALTITUDE_FROM_PLEV) def test_add_altitude_from_plev(cube, output): """Test adding of altitude coordinate.""" @@ -211,6 +218,7 @@ def test_add_altitude_from_plev(cube, output): ] +@pytest.mark.sequential @pytest.mark.parametrize('cube_in,depth', TEST_ADD_SCALAR_COORD) def test_add_scalar_depth_coord(cube_in, depth): """Test adding of scalar depth coordinate.""" @@ -238,6 +246,7 @@ def test_add_scalar_depth_coord(cube_in, depth): assert coord == depth_coord +@pytest.mark.sequential @pytest.mark.parametrize('cube_in,height', TEST_ADD_SCALAR_COORD) def test_add_scalar_height_coord(cube_in, height): """Test adding of scalar height coordinate.""" @@ -265,6 +274,7 @@ def test_add_scalar_height_coord(cube_in, height): assert coord == height_coord +@pytest.mark.sequential @pytest.mark.parametrize('cube_in,typeland', TEST_ADD_SCALAR_COORD) def test_add_scalar_typeland_coord(cube_in, typeland): """Test adding of scalar typeland coordinate.""" @@ -291,6 +301,7 @@ def test_add_scalar_typeland_coord(cube_in, typeland): assert coord == typeland_coord +@pytest.mark.sequential @pytest.mark.parametrize('cube_in,typesea', TEST_ADD_SCALAR_COORD) def test_add_scalar_typesea_coord(cube_in, typesea): """Test adding of scalar typesea coordinate.""" @@ -317,6 +328,33 @@ def test_add_scalar_typesea_coord(cube_in, typesea): assert coord == typesea_coord +@pytest.mark.sequential +@pytest.mark.parametrize('cube_in,typesi', TEST_ADD_SCALAR_COORD) +def test_add_scalar_typesi_coord(cube_in, typesi): + """Test adding of scalar typesi coordinate.""" + cube_in = cube_in.copy() + if typesi is None: + typesi = 'sea_ice' + typesi_coord = iris.coords.AuxCoord(typesi, + var_name='type', + standard_name='area_type', + long_name='Sea Ice area type', + units=Unit('no unit')) + with pytest.raises(iris.exceptions.CoordinateNotFoundError): + cube_in.coord('area_type') + if typesi == 'sea_ice': + cube_out = add_scalar_typesi_coord(cube_in) + else: + cube_out = add_scalar_typesi_coord(cube_in, typesi) + assert cube_out is cube_in + coord = cube_in.coord('area_type') + assert coord == typesi_coord + cube_out_2 = add_scalar_typesi_coord(cube_out) + assert cube_out_2 is cube_out + coord = cube_in.coord('area_type') + assert coord == typesi_coord + + PS_COORD = iris.coords.AuxCoord([[[101000.0]]], var_name='ps', units='Pa') PTOP_COORD = 
iris.coords.AuxCoord(1000.0, var_name='ptop', units='Pa')
 LEV_COORD = iris.coords.AuxCoord([0.5], bounds=[[0.2, 0.8]],
                                  var_name='lev',
@@ -337,6 +375,7 @@ def test_add_scalar_typesea_coord(cube_in, typesea):
 ]
 
 
+@pytest.mark.sequential
 @pytest.mark.parametrize('cube,output', TEST_ADD_SIGMA_FACTORY)
 def test_add_sigma_factory(cube, output):
     """Test adding of factory for ``atmosphere_sigma_coordinate``."""
@@ -353,6 +392,7 @@ def test_add_sigma_factory(cube, output):
     assert air_pressure_coord == output
 
 
+@pytest.mark.sequential
 def test_cube_to_aux_coord():
     """Test converting cube to auxiliary coordinate."""
     cube = iris.cube.Cube(
@@ -380,6 +420,7 @@ def test_cube_to_aux_coord():
 ]
 
 
+@pytest.mark.sequential
 @pytest.mark.parametrize('coord_name,output', TEST_GET_BOUNDS_CUBE)
 def test_get_bounds_cube(coord_name, output):
     """Test retrieving of bounds cube from list of cubes."""
@@ -417,6 +458,7 @@ def test_get_bounds_cube(coord_name, output):
 ]
 
 
+@pytest.mark.sequential
 @pytest.mark.parametrize('var_names,output', TEST_FIX_BOUNDS)
 def test_fix_bounds(var_names, output):
     """Test fixing of bounds using bounds cubes from list of cubes."""
@@ -455,6 +497,7 @@ def test_fix_bounds(var_names, output):
 ]
 
 
+@pytest.mark.sequential
 @pytest.mark.parametrize('cubes_in,decimals,out', TEST_ROUND)
 def test_round_coordinate(cubes_in, decimals, out):
     """Test rounding of coordinates."""
@@ -469,6 +512,7 @@ def test_round_coordinate(cubes_in, decimals, out):
     assert coords[0] == out[idx]
 
 
+@pytest.mark.sequential
 def test_round_coordinates_single_coord():
     """Test rounding of specified coordinate."""
     coords, bounds = [10.0001], [[9.0001, 11.0001]]
@@ -485,3 +529,18 @@ def test_round_coordinates_single_coord():
     assert cubes[0].coord('longitude') is out[0].coord('longitude')
     np.testing.assert_allclose(out[0].coord('latitude').points, [10])
     np.testing.assert_allclose(out[0].coord('latitude').bounds, [[9, 11]])
+
+
+def test_fix_ocean_depth_coord():
+    """Test `fix_ocean_depth_coord`."""
+    z_coord = iris.coords.DimCoord(0.0, var_name='alt',
+                                   attributes={'positive': 'up'})
+    cube = iris.cube.Cube([0.0], var_name='x',
+                          dim_coords_and_dims=[(z_coord, 0)])
+    fix_ocean_depth_coord(cube)
+    depth_coord = cube.coord('depth')
+    assert depth_coord.standard_name == 'depth'
+    assert depth_coord.var_name == 'lev'
+    assert depth_coord.units == 'm'
+    assert depth_coord.long_name == 'ocean depth coordinate'
+    assert depth_coord.attributes == {'positive': 'down'}
diff --git a/tests/integration/cmor/test_table.py b/tests/integration/cmor/test_table.py
index 0580fad4dc..568dd6aaba 100644
--- a/tests/integration/cmor/test_table.py
+++ b/tests/integration/cmor/test_table.py
@@ -381,3 +381,12 @@ def test_get_variable_tasconf95(self):
         self.assertEqual(var.long_name,
                          'Near-Surface Air Temperature Uncertainty Range')
         self.assertEqual(var.units, 'K')
+
+    def test_get_variable_tasaga(self):
+        """Get tasaga variable."""
+        CustomInfo()
+        var = self.variables_info.get_variable('Amon', 'tasaga')
+        self.assertEqual(var.short_name, 'tasaga')
+        self.assertEqual(var.long_name,
+                         'Global-mean Near-Surface Air Temperature Anomaly')
+        self.assertEqual(var.units, 'K')
diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml
index 1677e6ff32..9ce5ea7ce4 100644
--- a/tests/integration/data_finder.yml
+++ b/tests/integration/data_finder.yml
@@ -249,82 +249,438 @@ get_input_filelist:
       found_files:
         - historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc
 
+  # Test other projects
+
+  - drs: DKRZ
+    variable:
+      
variable_group: test + short_name: ta + original_short_name: ta + dataset: HadGEM3-GC31-LL + activity: CMIP + project: CMIP6 + cmor_table: CMIP6 + institute: [MOHC, NERC] + frequency: mon + modeling_realm: [atmos] + mip: Amon + exp: historical + grid: gn + ensemble: r1i1p1f1 + start_year: 1999 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_190001-194912.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_190001-194912.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_195001-199912.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_200001-201412.nc + dirs: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ + file_patterns: + - ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + found_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_195001-199912.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_200001-201412.nc + + - drs: DKRZ + variable: + variable_group: test + short_name: ta + original_short_name: ta + dataset: HADGEM1 + project: CMIP3 + cmor_table: CMIP3 + institute: [UKMO] + frequency: mon + modeling_realm: [atmos] + mip: Amon + exp: historical + ensemble: r1i1p1 + start_year: 1999 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - historical/atmos/mon/tas/HADGEM1/r1i1p1/tas_HADGEM1_190001-194912.nc + - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_190001-194912.nc + - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_195001-199912.nc + - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_200001-200112.nc + dirs: + - historical/atmos/mon/ta/HADGEM1/r1i1p1 + file_patterns: + - ta_*.nc + found_files: + - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_195001-199912.nc + - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_200001-200112.nc -get_input_fx_filelist: - drs: default variable: - <<: *variable - fx_files: - - areacella - - areacello - - basin - - deptho - - orog - - sftlf - - sftof - - thkcello - - volcello + variable_group: test + short_name: tas + original_short_name: tas + dataset: ERA-Interim + project: OBS + cmor_table: CMIP5 + frequency: mon + mip: Amon + tier: 3 + type: reanaly + version: 42 + start_year: 1999 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc available_files: - - sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - - areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - - areacello_fx_HadGEM2-ES_historical_r0i0p0.nc - - basin_fx_HadGEM2-ES_historical_r0i0p0.nc - - deptho_fx_HadGEM2-ES_historical_r0i0p0.nc - - orog_fx_HadGEM2-ES_historical_r0i0p0.nc - - sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - - sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - - thkcello_fx_HadGEM2-ES_historical_r0i0p0.nc - - volcello_fx_HadGEM2-ES_historical_r0i0p0.nc + - Tier2/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_pr_197901-199912.nc + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - 
Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc + dirs: + - Tier3/ERA-Interim + file_patterns: + - OBS_ERA-Interim_reanaly_42_Amon_tas[_.]*nc found_files: - areacella: areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - areacello: areacello_fx_HadGEM2-ES_historical_r0i0p0.nc - basin: basin_fx_HadGEM2-ES_historical_r0i0p0.nc - deptho: deptho_fx_HadGEM2-ES_historical_r0i0p0.nc - orog: orog_fx_HadGEM2-ES_historical_r0i0p0.nc - sftlf: sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - sftof: sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - thkcello: thkcello_fx_HadGEM2-ES_historical_r0i0p0.nc - volcello: volcello_fx_HadGEM2-ES_historical_r0i0p0.nc + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc - drs: default variable: - <<: *variable - fx_files: - - sftof + variable_group: test + short_name: tas + original_short_name: tas + dataset: ERA-Interim + project: OBS6 + cmor_table: CMIP6 + frequency: mon + mip: Amon + tier: 3 + type: reanaly + version: 42 + start_year: 1999 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - Tier2/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_pr_197901-199912.nc + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc + dirs: + - Tier3/ERA-Interim + file_patterns: + - OBS6_ERA-Interim_reanaly_42_Amon_tas[_.]*nc found_files: - sftof: null + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc - - drs: BADC + # Test fx files + + - drs: default variable: - <<: *variable - fx_files: - - sftof + variable_group: test + short_name: areacella + original_short_name: areacella + dataset: HadGEM2-ES + project: CMIP5 + cmor_table: CMIP5 + institute: [INPE, MOHC] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: r1i1p1 + diagnostic: test_diag + preprocessor: test_preproc available_files: - - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20120215/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20130612/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - available_symlinks: - - link_name: MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/latest - target: v20130612 + - areacella_fx_HadGEM2-ES_historical_r1i1p1.nc + - areacella_fx_HadGEM2-ES_historical_r0i0p0.nc + dirs: + - '' + file_patterns: + - areacella_fx_HadGEM2-ES_historical_r0i0p0*.nc found_files: - sftof: MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/latest/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - drs: DKRZ variable: - <<: *variable - fx_files: - - sftof + variable_group: test + short_name: sftlf + original_short_name: sftlf + dataset: HadGEM2-ES + project: CMIP5 + cmor_table: CMIP5 + institute: [INPE, MOHC] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: r1i1p1 + diagnostic: test_diag + preprocessor: test_preproc available_files: - - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20120215/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20130612/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - 
MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + dirs: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf + file_patterns: + - sftlf_fx_HadGEM2-ES_historical_r0i0p0*.nc found_files: - sftof: MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20130612/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - - drs: ETHZ + - drs: DKRZ variable: - <<: *variable - fx_files: - - sftof + variable_group: test + short_name: orog + original_short_name: orog + dataset: HadGEM2-ES + project: CMIP5 + cmor_table: CMIP5 + institute: [INPE, MOHC] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: r1i1p1 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/areacella/areacella_fx_HadGEM2-ES_historical_r0i0p0.nc + dirs: [] + file_patterns: + - orog_fx_HadGEM2-ES_historical_r0i0p0*.nc + found_files: [] + + - drs: DKRZ + variable: + variable_group: test + short_name: areacello + original_short_name: areacello + dataset: HadGEM3-GC31-LL + activity: CMIP + project: CMIP6 + cmor_table: CMIP6 + institute: [MOHC, NERC] + frequency: fx + modeling_realm: [ocean] + mip: Ofx + exp: historical + grid: gn + ensemble: r1i1p1f1 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f2/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f2_gn.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc + dirs: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/ + file_patterns: + - areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + found_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc + + - drs: DKRZ + variable: + variable_group: test + short_name: areacello + original_short_name: areacello + dataset: HadGEM3-GC31-LL + activity: CMIP + project: CMIP6 + cmor_table: CMIP6 + institute: [MOHC, NERC] + frequency: mon + modeling_realm: [ocean] + mip: Omon + exp: historical + grid: gn + ensemble: r1i1p1f1 + start_year: 2000 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f2/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f2_gn.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc + dirs: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/ + file_patterns: + - areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + found_files: + - 
CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc + + - drs: DKRZ + variable: + variable_group: test + short_name: volcello + original_short_name: volcello + dataset: HadGEM3-GC31-LL + activity: CMIP + project: CMIP6 + cmor_table: CMIP6 + institute: [MOHC, NERC] + frequency: mon + modeling_realm: [ocean] + mip: Omon + exp: historical + grid: gn + ensemble: r1i1p1f1 + start_year: 2000 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f2/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f2_gn.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/v20200101/this_is_a_wrong_file.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc + dirs: + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/v20200101/ + file_patterns: + - volcello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + found_files: [] + + - drs: DKRZ + variable: + variable_group: test + short_name: volcello + original_short_name: volcello + dataset: HadGEM3-GC31-LL + activity: CMIP + project: CMIP6 + cmor_table: CMIP6 + institute: [MOHC, NERC] + frequency: fx + modeling_realm: [ocean] + mip: Ofx + exp: historical + grid: gn + ensemble: r1i1p1f1 + start_year: 2000 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc available_files: - - historical/fx/sftof/HadGEM2-ES/r0i0p0/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - CMIP/MOHC/HadGEM3-GC31-LL/historical/r0i0p0/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r0i0p0_gn.nc + dirs: [] + file_patterns: + - volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + found_files: [] + + - drs: DKRZ + variable: + variable_group: test + short_name: areacella + original_short_name: areacella + dataset: HADGEM1 + project: CMIP3 + cmor_table: CMIP3 + institute: [UKMO] + frequency: fx + modeling_realm: [atmos] + mip: fx + exp: historical + ensemble: r1i1p1 + start_year: 1999 + end_year: 2000 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - historical/atmos/fx/areacella/HADGEM1/r0i0p0/areacella_HADGEM1.nc + - historical/atmos/fx/areacella/HADGEM1/r1i1p1/areacella_HADGEM1.nc + dirs: + - historical/atmos/fx/areacella/HADGEM1/r1i1p1 + file_patterns: + - areacella_*.nc found_files: - sftof: historical/fx/sftof/HadGEM2-ES/r0i0p0/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - historical/atmos/fx/areacella/HADGEM1/r1i1p1/areacella_HADGEM1.nc + + - drs: default + variable: + variable_group: test + short_name: basin + original_short_name: basin + dataset: ERA-Interim + project: OBS + cmor_table: CMIP5 + frequency: fx + mip: fx + tier: 3 + type: reanaly + version: 42 + diagnostic: test_diag + preprocessor: test_preproc + available_files: + - Tier2/ERA-Interim/OBS_ERA-Interim_reanaly_42_fx_areacello.nc + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_fx_basin.nc + - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_fx_basin.nc + dirs: + - Tier3/ERA-Interim + file_patterns: + - OBS_ERA-Interim_reanaly_42_fx_basin[_.]*nc + found_files: + - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_fx_basin.nc + + - drs: default + variable: + variable_group: test + short_name: deptho + original_short_name: deptho + dataset: ERA-Interim + project: OBS6 + cmor_table: CMIP6 + frequency: mon + mip: Omon + tier: 3 + type: reanaly + version: 42 + start_year: 1995 + 
end_year: 1996
+      diagnostic: test_diag
+      preprocessor: test_preproc
+    available_files:
+      - Tier2/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_areacello.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_basin.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_deptho.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199001-199912.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199901-200012.nc
+    dirs:
+      - Tier3/ERA-Interim
+    file_patterns:
+      - OBS6_ERA-Interim_reanaly_42_Omon_deptho[_.]*nc
+    found_files:
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199001-199912.nc
+
+  - drs: default
+    variable:
+      variable_group: test
+      short_name: deptho
+      original_short_name: deptho
+      dataset: ERA-Interim
+      project: OBS6
+      cmor_table: CMIP6
+      frequency: mon
+      mip: Omon
+      tier: 3
+      type: reanaly
+      version: 42
+      start_year: 2050
+      end_year: 2100
+      diagnostic: test_diag
+      preprocessor: test_preproc
+    available_files:
+      - Tier2/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_areacello.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_basin.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_deptho.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199001-199912.nc
+      - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199901-200012.nc
+    dirs:
+      - Tier3/ERA-Interim
+    file_patterns:
+      - OBS6_ERA-Interim_reanaly_42_Omon_deptho[_.]*nc
+    found_files: []
diff --git a/tests/integration/preprocessor/_ancillary_vars/__init__.py b/tests/integration/preprocessor/_ancillary_vars/__init__.py
new file mode 100644
index 0000000000..88b606fc48
--- /dev/null
+++ b/tests/integration/preprocessor/_ancillary_vars/__init__.py
@@ -0,0 +1,5 @@
+"""
+Test _ancillary_vars.py
+
+Integration tests for the esmvalcore.preprocessor._ancillary_vars module
+"""
diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py
new file mode 100644
index 0000000000..7afd2b5be1
--- /dev/null
+++ b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py
@@ -0,0 +1,257 @@
+"""
+Test add_fx_variables.
+
+Integration tests for the
+:mod:`esmvalcore.preprocessor._ancillary_vars` module.
+ +""" +import logging +import iris +import numpy as np +import pytest + +from esmvalcore.cmor.check import CheckLevels +from esmvalcore.preprocessor._ancillary_vars import (_is_fx_broadcastable, + add_fx_variables, + add_ancillary_variable, + add_cell_measure, + remove_fx_variables) + +logger = logging.getLogger(__name__) + +SHAPES_TO_BROADCAST = [ + ((), (1, ), True), + ((), (10, 10), True), + ((1, ), (10, ), True), + ((1, ), (10, 10), True), + ((2, ), (10, ), False), + ((10, ), (), False), + ((10, ), (1, ), False), + ((10, ), (10, ), True), + ((10, ), (10, 10), True), + ((10, ), (7, 1), False), + ((10, ), (10, 7), False), + ((10, ), (7, 1, 10), True), + ((10, ), (7, 1, 1), False), + ((10, ), (7, 1, 7), False), + ((10, ), (7, 10, 7), False), + ((10, 1), (1, 1), False), + ((10, 1), (1, 100), False), + ((10, 1), (10, 7), True), + ((10, 12), (10, 1), False), + ((10, 1), (10, 12), True), + ((10, 12), (), False), + ((), (10, 12), True), + ((10, 12), (1, ), False), + ((1, ), (10, 12), True), + ((10, 12), (12, ), False), + ((10, 12), (1, 1), False), + ((1, 1), (10, 12), True), + ((10, 12), (1, 12), False), + ((1, 12), (10, 12), True), + ((10, 12), (10, 10, 1), False), + ((10, 12), (10, 12, 1), False), + ((10, 12), (10, 12, 12), False), + ((10, 12), (10, 10, 12), True)] + + +@pytest.mark.parametrize('shape_1,shape_2,out', SHAPES_TO_BROADCAST) +def test_shape_is_broadcastable(shape_1, shape_2, out): + """Test check if two shapes are broadcastable.""" + fx_cube = iris.cube.Cube(np.ones(shape_1)) + cube = iris.cube.Cube(np.ones(shape_2)) + is_broadcastable = _is_fx_broadcastable(fx_cube, cube) + assert is_broadcastable == out + + +class Test: + """Test class.""" + @pytest.fixture(autouse=True) + def setUp(self): + """Assemble a stock cube.""" + fx_area_data = np.ones((3, 3)) + fx_volume_data = np.ones((3, 3, 3)) + self.new_cube_data = np.empty((3, 3)) + self.new_cube_data[:] = 200. + self.new_cube_3D_data = np.empty((3, 3, 3)) + self.new_cube_3D_data[:] = 200. 
+        crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS)
+        self.lons = iris.coords.DimCoord([0, 1.5, 3],
+                                         standard_name='longitude',
+                                         bounds=[[0, 1], [1, 2], [2, 3]],
+                                         units='degrees_east',
+                                         coord_system=crd_sys)
+        self.lats = iris.coords.DimCoord([0, 1.5, 3],
+                                         standard_name='latitude',
+                                         bounds=[[0, 1], [1, 2], [2, 3]],
+                                         units='degrees_north',
+                                         coord_system=crd_sys)
+        self.depth = iris.coords.DimCoord([0, 1.5, 3],
+                                          standard_name='depth',
+                                          bounds=[[0, 1], [1, 2], [2, 3]],
+                                          units='m',
+                                          long_name='ocean depth coordinate')
+        self.monthly_times = iris.coords.DimCoord(
+            [15.5, 45, 74.5, 105, 135.5, 166,
+             196.5, 227.5, 258, 288.5, 319, 349.5],
+            standard_name='time',
+            bounds=[[0, 31], [31, 59], [59, 90],
+                    [90, 120], [120, 151], [151, 181],
+                    [181, 212], [212, 243], [243, 273],
+                    [273, 304], [304, 334], [334, 365]],
+            units='days since 1950-01-01 00:00:00')
+        self.yearly_times = iris.coords.DimCoord(
+            [182.5, 547.5],
+            standard_name='time',
+            bounds=[[0, 365], [365, 730]],
+            units='days since 1950-01-01 00:00')
+        self.coords_spec = [(self.lats, 0), (self.lons, 1)]
+        self.fx_area = iris.cube.Cube(fx_area_data,
+                                      dim_coords_and_dims=self.coords_spec)
+        self.fx_volume = iris.cube.Cube(fx_volume_data,
+                                        dim_coords_and_dims=[
+                                            (self.depth, 0),
+                                            (self.lats, 1),
+                                            (self.lons, 2)
+                                        ])
+
+    def test_add_cell_measure_area(self, tmp_path):
+        """Test adding area fx variables as cell measures."""
+        fx_vars = {
+            'areacella': {
+                'short_name': 'areacella',
+                'project': 'CMIP6',
+                'dataset': 'EC-Earth3',
+                'mip': 'fx',
+                'frequency': 'fx'},
+            'areacello': {
+                'short_name': 'areacello',
+                'project': 'CMIP6',
+                'dataset': 'EC-Earth3',
+                'mip': 'Ofx',
+                'frequency': 'fx'
+            }
+        }
+        for fx_var in fx_vars:
+            self.fx_area.var_name = fx_var
+            self.fx_area.standard_name = 'cell_area'
+            self.fx_area.units = 'm2'
+            fx_file = str(tmp_path / f'{fx_var}.nc')
+            fx_vars[fx_var].update({'filename': fx_file})
+            iris.save(self.fx_area, fx_file)
+            cube = iris.cube.Cube(self.new_cube_data,
+                                  dim_coords_and_dims=self.coords_spec)
+            cube = add_fx_variables(
+                cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE)
+            assert cube.cell_measure(self.fx_area.standard_name) is not None
+
+    def test_add_cell_measure_volume(self, tmp_path):
+        """Test adding volume as cell measure."""
+        fx_vars = {
+            'volcello': {
+                'short_name': 'volcello',
+                'project': 'CMIP6',
+                'dataset': 'EC-Earth3',
+                'mip': 'Ofx',
+                'frequency': 'fx'}
+        }
+        self.fx_volume.var_name = 'volcello'
+        self.fx_volume.standard_name = 'ocean_volume'
+        self.fx_volume.units = 'm3'
+        fx_file = str(tmp_path / 'volcello.nc')
+        iris.save(self.fx_volume, fx_file)
+        fx_vars['volcello'].update({'filename': fx_file})
+        cube = iris.cube.Cube(self.new_cube_3D_data,
+                              dim_coords_and_dims=[
+                                  (self.depth, 0),
+                                  (self.lats, 1),
+                                  (self.lons, 2)])
+        cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE)
+        assert cube.cell_measure(self.fx_volume.standard_name) is not None
+
+    def test_no_cell_measure(self):
+        """Test no cell measure is added."""
+        cube = iris.cube.Cube(self.new_cube_3D_data,
+                              dim_coords_and_dims=[
+                                  (self.depth, 0),
+                                  (self.lats, 1),
+                                  (self.lons, 2)])
+        cube = add_fx_variables(cube, {'areacello': None}, CheckLevels.IGNORE)
+        assert cube.cell_measures() == []
+
+    def test_add_ancillary_vars(self, tmp_path):
+        """Test non-cell-measure variable is added as ancillary variable."""
+        self.fx_area.var_name = 'sftlf'
+        self.fx_area.standard_name = "land_area_fraction"
+        self.fx_area.units = '%'
+        fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc')
+        
iris.save(self.fx_area, fx_file) + fx_vars = { + 'sftlf': { + 'short_name': 'sftlf', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': fx_file} + } + cube = iris.cube.Cube(self.new_cube_data, + dim_coords_and_dims=self.coords_spec) + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) + assert cube.ancillary_variable(self.fx_area.standard_name) is not None + + def test_wrong_shape(self, tmp_path): + """ + Test fx_variable is not added if it's not broadcastable to cube. + """ + volume_data = np.ones((2, 3, 3, 3)) + volume_cube = iris.cube.Cube( + volume_data, + dim_coords_and_dims=[(self.yearly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + volume_cube.standard_name = 'ocean_volume' + volume_cube.var_name = 'volcello' + volume_cube.units = 'm3' + fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') + iris.save(volume_cube, fx_file) + fx_vars = { + 'volcello': { + 'short_name': 'volcello', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'Oyr', + 'frequency': 'yr', + 'filename': fx_file} + } + data = np.ones((12, 3, 3, 3)) + cube = iris.cube.Cube( + data, + dim_coords_and_dims=[(self.monthly_times, 0), + (self.depth, 1), + (self.lats, 2), + (self.lons, 3)]) + cube.var_name = 'thetao' + cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) + assert cube.cell_measures() == [] + + def test_remove_fx_vars(self): + """Test fx_variables are removed from cube.""" + cube = iris.cube.Cube(self.new_cube_3D_data, + dim_coords_and_dims=[(self.depth, 0), + (self.lats, 1), + (self.lons, 2)]) + self.fx_area.var_name = 'areacella' + self.fx_area.standard_name = 'cell_area' + self.fx_area.units = 'm2' + add_cell_measure(cube, self.fx_area, measure='area') + assert cube.cell_measure(self.fx_area.standard_name) is not None + self.fx_area.var_name = 'sftlf' + self.fx_area.standard_name = "land_area_fraction" + self.fx_area.units = '%' + add_ancillary_variable(cube, self.fx_area) + assert cube.ancillary_variable(self.fx_area.standard_name) is not None + cube = remove_fx_variables(cube) + assert cube.cell_measures() == [] + assert cube.ancillary_variables() == [] diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index f8ef998f90..ce0d9529b1 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -1,7 +1,7 @@ """Integration tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" -import warnings import unittest +import warnings from unittest.mock import call import numpy as np @@ -243,6 +243,12 @@ def test_concatenate(self): np.testing.assert_array_equal( concatenated.coord('time').points, np.array([1, 2, 3, 4, 5, 6])) + def test_concatenate_empty_cubes(self): + """Test concatenation with empty :class:`iris.cube.CubeList`.""" + empty_cubes = CubeList([]) + result = _io.concatenate(empty_cubes) + assert result is empty_cubes + def test_concatenate_noop(self): """Test concatenation of a single cube.""" concatenated = _io.concatenate([self.raw_cubes[0]]) diff --git a/tests/integration/preprocessor/_io/test_save.py b/tests/integration/preprocessor/_io/test_save.py index cc6c98364c..9ccb25efcf 100644 --- a/tests/integration/preprocessor/_io/test_save.py +++ b/tests/integration/preprocessor/_io/test_save.py @@ -8,7 +8,7 @@ import netCDF4 import numpy as np from iris.coords import DimCoord -from iris.cube import Cube +from iris.cube import Cube, CubeList from 
esmvalcore.preprocessor import save
 
 
@@ -78,6 +78,13 @@ def test_save_zlib(self):
         self.assertEqual(sample_filters['complevel'], 4)
         handler.close()
 
+    def test_fail_empty_cubes(self):
+        """Test save fails if an empty CubeList is provided."""
+        (_, filename) = self._create_sample_cube()
+        empty_cubes = CubeList([])
+        with self.assertRaises(ValueError):
+            save(empty_cubes, filename)
+
     def test_fail_without_filename(self):
         """Test save fails if filename is not provided."""
         cube, _ = self._create_sample_cube()
diff --git a/tests/integration/preprocessor/_mask/test_mask.py b/tests/integration/preprocessor/_mask/test_mask.py
index 5ddb30a228..4e2ef513f8 100644
--- a/tests/integration/preprocessor/_mask/test_mask.py
+++ b/tests/integration/preprocessor/_mask/test_mask.py
@@ -10,8 +10,10 @@
 import numpy as np
 import pytest
 
+from esmvalcore.cmor.check import CheckLevels
 from esmvalcore.preprocessor import (PreprocessorFile, mask_fillvalues,
-                                     mask_landsea, mask_landseaice)
+                                     mask_landsea, mask_landseaice,
+                                     add_fx_variables)
 from tests import assert_array_equal
 
 
@@ -47,58 +49,90 @@ def setUp(self):
                                       units='hours')
         self.coords_spec = [(self.lats, 0), (self.lons, 1)]
         self.fx_mask = iris.cube.Cube(fx_data,
-                                      dim_coords_and_dims=self.coords_spec)
+                                      dim_coords_and_dims=self.coords_spec,
+                                      units='%')
         self.mock_data = np.ma.empty((4, 3, 3))
         self.mock_data[:] = 10.
 
-    def test_components_fx_dict(self, tmp_path):
-        """Test compatibility of input fx dictionary."""
+    def test_components_fx_var(self, tmp_path):
+        """Test compatibility of ancillary variables."""
+        self.fx_mask.var_name = 'sftlf'
+        self.fx_mask.standard_name = 'land_area_fraction'
         sftlf_file = str(tmp_path / 'sftlf_mask.nc')
         iris.save(self.fx_mask, sftlf_file)
+        fx_vars = {
+            'sftlf': {
+                'short_name': 'sftlf',
+                'project': 'CMIP6',
+                'dataset': 'EC-Earth3',
+                'mip': 'fx',
+                'frequency': 'fx',
+                'filename': sftlf_file}
+        }
         new_cube_land = iris.cube.Cube(self.new_cube_data,
                                        dim_coords_and_dims=self.coords_spec)
+        new_cube_land = add_fx_variables(new_cube_land, fx_vars,
+                                         CheckLevels.IGNORE)
         result_land = mask_landsea(
             new_cube_land,
-            {
-                'sftlf': sftlf_file,
-                'sftof': [],
-            },
             'land',
         )
         assert isinstance(result_land, iris.cube.Cube)
 
+        self.fx_mask.var_name = 'sftgif'
+        self.fx_mask.standard_name = 'land_ice_area_fraction'
         sftgif_file = str(tmp_path / 'sftgif_mask.nc')
         iris.save(self.fx_mask, sftgif_file)
+        fx_vars = {
+            'sftgif': {
+                'short_name': 'sftgif',
+                'project': 'CMIP6',
+                'dataset': 'EC-Earth3',
+                'mip': 'fx',
+                'frequency': 'fx',
+                'filename': sftgif_file}
+        }
         new_cube_ice = iris.cube.Cube(self.new_cube_data,
                                       dim_coords_and_dims=self.coords_spec)
+        new_cube_ice = add_fx_variables(new_cube_ice, fx_vars,
+                                        CheckLevels.IGNORE)
         result_ice = mask_landseaice(
             new_cube_ice,
-            {
-                'sftgif': sftgif_file,
-                'sftof': [],
-            },
             'ice',
         )
         assert isinstance(result_ice, iris.cube.Cube)
 
     def test_mask_landsea(self, tmp_path):
         """Test mask_landsea func."""
+        self.fx_mask.var_name = 'sftlf'
+        self.fx_mask.standard_name = 'land_area_fraction'
         sftlf_file = str(tmp_path / 'sftlf_mask.nc')
         iris.save(self.fx_mask, sftlf_file)
+        fx_vars = {
+            'sftlf': {
+                'short_name': 'sftlf',
+                'project': 'CMIP6',
+                'dataset': 'EC-Earth3',
+                'mip': 'fx',
+                'frequency': 'fx',
+                'filename': sftlf_file}
+        }
         new_cube_land = iris.cube.Cube(self.new_cube_data,
                                        dim_coords_and_dims=self.coords_spec)
+        new_cube_land = add_fx_variables(new_cube_land, fx_vars,
+                                         CheckLevels.IGNORE)
         new_cube_sea = iris.cube.Cube(self.new_cube_data,
                                       dim_coords_and_dims=self.coords_spec)
+        new_cube_sea = 
add_fx_variables(new_cube_sea, fx_vars, + CheckLevels.IGNORE) # mask with fx files result_land = mask_landsea( new_cube_land, - {'sftlf': sftlf_file}, 'land', ) result_sea = mask_landsea( new_cube_sea, - {'sftlf': sftlf_file}, 'sea', ) expected = np.ma.empty((3, 3)) @@ -117,17 +151,19 @@ def test_mask_landsea(self, tmp_path): # Mask with shp files although sftlf is available new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_land = add_fx_variables(new_cube_land, fx_vars, + CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) + new_cube_sea = add_fx_variables(new_cube_sea, fx_vars, + CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, - {'sftlf': sftlf_file}, 'land', always_use_ne_mask=True, ) result_sea = mask_landsea( new_cube_sea, - {'sftlf': sftlf_file}, 'sea', always_use_ne_mask=True, ) @@ -145,8 +181,8 @@ def test_mask_landsea(self, tmp_path): dim_coords_and_dims=self.coords_spec) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_land = mask_landsea(new_cube_land, {}, 'land') - result_sea = mask_landsea(new_cube_sea, {}, 'sea') + result_land = mask_landsea(new_cube_land, 'land') + result_sea = mask_landsea(new_cube_sea, 'sea') # bear in mind all points are in the ocean np.ma.set_fill_value(result_land.data, 1e+20) @@ -158,12 +194,24 @@ def test_mask_landsea(self, tmp_path): def test_mask_landseaice(self, tmp_path): """Test mask_landseaice func.""" + self.fx_mask.var_name = 'sftgif' + self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) + fx_vars = { + 'sftgif': { + 'short_name': 'sftgif', + 'project': 'CMIP6', + 'dataset': 'EC-Earth3', + 'mip': 'fx', + 'frequency': 'fx', + 'filename': sftgif_file} + } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_ice = mask_landseaice(new_cube_ice, {'sftgif': sftgif_file}, - 'ice') + new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, + CheckLevels.IGNORE) + result_ice = mask_landseaice(new_cube_ice, 'ice') expected = np.ma.empty((3, 3)) expected.data[:] = 200. 
expected.mask = np.ones((3, 3), bool) diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index db7ec5b8da..b265ee59ab 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -11,9 +11,10 @@ from esmvalcore.cmor.table import read_cmor_tables # Initialize with standard config developer file -esmvalcore._config.CFG = esmvalcore._config.read_config_developer_file() +CFG_DEVELOPER = esmvalcore._config.read_config_developer_file() +esmvalcore._config._config.CFG = CFG_DEVELOPER # Initialize CMOR tables -read_cmor_tables(esmvalcore._config.CFG) +read_cmor_tables(CFG_DEVELOPER) # Load test configuration with open(os.path.join(os.path.dirname(__file__), 'data_finder.yml')) as file: diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 459c242b7e..8d2f932e4e 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -76,6 +76,7 @@ ) DEFAULT_PREPROCESSOR_STEPS = ( + 'add_fx_variables', 'cleanup', 'cmor_check_data', 'cmor_check_metadata', @@ -85,6 +86,7 @@ 'fix_file', 'fix_metadata', 'load', + 'remove_fx_variables', 'save', ) @@ -161,6 +163,11 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'short_name': 'chl', 'frequency': 'yr', }, + 'add_fx_variables': { + 'fx_variables': {}, + 'check_level': CheckLevels.DEFAULT, + }, + 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -582,6 +589,11 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'short_name': 'sftlf', 'frequency': 'fx', }, + 'add_fx_variables': { + 'fx_variables': {}, + 'check_level': CheckLevels.DEFAULT, + }, + 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -1655,9 +1667,9 @@ def test_weighting_landsea_fraction(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) if product.attributes['project'] == 'obs4mips': assert len(fx_variables) == 1 @@ -1706,18 +1718,12 @@ def test_weighting_landsea_fraction_no_fx(tmp_path, patched_failing_datafinder, for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) - if product.attributes['project'] == 'obs4mips': - assert len(fx_variables) == 1 - assert fx_variables['sftlf'] == [] - else: - assert len(fx_variables) == 2 - assert fx_variables['sftlf'] == [] - assert fx_variables['sftof'] == [] + assert len(fx_variables) == 0 def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, @@ -1763,13 +1769,9 @@ def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, continue assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 2 + assert len(settings) == 1 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = 
settings['fx_variables']
-    assert isinstance(fx_variables, dict)
-    assert len(fx_variables) == 1
-    assert fx_variables.get('sftlf')
 
 
 def test_weighting_landsea_fraction_exclude_fail(tmp_path, patched_datafinder,
@@ -1843,9 +1845,9 @@ def test_landmask(tmp_path, patched_datafinder, config_user):
     for product in task.products:
         assert 'mask_landsea' in product.settings
         settings = product.settings['mask_landsea']
-        assert len(settings) == 2
+        assert len(settings) == 1
         assert settings['mask_out'] == 'sea'
-        fx_variables = settings['fx_variables']
+        fx_variables = product.settings['add_fx_variables']['fx_variables']
         assert isinstance(fx_variables, dict)
         fx_variables = fx_variables.values()
         if product.attributes['project'] == 'obs4mips':
@@ -1855,6 +1857,80 @@ def test_landmask(tmp_path, patched_datafinder, config_user):
 
 
 def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user):
+    content = dedent("""
+        preprocessors:
+          landmask:
+            mask_landsea:
+              mask_out: sea
+              fx_variables:
+                sftlf:
+                  exp: piControl
+            mask_landseaice:
+              mask_out: sea
+              fx_variables:
+                sftgif:
+                  exp: piControl
+            volume_statistics:
+              operator: mean
+            area_statistics:
+              operator: mean
+              fx_variables:
+                areacello:
+                  mip: fx
+                  exp: piControl
+        diagnostics:
+          diagnostic_name:
+            variables:
+              gpp:
+                preprocessor: landmask
+                project: CMIP5
+                mip: Lmon
+                exp: historical
+                start_year: 2000
+                end_year: 2005
+                ensemble: r1i1p1
+                additional_datasets:
+                  - {dataset: CanESM2}
+            scripts: null
+        """)
+    recipe = get_recipe(tmp_path, content, config_user)
+
+    # Check custom fx variables
+    task = recipe.tasks.pop()
+    product = task.products.pop()
+
+    # landsea
+    settings = product.settings['mask_landsea']
+    assert len(settings) == 1
+    assert settings['mask_out'] == 'sea'
+    fx_variables = product.settings['add_fx_variables']['fx_variables']
+    assert isinstance(fx_variables, dict)
+    assert len(fx_variables) == 4
+    assert '_fx_' in fx_variables['sftlf']['filename']
+    assert '_piControl_' in fx_variables['sftlf']['filename']
+
+    # landseaice
+    settings = product.settings['mask_landseaice']
+    assert len(settings) == 1
+    assert settings['mask_out'] == 'sea'
+    assert '_fx_' in fx_variables['sftgif']['filename']
+    assert '_piControl_' in fx_variables['sftgif']['filename']
+
+    # volume statistics
+    settings = product.settings['volume_statistics']
+    assert len(settings) == 1
+    assert settings['operator'] == 'mean'
+    assert 'volcello' in fx_variables
+
+    # area statistics
+    settings = product.settings['area_statistics']
+    assert len(settings) == 1
+    assert settings['operator'] == 'mean'
+    assert '_fx_' in fx_variables['areacello']['filename']
+    assert '_piControl_' in fx_variables['areacello']['filename']
+
+
+def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user):
     content = dedent("""
         preprocessors:
           landmask:
@@ -1870,7 +1946,6 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user):
                 operator: mean
                 fx_variables: [{'short_name': 'areacello', 'mip': 'fx',
                                 'exp': 'piControl'}]
-
         diagnostics:
           diagnostic_name:
             variables:
@@ -1894,39 +1969,33 @@ def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user):
 
     # landsea
     settings = product.settings['mask_landsea']
-    assert len(settings) == 2
+    assert len(settings) == 1
     assert settings['mask_out'] == 'sea'
-    fx_variables = settings['fx_variables']
+    fx_variables = product.settings['add_fx_variables']['fx_variables']
    assert isinstance(fx_variables, dict)
-    assert len(fx_variables) == 1
-    assert '_fx_' in fx_variables['sftlf']
-    assert '_piControl_' in 
fx_variables['sftlf']
+    assert len(fx_variables) == 4
+    assert '_fx_' in fx_variables['sftlf']['filename']
+    assert '_piControl_' in fx_variables['sftlf']['filename']
 
     # landseaice
     settings = product.settings['mask_landseaice']
-    assert len(settings) == 2
+    assert len(settings) == 1
     assert settings['mask_out'] == 'sea'
-    fx_variables = settings['fx_variables']
-    assert isinstance(fx_variables, dict)
-    assert len(fx_variables) == 1
-    assert '_fx_' in fx_variables['sftgif']
-    assert '_piControl_' in fx_variables['sftgif']
+    assert '_fx_' in fx_variables['sftgif']['filename']
+    assert '_piControl_' in fx_variables['sftgif']['filename']
 
     # volume statistics
     settings = product.settings['volume_statistics']
     assert len(settings) == 1
     assert settings['operator'] == 'mean'
-    assert 'fx_variables' not in settings
+    assert 'volcello' in fx_variables
 
     # area statistics
     settings = product.settings['area_statistics']
-    assert len(settings) == 2
+    assert len(settings) == 1
     assert settings['operator'] == 'mean'
-    fx_variables = settings['fx_variables']
-    assert isinstance(fx_variables, dict)
-    assert len(fx_variables) == 1
-    assert '_fx_' in fx_variables['areacello']
-    assert '_piControl_' in fx_variables['areacello']
+    assert '_fx_' in fx_variables['areacello']['filename']
+    assert '_piControl_' in fx_variables['areacello']['filename']
 
 
 def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user):
@@ -1968,10 +2037,10 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user):
     for product in task.products:
         assert 'mask_landsea' in product.settings
         settings = product.settings['mask_landsea']
-        assert len(settings) == 3
+        assert len(settings) == 2
         assert settings['mask_out'] == 'sea'
         assert settings['always_use_ne_mask'] is False
-        fx_variables = settings['fx_variables']
+        fx_variables = product.settings['add_fx_variables']['fx_variables']
         assert isinstance(fx_variables, dict)
         fx_variables = fx_variables.values()
         assert not any(fx_variables)
@@ -1980,6 +2049,72 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user):
 
 def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user):
     TAGS.set_tag_values(TAGS_FOR_TESTING)
+    content = dedent("""
+        preprocessors:
+          preproc:
+            area_statistics:
+              operator: mean
+              fx_variables:
+                areacella:
+                  ensemble: r2i1p1f1
+                areacello:
+                clayfrac:
+                sftlf:
+                sftgif:
+                  mip: fx
+                sftof:
+            mask_landsea:
+              mask_out: sea
+
+        diagnostics:
+          diagnostic_name:
+            variables:
+              tas:
+                preprocessor: preproc
+                project: CMIP6
+                mip: Amon
+                exp: historical
+                start_year: 2000
+                end_year: 2005
+                ensemble: r1i1p1f1
+                grid: gn
+                additional_datasets:
+                  - {dataset: CanESM5}
+            scripts: null
+        """)
+    recipe = get_recipe(tmp_path, content, config_user)
+
+    # Check generated tasks
+    assert len(recipe.tasks) == 1
+    task = recipe.tasks.pop()
+    assert task.name == 'diagnostic_name' + TASKSEP + 'tas'
+    assert len(task.products) == 1
+    product = task.products.pop()
+
+    # Check area_statistics
+    assert 'area_statistics' in product.settings
+    settings = product.settings['area_statistics']
+    assert len(settings) == 1
+    assert settings['operator'] == 'mean'
+    fx_variables = product.settings['add_fx_variables']['fx_variables']
+    assert isinstance(fx_variables, dict)
+    assert len(fx_variables) == 6
+    assert '_fx_' in fx_variables['areacella']['filename']
+    assert '_r2i1p1f1_' in fx_variables['areacella']['filename']
+    assert '_Ofx_' in fx_variables['areacello']['filename']
+    assert '_Efx_' in fx_variables['clayfrac']['filename']
+    assert '_fx_' in 
fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_Ofx_' in fx_variables['sftof']['filename'] + + # Check mask_landsea + assert 'mask_landsea' in product.settings + settings = product.settings['mask_landsea'] + assert len(settings) == 1 + assert settings['mask_out'] == 'sea' + + +def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" preprocessors: preproc: @@ -1992,7 +2127,7 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): 'sftlf', 'sftgif', 'sftof', - ] + ] mask_landsea: mask_out: sea @@ -2024,34 +2159,23 @@ def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): # Check area_statistics assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella'] - assert '_Ofx_' in fx_variables['areacello'] - assert '_Efx_' in fx_variables['clayfrac'] - assert '_fx_' in fx_variables['sftlf'] - assert '_fx_' in fx_variables['sftgif'] - assert '_Ofx_' in fx_variables['sftof'] + assert '_fx_' in fx_variables['areacella']['filename'] + assert '_Ofx_' in fx_variables['areacello']['filename'] + assert '_Efx_' in fx_variables['clayfrac']['filename'] + assert '_fx_' in fx_variables['sftlf']['filename'] + assert '_fx_' in fx_variables['sftgif']['filename'] + assert '_Ofx_' in fx_variables['sftof']['filename'] # Check mask_landsea assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['mask_out'] == 'sea' - fx_variables = settings['fx_variables'] - assert isinstance(fx_variables, dict) - fx_variables = fx_variables.values() - assert len(fx_variables) == 2 - for fx_file in fx_variables: - if 'sftlf' in fx_file: - assert '_fx_' in fx_file - elif 'sftof' in fx_file: - assert '_Ofx_' in fx_file - else: - assert False def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2063,7 +2187,9 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Ofx diagnostics: diagnostic_name: @@ -2093,13 +2219,13 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' in fx_variables['volcello'] - assert '_Ofx_' not in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello']['filename'] + assert '_Ofx_' in fx_variables['volcello']['filename'] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2109,8 +2235,10 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: [{'short_name': 'volcello', 'mip': 'Oyr', - 'exp': 'piControl'}] + fx_variables: + volcello: + mip: Oyr + exp: 
piControl diagnostics: diagnostic_name: @@ -2140,14 +2268,14 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Oyr_' in fx_variables['volcello'] - assert '_piControl_' in fx_variables['volcello'] - assert '_Omon_' not in fx_variables['volcello'] + assert '_Oyr_' in fx_variables['volcello']['filename'][0] + assert '_piControl_' in fx_variables['volcello']['filename'][0] + assert '_Omon_' not in fx_variables['volcello']['filename'][0] def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, @@ -2199,7 +2327,8 @@ def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' - assert 'fx_variables' not in settings + fx_variables = product.settings['add_fx_variables']['fx_variables'] + assert len(fx_variables) == 2 def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, @@ -2209,7 +2338,9 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Omon diagnostics: diagnostic_name: @@ -2239,13 +2370,13 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'] - assert '_Omon_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0] + assert '_Omon_' in fx_variables['volcello']['filename'][0] def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -2255,7 +2386,9 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: ['volcello'] + fx_variables: + volcello: + mip: Oyr diagnostics: diagnostic_name: @@ -2285,13 +2418,13 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 2 + assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = settings['fx_variables'] + fx_variables = product.settings['add_fx_variables']['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello'] - assert '_Oyr_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello']['filename'][0] + assert '_Oyr_' in fx_variables['volcello']['filename'][0] def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, @@ -2301,7 +2434,8 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - 
fx_variables: ['volcello']
+              fx_variables:
+                volcello:
 
         diagnostics:
           diagnostic_name:
@@ -2330,13 +2464,13 @@
     # Check volume_statistics
     assert 'volume_statistics' in product.settings
     settings = product.settings['volume_statistics']
-    assert len(settings) == 2
+    assert len(settings) == 1
     assert settings['operator'] == 'mean'
-    fx_variables = settings['fx_variables']
+    fx_variables = product.settings['add_fx_variables']['fx_variables']
     assert isinstance(fx_variables, dict)
     assert len(fx_variables) == 1
-    assert '_fx_' in fx_variables['volcello']
-    assert '_Omon_' not in fx_variables['volcello']
+    assert '_fx_' in fx_variables['volcello']['filename']
+    assert '_Omon_' not in fx_variables['volcello']['filename']
 
 
 def test_wrong_project(tmp_path, patched_datafinder, config_user):
@@ -2345,7 +2479,8 @@
           preproc:
             volume_statistics:
               operator: mean
-              fx_variables: ['volcello']
+              fx_variables:
+                volcello:
 
         diagnostics:
           diagnostic_name:
@@ -2378,10 +2513,9 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user):
           preproc:
             area_statistics:
               operator: mean
-              fx_variables: [
-                'areacella',
-                'wrong_fx_variable',
-              ]
+              fx_variables:
+                areacella:
+                wrong_fx_variable:
 
         diagnostics:
           diagnostic_name:
@@ -2400,7 +2534,7 @@
             scripts: null
     """)
     msg = ("Requested fx variable 'wrong_fx_variable' not available in any "
-           "'fx'-related CMOR table")
+           "CMOR table")
     with pytest.raises(RecipeError) as rec_err_exp:
         get_recipe(tmp_path, content, config_user)
     assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG
diff --git a/tests/integration/test_recipe_checks.py b/tests/integration/test_recipe_checks.py
index 3eb2626a28..5cec9b2abf 100644
--- a/tests/integration/test_recipe_checks.py
+++ b/tests/integration/test_recipe_checks.py
@@ -1,4 +1,5 @@
 """Integration tests for :mod:`esmvalcore._recipe_checks`."""
+from typing import Any, List
 from unittest import mock
 
 import pytest
@@ -58,7 +59,7 @@ def test_data_availability_data(mock_logger, input_files, var, error):
     assert var == saved_var
 
 
-DATA_AVAILABILITY_NO_DATA = [
+DATA_AVAILABILITY_NO_DATA: List[Any] = [
     ([], [], None),
     ([], None, None),
     (None, [], None),
diff --git a/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc b/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc
index c473d78f49b5ca3320a423179772e5ee52a0b855..81e9e60a627758ee5e14304c4a5434366843d204 100644
GIT binary patch
[literal 23299 and delta 6075: base85-encoded binary data omitted]
diff --git a/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc b/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc
index c473d78f49b5ca3320a423179772e5ee52a0b855..81e9e60a627758ee5e14304c4a5434366843d204 100644
GIT binary patch
[literal 23299 and delta 6075: base85-encoded binary data omitted; payload appears byte-identical to the full-mean.nc patch above]
diff --git a/tests/unit/cmor/test_cmor_check.py b/tests/unit/cmor/test_cmor_check.py
index 504eaf06f1..fa1c314166 100644
--- a/tests/unit/cmor/test_cmor_check.py
+++ b/tests/unit/cmor/test_cmor_check.py
@@ -619,6 +619,23 @@ def test_non_increasing(self):
         self._update_coordinate_values(self.cube, coord, values)
         self._check_fails_in_metadata()
 
+    def test_non_increasing_fix(self):
+        """Check automatic fix for direction."""
+        coord = self.cube.coord('latitude')
+        values = np.linspace(
+            coord.points[-1],
+            coord.points[0],
+            len(coord.points)
+        )
+        self._update_coordinate_values(self.cube, coord, values)
+        self._check_cube(automatic_fixes=True)
+        self._check_cube()
+        # test bounds are contiguous
+        bounds = self.cube.coord('latitude').bounds
+        
right_bounds = bounds[:-2, 1] + left_bounds = bounds[1:-1, 0] + self.assertTrue(np.all(left_bounds == right_bounds)) + def test_non_decreasing(self): """Fail in metadata if decreasing coordinate is increasing.""" self.var_info.coordinates['lat'].stored_direction = 'decreasing' @@ -639,6 +656,11 @@ def test_non_decreasing_fix(self): for index in range(20): self.assertTrue( iris.util.approx_equal(cube_points[index], reference[index])) + # test bounds are contiguous + bounds = self.cube.coord('latitude').bounds + right_bounds = bounds[:-2, 1] + left_bounds = bounds[1:-1, 0] + self.assertTrue(np.all(left_bounds == right_bounds)) def test_not_bounds(self): """Warning if bounds are not available.""" @@ -669,6 +691,15 @@ def test_lons_automatic_fix(self): self.cube = self.cube.intersection(longitude=(-180., 180.)) self._check_cube(automatic_fixes=True) + def test_lons_automatic_fix_with_bounds(self): + """Test automatic fixes for bad longitudes with added bounds.""" + self.cube.coord('longitude').bounds = None + self.cube = self.cube.intersection(longitude=(-180., 180.)) + self._check_cube(automatic_fixes=True) + self.assertTrue(self.cube.coord('longitude').points.min() >= 0.) + self.assertTrue(self.cube.coord('longitude').points.max() <= 360.) + self.assertTrue(self.cube.coord('longitude').has_bounds()) + def test_high_lons_automatic_fix(self): """Test automatic fixes for high longitudes.""" self.cube = self.cube.intersection(longitude=(180., 520.)) diff --git a/tests/unit/data_finder/test_get_start_end_year.py b/tests/unit/data_finder/test_get_start_end_year.py index 6e415c0dad..e485f3ccf9 100644 --- a/tests/unit/data_finder/test_get_start_end_year.py +++ b/tests/unit/data_finder/test_get_start_end_year.py @@ -19,6 +19,11 @@ ['var_control-19800101_whatever.nc', 1980, 1980], ['19800101_var_control-1950_whatever.nc', 1980, 1980], ['var_control-1950_whatever_19800101.nc', 1980, 1980], + ['CM61-LR-hist-03.1950_18500101_19491231_1M_concbc.nc', 1850, 1949], + [ + 'icon-2.6.1_atm_amip_R2B5_r1v1i1p1l1f1_phy_3d_ml_20150101T000000Z.nc', + 2015, 2015 + ], ] diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/data_finder/test_replace_tags.py index 93ba42b41a..097cc7b328 100644 --- a/tests/unit/data_finder/test_replace_tags.py +++ b/tests/unit/data_finder/test_replace_tags.py @@ -1,22 +1,65 @@ """Tests for _replace_tags in _data_finder.py.""" - from esmvalcore._data_finder import _replace_tags VARIABLE = { + 'project': 'CMIP6', + 'dataset': 'ACCURATE-MODEL', + 'activity': 'act', + 'exp': 'experiment', + 'institute': 'HMA', + 'ensemble': 'r1i1p1f1', + 'mip': 'Amon', 'short_name': 'tas', + 'grid': 'gr', } -def test_replace_tags_str(): - assert _replace_tags('folder/subfolder/{short_name}', - VARIABLE) == ['folder/subfolder/tas'] +def test_replace_tags(): + """Tests for get_start_end_year function.""" + path = _replace_tags( + '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' + '{grid}/{latestversion}', VARIABLE) + input_file = _replace_tags( + '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', VARIABLE) + output_file = _replace_tags( + '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', VARIABLE) + assert path == [ + 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' + '{latestversion}' + ] + assert input_file == ['tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc'] + assert output_file == ['CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas'] def test_replace_tags_list_of_str(): - assert _replace_tags(('folder/subfolder/{short_name}', - 
'folder2/{short_name}', 'subfolder/{short_name}'), - VARIABLE) == [ - 'folder/subfolder/tas', - 'folder2/tas', - 'subfolder/tas', - ] + assert sorted( + _replace_tags(('folder/subfolder/{short_name}', 'folder2/{short_name}', + 'subfolder/{short_name}'), VARIABLE)) == sorted([ + 'folder2/tas', + 'folder/subfolder/tas', + 'subfolder/tas', + ]) + + +def test_replace_tags_with_subexperiment(): + """Tests for get_start_end_year function.""" + variable = {'sub_experiment': '199411', **VARIABLE} + path = _replace_tags( + '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' + '{grid}/{latestversion}', variable) + input_file = _replace_tags( + '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', variable) + output_file = _replace_tags( + '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', variable) + assert sorted(path) == sorted([ + 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' + '{latestversion}', + 'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/' + '{latestversion}' + ]) + assert input_file == [ + 'tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc' + ] + assert output_file == [ + 'CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas' + ] diff --git a/tests/unit/experimental/test_config.py b/tests/unit/experimental/test_config.py index ac74b3b856..d7c8a8d761 100644 --- a/tests/unit/experimental/test_config.py +++ b/tests/unit/experimental/test_config.py @@ -1,3 +1,4 @@ +from collections.abc import MutableMapping from pathlib import Path import numpy as np @@ -224,7 +225,7 @@ def test_config_class(): assert isinstance(cfg['output_dir'], Path) assert isinstance(cfg['auxiliary_data_dir'], Path) - from esmvalcore._config import CFG as CFG_DEV + from esmvalcore._config._config import CFG as CFG_DEV assert CFG_DEV @@ -238,7 +239,7 @@ def test_config_update(): def test_config_init(): config = Config() - assert isinstance(config, dict) + assert isinstance(config, MutableMapping) def test_session(): diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index b0563e539a..96359502fd 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -20,6 +20,7 @@ extract_region, extract_shape, ) +from esmvalcore.preprocessor._shared import guess_bounds class Test(tests.Test): @@ -69,6 +70,22 @@ def test_area_statistics_mean(self): expected = np.array([1.]) self.assert_array_equal(result.data, expected) + def test_area_statistics_cell_measure_mean(self): + """ + Test for area average of a 2D field. + The area measure is pre-loaded in the cube""" + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) + result = area_statistics(self.grid, 'mean') + expected = np.array([1.]) + self.assert_array_equal(result.data, expected) + def test_area_statistics_min(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'min') @@ -125,6 +142,27 @@ def test_extract_region(self): expected = np.ones((2, 2)) self.assert_array_equal(result.data, expected) + def test_extract_region_mean(self): + """ + Test for extracting a region and performing + the area mean of a 2D field. 
+ """ + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) + region = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) + # expected outcome + expected = np.ones((2, 2)) + self.assert_array_equal(region.data, expected) + result = area_statistics(region, 'mean') + expected_mean = np.array([1.]) + self.assert_array_equal(result.data, expected_mean) + def test_extract_region_neg_lon(self): """Test for extracting a region with a negative longitude field.""" result = extract_region(self.negative_grid, -0.5, 0.5, -0.5, 0.5) @@ -176,8 +214,14 @@ def test_extract_named_region(self): def create_irregular_grid_cube(data, lons, lats): """Create test cube on irregular grid.""" - nlat = iris.coords.DimCoord(range(data.shape[0]), var_name='nlat') - nlon = iris.coords.DimCoord(range(data.shape[1]), var_name='nlon') + times = iris.coords.DimCoord(np.array([10, 20], dtype=np.float64), + standard_name='time', + units=Unit('days since 1950-01-01', + calendar='gregorian')) + + # Construct cube + nlat = iris.coords.DimCoord(range(data.shape[1]), var_name='nlat') + nlon = iris.coords.DimCoord(range(data.shape[2]), var_name='nlon') lat = iris.coords.AuxCoord(lats, var_name='lat', standard_name='latitude', @@ -187,12 +231,13 @@ def create_irregular_grid_cube(data, lons, lats): standard_name='longitude', units='degrees') dim_coord_spec = [ - (nlat, 0), - (nlon, 1), + (times, 0), + (nlat, 1), + (nlon, 2), ] aux_coord_spec = [ - (lat, [0, 1]), - (lon, [0, 1]), + (lat, [1, 2]), + (lon, [1, 2]), ] cube = iris.cube.Cube( data, @@ -205,29 +250,58 @@ def create_irregular_grid_cube(data, lons, lats): IRREGULAR_EXTRACT_REGION_TESTS = [ + { + 'region': (100, 140, -10, 90), + 'mask': np.array( + [ + [False], + [False], + ], + dtype=bool, + ), + 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, 1:3, 1:2] + }, + { + 'region': (100, 360, -60, 0), + 'mask': np.array( + [ + [True, False], + [False, False], + ], + dtype=bool, + ), + 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, 0:2, 1:3] + }, { 'region': (10, 360, 0, 90), - 'mask': - np.array( + 'mask': np.array( [ - [True, True, True], - [True, True, False], - [True, False, False], + [True, False], + [False, False], ], dtype=bool, ), + 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, 1:, 1:] + }, + { + 'region': (0, 360, -90, -30), + 'mask': np.array( + [ + [False, False, False], + ], + dtype=bool, + ), + 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, :1, :] }, { 'region': (200, 10, -90, -60), - 'mask': - np.array( + 'mask': np.array( [ [False, True, False], - [True, True, True], - [True, True, True], ], dtype=bool, ), + 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, :1, :] }, { 'region': (-150, 50, 50, -50), @@ -240,6 +314,8 @@ def create_irregular_grid_cube(data, lons, lats): ], dtype=bool, ), + 'data': + np.arange(18, dtype=np.float32).reshape((2, 3, 3)) }, { 'region': (0, 0, -100, 0), @@ -255,7 +331,7 @@ def create_irregular_grid_cube(data, lons, lats): @pytest.fixture def irregular_extract_region_cube(): """Create a test cube on an irregular grid to test `extract_region`.""" - data = np.arange(9, dtype=np.float32).reshape((3, 3)) + data = np.arange(18, dtype=np.float32).reshape((2, 3, 3)) lons = np.array( [ [0, 120, 240], @@ -289,9 +365,9 @@ def 
test_extract_region_irregular(irregular_extract_region_cube, case): end_latitude=end_lat, ) - data = np.arange(9, dtype=np.float32).reshape((3, 3)) - np.testing.assert_array_equal(cube.data.mask, case['mask']) - np.testing.assert_array_equal(cube.data.data, data) + for i in range(2): + np.testing.assert_array_equal(cube.data[i].mask, case['mask']) + np.testing.assert_array_equal(cube.data.data, case['data']) else: with pytest.raises(ValueError) as exc: extract_region( @@ -795,7 +871,7 @@ def test_extract_composite_shape_negative_bounds(make_testcube, @pytest.fixture def irreg_extract_shape_cube(): """Create a test cube on an irregular grid to test `extract_shape`.""" - data = np.arange(9, dtype=np.float32).reshape((3, 3)) + data = np.arange(18, dtype=np.float32).reshape((2, 3, 3)) lats = np.array( [ [0.0, 0.0, 0.1], @@ -832,7 +908,7 @@ def test_extract_shape_irregular(irreg_extract_shape_cube, tmp_path, method): cube = extract_shape(irreg_extract_shape_cube, shapefile, method) - data = np.arange(9, dtype=np.float32).reshape((3, 3)) + data = np.arange(18, dtype=np.float32).reshape((2, 3, 3)) mask = np.array( [ [True, True, True], @@ -844,7 +920,8 @@ def test_extract_shape_irregular(irreg_extract_shape_cube, tmp_path, method): if method == 'representative': mask[1, 1] = True np.testing.assert_array_equal(cube.data, data) - np.testing.assert_array_equal(cube.data.mask, mask) + for i in range(2): + np.testing.assert_array_equal(cube.data[i].mask, mask) def test_extract_shape_wrong_method_raises(): diff --git a/tests/unit/preprocessor/_derive/test_rlus.py b/tests/unit/preprocessor/_derive/test_rlus.py new file mode 100644 index 0000000000..7ac7191f05 --- /dev/null +++ b/tests/unit/preprocessor/_derive/test_rlus.py @@ -0,0 +1,35 @@ +"""Test derivation of `rlus`.""" +import iris +import numpy as np +import pytest + +import esmvalcore.preprocessor._derive.rlus as rlus + +from .test_shared import get_cube + + +@pytest.fixture +def cubes(): + rlds_name = 'surface_downwelling_longwave_flux_in_air' + rlns_name = 'surface_net_downward_longwave_flux' + rlds_cube = get_cube([[[100.]]], + air_pressure_coord=False, + standard_name=rlds_name) + rlds_cube.attributes["positive"] = "down" + rlns_cube = get_cube([[[50.0]]], + air_pressure_coord=False, + standard_name=rlns_name) + rlns_cube.attributes["positive"] = "down" + + rlns_cube.coord("longitude").var_name = "lon" + rlns_cube.coord("longitude").var_name = "lat" + + return iris.cube.CubeList([rlds_cube, rlns_cube]) + + +def test_rlntcs_calculation(cubes): + derived_var = rlus.DerivedVariable() + out_cube = derived_var.calculate(cubes) + np.testing.assert_allclose(out_cube.data, + np.array([[[50.0]]])) + assert out_cube.attributes['positive'] == 'up' diff --git a/tests/unit/preprocessor/_derive/test_rsus.py b/tests/unit/preprocessor/_derive/test_rsus.py new file mode 100644 index 0000000000..7636913561 --- /dev/null +++ b/tests/unit/preprocessor/_derive/test_rsus.py @@ -0,0 +1,35 @@ +"""Test derivation of `rsus`.""" +import iris +import numpy as np +import pytest + +import esmvalcore.preprocessor._derive.rsus as rsus + +from .test_shared import get_cube + + +@pytest.fixture +def cubes(): + rsds_name = 'surface_downwelling_shortwave_flux_in_air' + rsns_name = 'surface_net_downward_shortwave_flux' + rsds_cube = get_cube([[[100.]]], + air_pressure_coord=False, + standard_name=rsds_name) + rsds_cube.attributes["positive"] = "down" + rsns_cube = get_cube([[[50.0]]], + air_pressure_coord=False, + standard_name=rsns_name) + rsns_cube.attributes["positive"] = 
"down" + + rsns_cube.coord("longitude").var_name = "lon" + rsns_cube.coord("longitude").var_name = "lat" + + return iris.cube.CubeList([rsds_cube, rsns_cube]) + + +def test_rsntcs_calculation(cubes): + derived_var = rsus.DerivedVariable() + out_cube = derived_var.calculate(cubes) + np.testing.assert_allclose(out_cube.data, + np.array([[[50.0]]])) + assert out_cube.attributes['positive'] == 'up' diff --git a/tests/unit/preprocessor/_mask/test_mask.py b/tests/unit/preprocessor/_mask/test_mask.py index 2d4c8948e6..a6b28e2cae 100644 --- a/tests/unit/preprocessor/_mask/test_mask.py +++ b/tests/unit/preprocessor/_mask/test_mask.py @@ -7,7 +7,7 @@ import iris import tests from cf_units import Unit -from esmvalcore.preprocessor._mask import (_apply_fx_mask, _check_dims, +from esmvalcore.preprocessor._mask import (_apply_fx_mask, count_spells, _get_fx_mask, mask_above_threshold, mask_below_threshold, @@ -63,12 +63,6 @@ def test_apply_fx_mask_on_masked_data(self): mask=dummy_fx_mask) self.assert_array_equal(fixed_mask, app_mask) - def test_check_dims(self): - """Test _check_dims func.""" - malformed_cube = self.arr[0] - np.testing.assert_equal(True, _check_dims(self.arr, self.arr)) - np.testing.assert_equal(False, _check_dims(self.arr, malformed_cube)) - def test_count_spells(self): """Test count_spells func.""" ref_spells = count_spells(self.time_cube.data, -1000., 0, 1) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index dc9092eb2f..6da22c98e8 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -88,7 +88,7 @@ def generate_cube_from_dates( standard_name='time', units=unit) - data = np.array((fill_val, ) * len_data) + data = np.array((fill_val, ) * len_data, dtype=np.float32) if lazy: data = da.from_array(data) @@ -104,7 +104,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): # Cube with masked data cube2 = cube1.copy() - data2 = np.ma.array([5, 5, 5], mask=[True, False, False]) + data2 = np.ma.array([5, 5, 5], mask=[True, False, False], dtype=np.float32) if lazy: data2 = da.from_array(data2) cube2.data = data2 diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 6335a74f64..08a1ee26c0 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -2,7 +2,6 @@ import unittest -import iris import iris.coord_categorisation import iris.coords import numpy as np diff --git a/tests/unit/preprocessor/_regrid/test_regrid.py b/tests/unit/preprocessor/_regrid/test_regrid.py index fb6ec94232..b7beaca442 100644 --- a/tests/unit/preprocessor/_regrid/test_regrid.py +++ b/tests/unit/preprocessor/_regrid/test_regrid.py @@ -1,16 +1,20 @@ -""" -Unit tests for the :func:`esmvalcore.preprocessor.regrid.regrid` function. 
- -""" +"""Unit tests for the :func:`esmvalcore.preprocessor.regrid.regrid` +function.""" import unittest from unittest import mock import iris +import numpy as np +import pytest import tests from esmvalcore.preprocessor import regrid -from esmvalcore.preprocessor._regrid import _CACHE, HORIZONTAL_SCHEMES +from esmvalcore.preprocessor._regrid import ( + _CACHE, + HORIZONTAL_SCHEMES, + _horizontal_grid_is_close, +) class Test(tests.Test): @@ -64,9 +68,17 @@ def setUp(self): 'unstructured_nearest' ] - def _return_mock_global_stock_cube(spec, - lat_offset=True, - lon_offset=True): + def _mock_horizontal_grid_is_close(src, tgt): + return False + + self.patch('esmvalcore.preprocessor._regrid._horizontal_grid_is_close', + side_effect=_mock_horizontal_grid_is_close) + + def _return_mock_global_stock_cube( + spec, + lat_offset=True, + lon_offset=True, + ): return self.tgt_grid self.mock_stock = self.patch( @@ -117,5 +129,108 @@ def test_regrid__cell_specification(self): _CACHE.clear() +def _make_coord(start: float, stop: float, step: int, *, name: str): + """Helper function for creating a coord.""" + coord = iris.coords.DimCoord( + np.linspace(start, stop, step), + standard_name=name, + units='degrees', + ) + coord.guess_bounds() + return coord + + +def _make_cube(*, lat: tuple, lon: tuple): + """Helper function for creating a cube.""" + lat_coord = _make_coord(*lat, name='latitude') + lon_coord = _make_coord(*lon, name='longitude') + + return iris.cube.Cube( + np.empty([len(lat_coord.points), + len(lon_coord.points)]), + dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)], + ) + + +# 10x10 +LAT_SPEC1 = (-85, 85, 18) +LON_SPEC1 = (5, 355, 36) + +# almost 10x10, but different shape +LAT_SPEC2 = (-85, 85, 17) +LON_SPEC2 = (5, 355, 35) + +# 10x10, but different coords +LAT_SPEC3 = (-90, 90, 18) +LON_SPEC3 = (0, 360, 36) + + +@pytest.mark.parametrize( + 'cube2_spec, expected', + ( + # equal lat/lon + ( + { + 'lat': LAT_SPEC1, + 'lon': LON_SPEC1, + }, + True, + ), + # different lon shape + ( + { + 'lat': LAT_SPEC1, + 'lon': LON_SPEC2, + }, + False, + ), + # different lat shape + ( + { + 'lat': LAT_SPEC2, + 'lon': LON_SPEC1, + }, + False, + ), + # different lon values + ( + { + 'lat': LAT_SPEC1, + 'lon': LON_SPEC3, + }, + False, + ), + # different lat values + ( + { + 'lat': LAT_SPEC3, + 'lon': LON_SPEC1, + }, + False, + ), + ), +) +def test_horizontal_grid_is_close(cube2_spec: dict, expected: bool): + """Test for `_horizontal_grid_is_close`.""" + cube1 = _make_cube(lat=LAT_SPEC1, lon=LON_SPEC1) + cube2 = _make_cube(**cube2_spec) + + assert _horizontal_grid_is_close(cube1, cube2) == expected + + +def test_regrid_is_skipped_if_grids_are_the_same(): + """Test that regridding is skipped if the grids are the same.""" + cube = _make_cube(lat=LAT_SPEC1, lon=LON_SPEC1) + scheme = 'linear' + + # regridding to the same spec returns the same cube + expected_same_cube = regrid(cube, target_grid='10x10', scheme=scheme) + assert expected_same_cube is cube + + # regridding to a different spec returns a different cube + expected_different_cube = regrid(cube, target_grid='5x5', scheme=scheme) + assert expected_different_cube is not cube + + if __name__ == '__main__': unittest.main() diff --git a/tests/unit/preprocessor/_time/test_time.py b/tests/unit/preprocessor/_time/test_time.py index eddcd33f6a..1677c05fcd 100644 --- a/tests/unit/preprocessor/_time/test_time.py +++ b/tests/unit/preprocessor/_time/test_time.py @@ -3,6 +3,7 @@ import copy import datetime import unittest +from typing import List, Tuple import iris 
import iris.coord_categorisation @@ -1263,7 +1264,7 @@ def make_map_data(number_years=2): return cube -PARAMETERS = [] +PARAMETERS: List[Tuple] = [] for period in ('full', 'day', 'month', 'season'): PARAMETERS.append((period, None)) if period == 'season': diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 0e07231609..82a755660d 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -10,7 +10,9 @@ from esmvalcore.preprocessor._volume import (volume_statistics, depth_integration, extract_trajectory, - extract_transect, extract_volume) + extract_transect, + extract_volume, + calculate_volume) class Test(tests.Test): @@ -83,12 +85,46 @@ def test_extract_volume(self): print(result.data, expected.data) self.assert_array_equal(result.data, expected) + def test_extract_volume_mean(self): + """ + Test to extract the top two layers and compute the + weighted average of a cube.""" + grid_volume = calculate_volume(self.grid_4d) + measure = iris.coords.CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) + result = extract_volume(self.grid_4d, 0., 10.) + expected = np.ma.ones((2, 2, 2, 2)) + self.assert_array_equal(result.data, expected) + result_mean = volume_statistics(result, 'mean') + expected_mean = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result_mean.data, expected_mean) + def test_volume_statistics(self): """Test to take the volume weighted average of a (2,3,2,2) cube.""" result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + def test_volume_statistics_cell_measure(self): + """ + Test to take the volume weighted average of a (2,3,2,2) cube. + The volume measure is pre-loaded in the cube. + """ + grid_volume = calculate_volume(self.grid_4d) + measure = iris.coords.CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume') + self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) + result = volume_statistics(self.grid_4d, 'mean') + expected = np.ma.array([1., 1.], mask=False) + self.assert_array_equal(result.data, expected) + def test_volume_statistics_long(self): """ Test to take the volume weighted average of a (4,3,2,2) cube. 
diff --git a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py index 387279ec9e..2de0ff3c80 100644 --- a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py +++ b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py @@ -1,5 +1,4 @@ """Unit tests for :mod:`esmvalcore.preprocessor._weighting`.""" -from unittest import mock import iris import numpy as np @@ -8,95 +7,69 @@ import esmvalcore.preprocessor._weighting as weighting +crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) +LON_3 = iris.coords.DimCoord([0, 1.5, 3], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3]], + units='degrees_east', + coord_system=crd_sys) +LON_4 = iris.coords.DimCoord([0, 1.5, 2.5, 3.5], + standard_name='longitude', + bounds=[[0, 1], [1, 2], [2, 3], + [3, 4]], + units='degrees_east', + coord_system=crd_sys) + CUBE_SFTLF = iris.cube.Cube( [10.0, 0.0, 100.0], var_name='sftlf', standard_name='land_area_fraction', units=Unit('%'), + dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_SFTOF = iris.cube.Cube( [100.0, 0.0, 50.0, 70.0], var_name='sftof', standard_name='sea_area_fraction', units=Unit('%'), + dim_coords_and_dims=[(LON_4, 0), ] ) CUBE_3 = iris.cube.Cube( [10.0, 20.0, 0.0], var_name='dim3', + dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_4 = iris.cube.Cube( [1.0, 2.0, -1.0, 2.0], var_name='dim4', + dim_coords_and_dims=[(LON_4, 0), ] ) + +CUBE_ANCILLARY_3 = CUBE_3.copy() +CUBE_ANCILLARY_3.add_ancillary_variable(CUBE_SFTLF, (0)) + +CUBE_ANCILLARY_4 = CUBE_4.copy() +CUBE_ANCILLARY_4.add_ancillary_variable(CUBE_SFTOF, (0)) + FRAC_SFTLF = np.array([0.1, 0.0, 1.0]) FRAC_SFTOF = np.array([0.0, 1.0, 0.5, 0.3]) -EMPTY_FX_FILES = { - 'sftlf': [], - 'sftof': [], -} -L_FX_FILES = { - 'sftlf': 'not/a/real/path', - 'sftof': [], -} -O_FX_FILES = { - 'sftlf': [], - 'sftof': 'not/a/real/path', -} -FX_FILES = { - 'sftlf': 'not/a/real/path', - 'sftof': 'i/was/mocked', -} -WRONG_FX_FILES = { - 'wrong': 'test', - 'sftlf': 'not/a/real/path', - 'sftof': 'i/was/mocked', -} LAND_FRACTION = [ - (CUBE_3, {}, [], None, ["No fx files given"]), - (CUBE_3, {'sftlf': []}, [], None, ["'sftlf' not found"]), - (CUBE_3, {'sftlf': 'a'}, [CUBE_SFTLF], FRAC_SFTLF, []), - (CUBE_3, {'sftof': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), - (CUBE_3, EMPTY_FX_FILES, [], None, - ["'sftlf' not found", "'sftof' not found"]), - (CUBE_3, L_FX_FILES, [CUBE_SFTLF], FRAC_SFTLF, []), - (CUBE_3, O_FX_FILES, [CUBE_SFTOF], None, - ["'sftlf' not found", "not broadcastable"]), - (CUBE_3, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, []), - (CUBE_3, {'wrong': 'a'}, [CUBE_SFTLF], None, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_3, {'wrong': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), - (CUBE_3, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_3, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, - ["not broadcastable"]), - (CUBE_4, {}, [], None, ["No fx files given"]), - (CUBE_4, {'sftlf': []}, [], None, ["'sftlf' not found"]), - (CUBE_4, {'sftlf': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), - (CUBE_4, {'sftof': 'a'}, [CUBE_SFTOF], FRAC_SFTOF, []), - (CUBE_4, EMPTY_FX_FILES, [], None, - ["'sftlf' not found", "'sftof' not found"]), - (CUBE_4, L_FX_FILES, [CUBE_SFTLF], None, - ["not broadcastable", "'sftof' not found"]), - (CUBE_4, O_FX_FILES, [CUBE_SFTOF], FRAC_SFTOF, ["'sftlf' not found"]), - (CUBE_4, FX_FILES, [CUBE_SFTLF, 
CUBE_SFTOF], FRAC_SFTOF, - ["not broadcastable"]), - (CUBE_4, {'wrong': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), - (CUBE_4, {'wrong': 'a'}, [CUBE_SFTOF], None, - ["expected 'sftlf' or 'sftof'"]), - (CUBE_4, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["not broadcastable", "not broadcastable"]), - (CUBE_4, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, - ["expected 'sftlf' or 'sftof'", "not broadcastable"]), + (CUBE_3, None, [ + 'Ancillary variables land/sea area fraction not found in cube. ' + 'Check fx_file availability.']), + (CUBE_4, None, [ + 'Ancillary variables land/sea area fraction not found in cube. ' + 'Check fx_file availability.']), + (CUBE_ANCILLARY_3, FRAC_SFTLF, []), + (CUBE_ANCILLARY_4, FRAC_SFTOF, []) ] -@pytest.mark.parametrize('cube,fx_files,fx_cubes,out,err', LAND_FRACTION) -@mock.patch.object(weighting, 'iris', autospec=True) -def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): +@pytest.mark.parametrize('cube,out,err', LAND_FRACTION) +def test_get_land_fraction(cube, out, err): """Test calculation of land fraction.""" - mock_iris.load_cube.side_effect = fx_cubes - (land_fraction, errors) = weighting._get_land_fraction(cube, fx_files) + (land_fraction, errors) = weighting._get_land_fraction(cube) if land_fraction is None: assert land_fraction == out else: @@ -104,46 +77,6 @@ def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): assert len(errors) == len(err) for (idx, error) in enumerate(errors): assert err[idx] in error - mock_iris.reset_mock() - - -SHAPES_TO_BROADCAST = [ - ((), (1, ), True), - ((), (10, 10), True), - ((1, ), (10, ), True), - ((1, ), (10, 10), True), - ((2, ), (10, ), False), - ((10, ), (), True), - ((10, ), (1, ), True), - ((10, ), (10, ), True), - ((10, ), (10, 10), True), - ((10, ), (7, 1), True), - ((10, ), (10, 7), False), - ((10, ), (7, 1, 10), True), - ((10, ), (7, 1, 1), True), - ((10, ), (7, 1, 7), False), - ((10, ), (7, 10, 7), False), - ((10, 1), (1, 1), True), - ((10, 1), (1, 100), True), - ((10, 1), (10, 7), True), - ((10, 12), (10, 1), True), - ((10, 12), (), True), - ((10, 12), (1, ), True), - ((10, 12), (12, ), True), - ((10, 12), (1, 1), True), - ((10, 12), (1, 12), True), - ((10, 12), (10, 10, 1), True), - ((10, 12), (10, 12, 1), False), - ((10, 12), (10, 12, 12), False), - ((10, 12), (10, 10, 12), True), -] - - -@pytest.mark.parametrize('shape_1,shape_2,out', SHAPES_TO_BROADCAST) -def test_shape_is_broadcastable(shape_1, shape_2, out): - """Test check if two shapes are broadcastable.""" - is_broadcastable = weighting._shape_is_broadcastable(shape_1, shape_2) - assert is_broadcastable == out CUBE_3_L = CUBE_3.copy([1.0, 0.0, 0.0]) @@ -152,37 +85,20 @@ def test_shape_is_broadcastable(shape_1, shape_2, out): CUBE_4_O = CUBE_4.copy([1.0, 0.0, -0.5, 1.4]) WEIGHTING_LANDSEA_FRACTION = [ - (CUBE_3, {}, 'land', ValueError), - (CUBE_3, {}, 'sea', ValueError), - (CUBE_3, EMPTY_FX_FILES, 'land', ValueError), - (CUBE_3, EMPTY_FX_FILES, 'sea', ValueError), - (CUBE_3, L_FX_FILES, 'land', CUBE_3_L), - (CUBE_3, L_FX_FILES, 'sea', CUBE_3_O), - (CUBE_3, O_FX_FILES, 'land', ValueError), - (CUBE_3, O_FX_FILES, 'sea', ValueError), - (CUBE_3, FX_FILES, 'land', CUBE_3_L), - (CUBE_3, FX_FILES, 'sea', CUBE_3_O), - (CUBE_3, FX_FILES, 'wrong', TypeError), - (CUBE_4, {}, 'land', ValueError), - (CUBE_4, {}, 'sea', ValueError), - (CUBE_4, EMPTY_FX_FILES, 'land', ValueError), - (CUBE_4, EMPTY_FX_FILES, 'sea', ValueError), - (CUBE_4, L_FX_FILES, 'land', ValueError), - 
(CUBE_4, L_FX_FILES, 'sea', ValueError), - (CUBE_4, O_FX_FILES, 'land', CUBE_4_L), - (CUBE_4, O_FX_FILES, 'sea', CUBE_4_O), - (CUBE_4, FX_FILES, 'land', CUBE_4_L), - (CUBE_4, FX_FILES, 'sea', CUBE_4_O), - (CUBE_4, FX_FILES, 'wrong', TypeError), + (CUBE_3, 'land', ValueError), + (CUBE_3, 'sea', ValueError), + (CUBE_ANCILLARY_3, 'land', CUBE_3_L), + (CUBE_ANCILLARY_3, 'sea', CUBE_3_O), + (CUBE_4, 'land', ValueError), + (CUBE_4, 'sea', ValueError), + (CUBE_ANCILLARY_4, 'land', CUBE_4_L), + (CUBE_ANCILLARY_4, 'sea', CUBE_4_O), ] -@pytest.mark.parametrize('cube,fx_files,area_type,out', +@pytest.mark.parametrize('cube,area_type,out', WEIGHTING_LANDSEA_FRACTION) -@mock.patch.object(weighting, 'iris', autospec=True) -def test_weighting_landsea_fraction(mock_iris, - cube, - fx_files, +def test_weighting_landsea_fraction(cube, area_type, out): """Test landsea fraction weighting preprocessor.""" @@ -190,18 +106,10 @@ def test_weighting_landsea_fraction(mock_iris, if isinstance(out, type): with pytest.raises(out): weighted_cube = weighting.weighting_landsea_fraction( - cube, fx_files, area_type) + cube, area_type) return # Regular cases - fx_cubes = [] - if fx_files.get('sftlf'): - fx_cubes.append(CUBE_SFTLF) - if fx_files.get('sftof'): - fx_cubes.append(CUBE_SFTOF) - mock_iris.load_cube.side_effect = fx_cubes - weighted_cube = weighting.weighting_landsea_fraction( - cube, fx_files, area_type) - assert weighted_cube == cube + weighted_cube = weighting.weighting_landsea_fraction(cube, area_type) + assert np.array_equal(weighted_cube.data, cube.data) assert weighted_cube is cube - mock_iris.reset_mock() diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index 1ed1875926..493f366fef 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -1,6 +1,6 @@ import pytest -from esmvalcore._recipe import Recipe +from esmvalcore._recipe import Recipe, _allow_skipping from esmvalcore._recipe_checks import RecipeError @@ -14,7 +14,7 @@ def test_expand_ensemble(self): }, ] - expanded = Recipe._expand_ensemble(datasets) + expanded = Recipe._expand_tag(datasets, 'ensemble') ensembles = [ 'r1i2p3', @@ -29,6 +29,31 @@ def test_expand_ensemble(self): for i, ensemble in enumerate(ensembles): assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble} + def test_expand_subexperiment(self): + + datasets = [ + { + 'dataset': 'XYZ', + 'sub_experiment': 's(1998:2005)', + }, + ] + + expanded = Recipe._expand_tag(datasets, 'sub_experiment') + + subexperiments = [ + 's1998', + 's1999', + 's2000', + 's2001', + 's2002', + 's2003', + 's2004', + 's2005', + ] + for i, subexperiment in enumerate(subexperiments): + assert expanded[i] == {'dataset': 'XYZ', + 'sub_experiment': subexperiment} + def test_expand_ensemble_nolist(self): datasets = [ @@ -39,4 +64,38 @@ def test_expand_ensemble_nolist(self): ] with pytest.raises(RecipeError): - Recipe._expand_ensemble(datasets) + Recipe._expand_tag(datasets, 'ensemble') + + +VAR_A = {'dataset': 'A'} +VAR_A_REF_A = {'dataset': 'A', 'reference_dataset': 'A'} +VAR_A_REF_B = {'dataset': 'A', 'reference_dataset': 'B'} + + +TEST_ALLOW_SKIPPING = [ + ([], VAR_A, {}, False), + ([], VAR_A, {'skip-nonexistent': False}, False), + ([], VAR_A, {'skip-nonexistent': True}, True), + ([], VAR_A_REF_A, {}, False), + ([], VAR_A_REF_A, {'skip-nonexistent': False}, False), + ([], VAR_A_REF_A, {'skip-nonexistent': True}, False), + ([], VAR_A_REF_B, {}, False), + ([], VAR_A_REF_B, {'skip-nonexistent': False}, False), + ([], VAR_A_REF_B, {'skip-nonexistent': True}, True), + (['A'], 
VAR_A, {}, False), + (['A'], VAR_A, {'skip-nonexistent': False}, False), + (['A'], VAR_A, {'skip-nonexistent': True}, False), + (['A'], VAR_A_REF_A, {}, False), + (['A'], VAR_A_REF_A, {'skip-nonexistent': False}, False), + (['A'], VAR_A_REF_A, {'skip-nonexistent': True}, False), + (['A'], VAR_A_REF_B, {}, False), + (['A'], VAR_A_REF_B, {'skip-nonexistent': False}, False), + (['A'], VAR_A_REF_B, {'skip-nonexistent': True}, False), +] + + +@pytest.mark.parametrize('ancestors,var,cfg,out', TEST_ALLOW_SKIPPING) +def test_allow_skipping(ancestors, var, cfg, out): + """Test ``_allow_skipping``.""" + result = _allow_skipping(ancestors, var, cfg) + assert result is out diff --git a/yamale_meta.yaml b/yamale_meta.yaml deleted file mode 100644 index b97136636c..0000000000 --- a/yamale_meta.yaml +++ /dev/null @@ -1,52 +0,0 @@ -{% set name = "yamale" %} -{% set version = "2.0" %} -{% set file_ext = "tar.gz" %} -{% set hash_type = "sha256" %} -{% set hash_value = "532897422b590f617a075d47badde4874c0b1d49ac10e151c1f04f73d0524b03" %} - -package: - name: '{{ name|lower }}' - version: '{{ version }}' - -source: - fn: '{{ name }}-{{ version }}.{{ file_ext }}' - url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.{{ file_ext }} - '{{ hash_type }}': '{{ hash_value }}' - -build: - noarch: python - number: 0 - entry_points: - - yamale=yamale.command_line:main - script: python setup.py install --single-version-externally-managed --record=record.txt - -requirements: - build: - - python - - setuptools - - pyyaml - run: - - python - - pyyaml - -test: - imports: - - yamale - - yamale.readers - - yamale.readers.tests - - yamale.schema - - yamale.syntax - - yamale.syntax.tests - - yamale.tests - - yamale.validators - - yamale.validators.tests - commands: - - yamale --help - -about: - home: https://github.com/23andMe/Yamale - license: MIT License - license_family: MIT - summary: A schema and validator for YAML. - description: A schema and validator for YAML. - From 73840dc5cd586ea017933a24ac150f9a8da340db Mon Sep 17 00:00:00 2001 From: Peter Kalverla Date: Wed, 9 Jun 2021 18:06:13 +0200 Subject: [PATCH 61/68] Revert "Merge branch origin/main into this branch and resolve conflicts" This reverts commit 3ac43b5ef5a51d6f79d1f82c71d946e15be18f61. 
--- .circleci/config.yml | 41 +- .github/pull_request_template.md | 51 +- .github/workflows/action-conda-publish.yml | 6 +- .../workflows/action-install-from-conda.yml | 13 +- .../workflows/action-install-from-pypi.yml | 9 +- .../workflows/action-install-from-source.yml | 8 +- .../action-pypi-build-and-deploy.yml | 2 +- .github/workflows/action-test.yml | 16 +- .gitignore | 1 - .pre-commit-config.yaml | 4 - .prospector.yml | 3 - .zenodo.json | 6 - CITATION.cff | 5 - README.md | 2 +- doc/api/esmvalcore.api.config.rst | 2 +- doc/api/esmvalcore.rst | 6 +- doc/changelog.rst | 2 +- doc/conf.py | 4 +- doc/contributing.rst | 750 +++++------------- doc/develop/derivation.rst | 12 +- doc/develop/fixing_data.rst | 101 +-- doc/develop/index.rst | 10 +- doc/develop/preprocessor_function.rst | 269 ------- doc/interfaces.rst | 2 - doc/quickstart/configure.rst | 34 +- doc/quickstart/find_data.rst | 13 - doc/quickstart/install.rst | 23 +- doc/quickstart/recipes.rst | 15 +- doc/recipe/overview.rst | 15 +- doc/recipe/preprocessor.rst | 157 +--- esmvalcore/_citation.py | 2 +- esmvalcore/_config/_logging.py | 12 +- esmvalcore/_data_finder.py | 74 +- esmvalcore/_recipe.py | 254 +++--- esmvalcore/_recipe_checks.py | 2 +- esmvalcore/_task.py | 6 +- esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py | 65 +- esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py | 6 +- esmvalcore/cmor/_fixes/cmip5/ec_earth.py | 76 +- esmvalcore/cmor/_fixes/cmip5/miroc5.py | 4 - esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py | 57 +- esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py | 12 +- esmvalcore/cmor/_fixes/cmip6/cesm2.py | 36 +- esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py | 8 - esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py | 8 - .../cmor/_fixes/cmip6/cesm2_waccm_fv2.py | 9 - esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py | 28 - esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py | 29 +- esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py | 5 +- esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py | 47 +- esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py | 50 +- esmvalcore/cmor/_fixes/cmip6/kiost_esm.py | 4 - esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py | 77 +- esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py | 25 - esmvalcore/cmor/_fixes/common.py | 126 ++- esmvalcore/cmor/_fixes/fix.py | 10 +- esmvalcore/cmor/_fixes/shared.py | 32 - esmvalcore/cmor/check.py | 42 +- esmvalcore/cmor/table.py | 25 +- esmvalcore/cmor/tables/custom/CMOR_tasaga.dat | 25 - esmvalcore/cmor/variable_alt_names.yml | 3 +- esmvalcore/config-developer.yml | 2 - esmvalcore/experimental/_logging.py | 3 +- esmvalcore/experimental/_warnings.py | 7 +- .../experimental/config/_config_object.py | 12 +- .../experimental/config/_validated_config.py | 27 +- esmvalcore/experimental/recipe.py | 29 +- esmvalcore/experimental/recipe_info.py | 14 +- esmvalcore/experimental/recipe_metadata.py | 5 +- esmvalcore/experimental/recipe_output.py | 19 +- esmvalcore/experimental/templates/__init__.py | 3 +- esmvalcore/experimental/utils.py | 18 +- esmvalcore/preprocessor/__init__.py | 9 +- esmvalcore/preprocessor/_ancillary_vars.py | 214 ----- esmvalcore/preprocessor/_area.py | 97 ++- esmvalcore/preprocessor/_derive/rlus.py | 49 -- esmvalcore/preprocessor/_derive/rsus.py | 49 -- esmvalcore/preprocessor/_io.py | 10 - esmvalcore/preprocessor/_mask.py | 122 ++- esmvalcore/preprocessor/_multimodel.py | 44 +- esmvalcore/preprocessor/_regrid.py | 54 +- esmvalcore/preprocessor/_volume.py | 42 +- esmvalcore/preprocessor/_weighting.py | 54 +- package/meta.yaml | 10 +- setup.cfg | 5 - setup.py | 13 +- .../cmor/_fixes/cmip5/test_access1_0.py | 2 +- 
.../cmor/_fixes/cmip5/test_access1_3.py | 2 +- .../cmor/_fixes/cmip5/test_bcc_csm1_1.py | 56 +- .../cmor/_fixes/cmip5/test_bcc_csm1_1_m.py | 8 +- .../cmor/_fixes/cmip5/test_ec_earth.py | 85 +- .../cmor/_fixes/cmip5/test_miroc5.py | 18 +- .../cmor/_fixes/cmip6/test_bcc_csm2_mr.py | 178 ++++- .../cmor/_fixes/cmip6/test_bcc_esm1.py | 39 +- .../cmor/_fixes/cmip6/test_cesm2.py | 115 +-- .../cmor/_fixes/cmip6/test_cesm2_fv2.py | 33 +- .../cmor/_fixes/cmip6/test_cesm2_waccm.py | 33 +- .../cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py | 33 +- .../cmor/_fixes/cmip6/test_cnrm_esm2_1.py | 60 +- .../cmor/_fixes/cmip6/test_fgoals_g3.py | 62 +- .../cmor/_fixes/cmip6/test_gfdl_cm4.py | 16 +- .../cmor/_fixes/cmip6/test_gfdl_esm4.py | 131 +-- .../cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py | 94 +-- .../cmor/_fixes/cmip6/test_kiost_esm.py | 18 +- .../cmor/_fixes/cmip6/test_mcm_ua_1_0.py | 146 +--- .../cmor/_fixes/cmip6/test_sam0_unicon.py | 32 +- tests/integration/cmor/_fixes/test_common.py | 253 +----- tests/integration/cmor/_fixes/test_fix.py | 27 +- tests/integration/cmor/_fixes/test_shared.py | 59 -- tests/integration/cmor/test_table.py | 9 - tests/integration/data_finder.yml | 470 ++--------- .../preprocessor/_ancillary_vars/__init__.py | 5 - .../_ancillary_vars/test_add_fx_variables.py | 257 ------ .../preprocessor/_io/test_concatenate.py | 8 +- .../integration/preprocessor/_io/test_save.py | 9 +- .../preprocessor/_mask/test_mask.py | 88 +- tests/integration/test_data_finder.py | 5 +- tests/integration/test_recipe.py | 322 +++----- tests/integration/test_recipe_checks.py | 3 +- .../timeseries_daily_365_day-full-mean.nc | Bin 23299 -> 25378 bytes .../timeseries_daily_365_day-overlap-mean.nc | Bin 23299 -> 25378 bytes tests/unit/cmor/test_cmor_check.py | 31 - .../data_finder/test_get_start_end_year.py | 5 - tests/unit/data_finder/test_replace_tags.py | 65 +- tests/unit/experimental/test_config.py | 5 +- tests/unit/preprocessor/_area/test_area.py | 121 +-- tests/unit/preprocessor/_derive/test_rlus.py | 35 - tests/unit/preprocessor/_derive/test_rsus.py | 35 - tests/unit/preprocessor/_mask/test_mask.py | 8 +- .../_multimodel/test_multimodel.py | 4 +- tests/unit/preprocessor/_other/test_other.py | 1 + .../unit/preprocessor/_regrid/test_regrid.py | 131 +-- tests/unit/preprocessor/_time/test_time.py | 3 +- .../unit/preprocessor/_volume/test_volume.py | 38 +- .../test_weighting_landsea_fraction.py | 188 +++-- tests/unit/test_recipe.py | 65 +- yamale_meta.yaml | 52 ++ 137 files changed, 1688 insertions(+), 5427 deletions(-) delete mode 100644 doc/develop/preprocessor_function.rst delete mode 100644 esmvalcore/cmor/tables/custom/CMOR_tasaga.dat delete mode 100644 esmvalcore/preprocessor/_ancillary_vars.py delete mode 100644 esmvalcore/preprocessor/_derive/rlus.py delete mode 100644 esmvalcore/preprocessor/_derive/rsus.py delete mode 100644 tests/integration/preprocessor/_ancillary_vars/__init__.py delete mode 100644 tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py delete mode 100644 tests/unit/preprocessor/_derive/test_rlus.py delete mode 100644 tests/unit/preprocessor/_derive/test_rsus.py create mode 100644 yamale_meta.yaml diff --git a/.circleci/config.yml b/.circleci/config.yml index 09e336c98b..27be5c21ae 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -8,8 +8,8 @@ commands: check_changes: steps: - run: | - if (test "$CIRCLE_BRANCH" = main || - git --no-pager diff --name-only origin/main... 
| + if (test "$CIRCLE_BRANCH" = master || + git --no-pager diff --name-only origin/master... | grep -q -E -f .circleci/install_triggers) then echo Running installation tests @@ -33,8 +33,7 @@ jobs: . /opt/conda/etc/profile.d/conda.sh conda activate esmvaltool pip install .[test] - pytest -n 2 -m "not installation and not sequential" - pytest -n 0 -m "sequential" + pytest -n 2 -m "not installation" - save_cache: key: test-{{ .Branch }} paths: @@ -63,25 +62,19 @@ jobs: . /opt/conda/etc/profile.d/conda.sh set -x mkdir /logs - # Add additional requirements for running all tests - echo " - - r-base - - r-yaml - - ncl - " >> environment.yml # Install - conda env create >> /logs/conda.txt 2>&1 + # conda update -y conda > /logs/conda.txt 2>&1 + conda env update >> /logs/conda.txt 2>&1 set +x; conda activate esmvaltool; set -x + conda install -yS r-base r-yaml ncl -c conda-forge pip install .[test] > /logs/install.txt 2>&1 # Log versions dpkg -l > /logs/versions.txt conda env export > /logs/environment.yml pip freeze > /logs/requirements.txt # Test installation - pytest -n 2 -m "not sequential" - pytest -n 0 -m "sequential" + pytest -n 2 esmvaltool version - no_output_timeout: 30m - save_cache: key: install-{{ .Branch }} paths: @@ -134,25 +127,20 @@ jobs: command: | . /opt/conda/etc/profile.d/conda.sh mkdir /logs - # Add additional requirements for running all tests - echo " - - r-base - - r-yaml - - ncl - " >> environment.yml # Install - conda env create >> /logs/conda.txt 2>&1 + # conda update -y conda > /logs/conda.txt 2>&1 + conda env update >> /logs/conda.txt 2>&1 conda activate esmvaltool pip install -e .[develop] > /logs/install.txt 2>&1 + # install additional requirements for running all tests + conda install -yS r-base r-yaml ncl -c conda-forge # Log versions dpkg -l > /logs/versions.txt conda env export > /logs/environment.yml pip freeze > /logs/requirements.txt # Test installation esmvaltool version - pytest -n 2 -m "not sequential" - pytest -n 0 -m "sequential" - no_output_timeout: 30m + pytest -n 2 - store_artifacts: path: /logs @@ -176,8 +164,7 @@ jobs: dpkg -l > /logs/versions.txt conda env export -n base > /logs/build_environment.yml # Build conda package - conda build package -c conda-forge > /logs/build_log.txt - no_output_timeout: 60m + conda build package -c conda-forge -c esmvalgroup > /logs/build_log.txt - store_artifacts: path: /logs @@ -219,7 +206,7 @@ workflows: filters: branches: only: - - main + - master jobs: - test - install diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c07a011dad..92cc2e7a0e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,54 +1,45 @@ ## Description -Closes #issue_number - -Link to documentation: +- Closes #issue_number +- Link to documentation: *** -## [Before you get started](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#getting-started) +## Before you get started -- [ ] [☝ Create an issue](https://github.com/ESMValGroup/ESMValCore/issues) to discuss what you are going to do +- [ ] [☝ Create an issue](https://github.com/ESMValGroup/ESMValCore/issues) to discuss what you are going to do -## [Checklist](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#checklist-for-pull-requests) +## Checklist -It is the responsibility of the author to make sure the pull request is ready to review. The icons indicate whether the item will be subject to the [🛠 Technical][1] or [🧪 Scientific][2] review. 
+- [ ] PR has a descriptive title for the [changelog](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) +- [ ] Labels are assigned so they can be used in the [changelog](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) +- [ ] Code follows the [style guide](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#code-style) +- [ ] [Documentation](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#documentation) is available for new functionality +- [ ] YAML files pass [`pre-commit`](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#pre-commit) or [`yamllint`](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/community/introduction.html#yaml) checks +- [ ] [Circle/CI tests pass](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) +- [ ] [Codacy code quality checks pass](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) +- [ ] [Documentation builds successfully](https://docs.esmvaltool.org/projects/esmvalcore/en/latest/contributing.html#branches-pull-requests-and-code-review) on readthedocs +- [ ] [Unit tests](https://docs.esmvaltool.org/projects/esmvalcore/projects/esmvalcore/en/latest/contributing.html#contributing-to-the-esmvalcore-package) are available - -[1]: https://docs.esmvaltool.org/en/latest/community/review.html#technical-review -[2]: https://docs.esmvaltool.org/en/latest/community/review.html#scientific-review +If you make backwards incompatible changes to the recipe format: -- [ ] [🧪][2] The new functionality is [relevant and scientifically sound](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#scientific-relevance) -- [ ] [🛠][1] This pull request has a [descriptive title and labels](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#pull-request-title-and-label) -- [ ] [🛠][1] Code is written according to the [code quality guidelines](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#code-quality) -- [ ] [🧪][2] and [🛠][1] [Documentation](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#documentation) is available -- [ ] [🛠][1] [Unit tests](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#tests) have been added -- [ ] [🛠][1] Changes are [backward compatible](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#backward-compatibility) -- [ ] [🛠][1] Any changed [dependencies have been added or removed](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#dependencies) correctly -- [ ] [🛠][1] The [list of authors](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#list-of-authors) is up to date -- [ ] [🛠][1] All [checks below this pull request](https://docs.esmvaltool.org/projects/ESMValCore/en/latest/contributing.html#pull-request-checks) were successful +- [ ] Update [ESMValTool](https://github.com/esmvalgroup/esmvaltool) and link the pull request(s) in the description *** To help with the number pull requests: -- 🙏 We kindly ask you to [review](https://docs.esmvaltool.org/en/latest/community/review.html#review-of-pull-requests) two other [open pull requests](https://github.com/ESMValGroup/ESMValCore/pulls) in this repository +- 🙏 We kindly ask 
you to [review](https://docs.esmvaltool.org/en/latest/community/review.html#review-of-pull-requests) two other [open pull requests](https://github.com/ESMValGroup/ESMValTool/pulls) in this repository diff --git a/.github/workflows/action-conda-publish.yml b/.github/workflows/action-conda-publish.yml index 1a20f935fc..12501b0f2c 100644 --- a/.github/workflows/action-conda-publish.yml +++ b/.github/workflows/action-conda-publish.yml @@ -6,7 +6,7 @@ on: # use this to test before actual release and publish push: branches: - - main + - master jobs: @@ -43,7 +43,7 @@ jobs: export BUILD_FOLDER=/tmp/esmvalcore/_build mkdir -p $BUILD_FOLDER conda build package \ - --channel conda-forge \ + --channel esmvalgroup --channel conda-forge \ --croot $BUILD_FOLDER \ - name: Push the package to anaconda cloud if: startsWith(github.ref, 'refs/tags') @@ -70,7 +70,7 @@ jobs: with: python-version: ${{ matrix.python-version }} miniconda-version: "latest" - channels: conda-forge + channels: esmvalgroup,conda-forge - shell: bash -l {0} run: conda --version - shell: bash -l {0} diff --git a/.github/workflows/action-install-from-conda.yml b/.github/workflows/action-install-from-conda.yml index 152d2692eb..b6141a161e 100644 --- a/.github/workflows/action-install-from-conda.yml +++ b/.github/workflows/action-install-from-conda.yml @@ -13,19 +13,20 @@ name: Conda Base Install -# runs on a push on main and at the end of every day +# runs on a push on master and at the end of every day on: # triggering on push without branch name will run tests everytime # there is a push on any branch # turn it on only if needed push: branches: - - main - # run the test only if the PR is to maain + - master + - github-actions2 + # run the test only if the PR is to master # turn it on if required #pull_request: # branches: - # - main + # - master schedule: - cron: '0 4 * * *' @@ -45,7 +46,7 @@ jobs: with: python-version: ${{ matrix.python-version }} miniconda-version: "latest" - channels: conda-forge + channels: esmvalgroup,conda-forge - shell: bash -l {0} run: mkdir -p conda_install_linux_artifacts_python_${{ matrix.python-version }} - shell: bash -l {0} @@ -81,7 +82,7 @@ jobs: activate-environment: esmvalcore python-version: ${{ matrix.python-version }} miniconda-version: "latest" - channels: conda-forge + channels: esmvalgroup,conda-forge - shell: bash -l {0} run: mkdir -p conda_install_osx_artifacts_python_${{ matrix.python-version }} - shell: bash -l {0} diff --git a/.github/workflows/action-install-from-pypi.yml b/.github/workflows/action-install-from-pypi.yml index 19877b0d7a..8d5e2abcab 100644 --- a/.github/workflows/action-install-from-pypi.yml +++ b/.github/workflows/action-install-from-pypi.yml @@ -13,20 +13,21 @@ name: PyPi Install -# runs on a push on main and at the end of every day +# runs on a push on master and at the end of every day on: # triggering on push without branch name will run tests everytime # there is a push on any branch # turn it on only if needed push: branches: - - main + - master + - github-actions2 - # run the test only if the PR is to main + # run the test only if the PR is to master # turn it on if required #pull_request: # branches: - # - main + # - master schedule: - cron: '0 0 * * *' diff --git a/.github/workflows/action-install-from-source.yml b/.github/workflows/action-install-from-source.yml index 60ec5b04d6..00ed97f61d 100644 --- a/.github/workflows/action-install-from-source.yml +++ b/.github/workflows/action-install-from-source.yml @@ -14,20 +14,20 @@ name: Source Install -# runs on a push 
on main and at the end of every day +# runs on a push on master and at the end of every day on: # triggering on push without branch name will run tests everytime # there is a push on any branch # turn it on only if needed push: branches: - - main + - master - github-actions2 - # run the test only if the PR is to main + # run the test only if the PR is to master # turn it on if required #pull_request: # branches: - # - main + # - master schedule: - cron: '0 0 * * *' diff --git a/.github/workflows/action-pypi-build-and-deploy.yml b/.github/workflows/action-pypi-build-and-deploy.yml index 4d2d0b34df..9bfa4997d5 100644 --- a/.github/workflows/action-pypi-build-and-deploy.yml +++ b/.github/workflows/action-pypi-build-and-deploy.yml @@ -6,7 +6,7 @@ on: # use this for testing push: branches: - - main + - master jobs: build-n-publish: diff --git a/.github/workflows/action-test.yml b/.github/workflows/action-test.yml index 0da6da7c58..a7a0080e6e 100644 --- a/.github/workflows/action-test.yml +++ b/.github/workflows/action-test.yml @@ -13,19 +13,19 @@ name: Test -# runs on a push on main and at the end of every day +# runs on a push on master and at the end of every day on: # triggering on push without branch name will run tests everytime # there is a push on any branch # turn it on only if needed push: branches: - - main - # run the test only if the PR is to main + - master + # run the test only if the PR is to master # turn it on if required #pull_request: # branches: - # - main + # - master schedule: - cron: '0 0 * * *' # nightly @@ -55,9 +55,7 @@ jobs: - shell: bash -l {0} run: pip install -e .[develop] 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/install.txt - shell: bash -l {0} - run: pytest -n 2 -m "not installation and not sequential" 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/test_report.txt - - shell: bash -l {0} - run: pytest -n 0 -m "sequential" + run: pytest -n 2 -m "not installation" 2>&1 | tee test_linux_artifacts_python_${{ matrix.python-version }}/test_report.txt - name: Upload artifacts if: ${{ always() }} # upload artifacts even if fail uses: actions/upload-artifact@v2 @@ -90,9 +88,7 @@ jobs: - shell: bash -l {0} run: pip install -e .[develop] --use-feature=2020-resolver 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/install.txt - shell: bash -l {0} - run: pytest -n 2 -m "not installation and not sequential" 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/test_report.txt - - shell: bash -l {0} - run: pytest -n 0 -m "sequential" + run: pytest -n 2 -m "not installation" 2>&1 | tee test_osx_artifacts_python_${{ matrix.python-version }}/test_report.txt - name: Upload artifacts if: ${{ always() }} # upload artifacts even if fail uses: actions/upload-artifact@v2 diff --git a/.gitignore b/.gitignore index bace074ff4..800d83c2c3 100644 --- a/.gitignore +++ b/.gitignore @@ -74,7 +74,6 @@ nosetests.xml coverage.xml *.cover .hypothesis/ -.mypy_cache # Jupyter Notebook .ipynb_checkpoints diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3e8e150768..04136d47dc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -42,7 +42,3 @@ repos: rev: '3.8.4' hooks: - id: flake8 - - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.812' - hooks: - - id: mypy diff --git a/.prospector.yml b/.prospector.yml index f1272ec938..dbc62018eb 100644 --- a/.prospector.yml +++ b/.prospector.yml @@ -15,9 +15,6 @@ pyroma: pep8: full: true -mypy: - run: true - pep257: # disable rules that are 
allowed by the numpy convention # see https://github.com/PyCQA/pydocstyle/blob/master/src/pydocstyle/violations.py diff --git a/.zenodo.json b/.zenodo.json index ee05d13180..35270e4eac 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -152,11 +152,6 @@ "affiliation": "Barcelona Supercomputing Center", "name": "Jury, Martin", "orcid": "0000-0003-0590-7843" - }, - { - "affiliation": "Stéphane Sénési EIRL, Colomiers, France", - "name": "Sénési, Stéphane", - "orcid": "0000-0003-0892-5967" } ], "description": "ESMValCore: A community tool for pre-processing data from Earth system models in CMIP and running analysis scripts.", @@ -198,5 +193,4 @@ "id": "10.13039/501100000780::824084" } ] - } diff --git a/CITATION.cff b/CITATION.cff index 7e3a9c0a95..b229d5293c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -156,11 +156,6 @@ authors: "family-names": "Jury" "given-names": "Martin" "orcid": "https://orcid.org/0000-0003-0590-7843" - - - "affiliation": "Stéphane Sénési EIRL, Colomiers, France" - "family-names": "Sénési" - "given-names": "Stéphane" - "orcid": "https://orcid.org/0000-0003-0892-5967" cff-version: "1.0.3" date-released: 2021-2-8 diff --git a/README.md b/README.md index c7aac18c5b..0c35d47036 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Documentation Status](https://readthedocs.org/projects/esmvaltool/badge/?version=latest)](https://esmvaltool.readthedocs.io/en/latest/?badge=latest) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3387139.svg)](https://doi.org/10.5281/zenodo.3387139) [![Gitter](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/ESMValGroup?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![CircleCI](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/main.svg?style=svg)](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/main) +[![CircleCI](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/master.svg?style=svg)](https://circleci.com/gh/ESMValGroup/ESMValCore/tree/master) [![Codacy Badge](https://app.codacy.com/project/badge/Coverage/5d496dea9ef64ec68e448a6df5a65783)](https://www.codacy.com/gh/ESMValGroup/ESMValCore?utm_source=github.com&utm_medium=referral&utm_content=ESMValGroup/ESMValCore&utm_campaign=Badge_Coverage) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/5d496dea9ef64ec68e448a6df5a65783)](https://www.codacy.com/gh/ESMValGroup/ESMValCore?utm_source=github.com&utm_medium=referral&utm_content=ESMValGroup/ESMValCore&utm_campaign=Badge_Grade) [![Docker Build Status](https://img.shields.io/docker/cloud/build/esmvalgroup/esmvalcore)](https://hub.docker.com/r/esmvalgroup/esmvalcore/) diff --git a/doc/api/esmvalcore.api.config.rst b/doc/api/esmvalcore.api.config.rst index 295d5bca70..983a085392 100644 --- a/doc/api/esmvalcore.api.config.rst +++ b/doc/api/esmvalcore.api.config.rst @@ -34,7 +34,7 @@ The global configuration can be imported from the :py:mod:`esmvalcore.experiment 'write_netcdf': True, 'write_plots': True}) -The parameters for the user configuration file are listed :ref:`here `. +The parameters for the user configuration file are listed `here `__. :py:data:`~esmvalcore.experimental.CFG` is essentially a python dictionary with a few extra functions, similar to :py:mod:`matplotlib.rcParams`. This means that values can be updated like this: diff --git a/doc/api/esmvalcore.rst b/doc/api/esmvalcore.rst index 6288927b09..b9d2127688 100644 --- a/doc/api/esmvalcore.rst +++ b/doc/api/esmvalcore.rst @@ -1,7 +1,5 @@ -.. 
_api: - -ESMValCore API Reference -======================== +ESMValTool Core API Reference +============================= ESMValCore is mostly used as a commandline tool. However, it is also possibly to use (parts of) ESMValTool as a library. This section documents the public API of ESMValCore. diff --git a/doc/changelog.rst b/doc/changelog.rst index ab1ab4881b..77432796fb 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -41,7 +41,7 @@ Documentation - Fix numbering of steps in release instructions (`#838 `__) `Bouwe Andela `__ - Add labels to changelogs of individual versions for easy reference (`#899 `__) `Klaus Zimmermann `__ -- Make CircleCI badge specific to main branch (`#902 `__) `Bouwe Andela `__ +- Make CircleCI badge specific to master branch (`#902 `__) `Bouwe Andela `__ - Fix docker build badge url (`#906 `__) `Stef Smeets `__ - Update github PR template (`#909 `__) `Stef Smeets `__ - Refer to ESMValTool GitHub discussions page in the error message (`#900 `__) `Bouwe Andela `__ diff --git a/doc/conf.py b/doc/conf.py index 218bcec1a4..9c42ec7eb8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -111,7 +111,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns: list = [] +exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -170,7 +170,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path: list = [] +html_static_path = [] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied diff --git a/doc/contributing.rst b/doc/contributing.rst index 679edb2033..3b3243dac6 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -1,5 +1,3 @@ -.. _contributing: - Contributions are very welcome ============================== @@ -11,638 +9,265 @@ If you have suggestions for improving the process of contributing, please do not If you have a bug or other issue to report or just need help, please open an issue on the `issues tab on the ESMValCore github repository `__. -If you would like to contribute a new -:ref:`preprocessor function `, -:ref:`derived variable `, :ref:`fix for a dataset `, or -another new feature, please discuss your idea with the development team before +If you would like to contribute a new preprocessor function, derived variable, fix for a dataset, or another new +feature, please discuss your idea with the development team before getting started, to avoid double work and/or disappointment later. A good way to do this is to open an -`issue `_ on GitHub. +`issue on GitHub `__. + +To get started developing, follow the instructions below. +For help with common new features, please have a look at :doc:`develop/index`. Getting started --------------- -See :ref:`installation-from-source` for instructions on how to set up a development -installation. +To install for development, follow the instructions in :doc:`quickstart/install`. -New development should preferably be done in the -`ESMValCore `__ -GitHub repository. -The default git branch is ``main``. -Use this branch to create a new feature branch from and make a pull request -against. 
-This -`page `__ -offers a good introduction to git branches, but it was written for -BitBucket while we use GitHub, so replace the word BitBucket by GitHub -whenever you read it. +Running tests +------------- -It is recommended that you open a `draft pull -request `__ -early, as this will cause :ref:`CircleCI to run the unit tests `, -:ref:`Codacy to analyse your code `, and -:ref:`readthedocs to build the documentation `. -It’s also easier to get help from other developers if your code is visible in a -pull request. - -:ref:`Make small pull requests `, the ideal pull requests changes -just a few files and adds/changes no more than 100 lines of production code. -The amount of test code added can be more extensive, but changes to existing -test code should be made sparingly. - -Design considerations -~~~~~~~~~~~~~~~~~~~~~ - -When making changes, try to respect the current structure of the program. -If you need to make major changes to the structure of program to add a feature, -chances are that you have either not come up with the -most optimal design or the feature is not a very good fit for the tool. -Discuss your feature with the `@ESMValGroup/esmvaltool-coreteam`_ in an issue_ -to find a solution. - -Please keep the following considerations in mind when programming: - -- Changes should preferably be :ref:`backward compatible `. -- Apply changes gradually and change no more than a few files in a single pull - request, but do make sure every pull request in itself brings a meaningful - improvement. - This reduces the risk of breaking existing functionality and making - :ref:`backward incompatible ` changes, because it - helps you as well as the reviewers of your pull request to better understand - what exactly is being changed. -- :ref:`preprocessor_functions` are Python functions (and not classes) so they - are easy to understand and implement for scientific contributors. -- No additional CMOR checks should be implemented inside preprocessor functions. - The input cube is fixed and confirmed to follow the specification in - `esmvalcore/cmor/tables `__ - before applying any other preprocessor functions. - This design helps to keep the preprocessor functions and diagnostics scripts - that use the preprocessed data from the tool simple and reliable. - See :ref:`cmor_table_configuration` for the mapping from ``project`` in the - recipe to the relevant CMOR table. -- The ESMValCore package is based on :ref:`iris `. - Preprocessor functions should preferably be small and just call the relevant - iris code. - Code that is more involved and more broadly applicable than just in the - ESMValCore, should be implemented in iris instead. -- Any settings in the recipe that can be checked before loading the data should - be checked at the :ref:`task creation stage `. - This avoids that users run a recipe for several hours before finding out they - made a mistake in the recipe. - No data should be processed or files written while creating the tasks. -- CMOR checks should provide a good balance between reliability of the tool - and ease of use. - Several :ref:`levels of strictness of the checks ` - are available to facilitate this. -- Keep your code short and simple: we would like to make contributing as easy as - possible. - For example, avoid implementing complicated class inheritance structures and - `boilerplate `__ - code. 
-- If you find yourself copy-pasting a piece of code and making minor changes - to every copy, instead put the repeated bit of code in a function that you can - re-use, and provide the changed bits as function arguments. -- Be careful when changing existing unit tests to make your new feature work. - You might be breaking existing features if you have to change existing tests. - -Finally, if you would like to improve the design of the tool, discuss your plans -with the `@ESMValGroup/esmvaltool-coreteam`_ to make sure you understand the -current functionality and you all agree on the new design. - -.. _pull_request_checklist: - -Checklist for pull requests ---------------------------- - -To clearly communicate up front what is expected from a pull request, we have -the following checklist. -Please try to do everything on the list before requesting a review. -If you are unsure about something on the list, please ask the -`@ESMValGroup/tech-reviewers`_ or `@ESMValGroup/science-reviewers`_ for help -by commenting on your (draft) pull request or by starting a new -`discussion `__. - -In the ESMValTool community we use -:ref:`pull request reviews ` to ensure all code and -documentation contributions are of good quality. -The icons indicate whether the item will be checked during the -:ref:`🛠 Technical review ` or -:ref:`🧪 Scientific review `. - -- 🧪 The new functionality is :ref:`relevant and scientifically sound` -- 🛠 :ref:`The pull request has a descriptive title and labels ` -- 🛠 Code is written according to the :ref:`code quality guidelines ` -- 🧪 and 🛠 Documentation_ is available -- 🛠 Unit tests_ have been added -- 🛠 Changes are :ref:`backward compatible ` -- 🛠 Changed :ref:`dependencies have been added or removed correctly ` -- 🛠 The :ref:`list of authors ` is up to date -- 🛠 The :ref:`checks shown below the pull request ` are successful - -.. _scientific_relevance: - -Scientific relevance --------------------- - -The proposed changes should be relevant for the larger scientific community. -The implementation of new features should be scientifically sound; e.g. -the formulas used in new preprocesssor functions should be accompanied by the -relevant references and checked for correctness by the scientific reviewer. -The `CF Conventions `_ as well as additional -standards imposed by `CMIP `_ should be -followed whenever possible. - -.. _descriptive_pr_title: - -Pull request title and label ----------------------------- - -The title of a pull request should clearly describe what the pull request changes. -If you need more text to describe what the pull request does, please add it in -the description. -`Add one or more labels `__ -to your pull request to indicate the type of change. -At least one of the following -`labels `__ should be used: -`bug`, `deprecated feature`, `fix for dataset`, `preprocessor`, `cmor`, `api`, -`testing`, `documentation` or `enhancement`. - -The titles and labels of pull requests are used to compile the :ref:`changelog`, -therefore it is important that they are easy to understand for people who are -not familiar with the code or people in the project. -Descriptive pull request titles also makes it easier to find back what was -changed when, which is useful in case a bug was introduced. - -.. _code_quality: - -Code quality ------------- +Go to the directory where the repository is cloned and run +``pytest``. 
Optionally you can skip tests which require +additional dependencies for supported diagnostic script languages by +adding ``-m 'not installation'`` to the previous command. +Tests will also be run automatically by +`CircleCI `__. -To increase the readability and maintainability or the ESMValCore source -code, we aim to adhere to best practices and coding standards. +Sample data +----------- + +If you need sample data to work with, `this repository `__ contains samples of real data for use with ESMValTool development, demonstration purposes and automated testing. The goal is to keep the repository size small (~ 100 MB), so it can be easily downloaded and distributed. + +The data are installed as part of the developer dependencies, and used by some larger tests (i.e. in the `multimodel tests `__) -We include checks for Python and yaml files, most of which are described in more -detail in the sections below. +The loading and preprocessing of the data can be somewhat time-consuming (~30 secs) and are cached by ``pytest`` to make the tests more performant. +Clear the cache by using running pytest with the ``--cache-clear`` flag. To avoid running these tests using sample data, use `pytest -m "not use_sample_data"`. +If you are adding new tests using sample data, please use the decorator ``@pytest.mark.use_sample_data``. + +Code style +---------- + +To increase the readability and maintainability or the ESMValCore source +code, we aim to adhere to best practices and coding standards. All pull +requests are reviewed and tested by one or more members of the core +development team. For code in all languages, it is highly recommended +that you split your code up in functions that are short enough to view +without scrolling. + +We include checks for Python and yaml files, which are +described in more detail in the sections below. This includes checks for invalid syntax and formatting errors. -:ref:`esmvaltool:pre-commit` is a handy tool that can run all of these checks -automatically just before you commit your code. +`Pre-commit `__ is a handy tool that can run +all of these checks automatically. It knows knows which tool to run for each filetype, and therefore provides -a convenient way to check your code. +a simple way to check your code! -Python -~~~~~~ -The standard document on best practices for Python code is -`PEP8 `__ and there is -`PEP257 `__ for code documentation. -We make use of -`numpy style docstrings `__ -to document Python functions that are visible on -`readthedocs `_. +Pre-commit +~~~~~~~~~~ -To check if your code adheres to the standard, go to the directory where -the repository is cloned, e.g. ``cd ESMValCore``, and run `prospector `_ +To run ``pre-commit`` on your code, go to the ESMValCore directory +(``cd ESMValCore``) and run :: - prospector esmvalcore/preprocessor/_regrid.py - -In addition to prospector, we use `flake8 `_ -to automatically check for bugs and formatting mistakes and -`mypy `_ for checking that -`type hints `_ are -correct. -Note that `type hints`_ are completely optional, but if you do choose to add -them, they should be correct. - -When you make a pull request, adherence to the Python development best practices -is checked in two ways: - -#. As part of the unit tests, flake8_ and mypy_ are run by - `CircleCI `_, - see the section on Tests_ for more information. -#. `Codacy `_ - is a service that runs prospector (and other code quality tools) on changed - files and reports the results. 
- Click the 'Details' link behind the Codacy check entry and then click - 'View more details on Codacy Production' to see the results of the static - code analysis done by Codacy_. - If you need to log in, you can do so using your GitHub account. - -The automatic code quality checks by prospector are really helpful to improve -the quality of your code, but they are not flawless. -If you suspect prospector or Codacy may be wrong, please ask the -`@ESMValGroup/tech-reviewers`_ by commenting on your pull request. - -Note that running prospector locally will give you quicker and sometimes more -accurate results than waiting for Codacy. + pre-commit run -Most formatting issues in Python code can be fixed automatically by -running the commands +By default, pre-commit will only run on the files that have been changed, +meaning those that have been staged in git (i.e. after +``git add your_script.py``). + +To make it only check some specific files, use :: - isort some_file.py + pre-commit run --files your_script.py -to sort the imports in `the standard way `__ -using `isort `__ and +or :: - yapf -i some_file.py + pre-commit run --files your_script.R -to add/remove whitespace as required by the standard using `yapf `__, +Alternatively, you can configure ``pre-commit`` to run on the staged files before +every commit (i.e. ``git commit``), by installing it as a `git hook `__ using :: - docformatter -i some_file.py + pre-commit install -to run `docformatter `__ which helps -formatting the docstrings (such as line length, spaces). +Pre-commit hooks are used to inspect the code that is about to be committed. The +commit will be aborted if files are changed or if any issues are found that +cannot be fixed automatically. Some issues cannot be fixed (easily), so to +bypass the check, run -YAML -~~~~ +:: -Please use `yamllint `_ to check that your -YAML files do not contain mistakes. -``yamllint`` checks for valid syntax, common mistakes like key repetition and -cosmetic problems such as line length, trailing spaces, wrong indentation, etc. + git commit --no-verify -Any text file -~~~~~~~~~~~~~ +or -A generic tool to check for common spelling mistakes is -`codespell `__. +:: -.. _documentation: + git commit -n -Documentation -------------- +or uninstall the pre-commit hook -The documentation lives on `docs.esmvaltool.org `_. +:: -Adding documentation -~~~~~~~~~~~~~~~~~~~~ + pre-commit uninstall -The documentation is built by readthedocs_ using `Sphinx `_. -There are two main ways of adding documentation: - -#. As written text in the directory - `doc `__. - When writing - `reStructuredText `_ - (``.rst``) files, please try to limit the line length to 80 characters and - always start a sentence on a new line. - This makes it easier to review changes to documentation on GitHub. - -#. As docstrings or comments in code. - For Python code, only the - `docstrings `__ - of Python modules, classes, and functions - that are mentioned in - `doc/api `__ - are used to generate the online documentation. - This results in the :ref:`api`. - The standard document with best practices on writing docstrings is - `PEP257 `__. - For the API documentation, we make use of - `numpy style docstrings `__. -What should be documented -~~~~~~~~~~~~~~~~~~~~~~~~~ +Python +~~~~~~ -Functionality that is visible to users should be documented. -Any documentation that is visible on readthedocs_ should be well -written and adhere to the standards for documentation. 
-Examples of this include: - -- The :ref:`recipe ` -- Preprocessor :ref:`functions ` and their - :ref:`use from the recipe ` -- :ref:`Configuration options ` -- :ref:`Installation ` -- :ref:`Output files ` -- :ref:`Command line interface ` -- :ref:`Diagnostic script interfaces ` -- :ref:`The experimental Python interface ` - -Note that: - -- For functions that compute scientific results, comments with references to - papers and/or other resources as well as formula numbers should be included. -- When making changes to/introducing a new preprocessor function, also update the - :ref:`preprocessor documentation `. -- There is no need to write complete numpy style documentation for functions that - are not visible in the :ref:`api` chapter on readthedocs. - However, adding a docstring describing what a function does is always a good - idea. - For short functions, a one-line docstring is usually sufficient, but more - complex functions might require slightly more extensive documentation. - -When reviewing a pull request, always check that documentation is easy to -understand and available in all expected places. - -How to build and view the documentation -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Whenever you make a pull request or push new commits to an existing pull -request, readthedocs will automatically build the documentation. -The link to the documentation will be shown in the list of checks below your -pull request. -Click 'Details' behind the check ``docs/readthedocs.org:esmvaltool`` to preview -the documentation. -If all checks were successful, you may need to click 'Show all checks' to see -the individual checks. - -To build the documentation on your own computer, go to the directory where the -repository was cloned and run +The standard document on best practices for Python code is +`PEP8 `__ and there is +`PEP257 `__ for +documentation. We make use of `numpy style +docstrings `__ +to document Python functions that are visible on +`readthedocs `__. + +Most formatting issues in Python code can be fixed automatically by +running the commands :: - python setup.py build_sphinx + isort some_file.py -or +to sort the imports in `the standard way `__ +using `isort `__ and :: - python setup.py build_sphinx -Ea + yapf -i some_file.py -to build it from scratch. +to add/remove whitespace as required by the standard using `yapf `__, -Make sure that your newly added documentation builds without warnings or -errors and looks correctly formatted. -CircleCI_ will build the documentation with the command: +:: -.. code-block:: bash + docformatter -i your_script.py - python setup.py build_sphinx --warning-is-error +to run `docformatter `__ which helps formatting the doc strings (such as line length, spaces). -This will catch mistakes that can be detected automatically. +To check if your code adheres to the standard, go to the directory where +the repository is cloned, e.g. ``cd ESMValCore``, and run `prospector `__ -The configuration file for Sphinx_ is -`doc/shinx/source/conf.py `_. +:: -See :ref:`esmvaltool:esmvalcore-documentation-integration` for information on -how the ESMValCore documentation is integrated into the complete ESMValTool -project documentation on readthedocs. + prospector esmvaltool/diag_scripts/your_diagnostic/your_script.py -When reviewing a pull request, always check that the documentation checks -shown below the pull request were successful. +Run -.. 
_tests: +:: -Tests ------ + python setup.py lint -To check that the code works correctly, there tests available in the -`tests directory `_. -We use `pytest `_ to write and run our tests. +to see the warnings about the code style of the entire project. -Contributions to ESMValCore should be covered by unit tests. -Have a look at the existing tests in the ``tests`` directory for inspiration on -how to write your own tests. -If you do not know how to start with writing unit tests, ask the -`@ESMValGroup/tech-reviewers`_ for help by commenting on the pull request and -they will try to help you. -To check which parts of your code are covered by tests, open the file -``test-reports/coverage_html/index.html`` and browse to the relevant file. -It is also possible to view code coverage on Codacy_ (click the Files tab) -and CircleCI_ (open the ``tests`` job and click the ARTIFACTS tab). +We use `flake8 `__ on CircleCI to automatically check that there are +no formatting mistakes and Codacy for monitoring (Python) code quality. +Running prospector locally will give you quicker and sometimes more +accurate results. -Whenever you make a pull request or push new commits to an existing pull -request, the tests in the `tests directory`_ of the branch associated with the -pull request will be run automatically on CircleCI_. -The results appear at the bottom of the pull request. -Click on 'Details' for more information on a specific test job. -To see some of the results on CircleCI, you may need to log in. -You can do so using your GitHub account. +YAML +~~~~ -To run the tests on your own computer, go to the directory where the repository -is cloned and run the command +Please use ``yamllint`` to check that your YAML files do not contain +mistakes. -.. code-block:: bash +Any text file +~~~~~~~~~~~~~ - pytest +A generic tool to check for common spelling mistakes is +`codespell `__. -Optionally you can skip tests which require additional dependencies for -supported diagnostic script languages by adding ``-m 'not installation'`` to the -previous command. +Documentation +------------- -When reviewing a pull request, always check that all test jobs on CircleCI_ were -successful. +What should be documented +~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _sample_data_tests: +Any code documentation that is visible on +`readthedocs `__ should be well +written and adhere to the standards for documentation for the respective +language. Note that there is no need to write extensive documentation +for functions that are not visible on readthedocs. However, adding a one +line docstring describing what a function does is always a good idea. +When making changes/introducing a new preprocessor function, also update +the `preprocessor +documentation `__. + +How to build the documentation locally +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Sample data -~~~~~~~~~~~ - -New or modified preprocessor functions should preferably also be tested using -the sample data. -These tests are located in -`tests/sample_data `__. -Please mark new tests that use the sample data with the -`decorator `__ -``@pytest.mark.use_sample_data``. - -The `ESMValTool_sample_data `_ -repository contains samples of CMIP6 data for testing ESMValCore. -The `ESMValTool-sample-data `_ -package is installed as part of the developer dependencies. -The size of the package is relatively small (~ 100 MB), so it can be easily -downloaded and distributed. 
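As an illustration of the sample-data tests described above, a new test might look like the sketch below; the ``load_timeseries_cubes`` helper and its ``mip_table`` argument are assumptions about the sample-data package, not taken from this diff.

.. code-block:: python

    # Illustrative sample-data test; the loader name and signature are
    # assumed, so treat this as a sketch rather than working test code.
    import pytest

    esmvaltool_sample_data = pytest.importorskip('esmvaltool_sample_data')


    @pytest.mark.use_sample_data
    def test_sample_timeseries_loads():
        """Check that the sample timeseries cubes load with a time axis."""
        cubes = esmvaltool_sample_data.load_timeseries_cubes(
            mip_table='Amon')
        assert cubes
        for cube in cubes:
            assert cube.coords('time')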
- -Preprocessing the sample data can be time-consuming, so some -intermediate results are cached by pytest to make the tests run faster. -If you suspect the tests are failing because the cache is invalid, clear it by -running - -.. code-block:: bash - - pytest --cache-clear - -To avoid running the time consuming tests that use sample data altogether, run - -.. code-block:: bash - - pytest -m "not use_sample_data" - - -Automated testing -~~~~~~~~~~~~~~~~~ - -Whenever you make a pull request or push new commits to an existing pull -request, the tests in the `tests directory`_ of the branch associated with the -pull request will be run automatically on CircleCI_. - -Every night, more extensive tests are run to make sure that problems with the -installation of the tool are discovered by the development team before users -encounter them. -These nightly tests have been designed to follow the installation procedures -described in the documentation, e.g. in the :ref:`install` chapter. -The nightly tests are run using both CircleCI and GitHub Actions. -The result of the tests ran by CircleCI can be seen on the -`CircleCI project page `__ -and the result of the tests ran by GitHub Actions can be viewed on the -`Actions tab `__ -of the repository. - -The configuration of the tests run by CircleCI can be found in the directory -`.circleci `__, -while the configuration of the tests run by GitHub Actions can be found in the -directory -`.github/workflows `__. - -.. _backward_compatibility: - -Backward compatibility ----------------------- - -The ESMValCore package is used by many people to run their recipes. -Many of these recipes are maintained in the public -`ESMValTool `_ repository, but -there are also users who choose not to share their work there. -While our commitment is first and foremost to users who do share their recipes -in the ESMValTool repository, we still try to be nice to all of the ESMValCore -users. -When making changes, e.g. to the :ref:`recipe format `, the -:ref:`diagnostic script interface `, the public -:ref:`Python API `, or the :ref:`configuration file format `, -keep in mind that this may affect many users. -To keep the tool user friendly, try to avoid making changes that are not -backward compatible, i.e. changes that require users to change their existing -recipes, diagnostics, configuration files, or scripts. - -If you really must change the public interfaces of the tool, always discuss this -with the `@ESMValGroup/esmvaltool-coreteam`_. -Try to deprecate the feature first by issuing a :py:class:`DeprecationWarning` -using the :py:mod:`warnings` module and schedule it for removal three -`minor versions `__ from the latest released version. -For example, when you deprecate a feature in a pull request that will be -included in version 2.3, that feature could be removed in version 2.5. -Mention the version in which the feature will be removed in the deprecation -message. -Label the pull request with the -`deprecated feature `__ -label. -When deprecating a feature, please follow up by actually removing the feature -in due course. - -If you must make backward incompatible changes, you need to update the available -recipes in ESMValTool and link the ESMValTool pull request(s) in the ESMValCore -pull request description. -You can ask the `@ESMValGroup/esmvaltool-recipe-maintainers`_ for help with -updating existing recipes, but please be considerate of their time. 
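The deprecation policy described above can be illustrated with a short sketch; the preprocessor and argument names below are invented for the example.

.. code-block:: python

    # Invented example of deprecating a keyword argument as described
    # above; only the DeprecationWarning pattern itself is prescribed.
    import warnings


    def example_preprocessor(cube, new_option=None, old_option=None):
        """Process *cube*; ``old_option`` is deprecated."""
        if old_option is not None:
            warnings.warn(
                "`old_option` is deprecated and is scheduled for removal "
                "three minor versions from now; use `new_option` instead.",
                DeprecationWarning,
            )
            new_option = old_option
        # ... do the actual processing with `new_option` ...
        return cube

Mentioning the removal version in the message, as above, makes it easy for users to plan the migration.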
- -When reviewing a pull request, always check for backward incompatible changes -and make sure they are needed and have been discussed with the -`@ESMValGroup/esmvaltool-coreteam`_. -Also, make sure the author of the pull request has created the accompanying pull -request(s) to update the ESMValTool, before merging the ESMValCore pull request. - -.. _dependencies: - -Dependencies ------------- - -Before considering adding a new dependency, carefully check that the -`license `__ -of the dependency you want to add and any of its dependencies are -`compatible `__ -with the -`Apache 2.0 `_ -license that applies to the ESMValCore. -Note that GPL version 2 license is considered incompatible with the Apache 2.0 -license, while the compatibility of GPL version 3 license with the Apache 2.0 -license is questionable. -See this `statement `__ -by the authors of the Apache 2.0 license for more information. - -When adding or removing dependencies, please consider applying the changes in -the following files: - -- ``environment.yml`` - contains development dependencies that cannot be installed from - `PyPI `_ -- ``docs/requirements.txt`` - contains Python dependencies needed to build the documentation that can be - installed from PyPI -- ``docs/conf.py`` - contains a list of Python dependencies needed to build the documentation that - cannot be installed from PyPI and need to be mocked when building the - documentation. - (We do not use conda to build the documentation because this is too time - consuming.) -- ``setup.py`` - contains all Python dependencies, regardless of their installation source -- ``package/meta.yaml`` - contains dependencies for the conda package; all Python and compiled - dependencies that can be installed from conda should be listed here - -Note that packages may have a different name on -`conda-forge `__ than on PyPI_. - -Several test jobs on CircleCI_ related to the installation of the tool will only -run if you change the dependencies. -These will be skipped for most pull requests. - -When reviewing a pull request where dependencies are added or removed, always -check that the changes have been applied in all relevant files. - -.. _authors: +Go to the directory where the repository is cloned and run -List of authors ---------------- +:: -If you make a contribution to ESMValCore and you would like to be listed as an -author (e.g. on `Zenodo `__), please add your -name to the list of authors in ``CITATION.cff`` and generate the entry for the -``.zenodo.json`` file by running the commands + python setup.py build_sphinx -Ea -:: +Make sure that your newly added documentation builds without warnings or +errors. - pip install cffconvert - cffconvert --ignore-suspect-keys --outputformat zenodo --outfile .zenodo.json +Branches, pull requests and code review +--------------------------------------- -Presently, this method unfortunately discards entries `communities` -and `grants` from that file; please restore them manually, or -alternately proceed with the addition manually +The default git branch is ``master``. Use this branch to create a new +feature branch from and make a pull request against. This +`page `__ +offers a good introduction to git branches, but it was written for +BitBucket while we use GitHub, so replace the word BitBucket by GitHub +whenever you read it. -.. _pull_request_checks: +It is recommended that you open a `draft pull +request `__ +early, as this will cause CircleCI to run the unit tests and Codacy to +analyse your code. 
It’s also easier to get help from other developers if +your code is visible in a pull request. + +You also must assign at least one `label `__ +to it as they are used to organize the changelog. At least one of the following +ones must be used: `bug`, `deprecated feature`, `fix for dataset`, +`preprocessor`, `cmor`, `api`, `testing`, `documentation` or `enhancement`. + +You can view the results of the automatic checks below your pull +request. If one of the tests shows a red cross instead of a green +approval sign, please click the link and try to solve the issue. Note +that this kind of automated checks make it easier to review code, but +they are not flawless, so occasionally Codacy will report false +positives. + +Contributing to the ESMValCore package +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Pull request checks -------------------- +Contributions to ESMValCore should -To check that a pull request is up to standard, several automatic checks are -run when you make a pull request. -Read more about it in the Tests_ and Documentation_ sections. -Successful checks have a green ✓ in front, a ❌ means the check failed. +- Preferably be covered by unit tests. Unit tests are mandatory for new + preprocessor functions or modifications to existing functions. If you + do not know how to start with writing unit tests, let us know in a + comment on the pull request and a core development team member will + try to help you get started. +- Be accompanied by appropriate documentation. +- Introduce no new issues on Codacy. -If you need help with the checks, please ask the technical reviewer of your pull -request for help. -Ask `@ESMValGroup/tech-reviewers`_ if you do not have a technical reviewer yet. +List of authors +~~~~~~~~~~~~~~~ -If the checks are broken because of something unrelated to the current -pull request, please check if there is an open issue that reports the problem. -Create one if there is no issue yet. -You can attract the attention of the `@ESMValGroup/esmvaltool-coreteam`_ by -mentioning them in the issue if it looks like no-one is working on solving the -problem yet. -The issue needs to be fixed in a separate pull request first. -After that has been merged into the ``main`` branch and all checks on this -branch are green again, merge it into your own branch to get the tests to pass. +If you make a contribution to ESMValCore and would like to be listed as an +author, please add your name to the list of authors in CITATION.cff and +regenerate the file .zenodo.json by running the command -When reviewing a pull request, always make sure that all checks were successful. -If the Codacy check keeps failing, please run prospector locally. -If necessary, ask the pull request author to do the same and to address the -reported issues. -See the section on code_quality_ for more information. -Never merge a pull request with failing CircleCI or readthedocs checks. +:: + pip install cffconvert + cffconvert --ignore-suspect-keys --outputformat zenodo --outfile .zenodo.json .. _how-to-make-a-release: -Making a release ----------------- +How to make a release +--------------------- The release manager makes the release, assisted by the release manager of the previous release, or if that person is not available, another previous release @@ -655,14 +280,14 @@ To make a new release of the package, follow these steps: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Check the ``nightly`` -`build on CircleCI `__ +`build on CircleCI `__ and the `GitHub Actions run `__. 
All tests should pass before making a release (branch).

2. Create a release branch
~~~~~~~~~~~~~~~~~~~~~~~~~~

-Create a branch off the ``main`` branch and push it to GitHub.
+Create a branch off the ``master`` branch and push it to GitHub.
Ask someone with administrative permissions to set up branch protection rules
for it so only you and the person helping you with the release can push to it.
Announce the name of the branch in an issue and ask the members of the
@@ -676,31 +301,31 @@ The version number is stored in ``esmvalcore/_version.py``,
``package/meta.yaml``, ``CITATION.cff``. Make sure to update all files.
Also update the release date in ``CITATION.cff``.
See https://semver.org for more information on choosing a version number.
-Make a pull request and get it merged into ``main`` and cherry pick it into
+Make a pull request and get it merged into ``master`` and cherry pick it into
the release branch.

4. Add release notes
~~~~~~~~~~~~~~~~~~~~

Use the script
-:ref:`esmvaltool/utils/draft_release_notes.py `
+`esmvaltool/utils/draft_release_notes.py `__
to create a draft of the release notes.
This script uses the titles and labels of merged pull requests since the
previous release.
Review the results, and if anything needs changing, change it on GitHub and
re-run the script until the changelog looks acceptable.
Copy the result to the file ``doc/changelog.rst``.
-Make a pull request and get it merged into ``main`` and cherry pick it into
+Make a pull request and get it merged into ``master`` and cherry pick it into
the release branch.

5. Cherry pick bugfixes into the release branch
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

If a bug is found and fixed (i.e. pull request merged into the
-``main`` branch) during the period of testing, use the command
+``master`` branch) during the period of testing, use the command
``git cherry-pick`` to include the commit for this bugfix into the release
branch.
When the testing period is over, make a pull request to update the release
notes with the latest changes, get it merged into
-``main`` and cherry-pick it into the release branch.
+``master`` and cherry-pick it into the release branch.

6. Make the release on GitHub
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Do a final check that all tests on CircleCI and GitHub Actions completed
successfully.
Then click the
`releases tab
`__
-and create the new release from the release branch (i.e. not from ``main``).
+and create the new release from the release branch (i.e. not from ``master``).

7. Create and upload the Conda package
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -733,7 +358,7 @@ Follow these steps to create a new conda package:

- Activate the base environment ``conda activate base``
- Install the required packages:
  ``conda install -y conda-build conda-verify ripgrep anaconda-client``
-- Run ``conda build package -c conda-forge`` to build the
+- Run ``conda build package -c conda-forge -c esmvalgroup`` to build the
  conda package
- If the build was successful, upload the package to the esmvalgroup
  conda channel, e.g.
@@ -769,10 +394,3 @@ Follow these steps to create a new Python package:

You can read more about this in
`Packaging Python Projects `__.
-
-
-.. _`@ESMValGroup/esmvaltool-coreteam`: https://github.com/orgs/ESMValGroup/teams/esmvaltool-coreteam
-.. _`@ESMValGroup/esmvaltool-developmentteam`: https://github.com/orgs/ESMValGroup/teams/esmvaltool-developmentteam
-.. _`@ESMValGroup/tech-reviewers`: https://github.com/orgs/ESMValGroup/teams/tech-reviewers
-.. 
_`@ESMValGroup/science-reviewers`: https://github.com/orgs/ESMValGroup/teams/science-reviewers
-.. _`@ESMValGroup/esmvaltool-recipe-maintainers`: https://github.com/orgs/ESMValGroup/teams/esmvaltool-recipe-maintainers
diff --git a/doc/develop/derivation.rst b/doc/develop/derivation.rst
index 9d097ff843..fcc317db26 100644
--- a/doc/develop/derivation.rst
+++ b/doc/develop/derivation.rst
@@ -1,14 +1,14 @@
.. _derivation:

*******************
-Deriving a variable
+Variable derivation
*******************

-The variable derivation preprocessor module allows one to derive variables which are
-not in the CMIP standard data request using standard variables as input.
-This is a special type of :ref:`preprocessor function `.
-All derivation scripts are located in
-`esmvalcore/preprocessor/_derive/ `_.
+The variable derivation module allows one to derive variables which are not in the
+CMIP standard data request using standard variables as input. All derivation
+scripts are located in
+`ESMValCore/esmvalcore/preprocessor/_derive/
+`_.
A typical example looks like this:

.. code-block:: py
diff --git a/doc/develop/fixing_data.rst b/doc/develop/fixing_data.rst
index 6dbe5fe96b..55af3f752a 100644
--- a/doc/develop/fixing_data.rst
+++ b/doc/develop/fixing_data.rst
@@ -1,11 +1,11 @@
.. _fixing_data:

-***********
-Dataset fix
-***********
+*************
+Dataset fixes
+*************

Some (model) datasets contain (known) errors that would normally prevent them
-from being processed correctly by the ESMValCore. The errors can be in
+from being processed correctly by the ESMValTool. The errors can be in
the metadata describing the dataset and/or in the actual data. Typical
examples of such errors are missing or wrong attributes (e.g.
attribute ''units'' says 1e-9 but data are actually in 1e-6), missing or
@@ -13,42 +13,31 @@ mislabeled coordinates (e.g. ''lev'' instead of ''plev'' or missing
coordinate bounds like ''lat_bnds'') or problems with the actual data
(e.g. cloud liquid water only instead of sum of liquid + ice as specified by the CMIP data request).

-The ESMValCore can apply on-the-fly fixes to datasets that have
+The ESMValTool can apply on-the-fly fixes to datasets that have
known errors that can be fixed automatically.

.. note::
-  **CMORization as a fix**.
-  Support for many observational and reanalysis datasets is implemented through
-  :ref:`CMORizer scripts in the ESMValTool `.
-  However, it is also possible to add support for a dataset that is not part of
-  a CMIP data request by implementing fixes for it.
-  This is particularly useful for large datasets, where keeping a copy of both
-  the original and CMORized dataset is not feasible.
-  See `Natively supported non-CMIP datasets`_ for a list of currently supported
-  datasets.
-
+  **CMORization as a fix**. As of early 2020, we've started implementing CMORization as fixes for
+  observational datasets. Previously, CMORization was an additional function implemented in ESMValTool.
+  This meant that users always had to store two copies of their observational data: both raw and CMORized.
+  Implementing CMORization as a fix removes this redundancy, as the fixes are applied 'on the fly' when
+  running a recipe. **ERA5** is the first dataset for which this 'CMORization on the fly' is supported.
+  For more information about CMORization, see:
+  `Contributing a CMORizing script for an observational dataset `_.
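
To make this concrete before the details below: a fix is a small Python class.
The following sketch is purely illustrative (the dataset, the variable and the
units error are invented), but it uses the real base class
:class:`esmvalcore.cmor._fixes.fix.Fix` and the ``fix_metadata`` method
described in this chapter:

.. code-block:: python

    """Illustrative fixes for a hypothetical dataset."""
    from esmvalcore.cmor._fixes.fix import Fix


    class Tas(Fix):
        """Fixes for the ``tas`` variable."""

        def fix_metadata(self, cubes):
            """Fix metadata.

            Hypothetical error: the data are in degrees Celsius although
            the metadata claim Kelvin.
            """
            for cube in cubes:
                cube.units = 'celsius'
                cube.convert_units('K')
            return cubes
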
Fix structure
=============

-Fixes are Python classes stored in
-``esmvalcore/cmor/_fixes/[PROJECT]/[DATASET].py`` that derive from
-:class:`esmvalcore.cmor._fixes.fix.Fix` and are named after the short name of
-the variable they fix. You can also use the names of ``mip`` tables (e.g.,
-``Amon``, ``Lmon``, ``Omon``, etc.) if you want the fix to be applied to all
-variables of that table in the dataset or ``AllVars`` if you want the fix to be
-applied to the whole dataset.
+Fixes are Python classes stored in ``esmvalcore/cmor/_fixes/[PROJECT]/[DATASET].py``
+that derive from :class:`esmvalcore.cmor._fixes.fix.Fix` and
+are named after the short name of the variable they fix. You can use the name
+``AllVars`` if you want the fix to be applied to the whole dataset.

.. warning::
    Be careful to replace any ``-`` with ``_`` in your dataset name.
    We need this replacement to have proper Python module names.

The fixes are automatically loaded and applied when the dataset is preprocessed.
-They are a special type of :ref:`preprocessor function `,
-called by the preprocessor functions
-:py:func:`esmvalcore.preprocessor.fix_file`,
-:py:func:`esmvalcore.preprocessor.fix_metadata`, and
-:py:func:`esmvalcore.preprocessor.fix_data`.

Fixing a dataset
================
@@ -150,13 +139,6 @@ so we will implement the ``fix_metadata`` method:

This will fix the error. The next time you run ESMValTool you will find that the
error is fixed on the fly and, hopefully, your recipe will run free of errors.
-The ``cubes`` argument to the ``fix_metadata`` method will contain all cubes
-loaded from a single input file.
-Some care may need to be taken that the right cube is selected and fixed in case
-multiple cubes are created.
-Usually this happens when a coordinate is mistakenly loaded as a cube, because
-the input data does not follow the
-`CF Conventions `__.

Sometimes other errors can appear after you fix the first one because they were
hidden by it. In our case, the latitude coordinate could have bad units or
@@ -173,12 +155,10 @@ submit it.

It will also be very helpful if you just scan a couple of other variables from
the same dataset and check if they share this error. In case that you find that
-it is a general one, you can change the fix name to the corresponding ``mip``
-table name (e.g., ``Amon``, ``Lmon``, ``Omon``, etc.) so it gets executed for
-all variables in that table in the dataset or to ``AllVars`` so it gets
-executed for all variables in the dataset. If you find that this is shared only
-by a handful of similar vars you can just make the fix for those new vars
-derive from the one you just created:
+it is a general one, you can change the fix name to ``AllVars`` so it gets
+executed for all variables in the dataset. If you find that this is shared only by
+a handful of similar vars you can just make the fix for those new vars derive
+from the one you just created:

.. code-block:: python

@@ -288,8 +268,6 @@ missing coordinate you can create a fix for this model:

        return [data_cube]

-.. _cmor_check_strictness:
-
Customizing checker strictness
==============================

@@ -297,12 +275,15 @@ The data checker classifies its issues using four different levels of
severity. From highest to lowest:

- ``CRITICAL``: issues that most of the time will have severe consequences.
+
- ``ERROR``: issues that usually lead to unexpected errors, but can be safely
-  ignored sometimes.
+  ignored sometimes.
+
- ``WARNING``: something is not up to the standard but is unlikely to have
-  consequences later.
+  consequences later.
+
- ``DEBUG``: any info that the checker wants to communicate. Regardless of
-  checker strictness, those will always be reported as debug messages.
+  checker strictness, those will always be reported as debug messages.

Users can control which levels of issues are interpreted as errors, and
therefore make the checker fail, or which are only reported as warnings or
debug messages.
@@ -311,28 +292,26 @@ that can take a number of values, listed below from the lowest level of
strictness to the highest:

- ``ignore``: all issues, regardless of severity, will be reported as
-  warnings. Checker will never fail. Use this at your own risk.
+  warnings. Checker will never fail. Use this at your own risk.
+
- ``relaxed``: only CRITICAL issues are treated as errors. We recommend not to
-  rely on this mode, although it can be useful if there are errors preventing
-  the run that you are sure you can manage on the diagnostics or that will
-  not affect you.
+  rely on this mode, although it can be useful if there are errors preventing
+  the run that you are sure you can manage on the diagnostics or that will
+  not affect you.
+
- ``default``: fail if there are any CRITICAL or ERROR issues (DEFAULT); provides
-  a good measure of safety.
+  a good measure of safety.
+
- ``strict``: fail if there are any warnings, this is the highest level of
-  strictness. Mostly useful for checking datasets that you have produced, to
-  be sure that future users will not be distracted by inoffensive warnings.
+  strictness. Mostly useful for checking datasets that you have produced, to
+  be sure that future users will not be distracted by inoffensive warnings.

-Natively supported non-CMIP datasets
+Natively supported non-CMOR datasets
====================================

-Fixed datasets are supported through the ``native6`` project.
-Put the files containing the data in the directory that you have configured
-for the ``native6`` project in your :ref:`user configuration file`, in a
-subdirectory called ``Tier{tier}/{dataset}/{version}/{frequency}/{short_name}``.
-Replace the items in curly braces by the values used in the variable/dataset
-definition in the :ref:`recipe `.
-Below is a list of datasets currently supported.
+Fixed datasets are supported through the ``native6`` project. Below is a list of
+datasets currently supported.

ERA5
----
@@ -344,7 +323,7 @@ MSWEP
-----

- Supported variables: ``pr``
-- Supported frequencies: ``mon``, ``day``, ``3hr``.
+- Supported frequencies: ``mon``, ``day``, ``3hr``.
- Tier: 3

For example, for monthly data, place the files in the ``/Tier3/MSWEP/latestversion/mon/pr`` subdirectory of your ``native6`` project location.
diff --git a/doc/develop/index.rst b/doc/develop/index.rst
index e10a5143f0..d75c5e993c 100644
--- a/doc/develop/index.rst
+++ b/doc/develop/index.rst
@@ -1,14 +1,8 @@
Development
***********

-To get started developing, have a look at our
-:ref:`contribution guidelines `.
-This chapter describes how to implement the most commonly contributed new
-features.
-
.. toctree::
   :maxdepth: 1

-   Preprocessor function 
-   Dataset fix 
-   Deriving a variable 
+   Fixing data 
+   Deriving variables 
diff --git a/doc/develop/preprocessor_function.rst b/doc/develop/preprocessor_function.rst
deleted file mode 100644
index 2b828dc9dd..0000000000
--- a/doc/develop/preprocessor_function.rst
+++ /dev/null
@@ -1,269 +0,0 @@
-.. _preprocessor_function:
-
-Preprocessor function
-*********************
-
-Preprocessor functions are located in :py:mod:`esmvalcore.preprocessor`.
-To add a new preprocessor function, start by finding a likely looking file to -add your function to in -`esmvalcore/preprocessor `_. -Create a new file in that directory if you cannot find a suitable place. - -The function should look like this: - - -.. code-block:: python - - def example_preprocessor_function( - cube, - example_argument, - example_optional_argument=5, - ): - """Compute an example quantity. - - A more extensive explanation of the computation can be added here. Add - references to scientific literature if available. - - Parameters - ---------- - cube: iris.cube.Cube - Input cube. - - example_argument: str - Example argument, the value of this argument can be provided in the - recipe. Describe what valid values are here. In this case, a valid - argument is the name of a dimension of the input cube. - - example_optional_argument: int, optional - Another example argument, the value of this argument can optionally - be provided in the recipe. Describe what valid values are here. - - Returns - ------- - iris.cube.Cube - The result of the example computation. - """ - - # Replace this with your own computation - cube = cube.collapsed(example_argument, iris.analysis.MEAN) - - return cube - - -The above function needs to be imported in the file -`esmvalcore/preprocessor/__init__.py `__: - -.. code-block:: python - - from ._example_module import example_preprocessor_function - - __all__ = [ - ... - 'example_preprocessor_function', - ... - ] - -The location in the ``__all__`` list above determines the default order in which -preprocessor functions are applied, so carefully consider where you put it -and ask for advice if needed. - -The preprocessor function above can then be used from the :ref:`preprocessors` -like this: - -.. code-block:: yaml - - preprocessors: - example_preprocessor: - example_preprocessor_function: - example_argument: median - example_optional_argument: 6 - -The optional argument (in this example: ``example_optional_argument``) can be -omitted in the recipe. - -Lazy and real data -================== - -Preprocessor functions should support both -:ref:`real and lazy data `. -This is vital for supporting the large datasets that are typically used with -the ESMValCore. -If the data of the incoming cube has been realized (i.e. ``cube.has_lazy_data()`` -returns ``False`` so ``cube.core_data()`` is a `NumPy `__ -array), the returned cube should also have realized data. -Conversely, if the incoming cube has lazy data (i.e. ``cube.has_lazy_data()`` -returns ``True`` so ``cube.core_data()`` is a -`Dask array `__), the returned -cube should also have lazy data. -Note that NumPy functions will often call their Dask equivalent if it exists -and if their input array is a Dask array, and vice versa. - -Note that preprocessor functions should preferably be small and just call the -relevant :ref:`iris ` code. -Code that is more involved, e.g. lots of work with Numpy and Dask arrays, -and more broadly applicable, should be implemented in iris instead. - -Documentation -============= - -The documentation in the function docstring will be shown in -the :ref:`preprocessor_functions` chapter. -In addition, you should add documentation on how to use the new preprocessor -function from the recipe in -`doc/recipe/preprocessor.rst `__ -so it is shown in the :ref:`preprocessor` chapter. -See the introduction to :ref:`documentation` for more information on how to -best write documentation. - -Tests -===== - -Tests are should be implemented for new or modified preprocessor functions. 
-For an introduction to the topic, see :ref:`tests`. - -Unit tests ----------- - -To add a unit test for the preprocessor function from the example above, create -a file called -``tests/unit/preprocessor/_example_module/test_example_preprocessor_function.py`` -and add the following content: - -.. code-block:: python - - """Test function `esmvalcore.preprocessor.example_preprocessor_function`.""" - import cf_units - import dask.array as da - import iris - import numpy as np - import pytest - - from esmvalcore.preprocessor import example_preprocessor_function - - - @pytest.mark.parametrize('lazy', [True, False]) - def test_example_preprocessor_function(lazy): - """Test that the computed result is as expected.""" - - # Construct the input cube - data = np.array([1, 2], dtype=np.float32) - if lazy: - data = da.asarray(data, chunks=(1, )) - cube = iris.cube.Cube( - data, - var_name='tas', - units='K', - ) - cube.add_dim_coord( - iris.coords.DimCoord( - np.array([0.5, 1.5], dtype=np.float64), - bounds=np.array([[0, 1], [1, 2]], dtype=np.float64), - standard_name='time', - units=cf_units.Unit('days since 1950-01-01 00:00:00', - calendar='gregorian'), - ), - 0, - ) - - # Compute the result - result = example_preprocessor_function(cube, example_argument='time') - - # Check that lazy data is returned if and only if the input is lazy - assert result.has_lazy_data() is lazy - - # Construct the expected result cube - expected = iris.cube.Cube( - np.array(1.5, dtype=np.float32), - var_name='tas', - units='K', - ) - expected.add_aux_coord( - iris.coords.AuxCoord( - np.array([1], dtype=np.float64), - bounds=np.array([[0, 2]], dtype=np.float64), - standard_name='time', - units=cf_units.Unit('days since 1950-01-01 00:00:00', - calendar='gregorian'), - )) - expected.add_cell_method( - iris.coords.CellMethod(method='mean', coords=('time', ))) - - # Compare the result of the computation with the expected result - print('result:', result) - print('expected result:', expected) - assert result == expected - - -In this test we used the decorator -`pytest.mark.parametrize `_ -to test two scenarios, with both lazy and realized data, with a single test. - - -Sample data tests ------------------ - -The idea of adding :ref:`sample data tests ` is to check that -preprocessor functions work with realistic data. -This also provides an easy way to add regression tests, though these should -preferably be implemented as unit tests instead, because using the sample data -for this purpose is slow. -To add a test using the sample data, create a file -``tests/sample_data/preprocessor/example_preprocessor_function/test_example_preprocessor_function.py`` -and add the following content: - -.. 
code-block:: python - - """Test function `esmvalcore.preprocessor.example_preprocessor_function`.""" - from pathlib import Path - - import esmvaltool_sample_data - import iris - import pytest - - from esmvalcore.preprocessor import example_preprocessor_function - - - @pytest.mark.use_sample_data - def test_example_preprocessor_function(): - """Regression test to check that the computed result is as expected.""" - # Load an example input cube - cube = esmvaltool_sample_data.load_timeseries_cubes(mip_table='Amon')[0] - - # Compute the result - result = example_preprocessor_function(cube, example_argument='time') - - filename = Path(__file__).with_name('example_preprocessor_function.nc') - if not filename.exists(): - # Create the file the expected result if it doesn't exist - iris.save(result, target=str(filename)) - raise FileNotFoundError( - f'Reference data was missing, wrote new copy to {filename}') - - # Load the expected result cube - expected = iris.load_cube(str(filename)) - - # Compare the result of the computation with the expected result - print('result:', result) - print('expected result:', expected) - assert result == expected - - -This will use a file from the sample data repository as input. -The first time you run the test, the computed result will be stored in the file -``tests/sample_data/preprocessor/example_preprocessor_function/example_preprocessor_function.nc`` -Any subsequent runs will re-load the data from file and check that it did not -change. -Make sure the stored results are small, i.e. smaller than 100 kilobytes, to -keep the size of the ESMValCore repository small. - -Using multiple datasets as input -================================ - -The name of the first argument of the preprocessor function should in almost all -cases be ``cube``. -Only when implementing a preprocessor function that uses all datasets as input, -the name of the first argument should be ``products``. -If you would like to implement this type of preprocessor function, start by -having a look at the existing functions, e.g. -:py:func:`esmvalcore.preprocessor.multi_model_statistics` or -:py:func:`esmvalcore.preprocessor.mask_fillvalues`. diff --git a/doc/interfaces.rst b/doc/interfaces.rst index 88d4c5d96e..dad7dff4fc 100644 --- a/doc/interfaces.rst +++ b/doc/interfaces.rst @@ -1,5 +1,3 @@ -.. _interfaces: - Diagnostic script interfaces ============================ diff --git a/doc/quickstart/configure.rst b/doc/quickstart/configure.rst index cd8a92eca9..054016f0b5 100644 --- a/doc/quickstart/configure.rst +++ b/doc/quickstart/configure.rst @@ -13,6 +13,7 @@ There are several configuration files in ESMValCore: graphical output format, root paths to data, etc.; * ``config-developer.yml``: sets a number of standardized file-naming and paths to data formatting; +* ``config-logging.yml``: stores information on logging. and one configuration file which is distributed with ESMValTool: @@ -174,7 +175,7 @@ Most users and diagnostic developers will not need to change this file, but it may be useful to understand its content. It will be installed along with ESMValCore and can also be viewed on GitHub: `esmvalcore/config-developer.yml -`_. +`_. This configuration file describes the file system structure and CMOR tables for several key projects (CMIP6, CMIP5, obs4mips, OBS6, OBS) on several key machines (e.g. BADC, CP4CDS, DKRZ, ETHZ, SMHI, BSC). 
CMIP data is stored as part of the Earth System Grid @@ -264,17 +265,15 @@ The filename to use for preprocessed data is configured in a similar manner using ``output_file``. Note that the extension ``.nc`` (and if applicable, a start and end time) will automatically be appended to the filename. -.. _cmor_table_configuration: - -Project CMOR table configuration --------------------------------- +CMOR table configuration +------------------------- ESMValCore comes bundled with several CMOR tables, which are stored in the directory -`esmvalcore/cmor/tables `_. +`esmvalcore/cmor/tables +`_. These are copies of the tables available from `PCMDI `_. -For every ``project`` that can be used in the recipe, there are four settings -related to CMOR table settings available: +There are four settings related to CMOR tables available: * ``cmor_type``: can be ``CMIP5`` if the CMOR table is in the same format as the CMIP5 table or ``CMIP6`` if the table is in the same format as the CMIP6 table. @@ -282,19 +281,17 @@ related to CMOR table settings available: extended with variables from the ``esmvalcore/cmor/tables/custom`` directory and it is possible to use variables with a ``mip`` which is different from the MIP table in which they are defined. -* ``cmor_path``: path to the CMOR table. - Relative paths are with respect to `esmvalcore/cmor/tables`_. - Defaults to the value provided in ``cmor_type`` written in lower case. -* ``cmor_default_table_prefix``: Prefix that needs to be added to the ``mip`` - to get the name of the file containing the ``mip`` table. - Defaults to the value provided in ``cmor_type``. +* ``cmor_path``: path to the CMOR table. Defaults to the value provided in + ``cmor_type`` written in lower case. +* ``cmor_default_table_prefix``: defaults to the value provided in ``cmor_type``. + .. _config-ref: References configuration file ============================= -The `esmvaltool/config-references.yml `__ file contains the list of ESMValTool diagnostic and recipe authors, +The `esmvaltool/config-references.yml `__ file contains the list of ESMValTool diagnostic and recipe authors, references and projects. Each author, project and reference referred to in the documentation section of a recipe needs to be in this file in the relevant section. @@ -320,3 +317,10 @@ following documentation section: These four items here are named people, references and projects listed in the ``config-references.yml`` file. + + +Logging configuration file +========================== + +.. warning:: + Section to be added diff --git a/doc/quickstart/find_data.rst b/doc/quickstart/find_data.rst index e2fa0a61bd..5e823da409 100644 --- a/doc/quickstart/find_data.rst +++ b/doc/quickstart/find_data.rst @@ -75,19 +75,6 @@ first discuss the ``drs`` parameter: as we've seen in the previous section, the DRS as a standard is used for both file naming conventions and for directory structures. -Synda ------ - -If the `synda install `_ command is used to download data, -it maintains the directory structure as on ESGF. To find data downloaded by -synda, use the ``SYNDA`` ``drs`` parameter. - -.. code-block:: yaml - - drs: - CMIP6: SYNDA - CMIP5: SYNDA - .. _config-user-drs: Explaining ``config-user/drs: CMIP5:`` or ``config-user/drs: CMIP6:`` diff --git a/doc/quickstart/install.rst b/doc/quickstart/install.rst index 1c78cd38b2..4ec5633e48 100644 --- a/doc/quickstart/install.rst +++ b/doc/quickstart/install.rst @@ -1,5 +1,3 @@ -.. 
_install: -

Installation
============

@@ -45,11 +43,11 @@ By far the easiest way to install these dependencies is to use conda_.
For a
minimal conda installation (recommended) go to https://conda.io/miniconda.html.
After installing Conda, download
-`the file with the list of dependencies `_:
+`the file with the list of dependencies `_:

.. code-block:: bash

-    wget https://raw.githubusercontent.com/ESMValGroup/ESMValCore/main/environment.yml
+    wget https://raw.githubusercontent.com/ESMValGroup/ESMValCore/master/environment.yml

and install these dependencies into a new conda environment with the command

@@ -84,7 +82,7 @@ You can get the latest release with

   docker pull esmvalgroup/esmvalcore:stable

-If you want to use the current main branch, use
+If you want to use the current master branch, use

.. code-block:: bash

@@ -160,16 +158,10 @@ To run the container using the image file ``esmvalcore.sif`` use:

   singularity run esmvalcore.sif -c ~/config-user.yml ~/recipes/recipe_example.yml

-.. _installation-from-source:

-Installation from source
+Development installation
------------------------

-.. note::
-    If you would like to install the development version of ESMValCore alongside
-    ESMValTool, please have a look at
-    :ref:`these instructions `.
-
To install from source for development, follow these instructions.

- `Download and install
@@ -192,7 +184,7 @@ To install from source for development, follow these instructions.
- Install in development mode: ``pip install -e '.[develop]'``.  If you are
  installing behind a proxy that does not trust the usual pip-urls you can
  declare them with the option ``--trusted-host``,
-  e.g. ``pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -e .[develop]``
+  e.g. \ ``pip install --trusted-host=pypi.python.org --trusted-host=pypi.org --trusted-host=files.pythonhosted.org -e .[develop]``

- Test that your installation was successful by running ``esmvaltool -h``.

@@ -203,8 +195,5 @@ You will find the tool available on HPC clusters and there will be no need to in
yourself if you are just running diagnostics:

- CEDA-JASMIN: `esmvaltool` is available on the scientific compute nodes (`sciX.jasmin.ac.uk` where
-  `X = 1, 2, 3, 4, 5`) after login and module loading via `module load esmvaltool`; see the helper page at
+  `X = 1, 2, 3, 4, 5`) after login and module loading via `module load esmvaltool`; see the helper page at
  `CEDA `__ ;
-
- DKRZ-Mistral: `esmvaltool` is available on login nodes (`mistral.dkrz.de`) and pre- and post-processing
-  nodes (`mistralpp.dkrz.de`) after login and module loading via `module load esmvaltool`; the command
-  `module help esmvaltool` provides some information about the module.
diff --git a/doc/quickstart/recipes.rst b/doc/quickstart/recipes.rst
index 141006a4fd..b9af83d913 100644
--- a/doc/quickstart/recipes.rst
+++ b/doc/quickstart/recipes.rst
@@ -3,14 +3,15 @@
Working with the installed recipes
**********************************

-Although ESMValTool can be used just to simplify the management of data
-and the creation of your own analysis code, one of its main strengths is the
-continuously growing set of diagnostics and metrics that it directly provides to
+Although ESMValTool can be used just to simplify the management of data
+and the creation of your own analysis code, one of its main strengths is the
+continuously growing set of diagnostics and metrics that it directly provides to
the user.
These metrics and diagnostics are provided as a set of preconfigured
recipes that users can run or customize for their own analysis.
-The latest list of available recipes can be found :ref:`here `.
+The latest list of available recipes can be found
+`here `_.

-In order to make the management of these installed recipes easier, ESMValTool
+In order to make the management of these installed recipes easier, ESMValTool
provides the ``recipes`` command group with utilities that help the users in
discovering and customizing the provided recipes.

@@ -28,9 +29,9 @@ using the following command

   esmvaltool recipes show recipe_name.yml

-And finally, to get a local copy that can then be customized and run, users can
+And finally, to get a local copy that can then be customized and run, users can
use the following command

.. code:: bash

-   esmvaltool recipes get recipe_name.yml
+   esmvaltool recipes get recipe_name.yml
\ No newline at end of file
diff --git a/doc/recipe/overview.rst b/doc/recipe/overview.rst
index f62bdbe956..c8bef59085 100644
--- a/doc/recipe/overview.rst
+++ b/doc/recipe/overview.rst
@@ -60,7 +60,7 @@ the following:
   Note that all authors, projects, and references mentioned in the description
   section of the recipe need to be included in the (locally installed copy of the) file
-   `esmvaltool/config-references.yml `_,
+   `esmvaltool/config-references.yml `_,
   see :ref:`config-ref`.
   The author name uses the format: ``surname_name``. For instance, John
   Doe would be: ``doe_john``. This information can be omitted by new users
@@ -82,8 +82,6 @@ data specifications:
-  (``RCP8.5``)
- mip (for CMIP data, key ``mip``, value e.g. ``Amon``, ``Omon``, ``LImon``)
- ensemble member (key ``ensemble``, value e.g. ``r1i1p1``, ``r1i1p1f1``)
-- sub-experiment id (key `sub_experiment`, value e.g. `s2000`, `s(2000:2002)`,
-  for DCPP data only)
- time range (e.g. key-value ``start_year: 1982``, ``end_year: 1990``. Please
  note that `yaml`_ interprets numbers with a leading ``0`` as octal numbers,
  so we recommend avoiding them. For example, use ``128`` to specify the year
@@ -99,7 +97,6 @@ For example, a datasets section could be:
     - {dataset: CanESM2, project: CMIP5, exp: historical, ensemble: r1i1p1, start_year: 2001, end_year: 2004}
     - {dataset: UKESM1-0-LL, project: CMIP6, exp: historical, ensemble: r1i1p1f2, start_year: 2001, end_year: 2004, grid: gn}
     - {dataset: EC-EARTH3, alias: custom_alias, project: CMIP6, exp: historical, ensemble: r1i1p1f1, start_year: 2001, end_year: 2004, grid: gn}
-     - {dataset: HadGEM3-GC31-MM, alias: custom_alias, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s2000, grid: gn, start_year: 2000, end_year: 2002}

It is possible to define the experiment as a list to concatenate two experiments.
Here is an example concatenating the `historical` experiment with `rcp85`
@@ -139,14 +136,6 @@ Please, bear in mind that this syntax can only be used in the ensemble tag.
Also, note that the combination of multiple experiments and ensembles, like
exp: [historical, rcp85], ensemble: [r1i1p1, "r(2:3)i1p1"] is not supported
and will raise an error.
-The same simplified syntax can be used to add multiple sub-experiment ids:
-
-.. code-block:: yaml
-
-    datasets:
-      - {dataset: MIROC6, project: CMIP6, exp: dcppA-hindcast, ensemble: r1i1p1f1, sub_experiment: s(2000:2002), grid: gn, start_year: 2003, end_year: 2004}
-
-
Note that this section is not required, as datasets can also be provided in the
Diagnostics_ section.
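
The ensemble expansion syntax mentioned above is implemented as a small
recursive regular-expression substitution (cf. the ``_expand_ensemble`` code in
``esmvalcore/_recipe.py``, whose diff appears later in this patch). The
following is a minimal, self-contained sketch of the idea, not the exact
ESMValCore implementation:

.. code-block:: python

    import re

    REGEX = re.compile(r'\(\d+:\d+\)')  # matches e.g. "(1:3)"

    def expand(ensemble):
        """Expand "r(1:3)i1p1" into ["r1i1p1", "r2i1p1", "r3i1p1"]."""
        match = REGEX.search(ensemble)
        if not match:
            return [ensemble]
        start, end = match.group(0)[1:-1].split(':')
        expanded = []
        for i in range(int(start), int(end) + 1):
            # Substitute the first "(start:end)" block and recurse, so that
            # multiple blocks such as "r(1:2)i(1:2)p1" also work.
            expanded.extend(expand(REGEX.sub(str(i), ensemble, 1)))
        return expanded

    print(expand('r(1:3)i1p1'))  # ['r1i1p1', 'r2i1p1', 'r3i1p1']
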
@@ -345,7 +334,7 @@ concentration changed from ``sic`` to ``siconc``). ESMValCore is aware of some
of them and can do the automatic translation when needed. It will even do the
translation in the preprocessed file so the diagnostic does not have to deal
with this complexity, setting the short name in all files to match the one used
-by the recipe. For example, if ``sic`` is requested, ESMValCore will
+by the recipe. For example, if ``sic`` is requested, ESMValTool will
find ``sic`` or ``siconc`` depending on the project, but all preprocessed
files will use ``sic`` as their short_name. If the recipe requested ``siconc``,
the preprocessed files will be identical except that they will use the short_name
diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst
index 5ec6ca5aa8..20a126de5c 100644
--- a/doc/recipe/preprocessor.rst
+++ b/doc/recipe/preprocessor.rst
@@ -9,7 +9,6 @@ roughly following the default order in which preprocessor functions are
applied:

* :ref:`Variable derivation`
* :ref:`CMOR check and dataset-specific fixes`
-* :ref:`Fx variables as cell measures or ancillary variables`
* :ref:`Vertical interpolation`
* :ref:`Weighting`
* :ref:`Land/Sea/Ice masking`
@@ -176,41 +175,6 @@ steps:
To get an overview on data fixes and how to implement new ones, please go to
:ref:`fixing_data`.

-.. _Fx variables as cell measures or ancillary variables:
-
-Fx variables as cell measures or ancillary variables
-====================================================
-The following preprocessors may require the use of ``fx_variables``
-to be able to perform the computations:
-
-  - ``area_statistics``
-  - ``mask_landsea``
-  - ``mask_landseaice``
-  - ``volume_statistics``
-  - ``weighting_landsea_fraction``
-
-The preprocessor step ``add_fx_variables`` loads the required ``fx_variables``,
-checks them against CMOR standards and adds them either as ``cell_measure``
-or ``ancillary_variable`` inside the cube data. This ensures that the
-defined preprocessor chain is applied to both ``variables`` and ``fx_variables``.
-
-Note that when calling steps that require ``fx_variables`` inside diagnostic
-scripts, the variables are expected to contain the required ``cell_measures`` or
-``ancillary_variables``. If missing, they can be added using the following functions:
-
-.. code-block::
-
-    from esmvalcore.preprocessor import (add_cell_measure, add_ancillary_variable)
-
-    cube_with_area_measure = add_cell_measure(cube, area_cube, 'area')
-
-    cube_with_volume_measure = add_cell_measure(cube, volume_cube, 'volume')
-
-    cube_with_ancillary_sftlf = add_ancillary_variable(cube, sftlf_cube)
-
-    cube_with_ancillary_sftgif = add_ancillary_variable(cube, sftgif_cube)
-
-
Details on the arguments needed for each step can be found in the following sections.

.. _Vertical interpolation:
@@ -361,25 +325,8 @@ experiment is preferred for fx data retrieval:
      weighting_landsea_fraction:
        area_type: land
        exclude: ['CanESM2', 'reference_dataset']
-        fx_variables:
-          sftlf:
-            exp: piControl
-          sftof:
-            exp: piControl
-
-or alternatively:
+        fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}]

-.. code-block:: yaml
-
-  preprocessors:
-    preproc_weighting:
-      weighting_landsea_fraction:
-        area_type: land
-        exclude: ['CanESM2', 'reference_dataset']
-        fx_variables: [
-          {'short_name': 'sftlf', 'exp': 'piControl'},
-          {'short_name': 'sftof', 'exp': 'piControl'}
-          ]

See also :func:`esmvalcore.preprocessor.weighting_landsea_fraction`.

@@ -432,24 +379,8 @@ missing.
Conversely, it retrieves the ``fx: sftlf`` mask when land needs to be masked out, respectively. Optionally you can specify your own custom fx variable to be used in cases when e.g. a certain -experiment is preferred for fx data retrieval. Note that it is possible to specify as many tags -for the fx variable as required: - - -.. code-block:: yaml - - preprocessors: - landmask: - mask_landsea: - mask_out: sea - fx_variables: - sftlf: - exp: piControl - sftof: - exp: piControl - ensemble: r2i1p1f1 +experiment is preferred for fx data retrieval: -or alternatively: .. code-block:: yaml @@ -457,10 +388,7 @@ or alternatively: landmask: mask_landsea: mask_out: sea - fx_variables: [ - {'short_name': 'sftlf', 'exp': 'piControl'}, - {'short_name': 'sftof', 'exp': 'piControl', 'ensemble': 'r2i1p1f1'} - ] + fx_variables: [{'short_name': 'sftlf', 'exp': 'piControl'}, {'short_name': 'sftof', 'exp': 'piControl'}] If the corresponding fx file is not found (which is the case for some models and almost all observational datasets), the @@ -494,18 +422,6 @@ Optionally you can specify your own custom fx variable to be used in cases when experiment is preferred for fx data retrieval: -.. code-block:: yaml - - preprocessors: - landseaicemask: - mask_landseaice: - mask_out: sea - fx_variables: - sftgif: - exp: piControl - -or alternatively: - .. code-block:: yaml preprocessors: @@ -514,6 +430,7 @@ or alternatively: mask_out: sea fx_variables: [{'short_name': 'sftgif', 'exp': 'piControl'}] + See also :func:`esmvalcore.preprocessor.mask_landseaice`. Glaciated masking @@ -612,9 +529,7 @@ inter-comparison or comparison with observational datasets). Regridding is conceptually a very similar process to interpolation (in fact, the regridder engine uses interpolation and extrapolation, with various schemes). The primary difference is that interpolation is based on sample data points, while -regridding is based on the horizontal grid of another cube (the reference -grid). If the horizontal grids of a cube and its reference grid are sufficiently -the same, regridding is automatically and silently skipped for performance reasons. +regridding is based on the horizontal grid of another cube (the reference grid). The underlying regridding mechanism in ESMValTool uses :obj:`iris.cube.Cube.regrid` @@ -1265,9 +1180,9 @@ The area manipulation module contains the following preprocessor functions: ``extract_region`` ------------------ -This function returns a subset of the data on the rectangular region requested. -The boundaries of the region are provided as latitude and longitude coordinates -in the arguments: +This function masks data outside a rectangular region requested. The boundaries +of the region are provided as latitude and longitude coordinates in the +arguments: * ``start_longitude`` * ``end_longitude`` @@ -1277,10 +1192,6 @@ in the arguments: Note that this function can only be used to extract a rectangular region. Use ``extract_shape`` to extract any other shaped region from a shapefile. -If the grid is irregular, the returned region retains the original coordinates, -but is cropped to a rectangular bounding box defined by the start/end -coordinates. The deselected area inside the region is masked. - See also :func:`esmvalcore.preprocessor.extract_region`. @@ -1421,36 +1332,18 @@ region, depth layer or time period is required, then those regions need to be removed using other preprocessor operations in advance. 
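
When called directly from Python rather than from a recipe, the step collapses
the horizontal dimensions of a cube into the requested statistic. A minimal
sketch, assuming a CMOR-compliant input file named ``tas.nc`` (hypothetical
name; the ``operator`` parameter is part of the documented function signature):

.. code-block:: python

    import iris

    from esmvalcore.preprocessor import area_statistics

    cube = iris.load_cube('tas.nc')  # hypothetical input file
    # Collapse latitude and longitude into an area-weighted mean,
    # leaving e.g. a time series.
    result = area_statistics(cube, operator='mean')
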
The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function;
-the user may specify it calling the variables e.g.
-
-.. code-block:: yaml
-
-    fx_variables:
-      areacello:
-      volcello:
-
-or calling the variables and adding specific variable parameters (the key-value pair may be as specific
-as a CMOR variable can permit):
-
-.. code-block:: yaml
-
-    fx_variables:
-      areacello:
-        mip: Omon
-      volcello:
-        mip: fx
-
-Alternatively, the ``fx_variables`` argument can also be specified as a list:
+the user may specify it as a list of variables e.g.

.. code-block:: yaml

    fx_variables: ['areacello', 'volcello']

-or as a list of dictionaries:
+or as a list of dictionaries, with specific variable parameters (the key-value pair may be as specific
+as a CMOR variable can permit):

.. code-block:: yaml

-    fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}]
+    fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}]

The recipe parser will automatically find the data files that are associated
with these variables and pass them to the function for loading and processing.
@@ -1499,36 +1392,18 @@ No depth coordinate is required as this is determined by Iris. This function
works best when the ``fx_variables`` provide the cell volume.

The ``fx_variables`` argument specifies the fx variables that the user wishes to input to the function;
-the user may specify it calling the variables e.g.
-
-.. code-block:: yaml
-
-    fx_variables:
-      areacello:
-      volcello:
-
-or calling the variables and adding specific variable parameters (the key-value pair may be as specific
-as a CMOR variable can permit):
-
-.. code-block:: yaml
-
-    fx_variables:
-      areacello:
-        mip: Omon
-      volcello:
-        mip: fx
-
-Alternatively, the ``fx_variables`` argument can also be specified as a list:
+the user may specify it as a list of variables e.g.

.. code-block:: yaml

    fx_variables: ['areacello', 'volcello']

-or as a list of dictionaries:
+or as a list of dictionaries, with specific variable parameters (the key-value pair may be as specific
+as a CMOR variable can permit):

.. code-block:: yaml

-    fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}]
+    fx_variables: [{'short_name': 'areacello', 'mip': 'Omon'}, {'short_name': 'volcello', 'mip': 'fx'}]

The recipe parser will automatically find the data files that are associated
with these variables and pass them to the function for loading and processing.
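
How the plain list form and the dictionary entries shown above are normalized
internally can be seen from the ``_fx_list_to_dict`` helper of
``esmvalcore/_recipe.py`` (its diff appears later in this patch). A
self-contained sketch of that normalization, for illustration only:

.. code-block:: python

    def fx_list_to_dict(fx_vars):
        """Convert the list form of ``fx_variables`` to a dictionary."""
        user_fx_vars = {}
        for fx_var in fx_vars:
            if isinstance(fx_var, dict):
                # Entries with extra parameters keep their full specification.
                user_fx_vars[fx_var['short_name']] = fx_var
            else:
                # Bare names carry no extra parameters.
                user_fx_vars[fx_var] = None
        return user_fx_vars

    print(fx_list_to_dict(
        [{'short_name': 'areacello', 'mip': 'Omon'}, 'volcello']))
    # {'areacello': {'short_name': 'areacello', 'mip': 'Omon'},
    #  'volcello': None}
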
diff --git a/esmvalcore/_citation.py b/esmvalcore/_citation.py index c6129c21a6..c660759b0a 100644 --- a/esmvalcore/_citation.py +++ b/esmvalcore/_citation.py @@ -146,7 +146,7 @@ def _get_response(url): json_data = None if url.lower().startswith('https'): try: - response = requests.get(url, timeout=5) + response = requests.get(url) if response.status_code == 200: json_data = response.json() else: diff --git a/esmvalcore/_config/_logging.py b/esmvalcore/_config/_logging.py index 0831618fe3..acacecee16 100644 --- a/esmvalcore/_config/_logging.py +++ b/esmvalcore/_config/_logging.py @@ -5,7 +5,6 @@ import os import time from pathlib import Path -from typing import Union import yaml @@ -27,8 +26,7 @@ def _purge_file_handlers(cfg: dict) -> None: ] -def _get_log_files(cfg: dict, - output_dir: Union[os.PathLike, str] = None) -> list: +def _get_log_files(cfg: dict, output_dir: str = None) -> list: """Initialize log files for the file handlers.""" log_files = [] @@ -38,12 +36,8 @@ def _get_log_files(cfg: dict, filename = handler.get('filename', None) if filename: - if output_dir is None: - raise ValueError('`output_dir` must be defined') - if not os.path.isabs(filename): handler['filename'] = os.path.join(output_dir, filename) - log_files.append(handler['filename']) return log_files @@ -59,8 +53,8 @@ def _update_stream_level(cfg: dict, level=None): handler['level'] = level.upper() -def configure_logging(cfg_file: Union[os.PathLike, str] = None, - output_dir: Union[os.PathLike, str] = None, +def configure_logging(cfg_file: str = None, + output_dir: str = None, console_log_level: str = None) -> list: """Configure logging. diff --git a/esmvalcore/_data_finder.py b/esmvalcore/_data_finder.py index b9f8e8c8df..ad0367799f 100644 --- a/esmvalcore/_data_finder.py +++ b/esmvalcore/_data_finder.py @@ -33,60 +33,29 @@ def find_files(dirnames, filenames): def get_start_end_year(filename): - """Get the start and end year from a file name. 
-
-
-    Examples of allowed dates: 1980, 198001, 19801231,
-    1980123123, 19801231T23, 19801231T2359, 19801231T235959,
-    19801231T235959Z
-
-    Dates must be surrounded by - or _ or string start or string end
-    (after removing filename suffix)
-
-    Look first for two dates separated by - or _, then for one single
-    date, and if there are multiple, for one date at start or end
-    """
+    """Get the start and end year from a file name."""
    stem = Path(filename).stem
    start_year = end_year = None
-    #
-    time_pattern = (r"(?P<hour>[0-2][0-9]"
-                    r"(?P<minute>[0-5][0-9]"
-                    r"(?P<second>[0-5][0-9])?)?Z?)")
-    date_pattern = (r"(?P<year>[0-9]{4})"
-                    r"(?P<month>[01][0-9]"
-                    r"(?P<day>[0-3][0-9]"
-                    rf"(T?{time_pattern})?)?)?")
-    #
-    end_date_pattern = date_pattern.replace(">", "_end>")
-    date_range_pattern = date_pattern + r"[-_]" + end_date_pattern
-    #
-    # Next string allows to test that there is an allowed delimiter (or
-    # string start or end) close to date range (or to single date)
-    context = r"(?:^|[-_]|$)"
-    #
-    # First check for a block of two potential dates
-    date_range_pattern = context + date_range_pattern + context
-    daterange = re.search(date_range_pattern, stem)
+
+    # First check for a block of two potential dates separated by _ or -
+    daterange = re.findall(r'([0-9]{4,12}[-_][0-9]{4,12})', stem)
    if daterange:
-        start_year = daterange.group("year")
-        end_year = daterange.group("year_end")
+        start_date, end_date = re.findall(r'([0-9]{4,12})', daterange[0])
+        start_year = start_date[:4]
+        end_year = end_date[:4]
    else:
        # Check for single dates in the filename
-        single_date_pattern = context + date_pattern + context
-        dates = re.findall(single_date_pattern, stem)
+        dates = re.findall(r'([0-9]{4,12})', stem)
        if len(dates) == 1:
-            start_year = end_year = dates[0][0]
+            start_year = end_year = dates[0][:4]
        elif len(dates) > 1:
-            # Check for dates at start or (exclusive or) end of filename
-            start = re.search(r'^' + date_pattern, stem)
-            end = re.search(date_pattern + r'$', stem)
-            if start and not end:
-                start_year = end_year = start.group('year')
-            elif end:
-                start_year = end_year = end.group('year')
+            # Check for dates at start or end of filename
+            outerdates = re.findall(r'^[0-9]{4,12}|[0-9]{4,12}$', stem)
+            if len(outerdates) == 1:
+                start_year = end_year = outerdates[0][:4]

    # As final resort, try to get the dates from the file contents
    if start_year is None or end_year is None:
-        logger.debug("Must load file %s for daterange ", filename)
        cubes = iris.load(filename)

        for cube in cubes:
@@ -123,21 +92,13 @@ def select_files(filenames, start_year, end_year):
def _replace_tags(paths, variable):
    """Replace tags in the config-developer's file with actual values."""
    if isinstance(paths, str):
-        paths = set((paths.strip('/'),))
+        paths = (paths.strip('/'), )
    else:
-        paths = set(path.strip('/') for path in paths)
+        paths = [path.strip('/') for path in paths]
    tlist = set()
+
    for path in paths:
        tlist = tlist.union(re.findall(r'{([^}]*)}', path))
-    if 'sub_experiment' in variable:
-        new_paths = []
-        for path in paths:
-            new_paths.extend((
-                re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path),
-                re.sub(r'({ensemble})', r'{sub_experiment}-\1', path)
-            ))
-            tlist.add('sub_experiment')
-        paths = new_paths

    logger.debug(tlist)
    for tag in tlist:
@@ -151,6 +112,7 @@ def _replace_tags(paths, variable):
        else:
            raise KeyError("Dataset key {} must be specified for {}, check "
                           "your recipe entry".format(tag, variable))
+
        paths = _replace_tag(paths, original_tag, replacewith)
    return paths

@@ -165,7 +127,7 @@ def _replace_tag(paths, tag, replacewith):
    else:
        text 
= _apply_caps(str(replacewith), lower, upper) result.extend(p.replace('{' + tag + '}', text) for p in paths) - return list(set(result)) + return result def _get_caps_options(tag): diff --git a/esmvalcore/_recipe.py b/esmvalcore/_recipe.py index 3e094c9ed4..1f7a5ab883 100644 --- a/esmvalcore/_recipe.py +++ b/esmvalcore/_recipe.py @@ -315,21 +315,16 @@ def _get_default_settings(variable, config_user, derive=False): if variable['short_name'] != variable['original_short_name']: settings['save']['alias'] = variable['short_name'] - # Configure fx settings - settings['add_fx_variables'] = { - 'fx_variables': {}, - 'check_level': config_user.get('check_level', CheckLevels.DEFAULT) - } - settings['remove_fx_variables'] = {} - return settings -def _add_fxvar_keys(fx_info, variable): +def _add_fxvar_keys(fx_var_dict, variable): """Add keys specific to fx variable to use get_input_filelist.""" - fx_variable = deepcopy(variable) - fx_variable.update(fx_info) - fx_variable['variable_group'] = fx_info['short_name'] + fx_variable = dict(variable) + fx_variable.update(fx_var_dict) + + # set variable names + fx_variable['variable_group'] = fx_var_dict['short_name'] # add special ensemble for CMIP5 only if fx_variable['project'] == 'CMIP5': @@ -341,69 +336,77 @@ def _add_fxvar_keys(fx_info, variable): return fx_variable -def _search_fx_mip(tables, found_mip, variable, fx_info, config_user): - fx_files = None - for mip in tables: - fx_cmor = tables[mip].get(fx_info['short_name']) - if fx_cmor: - found_mip = True - fx_info['mip'] = mip - fx_info = _add_fxvar_keys(fx_info, variable) - logger.debug( - "For fx variable '%s', found table '%s'", - fx_info['short_name'], mip) - fx_files = _get_input_files(fx_info, config_user)[0] - if fx_files: - logger.debug( - "Found fx variables '%s':\n%s", - fx_info['short_name'], pformat(fx_files)) - return found_mip, fx_info, fx_files - - -def _get_fx_files(variable, fx_info, config_user): +def _get_fx_file(variable, fx_variable, config_user): """Get fx files (searching all possible mips).""" + # make it a dict + if isinstance(fx_variable, str): + fx_varname = fx_variable + fx_variable = {'short_name': fx_varname} + else: + fx_varname = fx_variable['short_name'] # assemble info from master variable + var = dict(variable) var_project = variable['project'] # check if project in config-developer try: get_project_config(var_project) except ValueError: raise RecipeError( - f"Requested fx variable '{fx_info['short_name']}' " - f"with parent variable '{variable}' does not have " - f"a '{var_project}' project in config-developer.") - project_tables = CMOR_TABLES[var_project].tables + f"Requested fx variable '{fx_varname}' with parent variable" + f"'{variable}' does not have a '{var_project}' project" + f"in config-developer.") + cmor_table = CMOR_TABLES[var_project] + valid_fx_vars = [] # force only the mip declared by user - found_mip = False - if not fx_info['mip']: - found_mip, fx_info, fx_files = _search_fx_mip( - project_tables, found_mip, variable, fx_info, config_user) + if 'mip' in fx_variable: + fx_mips = [fx_variable['mip']] else: - fx_cmor = project_tables[fx_info['mip']].get(fx_info['short_name']) - if fx_cmor: - found_mip = True - fx_info = _add_fxvar_keys(fx_info, variable) - fx_files = _get_input_files(fx_info, config_user)[0] + # Get all fx-related mips (original var mip, + # 'fx' and extend from cmor tables) + fx_mips = [variable['mip']] + fx_mips.extend(mip for mip in cmor_table.tables if 'fx' in mip) + + # Search all mips for available variables + # priority goes to 
user specified mip if available + searched_mips = [] + fx_files = [] + for fx_mip in fx_mips: + fx_cmor_variable = cmor_table.get_variable(fx_mip, fx_varname) + if fx_cmor_variable is not None: + fx_var_dict = dict(fx_variable) + searched_mips.append(fx_mip) + fx_var_dict['mip'] = fx_mip + fx_var_dict = _add_fxvar_keys(fx_var_dict, var) + valid_fx_vars.append(fx_var_dict) + logger.debug("For fx variable '%s', found table '%s'", fx_varname, + fx_mip) + fx_files = _get_input_files(fx_var_dict, config_user)[0] + + # If files found, return them + if fx_files: + logger.debug("Found fx variables '%s':\n%s", fx_varname, + pformat(fx_files)) + break # If fx variable was not found in any table, raise exception - if not found_mip: + if not searched_mips: raise RecipeError( - f"Requested fx variable '{fx_info['short_name']}' " - f"not available in any CMOR table for '{var_project}'") + f"Requested fx variable '{fx_varname}' not available in " + f"any 'fx'-related CMOR table ({fx_mips}) for '{var_project}'") # flag a warning if not fx_files: - logger.warning( - "Missing data for fx variable '%s'", fx_info['short_name']) + logger.warning("Missing data for fx variable '%s'", fx_varname) # allow for empty lists corrected for by NE masks if fx_files: - if fx_info['frequency'] == 'fx': - fx_files = fx_files[0] + fx_files = fx_files[0] + if valid_fx_vars: + valid_fx_vars = valid_fx_vars[0] - return fx_files, fx_info + return fx_files, valid_fx_vars def _exclude_dataset(settings, variable, step): @@ -429,82 +432,44 @@ def _update_fx_files(step_name, settings, variable, config_user, fx_vars): """Update settings with mask fx file list or dict.""" if not fx_vars: return - for fx_var, fx_info in fx_vars.items(): - if not fx_info: - fx_info = {} - if 'mip' not in fx_info: - fx_info.update({'mip': None}) - if 'short_name' not in fx_info: - fx_info.update({'short_name': fx_var}) - fx_files, fx_info = _get_fx_files(variable, fx_info, config_user) - if fx_files: - fx_info['filename'] = fx_files - settings['add_fx_variables']['fx_variables'].update({ - fx_var: fx_info - }) - logger.info( - 'Using fx files for variable %s during step %s: %s', - variable['short_name'], step_name, pformat(fx_files)) - - -def _fx_list_to_dict(fx_vars): - """Convert fx list to dictionary. 
To be deprecated at some point.""" - user_fx_vars = {} - for fx_var in fx_vars: - if isinstance(fx_var, dict): - short_name = fx_var['short_name'] - user_fx_vars.update({short_name: fx_var}) - continue - user_fx_vars.update({fx_var: None}) - return user_fx_vars + + fx_vars = [_get_fx_file(variable, fxvar, config_user) for fxvar in fx_vars] + + fx_dict = {fx_var[1]['short_name']: fx_var[0] for fx_var in fx_vars} + settings['fx_variables'] = fx_dict + logger.info('Using fx_files: %s for variable %s during step %s', + pformat(settings['fx_variables']), variable['short_name'], + step_name) def _update_fx_settings(settings, variable, config_user): """Update fx settings depending on the needed method.""" + # get fx variables either from user defined attribute or fixed def _get_fx_vars_from_attribute(step_settings, step_name): user_fx_vars = step_settings.get('fx_variables') - if isinstance(user_fx_vars, list): - user_fx_vars = _fx_list_to_dict(user_fx_vars) - step_settings['fx_variables'] = user_fx_vars if not user_fx_vars: - default_fx = { - 'area_statistics': { - 'areacella': None, - 'areacello': None, - }, - 'mask_landsea': { - 'sftlf': None, - }, - 'mask_landseaice': { - 'sftgif': None, - }, - 'volume_statistics': { - 'volcello': None, - }, - 'weighting_landsea_fraction': { - 'sftlf': None, - }, - } - if variable['project'] != 'obs4mips': - default_fx['mask_landsea'].update({'sftof': None}) - default_fx['weighting_landsea_fraction'].update( - {'sftof': None}) - step_settings['fx_variables'] = default_fx[step_name] + if step_name in ('mask_landsea', 'weighting_landsea_fraction'): + user_fx_vars = ['sftlf'] + if variable['project'] != 'obs4mips': + user_fx_vars.append('sftof') + elif step_name == 'mask_landseaice': + user_fx_vars = ['sftgif'] + elif step_name in ('area_statistics', 'volume_statistics', + 'zonal_statistics'): + user_fx_vars = [] + return user_fx_vars fx_steps = [ 'mask_landsea', 'mask_landseaice', 'weighting_landsea_fraction', - 'area_statistics', 'volume_statistics' + 'area_statistics', 'volume_statistics', 'zonal_statistics' ] - for step_name in settings: + + for step_name, step_settings in settings.items(): if step_name in fx_steps: - _get_fx_vars_from_attribute(settings[step_name], step_name) - _update_fx_files(step_name, settings, variable, config_user, - settings[step_name]['fx_variables']) - # Remove unused attribute in 'fx_steps' preprocessors. - # The fx_variables information is saved in - # the 'add_fx_variables' step. 
- settings[step_name].pop('fx_variables', None) + fx_vars = _get_fx_vars_from_attribute(step_settings, step_name) + _update_fx_files(step_name, step_settings, variable, config_user, + fx_vars) def _read_attributes(filename): @@ -545,7 +510,9 @@ def _get_ancestors(variable, config_user): logger.info("Using input files for variable %s of dataset %s:\n%s", variable['short_name'], variable['dataset'], '\n'.join(input_files)) - check.data_availability(input_files, variable, dirnames, filenames) + if (not config_user.get('skip-nonexistent') + or variable['dataset'] == variable.get('reference_dataset')): + check.data_availability(input_files, variable, dirnames, filenames) # Set up provenance tracking for i, filename in enumerate(input_files): @@ -684,16 +651,6 @@ def get_matching(attributes): return grouped_products -def _allow_skipping(ancestors, variable, config_user): - """Allow skipping of datasets.""" - allow_skipping = all([ - config_user.get('skip-nonexistent'), - not ancestors, - variable['dataset'] != variable.get('reference_dataset'), - ]) - return allow_skipping - - def _get_preprocessor_products(variables, profile, order, ancestor_products, config_user, name): """Get preprocessor product definitions for a set of datasets. @@ -735,7 +692,7 @@ def _get_preprocessor_products(variables, profile, order, ancestor_products, try: ancestors = _get_ancestors(variable, config_user) except RecipeError as ex: - if _allow_skipping(ancestors, variable, config_user): + if config_user.get('skip-nonexistent') and not ancestors: logger.info("Skipping: %s", ex.message) else: missing_vars.add(ex.message) @@ -1040,38 +997,37 @@ def _initialize_datasets(raw_datasets): return datasets @staticmethod - def _expand_tag(variables, input_tag): - """ - Expand tags such as ensemble members or stardates to multiple datasets. + def _expand_ensemble(variables): + """Expand ensemble members to multiple datasets. Expansion only supports ensembles defined as strings, not lists. 
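+
+        For example, ``ensemble: r(1:3)i1p1`` is expanded to three datasets
+        with ensembles ``r1i1p1``, ``r2i1p1`` and ``r3i1p1``.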
""" expanded = [] regex = re.compile(r'\(\d+:\d+\)') - def expand_tag(variable, input_tag): - tag = variable.get(input_tag, "") - match = regex.search(tag) + def expand_ensemble(variable): + ens = variable.get('ensemble', "") + match = regex.search(ens) if match: start, end = match.group(0)[1:-1].split(':') for i in range(int(start), int(end) + 1): expand = deepcopy(variable) - expand[input_tag] = regex.sub(str(i), tag, 1) - expand_tag(expand, input_tag) + expand['ensemble'] = regex.sub(str(i), ens, 1) + expand_ensemble(expand) else: expanded.append(variable) for variable in variables: - tag = variable.get(input_tag, "") - if isinstance(tag, (list, tuple)): - for elem in tag: + ensemble = variable.get('ensemble', "") + if isinstance(ensemble, (list, tuple)): + for elem in ensemble: if regex.search(elem): raise RecipeError( - f"In variable {variable}: {input_tag} expansion " - f"cannot be combined with {input_tag} lists") + f"In variable {variable}: ensemble expansion " + "cannot be combined with ensemble lists") expanded.append(variable) else: - expand_tag(variable, input_tag) + expand_ensemble(variable) return expanded @@ -1118,14 +1074,8 @@ def _initialize_variables(self, raw_variable, raw_datasets): activity = get_activity(variable) if activity: variable['activity'] = activity - if 'sub_experiment' in variable: - subexperiment_keys = deepcopy(required_keys) - subexperiment_keys.update({'sub_experiment'}) - check.variable(variable, subexperiment_keys) - else: - check.variable(variable, required_keys) - variables = self._expand_tag(variables, 'ensemble') - variables = self._expand_tag(variables, 'sub_experiment') + check.variable(variable, required_keys) + variables = self._expand_ensemble(variables) return variables def _initialize_preprocessor_output(self, diagnostic_name, raw_variables, @@ -1377,9 +1327,9 @@ def initialize_tasks(self): tasks.add(task) priority += 1 if failed_tasks: - recipe_error = RecipeError('Could not create all tasks') - recipe_error.failed_tasks.extend(failed_tasks) - raise recipe_error + ex = RecipeError('Could not create all tasks') + ex.failed_tasks.extend(failed_tasks) + raise ex check.tasks_valid(tasks) # Resolve diagnostic ancestors diff --git a/esmvalcore/_recipe_checks.py b/esmvalcore/_recipe_checks.py index 1133a36639..dd9fd00193 100644 --- a/esmvalcore/_recipe_checks.py +++ b/esmvalcore/_recipe_checks.py @@ -56,7 +56,7 @@ def recipe_with_schema(filename): logger.debug("Checking recipe against schema %s", schema_file) recipe = yamale.make_data(filename) schema = yamale.make_schema(schema_file) - yamale.validate(schema, recipe, strict=False) + yamale.validate(schema, recipe) def diagnostics(diags): diff --git a/esmvalcore/_task.py b/esmvalcore/_task.py index 7e6ad4429d..1bd649ecdf 100644 --- a/esmvalcore/_task.py +++ b/esmvalcore/_task.py @@ -13,10 +13,8 @@ import time from copy import deepcopy from multiprocessing import Pool -from multiprocessing.pool import ApplyResult from pathlib import Path, PosixPath from shutil import which -from typing import Dict, Type import psutil import yaml @@ -684,10 +682,10 @@ def _run_sequential(self) -> None: for task in sorted(tasks, key=lambda t: t.priority): task.run() - def _run_parallel(self, max_parallel_tasks=None): + def _run_parallel(self, max_parallel_tasks: int = None) -> None: """Run tasks in parallel.""" scheduled = self.flatten() - running: Dict[Type[BaseTask], Type[ApplyResult]] = {} + running = {} n_tasks = n_scheduled = len(scheduled) n_running = 0 diff --git a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py 
b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py index 28593a9d21..c61eeaa192 100644 --- a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py +++ b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1.py @@ -1,7 +1,68 @@ """Fixes for bcc-csm1-1.""" -from ..common import ClFixHybridPressureCoord, OceanFixGrid +import numpy as np +from scipy.interpolate import InterpolatedUnivariateSpline +from scipy.ndimage import map_coordinates + +from ..common import ClFixHybridPressureCoord +from ..fix import Fix + Cl = ClFixHybridPressureCoord -Tos = OceanFixGrid +class Tos(Fix): + """Fixes for tos.""" + + def fix_data(self, cube): + """Fix data. + + Calculate missing latitude/longitude boundaries using interpolation. + + Parameters + ---------- + cube: iris.cube.Cube + Input cube to fix. + + Returns + ------- + iris.cube.Cube + + """ + rlat = cube.coord('grid_latitude').points + rlon = cube.coord('grid_longitude').points + + # Transform grid latitude/longitude to array indices [0, 1, 2, ...] + rlat_to_idx = InterpolatedUnivariateSpline(rlat, + np.arange(len(rlat)), + k=1) + rlon_to_idx = InterpolatedUnivariateSpline(rlon, + np.arange(len(rlon)), + k=1) + rlat_idx_bnds = rlat_to_idx(cube.coord('grid_latitude').bounds) + rlon_idx_bnds = rlon_to_idx(cube.coord('grid_longitude').bounds) + + # Calculate latitude/longitude vertices by interpolation + lat_vertices = [] + lon_vertices = [] + for (i, j) in [(0, 0), (0, 1), (1, 1), (1, 0)]: + (rlat_v, rlon_v) = np.meshgrid(rlat_idx_bnds[:, i], + rlon_idx_bnds[:, j], + indexing='ij') + lat_vertices.append( + map_coordinates(cube.coord('latitude').points, + [rlat_v, rlon_v], + mode='nearest')) + lon_vertices.append( + map_coordinates(cube.coord('longitude').points, + [rlat_v, rlon_v], + mode='wrap')) + lat_vertices = np.array(lat_vertices) + lon_vertices = np.array(lon_vertices) + lat_vertices = np.moveaxis(lat_vertices, 0, -1) + lon_vertices = np.moveaxis(lon_vertices, 0, -1) + + # Copy vertices to cube + cube.coord('latitude').bounds = lat_vertices + cube.coord('longitude').bounds = lon_vertices + + return cube diff --git a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py index 4794048f67..8a1ab262a0 100644 --- a/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py +++ b/esmvalcore/cmor/_fixes/cmip5/bcc_csm1_1_m.py @@ -1,7 +1,9 @@ """Fixes for bcc-csm1-1-m.""" -from ..common import ClFixHybridPressureCoord, OceanFixGrid +from ..common import ClFixHybridPressureCoord +from .bcc_csm1_1 import Tos as BaseTos + Cl = ClFixHybridPressureCoord -Tos = OceanFixGrid +Tos = BaseTos diff --git a/esmvalcore/cmor/_fixes/cmip5/ec_earth.py b/esmvalcore/cmor/_fixes/cmip5/ec_earth.py index 1ef16df970..86b5bc6dbe 100644 --- a/esmvalcore/cmor/_fixes/cmip5/ec_earth.py +++ b/esmvalcore/cmor/_fixes/cmip5/ec_earth.py @@ -1,7 +1,6 @@ """Fixes for EC-Earth model.""" -import iris -import numpy as np from dask import array as da +import iris from ..fix import Fix from ..shared import add_scalar_height_coord, cube_to_aux_coord @@ -130,76 +129,3 @@ def fix_metadata(self, cubes): areacello.add_aux_coord(cube_to_aux_coord(lon), (0, 1)) return iris.cube.CubeList([areacello, ]) - - -class Pr(Fix): - """Fixes for pr.""" - - def fix_metadata(self, cubes): - """Fix time coordinate. - - Last file (2000-2009) has erroneously duplicated points - in time coordinate (e.g. [t1, t2, t3, t4, t2, t3, t4, t5]) - which should be removed except the last one which is correct. - - Parameters - ---------- - cubes : iris.cube.CubeList - Cubes to fix. 
- - Returns - ------- - iris.cube.CubeList - - """ - new_list = iris.cube.CubeList() - for cube in cubes: - try: - old_time = cube.coord('time') - except iris.exceptions.CoordinateNotFoundError: - new_list.append(cube) - else: - if old_time.is_monotonic(): - new_list.append(cube) - else: - time_units = old_time.units - time_data = old_time.points - - # erase erroneously copy-pasted points - time_diff = np.diff(time_data) - idx_neg = np.where(time_diff <= 0.)[0] - while len(idx_neg) > 0: - time_data = np.delete(time_data, idx_neg[0] + 1) - time_diff = np.diff(time_data) - idx_neg = np.where(time_diff <= 0.)[0] - - # create the new time coord - new_time = iris.coords.DimCoord(time_data, - standard_name='time', - var_name='time', - units=time_units) - - # create a new cube with the right shape - dims = (time_data.shape[0], - cube.coord('latitude').shape[0], - cube.coord('longitude').shape[0]) - data = cube.data - new_data = np.ma.append(data[:dims[0] - 1, :, :], - data[-1, :, :]) - new_data = new_data.reshape(dims) - - tmp_cube = iris.cube.Cube( - new_data, - standard_name=cube.standard_name, - long_name=cube.long_name, - var_name=cube.var_name, - units=cube.units, - attributes=cube.attributes, - cell_methods=cube.cell_methods, - dim_coords_and_dims=[(new_time, 0), - (cube.coord('latitude'), 1), - (cube.coord('longitude'), 2)]) - - new_list.append(tmp_cube) - - return new_list diff --git a/esmvalcore/cmor/_fixes/cmip5/miroc5.py b/esmvalcore/cmor/_fixes/cmip5/miroc5.py index 4331652eb2..ae5ec6ee16 100644 --- a/esmvalcore/cmor/_fixes/cmip5/miroc5.py +++ b/esmvalcore/cmor/_fixes/cmip5/miroc5.py @@ -163,7 +163,3 @@ def fix_data(self, cube): """ cube.data = da.ma.masked_equal(cube.core_data(), 0.) return cube - - -class Pr(Tas): - """Fixes for pr.""" diff --git a/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py b/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py index 38f96526e1..e084fbebb5 100644 --- a/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py +++ b/esmvalcore/cmor/_fixes/cmip6/bcc_csm2_mr.py @@ -1,5 +1,7 @@ """Fixes for BCC-CSM2-MR model.""" -from ..common import ClFixHybridPressureCoord, OceanFixGrid +from ..cmip5.bcc_csm1_1 import Tos as BaseTos +from ..common import ClFixHybridPressureCoord + Cl = ClFixHybridPressureCoord @@ -10,10 +12,57 @@ Clw = ClFixHybridPressureCoord -Tos = OceanFixGrid +class Tos(BaseTos): + """Fixes for tos.""" + def fix_metadata(self, cubes): + """Rename ``var_name`` of 1D-``latitude`` and 1D-``longitude``. + Parameters + ---------- + cubes : iris.cube.CubeList + Input cubes. + Returns + ------- + iris.cube.CubeList + """ + cube = self.get_cube_from_list(cubes) + lat_coord = cube.coord('latitude', dimensions=(1, )) + lon_coord = cube.coord('longitude', dimensions=(2, )) + lat_coord.standard_name = None + lat_coord.long_name = 'grid_latitude' + lat_coord.var_name = 'i' + lat_coord.units = '1' + lon_coord.standard_name = None + lon_coord.long_name = 'grid_longitude' + lon_coord.var_name = 'j' + lon_coord.units = '1' + lon_coord.circular = False + return cubes -Siconc = OceanFixGrid +class Siconc(BaseTos): + """Fixes for siconc.""" -Sos = OceanFixGrid + def fix_metadata(self, cubes): + """Rename ``var_name`` of 1D-``latitude`` and 1D-``longitude``. + Parameters + ---------- + cubes : iris.cube.CubeList + Input cubes. 
+ Returns + ------- + iris.cube.CubeList + """ + cube = self.get_cube_from_list(cubes) + lat_coord = cube.coord('latitude', dimensions=(1, )) + lon_coord = cube.coord('longitude', dimensions=(2, )) + lat_coord.standard_name = None + lat_coord.long_name = 'grid_latitude' + lat_coord.var_name = 'i' + lat_coord.units = '1' + lon_coord.standard_name = None + lon_coord.long_name = 'grid_longitude' + lon_coord.var_name = 'j' + lon_coord.units = '1' + lon_coord.circular = False + return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py b/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py index 6a55b379a4..cae45b3f2f 100644 --- a/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py +++ b/esmvalcore/cmor/_fixes/cmip6/bcc_esm1.py @@ -1,5 +1,7 @@ """Fixes for BCC-ESM1 model.""" -from ..common import ClFixHybridPressureCoord, OceanFixGrid +from ..common import ClFixHybridPressureCoord +from .bcc_csm2_mr import Tos as BaseTos + Cl = ClFixHybridPressureCoord @@ -10,10 +12,4 @@ Clw = ClFixHybridPressureCoord -Tos = OceanFixGrid - - -Sos = OceanFixGrid - - -Siconc = OceanFixGrid +Tos = BaseTos diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2.py index 51bc04685e..9b4bc499f4 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2.py @@ -4,15 +4,14 @@ import numpy as np from netCDF4 import Dataset -from ..common import SiconcFixScalarCoord from ..fix import Fix from ..shared import ( add_scalar_depth_coord, add_scalar_height_coord, add_scalar_typeland_coord, add_scalar_typesea_coord, - fix_ocean_depth_coord, ) +from .gfdl_esm4 import Siconc as Addtypesi class Cl(Fix): @@ -215,9 +214,6 @@ def fix_metadata(self, cubes): return cubes -Siconc = SiconcFixScalarCoord - - class Tos(Fix): """Fixes for tos.""" @@ -246,32 +242,4 @@ def fix_metadata(self, cubes): return cubes -class Omon(Fix): - """Fixes for ocean variables.""" - - def fix_metadata(self, cubes): - """Fix ocean depth coordinate. 
- - Parameters - ---------- - cubes: iris CubeList - List of cubes to fix - - Returns - ------- - iris.cube.CubeList - - """ - for cube in cubes: - if cube.coords(axis='Z'): - z_coord = cube.coord(axis='Z') - - # Only points need to be fixed, not bounds - if z_coord.units == 'cm': - z_coord.points = z_coord.core_points() / 100.0 - z_coord.units = 'm' - - # Fix depth metadata - if z_coord.standard_name is None: - fix_ocean_depth_coord(cube) - return cubes +Siconc = Addtypesi diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py index 80f2e58849..74dff9bc74 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_fv2.py @@ -1,8 +1,6 @@ """Fixes for CESM2-FV2 model.""" from .cesm2 import Cl as BaseCl -from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Tas as BaseTas -from ..common import SiconcFixScalarCoord Cl = BaseCl @@ -14,10 +12,4 @@ Clw = Cl -Fgco2 = BaseFgco2 - - -Siconc = SiconcFixScalarCoord - - Tas = BaseTas diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py index d0014f308a..1bf837884f 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm.py @@ -2,9 +2,7 @@ from netCDF4 import Dataset from .cesm2 import Cl as BaseCl -from .cesm2 import Fgco2 as BaseFgco2 from .cesm2 import Tas as BaseTas -from ..common import SiconcFixScalarCoord class Cl(BaseCl): @@ -49,10 +47,4 @@ def fix_file(self, filepath, output_dir): Clw = Cl -Fgco2 = BaseFgco2 - - -Siconc = SiconcFixScalarCoord - - Tas = BaseTas diff --git a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py index bc8068af8a..cd453d66a5 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py +++ b/esmvalcore/cmor/_fixes/cmip6/cesm2_waccm_fv2.py @@ -1,11 +1,8 @@ """Fixes for CESM2-WACCM-FV2 model.""" from .cesm2 import Tas as BaseTas -from .cesm2 import Fgco2 as BaseFgco2 from .cesm2_waccm import Cl as BaseCl from .cesm2_waccm import Cli as BaseCli from .cesm2_waccm import Clw as BaseClw -from ..common import SiconcFixScalarCoord - Cl = BaseCl @@ -16,10 +13,4 @@ Clw = BaseClw -Fgco2 = BaseFgco2 - - -Siconc = SiconcFixScalarCoord - - Tas = BaseTas diff --git a/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py b/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py index 0dbb7fc9b2..25e2a22b74 100644 --- a/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py +++ b/esmvalcore/cmor/_fixes/cmip6/cnrm_esm2_1.py @@ -1,8 +1,4 @@ """Fixes for CNRM-ESM2-1 model.""" -from ..fix import Fix -from ..shared import (fix_ocean_depth_coord) - - from .cnrm_cm6_1 import Cl as BaseCl from .cnrm_cm6_1 import Clcalipso as BaseClcalipso from .cnrm_cm6_1 import Cli as BaseCli @@ -19,27 +15,3 @@ Clw = BaseClw - - -class Omon(Fix): - """Fixes for ocean variables.""" - - def fix_metadata(self, cubes): - """Fix ocean depth coordinate. 
- - Parameters - ---------- - cubes: iris CubeList - List of cubes to fix - - Returns - ------- - iris.cube.CubeList - - """ - for cube in cubes: - if cube.coords(axis='Z'): - z_coord = cube.coord(axis='Z') - if z_coord.standard_name is None: - fix_ocean_depth_coord(cube) - return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py b/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py index 69a6b6375b..c752ca2076 100644 --- a/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py +++ b/esmvalcore/cmor/_fixes/cmip6/fgoals_g3.py @@ -2,6 +2,7 @@ from ..cmip5.fgoals_g2 import Cl as BaseCl from ..common import OceanFixGrid + Cl = BaseCl @@ -11,31 +12,7 @@ Clw = BaseCl -class Tos(OceanFixGrid): - """Fixes for tos.""" - - def fix_metadata(self, cubes): - """Fix metadata. - - FGOALS-g3 data contain latitude and longitude data set to >1e30 in some - places. - - Parameters - ---------- - cubes : iris.cube.CubeList - Input cubes. - - Returns - ------- - iris.cube.CubeList - - """ - cube = self.get_cube_from_list(cubes) - cube.coord('latitude').points[ - cube.coord('latitude').points > 1000.0] = 0.0 - cube.coord('longitude').points[ - cube.coord('longitude').points > 1000.0] = 0.0 - return super().fix_metadata(cubes) +Tos = OceanFixGrid -Siconc = Tos +Siconc = OceanFixGrid diff --git a/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py b/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py index 35baf68a1f..ce61e7fab6 100644 --- a/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py +++ b/esmvalcore/cmor/_fixes/cmip6/gfdl_cm4.py @@ -1,7 +1,7 @@ """Fixes for GFDL-CM4 model.""" import iris -from ..common import ClFixHybridPressureCoord, SiconcFixScalarCoord +from ..common import ClFixHybridPressureCoord from ..fix import Fix from ..shared import add_aux_coords_from_cubes, add_scalar_height_coord @@ -38,9 +38,6 @@ def fix_metadata(self, cubes): Clw = Cl -Siconc = SiconcFixScalarCoord - - class Tas(Fix): """Fixes for tas.""" diff --git a/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py b/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py index c2610a318b..a4e937842e 100644 --- a/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py +++ b/esmvalcore/cmor/_fixes/cmip6/gfdl_esm4.py @@ -1,38 +1,14 @@ """Fixes for GFDL-ESM4 model.""" -from ..common import SiconcFixScalarCoord +import iris from ..fix import Fix -from ..shared import ( - add_scalar_depth_coord, - fix_ocean_depth_coord, -) -class Fgco2(Fix): - """Fixes for fgco2.""" +class Siconc(Fix): + """Fixes for siconc.""" def fix_metadata(self, cubes): - """Add depth (0m) coordinate. - - Parameters - ---------- - cubes : iris.cube.CubeList - Input cubes. - - Returns - ------- - iris.cube.CubeList - """ - cube = self.get_cube_from_list(cubes) - add_scalar_depth_coord(cube) - return cubes - - -class Omon(Fix): - """Fixes for ocean variables.""" - - def fix_metadata(self, cubes): - """Fix ocean depth coordinate. + Fix missing type. 
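+
+        The raw files lack the scalar ``area_type`` coordinate expected for
+        ``siconc``, so a suitable auxiliary coordinate is added to each cube.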
Parameters ---------- @@ -44,12 +20,13 @@ def fix_metadata(self, cubes): iris.cube.CubeList """ + typesi = iris.coords.AuxCoord( + 'siconc', + standard_name='area_type', + long_name='Sea Ice area type', + var_name='type', + units='1', + bounds=None) for cube in cubes: - if cube.coords(axis='Z'): - z_coord = cube.coord(axis='Z') - if z_coord.standard_name is None: - fix_ocean_depth_coord(cube) + cube.add_aux_coord(typesi) return cubes - - -Siconc = SiconcFixScalarCoord diff --git a/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py index dfd7116275..97d36e92b5 100644 --- a/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py +++ b/esmvalcore/cmor/_fixes/cmip6/ipsl_cm6a_lr.py @@ -1,8 +1,9 @@ """Fixes for IPSL-CM6A-LR model.""" from iris.cube import CubeList +from iris.coords import AuxCoord +from iris.exceptions import ConstraintMismatchError from ..fix import Fix -from ..shared import fix_ocean_depth_coord class AllVars(Fix): @@ -22,12 +23,27 @@ def fix_metadata(self, cubes): iris.cube.CubeList """ - cube = self.get_cube_from_list(cubes) - if cube.coords('latitude'): + try: + cell_area = cubes.extract_cube('cell_area') + except ConstraintMismatchError: + return cubes + + cell_area = AuxCoord( + cell_area.data, + standard_name=cell_area.standard_name, + long_name=cell_area.long_name, + var_name=cell_area.var_name, + units=cell_area.units, + ) + new_list = CubeList() + for cube in cubes: + if cube.name() == 'cell_area': + continue + cube.add_aux_coord(cell_area, cube.coord_dims('latitude')) cube.coord('latitude').var_name = 'lat' - if cube.coords('longitude'): cube.coord('longitude').var_name = 'lon' - return CubeList([cube]) + new_list.append(cube) + return CubeList(new_list) class Clcalipso(Fix): @@ -52,27 +68,3 @@ def fix_metadata(self, cubes): alt_40_coord.standard_name = 'altitude' alt_40_coord.var_name = 'alt40' return CubeList([cube]) - - -class Omon(Fix): - """Fixes for ocean variables.""" - - def fix_metadata(self, cubes): - """Fix ocean depth coordinate. 
- - Parameters - ---------- - cubes: iris CubeList - List of cubes to fix - - Returns - ------- - iris.cube.CubeList - - """ - for cube in cubes: - if cube.coords(axis='Z'): - z_coord = cube.coord(axis='Z') - if z_coord.var_name == 'olevel': - fix_ocean_depth_coord(cube) - return cubes diff --git a/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py b/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py index 518a82e541..9f98e319fd 100644 --- a/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py +++ b/esmvalcore/cmor/_fixes/cmip6/kiost_esm.py @@ -1,5 +1,4 @@ """Fixes for KIOST-ESM model.""" -from ..common import SiconcFixScalarCoord from ..fix import Fix from ..shared import add_scalar_height_coord @@ -58,6 +57,3 @@ class Uas(SfcWind): class Vas(SfcWind): """Fixes for vas.""" - - -Siconc = SiconcFixScalarCoord diff --git a/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py b/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py index c31a5e7c51..99cf06b688 100644 --- a/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py +++ b/esmvalcore/cmor/_fixes/cmip6/mcm_ua_1_0.py @@ -1,10 +1,8 @@ """Fixes for MCM-UA-1-0 model.""" import iris -import numpy as np -from dask import array as da from ..fix import Fix -from ..shared import add_scalar_height_coord, fix_ocean_depth_coord +from ..shared import add_scalar_height_coord def strip_cube_metadata(cube): @@ -61,54 +59,18 @@ def fix_metadata(self, cubes): for cube in cubes: coord_names = [cor.standard_name for cor in cube.coords()] if 'longitude' in coord_names: - lon_coord = cube.coord('longitude') - if lon_coord.ndim == 1 and lon_coord.has_bounds(): - lon_bnds = lon_coord.bounds.copy() - # atmos & land - if lon_coord.points[0] == 0. and \ - lon_coord.points[-1] == 356.25 and \ + if cube.coord('longitude').ndim == 1 and \ + cube.coord('longitude').has_bounds(): + lon_bnds = cube.coord('longitude').bounds.copy() + if cube.coord('longitude').points[0] == 0. and \ + cube.coord('longitude').points[-1] == 356.25 and \ lon_bnds[-1][-1] == 360.: lon_bnds[-1][-1] = 358.125 - lon_coord.bounds = lon_bnds - # ocean & seaice - if lon_coord.points[0] == -0.9375: - lon_dim = cube.coord_dims('longitude')[0] - cube.data = da.roll(cube.core_data(), -1, axis=lon_dim) - lon_points = np.roll(lon_coord.core_points(), -1) - lon_bounds = np.roll(lon_coord.core_bounds(), -1, - axis=0) - lon_points[-1] += 360.0 - lon_bounds[-1] += 360.0 - lon_coord.points = lon_points - lon_coord.bounds = lon_bounds + cube.coord('longitude').bounds = lon_bnds return cubes -class Omon(Fix): - """Fixes for ocean variables.""" - - def fix_metadata(self, cubes): - """Fix ocean depth coordinate. - - Parameters - ---------- - cubes: iris CubeList - List of cubes to fix - - Returns - ------- - iris.cube.CubeList - - """ - for cube in cubes: - if cube.coords(axis='Z'): - z_coord = cube.coord(axis='Z') - if z_coord.standard_name is None: - fix_ocean_depth_coord(cube) - return cubes - - class Tas(Fix): """Fixes for tas.""" @@ -122,30 +84,9 @@ def fix_metadata(self, cubes): Returns ------- - iris.cube.CubeList + iris.cube.Cube """ cube = self.get_cube_from_list(cubes) add_scalar_height_coord(cube, 2.0) - return cubes - - -class Uas(Fix): - """Fixes for uas.""" - - def fix_metadata(self, cubes): - """Add height (10m) coordinate. - - Parameters - ---------- - cubes : iris.cube.CubeList - Cubes to fix. 
- - Returns - ------- - iris.cube.CubeList - - """ - cube = self.get_cube_from_list(cubes) - add_scalar_height_coord(cube, 10.0) - return cubes + return [cube] diff --git a/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py b/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py index dc0aa1ccb8..696574b9a4 100644 --- a/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py +++ b/esmvalcore/cmor/_fixes/cmip6/sam0_unicon.py @@ -1,6 +1,5 @@ """Fixes for SAM0-UNICON model.""" from ..common import ClFixHybridPressureCoord -from ..fix import Fix Cl = ClFixHybridPressureCoord @@ -10,27 +9,3 @@ Clw = ClFixHybridPressureCoord - - -class Nbp(Fix): - """Fixes for nbp.""" - - def fix_data(self, cube): - """Fix data. - - Fixes wrong sign for land surface flux. Tested for v20190323. - - Parameters - ---------- - cube : iris.cube.Cube - Input cube. - - Returns - ------- - iris.cube.Cube - - """ - metadata = cube.metadata - cube *= -1 - cube.metadata = metadata - return cube diff --git a/esmvalcore/cmor/_fixes/common.py b/esmvalcore/cmor/_fixes/common.py index 90c64162e7..2e3165dc33 100644 --- a/esmvalcore/cmor/_fixes/common.py +++ b/esmvalcore/cmor/_fixes/common.py @@ -1,18 +1,10 @@ """Common fixes used for multiple datasets.""" -import logging - import iris import numpy as np from scipy.ndimage import map_coordinates from .fix import Fix -from .shared import ( - add_plev_from_altitude, - add_scalar_typesi_coord, - fix_bounds, -) - -logger = logging.getLogger(__name__) +from .shared import add_plev_from_altitude, fix_bounds class ClFixHybridHeightCoord(Fix): @@ -116,74 +108,49 @@ def fix_metadata(self, cubes): class OceanFixGrid(Fix): """Fixes for tos, siconc in FGOALS-g3.""" - def fix_metadata(self, cubes): - """Fix ``latitude`` and ``longitude`` (metadata and bounds). + def fix_data(self, cube): + """ + Fix data. + + Calculate missing latitude/longitude boundaries using interpolation. + Based on a similar fix for BCC-CSM2-MR. Parameters ---------- - cubes : iris.cube.CubeList - Input cubes. + cube: iris.cube.Cube + Input cube to fix. Returns ------- - iris.cube.CubeList - + iris.cube.Cube """ - cube = self.get_cube_from_list(cubes) - if cube.ndim != 3: - logger.warning( - "OceanFixGrid is designed to work on any data with an " - "irregular ocean grid, but it was only tested on 3D (time, " - "latitude, longitude) data so far; got %dD data", cube.ndim) - - # Get dimensional coordinates. Note: - # - First dimension i -> X-direction (= longitude) - # - Second dimension j -> Y-direction (= latitude) - (j_dim, i_dim) = sorted(set( - cube.coord_dims(cube.coord('latitude', dim_coords=False)) + - cube.coord_dims(cube.coord('longitude', dim_coords=False)) - )) - i_coord = cube.coord(dim_coords=True, dimensions=i_dim) - j_coord = cube.coord(dim_coords=True, dimensions=j_dim) - - # Fix metadata of coordinate i - i_coord.var_name = 'i' - i_coord.standard_name = None - i_coord.long_name = 'cell index along first dimension' - i_coord.units = '1' - i_coord.circular = False - - # Fix metadata of coordinate j - j_coord.var_name = 'j' - j_coord.standard_name = None - j_coord.long_name = 'cell index along second dimension' - j_coord.units = '1' - - # Fix points and bounds of index coordinates i and j - for idx_coord in (i_coord, j_coord): - idx_coord.points = np.arange(len(idx_coord.points)) - idx_coord.bounds = None - idx_coord.guess_bounds() - - # Calculate latitude/longitude vertices by interpolation. 
- # Following the CF conventions (see - # cfconventions.org/cf-conventions/cf-conventions.html#cell-boundaries) - # we go counter-clockwise around the cells and construct a grid of - # index values which are in turn used to interpolate longitudes and - # latitudes in the midpoints between the cell centers. + rlat = cube.coord('grid_latitude').points + rlon = cube.coord('grid_longitude').points + + # Guess coordinate bounds in rlat, rlon (following BCC-CSM2-MR-1). + rlat_idx_bnds = np.zeros((len(rlat), 2)) + rlat_idx_bnds[:, 0] = np.arange(len(rlat)) - 0.5 + rlat_idx_bnds[:, 1] = np.arange(len(rlat)) + 0.5 + rlat_idx_bnds[0, 0] = 0. + rlat_idx_bnds[len(rlat) - 1, 1] = len(rlat) + rlon_idx_bnds = np.zeros((len(rlon), 2)) + rlon_idx_bnds[:, 0] = np.arange(len(rlon)) - 0.5 + rlon_idx_bnds[:, 1] = np.arange(len(rlon)) + 0.5 + + # Calculate latitude/longitude vertices by interpolation lat_vertices = [] lon_vertices = [] - for (j, i) in [(0, 0), (0, 1), (1, 1), (1, 0)]: - (j_v, i_v) = np.meshgrid(j_coord.bounds[:, j], - i_coord.bounds[:, i], - indexing='ij') + for (i, j) in [(0, 0), (0, 1), (1, 1), (1, 0)]: + (rlat_v, rlon_v) = np.meshgrid(rlat_idx_bnds[:, i], + rlon_idx_bnds[:, j], + indexing='ij') lat_vertices.append( map_coordinates(cube.coord('latitude').points, - [j_v, i_v], + [rlat_v, rlon_v], mode='nearest')) lon_vertices.append( map_coordinates(cube.coord('longitude').points, - [j_v, i_v], + [rlat_v, rlon_v], mode='wrap')) lat_vertices = np.array(lat_vertices) lon_vertices = np.array(lon_vertices) @@ -193,15 +160,11 @@ def fix_metadata(self, cubes): # Copy vertices to cube cube.coord('latitude').bounds = lat_vertices cube.coord('longitude').bounds = lon_vertices - - return iris.cube.CubeList([cube]) - - -class SiconcFixScalarCoord(Fix): - """Fixes for siconc.""" + return cube def fix_metadata(self, cubes): - """Add typesi coordinate. + """ + Rename ``var_name`` of 1D-``latitude`` and 1D-``longitude``. Parameters ---------- @@ -211,8 +174,25 @@ def fix_metadata(self, cubes): Returns ------- iris.cube.CubeList - """ cube = self.get_cube_from_list(cubes) - add_scalar_typesi_coord(cube) - return iris.cube.CubeList([cube]) + lat_coord = cube.coord('cell index along second dimension', + dimensions=(1, )) + lon_coord = cube.coord('cell index along first dimension', + dimensions=(2, )) + lat_coord.standard_name = None + lat_coord.long_name = 'grid_latitude' + lat_coord.var_name = 'i' + lat_coord.units = '1' + lon_coord.standard_name = None + lon_coord.long_name = 'grid_longitude' + lon_coord.var_name = 'j' + lon_coord.units = '1' + lon_coord.circular = False + # FGOALS-g3 data contain latitude and longitude data set to + # >1e30 in some places. Set to 0. to avoid problem in check.py. + cube.coord('latitude').points[cube.coord('latitude').points > 1000.]\ + = 0. + cube.coord('longitude').points[cube.coord('longitude').points > 1000.]\ + = 0. + return cubes diff --git a/esmvalcore/cmor/_fixes/fix.py b/esmvalcore/cmor/_fixes/fix.py index 4595c3914f..12aee7b9a0 100644 --- a/esmvalcore/cmor/_fixes/fix.py +++ b/esmvalcore/cmor/_fixes/fix.py @@ -15,7 +15,7 @@ def __init__(self, vardef): Parameters ---------- - vardef: str + vardef: basestring CMOR table entry """ @@ -31,14 +31,14 @@ def fix_file(self, filepath, output_dir): Parameters ---------- - filepath: str + filepath: basestring file to fix - output_dir: str + output_dir: basestring path to the folder to store the fixe files, if required Returns ------- - str + basestring Path to the corrected file. 
It can be different from the original filepath if a fix has been applied, but if not it should be the original filepath @@ -162,7 +162,7 @@ def get_fixes(project, dataset, mip, short_name): classes = inspect.getmembers(fixes_module, inspect.isclass) classes = dict((name.lower(), value) for name, value in classes) - for fix_name in (short_name, mip.lower(), 'allvars'): + for fix_name in (short_name, 'allvars'): try: fixes.append(classes[fix_name](vardef)) except KeyError: diff --git a/esmvalcore/cmor/_fixes/shared.py b/esmvalcore/cmor/_fixes/shared.py index d5c74c095a..62b9c76c8b 100644 --- a/esmvalcore/cmor/_fixes/shared.py +++ b/esmvalcore/cmor/_fixes/shared.py @@ -361,21 +361,6 @@ def add_scalar_typesea_coord(cube, value='default'): return cube -def add_scalar_typesi_coord(cube, value='sea_ice'): - """Add scalar coordinate 'typesi' with value of `value`.""" - logger.debug("Adding typesi coordinate (%s)", value) - typesi_coord = iris.coords.AuxCoord(value, - var_name='type', - standard_name='area_type', - long_name='Sea Ice area type', - units=Unit('no unit')) - try: - cube.coord('area_type') - except iris.exceptions.CoordinateNotFoundError: - cube.add_aux_coord(typesi_coord, ()) - return cube - - def cube_to_aux_coord(cube): """Convert cube to iris AuxCoord.""" return iris.coords.AuxCoord( @@ -532,20 +517,3 @@ def round_coordinates(cubes, decimals=5, coord_names=None): coord.bounds = da.round(da.asarray(coord.core_bounds()), decimals) return cubes - - -def fix_ocean_depth_coord(cube): - """Fix attributes of ocean vertical level coordinate. - - Parameters - ---------- - cube : iris.cube.Cube - Input cube. - - """ - depth_coord = cube.coord(axis='Z') - depth_coord.standard_name = 'depth' - depth_coord.var_name = 'lev' - depth_coord.units = 'm' - depth_coord.long_name = 'ocean depth coordinate' - depth_coord.attributes = {'positive': 'down'} diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index 48a5113d35..131de15209 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -527,7 +527,9 @@ def _check_coord(self, cmor, coord, var_name): if not fixed: self.report_critical(self._attr_msg, var_name, 'units', cmor.units, coord.units) - self._check_coord_points(cmor, coord, var_name) + self._check_coord_values(cmor, coord, var_name) + self._check_coord_bounds(cmor, coord, var_name) + self._check_coord_monotonicity_and_direction(cmor, coord, var_name) def _check_coord_bounds(self, cmor, coord, var_name): if cmor.must_have_bounds == 'yes' and not coord.has_bounds(): @@ -603,19 +605,9 @@ def _reverse_coord(self, coord): if coord.ndim == 1: self._cube = iris.util.reverse(self._cube, self._cube.coord_dims(coord)) - reversed_coord = self._cube.coord(var_name=coord.var_name) - if reversed_coord.has_bounds(): - bounds = reversed_coord.bounds - right_bounds = bounds[:-2, 1] - left_bounds = bounds[1:-1, 0] - if np.all(right_bounds != left_bounds): - reversed_coord.bounds = np.fliplr(bounds) - coord = reversed_coord - self.report_debug_message(f'Coordinate {coord.var_name} values' - 'have been reversed.') - - def _check_coord_points(self, coord_info, coord, var_name): - """Check coordinate points: values, bounds and monotonicity.""" + + def _check_coord_values(self, coord_info, coord, var_name): + """Check coordinate values.""" # Check requested coordinate values exist in coord.points self._check_requested_values(coord, coord_info, var_name) @@ -663,10 +655,6 @@ def _check_coord_points(self, coord_info, coord, var_name): dims = self._cube.coord_dims(coord) 
self._cube.remove_coord(coord) self._cube.add_aux_coord(new_coord, dims) - coord = self._cube.coord(var_name=var_name) - self._check_coord_bounds(coord_info, coord, var_name) - self._check_coord_monotonicity_and_direction(coord_info, coord, - var_name) def _check_longitude_max(self, coord, var_name): if np.any(coord.points > 720): @@ -981,13 +969,13 @@ def cmor_check_metadata(cube, ---------- cube: iris.cube.Cube Data cube to check. - cmor_table: str + cmor_table: basestring CMOR definitions to use. mip: Variable's mip. - short_name: str + short_name: basestring Variable's short name. - frequency: str + frequency: basestring Data frequency. check_level: CheckLevels Level of strictness of the checks. @@ -1015,13 +1003,13 @@ def cmor_check_data(cube, ---------- cube: iris.cube.Cube Data cube to check. - cmor_table: str + cmor_table: basestring CMOR definitions to use. mip: Variable's mip. - short_name: str + short_name: basestring Variable's short name - frequency: str + frequency: basestring Data frequency check_level: CheckLevels Level of strictness of the checks. @@ -1045,13 +1033,13 @@ def cmor_check(cube, cmor_table, mip, short_name, frequency, check_level): ---------- cube: iris.cube.Cube Data cube to check. - cmor_table: str + cmor_table: basestring CMOR definitions to use. mip: Variable's mip. - short_name: str + short_name: basestring Variable's short name. - frequency: str + frequency: basestring Data frequency. check_level: enum.IntEnum Level of strictness of the checks. diff --git a/esmvalcore/cmor/table.py b/esmvalcore/cmor/table.py index 4c5bd2e1ff..a1fc3a5903 100644 --- a/esmvalcore/cmor/table.py +++ b/esmvalcore/cmor/table.py @@ -12,13 +12,12 @@ from collections import Counter from functools import total_ordering from pathlib import Path -from typing import Dict, Type import yaml logger = logging.getLogger(__name__) -CMOR_TABLES: Dict[str, Type['InfoBase']] = {} +CMOR_TABLES = {} """dict of str, obj: CMOR info objects.""" @@ -128,7 +127,7 @@ def get_table(self, table): Parameters ---------- - table: str + table: basestring Table name Returns @@ -144,9 +143,9 @@ def get_variable(self, table_name, short_name, derived=False): Parameters ---------- - table_name: str + table_name: basestring Table name - short_name: str + short_name: basestring Variable's short name derived: bool, optional Variable is derived. 
Info retrieval for derived variables always
            looks in the default tables if the variable is not found in
            the requested table.
@@ -227,7 +226,7 @@ class CMIP6Info(InfoBase):

     Parameters
     ----------
-    cmor_tables_path: str
+    cmor_tables_path: basestring
         Path to the folder containing the Tables folder with the json files

     default: object
@@ -367,7 +366,7 @@ def get_table(self, table):

         Parameters
         ----------
-        table: str
+        table: basestring
             Table name

         Returns
@@ -616,7 +615,7 @@ class CMIP5Info(InfoBase):

     Parameters
     ----------
-    cmor_tables_path: str
+    cmor_tables_path: basestring
         Path to the folder containing the Tables folder with the json files

     default: object
@@ -757,7 +756,7 @@ def get_table(self, table):

         Parameters
         ----------
-        table: str
+        table: basestring
             Table name

         Returns
@@ -774,7 +773,7 @@ class CMIP3Info(CMIP5Info):

     Parameters
     ----------
-    cmor_tables_path: str
+    cmor_tables_path: basestring
         Path to the folder containing the Tables folder with the json files

     default: object
@@ -811,7 +810,7 @@ class CustomInfo(CMIP5Info):

     Parameters
     ----------
-    cmor_tables_path: str or None
+    cmor_tables_path: basestring or None
         Full path to the table or name for the table if it is present in
         ESMValTool repository
     """
@@ -848,9 +847,9 @@ def get_variable(self, table, short_name, derived=False):

         Parameters
         ----------
-        table: str
+        table: basestring
             Table name
-        short_name: str
+        short_name: basestring
             Variable's short name
         derived: bool, optional
             Variable is derived. Info retrieval for derived variables always
             looks in the default tables if the variable is not found in
             the requested table.
diff --git a/esmvalcore/cmor/tables/custom/CMOR_tasaga.dat b/esmvalcore/cmor/tables/custom/CMOR_tasaga.dat
deleted file mode 100644
index f62d34c5e4..0000000000
--- a/esmvalcore/cmor/tables/custom/CMOR_tasaga.dat
+++ /dev/null
@@ -1,25 +0,0 @@
-SOURCE: CMIP5 (adapted from tas)
-!============
-variable_entry: tasaga
-!============
-modeling_realm: atmos
-!----------------------------------
-! Variable attributes:
-!----------------------------------
-standard_name:
-units: K
-cell_methods: time: mean
-cell_measures: area: areacella
-long_name: Global-mean Near-Surface Air Temperature Anomaly
-!----------------------------------
-! Additional variable information:
-!----------------------------------
-dimensions: time
-out_name: tasaga
-type: real
-valid_min: -20.0
-valid_max: 20.0
-ok_min_mean_abs: -20
-ok_max_mean_abs: 20.
-!----------------------------------
-!
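Note: the table.py changes above keep the public lookup path intact: projects map to
info objects in the CMOR_TABLES dict, and get_variable() returns None for unknown
entries, which is the behaviour the fx-variable search in _recipe.py relies on. A
minimal sketch of that lookup, assuming the developer configuration has already
populated CMOR_TABLES (as happens during a normal recipe run); the project, table
and variable names are illustrative only:

    from esmvalcore.cmor.table import CMOR_TABLES

    # Fetch the CMOR definition of near-surface air temperature from the
    # CMIP6 'Amon' table; get_variable() returns None if the entry is missing.
    vardef = CMOR_TABLES['CMIP6'].get_variable('Amon', 'tas')
    if vardef is not None:
        print(vardef.short_name, vardef.units)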
diff --git a/esmvalcore/cmor/variable_alt_names.yml b/esmvalcore/cmor/variable_alt_names.yml
index e0416797eb..787a20c99a 100644
--- a/esmvalcore/cmor/variable_alt_names.yml
+++ b/esmvalcore/cmor/variable_alt_names.yml
@@ -9,6 +9,7 @@
 ###############################################################################
 ---
 - ['sic', 'siconc']
+- ['sit', 'sithick']
 - ['tro3', 'o3']
 - ['usi', 'siu']
-- ['vsi', 'siv']
+- ['vsi', 'siv']
\ No newline at end of file
diff --git a/esmvalcore/config-developer.yml b/esmvalcore/config-developer.yml
index 19b00482da..a0a242e6a1 100644
--- a/esmvalcore/config-developer.yml
+++ b/esmvalcore/config-developer.yml
@@ -25,7 +25,6 @@ CMIP6:
     BADC: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}'
     DKRZ: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}'
     ETHZ: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/{grid}/'
-    SYNDA: '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{latestversion}'
   input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc'
   output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}'
   cmor_type: 'CMIP6'
@@ -41,7 +40,6 @@ CMIP5:
     SMHI: '{dataset}/{ensemble}/{exp}/{frequency}'
     RCAST: '{exp}/{mip}/{short_name}/{dataset}/{ensemble}/'
     BSC: '{type}/{project}/{exp}/{dataset.lower}'
-    SYNDA: '{institute}/{dataset}/{exp}/{frequency}/{modeling_realm}/{mip}/{ensemble}/{latestversion}'
   input_file: '{short_name}_{mip}_{dataset}_{exp}_{ensemble}*.nc'
   output_file: '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}'
   institutes:
diff --git a/esmvalcore/experimental/_logging.py b/esmvalcore/experimental/_logging.py
index 206376c9c0..02c64491d5 100644
--- a/esmvalcore/experimental/_logging.py
+++ b/esmvalcore/experimental/_logging.py
@@ -2,11 +2,10 @@

 import logging
 from contextlib import contextmanager
-from pathlib import Path


 @contextmanager
-def log_to_dir(drc: Path):
+def log_to_dir(drc: str) -> None:
     """Log messages to the specified directory.

     This is a context manager to temporarily redirect the logging when
diff --git a/esmvalcore/experimental/_warnings.py b/esmvalcore/experimental/_warnings.py
index b31eb78c20..9efa752e84 100644
--- a/esmvalcore/experimental/_warnings.py
+++ b/esmvalcore/experimental/_warnings.py
@@ -3,7 +3,12 @@
 import warnings


-def _warning_formatter(message, category, filename, lineno, line=None):
+def _warning_formatter(message,
+                       category,
+                       filename,
+                       lineno,
+                       file=None,
+                       line=None):
     """Patch warning formatting to not mention itself."""
     return f'{filename}:{lineno}: {category.__name__}: {message}\n'

diff --git a/esmvalcore/experimental/config/_config_object.py b/esmvalcore/experimental/config/_config_object.py
index 7f836d694a..bea358f773 100644
--- a/esmvalcore/experimental/config/_config_object.py
+++ b/esmvalcore/experimental/config/_config_object.py
@@ -1,9 +1,7 @@
 """Importable config object."""

-import os
 from datetime import datetime
 from pathlib import Path
-from typing import Union

 import yaml

@@ -30,9 +28,7 @@ class Config(ValidatedConfig):
     )

     @classmethod
-    def _load_user_config(cls,
-                          filename: Union[os.PathLike, str],
-                          raise_exception: bool = True):
+    def _load_user_config(cls, filename: str, raise_exception: bool = True):
         """Load user configuration from the given file.

         The config is cleared and updated in-place.
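A minimal usage sketch for the Config object touched above, using only calls that
appear in this patch (load_from_file, start_session and the session's run_dir);
the configuration path is a placeholder, and load_from_file expands '~' itself:

    from esmvalcore.experimental import CFG

    # Load a user configuration file and derive a fresh session from it.
    CFG.load_from_file('~/.esmvaltool/config-user.yml')
    session = CFG.start_session('example')

    # run_dir is the directory that log_to_dir() redirects logging into.
    print(session.run_dir)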
@@ -65,7 +61,7 @@ def _load_user_config(cls, return new @classmethod - def _load_default_config(cls, filename: Union[os.PathLike, str]): + def _load_default_config(cls, filename: str): """Load the default configuration.""" new = cls() @@ -74,7 +70,7 @@ def _load_default_config(cls, filename: Union[os.PathLike, str]): return new - def load_from_file(self, filename: Union[os.PathLike, str]): + def load_from_file(self, filename): """Load user configuration from the given file.""" path = Path(filename).expanduser() if not path.exists(): @@ -134,7 +130,7 @@ class Session(ValidatedConfig): def __init__(self, config: dict, name: str = 'session'): super().__init__(config) - self.session_name: Union[str, None] = None + self.session_name = None self.set_session_name(name) def set_session_name(self, name: str = 'session'): diff --git a/esmvalcore/experimental/config/_validated_config.py b/esmvalcore/experimental/config/_validated_config.py index bed3f5e3c5..e3a5f1d26f 100644 --- a/esmvalcore/experimental/config/_validated_config.py +++ b/esmvalcore/experimental/config/_validated_config.py @@ -3,7 +3,6 @@ import pprint import warnings from collections.abc import MutableMapping -from typing import Callable, Dict, Tuple from .._exceptions import SuppressedError from ._config_validators import ValidationError @@ -21,16 +20,15 @@ class MissingConfigParameter(UserWarning): # fit the needs of ESMValCore. Matplotlib is licenced under the terms of # the the 'Python Software Foundation License' # (https://www.python.org/psf/license) -class ValidatedConfig(MutableMapping): +class ValidatedConfig(MutableMapping, dict): """Based on `matplotlib.rcParams`.""" - _validate: Dict[str, Callable] = {} - _warn_if_missing: Tuple[Tuple[str, str], ...] = () + _validate = {} + _warn_if_missing = () # validate values on the way in def __init__(self, *args, **kwargs): super().__init__() - self._mapping = {} self.update(*args, **kwargs) def __setitem__(self, key, val): @@ -43,37 +41,36 @@ def __setitem__(self, key, val): raise InvalidConfigParameter( f"`{key}` is not a valid config parameter.") from None - self._mapping[key] = cval + dict.__setitem__(self, key, cval) def __getitem__(self, key): """Return value mapped by key.""" - return self._mapping[key] + return dict.__getitem__(self, key) def __repr__(self): """Return canonical string representation.""" class_name = self.__class__.__name__ indent = len(class_name) + 1 - repr_split = pprint.pformat(self._mapping, indent=1, + repr_split = pprint.pformat(dict(self), indent=1, width=80 - indent).split('\n') repr_indented = ('\n' + ' ' * indent).join(repr_split) return '{}({})'.format(class_name, repr_indented) def __str__(self): """Return string representation.""" - return '\n'.join( - map('{0[0]}: {0[1]}'.format, sorted(self._mapping.items()))) + return '\n'.join(map('{0[0]}: {0[1]}'.format, sorted(self.items()))) def __iter__(self): """Yield sorted list of keys.""" - yield from sorted(self._mapping) + yield from sorted(dict.__iter__(self)) def __len__(self): """Return number of config keys.""" - return len(self._mapping) + return dict.__len__(self) def __delitem__(self, key): """Delete key/value from config.""" - del self._mapping[key] + dict.__delitem__(self, key) def check_missing(self): """Check and warn for missing variables.""" @@ -85,8 +82,8 @@ def check_missing(self): def copy(self): """Copy the keys/values of this object to a dict.""" - return {k: self._mapping[k] for k in self} + return {k: dict.__getitem__(self, k) for k in self} def clear(self): """Clear Config.""" 
- self._mapping.clear(self) + dict.clear(self) diff --git a/esmvalcore/experimental/recipe.py b/esmvalcore/experimental/recipe.py index c237164ed6..3cca24801c 100644 --- a/esmvalcore/experimental/recipe.py +++ b/esmvalcore/experimental/recipe.py @@ -1,16 +1,13 @@ """Recipe metadata.""" import logging -import os import pprint import shutil from pathlib import Path -from typing import Dict, Optional import yaml from esmvalcore._recipe import Recipe as RecipeEngine -from esmvalcore.experimental.config import Session from . import CFG from ._logging import log_to_dir @@ -31,14 +28,14 @@ class Recipe(): Path to the recipe. """ - def __init__(self, path: os.PathLike): + def __init__(self, path: str): self.path = Path(path) if not self.path.exists(): raise FileNotFoundError(f'Cannot find recipe: `{path}`.') - self._engine: Optional[RecipeEngine] = None - self._data: Optional[Dict] = None - self.last_session: Optional[Session] = None + self._engine = None + self._data = None + self.last_session = None self.info = RecipeInfo(self.data, filename=self.path.name) def __repr__(self) -> str: @@ -74,7 +71,7 @@ def data(self) -> dict: self._data = yaml.safe_load(open(self.path, 'r')) return self._data - def _load(self, session: Session) -> RecipeEngine: + def _load(self, session: dict): """Load the recipe. This method loads the recipe into the internal ESMValCore Recipe @@ -96,11 +93,11 @@ def _load(self, session: Session) -> RecipeEngine: logger.info(pprint.pformat(config_user)) - return RecipeEngine(raw_recipe=self.data, - config_user=config_user, - recipe_file=self.path) + self._engine = RecipeEngine(raw_recipe=self.data, + config_user=config_user, + recipe_file=self.path) - def run(self, task: str = None, session: Session = None): + def run(self, task: str = None, session: dict = None): """Run the recipe. This function loads the recipe into the ESMValCore recipe format @@ -122,7 +119,7 @@ def run(self, task: str = None, session: Session = None): Returns output of the recipe as instances of :obj:`OutputItem` grouped by diagnostic task. """ - if session is None: + if not session: session = CFG.start_session(self.path.stem) self.last_session = session @@ -131,7 +128,7 @@ def run(self, task: str = None, session: Session = None): session['diagnostics'] = task with log_to_dir(session.run_dir): - self._engine = self._load(session=session) + self._load(session=session) self._engine.run() shutil.copy2(self.path, session.run_dir) @@ -141,7 +138,7 @@ def run(self, task: str = None, session: Session = None): return output - def get_output(self) -> RecipeOutput: + def get_output(self) -> dict: """Get output from recipe. Returns @@ -150,7 +147,7 @@ def get_output(self) -> RecipeOutput: Returns output of the recipe as instances of :obj:`OutputFile` grouped by diagnostic task. 
""" - if self._engine is None: + if not self._engine: raise AttributeError('Run the recipe first using `.run()`.') output = self._engine.get_output() diff --git a/esmvalcore/experimental/recipe_info.py b/esmvalcore/experimental/recipe_info.py index 2fe0a9dc38..04aab5ebd8 100644 --- a/esmvalcore/experimental/recipe_info.py +++ b/esmvalcore/experimental/recipe_info.py @@ -1,8 +1,6 @@ """Handles recipe metadata (under 'documentation' section).""" -import os import textwrap from pathlib import Path -from typing import Optional, Tuple, Union import yaml @@ -21,14 +19,14 @@ class RecipeInfo(): Name of recipe file """ - def __init__(self, data, filename: Union[os.PathLike, str]): + def __init__(self, data, filename: str = None): self.filename = Path(filename).name self.data = data - self._authors: Optional[Tuple[Contributor, ...]] = None - self._maintainers: Optional[Tuple[Contributor, ...]] = None - self._projects: Optional[Tuple[Project, ...]] = None - self._references: Optional[Tuple[Reference, ...]] = None - self._description: Optional[str] = None + self._authors = None + self._maintainers = None + self._projects = None + self._references = None + self._description = None def __repr__(self) -> str: """Return canonical string representation.""" diff --git a/esmvalcore/experimental/recipe_metadata.py b/esmvalcore/experimental/recipe_metadata.py index da3a66e9c1..1beb96f9cb 100644 --- a/esmvalcore/experimental/recipe_metadata.py +++ b/esmvalcore/experimental/recipe_metadata.py @@ -180,11 +180,10 @@ def render(self, renderer: str = 'html') -> str: """ style = 'plain' # alpha, plain, unsrt, unsrtalpha backend = pybtex.plugin.find_plugin('pybtex.backends', renderer)() - formatter = pybtex.plugin.find_plugin('pybtex.style.formatting', - style)() + style = pybtex.plugin.find_plugin('pybtex.style.formatting', style)() try: - formatter = formatter.format_entry(self._key, self._entry) + formatter = style.format_entry(self._key, self._entry) rendered = formatter.text.render(backend) except Exception as err: raise RenderError( diff --git a/esmvalcore/experimental/recipe_output.py b/esmvalcore/experimental/recipe_output.py index 4e31361075..45b29be78f 100644 --- a/esmvalcore/experimental/recipe_output.py +++ b/esmvalcore/experimental/recipe_output.py @@ -4,7 +4,6 @@ import logging from collections.abc import Mapping from pathlib import Path -from typing import Optional, Tuple, Type import iris @@ -48,9 +47,9 @@ def __len__(self): """Return number of files.""" return len(self.files) - def __getitem__(self, index: int): - """Get item indexed by `index`.""" - return self.files[index] + def __getitem__(self, key: str): + """Get item indexed by `key`.""" + return self.files[key] @property def image_files(self) -> tuple: @@ -69,7 +68,7 @@ def from_task(cls, task) -> 'TaskOutput': Where task is an instance of `esmvalcore._task.BaseTask`. 
""" product_attributes = task.get_product_attributes() - return cls(name=task.name, files=product_attributes) + return cls(name=task.name, output=product_attributes) class RecipeOutput(Mapping): @@ -189,7 +188,7 @@ class OutputFile(): Attributes corresponding to the recipe output """ - kind: Optional[str] = None + kind = None def __init__(self, path: str, attributes: dict = None): if not attributes: @@ -198,8 +197,8 @@ def __init__(self, path: str, attributes: dict = None): self.attributes = attributes self.path = Path(path) - self._authors: Optional[Tuple[Contributor, ...]] = None - self._references: Optional[Tuple[Reference, ...]] = None + self._authors = None + self._references = None def __repr__(self): """Return canonical string representation.""" @@ -266,13 +265,11 @@ def provenance_xml_file(self): return self._get_derived_path('_provenance', '.xml') @classmethod - def create(cls, path: str, attributes: dict = None) -> 'OutputFile': + def create(cls, path: str, attributes: dict = None): """Construct new instances of OutputFile. Chooses a derived class if suitable. """ - item_class: Type[OutputFile] - ext = Path(path).suffix if ext in ('.png', ): item_class = ImageFile diff --git a/esmvalcore/experimental/templates/__init__.py b/esmvalcore/experimental/templates/__init__.py index e0f38b93e0..22bef2fa98 100644 --- a/esmvalcore/experimental/templates/__init__.py +++ b/esmvalcore/experimental/templates/__init__.py @@ -3,8 +3,7 @@ from jinja2 import Environment, FileSystemLoader -TEMPLATE_DIR = str(Path(__file__).parent) -file_loader = FileSystemLoader(TEMPLATE_DIR) +file_loader = FileSystemLoader(Path(__file__).parent) environment = Environment(loader=file_loader, autoescape=True) get_template = environment.get_template diff --git a/esmvalcore/experimental/utils.py b/esmvalcore/experimental/utils.py index a28e163267..0a7292e184 100644 --- a/esmvalcore/experimental/utils.py +++ b/esmvalcore/experimental/utils.py @@ -1,9 +1,7 @@ """ESMValCore utilities.""" -import os import re from pathlib import Path -from typing import Pattern, Tuple, Union from esmvalcore._config import DIAGNOSTICS @@ -12,7 +10,7 @@ class RecipeList(list): """Container for recipes.""" - def find(self, query: Pattern[str]): + def find(self, query: str): """Search for recipes matching the search query or pattern. Searches in the description, authors and project information fields. @@ -20,7 +18,7 @@ def find(self, query: Pattern[str]): Parameters ---------- - query : str, Pattern + query : str String to search for, e.g. ``find_recipes('righi')`` will return all matching that author. Can be a `regex` pattern. @@ -55,14 +53,14 @@ def get_all_recipes(subdir: str = None) -> list: RecipeList List of available recipes """ - if subdir is None: + if not subdir: subdir = '**' rootdir = DIAGNOSTICS.recipes files = rootdir.glob(f'{subdir}/*.yml') return RecipeList(Recipe(file) for file in files) -def get_recipe(name: Union[os.PathLike, str]) -> Recipe: +def get_recipe(name: str) -> 'Recipe': """Get a recipe by its name. The function looks first in the local directory, and second in the @@ -85,14 +83,12 @@ def get_recipe(name: Union[os.PathLike, str]) -> Recipe: FileNotFoundError If the name cannot be resolved to a recipe file. """ - filenames: Tuple[Union[str, os.PathLike], ...] 
- locations = Path(), DIAGNOSTICS.recipes - if isinstance(name, str): - filenames = (name, name + '.yml') - else: + if isinstance(name, Path): filenames = (name, ) + else: + filenames = (name, name + '.yml') for location in locations: for filename in filenames: diff --git a/esmvalcore/preprocessor/__init__.py b/esmvalcore/preprocessor/__init__.py index 9fe86d140e..e9747b2fc1 100644 --- a/esmvalcore/preprocessor/__init__.py +++ b/esmvalcore/preprocessor/__init__.py @@ -10,7 +10,6 @@ from .._task import BaseTask from ..cmor.check import cmor_check_data, cmor_check_metadata from ..cmor.fix import fix_data, fix_file, fix_metadata -from ._ancillary_vars import add_fx_variables, remove_fx_variables from ._area import ( area_statistics, extract_named_regions, @@ -94,8 +93,6 @@ # Data reformatting/CMORization 'fix_data', 'cmor_check_data', - # Load fx_variables in cube - 'add_fx_variables', # Time extraction (as defined in the preprocessor section) 'extract_time', 'extract_season', @@ -159,8 +156,6 @@ 'linear_trend', 'linear_trend_stderr', 'convert_units', - # Remove fx_variables from cube - 'remove_fx_variables', # Save to file 'save', 'cleanup', @@ -184,8 +179,8 @@ DEFAULT_ORDER = tuple(__all__) # The order of initial and final steps cannot be configured -INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('add_fx_variables') + 1] -FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('remove_fx_variables'):] +INITIAL_STEPS = DEFAULT_ORDER[:DEFAULT_ORDER.index('cmor_check_data') + 1] +FINAL_STEPS = DEFAULT_ORDER[DEFAULT_ORDER.index('save'):] MULTI_MODEL_FUNCTIONS = { 'multi_model_statistics', diff --git a/esmvalcore/preprocessor/_ancillary_vars.py b/esmvalcore/preprocessor/_ancillary_vars.py deleted file mode 100644 index 53a53d529a..0000000000 --- a/esmvalcore/preprocessor/_ancillary_vars.py +++ /dev/null @@ -1,214 +0,0 @@ -"""Preprocessor functions for ancillary variables and cell measures.""" - -import logging -import iris - -import dask.array as da - -from esmvalcore.preprocessor._io import load, concatenate_callback, concatenate -from esmvalcore.cmor.fix import fix_metadata, fix_data -from esmvalcore.cmor.check import cmor_check_metadata, cmor_check_data - -logger = logging.getLogger(__name__) - - -def _load_fx(var_cube, fx_info, check_level): - """Load and CMOR-check fx variables.""" - fx_cubes = iris.cube.CubeList() - - for fx_file in fx_info['filename']: - loaded_cube = load(fx_file, callback=concatenate_callback) - short_name = fx_info['short_name'] - project = fx_info['project'] - dataset = fx_info['dataset'] - mip = fx_info['mip'] - freq = fx_info['frequency'] - loaded_cube = fix_metadata(loaded_cube, short_name=short_name, - project=project, dataset=dataset, - mip=mip, frequency=freq, - check_level=check_level) - fx_cubes.append(loaded_cube[0]) - - fx_cube = concatenate(fx_cubes) - - if not _is_fx_broadcastable(fx_cube, var_cube): - return None - - fx_cube = cmor_check_metadata(fx_cube, cmor_table=project, mip=mip, - short_name=short_name, frequency=freq, - check_level=check_level) - - fx_cube = fix_data(fx_cube, short_name=short_name, project=project, - dataset=dataset, mip=mip, frequency=freq, - check_level=check_level) - - fx_cube = cmor_check_data(fx_cube, cmor_table=project, mip=mip, - short_name=fx_cube.var_name, frequency=freq, - check_level=check_level) - - return fx_cube - - -def _is_fx_broadcastable(fx_cube, cube): - try: - da.broadcast_to(fx_cube.core_data(), cube.shape) - except ValueError as exc: - logger.debug("Dimensions of %s and %s cubes do not match. 
" - "Discarding use of fx_variable: %s", - cube.var_name, fx_cube.var_name, exc) - return False - return True - - -def add_cell_measure(cube, fx_cube, measure): - """ - Broadcast fx_cube and add it as a cell_measure in - the cube containing the data. - - Parameters - ---------- - cube: iris.cube.Cube - Iris cube with input data. - fx_cube: iris.cube.Cube - Iris cube with fx data. - measure: str - Name of the measure, can be 'area' or 'volume'. - - Returns - ------- - iris.cube.Cube - Cube with added ancillary variables - - Raises - ------ - ValueError - If measure name is not 'area' or 'volume'. - """ - if measure not in ['area', 'volume']: - raise ValueError(f"measure name must be 'area' or 'volume', " - f"got {measure} instead") - try: - fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) - except ValueError: - logger.debug("Dimensions of %s and %s cubes do not match. " - "Cannot broadcast cubes.", - cube.var_name, fx_cube.var_name) - return - measure = iris.coords.CellMeasure( - fx_data, - standard_name=fx_cube.standard_name, - units=fx_cube.units, - measure=measure, - var_name=fx_cube.var_name, - attributes=fx_cube.attributes) - cube.add_cell_measure(measure, range(0, measure.ndim)) - logger.debug('Added %s as cell measure in cube of %s.', - fx_cube.var_name, cube.var_name) - - -def add_ancillary_variable(cube, fx_cube): - """ - Broadcast fx_cube and add it as an ancillary_variable in - the cube containing the data. - - Parameters - ---------- - cube: iris.cube.Cube - Iris cube with input data. - fx_cube: iris.cube.Cube - Iris cube with fx data. - - Returns - ------- - iris.cube.Cube - Cube with added ancillary variables - """ - try: - fx_data = da.broadcast_to(fx_cube.core_data(), cube.shape) - except ValueError: - logger.debug("Dimensions of %s and %s cubes do not match. " - "Cannot broadcast cubes.", - cube.var_name, fx_cube.var_name) - return - ancillary_var = iris.coords.AncillaryVariable( - fx_data, - standard_name=fx_cube.standard_name, - units=fx_cube.units, - var_name=fx_cube.var_name, - attributes=fx_cube.attributes) - cube.add_ancillary_variable(ancillary_var, range(0, ancillary_var.ndim)) - logger.debug('Added %s as ancillary variable in cube of %s.', - fx_cube.var_name, cube.var_name) - - -def add_fx_variables(cube, fx_variables, check_level): - """ - Load requested fx files, check with CMOR standards and add the - fx variables as cell measures or ancillary variables in - the cube containing the data. - - Parameters - ---------- - cube: iris.cube.Cube - Iris cube with input data. - fx_variables: dict - Dictionary with fx_variable information. - check_level: CheckLevels - Level of strictness of the checks. - - - Returns - ------- - iris.cube.Cube - Cube with added cell measures or ancillary variables. - """ - - if not fx_variables: - return cube - for fx_info in fx_variables.values(): - if not fx_info: - continue - if isinstance(fx_info['filename'], str): - fx_info['filename'] = [fx_info['filename']] - fx_cube = _load_fx(cube, fx_info, check_level) - - if fx_cube is None: - continue - - measure_name = { - 'areacella': 'area', - 'areacello': 'area', - 'volcello': 'volume' - } - - if fx_cube.var_name in measure_name: - add_cell_measure(cube, fx_cube, measure_name[fx_cube.var_name]) - else: - add_ancillary_variable(cube, fx_cube) - return cube - - -def remove_fx_variables(cube): - """ - Remove fx variables present as cell measures or ancillary variables in - the cube containing the data. 
- - Parameters - ---------- - cube: iris.cube.Cube - Iris cube with data and cell measures or ancillary variables. - - - Returns - ------- - iris.cube.Cube - Cube without cell measures or ancillary variables. - """ - - if cube.cell_measures(): - for measure in cube.cell_measures(): - cube.remove_cell_measure(measure.standard_name) - if cube.ancillary_variables(): - for variable in cube.ancillary_variables(): - cube.remove_ancillary_variable(variable.standard_name) - return cube diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 446c8d94a0..550747e2ec 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -22,12 +22,14 @@ logger = logging.getLogger(__name__) +# slice cube over a restricted area (box) def extract_region(cube, start_longitude, end_longitude, start_latitude, end_latitude): """Extract a region from a cube. Function that subsets a cube on a box (start_longitude, end_longitude, start_latitude, end_latitude) + This function is a restriction of masked_cube_lonlat(). Parameters ---------- @@ -61,29 +63,16 @@ def extract_region(cube, start_longitude, end_longitude, start_latitude, ignore_bounds=True, ) region_subset = region_subset.intersection(longitude=(0., 360.)) - else: - region_subset = _extract_irregular_region( - cube, - start_longitude, - end_longitude, - start_latitude, - end_latitude, - ) - return region_subset - - -def _extract_irregular_region(cube, start_longitude, end_longitude, - start_latitude, end_latitude): - """Extract a region from a cube on an irregular grid.""" + return region_subset + # Irregular grids + lats = cube.coord('latitude').points + lons = cube.coord('longitude').points # Convert longitudes to valid range if start_longitude != 360.: start_longitude %= 360. if end_longitude != 360.: end_longitude %= 360. - # Select coordinates inside the region - lats = cube.coord('latitude').points - lons = (cube.coord('longitude').points + 360.) % 360. if start_longitude <= end_longitude: select_lons = (lons >= start_longitude) & (lons <= end_longitude) else: @@ -95,19 +84,8 @@ def _extract_irregular_region(cube, start_longitude, end_longitude, select_lats = (lats >= start_latitude) | (lats <= end_latitude) selection = select_lats & select_lons - - # Crop the selection, but keep rectangular shape - i_range, j_range = selection.nonzero() - if i_range.size == 0: - raise ValueError("No data points available in selected region") - i_min, i_max = i_range.min(), i_range.max() - j_min, j_max = j_range.min(), j_range.max() - i_slice, j_slice = slice(i_min, i_max + 1), slice(j_min, j_max + 1) - cube = cube[..., i_slice, j_slice] - selection = selection[i_slice, j_slice] - # Mask remaining coordinates outside region - mask = da.broadcast_to(~selection, cube.shape) - cube.data = da.ma.masked_where(mask, cube.core_data()) + selection = da.broadcast_to(selection, cube.shape) + cube.data = da.ma.masked_where(~selection, cube.core_data()) return cube @@ -177,7 +155,46 @@ def meridional_statistics(cube, operator): raise ValueError(msg) -def area_statistics(cube, operator): +def tile_grid_areas(cube, fx_files): + """Tile the grid area data to match the dataset cube. + + Parameters + ---------- + cube: iris.cube.Cube + input cube. + fx_files: dict + dictionary of field:filename for the fx_files + + Returns + ------- + iris.cube.Cube + Freshly tiled grid areas cube. 
+ """ + grid_areas = None + if fx_files: + for key, fx_file in fx_files.items(): + if not fx_file: + continue + logger.info('Attempting to load %s from file: %s', key, fx_file) + fx_cube = iris.load_cube(fx_file) + + grid_areas = fx_cube.core_data() + if cube.ndim == 4 and grid_areas.ndim == 2: + grid_areas = da.tile(grid_areas, + [cube.shape[0], cube.shape[1], 1, 1]) + elif cube.ndim == 4 and grid_areas.ndim == 3: + grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1, 1]) + elif cube.ndim == 3 and grid_areas.ndim == 2: + grid_areas = da.tile(grid_areas, [cube.shape[0], 1, 1]) + else: + raise ValueError('Grid and dataset number of dimensions not ' + 'recognised: {} and {}.' + ''.format(cube.ndim, grid_areas.ndim)) + return grid_areas + + +# get the area average +def area_statistics(cube, operator, fx_variables=None): """Apply a statistical operator in the horizontal direction. The average in the horizontal direction. We assume that the @@ -214,6 +231,8 @@ def area_statistics(cube, operator): operator: str The operation, options: mean, median, min, max, std_dev, sum, variance, rms. + fx_variables: dict + dictionary of field:filename for the fx_variables Returns ------- @@ -227,17 +246,9 @@ def area_statistics(cube, operator): ValueError if input data cube has different shape than grid area weights """ - grid_areas = None - try: - grid_areas = cube.cell_measure('cell_area').core_data() - except iris.exceptions.CellMeasureNotFoundError: - logger.info( - 'Cell measure "cell_area" not found in cube %s. ' - 'Check fx_file availability.', cube.summary(shorten=True) - ) - logger.info('Attempting to calculate grid cell area...') + grid_areas = tile_grid_areas(cube, fx_variables) - if grid_areas is None and cube.coord('latitude').points.ndim == 2: + if not fx_variables and cube.coord('latitude').points.ndim == 2: coord_names = [coord.standard_name for coord in cube.coords()] if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) @@ -256,7 +267,7 @@ def area_statistics(cube, operator): cube.coord('latitude')) coord_names = ['longitude', 'latitude'] - if grid_areas is None: + if grid_areas is None or not grid_areas.any(): cube = guess_bounds(cube, coord_names) grid_areas = iris.analysis.cartography.area_weights(cube) logger.info('Calculated grid area shape: %s', grid_areas.shape) @@ -408,7 +419,7 @@ def _get_masks_from_geometries(geometries, lon, lat, method='contains', if ids: ids = [str(id_) for id_ in ids] for i, item in enumerate(geometries): - for id_prop in ('name', 'NAME', 'Name', 'id', 'ID'): + for id_prop in ('name', 'NAME', 'id', 'ID'): if id_prop in item['properties']: id_ = str(item['properties'][id_prop]) break diff --git a/esmvalcore/preprocessor/_derive/rlus.py b/esmvalcore/preprocessor/_derive/rlus.py deleted file mode 100644 index 4d536c29ad..0000000000 --- a/esmvalcore/preprocessor/_derive/rlus.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Derivation of variable `rlus`. 
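
For reference, the tiling that tile_grid_areas above applies to a 2-D (lat, lon) area field when the data cube has a leading time dimension; the shapes here are illustrative only:

    import dask.array as da
    import numpy as np

    area = da.from_array(np.ones((180, 360)))   # 2-D cell areas (lat, lon)
    grid_areas = da.tile(area, [12, 1, 1])      # replicate along a 12-step time axis
    assert grid_areas.shape == (12, 180, 360)
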
- -authors: - - lukas_brunner - -""" -from iris import Constraint - -from ._baseclass import DerivedVariableBase - - -class DerivedVariable(DerivedVariableBase): - """Derivation of variable `rlus`.""" - - @staticmethod - def required(project): - """Declare the variables needed for derivation.""" - required = [ - { - 'short_name': 'rlds' - }, - { - 'short_name': 'rlns' - }, - ] - return required - - @staticmethod - def calculate(cubes): - """Compute upwelling longwave flux from downwelling and net.""" - rlds_cube = cubes.extract_cube( - Constraint(name='surface_downwelling_longwave_flux_in_air')) - rlns_cube = cubes.extract_cube( - Constraint(name='surface_net_downward_longwave_flux')) - # fix latitude and longitude var_name - rlns_cube.coord(axis='X').long_name = rlds_cube.coord( - axis='X').long_name - rlns_cube.coord(axis='Y').long_name = rlds_cube.coord( - axis='Y').long_name - rlns_cube.coord(axis='X').var_name = rlds_cube.coord( - axis='X').var_name - rlns_cube.coord(axis='Y').var_name = rlds_cube.coord( - axis='Y').var_name - - rlus_cube = rlds_cube - rlns_cube - - rlus_cube.attributes['positive'] = 'up' - - return rlus_cube diff --git a/esmvalcore/preprocessor/_derive/rsus.py b/esmvalcore/preprocessor/_derive/rsus.py deleted file mode 100644 index 326d063c26..0000000000 --- a/esmvalcore/preprocessor/_derive/rsus.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Derivation of variable `rsus`. - -authors: - - lukas_brunner - -""" -from iris import Constraint - -from ._baseclass import DerivedVariableBase - - -class DerivedVariable(DerivedVariableBase): - """Derivation of variable `rsus`.""" - - @staticmethod - def required(project): - """Declare the variables needed for derivation.""" - required = [ - { - 'short_name': 'rsds' - }, - { - 'short_name': 'rsns' - }, - ] - return required - - @staticmethod - def calculate(cubes): - """Compute upwelling shortwave flux from downwelling and net.""" - rsds_cube = cubes.extract_cube( - Constraint(name='surface_downwelling_shortwave_flux_in_air')) - rsns_cube = cubes.extract_cube( - Constraint(name='surface_net_downward_shortwave_flux')) - # fix latitude and longitude var_name - rsns_cube.coord(axis='X').long_name = rsds_cube.coord( - axis='X').long_name - rsns_cube.coord(axis='Y').long_name = rsds_cube.coord( - axis='Y').long_name - rsns_cube.coord(axis='X').var_name = rsds_cube.coord( - axis='X').var_name - rsns_cube.coord(axis='Y').var_name = rsds_cube.coord( - axis='Y').var_name - - rsus_cube = rsds_cube - rsns_cube - - rsus_cube.attributes['positive'] = 'up' - - return rsus_cube diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 938e4b6f2e..f508f0fa4f 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -173,8 +173,6 @@ def _get_concatenation_error(cubes): def concatenate(cubes): """Concatenate all cubes after fixing metadata.""" - if not cubes: - return cubes if len(cubes) == 1: return cubes[0] @@ -229,15 +227,7 @@ def save(cubes, filename, optimize_access='', compress=False, alias='', str filename - Raises - ------ - ValueError - cubes is empty. - """ - if not cubes: - raise ValueError(f"Cannot save empty cubes '{cubes}'") - # Rename some arguments kwargs['target'] = filename kwargs['zlib'] = compress diff --git a/esmvalcore/preprocessor/_mask.py b/esmvalcore/preprocessor/_mask.py index db328f97e3..08ba463275 100644 --- a/esmvalcore/preprocessor/_mask.py +++ b/esmvalcore/preprocessor/_mask.py @@ -2,7 +2,7 @@ Mask module. 
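
Both derivation modules deleted above implement the same closure relation: the upwelling flux equals the downwelling flux minus the net downward flux. Assuming rsds_cube and rsns_cube are the extracted input cubes, the core of the deleted calculate method reduces to:

    rsus_cube = rsds_cube - rsns_cube           # upwelling = downwelling - net
    rsus_cube.attributes['positive'] = 'up'
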
Module that performs a number of masking
-operations that include: masking with ancillary variables, masking with
+operations that include: masking with fx files, masking with
 Natural Earth shapefiles (land or ocean), masking on thresholds,
 missing values masking.
 """
@@ -21,6 +21,28 @@
 logger = logging.getLogger(__name__)
 
 
+def _check_dims(cube, mask_cube):
+    """Check for same ndim and x-y dimensions for data and mask cubes."""
+    x_dim = cube.coord('longitude').points.ndim
+    y_dim = cube.coord('latitude').points.ndim
+    mx_dim = mask_cube.coord('longitude').points.ndim
+    my_dim = mask_cube.coord('latitude').points.ndim
+    len_x = len(cube.coord('longitude').points)
+    len_y = len(cube.coord('latitude').points)
+    len_mx = len(mask_cube.coord('longitude').points)
+    len_my = len(mask_cube.coord('latitude').points)
+    if (x_dim == mx_dim and y_dim == my_dim and len_x == len_mx
+            and len_y == len_my):
+        logger.debug('Data cube and fx mask have same dims')
+        return True
+
+    logger.debug(
+        'Data cube and fx mask differ in dims: '
+        'cube: ((%i, %i), grid=(%i, %i)), mask: ((%i, %i), grid=(%i, %i))',
+        x_dim, y_dim, len_x, len_y, mx_dim, my_dim, len_mx, len_my)
+    return False
+
+
 def _get_fx_mask(fx_data, fx_option, mask_type):
     """Build a percentage-thresholded mask from an fx file."""
     inmask = np.zeros_like(fx_data, bool)
@@ -51,32 +73,37 @@ def _get_fx_mask(fx_data, fx_option, mask_type):
 
 def _apply_fx_mask(fx_mask, var_data):
     """Apply the fx data extracted mask on the actual processed data."""
+    # Broadcast mask
+    var_mask = np.zeros_like(var_data, bool)
+    var_mask = np.broadcast_to(fx_mask, var_mask.shape).copy()
+    # Apply mask across
     if np.ma.is_masked(var_data):
-        fx_mask |= var_data.mask
+        var_mask |= var_data.mask
 
     # Build the new masked data
-    var_data = np.ma.array(var_data, mask=fx_mask, fill_value=1e+20)
+    var_data = np.ma.array(var_data, mask=var_mask, fill_value=1e+20)
 
     return var_data
 
 
-def mask_landsea(cube, mask_out, always_use_ne_mask=False):
+def mask_landsea(cube, fx_variables, mask_out, always_use_ne_mask=False):
     """
     Mask out either land mass or sea (oceans, seas and lakes).
 
-    It uses dedicated ancillary variables (sftlf or sftof) or,
-    in their absence, it applies a
-    Natural Earth mask (land or ocean contours).
-    Note that the Natural Earth masks have different resolutions:
-    10m for land, and 50m for seas.
-    These are more than enough for ESMValTool purposes.
+    It uses dedicated fx files (sftlf or sftof) or, in their absence, it
+    applies a Natural Earth mask (land or ocean contours). Note that the
+    Natural Earth masks have different resolutions: 10m for land, and 50m
+    for seas; these are more than enough for ESMValTool purposes.
 
     Parameters
     ----------
     cube: iris.cube.Cube
         data cube to be masked.
 
+    fx_variables: dict
+        dict: keys: fx variables, values: full paths to fx files.
+
     mask_out: str
         either "land" to mask out land mass or "sea" to mask out seas.
 
@@ -105,24 +132,30 @@ def mask_landsea(cube, mask_out, always_use_ne_mask=False):
         'sea': os.path.join(cwd, 'ne_masks/ne_50m_ocean.shp')
     }
 
-    if not always_use_ne_mask:
-        # preserve importance order: try stflf first then sftof
-        fx_cube = None
-        try:
-            fx_cube = cube.ancillary_variable('land_area_fraction')
-        except iris.exceptions.AncillaryVariableNotFoundError:
-            try:
-                fx_cube = cube.ancillary_variable('sea_area_fraction')
-            except iris.exceptions.AncillaryVariableNotFoundError:
-                logger.debug(
-                    'Ancillary variables land/sea area fraction '
-                    'not found in cube. Check fx_file availability.')
+    fx_files = fx_variables.values()
+    if any(fx_files) and not always_use_ne_mask:
+        fx_cubes = {}
+        for fx_file in fx_files:
+            if not fx_file:
+                continue
+            fxfile_members = os.path.basename(fx_file).split('_')
+            for fx_root in ['sftlf', 'sftof']:
+                if fx_root in fxfile_members:
+                    fx_cubes[fx_root] = iris.load_cube(fx_file)
 
-        if fx_cube:
-            landsea_mask = _get_fx_mask(
-                fx_cube.data, mask_out, fx_cube.var_name)
+        # preserve importance order: try sftlf first then sftof
+        if ('sftlf' in fx_cubes.keys()
+                and _check_dims(cube, fx_cubes['sftlf'])):
+            landsea_mask = _get_fx_mask(fx_cubes['sftlf'].data, mask_out,
+                                        'sftlf')
             cube.data = _apply_fx_mask(landsea_mask, cube.data)
-            logger.debug("Applying land-sea mask: %s", fx_cube.var_name)
+            logger.debug("Applying land-sea mask: sftlf")
+        elif ('sftof' in fx_cubes.keys()
+                and _check_dims(cube, fx_cubes['sftof'])):
+            landsea_mask = _get_fx_mask(fx_cubes['sftof'].data, mask_out,
+                                        'sftof')
+            cube.data = _apply_fx_mask(landsea_mask, cube.data)
+            logger.debug("Applying land-sea mask: sftof")
     else:
         if cube.coord('longitude').points.ndim < 2:
            cube = _mask_with_shp(cube, shapefiles[mask_out], [
@@ -151,20 +184,21 @@ def mask_landsea(cube, mask_out, always_use_ne_mask=False):
     return cube
 
 
-def mask_landseaice(cube, mask_out):
+def mask_landseaice(cube, fx_variables, mask_out):
     """
     Mask out either landsea (combined) or ice.
 
     Function that masks out either landsea (land and seas) or ice (Antarctica
-    and Greenland and some wee glaciers).
-
-    It uses dedicated ancillary variables (sftgif).
+    and Greenland and some wee glaciers). It uses dedicated fx files (sftgif).
 
     Parameters
     ----------
     cube: iris.cube.Cube
         data cube to be masked.
 
+    fx_variables: dict
+        dict: keys: fx variables, values: full paths to fx files.
+
     mask_out: str
         either "landsea" to mask out landsea or "ice" to mask out ice.
 
@@ -176,20 +210,26 @@ def mask_landseaice(cube, mask_out):
     Raises
     ------
     ValueError
-        Error raised if landsea-ice mask not found as an ancillary variable.
+        Error raised if fx mask and data have different dimensions.
+    ValueError
+        Error raised if fx files list is empty.
     """
     # sftgif is the only one so far but users can set others
-    fx_cube = None
-    try:
-        fx_cube = cube.ancillary_variable('land_ice_area_fraction')
-    except iris.exceptions.AncillaryVariableNotFoundError:
-        logger.debug('Ancillary variable land ice area fraction '
-                     'not found in cube. Check fx_file availability.')
-    if fx_cube:
-        landice_mask = _get_fx_mask(fx_cube.data, mask_out, fx_cube.var_name)
-        cube.data = _apply_fx_mask(landice_mask, cube.data)
-        logger.debug("Applying landsea-ice mask: sftgif")
+    fx_files = fx_variables.values()
+    if any(fx_files):
+        for fx_file in fx_files:
+            if not fx_file:
+                continue
+            fx_cube = iris.load_cube(fx_file)
+
+            if _check_dims(cube, fx_cube):
+                landice_mask = _get_fx_mask(fx_cube.data, mask_out, 'sftgif')
+                cube.data = _apply_fx_mask(landice_mask, cube.data)
+                logger.debug("Applying landsea-ice mask: sftgif")
+            else:
+                msg = "Landsea-ice mask and data have different dimensions."
+                raise ValueError(msg)
    else:
        msg = "Landsea-ice mask could not be found. Stopping. 
" raise ValueError(msg) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index dea5d1d93a..483019c6ec 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -44,10 +44,8 @@ def _resolve_operator(statistic: str): # special cases if statistic == 'std': logger.warning( - "Changing statistics from specified `std` to `std_dev`, " - "since multimodel statistics is now using the iris.analysis module" - ", which also uses `std_dev`. Please consider replacing 'std' " - " with 'std_dev' in your recipe or code.") + "Multicube statistics is aligning its behaviour with iris.analysis" + ". Please consider replacing 'std' with 'std_dev' in your code.") statistic = 'std_dev' elif re.match(r"^(p\d{1,2})(\.\d*)?$", statistic): @@ -147,13 +145,7 @@ def _subset(cube, time_points): begin = cube.coord('time').units.num2date(time_points[0]) end = cube.coord('time').units.num2date(time_points[-1]) constraint = iris.Constraint(time=lambda cell: begin <= cell.point <= end) - try: - return cube.extract(constraint) - except Exception as excinfo: - raise ValueError( - "Tried to align cubes in multi-model statistics, but failed for" - f" cube {cube} and time points {time_points}. Encountered the " - f"following exception: {excinfo}") + return cube.extract(constraint) def _extend(cube, time_points): @@ -197,13 +189,7 @@ def _extend(cube, time_points): cube_list = iris.cube.CubeList(cube_list) - try: - new_cube = cube_list.concatenate_cube() - except Exception as excinfo: - raise ValueError( - "Tried to align cubes in multi-model statistics, but failed for" - f" cube {cube} and time points {time_points}. Encountered the " - f"following exception: {excinfo}") + new_cube = cube_list.concatenate_cube() return new_cube @@ -280,33 +266,23 @@ def rechunk(cube): def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs): - """Compute statistics one slice at a time.""" + """Loop over slices of a cube if iris has no lazy aggregator.""" _ = [cube.data for cube in cubes] # make sure the cubes' data are realized result_slices = [] for i in range(cubes[0].shape[0]): - single_model_slices = [cube[i] for cube in cubes] + single_model_slices = [cube[i] for cube in cubes + ] # maybe filter the iris warning here? combined_slice = _combine(single_model_slices) collapsed_slice = combined_slice.collapsed(CONCAT_DIM, operator, **kwargs) - - # some iris aggregators modify dtype, see e.g. - # https://numpy.org/doc/stable/reference/generated/numpy.ma.average.html - collapsed_slice.data = collapsed_slice.data.astype(np.float32) - result_slices.append(collapsed_slice) - try: - result_cube = iris.cube.CubeList(result_slices).merge_cube() - except Exception as excinfo: - raise ValueError( - "Multi-model statistics failed to concatenate results into a" - f" single array. This happened for operator {operator}" - f" with computed statistics {result_slices}." - "This can happen e.g. if the calculation results in inconsistent" - f" dtypes. 
Encountered the following exception: {excinfo}") + result_cube = iris.cube.CubeList(result_slices).merge_cube() + # For consistency with lazy procedure result_cube.data = np.ma.array(result_cube.data) + result_cube.remove_coord(CONCAT_DIM) return result_cube diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index b35c4b3a25..2fc3eb60b5 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -4,7 +4,6 @@ import re from copy import deepcopy from decimal import Decimal -from typing import Dict import iris import numpy as np @@ -41,7 +40,7 @@ _LON_RANGE = _LON_MAX - _LON_MIN # A cached stock of standard horizontal target grids. -_CACHE: Dict[str, iris.cube.Cube] = dict() +_CACHE = dict() # Supported point interpolation schemes. POINT_INTERPOLATION_SCHEMES = { @@ -467,56 +466,15 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True): [coord] = coords cube.remove_coord(coord) - # Return non-regridded cube if horizontal grid is the same. - if not _horizontal_grid_is_close(cube, target_grid): - - # Perform the horizontal regridding. - if _attempt_irregular_regridding(cube, scheme): - cube = esmpy_regrid(cube, target_grid, scheme) - else: - cube = cube.regrid(target_grid, HORIZONTAL_SCHEMES[scheme]) + # Perform the horizontal regridding. + if _attempt_irregular_regridding(cube, scheme): + cube = esmpy_regrid(cube, target_grid, scheme) + else: + cube = cube.regrid(target_grid, HORIZONTAL_SCHEMES[scheme]) return cube -def _horizontal_grid_is_close(cube1, cube2): - """Check if two cubes have the same horizontal grid definition. - - The result of the function is a boolean answer, if both cubes have the - same horizontal grid definition. The function checks both longitude and - latitude, based on extent and resolution. - - Parameters - ---------- - cube1 : cube - The first of the cubes to be checked. - cube2 : cube - The second of the cubes to be checked. - - Returns - ------- - bool - - .. note:: - - The current implementation checks if the bounds and the - grid shapes are the same. - Exits on first difference. - """ - # Go through the 2 expected horizontal coordinates longitude and latitude. - for coord in ['latitude', 'longitude']: - coord1 = cube1.coord(coord) - coord2 = cube2.coord(coord) - - if not coord1.shape == coord2.shape: - return False - - if not np.allclose(coord1.bounds, coord2.bounds): - return False - - return True - - def _create_cube(src_cube, data, src_levels, levels): """Generate a new cube with the interpolated data. diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 27182081be..589a6c68e3 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -174,7 +174,10 @@ def calculate_volume(cube): return grid_volume -def volume_statistics(cube, operator): +def volume_statistics( + cube, + operator, + fx_variables=None): """ Apply a statistical operation over a volume. @@ -184,10 +187,12 @@ def volume_statistics(cube, operator): Parameters ---------- - cube: iris.cube.Cube - Input cube. - operator: str - The operation to apply to the cube, options are: 'mean'. + cube: iris.cube.Cube + Input cube. + operator: str + The operation to apply to the cube, options are: 'mean'. + fx_variables: dict + dictionary of field:filename for the fx_variables Returns ------- @@ -206,16 +211,27 @@ def volume_statistics(cube, operator): # Load z coordinate field and figure out which dim is which. 
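
For reference, a minimal sketch of the slice-wise reduction that _compute_eager above performs, assuming _combine and CONCAT_DIM from this module; eager_mean is a hypothetical wrapper:

    import iris.analysis
    import iris.cube

    def eager_mean(cubes):
        # Collapse the model dimension one time step at a time to bound memory.
        slices = []
        for i in range(cubes[0].shape[0]):
            combined = _combine([cube[i] for cube in cubes])
            slices.append(combined.collapsed(CONCAT_DIM, iris.analysis.MEAN))
        return iris.cube.CubeList(slices).merge_cube()
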
t_dim = cube.coord_dims('time')[0] - try: - grid_volume = cube.cell_measure('ocean_volume').core_data() - except iris.exceptions.CellMeasureNotFoundError: - logger.info( - 'Cell measure "ocean_volume" not found in cube. ' - 'Check fx_file availability.' - ) - logger.info('Attempting to calculate grid cell volume...') + grid_volume_found = False + grid_volume = None + if fx_variables: + for key, fx_file in fx_variables.items(): + if fx_file is None: + continue + logger.info('Attempting to load %s from file: %s', key, fx_file) + fx_cube = iris.load_cube(fx_file) + + grid_volume = fx_cube.data + grid_volume_found = True + cube_shape = cube.data.shape + + if not grid_volume_found: grid_volume = calculate_volume(cube) + # Check whether the dimensions are right. + if cube.data.ndim == 4 and grid_volume.ndim == 3: + grid_volume = np.tile(grid_volume, + [cube_shape[0], 1, 1, 1]) + if cube.data.shape != grid_volume.shape: raise ValueError('Cube shape ({}) doesn`t match grid volume shape ' '({})'.format(cube.data.shape, grid_volume.shape)) diff --git a/esmvalcore/preprocessor/_weighting.py b/esmvalcore/preprocessor/_weighting.py index 32e6c526a0..b786684135 100644 --- a/esmvalcore/preprocessor/_weighting.py +++ b/esmvalcore/preprocessor/_weighting.py @@ -7,31 +7,43 @@ logger = logging.getLogger(__name__) -def _get_land_fraction(cube): +def _get_land_fraction(cube, fx_variables): """Extract land fraction as :mod:`dask.array`.""" - fx_cube = None land_fraction = None errors = [] - try: - fx_cube = cube.ancillary_variable('land_area_fraction') - except iris.exceptions.AncillaryVariableNotFoundError: - try: - fx_cube = cube.ancillary_variable('sea_area_fraction') - except iris.exceptions.AncillaryVariableNotFoundError: + if not fx_variables: + errors.append("No fx files given.") + return (land_fraction, errors) + for (fx_var, fx_path) in fx_variables.items(): + if not fx_path: + errors.append(f"File for '{fx_var}' not found.") + continue + fx_cube = iris.load_cube(fx_path) + if not _shape_is_broadcastable(fx_cube.shape, cube.shape): errors.append( - 'Ancillary variables land/sea area fraction ' - 'not found in cube. Check fx_file availability.') - return (land_fraction, errors) + f"Cube '{fx_var}' with shape {fx_cube.shape} not " + f"broadcastable to cube '{cube.var_name}' with shape " + f"{cube.shape}.") + continue + if fx_var == 'sftlf': + land_fraction = fx_cube.core_data() / 100.0 + break + if fx_var == 'sftof': + land_fraction = 1.0 - fx_cube.core_data() / 100.0 + break + errors.append( + f"Cannot calculate land fraction from '{fx_var}', expected " + f"'sftlf' or 'sftof'.") + return (land_fraction, errors) - if fx_cube.var_name == 'sftlf': - land_fraction = fx_cube.core_data() / 100.0 - if fx_cube.var_name == 'sftof': - land_fraction = 1.0 - fx_cube.core_data() / 100.0 - return (land_fraction, errors) +def _shape_is_broadcastable(shape_1, shape_2): + """Check if two :mod:`numpy.array' shapes are broadcastable.""" + return all((m == n) or (m == 1) or (n == 1) + for (m, n) in zip(shape_1[::-1], shape_2[::-1])) -def weighting_landsea_fraction(cube, area_type): +def weighting_landsea_fraction(cube, fx_variables, area_type): """Weight fields using land or sea fraction. This preprocessor function weights a field with its corresponding land or @@ -46,6 +58,9 @@ def weighting_landsea_fraction(cube, area_type): ---------- cube : iris.cube.Cube Data cube to be weighted. 
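
The broadcast check added above compares shapes right-aligned, NumPy-style; a quick illustration of its behaviour:

    assert _shape_is_broadcastable((180, 360), (12, 180, 360))      # trailing dims match
    assert _shape_is_broadcastable((1, 360), (12, 180, 360))        # size-1 dims broadcast
    assert not _shape_is_broadcastable((90, 360), (12, 180, 360))   # latitude mismatch
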
+ fx_variables : dict + Dictionary holding ``var_name`` (keys) and full paths (values) to the + fx files as ``str`` or empty ``list`` (if not available). area_type : str Use land (``'land'``) or sea (``'sea'``) fraction for weighting. @@ -59,13 +74,14 @@ def weighting_landsea_fraction(cube, area_type): TypeError ``area_type`` is not ``'land'`` or ``'sea'``. ValueError - Land/sea fraction variables ``sftlf`` or ``sftof`` not found. + Land/sea fraction variables ``sftlf`` or ``sftof`` not found or shape + of them is not broadcastable to ``cube``. """ if area_type not in ('land', 'sea'): raise TypeError( f"Expected 'land' or 'sea' for area_type, got '{area_type}'") - (land_fraction, errors) = _get_land_fraction(cube) + (land_fraction, errors) = _get_land_fraction(cube, fx_variables) if land_fraction is None: raise ValueError( f"Weighting of '{cube.var_name}' with '{area_type}' fraction " diff --git a/package/meta.yaml b/package/meta.yaml index 59e4de85c6..d07ba8f841 100644 --- a/package/meta.yaml +++ b/package/meta.yaml @@ -2,7 +2,7 @@ --- # Build command (run this from the root of the repository): -# conda build package -c conda-forge +# conda build package -c conda-forge -c esmvalgroup # Package version number {% set version = "2.2.0" %} @@ -42,7 +42,7 @@ requirements: - scipy<1.6 # until ESMValGroup/ESMValCore/issues/927 gets resolved # Normally installed via pip: - cftime # iris=3.0.1 needs <=1.2.1; >=1.3.0 years<999 get a 0 instead of empty space - - cf-units>=2.1.5 + - cf-units - cython # required by cf-units but not automatically installed - esmpy - fiona @@ -58,7 +58,7 @@ requirements: - pyyaml - requests - shapely - - yamale + - yamale==2.* # in esmvalgroup channel test: source_files: @@ -72,13 +72,11 @@ test: - pytest-html!=2.1.0 - pytest-metadata>=1.5.1 - pytest-mock - - pytest-mypy - pytest-xdist - r-yaml - ncl commands: - - pytest -n 2 -m "not sequential" --ignore=run_test.py - - pytest -n 0 -m "sequential" --ignore=run_test.py + - pytest -n 2 --ignore=run_test.py - esmvaltool -- --help - esmvaltool version imports: diff --git a/setup.cfg b/setup.cfg index 6989a18c54..e794a1784d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,6 @@ builder = html [tool:pytest] addopts = --flake8 - --mypy --doctest-modules --ignore=esmvalcore/cmor/tables/ --cov=esmvalcore @@ -33,7 +32,3 @@ convention = numpy [isort] multi_line_output = 3 include_trailing_comma = true - -[mypy] -ignore_missing_imports = True -files = esmvalcore, tests diff --git a/setup.py b/setup.py index 12f992c827..5b5c07655a 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ # Installation dependencies # Use with pip install . 
to install from source 'install': [ - 'cf-units>=2.1.5', + 'cf-units', 'dask[array]', 'fiona', 'fire', @@ -46,7 +46,7 @@ 'scitools-iris>=3.0.1', 'shapely[vectorized]', 'stratify', - 'yamale', + 'yamale==2.*', ], # Test dependencies # Execute 'python setup.py test' to run tests @@ -57,14 +57,9 @@ 'pytest-flake8>=1.0.6', 'pytest-html!=2.1.0', 'pytest-metadata>=1.5.1', - 'pytest-mypy', 'pytest-mock', 'pytest-xdist', 'ESMValTool_sample_data==0.0.3', - # MyPy library stubs - 'types-requests', - 'types-pkg_resources', - 'types-PyYAML', ], # Development dependencies # Use pip install -e .[develop] to install in development mode @@ -74,7 +69,7 @@ 'docformatter', 'isort', 'pre-commit', - 'prospector[with_pyroma,with_mypy]!=1.1.6.3,!=1.1.6.4', + 'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4', 'sphinx>2', 'sphinx_rtd_theme', 'vprof', @@ -115,7 +110,7 @@ def install_deps_temp(self): class RunLinter(CustomCommand): """Class to run a linter and generate reports.""" - user_options: list = [] + user_options = [] def initialize_options(self): """Do nothing.""" diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py index 42290a971a..6bb2cd1f14 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_access1_0.py +++ b/tests/integration/cmor/_fixes/cmip5/test_access1_0.py @@ -44,7 +44,7 @@ def test_fix_metadata(self): time = cube.coord('time') dates = num2date(time.points, time.units.name, time.units.calendar) self.assertEqual(time.units.calendar, 'gregorian') - self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), '30001161200') + self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), ' 30001161200') self.assertEqual(dates[1].strftime('%Y%m%d%H%M'), '185001161200') def test_fix_metadata_if_not_time(self): diff --git a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py index b5e1959ca9..435782e5b9 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_access1_3.py +++ b/tests/integration/cmor/_fixes/cmip5/test_access1_3.py @@ -43,7 +43,7 @@ def test_fix_metadata(self): time = cube.coord('time') dates = num2date(time.points, time.units.name, time.units.calendar) self.assertEqual(time.units.calendar, 'gregorian') - self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), '30001161200') + self.assertEqual(dates[0].strftime('%Y%m%d%H%M'), ' 30001161200') self.assertEqual(dates[1].strftime('%Y%m%d%H%M'), '185001161200') def test_fix_metadata_if_not_time(self): diff --git a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py index dd38a6f36c..a55007923e 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py +++ b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1.py @@ -5,12 +5,8 @@ import numpy as np from esmvalcore.cmor._fixes.cmip5.bcc_csm1_1 import Cl, Tos -from esmvalcore.cmor._fixes.common import ( - ClFixHybridPressureCoord, - OceanFixGrid, -) +from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor.fix import Fix -from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -30,16 +26,11 @@ class TestTos(unittest.TestCase): def test_get(self): """Test fix get.""" self.assertListEqual( - Fix.get_fixes('CMIP5', 'bcc-csm1-1', 'Amon', 'tos'), [Tos(None)]) + Fix.get_fixes('CMIP5', 'BCC-CSM1-1', 'Amon', 'tos'), [Tos(None)]) -def test_tos_fix(): - """Test fix for ``tos``.""" - assert Tos is OceanFixGrid - - -def test_tos_fix_metadata(): - """Test ``fix_metadata`` for ``tos``.""" +def 
test_tos_fix_data(): + """Test ``fix_data`` for ``tos``.""" grid_lat = iris.coords.DimCoord( [20.0, 40.0], bounds=[[10.0, 30.0], [30.0, 50.0]], @@ -64,51 +55,22 @@ def test_tos_fix_metadata(): standard_name='longitude', units='degrees_east', ) - time_coord = iris.coords.DimCoord( - 1.0, - bounds=[0.0, 2.0], - var_name='time', - standard_name='time', - long_name='time', - units='days since 1950-01-01', - ) # Create cube without bounds cube = iris.cube.Cube( - np.full((1, 2, 3), 300.0), + np.full((2, 3), 300.0), var_name='tos', - standard_name='sea_surface_temperature', units='K', - dim_coords_and_dims=[(time_coord, 0), (grid_lat, 1), (grid_lon, 2)], - aux_coords_and_dims=[(latitude, (1, 2)), (longitude, (1, 2))], + dim_coords_and_dims=[(grid_lat, 0), (grid_lon, 1)], + aux_coords_and_dims=[(latitude, (0, 1)), (longitude, (0, 1))], ) assert cube.coord('latitude').bounds is None assert cube.coord('longitude').bounds is None # Apply fix - vardef = get_var_info('CMIP6', 'Omon', 'tos') - fix = Tos(vardef) - cubes = iris.cube.CubeList([cube]) - fixed_cubes = fix.fix_metadata(cubes) - assert len(fixed_cubes) == 1 - fixed_cube = fixed_cubes.extract_cube('sea_surface_temperature') + fix = Tos(None) + fixed_cube = fix.fix_data(cube) assert fixed_cube is cube - i_coord = fixed_cube.coord('cell index along first dimension') - j_coord = fixed_cube.coord('cell index along second dimension') - assert i_coord.var_name == 'i' - assert i_coord.standard_name is None - assert i_coord.long_name == 'cell index along first dimension' - assert i_coord.units == '1' - assert i_coord.circular is False - assert j_coord.var_name == 'j' - assert j_coord.standard_name is None - assert j_coord.long_name == 'cell index along second dimension' - assert j_coord.units == '1' - np.testing.assert_allclose(i_coord.points, [0, 1, 2]) - np.testing.assert_allclose(i_coord.bounds, - [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]]) - np.testing.assert_allclose(j_coord.points, [0, 1]) - np.testing.assert_allclose(j_coord.bounds, [[-0.5, 0.5], [0.5, 1.5]]) assert fixed_cube.coord('latitude').bounds is not None assert fixed_cube.coord('longitude').bounds is not None latitude_bounds = np.array([[[-40, -33.75, -23.75, -30.0], diff --git a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py index 8473c84f44..ce8af0eeb5 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py +++ b/tests/integration/cmor/_fixes/cmip5/test_bcc_csm1_1_m.py @@ -1,11 +1,9 @@ """Test fixes for bcc-csm1-1-m.""" import unittest +from esmvalcore.cmor._fixes.cmip5.bcc_csm1_1 import Tos as BaseTos from esmvalcore.cmor._fixes.cmip5.bcc_csm1_1_m import Cl, Tos -from esmvalcore.cmor._fixes.common import ( - ClFixHybridPressureCoord, - OceanFixGrid, -) +from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor._fixes.fix import Fix @@ -31,4 +29,4 @@ def test_get(self): def test_tos_fix(): """Test fix for ``tos``.""" - assert Tos is OceanFixGrid + assert Tos is BaseTos diff --git a/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py b/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py index 8ec734a90c..1b5ee52e63 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py +++ b/tests/integration/cmor/_fixes/cmip5/test_ec_earth.py @@ -2,18 +2,11 @@ import unittest import numpy as np - from cf_units import Unit -from iris.coords import AuxCoord, DimCoord +from iris.coords import DimCoord from iris.cube import Cube, CubeList -from iris.exceptions import 
CoordinateNotFoundError -from esmvalcore.cmor._fixes.cmip5.ec_earth import ( - Areacello, - Pr, - Sftlf, - Sic, - Tas, - ) + +from esmvalcore.cmor._fixes.cmip5.ec_earth import Areacello, Sftlf, Sic, Tas from esmvalcore.cmor.fix import Fix @@ -159,75 +152,3 @@ def test_areacello_fix_metadata(self): out_cube[0].coord('latitude') out_cube[0].coord('longitude') - - -class TestPr(unittest.TestCase): - """Test pr fixes.""" - def setUp(self): - """Prepare tests.""" - - wrong_time_coord = AuxCoord( - points=[1.0, 2.0, 1.0, 2.0, 3.0], - var_name='time', - standard_name='time', - units='days since 1850-01-01', - ) - - correct_time_coord = AuxCoord( - points=[1.0, 2.0, 3.0], - var_name='time', - standard_name='time', - units='days since 1850-01-01', - ) - - lat_coord = DimCoord( - [0.0], - standard_name='latitude', - var_name='lat', - ) - - lon_coord = DimCoord( - [0.0], - standard_name='longitude', - var_name='lon', - ) - - self.time_coord = correct_time_coord - self.wrong_cube = CubeList([Cube(np.ones((5, 1, 1)), - var_name='pr', - units='kg m-2 s-1')]) - self.wrong_cube[0].add_aux_coord(wrong_time_coord, 0) - self.wrong_cube[0].add_dim_coord(lat_coord, 1) - self.wrong_cube[0].add_dim_coord(lon_coord, 2) - self.correct_cube = CubeList([Cube(np.ones(3), - var_name='pr', - units='kg m-2 s-1')]) - self.correct_cube[0].add_aux_coord(correct_time_coord, 0) - - self.fix = Pr(None) - - def test_get(self): - """Test fix get""" - self.assertListEqual( - Fix.get_fixes('CMIP5', 'EC-EARTH', 'Amon', 'pr'), - [Pr(None)], - ) - - def test_pr_fix_metadata(self): - """Test metadata fix.""" - - out_wrong_cube = self.fix.fix_metadata(self.wrong_cube) - out_correct_cube = self.fix.fix_metadata(self.correct_cube) - - time = out_wrong_cube[0].coord('time') - assert time == self.time_coord - - time = out_correct_cube[0].coord('time') - assert time == self.time_coord - - def test_pr_fix_metadata_no_time(self): - """Test metadata fix with no time coord.""" - self.correct_cube[0].remove_coord('time') - out_correct_cube = self.fix.fix_metadata(self.correct_cube) - with self.assertRaises(CoordinateNotFoundError): - out_correct_cube[0].coord('time') diff --git a/tests/integration/cmor/_fixes/cmip5/test_miroc5.py b/tests/integration/cmor/_fixes/cmip5/test_miroc5.py index a9d61dde10..9be2a5f269 100644 --- a/tests/integration/cmor/_fixes/cmip5/test_miroc5.py +++ b/tests/integration/cmor/_fixes/cmip5/test_miroc5.py @@ -5,7 +5,7 @@ from cf_units import Unit from iris.cube import Cube -from esmvalcore.cmor._fixes.cmip5.miroc5 import Cl, Hur, Pr, Sftof, Tas +from esmvalcore.cmor._fixes.cmip5.miroc5 import Cl, Hur, Sftof, Tas from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor.fix import Fix @@ -27,12 +27,6 @@ def test_get_hur_fix(): assert fix == [Hur(None)] -def test_get_pr_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP5', 'MIROC5', 'Amon', 'pr') - assert fix == [Pr(None)] - - @unittest.mock.patch( 'esmvalcore.cmor._fixes.cmip5.miroc5.Tas.fix_metadata', autospec=True) @@ -43,16 +37,6 @@ def test_hur_fix_metadata(mock_base_fix_metadata): mock_base_fix_metadata.assert_called_once_with(fix, 'cubes') -@unittest.mock.patch( - 'esmvalcore.cmor._fixes.cmip5.miroc5.Tas.fix_metadata', - autospec=True) -def test_pr_fix_metadata(mock_base_fix_metadata): - """Test ``fix_metadata`` for ``pr``.""" - fix = Pr(None) - fix.fix_metadata('cubes') - mock_base_fix_metadata.assert_called_once_with(fix, 'cubes') - - class TestSftof(unittest.TestCase): """Test sftof fixes.""" diff --git 
a/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py b/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py index 8b0b03916e..21aa51dfca 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py +++ b/tests/integration/cmor/_fixes/cmip6/test_bcc_csm2_mr.py @@ -1,17 +1,13 @@ """Test fixes for BCC-CSM2-MR.""" -from esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr import ( - Cl, - Cli, - Clw, - Siconc, - Sos, - Tos, -) -from esmvalcore.cmor._fixes.common import ( - ClFixHybridPressureCoord, - OceanFixGrid, -) +import unittest.mock + +import iris + +from esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr import (Cl, Cli, + Clw, Tos, Siconc) +from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor._fixes.fix import Fix +from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -53,28 +49,146 @@ def test_get_tos_fix(): assert fix == [Tos(None)] -def test_tos_fix(): - """Test fix for ``tos``.""" - assert Tos is OceanFixGrid - - def test_get_siconc_fix(): """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'SImon', 'siconc') + fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'Omon', 'siconc') assert fix == [Siconc(None)] -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is OceanFixGrid - - -def test_get_sos_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'Omon', 'sos') - assert fix == [Sos(None)] - - -def test_sos_fix(): - """Test fix for ``sos``.""" - assert Sos is OceanFixGrid +@unittest.mock.patch( + 'esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr.BaseTos.fix_data', + autospec=True) +def test_tos_fix_data(mock_base_fix_data): + """Test ``fix_data`` for ``tos``.""" + fix = Tos(None) + fix.fix_data('cubes') + mock_base_fix_data.assert_called_once_with(fix, 'cubes') + + +@unittest.mock.patch( + 'esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr.BaseTos.fix_data', + autospec=True) +def test_siconc_fix_data(mock_base_fix_data): + """Test ``fix_data`` for ``siconc``.""" + fix = Siconc(None) + fix.fix_data('cubes') + mock_base_fix_data.assert_called_once_with(fix, 'cubes') + + +def test_tos_fix_metadata(): + """Test ``fix_metadata`` for ``tos``.""" + grid_lat = iris.coords.DimCoord([1.0], + var_name='lat', + standard_name='latitude', + long_name='latitude', + units='degrees_north', + attributes={'1D': '1'}) + grid_lon = iris.coords.DimCoord([1.0], + var_name='lon', + standard_name='longitude', + long_name='longitude', + units='degrees_east', + circular=True, + attributes={'1D': '1'}) + latitude = iris.coords.AuxCoord([[0.0]], + var_name='lat', + standard_name='latitude', + long_name='latitude', + units='degrees_north') + longitude = iris.coords.AuxCoord([[0]], + var_name='lon', + standard_name='longitude', + long_name='longitude', + units='degrees_east') + cube = iris.cube.Cube( + [[[0.0]]], + var_name='tos', + long_name='sea_surface_temperature', + dim_coords_and_dims=[(grid_lat.copy(), 1), (grid_lon.copy(), 2)], + aux_coords_and_dims=[(latitude.copy(), (1, 2)), + (longitude.copy(), (1, 2))], + ) + cubes = iris.cube.CubeList([cube, iris.cube.Cube(0.0)]) + vardef = get_var_info('CMIP6', 'Omon', 'tos') + fix = Tos(vardef) + fixed_cubes = fix.fix_metadata(cubes) + tos_cube = fixed_cubes.extract_cube('sea_surface_temperature') + + # No duplicates anymore + assert len(tos_cube.coords('latitude')) == 1 + assert len(tos_cube.coords('longitude')) == 1 + + # Latitude + grid_lat = tos_cube.coord('grid_latitude') + assert grid_lat.var_name == 'i' + assert grid_lat.long_name == 'grid_latitude' + assert 
grid_lat.standard_name is None + assert grid_lat.units == '1' + + # Longitude + grid_lon = tos_cube.coord('grid_longitude') + assert grid_lon.var_name == 'j' + assert grid_lon.long_name == 'grid_longitude' + assert grid_lon.standard_name is None + assert grid_lon.units == '1' + assert not grid_lon.circular + + +def test_siconc_fix_metadata(): + """Test ``fix_metadata`` for ``tos``.""" + grid_lat = iris.coords.DimCoord([1.0], + var_name='lat', + standard_name='latitude', + long_name='latitude', + units='degrees_north', + attributes={'1D': '1'}) + grid_lon = iris.coords.DimCoord([1.0], + var_name='lon', + standard_name='longitude', + long_name='longitude', + units='degrees_east', + circular=True, + attributes={'1D': '1'}) + latitude = iris.coords.AuxCoord([[0.0]], + var_name='lat', + standard_name='latitude', + long_name='latitude', + units='degrees_north') + longitude = iris.coords.AuxCoord([[0]], + var_name='lon', + standard_name='longitude', + long_name='longitude', + units='degrees_east') + + cube = iris.cube.Cube( + [[[0.0]]], + var_name='siconc', + long_name='sea_ice_area_fraction', + dim_coords_and_dims=[(grid_lat.copy(), 1), (grid_lon.copy(), 2)], + aux_coords_and_dims=[(latitude.copy(), (1, 2)), + (longitude.copy(), (1, 2))], + ) + cubes = iris.cube.CubeList([cube, iris.cube.Cube(0.0)]) + vardef = get_var_info('CMIP6', 'SImon', 'siconc') + fix = Siconc(vardef) + fixed_cubes = fix.fix_metadata(cubes) + siconc_cube = fixed_cubes.extract_cube('sea_ice_area_fraction') + + # No duplicates anymore + assert len(siconc_cube.coords('latitude')) == 1 + assert len(siconc_cube.coords('longitude')) == 1 + + # Latitude + grid_lat = siconc_cube.coord('grid_latitude') + assert grid_lat.var_name == 'i' + assert grid_lat.long_name == 'grid_latitude' + assert grid_lat.standard_name is None + assert grid_lat.units == '1' + + # Longitude + grid_lon = siconc_cube.coord('grid_longitude') + assert grid_lon.var_name == 'j' + assert grid_lon.long_name == 'grid_longitude' + assert grid_lon.standard_name is None + assert grid_lon.units == '1' + assert not grid_lon.circular diff --git a/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py b/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py index 382d0268f3..c6ce251355 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py +++ b/tests/integration/cmor/_fixes/cmip6/test_bcc_esm1.py @@ -1,16 +1,7 @@ """Test fixes for BCC-ESM1.""" -from esmvalcore.cmor._fixes.cmip6.bcc_esm1 import ( - Cl, - Cli, - Clw, - Siconc, - Sos, - Tos, -) -from esmvalcore.cmor._fixes.common import ( - ClFixHybridPressureCoord, - OceanFixGrid, -) +from esmvalcore.cmor._fixes.cmip6.bcc_csm2_mr import Tos as BaseTos +from esmvalcore.cmor._fixes.cmip6.bcc_esm1 import Cl, Cli, Clw, Tos +from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor._fixes.fix import Fix @@ -47,28 +38,6 @@ def test_clw_fix(): assert Clw is ClFixHybridPressureCoord -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is OceanFixGrid - - -def test_get_sos_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'Omon', 'sos') - assert fix == [Sos(None)] - - -def test_sos_fix(): - """Test fix for ``sos``.""" - assert Sos is OceanFixGrid - - def test_get_tos_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'Omon', 'tos') @@ -77,4 +46,4 @@ def test_get_tos_fix(): 
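
For reference, the registry lookup these tests exercise; Fix.get_fixes returns the fix instances registered for a project/dataset/mip/variable combination:

    from esmvalcore.cmor.fix import Fix

    fixes = Fix.get_fixes('CMIP6', 'BCC-ESM1', 'Omon', 'tos')
    # With this patch applied, the expected result is [Tos(None)], where Tos
    # aliases the bcc_csm2_mr Tos fix rather than OceanFixGrid.
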
def test_tos_fix(): """Test fix for ``tos``.""" - assert Tos is OceanFixGrid + assert Tos is BaseTos diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py index 212902dfed..7ec96ffb5e 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2.py @@ -8,17 +8,7 @@ import pytest from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.cesm2 import ( - Cl, - Cli, - Clw, - Fgco2, - Omon, - Siconc, - Tas, - Tos, -) -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.cesm2 import Cl, Cli, Clw, Tas, Tos from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -61,7 +51,6 @@ def test_get_cl_fix(): [7.0, 25.0]]]]]) -@pytest.mark.sequential @pytest.mark.skipif(sys.version_info < (3, 7, 6), reason="requires python3.7.6 or newer") @unittest.mock.patch( @@ -235,34 +224,6 @@ def tos_cubes(): return iris.cube.CubeList([tos_cube]) -@pytest.fixture -def thetao_cubes(): - """Cubes to test fixes for ``thetao``.""" - time_coord = iris.coords.DimCoord( - [0.0004, 1.09776], var_name='time', standard_name='time', - units='days since 1850-01-01 00:00:00') - lat_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') - lon_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') - lev_coord = iris.coords.DimCoord( - [500.0, 1000.0], bounds=[[2.5, 7.5], [7.5, 12.5]], - var_name='lev', standard_name=None, units='cm', - attributes={'positive': 'up'}) - coord_specs = [ - (time_coord, 0), - (lev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] - thetao_cube = iris.cube.Cube( - np.ones((2, 2, 2, 2)), - var_name='thetao', - dim_coords_and_dims=coord_specs, - ) - return iris.cube.CubeList([thetao_cube]) - - def test_get_tas_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'CESM2', 'Amon', 'tas') @@ -271,26 +232,8 @@ def test_get_tas_fix(): def test_get_tos_fix(): """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos') - assert fix == [Tos(None), Omon(None)] - - -def test_get_thetao_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao') - assert fix == [Omon(None)] - - -def test_get_fgco2_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'fgco2') - assert fix == [Fgco2(None), Omon(None)] - - -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2', 'SImon', 'siconc') - assert fix == [Siconc(None)] + fix = Fix.get_fixes('CMIP6', 'CESM2', 'Amon', 'tos') + assert fix == [Tos(None)] def test_tas_fix_metadata(tas_cubes): @@ -327,55 +270,3 @@ def test_tos_fix_metadata(tos_cubes): assert out_cubes is tos_cubes for cube in out_cubes: np.testing.assert_equal(cube.coord("time").points, [0., 1.1]) - - -def test_thetao_fix_metadata(thetao_cubes): - """Test ``fix_metadata`` for ``thetao``.""" - vardef = get_var_info('CMIP6', 'Omon', 'thetao') - fix = Omon(vardef) - out_cubes = fix.fix_metadata(thetao_cubes) - assert out_cubes is thetao_cubes - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - # Check metadata of depth coordinate - depth_coord = out_cube.coord('depth') - assert depth_coord.standard_name == 'depth' - assert depth_coord.var_name == 'lev' - assert depth_coord.long_name == 'ocean depth coordinate' - assert depth_coord.units == 'm' - assert depth_coord.attributes == {'positive': 
'down'} - - # Check values of depth coordinate - np.testing.assert_allclose(depth_coord.points, [5.0, 10.0]) - np.testing.assert_allclose(depth_coord.bounds, [[2.5, 7.5], [7.5, 12.5]]) - - -def test_fgco2_fix_metadata(): - """Test ``fix_metadata`` for ``fgco2``.""" - vardef = get_var_info('CMIP6', 'Omon', 'fgco2') - cubes = iris.cube.CubeList([ - iris.cube.Cube(0.0, var_name='fgco2'), - ]) - fix = Fgco2(vardef) - out_cubes = fix.fix_metadata(cubes) - assert out_cubes is cubes - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - # Check depth coordinate - depth_coord = out_cube.coord('depth') - assert depth_coord.standard_name == 'depth' - assert depth_coord.var_name == 'depth' - assert depth_coord.long_name == 'depth' - assert depth_coord.units == 'm' - assert depth_coord.attributes == {'positive': 'down'} - - # Check values of depth coordinate - np.testing.assert_allclose(depth_coord.points, 0.0) - assert depth_coord.bounds is None - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py index 4a5e049126..5159cbea9b 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_fv2.py @@ -1,16 +1,7 @@ """Tests for the fixes of CESM2-FV2.""" from esmvalcore.cmor._fixes.cmip6.cesm2 import Cl as BaseCl -from esmvalcore.cmor._fixes.cmip6.cesm2 import Fgco2 as BaseFgco2 from esmvalcore.cmor._fixes.cmip6.cesm2 import Tas as BaseTas -from esmvalcore.cmor._fixes.cmip6.cesm2_fv2 import ( - Cl, - Cli, - Clw, - Fgco2, - Siconc, - Tas, -) -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.cesm2_fv2 import Cl, Cli, Clw, Tas from esmvalcore.cmor.fix import Fix @@ -47,28 +38,6 @@ def test_clw_fix(): assert Clw is BaseCl -def test_get_fgco2_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2-FV2', 'Omon', 'fgco2') - assert fix == [Fgco2(None)] - - -def test_fgco2_fix(): - """Test fix for ``fgco2``.""" - assert Fgco2 is BaseFgco2 - - -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2-FV2', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord - - def test_get_tas_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'CESM2-FV2', 'Amon', 'tas') diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py index 5cc66aef77..6c9743701f 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm.py @@ -8,17 +8,8 @@ import pytest from esmvalcore.cmor._fixes.cmip6.cesm2 import Cl as BaseCl -from esmvalcore.cmor._fixes.cmip6.cesm2 import Fgco2 as BaseFgco2 from esmvalcore.cmor._fixes.cmip6.cesm2 import Tas as BaseTas -from esmvalcore.cmor._fixes.cmip6.cesm2_waccm import ( - Cl, - Cli, - Clw, - Fgco2, - Siconc, - Tas, -) -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.cesm2_waccm import Cl, Cli, Clw, Tas from esmvalcore.cmor.fix import Fix @@ -81,28 +72,6 @@ def test_clw_fix(): assert Clw is Cl -def test_get_fgco2_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM', 'Omon', 'fgco2') - assert fix == [Fgco2(None)] - - -def test_fgco2_fix(): - """Test fix for 
``fgco2``.""" - assert Fgco2 is BaseFgco2 - - -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord - - @pytest.fixture def tas_cubes(): """Cubes to test fixes for ``tas``.""" diff --git a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py index 5aec254229..c5837211b2 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cesm2_waccm_fv2.py @@ -1,16 +1,7 @@ """Tests for the fixes of CESM2-WACCM-FV2.""" from esmvalcore.cmor._fixes.cmip6.cesm2 import Tas as BaseTas -from esmvalcore.cmor._fixes.cmip6.cesm2 import Fgco2 as BaseFgco2 from esmvalcore.cmor._fixes.cmip6.cesm2_waccm import Cl as BaseCl -from esmvalcore.cmor._fixes.cmip6.cesm2_waccm_fv2 import ( - Cl, - Cli, - Clw, - Fgco2, - Siconc, - Tas, -) -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.cesm2_waccm_fv2 import Cl, Cli, Clw, Tas from esmvalcore.cmor.fix import Fix @@ -47,28 +38,6 @@ def test_clw_fix(): assert Clw is BaseCl -def test_get_fgco2_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM-FV2', 'Omon', 'fgco2') - assert fix == [Fgco2(None)] - - -def test_fgco2_fix(): - """Test fix for ``fgco2``.""" - assert Fgco2 is BaseFgco2 - - -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM-FV2', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord - - def test_get_tas_fix(): """Test getting of fix.""" fix = Fix.get_fixes('CMIP6', 'CESM2-WACCM-FV2', 'Amon', 'tas') diff --git a/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py b/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py index 8ff7b7e0a2..01ce238529 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py +++ b/tests/integration/cmor/_fixes/cmip6/test_cnrm_esm2_1.py @@ -1,16 +1,10 @@ """Test fixes for CNRM-ESM2-1.""" -import iris -import numpy as np -import pytest - from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Cl as BaseCl from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Clcalipso as BaseClcalipso from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Cli as BaseCli from esmvalcore.cmor._fixes.cmip6.cnrm_cm6_1 import Clw as BaseClw -from esmvalcore.cmor._fixes.cmip6.cnrm_esm2_1 import (Cl, Clcalipso, - Cli, Clw, Omon) +from esmvalcore.cmor._fixes.cmip6.cnrm_esm2_1 import Cl, Clcalipso, Cli, Clw from esmvalcore.cmor._fixes.fix import Fix -from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -55,55 +49,3 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is BaseClw - - -@pytest.fixture -def thetao_cubes(): - """Cubes to test fixes for ``thetao``.""" - time_coord = iris.coords.DimCoord( - [0.0004, 1.09776], var_name='time', standard_name='time', - units='days since 1850-01-01 00:00:00') - lat_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') - lon_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') - lev_coord = iris.coords.DimCoord( - [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], - var_name='lev', standard_name=None, units='m', - 
attributes={'positive': 'up'}) - coord_specs = [ - (time_coord, 0), - (lev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] - thetao_cube = iris.cube.Cube( - np.ones((2, 2, 2, 2)), - var_name='thetao', - dim_coords_and_dims=coord_specs, - ) - return iris.cube.CubeList([thetao_cube]) - - -def test_get_thetao_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'CNRM-ESM2-1', 'Omon', 'thetao') - assert fix == [Omon(None)] - - -def test_thetao_fix_metadata(thetao_cubes): - """Test ``fix_metadata`` for ``thetao``.""" - vardef = get_var_info('CMIP6', 'Omon', 'thetao') - fix = Omon(vardef) - out_cubes = fix.fix_metadata(thetao_cubes) - assert out_cubes is thetao_cubes - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - # Check metadata of depth coordinate - depth_coord = out_cube.coord('depth') - assert depth_coord.standard_name == 'depth' - assert depth_coord.var_name == 'lev' - assert depth_coord.long_name == 'ocean depth coordinate' - assert depth_coord.units == 'm' - assert depth_coord.attributes == {'positive': 'down'} diff --git a/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py b/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py index c134ae1228..bb0f2060f7 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py +++ b/tests/integration/cmor/_fixes/cmip6/test_fgoals_g3.py @@ -1,14 +1,7 @@ """Tests for the fixes of FGOALS-g3.""" -from unittest import mock - -import iris -import numpy as np - from esmvalcore.cmor._fixes.cmip5.fgoals_g2 import Cl as BaseCl -from esmvalcore.cmor._fixes.cmip6.fgoals_g3 import Cl, Cli, Clw, Siconc, Tos -from esmvalcore.cmor._fixes.common import OceanFixGrid +from esmvalcore.cmor._fixes.cmip6.fgoals_g3 import Cl, Cli, Clw from esmvalcore.cmor.fix import Fix -from esmvalcore.cmor.table import get_var_info def test_get_cl_fix(): @@ -42,56 +35,3 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is BaseCl - - -def test_get_tos_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'Omon', 'tos') - assert fix == [Tos(None)] - - -def test_tos_fix(): - """Test fix for ``tos``.""" - assert issubclass(Tos, OceanFixGrid) - - -@mock.patch('esmvalcore.cmor._fixes.cmip6.fgoals_g3.OceanFixGrid.fix_metadata', - autospec=True) -def test_tos_fix_metadata(mock_base_fix_metadata): - """Test ``fix_metadata`` for ``tos``.""" - mock_base_fix_metadata.side_effect = lambda x, y: y - - # Create test cube - lat_coord = iris.coords.AuxCoord([3.14, 1200.0, 6.28], - var_name='lat', - standard_name='latitude') - lon_coord = iris.coords.AuxCoord([1.0, 2.0, 1e30], - var_name='lon', - standard_name='longitude') - cube = iris.cube.Cube([1.0, 2.0, 3.0], var_name='tos', - standard_name='sea_surface_temperature', - aux_coords_and_dims=[(lat_coord, 0), (lon_coord, 0)]) - cubes = iris.cube.CubeList([cube]) - - # Apply fix - vardef = get_var_info('CMIP6', 'Omon', 'tos') - fix = Tos(vardef) - fixed_cubes = fix.fix_metadata(cubes) - assert len(fixed_cubes) == 1 - fixed_cube = fixed_cubes[0] - np.testing.assert_allclose(fixed_cube.coord('latitude').points, - [3.14, 0.0, 6.28]) - np.testing.assert_allclose(fixed_cube.coord('longitude').points, - [1.0, 2.0, 0.0]) - mock_base_fix_metadata.assert_called_once_with(fix, cubes) - - -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'BCC-CSM2-MR', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is Tos diff --git 
a/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py b/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py index f53379a1dc..a563fd1362 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py +++ b/tests/integration/cmor/_fixes/cmip6/test_gfdl_cm4.py @@ -1,10 +1,8 @@ """Tests for the fixes of GFDL-CM4.""" import iris import numpy as np -import pytest -from esmvalcore.cmor._fixes.cmip6.gfdl_cm4 import Cl, Cli, Clw, Siconc -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.gfdl_cm4 import Cl, Cli, Clw from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -35,7 +33,6 @@ def test_get_cl_fix(): [9.0, 21.0]]]]]) -@pytest.mark.sequential def test_cl_fix_metadata(test_data_path): """Test ``fix_metadata`` for ``cl``.""" nc_path = test_data_path / 'gfdl_cm4_cl.nc' @@ -91,14 +88,3 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is Cl - - -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'GFDL-CM4', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord diff --git a/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py b/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py index d21323bb10..57a834fe5a 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py +++ b/tests/integration/cmor/_fixes/cmip6/test_gfdl_esm4.py @@ -3,102 +3,57 @@ import iris import numpy as np import pytest +from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.gfdl_esm4 import Fgco2, Omon, Siconc -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.gfdl_esm4 import Siconc from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord - - @pytest.fixture -def thetao_cubes(): - """Cubes to test fixes for ``thetao``.""" - time_coord = iris.coords.DimCoord( - [0.0004, 1.09776], var_name='time', standard_name='time', - units='days since 1850-01-01 00:00:00') - lat_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') - lon_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') - lev_coord = iris.coords.DimCoord( - [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], - var_name='lev', standard_name=None, units='m', - attributes={'positive': 'up'}) - coord_specs = [ - (time_coord, 0), - (lev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] - thetao_cube = iris.cube.Cube( - np.ones((2, 2, 2, 2)), - var_name='thetao', - dim_coords_and_dims=coord_specs, - ) - return iris.cube.CubeList([thetao_cube]) +def siconc_cubes(): + """Sample cube.""" + time_coord = iris.coords.DimCoord([0.0], standard_name='time', + var_name='time', + units='days since 6543-2-1') + lat_coord = iris.coords.DimCoord([-30.0], standard_name='latitude', + var_name='lat', units='degrees_north') + lon_coord = iris.coords.DimCoord([30.0], standard_name='longitude', + var_name='lon', units='degrees_east') + coords_specs = [(time_coord, 0), (lat_coord, 1), (lon_coord, 2)] + cube = iris.cube.Cube([[[22.0]]], standard_name='sea_ice_area_fraction', + var_name='siconc', units='%', + 
dim_coords_and_dims=coords_specs)
+    return iris.cube.CubeList([cube])
 
 
-def test_get_thetao_fix():
-    """Test getting of fix."""
-    fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'Omon', 'thetao')
-    assert fix == [Omon(None)]
-
-
-def test_thetao_fix_metadata(thetao_cubes):
-    """Test ``fix_metadata`` for ``thetao``."""
-    vardef = get_var_info('CMIP6', 'Omon', 'thetao')
-    fix = Omon(vardef)
-    out_cubes = fix.fix_metadata(thetao_cubes)
-    assert out_cubes is thetao_cubes
-    assert len(out_cubes) == 1
-    out_cube = out_cubes[0]
-
-    # Check metadata of depth coordinate
-    depth_coord = out_cube.coord('depth')
-    assert depth_coord.standard_name == 'depth'
-    assert depth_coord.var_name == 'lev'
-    assert depth_coord.long_name == 'ocean depth coordinate'
-    assert depth_coord.units == 'm'
-    assert depth_coord.attributes == {'positive': 'down'}
-
-
-def test_get_fgco2_fix():
+def test_get_siconc_fix():
     """Test getting of fix."""
-    fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'Omon', 'fgco2')
-    assert fix == [Fgco2(None), Omon(None)]
-
-
-def test_fgco2_fix_metadata():
-    """Test ``fix_metadata`` for ``fgco2``."""
-    vardef = get_var_info('CMIP6', 'Omon', 'fgco2')
-    cubes = iris.cube.CubeList([
-        iris.cube.Cube(0.0, var_name='fgco2'),
-    ])
-    fix = Fgco2(vardef)
-    out_cubes = fix.fix_metadata(cubes)
-    assert out_cubes is cubes
-    assert len(out_cubes) == 1
-    out_cube = out_cubes[0]
+    fix = Fix.get_fixes('CMIP6', 'GFDL-ESM4', 'SImon', 'siconc')
+    assert fix == [Siconc(None)]
 
-    # Check depth coordinate
-    depth_coord = out_cube.coord('depth')
-    assert depth_coord.standard_name == 'depth'
-    assert depth_coord.var_name == 'depth'
-    assert depth_coord.long_name == 'depth'
-    assert depth_coord.units == 'm'
-    assert depth_coord.attributes == {'positive': 'down'}
 
-    # Check values of depth coordinate
-    np.testing.assert_allclose(depth_coord.points, 0.0)
-    assert depth_coord.bounds is None
+def test_siconc_fix_metadata(siconc_cubes):
+    """Test ``fix_metadata`` for ``siconc``."""
+    assert len(siconc_cubes) == 1
+    siconc_cube = siconc_cubes[0]
+    assert siconc_cube.var_name == "siconc"
+
+    # Extract siconc cube
+    siconc_cube = siconc_cubes.extract_cube('sea_ice_area_fraction')
+    assert not siconc_cube.coords('typesi')
+
+    # Apply fix
+    vardef = get_var_info('CMIP6', 'SImon', 'siconc')
+    fix = Siconc(vardef)
+    fixed_cubes = fix.fix_metadata(siconc_cubes)
+    assert len(fixed_cubes) == 1
+    fixed_siconc_cube = fixed_cubes.extract_cube(
+        'sea_ice_area_fraction')
+    fixed_typesi_coord = fixed_siconc_cube.coord('area_type')
+    assert fixed_typesi_coord.points is not None
+    assert fixed_typesi_coord.bounds is None
+    np.testing.assert_equal(fixed_typesi_coord.points,
+                            ['siconc'])
+    np.testing.assert_equal(fixed_typesi_coord.units,
+                            Unit('1'))
diff --git a/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py b/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py
index 99f2a76aa2..526566a2e6 100644
--- a/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py
+++ b/tests/integration/cmor/_fixes/cmip6/test_ipsl_cm6a_lr.py
@@ -8,7 +8,7 @@
 from iris.cube import Cube, CubeList
 from iris.exceptions import CoordinateNotFoundError
 
-from esmvalcore.cmor._fixes.cmip6.ipsl_cm6a_lr import AllVars, Clcalipso, Omon
+from esmvalcore.cmor._fixes.cmip6.ipsl_cm6a_lr import AllVars, Clcalipso
 from esmvalcore.cmor._fixes.fix import Fix
 from esmvalcore.cmor.table import get_var_info
 
@@ -18,9 +18,8 @@ class TestAllVars(unittest.TestCase):
     def setUp(self):
         """Set up tests."""
-        vardef = get_var_info('CMIP6', 'Omon', 'tos')
-        self.fix = AllVars(vardef)
-        
self.cube = Cube(np.random.rand(2, 2, 2), var_name='tos') + self.fix = AllVars(None) + self.cube = Cube(np.random.rand(2, 2, 2), var_name='ch4') self.cube.add_aux_coord( AuxCoord(np.random.rand(2, 2), var_name='nav_lat', @@ -37,43 +36,20 @@ def test_fix_metadata_ocean_var(self): self.assertEqual(len(cubes), 1) cube = cubes[0] - self.assertEqual(cube.var_name, 'tos') self.assertEqual(cube.coord('latitude').var_name, 'lat') self.assertEqual(cube.coord('longitude').var_name, 'lon') + self.cube.coord('cell_area') - def test_fix_data_no_lat(self): - """Test ``fix_metadata`` when no latitude is present.""" - self.cube.remove_coord('latitude') + def test_fix_data_other_var(self): + """Test ``fix_metadata`` for other variables.""" cubes = self.fix.fix_metadata(CubeList([self.cube])) self.assertEqual(len(cubes), 1) cube = cubes[0] - self.assertEqual(cube.coord('longitude').var_name, 'lon') - with self.assertRaises(CoordinateNotFoundError): - self.cube.coord('latitude') - - def test_fix_data_no_lon(self): - """Test ``fix_metadata`` when no longitude is present.""" - self.cube.remove_coord('longitude') - cubes = self.fix.fix_metadata(CubeList([self.cube])) - - self.assertEqual(len(cubes), 1) - cube = cubes[0] - self.assertEqual(cube.coord('latitude').var_name, 'lat') + self.assertEqual(cube.coord('latitude').var_name, 'nav_lat') + self.assertEqual(cube.coord('longitude').var_name, 'nav_lon') with self.assertRaises(CoordinateNotFoundError): - self.cube.coord('longitude') - - def test_fix_data_no_lat_lon(self): - """Test ``fix_metadata`` for cubes with no latitude and longitude.""" - self.cube.remove_coord('latitude') - self.cube.remove_coord('longitude') - cubes = self.fix.fix_metadata(CubeList([self.cube])) - - self.assertEqual(len(cubes), 1) - with self.assertRaises(CoordinateNotFoundError): - self.cube.coord('latitude') - with self.assertRaises(CoordinateNotFoundError): - self.cube.coord('longitude') + self.cube.coord('cell_area') def test_get_clcalipso_fix(): @@ -104,55 +80,3 @@ def test_clcalipso_fix_metadata(clcalipso_cubes): assert coord.long_name == 'altitude' assert coord.standard_name == 'altitude' assert coord.var_name == 'alt40' - - -@pytest.fixture -def thetao_cubes(): - """Cubes to test fixes for ``thetao``.""" - time_coord = iris.coords.DimCoord( - [0.0004, 1.09776], var_name='time', standard_name='time', - units='days since 1850-01-01 00:00:00') - lat_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') - lon_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lon', standard_name='longitude', units='degrees') - lev_coord = iris.coords.DimCoord( - [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], - var_name='olevel', standard_name=None, units='m', - attributes={'positive': 'up'}) - coord_specs = [ - (time_coord, 0), - (lev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] - thetao_cube = iris.cube.Cube( - np.ones((2, 2, 2, 2)), - var_name='thetao', - dim_coords_and_dims=coord_specs, - ) - return iris.cube.CubeList([thetao_cube]) - - -def test_get_thetao_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'IPSL-CM6A-LR', 'Omon', 'thetao') - assert fix == [Omon(None), AllVars(None)] - - -def test_thetao_fix_metadata(thetao_cubes): - """Test ``fix_metadata`` for ``thetao``.""" - vardef = get_var_info('CMIP6', 'Omon', 'thetao') - fix = Omon(vardef) - out_cubes = fix.fix_metadata(thetao_cubes) - assert out_cubes is thetao_cubes - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - # Check metadata of depth coordinate - 
depth_coord = out_cube.coord('depth') - assert depth_coord.standard_name == 'depth' - assert depth_coord.var_name == 'lev' - assert depth_coord.long_name == 'ocean depth coordinate' - assert depth_coord.units == 'm' - assert depth_coord.attributes == {'positive': 'down'} diff --git a/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py b/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py index 41de6089c5..bbe1c360d3 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py +++ b/tests/integration/cmor/_fixes/cmip6/test_kiost_esm.py @@ -3,12 +3,7 @@ import pytest from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.kiost_esm import ( - SfcWind, - Siconc, - Tas, -) -from esmvalcore.cmor._fixes.common import SiconcFixScalarCoord +from esmvalcore.cmor._fixes.cmip6.kiost_esm import SfcWind, Tas from esmvalcore.cmor._fixes.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -102,17 +97,6 @@ def test_sfcwind_fix_metadata(sfcwind_cubes): assert coord == height_coord -def test_get_siconc_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'KIOST-ESM', 'SImon', 'siconc') - assert fix == [Siconc(None)] - - -def test_siconc_fix(): - """Test fix for ``siconc``.""" - assert Siconc is SiconcFixScalarCoord - - def test_get_tas_fix(): fix = Fix.get_fixes('CMIP6', 'KIOST-ESM', 'Amon', 'tas') assert fix == [Tas(None)] diff --git a/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py b/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py index a886946818..303854968b 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py +++ b/tests/integration/cmor/_fixes/cmip6/test_mcm_ua_1_0.py @@ -1,10 +1,9 @@ """Tests for the fixes of MCM-UA-1-0.""" import iris -import numpy as np import pytest from cf_units import Unit -from esmvalcore.cmor._fixes.cmip6.mcm_ua_1_0 import AllVars, Omon, Tas, Uas +from esmvalcore.cmor._fixes.cmip6.mcm_ua_1_0 import AllVars, Tas from esmvalcore.cmor.fix import Fix from esmvalcore.cmor.table import get_var_info @@ -48,45 +47,6 @@ def cubes(): return iris.cube.CubeList([correct_cube, wrong_cube, scalar_cube]) -@pytest.fixture -def uas_cubes(): - correct_lat_coord = iris.coords.DimCoord([0.0], - var_name='lat', - standard_name=' latitude ', - long_name=' latitude') - wrong_lat_coord = iris.coords.DimCoord([0.0], - var_name='latitudeCoord', - standard_name=' latitude', - long_name='latitude') - correct_lon_coord = iris.coords.DimCoord([0.0], - var_name='lon', - standard_name=' longitude ', - long_name='longitude ') - wrong_lon_coord = iris.coords.DimCoord([0.0], - var_name='longitudeCoord', - standard_name='longitude', - long_name=' longitude') - correct_cube = iris.cube.Cube( - [[10.0]], - var_name='uas', - standard_name='eastward_wind ', - long_name=' East Near-Surface Wind ', - dim_coords_and_dims=[(correct_lat_coord, 0), (correct_lon_coord, 1)], - ) - wrong_cube = iris.cube.Cube( - [[10.0]], - var_name='ta', - standard_name=' air_temperature ', - long_name='Air Temperature', - dim_coords_and_dims=[(wrong_lat_coord, 0), (wrong_lon_coord, 1)], - attributes={'parent_time_units': 'days since 0000-00-00 (noleap)'}, - ) - scalar_cube = iris.cube.Cube(0.0, var_name='ps', - standard_name='air_pressure ', - long_name=' Air pressure ') - return iris.cube.CubeList([correct_cube, wrong_cube, scalar_cube]) - - @pytest.fixture def cubes_bounds(): lat_coord = iris.coords.DimCoord([0.0], @@ -126,11 +86,6 @@ def test_get_tas_fix(): assert fix == [Tas(None), AllVars(None)] -def test_get_uas_fix(): - fix = Fix.get_fixes('CMIP6', 'MCM-UA-1-0', 
'Amon', 'uas') - assert fix == [Uas(None), AllVars(None)] - - def test_allvars_fix_metadata(cubes): fix = AllVars(None) out_cubes = fix.fix_metadata(cubes) @@ -202,102 +157,3 @@ def test_tas_fix_metadata(cubes): assert out_cubes_2[0].var_name == 'tas' coord = out_cubes_2[0].coord('height') assert coord == height_coord - - -def test_uas_fix_metadata(uas_cubes): - for cube in uas_cubes: - with pytest.raises(iris.exceptions.CoordinateNotFoundError): - cube.coord('height') - height_coord = iris.coords.AuxCoord(10.0, - var_name='height', - standard_name='height', - long_name='height', - units=Unit('m'), - attributes={'positive': 'up'}) - vardef = get_var_info('CMIP6', 'Amon', 'uas') - fix = Uas(vardef) - - # Check fix - out_cubes = fix.fix_metadata(uas_cubes) - assert out_cubes[0].var_name == 'uas' - coord = out_cubes[0].coord('height') - assert coord == height_coord - - # Check that height coordinate is not added twice - out_cubes_2 = fix.fix_metadata(out_cubes) - assert out_cubes_2[0].var_name == 'uas' - coord = out_cubes_2[0].coord('height') - assert coord == height_coord - - -@pytest.fixture -def thetao_cubes(): - time_coord = iris.coords.DimCoord( - [0.0004, 1.09776], var_name='time', standard_name='time', - units='days since 1850-01-01 00:00:00') - lat_coord = iris.coords.DimCoord( - [0.0, 1.0], var_name='lat', standard_name='latitude', units='degrees') - lon_coord = iris.coords.DimCoord([-0.9375, 357.1875], - bounds=[[-1.875, 0.], [356.25, 358.125]], - var_name='lon', - standard_name='longitude') - lev_coord = iris.coords.DimCoord( - [5.0, 10.0], bounds=[[2.5, 7.5], [7.5, 12.5]], - var_name='lev', standard_name=None, units='m', - attributes={'positive': 'up'}) - coord_specs = [ - (time_coord, 0), - (lev_coord, 1), - (lat_coord, 2), - (lon_coord, 3), - ] - thetao_cube = iris.cube.Cube( - np.arange(16).reshape(2, 2, 2, 2), - var_name='thetao', - dim_coords_and_dims=coord_specs, - ) - - return iris.cube.CubeList([thetao_cube]) - - -def test_get_thetao_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'MCM-UA-1-0', 'Omon', 'thetao') - assert fix == [Omon(None), AllVars(None)] - - -def test_thetao_fix_metadata(thetao_cubes): - """Test ``fix_metadata`` for ``thetao``.""" - vardef = get_var_info('CMIP6', 'Omon', 'thetao') - fix_omon = Omon(vardef) - fix_allvars = AllVars(vardef) - out_cubes = fix_omon.fix_metadata(thetao_cubes) - out_cubes = fix_allvars.fix_metadata(out_cubes) - assert out_cubes is thetao_cubes - assert len(out_cubes) == 1 - out_cube = out_cubes[0] - - # Check data of cube - np.testing.assert_allclose(out_cube.data, - [[[[1, 0], - [3, 2]], - [[5, 4], - [7, 6]]], - [[[9, 8], - [11, 10]], - [[13, 12], - [15, 14]]]]) - - # Check data of longitude - lon_coord = out_cube.coord('longitude') - np.testing.assert_allclose(lon_coord.points, [357.1875, 359.0625]) - np.testing.assert_allclose(lon_coord.bounds, - [[356.25, 358.125], [358.125, 360.0]]) - - # Check metadata of depth coordinate - depth_coord = out_cube.coord('depth') - assert depth_coord.standard_name == 'depth' - assert depth_coord.var_name == 'lev' - assert depth_coord.long_name == 'ocean depth coordinate' - assert depth_coord.units == 'm' - assert depth_coord.attributes == {'positive': 'down'} diff --git a/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py b/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py index 09d35be726..bf2398f790 100644 --- a/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py +++ b/tests/integration/cmor/_fixes/cmip6/test_sam0_unicon.py @@ -1,9 +1,5 @@ """Test fixes 
for SAM0-UNICON.""" -import iris -import numpy as np -import pytest - -from esmvalcore.cmor._fixes.cmip6.sam0_unicon import Cl, Cli, Clw, Nbp +from esmvalcore.cmor._fixes.cmip6.sam0_unicon import Cl, Cli, Clw from esmvalcore.cmor._fixes.common import ClFixHybridPressureCoord from esmvalcore.cmor._fixes.fix import Fix @@ -39,29 +35,3 @@ def test_get_clw_fix(): def test_clw_fix(): """Test fix for ``clw``.""" assert Clw is ClFixHybridPressureCoord - - -def test_get_nbp_fix(): - """Test getting of fix.""" - fix = Fix.get_fixes('CMIP6', 'SAM0-UNICON', 'Lmon', 'nbp') - assert fix == [Nbp(None)] - - -@pytest.fixture -def nbp_cube(): - """``nbp`` cube.""" - cube = iris.cube.Cube( - [1.0], - var_name='nbp', - standard_name='surface_net_downward_mass_flux_of_carbon_dioxide' - '_expressed_as_carbon_due_to_all_land_processes', - units='kg m-2 s-1', - ) - return cube - - -def test_nbp_fix_data(nbp_cube): - """Test ``fix_data`` for ``nbp``.""" - fix = Nbp(None) - out_cube = fix.fix_data(nbp_cube) - np.testing.assert_allclose(out_cube.data, [-1.0]) diff --git a/tests/integration/cmor/_fixes/test_common.py b/tests/integration/cmor/_fixes/test_common.py index 922f807fef..3adeadfa54 100644 --- a/tests/integration/cmor/_fixes/test_common.py +++ b/tests/integration/cmor/_fixes/test_common.py @@ -1,18 +1,15 @@ """Test for common fixes used for multiple datasets.""" import iris import numpy as np -import pytest -from cf_units import Unit from esmvalcore.cmor._fixes.common import ( ClFixHybridHeightCoord, ClFixHybridPressureCoord, - OceanFixGrid, - SiconcFixScalarCoord, ) from esmvalcore.cmor.table import get_var_info from esmvalcore.iris_helpers import var_name_constraint + AIR_PRESSURE_POINTS = np.array([[[[1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0], [1.0, 1.0, 1.0, 1.0]], @@ -84,7 +81,6 @@ def hybrid_pressure_coord_fix_metadata(nc_path, short_name, fix): return var_names -@pytest.mark.sequential def test_cl_hybrid_pressure_coord_fix_metadata_with_a(test_data_path): """Test ``fix_metadata`` for ``cl``.""" vardef = get_var_info('CMIP6', 'Amon', 'cl') @@ -94,7 +90,6 @@ def test_cl_hybrid_pressure_coord_fix_metadata_with_a(test_data_path): assert 'a_bnds' in var_names -@pytest.mark.sequential def test_cl_hybrid_pressure_coord_fix_metadata_with_ap(test_data_path): """Test ``fix_metadata`` for ``cl``.""" vardef = get_var_info('CMIP6', 'Amon', 'cl') @@ -162,255 +157,9 @@ def hybrid_height_coord_fix_metadata(nc_path, short_name, fix): assert air_pressure_coord.units == 'Pa' -@pytest.mark.sequential def test_cl_hybrid_height_coord_fix_metadata(test_data_path): """Test ``fix_metadata`` for ``cl``.""" vardef = get_var_info('CMIP6', 'Amon', 'cl') nc_path = test_data_path / 'common_cl_hybrid_height.nc' hybrid_height_coord_fix_metadata(nc_path, 'cl', ClFixHybridHeightCoord(vardef)) - - -@pytest.fixture -def siconc_cubes(): - """Sample cube.""" - time_coord = iris.coords.DimCoord([0.0], standard_name='time', - var_name='time', - units='days since 6543-2-1') - lat_coord = iris.coords.DimCoord([-30.0], standard_name='latitude', - var_name='lat', units='degrees_north') - lon_coord = iris.coords.DimCoord([30.0], standard_name='longitude', - var_name='lon', units='degrees_east') - coords_specs = [(time_coord, 0), (lat_coord, 1), (lon_coord, 2)] - cube = iris.cube.Cube([[[22.0]]], standard_name='sea_ice_area_fraction', - var_name='siconc', units='%', - dim_coords_and_dims=coords_specs) - return iris.cube.CubeList([cube]) - - -def test_siconc_fix_metadata(siconc_cubes): - """Test ``fix_metadata`` for ``siconc``.""" - assert 
len(siconc_cubes) == 1 - siconc_cube = siconc_cubes[0] - assert siconc_cube.var_name == "siconc" - - # Extract siconc cube - siconc_cube = siconc_cubes.extract_cube('sea_ice_area_fraction') - assert not siconc_cube.coords('typesi') - - # Apply fix - vardef = get_var_info('CMIP6', 'SImon', 'siconc') - fix = SiconcFixScalarCoord(vardef) - fixed_cubes = fix.fix_metadata(siconc_cubes) - assert len(fixed_cubes) == 1 - fixed_siconc_cube = fixed_cubes.extract_cube( - 'sea_ice_area_fraction') - fixed_typesi_coord = fixed_siconc_cube.coord('area_type') - assert fixed_typesi_coord.points is not None - assert fixed_typesi_coord.bounds is None - np.testing.assert_equal(fixed_typesi_coord.points, - ['sea_ice']) - np.testing.assert_equal(fixed_typesi_coord.units, - Unit('No unit')) - - -def get_tos_cubes(wrong_ij_names=False, ij_bounds=False): - """Cubes containing tos variable.""" - if wrong_ij_names: - j_var_name = 'lat' - j_long_name = 'latitude' - i_var_name = 'lon' - i_long_name = 'longitude' - else: - j_var_name = 'j' - j_long_name = 'cell index along second dimension' - i_var_name = 'i' - i_long_name = 'cell index along first dimension' - if ij_bounds: - j_bounds = [[10.0, 30.0], [30.0, 50.0]] - i_bounds = [[5.0, 15.0], [15.0, 25.0], [25.0, 35.0]] - else: - j_bounds = None - i_bounds = None - j_coord = iris.coords.DimCoord( - [20.0, 40.0], - bounds=j_bounds, - var_name=j_var_name, - long_name=j_long_name, - ) - i_coord = iris.coords.DimCoord( - [10.0, 20.0, 30.0], - bounds=i_bounds, - var_name=i_var_name, - long_name=i_long_name, - ) - lat_coord = iris.coords.AuxCoord( - [[-40.0, -20.0, 0.0], [-20.0, 0.0, 20.0]], - var_name='lat', - standard_name='latitude', - units='degrees_north', - ) - lon_coord = iris.coords.AuxCoord( - [[100.0, 140.0, 180.0], [80.0, 100.0, 120.0]], - var_name='lon', - standard_name='longitude', - units='degrees_east', - ) - time_coord = iris.coords.DimCoord( - 1.0, - bounds=[0.0, 2.0], - var_name='time', - standard_name='time', - long_name='time', - units='days since 1950-01-01', - ) - - # Create tos variable cube - cube = iris.cube.Cube( - np.full((1, 2, 3), 300.0), - var_name='tos', - long_name='sea_surface_temperature', - units='K', - dim_coords_and_dims=[(time_coord, 0), (j_coord, 1), (i_coord, 2)], - aux_coords_and_dims=[(lat_coord, (1, 2)), (lon_coord, (1, 2))], - ) - - # Create empty (dummy) cube - empty_cube = iris.cube.Cube(0.0) - return iris.cube.CubeList([cube, empty_cube]) - - -@pytest.fixture -def tos_cubes_wrong_ij_names(): - """Cubes with wrong ij names.""" - return get_tos_cubes(wrong_ij_names=True, ij_bounds=True) - - -def test_ocean_fix_grid_wrong_ij_names(tos_cubes_wrong_ij_names): - """Test ``fix_metadata`` with cubes with wrong ij names.""" - cube_in = tos_cubes_wrong_ij_names.extract_cube('sea_surface_temperature') - assert len(cube_in.coords('latitude')) == 2 - assert len(cube_in.coords('longitude')) == 2 - assert cube_in.coord('latitude', dimensions=1).bounds is not None - assert cube_in.coord('longitude', dimensions=2).bounds is not None - assert cube_in.coord('latitude', dimensions=(1, 2)).bounds is None - assert cube_in.coord('longitude', dimensions=(1, 2)).bounds is None - - # Apply fix - vardef = get_var_info('CMIP6', 'Omon', 'tos') - fix = OceanFixGrid(vardef) - fixed_cubes = fix.fix_metadata(tos_cubes_wrong_ij_names) - assert len(fixed_cubes) == 1 - fixed_cube = fixed_cubes.extract_cube('sea_surface_temperature') - assert fixed_cube is cube_in - - # Check ij names - i_coord = fixed_cube.coord('cell index along first dimension') - j_coord = 
fixed_cube.coord('cell index along second dimension') - assert i_coord.var_name == 'i' - assert i_coord.standard_name is None - assert i_coord.long_name == 'cell index along first dimension' - assert i_coord.units == '1' - assert i_coord.circular is False - assert j_coord.var_name == 'j' - assert j_coord.standard_name is None - assert j_coord.long_name == 'cell index along second dimension' - assert j_coord.units == '1' - - # Check ij points and bounds - np.testing.assert_allclose(i_coord.points, [0, 1, 2]) - np.testing.assert_allclose(i_coord.bounds, - [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]]) - np.testing.assert_allclose(j_coord.points, [0, 1]) - np.testing.assert_allclose(j_coord.bounds, [[-0.5, 0.5], [0.5, 1.5]]) - - # Check bounds of latitude and longitude - assert len(fixed_cube.coords('latitude')) == 1 - assert len(fixed_cube.coords('longitude')) == 1 - assert fixed_cube.coord('latitude').bounds is not None - assert fixed_cube.coord('longitude').bounds is not None - latitude_bounds = np.array([[[-40, -33.75, -23.75, -30.0], - [-33.75, -6.25, 3.75, -23.75], - [-6.25, -1.02418074021670e-14, 10.0, 3.75]], - [[-30.0, -23.75, -13.75, -20.0], - [-23.75, 3.75, 13.75, -13.75], - [3.75, 10.0, 20.0, 13.75]]]) - np.testing.assert_allclose(fixed_cube.coord('latitude').bounds, - latitude_bounds) - longitude_bounds = np.array([[[140.625, 99.375, 99.375, 140.625], - [99.375, 140.625, 140.625, 99.375], - [140.625, 99.375, 99.375, 140.625]], - [[140.625, 99.375, 99.375, 140.625], - [99.375, 140.625, 140.625, 99.375], - [140.625, 99.375, 99.375, 140.625]]]) - np.testing.assert_allclose(fixed_cube.coord('longitude').bounds, - longitude_bounds) - - -@pytest.fixture -def tos_cubes_no_ij_bounds(): - """Cubes with no ij bounds.""" - return get_tos_cubes(wrong_ij_names=False, ij_bounds=False) - - -def test_ocean_fix_grid_no_ij_bounds(tos_cubes_no_ij_bounds): - """Test ``fix_metadata`` with cubes with no ij bounds.""" - cube_in = tos_cubes_no_ij_bounds.extract_cube('sea_surface_temperature') - assert len(cube_in.coords('latitude')) == 1 - assert len(cube_in.coords('longitude')) == 1 - assert cube_in.coord('latitude').bounds is None - assert cube_in.coord('longitude').bounds is None - assert cube_in.coord('cell index along first dimension').var_name == 'i' - assert cube_in.coord('cell index along second dimension').var_name == 'j' - assert cube_in.coord('cell index along first dimension').bounds is None - assert cube_in.coord('cell index along second dimension').bounds is None - - # Apply fix - vardef = get_var_info('CMIP6', 'Omon', 'tos') - fix = OceanFixGrid(vardef) - fixed_cubes = fix.fix_metadata(tos_cubes_no_ij_bounds) - assert len(fixed_cubes) == 1 - fixed_cube = fixed_cubes.extract_cube('sea_surface_temperature') - assert fixed_cube is cube_in - - # Check ij names - i_coord = fixed_cube.coord('cell index along first dimension') - j_coord = fixed_cube.coord('cell index along second dimension') - assert i_coord.var_name == 'i' - assert i_coord.standard_name is None - assert i_coord.long_name == 'cell index along first dimension' - assert i_coord.units == '1' - assert i_coord.circular is False - assert j_coord.var_name == 'j' - assert j_coord.standard_name is None - assert j_coord.long_name == 'cell index along second dimension' - assert j_coord.units == '1' - - # Check ij points and bounds - np.testing.assert_allclose(i_coord.points, [0, 1, 2]) - np.testing.assert_allclose(i_coord.bounds, - [[-0.5, 0.5], [0.5, 1.5], [1.5, 2.5]]) - np.testing.assert_allclose(j_coord.points, [0, 1]) - 
np.testing.assert_allclose(j_coord.bounds, [[-0.5, 0.5], [0.5, 1.5]]) - - # Check bounds of latitude and longitude - assert len(fixed_cube.coords('latitude')) == 1 - assert len(fixed_cube.coords('longitude')) == 1 - assert fixed_cube.coord('latitude').bounds is not None - assert fixed_cube.coord('longitude').bounds is not None - latitude_bounds = np.array([[[-40, -33.75, -23.75, -30.0], - [-33.75, -6.25, 3.75, -23.75], - [-6.25, -1.02418074021670e-14, 10.0, 3.75]], - [[-30.0, -23.75, -13.75, -20.0], - [-23.75, 3.75, 13.75, -13.75], - [3.75, 10.0, 20.0, 13.75]]]) - np.testing.assert_allclose(fixed_cube.coord('latitude').bounds, - latitude_bounds) - longitude_bounds = np.array([[[140.625, 99.375, 99.375, 140.625], - [99.375, 140.625, 140.625, 99.375], - [140.625, 99.375, 99.375, 140.625]], - [[140.625, 99.375, 99.375, 140.625], - [99.375, 140.625, 140.625, 99.375], - [140.625, 99.375, 99.375, 140.625]]]) - np.testing.assert_allclose(fixed_cube.coord('longitude').bounds, - longitude_bounds) diff --git a/tests/integration/cmor/_fixes/test_fix.py b/tests/integration/cmor/_fixes/test_fix.py index 5ffc4ca868..0199911504 100644 --- a/tests/integration/cmor/_fixes/test_fix.py +++ b/tests/integration/cmor/_fixes/test_fix.py @@ -1,5 +1,3 @@ -"""Integration tests for fixes.""" - import os import shutil import tempfile @@ -8,35 +6,35 @@ import pytest from iris.cube import Cube -from esmvalcore.cmor._fixes.cmip5.bnu_esm import Ch4 -from esmvalcore.cmor._fixes.cmip5.canesm2 import FgCo2 -from esmvalcore.cmor._fixes.cmip5.cesm1_bgc import Gpp -from esmvalcore.cmor._fixes.cmip6.cesm2 import Omon, Tos from esmvalcore.cmor.fix import Fix class TestFix(unittest.TestCase): def setUp(self): - """Set up temp folder.""" + """Set up temp folder""" self.temp_folder = tempfile.mkdtemp() def tearDown(self): - """Remove temp folder.""" + """Remove temp folder""" shutil.rmtree(self.temp_folder) def test_get_fix(self): + from esmvalcore.cmor._fixes.cmip5.canesm2 import FgCo2 self.assertListEqual( Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgco2'), [FgCo2(None)]) def test_get_fix_case_insensitive(self): + from esmvalcore.cmor._fixes.cmip5.canesm2 import FgCo2 self.assertListEqual( Fix.get_fixes('CMIP5', 'CanESM2', 'Amon', 'fgCo2'), [FgCo2(None)]) def test_get_fixes_with_replace(self): + from esmvalcore.cmor._fixes.cmip5.bnu_esm import Ch4 self.assertListEqual(Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'ch4'), [Ch4(None)]) def test_get_fixes_with_generic(self): + from esmvalcore.cmor._fixes.cmip5.cesm1_bgc import Gpp self.assertListEqual( Fix.get_fixes('CMIP5', 'CESM1-BGC', 'Amon', 'gpp'), [Gpp(None)]) @@ -52,19 +50,6 @@ def test_get_fix_no_var(self): self.assertListEqual( Fix.get_fixes('CMIP5', 'BNU-ESM', 'Amon', 'BAD_VAR'), []) - def test_get_fix_only_mip(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'thetao'), [Omon(None)]) - - def test_get_fix_only_mip_case_insensitive(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'omOn', 'thetao'), [Omon(None)]) - - def test_get_fix_mip_and_var(self): - self.assertListEqual( - Fix.get_fixes('CMIP6', 'CESM2', 'Omon', 'tos'), - [Tos(None), Omon(None)]) - def test_fix_metadata(self): cube = Cube([0]) reference = Cube([0]) diff --git a/tests/integration/cmor/_fixes/test_shared.py b/tests/integration/cmor/_fixes/test_shared.py index 69e9a0f092..7930d4e90e 100644 --- a/tests/integration/cmor/_fixes/test_shared.py +++ b/tests/integration/cmor/_fixes/test_shared.py @@ -12,11 +12,9 @@ add_scalar_height_coord, add_scalar_typeland_coord, 
add_scalar_typesea_coord, - add_scalar_typesi_coord, add_sigma_factory, cube_to_aux_coord, fix_bounds, - fix_ocean_depth_coord, get_altitude_to_pressure_func, get_bounds_cube, get_pressure_to_altitude_func, @@ -25,7 +23,6 @@ from esmvalcore.iris_helpers import var_name_constraint -@pytest.mark.sequential def test_altitude_to_pressure_func(): """Test altitude to pressure function.""" func = get_altitude_to_pressure_func() @@ -40,7 +37,6 @@ def test_altitude_to_pressure_func(): [101325.0, 100129.0]) -@pytest.mark.sequential def test_pressure_to_altitude_func(): """Test pressure to altitude function.""" func = get_pressure_to_altitude_func() @@ -69,7 +65,6 @@ def test_pressure_to_altitude_func(): ] -@pytest.mark.sequential @pytest.mark.parametrize('coord_dict,output', TEST_ADD_AUX_COORDS_FROM_CUBES) def test_add_aux_coords_from_cubes(coord_dict, output): """Test extraction of auxiliary coordinates from cubes.""" @@ -135,7 +130,6 @@ def test_add_aux_coords_from_cubes(coord_dict, output): ] -@pytest.mark.sequential @pytest.mark.parametrize('cube,output', TEST_ADD_PLEV_FROM_ALTITUDE) def test_add_plev_from_altitude(cube, output): """Test adding of pressure level coordinate.""" @@ -173,7 +167,6 @@ def test_add_plev_from_altitude(cube, output): ] -@pytest.mark.sequential @pytest.mark.parametrize('cube,output', TEST_ADD_ALTITUDE_FROM_PLEV) def test_add_altitude_from_plev(cube, output): """Test adding of altitude coordinate.""" @@ -218,7 +211,6 @@ def test_add_altitude_from_plev(cube, output): ] -@pytest.mark.sequential @pytest.mark.parametrize('cube_in,depth', TEST_ADD_SCALAR_COORD) def test_add_scalar_depth_coord(cube_in, depth): """Test adding of scalar depth coordinate.""" @@ -246,7 +238,6 @@ def test_add_scalar_depth_coord(cube_in, depth): assert coord == depth_coord -@pytest.mark.sequential @pytest.mark.parametrize('cube_in,height', TEST_ADD_SCALAR_COORD) def test_add_scalar_height_coord(cube_in, height): """Test adding of scalar height coordinate.""" @@ -274,7 +265,6 @@ def test_add_scalar_height_coord(cube_in, height): assert coord == height_coord -@pytest.mark.sequential @pytest.mark.parametrize('cube_in,typeland', TEST_ADD_SCALAR_COORD) def test_add_scalar_typeland_coord(cube_in, typeland): """Test adding of scalar typeland coordinate.""" @@ -301,7 +291,6 @@ def test_add_scalar_typeland_coord(cube_in, typeland): assert coord == typeland_coord -@pytest.mark.sequential @pytest.mark.parametrize('cube_in,typesea', TEST_ADD_SCALAR_COORD) def test_add_scalar_typesea_coord(cube_in, typesea): """Test adding of scalar typesea coordinate.""" @@ -328,33 +317,6 @@ def test_add_scalar_typesea_coord(cube_in, typesea): assert coord == typesea_coord -@pytest.mark.sequential -@pytest.mark.parametrize('cube_in,typesi', TEST_ADD_SCALAR_COORD) -def test_add_scalar_typesi_coord(cube_in, typesi): - """Test adding of scalar typesi coordinate.""" - cube_in = cube_in.copy() - if typesi is None: - typesi = 'sea_ice' - typesi_coord = iris.coords.AuxCoord(typesi, - var_name='type', - standard_name='area_type', - long_name='Sea Ice area type', - units=Unit('no unit')) - with pytest.raises(iris.exceptions.CoordinateNotFoundError): - cube_in.coord('area_type') - if typesi == 'sea_ice': - cube_out = add_scalar_typesi_coord(cube_in) - else: - cube_out = add_scalar_typesi_coord(cube_in, typesi) - assert cube_out is cube_in - coord = cube_in.coord('area_type') - assert coord == typesi_coord - cube_out_2 = add_scalar_typesi_coord(cube_out) - assert cube_out_2 is cube_out - coord = cube_in.coord('area_type') - assert coord == 
typesi_coord - - PS_COORD = iris.coords.AuxCoord([[[101000.0]]], var_name='ps', units='Pa') PTOP_COORD = iris.coords.AuxCoord(1000.0, var_name='ptop', units='Pa') LEV_COORD = iris.coords.AuxCoord([0.5], bounds=[[0.2, 0.8]], var_name='lev', @@ -375,7 +337,6 @@ def test_add_scalar_typesi_coord(cube_in, typesi): ] -@pytest.mark.sequential @pytest.mark.parametrize('cube,output', TEST_ADD_SIGMA_FACTORY) def test_add_sigma_factory(cube, output): """Test adding of factory for ``atmosphere_sigma_coordinate``.""" @@ -392,7 +353,6 @@ def test_add_sigma_factory(cube, output): assert air_pressure_coord == output -@pytest.mark.sequential def test_cube_to_aux_coord(): """Test converting cube to auxiliary coordinate.""" cube = iris.cube.Cube( @@ -420,7 +380,6 @@ def test_cube_to_aux_coord(): ] -@pytest.mark.sequential @pytest.mark.parametrize('coord_name,output', TEST_GET_BOUNDS_CUBE) def test_get_bounds_cube(coord_name, output): """Test retrieving of bounds cube from list of cubes.""" @@ -458,7 +417,6 @@ def test_get_bounds_cube(coord_name, output): ] -@pytest.mark.sequential @pytest.mark.parametrize('var_names,output', TEST_FIX_BOUNDS) def test_fix_bounds(var_names, output): """Test retrieving of bounds cube from list of cubes.""" @@ -497,7 +455,6 @@ def test_fix_bounds(var_names, output): ] -@pytest.mark.sequential @pytest.mark.parametrize('cubes_in,decimals,out', TEST_ROUND) def test_round_coordinate(cubes_in, decimals, out): """Test rounding of coordinates.""" @@ -512,7 +469,6 @@ def test_round_coordinate(cubes_in, decimals, out): assert coords[0] == out[idx] -@pytest.mark.sequential def test_round_coordinates_single_coord(): """Test rounding of specified coordinate.""" coords, bounds = [10.0001], [[9.0001, 11.0001]] @@ -529,18 +485,3 @@ def test_round_coordinates_single_coord(): assert cubes[0].coord('longitude') is out[0].coord('longitude') np.testing.assert_allclose(out[0].coord('latitude').points, [10]) np.testing.assert_allclose(out[0].coord('latitude').bounds, [[9, 11]]) - - -def test_fix_ocean_depth_coord(): - """Test `fix_ocean_depth_coord`.""" - z_coord = iris.coords.DimCoord(0.0, var_name='alt', - attributes={'positive': 'up'}) - cube = iris.cube.Cube([0.0], var_name='x', - dim_coords_and_dims=[(z_coord, 0)]) - fix_ocean_depth_coord(cube) - depth_coord = cube.coord('depth') - assert depth_coord.standard_name == 'depth' - assert depth_coord.var_name == 'lev' - assert depth_coord.units == 'm' - assert depth_coord.long_name == 'ocean depth coordinate' - assert depth_coord.attributes == {'positive': 'down'} diff --git a/tests/integration/cmor/test_table.py b/tests/integration/cmor/test_table.py index 568dd6aaba..0580fad4dc 100644 --- a/tests/integration/cmor/test_table.py +++ b/tests/integration/cmor/test_table.py @@ -381,12 +381,3 @@ def test_get_variable_tasconf95(self): self.assertEqual(var.long_name, 'Near-Surface Air Temperature Uncertainty Range') self.assertEqual(var.units, 'K') - - def test_get_variable_tasaga(self): - """Get tas variable.""" - CustomInfo() - var = self.variables_info.get_variable('Amon', 'tasaga') - self.assertEqual(var.short_name, 'tasaga') - self.assertEqual(var.long_name, - 'Global-mean Near-Surface Air Temperature Anomaly') - self.assertEqual(var.units, 'K') diff --git a/tests/integration/data_finder.yml b/tests/integration/data_finder.yml index 9ce5ea7ce4..1677e6ff32 100644 --- a/tests/integration/data_finder.yml +++ b/tests/integration/data_finder.yml @@ -249,438 +249,82 @@ get_input_filelist: found_files: - 
historical/Amon/ta/HadGEM2-ES/r1i1p1/ta_Amon_HadGEM2-ES_historical_r1i1p1_198412-200511.nc - # Test other projects - - - drs: DKRZ - variable: - variable_group: test - short_name: ta - original_short_name: ta - dataset: HadGEM3-GC31-LL - activity: CMIP - project: CMIP6 - cmor_table: CMIP6 - institute: [MOHC, NERC] - frequency: mon - modeling_realm: [atmos] - mip: Amon - exp: historical - grid: gn - ensemble: r1i1p1f1 - start_year: 1999 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/tas/gn/v20200101/tas_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_190001-194912.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_190001-194912.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_195001-199912.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_200001-201412.nc - dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ - file_patterns: - - ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc - found_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_195001-199912.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Amon/ta/gn/v20200101/ta_Amon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_200001-201412.nc - - - drs: DKRZ - variable: - variable_group: test - short_name: ta - original_short_name: ta - dataset: HADGEM1 - project: CMIP3 - cmor_table: CMIP3 - institute: [UKMO] - frequency: mon - modeling_realm: [atmos] - mip: Amon - exp: historical - ensemble: r1i1p1 - start_year: 1999 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - historical/atmos/mon/tas/HADGEM1/r1i1p1/tas_HADGEM1_190001-194912.nc - - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_190001-194912.nc - - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_195001-199912.nc - - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_200001-200112.nc - dirs: - - historical/atmos/mon/ta/HADGEM1/r1i1p1 - file_patterns: - - ta_*.nc - found_files: - - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_195001-199912.nc - - historical/atmos/mon/ta/HADGEM1/r1i1p1/ta_HADGEM1_200001-200112.nc - - - drs: default - variable: - variable_group: test - short_name: tas - original_short_name: tas - dataset: ERA-Interim - project: OBS - cmor_table: CMIP5 - frequency: mon - mip: Amon - tier: 3 - type: reanaly - version: 42 - start_year: 1999 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - Tier2/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_pr_197901-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc - dirs: - - Tier3/ERA-Interim - file_patterns: - - OBS_ERA-Interim_reanaly_42_Amon_tas[_.]*nc - found_files: - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc - - - drs: default - variable: - variable_group: test - short_name: tas - original_short_name: tas - dataset: ERA-Interim - project: OBS6 - cmor_table: CMIP6 - frequency: mon - mip: 
Amon - tier: 3 - type: reanaly - version: 42 - start_year: 1999 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - Tier2/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_pr_197901-199912.nc - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc - dirs: - - Tier3/ERA-Interim - file_patterns: - - OBS6_ERA-Interim_reanaly_42_Amon_tas[_.]*nc - found_files: - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_197901-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Amon_tas_200001-201012.nc - - # Test fx files +get_input_fx_filelist: - drs: default variable: - variable_group: test - short_name: areacella - original_short_name: areacella - dataset: HadGEM2-ES - project: CMIP5 - cmor_table: CMIP5 - institute: [INPE, MOHC] - frequency: fx - modeling_realm: [atmos] - mip: fx - exp: historical - ensemble: r1i1p1 - diagnostic: test_diag - preprocessor: test_preproc + <<: *variable + fx_files: + - areacella + - areacello + - basin + - deptho + - orog + - sftlf + - sftof + - thkcello + - volcello available_files: - - areacella_fx_HadGEM2-ES_historical_r1i1p1.nc + - sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - dirs: - - '' - file_patterns: - - areacella_fx_HadGEM2-ES_historical_r0i0p0*.nc + - areacello_fx_HadGEM2-ES_historical_r0i0p0.nc + - basin_fx_HadGEM2-ES_historical_r0i0p0.nc + - deptho_fx_HadGEM2-ES_historical_r0i0p0.nc + - orog_fx_HadGEM2-ES_historical_r0i0p0.nc + - sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + - sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - thkcello_fx_HadGEM2-ES_historical_r0i0p0.nc + - volcello_fx_HadGEM2-ES_historical_r0i0p0.nc found_files: - - areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - - - drs: DKRZ - variable: - variable_group: test - short_name: sftlf - original_short_name: sftlf - dataset: HadGEM2-ES - project: CMIP5 - cmor_table: CMIP5 - institute: [INPE, MOHC] - frequency: fx - modeling_realm: [atmos] - mip: fx - exp: historical - ensemble: r1i1p1 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r1i1p1.nc - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - dirs: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf - file_patterns: - - sftlf_fx_HadGEM2-ES_historical_r0i0p0*.nc - found_files: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r0i0p0/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - - - drs: DKRZ - variable: - variable_group: test - short_name: orog - original_short_name: orog - dataset: HadGEM2-ES - project: CMIP5 - cmor_table: CMIP5 - institute: [INPE, MOHC] - frequency: fx - modeling_realm: [atmos] - mip: fx - exp: historical - ensemble: r1i1p1 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/sftlf/sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc - - MOHC/HadGEM2-ES/historical/fx/atmos/fx/r1i1p1/v20110330/areacella/areacella_fx_HadGEM2-ES_historical_r0i0p0.nc - dirs: [] - file_patterns: - - orog_fx_HadGEM2-ES_historical_r0i0p0*.nc - found_files: [] + areacella: areacella_fx_HadGEM2-ES_historical_r0i0p0.nc + areacello: 
areacello_fx_HadGEM2-ES_historical_r0i0p0.nc + basin: basin_fx_HadGEM2-ES_historical_r0i0p0.nc + deptho: deptho_fx_HadGEM2-ES_historical_r0i0p0.nc + orog: orog_fx_HadGEM2-ES_historical_r0i0p0.nc + sftlf: sftlf_fx_HadGEM2-ES_historical_r0i0p0.nc + sftof: sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + thkcello: thkcello_fx_HadGEM2-ES_historical_r0i0p0.nc + volcello: volcello_fx_HadGEM2-ES_historical_r0i0p0.nc - - drs: DKRZ + - drs: default variable: - variable_group: test - short_name: areacello - original_short_name: areacello - dataset: HadGEM3-GC31-LL - activity: CMIP - project: CMIP6 - cmor_table: CMIP6 - institute: [MOHC, NERC] - frequency: fx - modeling_realm: [ocean] - mip: Ofx - exp: historical - grid: gn - ensemble: r1i1p1f1 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f2/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f2_gn.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc - dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/ - file_patterns: - - areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + <<: *variable + fx_files: + - sftof found_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc + sftof: null - - drs: DKRZ + - drs: BADC variable: - variable_group: test - short_name: areacello - original_short_name: areacello - dataset: HadGEM3-GC31-LL - activity: CMIP - project: CMIP6 - cmor_table: CMIP6 - institute: [MOHC, NERC] - frequency: mon - modeling_realm: [ocean] - mip: Omon - exp: historical - grid: gn - ensemble: r1i1p1f1 - start_year: 2000 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc + <<: *variable + fx_files: + - sftof available_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f2/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f2_gn.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/areacello/gn/v20200101/areacello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc - dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/ - file_patterns: - - areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc + - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20120215/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20130612/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + available_symlinks: + - link_name: MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/latest + target: v20130612 found_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/areacello/gn/v20200101/areacello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn_199901-200012.nc - - - drs: DKRZ - variable: - variable_group: test - short_name: volcello - original_short_name: volcello - dataset: HadGEM3-GC31-LL - activity: CMIP - project: CMIP6 - cmor_table: CMIP6 - institute: [MOHC, NERC] - frequency: mon - modeling_realm: [ocean] - mip: Omon - exp: historical - grid: gn - ensemble: r1i1p1f1 - start_year: 2000 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - 
CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f2/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f2_gn.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/v20200101/this_is_a_wrong_file.nc - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn.nc - dirs: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r1i1p1f1/Omon/volcello/gn/v20200101/ - file_patterns: - - volcello_Omon_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc - found_files: [] - - - drs: DKRZ - variable: - variable_group: test - short_name: volcello - original_short_name: volcello - dataset: HadGEM3-GC31-LL - activity: CMIP - project: CMIP6 - cmor_table: CMIP6 - institute: [MOHC, NERC] - frequency: fx - modeling_realm: [ocean] - mip: Ofx - exp: historical - grid: gn - ensemble: r1i1p1f1 - start_year: 2000 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - CMIP/MOHC/HadGEM3-GC31-LL/historical/r0i0p0/Ofx/volcello/gn/v20200101/volcello_Ofx_HadGEM3-GC31-LL_historical_r0i0p0_gn.nc - dirs: [] - file_patterns: - - volcello_Ofx_HadGEM3-GC31-LL_historical_r1i1p1f1_gn*.nc - found_files: [] + sftof: MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/latest/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - drs: DKRZ variable: - variable_group: test - short_name: areacella - original_short_name: areacella - dataset: HADGEM1 - project: CMIP3 - cmor_table: CMIP3 - institute: [UKMO] - frequency: fx - modeling_realm: [atmos] - mip: fx - exp: historical - ensemble: r1i1p1 - start_year: 1999 - end_year: 2000 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - historical/atmos/fx/areacella/HADGEM1/r0i0p0/areacella_HADGEM1.nc - - historical/atmos/fx/areacella/HADGEM1/r1i1p1/areacella_HADGEM1.nc - dirs: - - historical/atmos/fx/areacella/HADGEM1/r1i1p1 - file_patterns: - - areacella_*.nc - found_files: - - historical/atmos/fx/areacella/HADGEM1/r1i1p1/areacella_HADGEM1.nc - - - drs: default - variable: - variable_group: test - short_name: basin - original_short_name: basin - dataset: ERA-Interim - project: OBS - cmor_table: CMIP5 - frequency: fx - mip: fx - tier: 3 - type: reanaly - version: 42 - diagnostic: test_diag - preprocessor: test_preproc + <<: *variable + fx_files: + - sftof available_files: - - Tier2/ERA-Interim/OBS_ERA-Interim_reanaly_42_fx_areacello.nc - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_fx_basin.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_fx_basin.nc - dirs: - - Tier3/ERA-Interim - file_patterns: - - OBS_ERA-Interim_reanaly_42_fx_basin[_.]*nc + - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20120215/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc + - MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20130612/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc found_files: - - Tier3/ERA-Interim/OBS_ERA-Interim_reanaly_42_fx_basin.nc + sftof: MOHC/HadGEM2-ES/historical/fx/ocean/fx/r0i0p0/v20130612/sftof/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc - - drs: default + - drs: ETHZ variable: - variable_group: test - short_name: deptho - original_short_name: deptho - dataset: ERA-Interim - project: OBS6 - cmor_table: CMIP6 - frequency: mon - mip: Omon - tier: 3 - type: reanaly - version: 42 - start_year: 1995 - end_year: 1996 - diagnostic: test_diag - preprocessor: test_preproc + <<: *variable + fx_files: + - sftof available_files: - - Tier2/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_areacello.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_basin.nc - - 
Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_deptho.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199001-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199901-200012.nc - dirs: - - Tier3/ERA-Interim - file_patterns: - - OBS6_ERA-Interim_reanaly_42_Omon_deptho[_.]*nc + - historical/fx/sftof/HadGEM2-ES/r0i0p0/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc found_files: - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199001-199912.nc - - - drs: default - variable: - variable_group: test - short_name: deptho - original_short_name: deptho - dataset: ERA-Interim - project: OBS6 - cmor_table: CMIP6 - frequency: mon - mip: Omon - tier: 3 - type: reanaly - version: 42 - start_year: 2050 - end_year: 2100 - diagnostic: test_diag - preprocessor: test_preproc - available_files: - - Tier2/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_areacello.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_basin.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Ofx_deptho.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199001-199912.nc - - Tier3/ERA-Interim/OBS6_ERA-Interim_reanaly_42_Omon_deptho_199901-200012.nc - dirs: - - Tier3/ERA-Interim - file_patterns: - - OBS6_ERA-Interim_reanaly_42_Omon_deptho[_.]*nc - found_files: [] + sftof: historical/fx/sftof/HadGEM2-ES/r0i0p0/sftof_fx_HadGEM2-ES_historical_r0i0p0.nc diff --git a/tests/integration/preprocessor/_ancillary_vars/__init__.py b/tests/integration/preprocessor/_ancillary_vars/__init__.py deleted file mode 100644 index 88b606fc48..0000000000 --- a/tests/integration/preprocessor/_ancillary_vars/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Test _ancillary_vars.py - -Integration tests for the esmvalcore.preprocessor._ancillary_vars module -""" diff --git a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py b/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py deleted file mode 100644 index 7afd2b5be1..0000000000 --- a/tests/integration/preprocessor/_ancillary_vars/test_add_fx_variables.py +++ /dev/null @@ -1,257 +0,0 @@ -""" -Test add_fx_variables. - -Integration tests for the -:func:`esmvalcore.preprocessor._ancillary_vars` module. 
- -""" -import logging -import iris -import numpy as np -import pytest - -from esmvalcore.cmor.check import CheckLevels -from esmvalcore.preprocessor._ancillary_vars import (_is_fx_broadcastable, - add_fx_variables, - add_ancillary_variable, - add_cell_measure, - remove_fx_variables) - -logger = logging.getLogger(__name__) - -SHAPES_TO_BROADCAST = [ - ((), (1, ), True), - ((), (10, 10), True), - ((1, ), (10, ), True), - ((1, ), (10, 10), True), - ((2, ), (10, ), False), - ((10, ), (), False), - ((10, ), (1, ), False), - ((10, ), (10, ), True), - ((10, ), (10, 10), True), - ((10, ), (7, 1), False), - ((10, ), (10, 7), False), - ((10, ), (7, 1, 10), True), - ((10, ), (7, 1, 1), False), - ((10, ), (7, 1, 7), False), - ((10, ), (7, 10, 7), False), - ((10, 1), (1, 1), False), - ((10, 1), (1, 100), False), - ((10, 1), (10, 7), True), - ((10, 12), (10, 1), False), - ((10, 1), (10, 12), True), - ((10, 12), (), False), - ((), (10, 12), True), - ((10, 12), (1, ), False), - ((1, ), (10, 12), True), - ((10, 12), (12, ), False), - ((10, 12), (1, 1), False), - ((1, 1), (10, 12), True), - ((10, 12), (1, 12), False), - ((1, 12), (10, 12), True), - ((10, 12), (10, 10, 1), False), - ((10, 12), (10, 12, 1), False), - ((10, 12), (10, 12, 12), False), - ((10, 12), (10, 10, 12), True)] - - -@pytest.mark.parametrize('shape_1,shape_2,out', SHAPES_TO_BROADCAST) -def test_shape_is_broadcastable(shape_1, shape_2, out): - """Test check if two shapes are broadcastable.""" - fx_cube = iris.cube.Cube(np.ones(shape_1)) - cube = iris.cube.Cube(np.ones(shape_2)) - is_broadcastable = _is_fx_broadcastable(fx_cube, cube) - assert is_broadcastable == out - - -class Test: - """Test class.""" - @pytest.fixture(autouse=True) - def setUp(self): - """Assemble a stock cube.""" - fx_area_data = np.ones((3, 3)) - fx_volume_data = np.ones((3, 3, 3)) - self.new_cube_data = np.empty((3, 3)) - self.new_cube_data[:] = 200. - self.new_cube_3D_data = np.empty((3, 3, 3)) - self.new_cube_3D_data[:] = 200. 
- crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) - self.lons = iris.coords.DimCoord([0, 1.5, 3], - standard_name='longitude', - bounds=[[0, 1], [1, 2], [2, 3]], - units='degrees_east', - coord_system=crd_sys) - self.lats = iris.coords.DimCoord([0, 1.5, 3], - standard_name='latitude', - bounds=[[0, 1], [1, 2], [2, 3]], - units='degrees_north', - coord_system=crd_sys) - self.depth = iris.coords.DimCoord([0, 1.5, 3], - standard_name='depth', - bounds=[[0, 1], [1, 2], [2, 3]], - units='m', - long_name='ocean depth coordinate') - self.monthly_times = iris.coords.DimCoord( - [15.5, 45, 74.5, 105, 135.5, 166, - 196.5, 227.5, 258, 288.5, 319, 349.5], - standard_name='time', - bounds=[[0, 31], [31, 59], [59, 90], - [90, 120], [120, 151], [151, 181], - [181, 212], [212, 243], [243, 273], - [273, 304], [304, 334], [334, 365]], - units='days since 1950-01-01 00:00:00') - self.yearly_times = iris.coords.DimCoord( - [182.5, 547.5], - standard_name='time', - bounds=[[0, 365], [365, 730]], - units='days since 1950-01-01 00:00') - self.coords_spec = [(self.lats, 0), (self.lons, 1)] - self.fx_area = iris.cube.Cube(fx_area_data, - dim_coords_and_dims=self.coords_spec) - self.fx_volume = iris.cube.Cube(fx_volume_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2) - ]) - - def test_add_cell_measure_area(self, tmp_path): - """Test add area fx variables as cell measures.""" - fx_vars = { - 'areacella': { - 'short_name': 'areacella', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx'}, - 'areacello': { - 'short_name': 'areacello', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'Ofx', - 'frequency': 'fx' - } - } - for fx_var in fx_vars: - self.fx_area.var_name = fx_var - self.fx_area.standard_name = 'cell_area' - self.fx_area.units = 'm2' - fx_file = str(tmp_path / f'{fx_var}.nc') - fx_vars[fx_var].update({'filename': fx_file}) - iris.save(self.fx_area, fx_file) - cube = iris.cube.Cube(self.new_cube_data, - dim_coords_and_dims=self.coords_spec) - cube = add_fx_variables( - cube, {fx_var: fx_vars[fx_var]}, CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_area.standard_name) is not None - - def test_add_cell_measure_volume(self, tmp_path): - """Test add volume as cell measure.""" - fx_vars = { - 'volcello': { - 'short_name': 'volcello', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'Ofx', - 'frequency': 'fx'} - } - self.fx_volume.var_name = 'volcello' - self.fx_volume.standard_name = 'ocean_volume' - self.fx_volume.units = 'm3' - fx_file = str(tmp_path / 'volcello.nc') - iris.save(self.fx_volume, fx_file) - fx_vars['volcello'].update({'filename': fx_file}) - cube = iris.cube.Cube(self.new_cube_3D_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2)]) - cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) - assert cube.cell_measure(self.fx_volume.standard_name) is not None - - def test_no_cell_measure(self): - """Test no cell measure is added.""" - cube = iris.cube.Cube(self.new_cube_3D_data, - dim_coords_and_dims=[ - (self.depth, 0), - (self.lats, 1), - (self.lons, 2)]) - cube = add_fx_variables(cube, {'areacello': None}, CheckLevels.IGNORE) - assert cube.cell_measures() == [] - - def test_add_ancillary_vars(self, tmp_path): - """Test invalid variable is not added as cell measure.""" - self.fx_area.var_name = 'sftlf' - self.fx_area.standard_name = "land_area_fraction" - self.fx_area.units = '%' - fx_file = str(tmp_path / f'{self.fx_area.var_name}.nc') - 
iris.save(self.fx_area, fx_file) - fx_vars = { - 'sftlf': { - 'short_name': 'sftlf', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx', - 'filename': fx_file} - } - cube = iris.cube.Cube(self.new_cube_data, - dim_coords_and_dims=self.coords_spec) - cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) - assert cube.ancillary_variable(self.fx_area.standard_name) is not None - - def test_wrong_shape(self, tmp_path): - """ - Test fx_variable is not added if it's not broadcastable to cube. - """ - volume_data = np.ones((2, 3, 3, 3)) - volume_cube = iris.cube.Cube( - volume_data, - dim_coords_and_dims=[(self.yearly_times, 0), - (self.depth, 1), - (self.lats, 2), - (self.lons, 3)]) - volume_cube.standard_name = 'ocean_volume' - volume_cube.var_name = 'volcello' - volume_cube.units = 'm3' - fx_file = str(tmp_path / f'{volume_cube.var_name}.nc') - iris.save(volume_cube, fx_file) - fx_vars = { - 'volcello': { - 'short_name': 'volcello', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'Oyr', - 'frequency': 'yr', - 'filename': fx_file} - } - data = np.ones((12, 3, 3, 3)) - cube = iris.cube.Cube( - data, - dim_coords_and_dims=[(self.monthly_times, 0), - (self.depth, 1), - (self.lats, 2), - (self.lons, 3)]) - cube.var_name = 'thetao' - cube = add_fx_variables(cube, fx_vars, CheckLevels.IGNORE) - assert cube.cell_measures() == [] - - def test_remove_fx_vars(self): - """Test fx_variables are removed from cube.""" - cube = iris.cube.Cube(self.new_cube_3D_data, - dim_coords_and_dims=[(self.depth, 0), - (self.lats, 1), - (self.lons, 2)]) - self.fx_area.var_name = 'areacella' - self.fx_area.standard_name = 'cell_area' - self.fx_area.units = 'm2' - add_cell_measure(cube, self.fx_area, measure='area') - assert cube.cell_measure(self.fx_area.standard_name) is not None - self.fx_area.var_name = 'sftlf' - self.fx_area.standard_name = "land_area_fraction" - self.fx_area.units = '%' - add_ancillary_variable(cube, self.fx_area) - assert cube.ancillary_variable(self.fx_area.standard_name) is not None - cube = remove_fx_variables(cube) - assert cube.cell_measures() == [] - assert cube.ancillary_variables() == [] diff --git a/tests/integration/preprocessor/_io/test_concatenate.py b/tests/integration/preprocessor/_io/test_concatenate.py index ce0d9529b1..f8ef998f90 100644 --- a/tests/integration/preprocessor/_io/test_concatenate.py +++ b/tests/integration/preprocessor/_io/test_concatenate.py @@ -1,7 +1,7 @@ """Integration tests for :func:`esmvalcore.preprocessor._io.concatenate`.""" -import unittest import warnings +import unittest from unittest.mock import call import numpy as np @@ -243,12 +243,6 @@ def test_concatenate(self): np.testing.assert_array_equal( concatenated.coord('time').points, np.array([1, 2, 3, 4, 5, 6])) - def test_concatenate_empty_cubes(self): - """Test concatenation with empty :class:`iris.cube.CubeList`.""" - empty_cubes = CubeList([]) - result = _io.concatenate(empty_cubes) - assert result is empty_cubes - def test_concatenate_noop(self): """Test concatenation of a single cube.""" concatenated = _io.concatenate([self.raw_cubes[0]]) diff --git a/tests/integration/preprocessor/_io/test_save.py b/tests/integration/preprocessor/_io/test_save.py index 9ccb25efcf..cc6c98364c 100644 --- a/tests/integration/preprocessor/_io/test_save.py +++ b/tests/integration/preprocessor/_io/test_save.py @@ -8,7 +8,7 @@ import netCDF4 import numpy as np from iris.coords import DimCoord -from iris.cube import Cube, CubeList +from iris.cube import Cube from 
esmvalcore.preprocessor import save @@ -78,13 +78,6 @@ def test_save_zlib(self): self.assertEqual(sample_filters['complevel'], 4) handler.close() - def test_fail_empty_cubes(self): - """Test save fails if empty cubes is provided.""" - (_, filename) = self._create_sample_cube() - empty_cubes = CubeList([]) - with self.assertRaises(ValueError): - save(empty_cubes, filename) - def test_fail_without_filename(self): """Test save fails if filename is not provided.""" cube, _ = self._create_sample_cube() diff --git a/tests/integration/preprocessor/_mask/test_mask.py b/tests/integration/preprocessor/_mask/test_mask.py index 4e2ef513f8..5ddb30a228 100644 --- a/tests/integration/preprocessor/_mask/test_mask.py +++ b/tests/integration/preprocessor/_mask/test_mask.py @@ -10,10 +10,8 @@ import numpy as np import pytest -from esmvalcore.cmor.check import CheckLevels from esmvalcore.preprocessor import (PreprocessorFile, mask_fillvalues, - mask_landsea, mask_landseaice, - add_fx_variables) + mask_landsea, mask_landseaice) from tests import assert_array_equal @@ -49,90 +47,58 @@ def setUp(self): units='hours') self.coords_spec = [(self.lats, 0), (self.lons, 1)] self.fx_mask = iris.cube.Cube(fx_data, - dim_coords_and_dims=self.coords_spec, - units='%') + dim_coords_and_dims=self.coords_spec) self.mock_data = np.ma.empty((4, 3, 3)) self.mock_data[:] = 10. - def test_components_fx_var(self, tmp_path): - """Test compatibility of ancillary variables.""" - self.fx_mask.var_name = 'sftlf' - self.fx_mask.standard_name = 'land_area_fraction' + def test_components_fx_dict(self, tmp_path): + """Test compatibility of input fx dictionary.""" sftlf_file = str(tmp_path / 'sftlf_mask.nc') iris.save(self.fx_mask, sftlf_file) - fx_vars = { - 'sftlf': { - 'short_name': 'sftlf', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx', - 'filename': sftlf_file} - } new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_land = add_fx_variables(new_cube_land, fx_vars, - CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, + { + 'sftlf': sftlf_file, + 'sftof': [], + }, 'land', ) assert isinstance(result_land, iris.cube.Cube) - self.fx_mask.var_name = 'sftgif' - self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) - fx_vars = { - 'sftgif': { - 'short_name': 'sftgif', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx', - 'filename': sftlf_file} - } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, - CheckLevels.IGNORE) result_ice = mask_landseaice( new_cube_ice, + { + 'sftgif': sftgif_file, + 'sftof': [], + }, 'ice', ) assert isinstance(result_ice, iris.cube.Cube) def test_mask_landsea(self, tmp_path): """Test mask_landsea func.""" - self.fx_mask.var_name = 'sftlf' - self.fx_mask.standard_name = 'land_area_fraction' sftlf_file = str(tmp_path / 'sftlf_mask.nc') iris.save(self.fx_mask, sftlf_file) - fx_vars = { - 'sftlf': { - 'short_name': 'sftlf', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx', - 'filename': sftlf_file} - } new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_land = add_fx_variables(new_cube_land, fx_vars, - CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_sea = 
add_fx_variables(new_cube_sea, fx_vars, - CheckLevels.IGNORE) # mask with fx files result_land = mask_landsea( new_cube_land, + {'sftlf': sftlf_file}, 'land', ) result_sea = mask_landsea( new_cube_sea, + {'sftlf': sftlf_file}, 'sea', ) expected = np.ma.empty((3, 3)) @@ -151,19 +117,17 @@ def test_mask_landsea(self, tmp_path): # Mask with shp files although sftlf is available new_cube_land = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_land = add_fx_variables(new_cube_land, fx_vars, - CheckLevels.IGNORE) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_sea = add_fx_variables(new_cube_sea, fx_vars, - CheckLevels.IGNORE) result_land = mask_landsea( new_cube_land, + {'sftlf': sftlf_file}, 'land', always_use_ne_mask=True, ) result_sea = mask_landsea( new_cube_sea, + {'sftlf': sftlf_file}, 'sea', always_use_ne_mask=True, ) @@ -181,8 +145,8 @@ def test_mask_landsea(self, tmp_path): dim_coords_and_dims=self.coords_spec) new_cube_sea = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - result_land = mask_landsea(new_cube_land, 'land') - result_sea = mask_landsea(new_cube_sea, 'sea') + result_land = mask_landsea(new_cube_land, {}, 'land') + result_sea = mask_landsea(new_cube_sea, {}, 'sea') # bear in mind all points are in the ocean np.ma.set_fill_value(result_land.data, 1e+20) @@ -194,24 +158,12 @@ def test_mask_landsea(self, tmp_path): def test_mask_landseaice(self, tmp_path): """Test mask_landseaice func.""" - self.fx_mask.var_name = 'sftgif' - self.fx_mask.standard_name = 'land_ice_area_fraction' sftgif_file = str(tmp_path / 'sftgif_mask.nc') iris.save(self.fx_mask, sftgif_file) - fx_vars = { - 'sftgif': { - 'short_name': 'sftgif', - 'project': 'CMIP6', - 'dataset': 'EC-Earth3', - 'mip': 'fx', - 'frequency': 'fx', - 'filename': sftgif_file} - } new_cube_ice = iris.cube.Cube(self.new_cube_data, dim_coords_and_dims=self.coords_spec) - new_cube_ice = add_fx_variables(new_cube_ice, fx_vars, - CheckLevels.IGNORE) - result_ice = mask_landseaice(new_cube_ice, 'ice') + result_ice = mask_landseaice(new_cube_ice, {'sftgif': sftgif_file}, + 'ice') expected = np.ma.empty((3, 3)) expected.data[:] = 200. 
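For reference, the call convention exercised by these updated mask tests passes the fx files as a plain mapping from fx short name to file path (second positional argument); the empty-mapping cases above fall back to the bundled Natural Earth shapefiles. A usage sketch under those assumptions (cube and file names are hypothetical):

import iris

from esmvalcore.preprocessor import mask_landsea, mask_landseaice

cube = iris.load_cube('tas.nc')  # hypothetical input cube

# Mask out land or sea using a CMOR fx file, as in the tests above.
land_masked = mask_landsea(cube, {'sftlf': 'sftlf_mask.nc'}, 'land')
sea_masked = mask_landsea(cube, {'sftlf': 'sftlf_mask.nc'}, 'sea')

# No fx file available: mask_landsea falls back to Natural Earth shapefiles.
ne_masked = mask_landsea(cube, {}, 'sea')

# Ice masking reads the land-ice area fraction fx file instead.
ice_masked = mask_landseaice(cube, {'sftgif': 'sftgif_mask.nc'}, 'ice')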
expected.mask = np.ones((3, 3), bool) diff --git a/tests/integration/test_data_finder.py b/tests/integration/test_data_finder.py index b265ee59ab..db7ec5b8da 100644 --- a/tests/integration/test_data_finder.py +++ b/tests/integration/test_data_finder.py @@ -11,10 +11,9 @@ from esmvalcore.cmor.table import read_cmor_tables # Initialize with standard config developer file -CFG_DEVELOPER = esmvalcore._config.read_config_developer_file() -esmvalcore._config._config.CFG = CFG_DEVELOPER +esmvalcore._config.CFG = esmvalcore._config.read_config_developer_file() # Initialize CMOR tables -read_cmor_tables(CFG_DEVELOPER) +read_cmor_tables(esmvalcore._config.CFG) # Load test configuration with open(os.path.join(os.path.dirname(__file__), 'data_finder.yml')) as file: diff --git a/tests/integration/test_recipe.py b/tests/integration/test_recipe.py index 8d2f932e4e..459c242b7e 100644 --- a/tests/integration/test_recipe.py +++ b/tests/integration/test_recipe.py @@ -76,7 +76,6 @@ ) DEFAULT_PREPROCESSOR_STEPS = ( - 'add_fx_variables', 'cleanup', 'cmor_check_data', 'cmor_check_metadata', @@ -86,7 +85,6 @@ 'fix_file', 'fix_metadata', 'load', - 'remove_fx_variables', 'save', ) @@ -163,11 +161,6 @@ def _get_default_settings_for_chl(fix_dir, save_filename): 'short_name': 'chl', 'frequency': 'yr', }, - 'add_fx_variables': { - 'fx_variables': {}, - 'check_level': CheckLevels.DEFAULT, - }, - 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -589,11 +582,6 @@ def test_default_fx_preprocessor(tmp_path, patched_datafinder, config_user): 'short_name': 'sftlf', 'frequency': 'fx', }, - 'add_fx_variables': { - 'fx_variables': {}, - 'check_level': CheckLevels.DEFAULT, - }, - 'remove_fx_variables': {}, 'cleanup': { 'remove': [fix_dir] }, @@ -1667,9 +1655,9 @@ def test_weighting_landsea_fraction(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['area_type'] == 'land' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) if product.attributes['project'] == 'obs4mips': assert len(fx_variables) == 1 @@ -1718,12 +1706,18 @@ def test_weighting_landsea_fraction_no_fx(tmp_path, patched_failing_datafinder, for product in task.products: assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 1 + assert len(settings) == 2 assert 'exclude' not in settings assert settings['area_type'] == 'land' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 0 + if product.attributes['project'] == 'obs4mips': + assert len(fx_variables) == 1 + assert fx_variables['sftlf'] == [] + else: + assert len(fx_variables) == 2 + assert fx_variables['sftlf'] == [] + assert fx_variables['sftof'] == [] def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, @@ -1769,9 +1763,13 @@ def test_weighting_landsea_fraction_exclude(tmp_path, patched_datafinder, continue assert 'weighting_landsea_fraction' in product.settings settings = product.settings['weighting_landsea_fraction'] - assert len(settings) == 1 + assert len(settings) == 2 assert 'exclude' not in settings assert settings['area_type'] == 'land' + fx_variables = 
settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert fx_variables.get('sftlf') def test_weighting_landsea_fraction_exclude_fail(tmp_path, patched_datafinder, @@ -1845,9 +1843,9 @@ def test_landmask(tmp_path, patched_datafinder, config_user): for product in task.products: assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['mask_out'] == 'sea' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) fx_variables = fx_variables.values() if product.attributes['project'] == 'obs4mips': @@ -1857,80 +1855,6 @@ def test_landmask(tmp_path, patched_datafinder, config_user): def test_user_defined_fxvar(tmp_path, patched_datafinder, config_user): - content = dedent(""" - preprocessors: - landmask: - mask_landsea: - mask_out: sea - fx_variables: - sftlf: - exp: piControl - mask_landseaice: - mask_out: sea - fx_variables: - sftgif: - exp: piControl - volume_statistics: - operator: mean - area_statistics: - operator: mean - fx_variables: - areacello: - mip: fx - exp: piControl - diagnostics: - diagnostic_name: - variables: - gpp: - preprocessor: landmask - project: CMIP5 - mip: Lmon - exp: historical - start_year: 2000 - end_year: 2005 - ensemble: r1i1p1 - additional_datasets: - - {dataset: CanESM2} - scripts: null - """) - recipe = get_recipe(tmp_path, content, config_user) - - # Check custom fx variables - task = recipe.tasks.pop() - product = task.products.pop() - - # landsea - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' - fx_variables = product.settings['add_fx_variables']['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 4 - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] - - # landseaice - settings = product.settings['mask_landseaice'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] - - # volume statistics - settings = product.settings['volume_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - assert 'volcello' in fx_variables - - # area statistics - settings = product.settings['area_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - assert '_fx_' in fx_variables['areacello']['filename'] - assert '_piControl_' in fx_variables['areacello']['filename'] - - -def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): content = dedent(""" preprocessors: landmask: @@ -1946,6 +1870,7 @@ def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): operator: mean fx_variables: [{'short_name': 'areacello', 'mip': 'fx', 'exp': 'piControl'}] + diagnostics: diagnostic_name: variables: @@ -1969,33 +1894,39 @@ def test_user_defined_fxlist(tmp_path, patched_datafinder, config_user): # landsea settings = product.settings['mask_landsea'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['mask_out'] == 'sea' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) - assert len(fx_variables) == 4 - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in 
fx_variables['sftlf']['filename'] + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['sftlf'] + assert '_piControl_' in fx_variables['sftlf'] # landseaice settings = product.settings['mask_landseaice'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['mask_out'] == 'sea' - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_piControl_' in fx_variables['sftlf']['filename'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['sftgif'] + assert '_piControl_' in fx_variables['sftgif'] # volume statistics settings = product.settings['volume_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' - assert 'volcello' in fx_variables + assert 'fx_variables' not in settings # area statistics settings = product.settings['area_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - assert '_fx_' in fx_variables['areacello']['filename'] - assert '_piControl_' in fx_variables['areacello']['filename'] + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + assert len(fx_variables) == 1 + assert '_fx_' in fx_variables['areacello'] + assert '_piControl_' in fx_variables['areacello'] def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): @@ -2037,10 +1968,10 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): for product in task.products: assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 2 + assert len(settings) == 3 assert settings['mask_out'] == 'sea' assert settings['always_use_ne_mask'] is False - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) fx_variables = fx_variables.values() assert not any(fx_variables) @@ -2049,72 +1980,6 @@ def test_landmask_no_fx(tmp_path, patched_failing_datafinder, config_user): def test_fx_vars_mip_change_cmip6(tmp_path, patched_datafinder, config_user): TAGS.set_tag_values(TAGS_FOR_TESTING) - content = dedent(""" - preprocessors: - preproc: - area_statistics: - operator: mean - fx_variables: - areacella: - ensemble: r2i1p1f1 - areacello: - clayfrac: - sftlf: - sftgif: - mip: fx - sftof: - mask_landsea: - mask_out: sea - - diagnostics: - diagnostic_name: - variables: - tas: - preprocessor: preproc - project: CMIP6 - mip: Amon - exp: historical - start_year: 2000 - end_year: 2005 - ensemble: r1i1p1f1 - grid: gn - additional_datasets: - - {dataset: CanESM5} - scripts: null - """) - recipe = get_recipe(tmp_path, content, config_user) - - # Check generated tasks - assert len(recipe.tasks) == 1 - task = recipe.tasks.pop() - assert task.name == 'diagnostic_name' + TASKSEP + 'tas' - assert len(task.products) == 1 - product = task.products.pop() - - # Check area_statistics - assert 'area_statistics' in product.settings - settings = product.settings['area_statistics'] - assert len(settings) == 1 - assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] - assert isinstance(fx_variables, dict) - assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella']['filename'] - assert '_r2i1p1f1_' in fx_variables['areacella']['filename'] - assert '_Ofx_' in fx_variables['areacello']['filename'] - assert '_Efx_' in fx_variables['clayfrac']['filename'] - assert '_fx_' in 
fx_variables['sftlf']['filename'] - assert '_fx_' in fx_variables['sftgif']['filename'] - assert '_Ofx_' in fx_variables['sftof']['filename'] - - # Check mask_landsea - assert 'mask_landsea' in product.settings - settings = product.settings['mask_landsea'] - assert len(settings) == 1 - assert settings['mask_out'] == 'sea' - - -def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): content = dedent(""" preprocessors: preproc: @@ -2127,7 +1992,7 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): 'sftlf', 'sftgif', 'sftof', - ] + ] mask_landsea: mask_out: sea @@ -2159,23 +2024,34 @@ def test_fx_list_mip_change_cmip6(tmp_path, patched_datafinder, config_user): # Check area_statistics assert 'area_statistics' in product.settings settings = product.settings['area_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 6 - assert '_fx_' in fx_variables['areacella']['filename'] - assert '_Ofx_' in fx_variables['areacello']['filename'] - assert '_Efx_' in fx_variables['clayfrac']['filename'] - assert '_fx_' in fx_variables['sftlf']['filename'] - assert '_fx_' in fx_variables['sftgif']['filename'] - assert '_Ofx_' in fx_variables['sftof']['filename'] + assert '_fx_' in fx_variables['areacella'] + assert '_Ofx_' in fx_variables['areacello'] + assert '_Efx_' in fx_variables['clayfrac'] + assert '_fx_' in fx_variables['sftlf'] + assert '_fx_' in fx_variables['sftgif'] + assert '_Ofx_' in fx_variables['sftof'] # Check mask_landsea assert 'mask_landsea' in product.settings settings = product.settings['mask_landsea'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['mask_out'] == 'sea' + fx_variables = settings['fx_variables'] + assert isinstance(fx_variables, dict) + fx_variables = fx_variables.values() + assert len(fx_variables) == 2 + for fx_file in fx_variables: + if 'sftlf' in fx_file: + assert '_fx_' in fx_file + elif 'sftof' in fx_file: + assert '_Ofx_' in fx_file + else: + assert False def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2187,9 +2063,7 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: - volcello: - mip: Ofx + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2219,13 +2093,13 @@ def test_fx_vars_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Omon_' not in fx_variables['volcello']['filename'] - assert '_Ofx_' in fx_variables['volcello']['filename'] + assert '_Omon_' in fx_variables['volcello'] + assert '_Ofx_' not in fx_variables['volcello'] def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, @@ -2235,10 +2109,8 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean - fx_variables: - volcello: - mip: Oyr - exp: piControl + fx_variables: [{'short_name': 'volcello', 'mip': 'Oyr', + 'exp': 
'piControl'}] diagnostics: diagnostic_name: @@ -2268,14 +2140,14 @@ def test_fx_dicts_volcello_in_ofx_cmip6(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Oyr_' in fx_variables['volcello']['filename'][0] - assert '_piControl_' in fx_variables['volcello']['filename'][0] - assert '_Omon_' not in fx_variables['volcello']['filename'][0] + assert '_Oyr_' in fx_variables['volcello'] + assert '_piControl_' in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello'] def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, @@ -2327,8 +2199,7 @@ def test_fx_vars_list_no_preproc_cmip6(tmp_path, patched_datafinder, settings = product.settings['area_statistics'] assert len(settings) == 1 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] - assert len(fx_variables) == 2 + assert 'fx_variables' not in settings def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, @@ -2338,9 +2209,7 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: - volcello: - mip: Omon + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2370,13 +2239,13 @@ def test_fx_vars_volcello_in_omon_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello']['filename'][0] - assert '_Omon_' in fx_variables['volcello']['filename'][0] + assert '_Ofx_' not in fx_variables['volcello'] + assert '_Omon_' in fx_variables['volcello'] def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, @@ -2386,9 +2255,7 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, preproc: volume_statistics: operator: mean - fx_variables: - volcello: - mip: Oyr + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2418,13 +2285,13 @@ def test_fx_vars_volcello_in_oyr_cmip6(tmp_path, patched_failing_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_Ofx_' not in fx_variables['volcello']['filename'][0] - assert '_Oyr_' in fx_variables['volcello']['filename'][0] + assert '_Ofx_' not in fx_variables['volcello'] + assert '_Oyr_' in fx_variables['volcello'] def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, @@ -2434,8 +2301,7 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, preproc: volume_statistics: operator: mean 
- fx_variables: - volcello: + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2464,13 +2330,13 @@ def test_fx_vars_volcello_in_fx_cmip5(tmp_path, patched_datafinder, # Check volume_statistics assert 'volume_statistics' in product.settings settings = product.settings['volume_statistics'] - assert len(settings) == 1 + assert len(settings) == 2 assert settings['operator'] == 'mean' - fx_variables = product.settings['add_fx_variables']['fx_variables'] + fx_variables = settings['fx_variables'] assert isinstance(fx_variables, dict) assert len(fx_variables) == 1 - assert '_fx_' in fx_variables['volcello']['filename'] - assert '_Omon_' not in fx_variables['volcello']['filename'] + assert '_fx_' in fx_variables['volcello'] + assert '_Omon_' not in fx_variables['volcello'] def test_wrong_project(tmp_path, patched_datafinder, config_user): @@ -2479,8 +2345,7 @@ preproc: volume_statistics: operator: mean - fx_variables: - volcello: + fx_variables: ['volcello'] diagnostics: diagnostic_name: @@ -2513,9 +2378,10 @@ def test_invalid_fx_var_cmip6(tmp_path, patched_datafinder, config_user): preproc: area_statistics: operator: mean - fx_variables: - areacella: - wrong_fx_variable: + fx_variables: [ + 'areacella', + 'wrong_fx_variable', + ] diagnostics: diagnostic_name: @@ -2534,7 +2400,7 @@ scripts: null """) msg = ("Requested fx variable 'wrong_fx_variable' not available in any " - "CMOR table") + "'fx'-related CMOR table") with pytest.raises(RecipeError) as rec_err_exp: get_recipe(tmp_path, content, config_user) assert str(rec_err_exp.value) == INITIALIZATION_ERROR_MSG diff --git a/tests/integration/test_recipe_checks.py b/tests/integration/test_recipe_checks.py index 5cec9b2abf..3eb2626a28 100644 --- a/tests/integration/test_recipe_checks.py +++ b/tests/integration/test_recipe_checks.py @@ -1,5 +1,4 @@ """Integration tests for :mod:`esmvalcore._recipe_checks`.""" -from typing import Any, List from unittest import mock import pytest @@ -59,7 +58,7 @@ def test_data_availability_data(mock_logger, input_files, var, error): assert var == saved_var -DATA_AVAILABILITY_NO_DATA: List[Any] = [ +DATA_AVAILABILITY_NO_DATA = [ ([], [], None), ([], None, None), (None, [], None), diff --git a/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc b/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc index 81e9e60a627758ee5e14304c4a5434366843d204..c473d78f49b5ca3320a423179772e5ee52a0b855 100644
Binary files a/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc and b/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-full-mean.nc differ
diff --git a/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc b/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc index 81e9e60a627758ee5e14304c4a5434366843d204..c473d78f49b5ca3320a423179772e5ee52a0b855 100644
Binary files a/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc and b/tests/sample_data/multimodel_statistics/timeseries_daily_365_day-overlap-mean.nc differ
diff --git a/tests/unit/cmor/test_cmor_check.py b/tests/unit/cmor/test_cmor_check.py index fa1c314166..504eaf06f1 100644 --- a/tests/unit/cmor/test_cmor_check.py +++ b/tests/unit/cmor/test_cmor_check.py @@ -619,23 +619,6 @@ def test_non_increasing(self): self._update_coordinate_values(self.cube, coord, values) self._check_fails_in_metadata() - def test_non_increasing_fix(self): - """Check automatic fix for direction.""" - coord = self.cube.coord('latitude') - values = np.linspace( - coord.points[-1], - coord.points[0], - len(coord.points) - ) - self._update_coordinate_values(self.cube, coord, values) - self._check_cube(automatic_fixes=True) - self._check_cube() - # test bounds are contiguous - bounds = self.cube.coord('latitude').bounds - right_bounds = bounds[:-2, 1] - left_bounds = bounds[1:-1, 0] -
self.assertTrue(np.all(left_bounds == right_bounds)) - def test_non_decreasing(self): """Fail in metadata if decreasing coordinate is increasing.""" self.var_info.coordinates['lat'].stored_direction = 'decreasing' @@ -656,11 +639,6 @@ def test_non_decreasing_fix(self): for index in range(20): self.assertTrue( iris.util.approx_equal(cube_points[index], reference[index])) - # test bounds are contiguous - bounds = self.cube.coord('latitude').bounds - right_bounds = bounds[:-2, 1] - left_bounds = bounds[1:-1, 0] - self.assertTrue(np.all(left_bounds == right_bounds)) def test_not_bounds(self): """Warning if bounds are not available.""" @@ -691,15 +669,6 @@ def test_lons_automatic_fix(self): self.cube = self.cube.intersection(longitude=(-180., 180.)) self._check_cube(automatic_fixes=True) - def test_lons_automatic_fix_with_bounds(self): - """Test automatic fixes for bad longitudes with added bounds.""" - self.cube.coord('longitude').bounds = None - self.cube = self.cube.intersection(longitude=(-180., 180.)) - self._check_cube(automatic_fixes=True) - self.assertTrue(self.cube.coord('longitude').points.min() >= 0.) - self.assertTrue(self.cube.coord('longitude').points.max() <= 360.) - self.assertTrue(self.cube.coord('longitude').has_bounds()) - def test_high_lons_automatic_fix(self): """Test automatic fixes for high longitudes.""" self.cube = self.cube.intersection(longitude=(180., 520.)) diff --git a/tests/unit/data_finder/test_get_start_end_year.py b/tests/unit/data_finder/test_get_start_end_year.py index e485f3ccf9..6e415c0dad 100644 --- a/tests/unit/data_finder/test_get_start_end_year.py +++ b/tests/unit/data_finder/test_get_start_end_year.py @@ -19,11 +19,6 @@ ['var_control-19800101_whatever.nc', 1980, 1980], ['19800101_var_control-1950_whatever.nc', 1980, 1980], ['var_control-1950_whatever_19800101.nc', 1980, 1980], - ['CM61-LR-hist-03.1950_18500101_19491231_1M_concbc.nc', 1850, 1949], - [ - 'icon-2.6.1_atm_amip_R2B5_r1v1i1p1l1f1_phy_3d_ml_20150101T000000Z.nc', - 2015, 2015 - ], ] diff --git a/tests/unit/data_finder/test_replace_tags.py b/tests/unit/data_finder/test_replace_tags.py index 097cc7b328..93ba42b41a 100644 --- a/tests/unit/data_finder/test_replace_tags.py +++ b/tests/unit/data_finder/test_replace_tags.py @@ -1,65 +1,22 @@ """Tests for _replace_tags in _data_finder.py.""" + from esmvalcore._data_finder import _replace_tags VARIABLE = { - 'project': 'CMIP6', - 'dataset': 'ACCURATE-MODEL', - 'activity': 'act', - 'exp': 'experiment', - 'institute': 'HMA', - 'ensemble': 'r1i1p1f1', - 'mip': 'Amon', 'short_name': 'tas', - 'grid': 'gr', } -def test_replace_tags(): - """Tests for get_start_end_year function.""" - path = _replace_tags( - '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' - '{grid}/{latestversion}', VARIABLE) - input_file = _replace_tags( - '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', VARIABLE) - output_file = _replace_tags( - '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', VARIABLE) - assert path == [ - 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' - '{latestversion}' - ] - assert input_file == ['tas_Amon_ACCURATE-MODEL_experiment_r1i1p1f1_gr*.nc'] - assert output_file == ['CMIP6_ACCURATE-MODEL_Amon_experiment_r1i1p1f1_tas'] +def test_replace_tags_str(): + assert _replace_tags('folder/subfolder/{short_name}', + VARIABLE) == ['folder/subfolder/tas'] def test_replace_tags_list_of_str(): - assert sorted( - _replace_tags(('folder/subfolder/{short_name}', 'folder2/{short_name}', - 'subfolder/{short_name}'), VARIABLE)) 
== sorted([ - 'folder2/tas', - 'folder/subfolder/tas', - 'subfolder/tas', - ]) - - -def test_replace_tags_with_subexperiment(): - """Tests for get_start_end_year function.""" - variable = {'sub_experiment': '199411', **VARIABLE} - path = _replace_tags( - '{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/' - '{grid}/{latestversion}', variable) - input_file = _replace_tags( - '{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc', variable) - output_file = _replace_tags( - '{project}_{dataset}_{mip}_{exp}_{ensemble}_{short_name}', variable) - assert sorted(path) == sorted([ - 'act/HMA/ACCURATE-MODEL/experiment/r1i1p1f1/Amon/tas/gr/' - '{latestversion}', - 'act/HMA/ACCURATE-MODEL/experiment/199411-r1i1p1f1/Amon/tas/gr/' - '{latestversion}' - ]) - assert input_file == [ - 'tas_Amon_ACCURATE-MODEL_experiment_199411-r1i1p1f1_gr*.nc' - ] - assert output_file == [ - 'CMIP6_ACCURATE-MODEL_Amon_experiment_199411-r1i1p1f1_tas' - ] + assert _replace_tags(('folder/subfolder/{short_name}', + 'folder2/{short_name}', 'subfolder/{short_name}'), + VARIABLE) == [ + 'folder/subfolder/tas', + 'folder2/tas', + 'subfolder/tas', + ] diff --git a/tests/unit/experimental/test_config.py b/tests/unit/experimental/test_config.py index d7c8a8d761..ac74b3b856 100644 --- a/tests/unit/experimental/test_config.py +++ b/tests/unit/experimental/test_config.py @@ -1,4 +1,3 @@ -from collections.abc import MutableMapping from pathlib import Path import numpy as np @@ -225,7 +224,7 @@ def test_config_class(): assert isinstance(cfg['output_dir'], Path) assert isinstance(cfg['auxiliary_data_dir'], Path) - from esmvalcore._config._config import CFG as CFG_DEV + from esmvalcore._config import CFG as CFG_DEV assert CFG_DEV @@ -239,7 +238,7 @@ def test_config_update(): def test_config_init(): config = Config() - assert isinstance(config, MutableMapping) + assert isinstance(config, dict) def test_session(): diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index 96359502fd..b0563e539a 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -20,7 +20,6 @@ extract_region, extract_shape, ) -from esmvalcore.preprocessor._shared import guess_bounds class Test(tests.Test): @@ -70,22 +69,6 @@ def test_area_statistics_mean(self): expected = np.array([1.]) self.assert_array_equal(result.data, expected) - def test_area_statistics_cell_measure_mean(self): - """ - Test for area average of a 2D field. - The area measure is pre-loaded in the cube""" - cube = guess_bounds(self.grid, ['longitude', 'latitude']) - grid_areas = iris.analysis.cartography.area_weights(cube) - measure = iris.coords.CellMeasure( - grid_areas, - standard_name='cell_area', - units='m2', - measure='area') - self.grid.add_cell_measure(measure, range(0, measure.ndim)) - result = area_statistics(self.grid, 'mean') - expected = np.array([1.]) - self.assert_array_equal(result.data, expected) - def test_area_statistics_min(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'min') @@ -142,27 +125,6 @@ def test_extract_region(self): expected = np.ones((2, 2)) self.assert_array_equal(result.data, expected) - def test_extract_region_mean(self): - """ - Test for extracting a region and performing - the area mean of a 2D field. 
- """ - cube = guess_bounds(self.grid, ['longitude', 'latitude']) - grid_areas = iris.analysis.cartography.area_weights(cube) - measure = iris.coords.CellMeasure( - grid_areas, - standard_name='cell_area', - units='m2', - measure='area') - self.grid.add_cell_measure(measure, range(0, measure.ndim)) - region = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) - # expected outcome - expected = np.ones((2, 2)) - self.assert_array_equal(region.data, expected) - result = area_statistics(region, 'mean') - expected_mean = np.array([1.]) - self.assert_array_equal(result.data, expected_mean) - def test_extract_region_neg_lon(self): """Test for extracting a region with a negative longitude field.""" result = extract_region(self.negative_grid, -0.5, 0.5, -0.5, 0.5) @@ -214,14 +176,8 @@ def test_extract_named_region(self): def create_irregular_grid_cube(data, lons, lats): """Create test cube on irregular grid.""" - times = iris.coords.DimCoord(np.array([10, 20], dtype=np.float64), - standard_name='time', - units=Unit('days since 1950-01-01', - calendar='gregorian')) - - # Construct cube - nlat = iris.coords.DimCoord(range(data.shape[1]), var_name='nlat') - nlon = iris.coords.DimCoord(range(data.shape[2]), var_name='nlon') + nlat = iris.coords.DimCoord(range(data.shape[0]), var_name='nlat') + nlon = iris.coords.DimCoord(range(data.shape[1]), var_name='nlon') lat = iris.coords.AuxCoord(lats, var_name='lat', standard_name='latitude', @@ -231,13 +187,12 @@ def create_irregular_grid_cube(data, lons, lats): standard_name='longitude', units='degrees') dim_coord_spec = [ - (times, 0), - (nlat, 1), - (nlon, 2), + (nlat, 0), + (nlon, 1), ] aux_coord_spec = [ - (lat, [1, 2]), - (lon, [1, 2]), + (lat, [0, 1]), + (lon, [0, 1]), ] cube = iris.cube.Cube( data, @@ -250,58 +205,29 @@ def create_irregular_grid_cube(data, lons, lats): IRREGULAR_EXTRACT_REGION_TESTS = [ - { - 'region': (100, 140, -10, 90), - 'mask': np.array( - [ - [False], - [False], - ], - dtype=bool, - ), - 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, 1:3, 1:2] - }, - { - 'region': (100, 360, -60, 0), - 'mask': np.array( - [ - [True, False], - [False, False], - ], - dtype=bool, - ), - 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, 0:2, 1:3] - }, { 'region': (10, 360, 0, 90), - 'mask': np.array( - [ - [True, False], - [False, False], - ], - dtype=bool, - ), - 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, 1:, 1:] - }, - { - 'region': (0, 360, -90, -30), - 'mask': np.array( + 'mask': + np.array( [ - [False, False, False], + [True, True, True], + [True, True, False], + [True, False, False], ], dtype=bool, ), - 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, :1, :] }, { 'region': (200, 10, -90, -60), - 'mask': np.array( + 'mask': + np.array( [ [False, True, False], + [True, True, True], + [True, True, True], ], dtype=bool, ), - 'data': np.arange(18, dtype=np.float32).reshape((2, 3, 3))[:, :1, :] }, { 'region': (-150, 50, 50, -50), @@ -314,8 +240,6 @@ def create_irregular_grid_cube(data, lons, lats): ], dtype=bool, ), - 'data': - np.arange(18, dtype=np.float32).reshape((2, 3, 3)) }, { 'region': (0, 0, -100, 0), @@ -331,7 +255,7 @@ def create_irregular_grid_cube(data, lons, lats): @pytest.fixture def irregular_extract_region_cube(): """Create a test cube on an irregular grid to test `extract_region`.""" - data = np.arange(18, dtype=np.float32).reshape((2, 3, 3)) + data = np.arange(9, dtype=np.float32).reshape((3, 3)) lons = np.array( [ [0, 120, 240], @@ -365,9 +289,9 @@ def 
test_extract_region_irregular(irregular_extract_region_cube, case): end_latitude=end_lat, ) - for i in range(2): - np.testing.assert_array_equal(cube.data[i].mask, case['mask']) - np.testing.assert_array_equal(cube.data.data, case['data']) + data = np.arange(9, dtype=np.float32).reshape((3, 3)) + np.testing.assert_array_equal(cube.data.mask, case['mask']) + np.testing.assert_array_equal(cube.data.data, data) else: with pytest.raises(ValueError) as exc: extract_region( @@ -871,7 +795,7 @@ def test_extract_composite_shape_negative_bounds(make_testcube, @pytest.fixture def irreg_extract_shape_cube(): """Create a test cube on an irregular grid to test `extract_shape`.""" - data = np.arange(18, dtype=np.float32).reshape((2, 3, 3)) + data = np.arange(9, dtype=np.float32).reshape((3, 3)) lats = np.array( [ [0.0, 0.0, 0.1], @@ -908,7 +832,7 @@ def test_extract_shape_irregular(irreg_extract_shape_cube, tmp_path, method): cube = extract_shape(irreg_extract_shape_cube, shapefile, method) - data = np.arange(18, dtype=np.float32).reshape((2, 3, 3)) + data = np.arange(9, dtype=np.float32).reshape((3, 3)) mask = np.array( [ [True, True, True], @@ -920,8 +844,7 @@ def test_extract_shape_irregular(irreg_extract_shape_cube, tmp_path, method): if method == 'representative': mask[1, 1] = True np.testing.assert_array_equal(cube.data, data) - for i in range(2): - np.testing.assert_array_equal(cube.data[i].mask, mask) + np.testing.assert_array_equal(cube.data.mask, mask) def test_extract_shape_wrong_method_raises(): diff --git a/tests/unit/preprocessor/_derive/test_rlus.py b/tests/unit/preprocessor/_derive/test_rlus.py deleted file mode 100644 index 7ac7191f05..0000000000 --- a/tests/unit/preprocessor/_derive/test_rlus.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Test derivation of `rlus`.""" -import iris -import numpy as np -import pytest - -import esmvalcore.preprocessor._derive.rlus as rlus - -from .test_shared import get_cube - - -@pytest.fixture -def cubes(): - rlds_name = 'surface_downwelling_longwave_flux_in_air' - rlns_name = 'surface_net_downward_longwave_flux' - rlds_cube = get_cube([[[100.]]], - air_pressure_coord=False, - standard_name=rlds_name) - rlds_cube.attributes["positive"] = "down" - rlns_cube = get_cube([[[50.0]]], - air_pressure_coord=False, - standard_name=rlns_name) - rlns_cube.attributes["positive"] = "down" - - rlns_cube.coord("longitude").var_name = "lon" - rlns_cube.coord("longitude").var_name = "lat" - - return iris.cube.CubeList([rlds_cube, rlns_cube]) - - -def test_rlntcs_calculation(cubes): - derived_var = rlus.DerivedVariable() - out_cube = derived_var.calculate(cubes) - np.testing.assert_allclose(out_cube.data, - np.array([[[50.0]]])) - assert out_cube.attributes['positive'] == 'up' diff --git a/tests/unit/preprocessor/_derive/test_rsus.py b/tests/unit/preprocessor/_derive/test_rsus.py deleted file mode 100644 index 7636913561..0000000000 --- a/tests/unit/preprocessor/_derive/test_rsus.py +++ /dev/null @@ -1,35 +0,0 @@ -"""Test derivation of `rsus`.""" -import iris -import numpy as np -import pytest - -import esmvalcore.preprocessor._derive.rsus as rsus - -from .test_shared import get_cube - - -@pytest.fixture -def cubes(): - rsds_name = 'surface_downwelling_shortwave_flux_in_air' - rsns_name = 'surface_net_downward_shortwave_flux' - rsds_cube = get_cube([[[100.]]], - air_pressure_coord=False, - standard_name=rsds_name) - rsds_cube.attributes["positive"] = "down" - rsns_cube = get_cube([[[50.0]]], - air_pressure_coord=False, - standard_name=rsns_name) - rsns_cube.attributes["positive"] 
= "down" - - rsns_cube.coord("longitude").var_name = "lon" - rsns_cube.coord("longitude").var_name = "lat" - - return iris.cube.CubeList([rsds_cube, rsns_cube]) - - -def test_rsntcs_calculation(cubes): - derived_var = rsus.DerivedVariable() - out_cube = derived_var.calculate(cubes) - np.testing.assert_allclose(out_cube.data, - np.array([[[50.0]]])) - assert out_cube.attributes['positive'] == 'up' diff --git a/tests/unit/preprocessor/_mask/test_mask.py b/tests/unit/preprocessor/_mask/test_mask.py index a6b28e2cae..2d4c8948e6 100644 --- a/tests/unit/preprocessor/_mask/test_mask.py +++ b/tests/unit/preprocessor/_mask/test_mask.py @@ -7,7 +7,7 @@ import iris import tests from cf_units import Unit -from esmvalcore.preprocessor._mask import (_apply_fx_mask, +from esmvalcore.preprocessor._mask import (_apply_fx_mask, _check_dims, count_spells, _get_fx_mask, mask_above_threshold, mask_below_threshold, @@ -63,6 +63,12 @@ def test_apply_fx_mask_on_masked_data(self): mask=dummy_fx_mask) self.assert_array_equal(fixed_mask, app_mask) + def test_check_dims(self): + """Test _check_dims func.""" + malformed_cube = self.arr[0] + np.testing.assert_equal(True, _check_dims(self.arr, self.arr)) + np.testing.assert_equal(False, _check_dims(self.arr, malformed_cube)) + def test_count_spells(self): """Test count_spells func.""" ref_spells = count_spells(self.time_cube.data, -1000., 0, 1) diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 6da22c98e8..dc9092eb2f 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -88,7 +88,7 @@ def generate_cube_from_dates( standard_name='time', units=unit) - data = np.array((fill_val, ) * len_data, dtype=np.float32) + data = np.array((fill_val, ) * len_data) if lazy: data = da.from_array(data) @@ -104,7 +104,7 @@ def get_cubes_for_validation_test(frequency, lazy=False): # Cube with masked data cube2 = cube1.copy() - data2 = np.ma.array([5, 5, 5], mask=[True, False, False], dtype=np.float32) + data2 = np.ma.array([5, 5, 5], mask=[True, False, False]) if lazy: data2 = da.from_array(data2) cube2.data = data2 diff --git a/tests/unit/preprocessor/_other/test_other.py b/tests/unit/preprocessor/_other/test_other.py index 08a1ee26c0..6335a74f64 100644 --- a/tests/unit/preprocessor/_other/test_other.py +++ b/tests/unit/preprocessor/_other/test_other.py @@ -2,6 +2,7 @@ import unittest +import iris import iris.coord_categorisation import iris.coords import numpy as np diff --git a/tests/unit/preprocessor/_regrid/test_regrid.py b/tests/unit/preprocessor/_regrid/test_regrid.py index b7beaca442..fb6ec94232 100644 --- a/tests/unit/preprocessor/_regrid/test_regrid.py +++ b/tests/unit/preprocessor/_regrid/test_regrid.py @@ -1,20 +1,16 @@ -"""Unit tests for the :func:`esmvalcore.preprocessor.regrid.regrid` -function.""" +""" +Unit tests for the :func:`esmvalcore.preprocessor.regrid.regrid` function. 
+ +""" import unittest from unittest import mock import iris -import numpy as np -import pytest import tests from esmvalcore.preprocessor import regrid -from esmvalcore.preprocessor._regrid import ( - _CACHE, - HORIZONTAL_SCHEMES, - _horizontal_grid_is_close, -) +from esmvalcore.preprocessor._regrid import _CACHE, HORIZONTAL_SCHEMES class Test(tests.Test): @@ -68,17 +64,9 @@ def setUp(self): 'unstructured_nearest' ] - def _mock_horizontal_grid_is_close(src, tgt): - return False - - self.patch('esmvalcore.preprocessor._regrid._horizontal_grid_is_close', - side_effect=_mock_horizontal_grid_is_close) - - def _return_mock_global_stock_cube( - spec, - lat_offset=True, - lon_offset=True, - ): + def _return_mock_global_stock_cube(spec, + lat_offset=True, + lon_offset=True): return self.tgt_grid self.mock_stock = self.patch( @@ -129,108 +117,5 @@ def test_regrid__cell_specification(self): _CACHE.clear() -def _make_coord(start: float, stop: float, step: int, *, name: str): - """Helper function for creating a coord.""" - coord = iris.coords.DimCoord( - np.linspace(start, stop, step), - standard_name=name, - units='degrees', - ) - coord.guess_bounds() - return coord - - -def _make_cube(*, lat: tuple, lon: tuple): - """Helper function for creating a cube.""" - lat_coord = _make_coord(*lat, name='latitude') - lon_coord = _make_coord(*lon, name='longitude') - - return iris.cube.Cube( - np.empty([len(lat_coord.points), - len(lon_coord.points)]), - dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)], - ) - - -# 10x10 -LAT_SPEC1 = (-85, 85, 18) -LON_SPEC1 = (5, 355, 36) - -# almost 10x10, but different shape -LAT_SPEC2 = (-85, 85, 17) -LON_SPEC2 = (5, 355, 35) - -# 10x10, but different coords -LAT_SPEC3 = (-90, 90, 18) -LON_SPEC3 = (0, 360, 36) - - -@pytest.mark.parametrize( - 'cube2_spec, expected', - ( - # equal lat/lon - ( - { - 'lat': LAT_SPEC1, - 'lon': LON_SPEC1, - }, - True, - ), - # different lon shape - ( - { - 'lat': LAT_SPEC1, - 'lon': LON_SPEC2, - }, - False, - ), - # different lat shape - ( - { - 'lat': LAT_SPEC2, - 'lon': LON_SPEC1, - }, - False, - ), - # different lon values - ( - { - 'lat': LAT_SPEC1, - 'lon': LON_SPEC3, - }, - False, - ), - # different lat values - ( - { - 'lat': LAT_SPEC3, - 'lon': LON_SPEC1, - }, - False, - ), - ), -) -def test_horizontal_grid_is_close(cube2_spec: dict, expected: bool): - """Test for `_horizontal_grid_is_close`.""" - cube1 = _make_cube(lat=LAT_SPEC1, lon=LON_SPEC1) - cube2 = _make_cube(**cube2_spec) - - assert _horizontal_grid_is_close(cube1, cube2) == expected - - -def test_regrid_is_skipped_if_grids_are_the_same(): - """Test that regridding is skipped if the grids are the same.""" - cube = _make_cube(lat=LAT_SPEC1, lon=LON_SPEC1) - scheme = 'linear' - - # regridding to the same spec returns the same cube - expected_same_cube = regrid(cube, target_grid='10x10', scheme=scheme) - assert expected_same_cube is cube - - # regridding to a different spec returns a different cube - expected_different_cube = regrid(cube, target_grid='5x5', scheme=scheme) - assert expected_different_cube is not cube - - if __name__ == '__main__': unittest.main() diff --git a/tests/unit/preprocessor/_time/test_time.py b/tests/unit/preprocessor/_time/test_time.py index 1677c05fcd..eddcd33f6a 100644 --- a/tests/unit/preprocessor/_time/test_time.py +++ b/tests/unit/preprocessor/_time/test_time.py @@ -3,7 +3,6 @@ import copy import datetime import unittest -from typing import List, Tuple import iris import iris.coord_categorisation @@ -1264,7 +1263,7 @@ def 
make_map_data(number_years=2): return cube -PARAMETERS: List[Tuple] = [] +PARAMETERS = [] for period in ('full', 'day', 'month', 'season'): PARAMETERS.append((period, None)) if period == 'season': diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 82a755660d..0e07231609 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -10,9 +10,7 @@ from esmvalcore.preprocessor._volume import (volume_statistics, depth_integration, extract_trajectory, - extract_transect, - extract_volume, - calculate_volume) + extract_transect, extract_volume) class Test(tests.Test): @@ -85,46 +83,12 @@ def test_extract_volume(self): print(result.data, expected.data) self.assert_array_equal(result.data, expected) - def test_extract_volume_mean(self): - """ - Test to extract the top two layers and compute the - weighted average of a cube.""" - grid_volume = calculate_volume(self.grid_4d) - measure = iris.coords.CellMeasure( - grid_volume, - standard_name='ocean_volume', - units='m3', - measure='volume') - self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) - result = extract_volume(self.grid_4d, 0., 10.) - expected = np.ma.ones((2, 2, 2, 2)) - self.assert_array_equal(result.data, expected) - result_mean = volume_statistics(result, 'mean') - expected_mean = np.ma.array([1., 1.], mask=False) - self.assert_array_equal(result_mean.data, expected_mean) - def test_volume_statistics(self): """Test to take the volume weighted average of a (2,3,2,2) cube.""" result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) - def test_volume_statistics_cell_measure(self): - """ - Test to take the volume weighted average of a (2,3,2,2) cube. - The volume measure is pre-loaded in the cube. - """ - grid_volume = calculate_volume(self.grid_4d) - measure = iris.coords.CellMeasure( - grid_volume, - standard_name='ocean_volume', - units='m3', - measure='volume') - self.grid_4d.add_cell_measure(measure, range(0, measure.ndim)) - result = volume_statistics(self.grid_4d, 'mean') - expected = np.ma.array([1., 1.], mask=False) - self.assert_array_equal(result.data, expected) - def test_volume_statistics_long(self): """ Test to take the volume weighted average of a (4,3,2,2) cube. 
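The cell-measure tests removed above all exercise the same pattern. A minimal sketch of that pattern, assuming the `calculate_volume` and `volume_statistics` helpers from `esmvalcore.preprocessor._volume` as imported in the pre-patch test module; the `CellMeasure` calls are standard iris:

```python
# Minimal sketch, not part of the patch: attach a precomputed volume cell
# measure to a cube and take the volume-weighted mean, as the removed tests
# did. Assumes the pre-patch helpers from esmvalcore.preprocessor._volume.
import iris.coords

from esmvalcore.preprocessor._volume import calculate_volume, volume_statistics


def volume_weighted_mean(cube):
    """Return the volume-weighted mean of ``cube``."""
    grid_volume = calculate_volume(cube)
    measure = iris.coords.CellMeasure(
        grid_volume,
        standard_name='ocean_volume',
        units='m3',
        measure='volume',
    )
    # Span every cube dimension with the measure, as in the removed tests.
    cube.add_cell_measure(measure, range(0, measure.ndim))
    return volume_statistics(cube, 'mean')
```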
diff --git a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py index 2de0ff3c80..387279ec9e 100644 --- a/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py +++ b/tests/unit/preprocessor/_weighting/test_weighting_landsea_fraction.py @@ -1,4 +1,5 @@ """Unit tests for :mod:`esmvalcore.preprocessor._weighting`.""" +from unittest import mock import iris import numpy as np @@ -7,69 +8,95 @@ import esmvalcore.preprocessor._weighting as weighting -crd_sys = iris.coord_systems.GeogCS(iris.fileformats.pp.EARTH_RADIUS) -LON_3 = iris.coords.DimCoord([0, 1.5, 3], - standard_name='longitude', - bounds=[[0, 1], [1, 2], [2, 3]], - units='degrees_east', - coord_system=crd_sys) -LON_4 = iris.coords.DimCoord([0, 1.5, 2.5, 3.5], - standard_name='longitude', - bounds=[[0, 1], [1, 2], [2, 3], - [3, 4]], - units='degrees_east', - coord_system=crd_sys) - CUBE_SFTLF = iris.cube.Cube( [10.0, 0.0, 100.0], var_name='sftlf', standard_name='land_area_fraction', units=Unit('%'), - dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_SFTOF = iris.cube.Cube( [100.0, 0.0, 50.0, 70.0], var_name='sftof', standard_name='sea_area_fraction', units=Unit('%'), - dim_coords_and_dims=[(LON_4, 0), ] ) CUBE_3 = iris.cube.Cube( [10.0, 20.0, 0.0], var_name='dim3', - dim_coords_and_dims=[(LON_3, 0), ] ) CUBE_4 = iris.cube.Cube( [1.0, 2.0, -1.0, 2.0], var_name='dim4', - dim_coords_and_dims=[(LON_4, 0), ] ) - -CUBE_ANCILLARY_3 = CUBE_3.copy() -CUBE_ANCILLARY_3.add_ancillary_variable(CUBE_SFTLF, (0)) - -CUBE_ANCILLARY_4 = CUBE_4.copy() -CUBE_ANCILLARY_4.add_ancillary_variable(CUBE_SFTOF, (0)) - FRAC_SFTLF = np.array([0.1, 0.0, 1.0]) FRAC_SFTOF = np.array([0.0, 1.0, 0.5, 0.3]) +EMPTY_FX_FILES = { + 'sftlf': [], + 'sftof': [], +} +L_FX_FILES = { + 'sftlf': 'not/a/real/path', + 'sftof': [], +} +O_FX_FILES = { + 'sftlf': [], + 'sftof': 'not/a/real/path', +} +FX_FILES = { + 'sftlf': 'not/a/real/path', + 'sftof': 'i/was/mocked', +} +WRONG_FX_FILES = { + 'wrong': 'test', + 'sftlf': 'not/a/real/path', + 'sftof': 'i/was/mocked', +} LAND_FRACTION = [ - (CUBE_3, None, [ - 'Ancillary variables land/sea area fraction not found in cube. ' - 'Check fx_file availability.']), - (CUBE_4, None, [ - 'Ancillary variables land/sea area fraction not found in cube. 
' - 'Check fx_file availability.']), - (CUBE_ANCILLARY_3, FRAC_SFTLF, []), - (CUBE_ANCILLARY_4, FRAC_SFTOF, []) + (CUBE_3, {}, [], None, ["No fx files given"]), + (CUBE_3, {'sftlf': []}, [], None, ["'sftlf' not found"]), + (CUBE_3, {'sftlf': 'a'}, [CUBE_SFTLF], FRAC_SFTLF, []), + (CUBE_3, {'sftof': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), + (CUBE_3, EMPTY_FX_FILES, [], None, + ["'sftlf' not found", "'sftof' not found"]), + (CUBE_3, L_FX_FILES, [CUBE_SFTLF], FRAC_SFTLF, []), + (CUBE_3, O_FX_FILES, [CUBE_SFTOF], None, + ["'sftlf' not found", "not broadcastable"]), + (CUBE_3, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, []), + (CUBE_3, {'wrong': 'a'}, [CUBE_SFTLF], None, + ["expected 'sftlf' or 'sftof'"]), + (CUBE_3, {'wrong': 'a'}, [CUBE_SFTOF], None, ["not broadcastable"]), + (CUBE_3, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, + ["expected 'sftlf' or 'sftof'"]), + (CUBE_3, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTLF, + ["not broadcastable"]), + (CUBE_4, {}, [], None, ["No fx files given"]), + (CUBE_4, {'sftlf': []}, [], None, ["'sftlf' not found"]), + (CUBE_4, {'sftlf': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), + (CUBE_4, {'sftof': 'a'}, [CUBE_SFTOF], FRAC_SFTOF, []), + (CUBE_4, EMPTY_FX_FILES, [], None, + ["'sftlf' not found", "'sftof' not found"]), + (CUBE_4, L_FX_FILES, [CUBE_SFTLF], None, + ["not broadcastable", "'sftof' not found"]), + (CUBE_4, O_FX_FILES, [CUBE_SFTOF], FRAC_SFTOF, ["'sftlf' not found"]), + (CUBE_4, FX_FILES, [CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, + ["not broadcastable"]), + (CUBE_4, {'wrong': 'a'}, [CUBE_SFTLF], None, ["not broadcastable"]), + (CUBE_4, {'wrong': 'a'}, [CUBE_SFTOF], None, + ["expected 'sftlf' or 'sftof'"]), + (CUBE_4, WRONG_FX_FILES, [CUBE_SFTLF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, + ["not broadcastable", "not broadcastable"]), + (CUBE_4, WRONG_FX_FILES, [CUBE_SFTOF, CUBE_SFTLF, CUBE_SFTOF], FRAC_SFTOF, + ["expected 'sftlf' or 'sftof'", "not broadcastable"]), ] -@pytest.mark.parametrize('cube,out,err', LAND_FRACTION) -def test_get_land_fraction(cube, out, err): +@pytest.mark.parametrize('cube,fx_files,fx_cubes,out,err', LAND_FRACTION) +@mock.patch.object(weighting, 'iris', autospec=True) +def test_get_land_fraction(mock_iris, cube, fx_files, fx_cubes, out, err): """Test calculation of land fraction.""" - (land_fraction, errors) = weighting._get_land_fraction(cube) + mock_iris.load_cube.side_effect = fx_cubes + (land_fraction, errors) = weighting._get_land_fraction(cube, fx_files) if land_fraction is None: assert land_fraction == out else: @@ -77,6 +104,46 @@ def test_get_land_fraction(cube, out, err): assert len(errors) == len(err) for (idx, error) in enumerate(errors): assert err[idx] in error + mock_iris.reset_mock() + + +SHAPES_TO_BROADCAST = [ + ((), (1, ), True), + ((), (10, 10), True), + ((1, ), (10, ), True), + ((1, ), (10, 10), True), + ((2, ), (10, ), False), + ((10, ), (), True), + ((10, ), (1, ), True), + ((10, ), (10, ), True), + ((10, ), (10, 10), True), + ((10, ), (7, 1), True), + ((10, ), (10, 7), False), + ((10, ), (7, 1, 10), True), + ((10, ), (7, 1, 1), True), + ((10, ), (7, 1, 7), False), + ((10, ), (7, 10, 7), False), + ((10, 1), (1, 1), True), + ((10, 1), (1, 100), True), + ((10, 1), (10, 7), True), + ((10, 12), (10, 1), True), + ((10, 12), (), True), + ((10, 12), (1, ), True), + ((10, 12), (12, ), True), + ((10, 12), (1, 1), True), + ((10, 12), (1, 12), True), + ((10, 12), (10, 10, 1), True), + ((10, 12), (10, 12, 1), False), + ((10, 12), (10, 12, 12), False), + ((10, 
12), (10, 10, 12), True), +] + + +@pytest.mark.parametrize('shape_1,shape_2,out', SHAPES_TO_BROADCAST) +def test_shape_is_broadcastable(shape_1, shape_2, out): + """Test check if two shapes are broadcastable.""" + is_broadcastable = weighting._shape_is_broadcastable(shape_1, shape_2) + assert is_broadcastable == out CUBE_3_L = CUBE_3.copy([1.0, 0.0, 0.0]) @@ -85,20 +152,37 @@ def test_get_land_fraction(cube, out, err): CUBE_4_O = CUBE_4.copy([1.0, 0.0, -0.5, 1.4]) WEIGHTING_LANDSEA_FRACTION = [ - (CUBE_3, 'land', ValueError), - (CUBE_3, 'sea', ValueError), - (CUBE_ANCILLARY_3, 'land', CUBE_3_L), - (CUBE_ANCILLARY_3, 'sea', CUBE_3_O), - (CUBE_4, 'land', ValueError), - (CUBE_4, 'sea', ValueError), - (CUBE_ANCILLARY_4, 'land', CUBE_4_L), - (CUBE_ANCILLARY_4, 'sea', CUBE_4_O), + (CUBE_3, {}, 'land', ValueError), + (CUBE_3, {}, 'sea', ValueError), + (CUBE_3, EMPTY_FX_FILES, 'land', ValueError), + (CUBE_3, EMPTY_FX_FILES, 'sea', ValueError), + (CUBE_3, L_FX_FILES, 'land', CUBE_3_L), + (CUBE_3, L_FX_FILES, 'sea', CUBE_3_O), + (CUBE_3, O_FX_FILES, 'land', ValueError), + (CUBE_3, O_FX_FILES, 'sea', ValueError), + (CUBE_3, FX_FILES, 'land', CUBE_3_L), + (CUBE_3, FX_FILES, 'sea', CUBE_3_O), + (CUBE_3, FX_FILES, 'wrong', TypeError), + (CUBE_4, {}, 'land', ValueError), + (CUBE_4, {}, 'sea', ValueError), + (CUBE_4, EMPTY_FX_FILES, 'land', ValueError), + (CUBE_4, EMPTY_FX_FILES, 'sea', ValueError), + (CUBE_4, L_FX_FILES, 'land', ValueError), + (CUBE_4, L_FX_FILES, 'sea', ValueError), + (CUBE_4, O_FX_FILES, 'land', CUBE_4_L), + (CUBE_4, O_FX_FILES, 'sea', CUBE_4_O), + (CUBE_4, FX_FILES, 'land', CUBE_4_L), + (CUBE_4, FX_FILES, 'sea', CUBE_4_O), + (CUBE_4, FX_FILES, 'wrong', TypeError), ] -@pytest.mark.parametrize('cube,area_type,out', +@pytest.mark.parametrize('cube,fx_files,area_type,out', WEIGHTING_LANDSEA_FRACTION) -def test_weighting_landsea_fraction(cube, +@mock.patch.object(weighting, 'iris', autospec=True) +def test_weighting_landsea_fraction(mock_iris, + cube, + fx_files, area_type, out): """Test landsea fraction weighting preprocessor.""" @@ -106,10 +190,18 @@ def test_weighting_landsea_fraction(cube, if isinstance(out, type): with pytest.raises(out): weighted_cube = weighting.weighting_landsea_fraction( - cube, area_type) + cube, fx_files, area_type) return # Regular cases - weighted_cube = weighting.weighting_landsea_fraction(cube, area_type) - assert np.array_equal(weighted_cube.data, cube.data) + fx_cubes = [] + if fx_files.get('sftlf'): + fx_cubes.append(CUBE_SFTLF) + if fx_files.get('sftof'): + fx_cubes.append(CUBE_SFTOF) + mock_iris.load_cube.side_effect = fx_cubes + weighted_cube = weighting.weighting_landsea_fraction( + cube, fx_files, area_type) + assert weighted_cube == cube assert weighted_cube is cube + mock_iris.reset_mock() diff --git a/tests/unit/test_recipe.py b/tests/unit/test_recipe.py index 493f366fef..1ed1875926 100644 --- a/tests/unit/test_recipe.py +++ b/tests/unit/test_recipe.py @@ -1,6 +1,6 @@ import pytest -from esmvalcore._recipe import Recipe, _allow_skipping +from esmvalcore._recipe import Recipe from esmvalcore._recipe_checks import RecipeError @@ -14,7 +14,7 @@ def test_expand_ensemble(self): }, ] - expanded = Recipe._expand_tag(datasets, 'ensemble') + expanded = Recipe._expand_ensemble(datasets) ensembles = [ 'r1i2p3', @@ -29,31 +29,6 @@ def test_expand_ensemble(self): for i, ensemble in enumerate(ensembles): assert expanded[i] == {'dataset': 'XYZ', 'ensemble': ensemble} - def test_expand_subexperiment(self): - - datasets = [ - { - 'dataset': 'XYZ', - 
'sub_experiment': 's(1998:2005)', - }, - ] - - expanded = Recipe._expand_tag(datasets, 'sub_experiment') - - subexperiments = [ - 's1998', - 's1999', - 's2000', - 's2001', - 's2002', - 's2003', - 's2004', - 's2005', - ] - for i, subexperiment in enumerate(subexperiments): - assert expanded[i] == {'dataset': 'XYZ', - 'sub_experiment': subexperiment} - def test_expand_ensemble_nolist(self): datasets = [ @@ -64,38 +39,4 @@ def test_expand_ensemble_nolist(self): ] with pytest.raises(RecipeError): - Recipe._expand_tag(datasets, 'ensemble') - - -VAR_A = {'dataset': 'A'} -VAR_A_REF_A = {'dataset': 'A', 'reference_dataset': 'A'} -VAR_A_REF_B = {'dataset': 'A', 'reference_dataset': 'B'} - - -TEST_ALLOW_SKIPPING = [ - ([], VAR_A, {}, False), - ([], VAR_A, {'skip-nonexistent': False}, False), - ([], VAR_A, {'skip-nonexistent': True}, True), - ([], VAR_A_REF_A, {}, False), - ([], VAR_A_REF_A, {'skip-nonexistent': False}, False), - ([], VAR_A_REF_A, {'skip-nonexistent': True}, False), - ([], VAR_A_REF_B, {}, False), - ([], VAR_A_REF_B, {'skip-nonexistent': False}, False), - ([], VAR_A_REF_B, {'skip-nonexistent': True}, True), - (['A'], VAR_A, {}, False), - (['A'], VAR_A, {'skip-nonexistent': False}, False), - (['A'], VAR_A, {'skip-nonexistent': True}, False), - (['A'], VAR_A_REF_A, {}, False), - (['A'], VAR_A_REF_A, {'skip-nonexistent': False}, False), - (['A'], VAR_A_REF_A, {'skip-nonexistent': True}, False), - (['A'], VAR_A_REF_B, {}, False), - (['A'], VAR_A_REF_B, {'skip-nonexistent': False}, False), - (['A'], VAR_A_REF_B, {'skip-nonexistent': True}, False), -] - - -@pytest.mark.parametrize('ancestors,var,cfg,out', TEST_ALLOW_SKIPPING) -def test_allow_skipping(ancestors, var, cfg, out): - """Test ``_allow_skipping``.""" - result = _allow_skipping(ancestors, var, cfg) - assert result is out + Recipe._expand_ensemble(datasets) diff --git a/yamale_meta.yaml b/yamale_meta.yaml new file mode 100644 index 0000000000..b97136636c --- /dev/null +++ b/yamale_meta.yaml @@ -0,0 +1,52 @@ +{% set name = "yamale" %} +{% set version = "2.0" %} +{% set file_ext = "tar.gz" %} +{% set hash_type = "sha256" %} +{% set hash_value = "532897422b590f617a075d47badde4874c0b1d49ac10e151c1f04f73d0524b03" %} + +package: + name: '{{ name|lower }}' + version: '{{ version }}' + +source: + fn: '{{ name }}-{{ version }}.{{ file_ext }}' + url: https://pypi.io/packages/source/{{ name[0] }}/{{ name }}/{{ name }}-{{ version }}.{{ file_ext }} + '{{ hash_type }}': '{{ hash_value }}' + +build: + noarch: python + number: 0 + entry_points: + - yamale=yamale.command_line:main + script: python setup.py install --single-version-externally-managed --record=record.txt + +requirements: + build: + - python + - setuptools + - pyyaml + run: + - python + - pyyaml + +test: + imports: + - yamale + - yamale.readers + - yamale.readers.tests + - yamale.schema + - yamale.syntax + - yamale.syntax.tests + - yamale.tests + - yamale.validators + - yamale.validators.tests + commands: + - yamale --help + +about: + home: https://github.com/23andMe/Yamale + license: MIT License + license_family: MIT + summary: A schema and validator for YAML. + description: A schema and validator for YAML. 
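The recipe above builds yamale 2.0 and exposes a ``yamale`` console script (the recipe's test section runs ``yamale --help``). As a usage reference, a minimal validation sketch against the library API; the file names here are placeholders, not part of the recipe:

```python
# Minimal usage sketch for the yamale package built by the recipe above.
# 'schema.yaml' and 'recipe.yaml' are hypothetical file names.
import yamale

schema = yamale.make_schema('schema.yaml')
data = yamale.make_data('recipe.yaml')
yamale.validate(schema, data)  # raises an exception listing any violations
```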
+ From 9d15ceab218747460572f0058e16d0c2eafe0467 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Fri, 11 Jun 2021 23:09:19 +0200 Subject: [PATCH 62/68] Various improvements, move rechunking to before regridding --- esmvalcore/preprocessor/_multimodel.py | 110 +++++++++--------- esmvalcore/preprocessor/_regrid.py | 28 ++++- setup.cfg | 3 +- .../unit/preprocessor/_regrid/test_regrid.py | 1 + 4 files changed, 84 insertions(+), 58 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index dea5d1d93a..2b6bd18fc9 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -4,6 +4,7 @@ import re from datetime import datetime from functools import reduce +from warnings import catch_warnings, filterwarnings import cf_units import dask.array as da @@ -210,6 +211,13 @@ def _extend(cube, time_points): def _align(cubes, span): """Expand or subset cubes so they share a common time span.""" + if not cubes[0].coords('time'): + return cubes + + if cubes.coord('time').points.size == 1: + # TODO: improve support for this case + return cubes + _unify_time_coordinates(cubes) if _time_coords_are_aligned(cubes): @@ -264,68 +272,62 @@ def _combine(cubes): return merged_cube -def rechunk(cube): - """Rechunk the cube to speed up out-of-memory computation.""" - new_chunks = {0: -1} # don't chunk along the multimodel dimension - if cube.ndim > 1: - new_chunks[1] = 'auto' # do chunk along the first subsequent dimension - - cube.data = cube.lazy_data().rechunk(new_chunks) - - logger.debug("Total data size: %s MB", cube.lazy_data().nbytes * 1e-6) - logger.debug("New chunk block size: %s MB", - cube.lazy_data().nbytes / cube.lazy_data().npartitions * 1e-6) - logger.debug("New chunk configuration: %s", cube.lazy_data()) - - def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs): - """Compute statistics one slice at a time.""" + """Compute statistics one time slice at a time.""" _ = [cube.data for cube in cubes] # make sure the cubes' data are realized - result_slices = [] - for i in range(cubes[0].shape[0]): - single_model_slices = [cube[i] for cube in cubes] - combined_slice = _combine(single_model_slices) - collapsed_slice = combined_slice.collapsed(CONCAT_DIM, operator, - **kwargs) - - # some iris aggregators modify dtype, see e.g. - # https://numpy.org/doc/stable/reference/generated/numpy.ma.average.html - collapsed_slice.data = collapsed_slice.data.astype(np.float32) - - result_slices.append(collapsed_slice) - - try: - result_cube = iris.cube.CubeList(result_slices).merge_cube() - except Exception as excinfo: - raise ValueError( - "Multi-model statistics failed to concatenate results into a" - f" single array. This happened for operator {operator}" - f" with computed statistics {result_slices}." - "This can happen e.g. if the calculation results in inconsistent" - f" dtypes. Encountered the following exception: {excinfo}") + if cubes[0].coords('time', dim_coords=True): + # If there is a time coordinate, compute result one time step at a time + # to reduce memory use. + result_slices = iris.cube.CubeList() + for i in range(cubes[0].shape[0]): + cubes_slice = [cube[i] for cube in cubes] + result_slice = _compute(cubes_slice, operator=operator, **kwargs) + result_slices.append(result_slice) + + try: + result_cube = result_slices.merge_cube() + except Exception as exc: + raise ValueError( + "Multi-model statistics failed to concatenate results into a" + f" single array. 
This happened for operator {operator}" + f" with computed statistics {result_slices}." + "This can happen e.g. if the calculation results in" + f" inconsistent data types.") from exc + else: + result_cube = _compute(cubes, operator=operator, **kwargs) result_cube.data = np.ma.array(result_cube.data) - result_cube.remove_coord(CONCAT_DIM) return result_cube -def _compute_lazy(cubes: list, *, operator: iris.analysis.Aggregator, - **kwargs): - """Compute statistics using lazy iris function.""" - cube = _combine( - cubes) # this is now done for each statistic, can we avoid that? - rechunk(cube) +def _compute(cubes: list, *, operator: iris.analysis.Aggregator, **kwargs): + """Compute statistic.""" + cube = _combine(cubes) - # This will always return a masked array - result_cube = cube.collapsed(CONCAT_DIM, operator, **kwargs) - result_cube.remove_coord(CONCAT_DIM) + with catch_warnings(): + filterwarnings( + 'ignore', + message=( + "Collapsing a non-contiguous coordinate. " + f"Metadata may not be fully descriptive for '{CONCAT_DIM}."), + category=UserWarning, + module='iris', + ) + # This will always return a masked array + result_cube = cube.collapsed(CONCAT_DIM, operator, **kwargs) + # Remove concatenation dimension added by _combine + result_cube.remove_coord(CONCAT_DIM) for cube in cubes: cube.remove_coord(CONCAT_DIM) + # some iris aggregators modify dtype, see e.g. + # https://numpy.org/doc/stable/reference/generated/numpy.ma.average.html + result_cube.data = result_cube.core_data().astype(np.float32) + return result_cube @@ -341,10 +343,10 @@ def _multicube_statistics(cubes, statistics, span): raise ValueError('Cannot perform multicube statistics ' 'for a single cube.') - lazy_input = bool(all(cube.has_lazy_data() for cube in cubes)) + lazy_input = all(cube.has_lazy_data() for cube in cubes) - copied_cubes = [cube.copy() for cube in cubes] # avoid modifying inputs - aligned_cubes = _align(copied_cubes, span=span) + cubes = [cube.copy() for cube in cubes] # avoid modifying inputs + cubes = _align(cubes, span=span) statistics_cubes = {} for statistic in statistics: @@ -352,13 +354,9 @@ def _multicube_statistics(cubes, statistics, span): operator, kwargs = _resolve_operator(statistic) if operator.lazy_func is None: - result_cube = _compute_eager(aligned_cubes, - operator=operator, - **kwargs) + result_cube = _compute_eager(cubes, operator=operator, **kwargs) else: - result_cube = _compute_lazy(aligned_cubes, - operator=operator, - **kwargs) + result_cube = _compute(cubes, operator=operator, **kwargs) # lazy input --> lazy output result_cube.data = result_cube.lazy_data( diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index b35c4b3a25..3df45b4fb3 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -1,5 +1,4 @@ """Horizontal and vertical regridding module.""" - import os import re from copy import deepcopy @@ -474,11 +473,38 @@ def regrid(cube, target_grid, scheme, lat_offset=True, lon_offset=True): if _attempt_irregular_regridding(cube, scheme): cube = esmpy_regrid(cube, target_grid, scheme) else: + cube = _rechunk(cube, target_grid) cube = cube.regrid(target_grid, HORIZONTAL_SCHEMES[scheme]) return cube +def _rechunk(cube, target_grid): + """Re-chunk cube with optimal chunk sizes for target grid.""" + if not cube.has_lazy_data() or cube.ndim < 3: + # Only rechunk lazy multidimensional data + return cube + + if 2 * np.prod(cube.shape[-2:]) > np.prod(target_grid.shape): + # Only rechunk if target grid is more than a 
factor of 2 larger, + # because rechunking will keep the original chunk in memory. + return cube + + data = cube.lazy_data() + + # Compute a good chunk size for the target array + tgt_shape = data.shape[:-2] + target_grid.shape + tgt_chunks = data.chunks[:-2] + target_grid.shape + tgt_data = da.empty(tgt_shape, dtype=data.dtype, chunks=tgt_chunks) + tgt_data = tgt_data.rechunk({i: "auto" for i in range(cube.ndim - 2)}) + + # Adjust chunks to source array and rechunk + chunks = tgt_data.chunks[:-2] + data.shape[-2:] + cube.data = data.rechunk(chunks) + + return cube + + def _horizontal_grid_is_close(cube1, cube2): """Check if two cubes have the same horizontal grid definition. diff --git a/setup.cfg b/setup.cfg index 6989a18c54..37c33b6e85 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,8 +21,9 @@ flake8-ignore = doc/conf.py ALL log_level = WARNING markers = - installation: test requires installation of dependencies + installation: Test requires installation of dependencies use_sample_data: Run functional tests using real data + sequential: Run these tests sequentially, see https://github.com/ESMValGroup/ESMValCore/issues/644 [coverage:run] parallel = true diff --git a/tests/unit/preprocessor/_regrid/test_regrid.py b/tests/unit/preprocessor/_regrid/test_regrid.py index b7beaca442..24db3bcfa5 100644 --- a/tests/unit/preprocessor/_regrid/test_regrid.py +++ b/tests/unit/preprocessor/_regrid/test_regrid.py @@ -60,6 +60,7 @@ def setUp(self): coords=self.coords, remove_coord=self.remove_coord, regrid=self.regrid) + self.src_cube.ndim = 1 self.tgt_grid_coord = mock.Mock() self.tgt_grid = mock.Mock( spec=iris.cube.Cube, coord=self.tgt_grid_coord) From e839dbe1890e9037078795154463a8ee0eb5bdcd Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Sat, 12 Jun 2021 18:47:48 +0200 Subject: [PATCH 63/68] Fix typo --- esmvalcore/preprocessor/_multimodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 2b6bd18fc9..3c54f56a8e 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -214,7 +214,7 @@ def _align(cubes, span): if not cubes[0].coords('time'): return cubes - if cubes.coord('time').points.size == 1: + if cubes[0].coord('time').points.size == 1: # TODO: improve support for this case return cubes From efbcb34022fcea27fe4e8085ea0e251975845135 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Sun, 13 Jun 2021 22:31:25 +0200 Subject: [PATCH 64/68] Add unit tests for _rechunk --- .../unit/preprocessor/_regrid/test_regrid.py | 69 +++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/tests/unit/preprocessor/_regrid/test_regrid.py b/tests/unit/preprocessor/_regrid/test_regrid.py index 24db3bcfa5..a7a5013d05 100644 --- a/tests/unit/preprocessor/_regrid/test_regrid.py +++ b/tests/unit/preprocessor/_regrid/test_regrid.py @@ -4,6 +4,8 @@ import unittest from unittest import mock +import dask +import dask.array as da import iris import numpy as np import pytest @@ -14,6 +16,7 @@ _CACHE, HORIZONTAL_SCHEMES, _horizontal_grid_is_close, + _rechunk, ) @@ -59,11 +62,12 @@ def setUp(self): coord_system=self.coord_system, coords=self.coords, remove_coord=self.remove_coord, - regrid=self.regrid) + regrid=self.regrid, + ) self.src_cube.ndim = 1 self.tgt_grid_coord = mock.Mock() - self.tgt_grid = mock.Mock( - spec=iris.cube.Cube, coord=self.tgt_grid_coord) + self.tgt_grid = mock.Mock(spec=iris.cube.Cube, + coord=self.tgt_grid_coord) self.regrid_schemes = 
[ 'linear', 'linear_extrapolate', 'nearest', 'area_weighted', 'unstructured_nearest' @@ -104,8 +108,8 @@ def test_invalid_scheme__unknown(self): regrid(dummy, dummy, 'wibble') def test_horizontal_schemes(self): - self.assertEqual( - set(HORIZONTAL_SCHEMES.keys()), set(self.regrid_schemes)) + self.assertEqual(set(HORIZONTAL_SCHEMES.keys()), + set(self.regrid_schemes)) def test_regrid__horizontal_schemes(self): for scheme in self.regrid_schemes: @@ -233,5 +237,60 @@ def test_regrid_is_skipped_if_grids_are_the_same(): assert expected_different_cube is not cube +def test_rechunk_on_increased_grid(): + """Test that an increase in grid size rechunks.""" + with dask.config.set({'array.chunk-size': '128 M'}): + + time_dim = 200 + src_grid_dims = (91, 180) + data = da.empty((time_dim, ) + src_grid_dims, dtype=np.float32) + + tgt_grid_dims = (361, 720) + tgt_grid = da.empty(tgt_grid_dims, dtype=np.float32) + + result = _rechunk(iris.cube.Cube(data), iris.cube.Cube(tgt_grid)) + + assert result.core_data().chunks == ((100, 100), (91, ), (180, )) + + +def test_no_rechunk_on_decreased_grid(): + """Test that a decrease in grid size does not rechunk.""" + with dask.config.set({'array.chunk-size': '128 M'}): + + time_dim = 200 + src_grid_dims = (361, 720) + data = da.empty((time_dim, ) + src_grid_dims, dtype=np.float32) + + tgt_grid_dims = (91, 180) + tgt_grid = da.empty(tgt_grid_dims, dtype=np.float32) + + result = _rechunk(iris.cube.Cube(data), iris.cube.Cube(tgt_grid)) + + assert result.core_data().chunks == data.chunks + + +def test_no_rechunk_2d(): + """Test that a 2D cube is not rechunked.""" + with dask.config.set({'array.chunk-size': '64 MiB'}): + + src_grid_dims = (361, 720) + data = da.empty(src_grid_dims, dtype=np.float32) + + tgt_grid_dims = (3601, 7200) + tgt_grid = da.empty(tgt_grid_dims, dtype=np.float32) + + result = _rechunk(iris.cube.Cube(data), iris.cube.Cube(tgt_grid)) + + assert result.core_data().chunks == data.chunks + + +def test_no_rechunk_non_lazy(): + """Test that a cube with non-lazy data does not crash.""" + cube = iris.cube.Cube(np.arange(2 * 4).reshape([1, 2, 4])) + tgt_cube = iris.cube.Cube(np.arange(4 * 8).reshape([4, 8])) + result = _rechunk(cube, tgt_cube) + assert result.data is cube.data + + if __name__ == '__main__': unittest.main() From 78d73b3edb72b2b28507affbb99848abe2f5846a Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Sun, 13 Jun 2021 22:46:57 +0200 Subject: [PATCH 65/68] Log whether or not data is lazy --- esmvalcore/preprocessor/_io.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/esmvalcore/preprocessor/_io.py b/esmvalcore/preprocessor/_io.py index 938e4b6f2e..8510b63598 100644 --- a/esmvalcore/preprocessor/_io.py +++ b/esmvalcore/preprocessor/_io.py @@ -253,7 +253,9 @@ def save(cubes, filename, optimize_access='', compress=False, alias='', "The cube is probably unchanged.", cubes, filename) return filename - logger.debug("Saving cubes %s to %s", cubes, filename) + for cube in cubes: + logger.debug("Saving cube:\n%s\nwith %s data to %s", cube, + "lazy" if cube.has_lazy_data() else "realized", filename) if optimize_access: cube = cubes[0] if optimize_access == 'map': From 6f6e61f248d50d42118c278894bf1c2c039b603c Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Sun, 13 Jun 2021 23:43:56 +0200 Subject: [PATCH 66/68] Fix sample data tests --- .../multimodel_statistics/test_multimodel.py | 63 +++++++++++-------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/tests/sample_data/multimodel_statistics/test_multimodel.py 
b/tests/sample_data/multimodel_statistics/test_multimodel.py index 485328d85b..72b603e0ae 100644 --- a/tests/sample_data/multimodel_statistics/test_multimodel.py +++ b/tests/sample_data/multimodel_statistics/test_multimodel.py @@ -15,7 +15,7 @@ esmvaltool_sample_data = pytest.importorskip("esmvaltool_sample_data") # Increase this number anytime you change the cached input data to the tests. -TEST_REVISION = 1 +TEST_REVISION = 2 CALENDAR_PARAMS = ( pytest.param( @@ -24,11 +24,7 @@ reason='Cannot calculate statistics with single cube in list')), '365_day', 'gregorian', - pytest.param( - 'proleptic_gregorian', - marks=pytest.mark.xfail( - raises=iris.exceptions.MergeError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/956')), + 'proleptic_gregorian', pytest.param( 'julian', marks=pytest.mark.skip( @@ -38,12 +34,12 @@ SPAN_PARAMS = ('overlap', 'full') -def assert_array_almost_equal(this, other): +def assert_array_almost_equal(this, other, rtol=1e-7): """Assert that array `this` almost equals array `other`.""" if np.ma.isMaskedArray(this) or np.ma.isMaskedArray(other): np.testing.assert_array_equal(this.mask, other.mask) - np.testing.assert_allclose(this, other) + np.testing.assert_allclose(this, other, rtol=rtol) def assert_coords_equal(this: list, other: list): @@ -85,6 +81,14 @@ def preprocess_data(cubes, time_slice: dict = None): cubes = [cube.regrid(**regrid_kwargs) for cube in cubes] + # Fix minute differences in vertical coordinate + if first_cube.coords(axis='Z'): + levels = first_cube.coord(axis='Z').points + for cube in cubes: + z_coord = cube.coord(axis='Z') + if np.allclose(z_coord.points, levels): + z_coord.points = levels + return cubes @@ -201,7 +205,11 @@ def multimodel_regression_test(cubes, span, name): if filename.exists(): reference_cube = iris.load_cube(str(filename)) - assert_array_almost_equal(result_cube.data, reference_cube.data) + assert_array_almost_equal( + result_cube.data, + reference_cube.data, + rtol=5e-7, + ) assert_metadata_equal(result_cube.metadata, reference_cube.metadata) assert_coords_equal(result_cube.coords(), reference_cube.coords()) @@ -211,9 +219,6 @@ def multimodel_regression_test(cubes, span, name): raise RuntimeError(f'Wrote reference data to {filename.absolute()}') -@pytest.mark.xfail( - raises=iris.exceptions.MergeError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/956') @pytest.mark.use_sample_data @pytest.mark.parametrize('span', SPAN_PARAMS) def test_multimodel_regression_month(timeseries_cubes_month, span): @@ -251,20 +256,14 @@ def test_multimodel_no_vertical_dimension(timeseries_cubes_month): @pytest.mark.use_sample_data -@pytest.mark.xfail( - raises=iris.exceptions.MergeError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/956') -# @pytest.mark.xfail( -# raises=iris.exceptions.CoordinateNotFoundError, -# reason='https://github.com/ESMValGroup/ESMValCore/issues/891') def test_multimodel_no_horizontal_dimension(timeseries_cubes_month): """Test statistic without horizontal dimension using monthly data.""" span = 'full' cubes = timeseries_cubes_month cubes = [cube[:, :, 0, 0] for cube in cubes] - # Coordinate not found error - # iris.exceptions.CoordinateNotFoundError: - # 'Expected to find exactly 1 depth coordinate, but found none.' 
+ for cube in cubes: + cube.remove_coord('latitude') + cube.remove_coord('longitude') multimodel_test(cubes, span=span, statistic='mean') @@ -274,17 +273,31 @@ def test_multimodel_only_time_dimension(timeseries_cubes_month): cubes = timeseries_cubes_month span = 'full' cubes = [cube[:, 0, 0, 0] for cube in cubes] + for cube in cubes: + cube.remove_coord('air_pressure') + cube.remove_coord('latitude') + cube.remove_coord('longitude') multimodel_test(cubes, span=span, statistic='mean') @pytest.mark.use_sample_data -@pytest.mark.xfail( - raises=ValueError, - reason='https://github.com/ESMValGroup/ESMValCore/issues/890') def test_multimodel_no_time_dimension(timeseries_cubes_month): """Test statistic without time dimension using monthly data.""" span = 'full' cubes = timeseries_cubes_month cubes = [cube[0] for cube in cubes] - # ValueError: Cannot guess bounds for a coordinate of length 1. + for cube in cubes: + cube.remove_coord('time') + multimodel_test(cubes, span=span, statistic='mean') + + +@pytest.mark.use_sample_data +@pytest.mark.xfail( + raises=iris.exceptions.MergeError, + reason='https://github.com/ESMValGroup/ESMValCore/issues/890') +def test_multimodel_scalar_time_dimension(timeseries_cubes_month): + """Test statistic scalar time dimension using monthly data.""" + span = 'full' + cubes = timeseries_cubes_month + cubes = [cube[0] for cube in cubes] multimodel_test(cubes, span=span, statistic='mean') From 0658219573c27f84d952ae76207c4a8d1f7e97c2 Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Tue, 22 Jun 2021 22:25:18 +0200 Subject: [PATCH 67/68] Fix some types --- esmvalcore/preprocessor/_regrid.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index 3df45b4fb3..d8fad61deb 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -673,7 +673,7 @@ def extract_levels(cube, levels, scheme, coordinate=None): Parameters ---------- - cube : cube + cube : iris.cube.Cube The source cube to be vertically interpolated. levels : array One or more target levels for the vertical interpolation. Assumed @@ -695,7 +695,7 @@ def extract_levels(cube, levels, scheme, coordinate=None): Returns ------- - cube + iris.cube.Cube See Also -------- From 9039242419fd02dbab033b3e45ec660c3ba6398b Mon Sep 17 00:00:00 2001 From: Bouwe Andela Date: Thu, 1 Jun 2023 17:55:02 +0200 Subject: [PATCH 68/68] Undo needless changes and fix test --- esmvalcore/preprocessor/_multimodel.py | 4 ++-- esmvalcore/preprocessor/_regrid.py | 1 + tests/unit/preprocessor/_regrid/test_regrid.py | 12 ++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index 56d9ea1730..0f862b0e90 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -250,6 +250,7 @@ def _align_time_coord(cubes, span): "Must be one of 'overlap', 'full'.") new_cubes = [_map_to_new_time(cube, new_time_points) for cube in cubes] + for cube in new_cubes: # Make sure bounds exist and are consistent _guess_time_bounds(cube) @@ -743,8 +744,7 @@ def multi_model_statistics(products, arguments. Except for percentiles, these operators are currently not supported. - Lazy operation is supported for all statistics, except - ``median``, ``percentile``, ``gmean`` and ``hmean``. + Lazy operation is supported for all statistics, except ``median``. 
Parameters ---------- diff --git a/esmvalcore/preprocessor/_regrid.py b/esmvalcore/preprocessor/_regrid.py index f41550d1d9..fe94744ffb 100644 --- a/esmvalcore/preprocessor/_regrid.py +++ b/esmvalcore/preprocessor/_regrid.py @@ -1,4 +1,5 @@ """Horizontal and vertical regridding module.""" + import importlib import inspect import logging diff --git a/tests/unit/preprocessor/_regrid/test_regrid.py b/tests/unit/preprocessor/_regrid/test_regrid.py index 4778243509..9a73835518 100644 --- a/tests/unit/preprocessor/_regrid/test_regrid.py +++ b/tests/unit/preprocessor/_regrid/test_regrid.py @@ -70,8 +70,8 @@ def setUp(self): ) self.src_cube.ndim = 1 self.tgt_grid_coord = mock.Mock() - self.tgt_grid = mock.Mock(spec=iris.cube.Cube, - coord=self.tgt_grid_coord) + self.tgt_grid = mock.Mock( + spec=iris.cube.Cube, coord=self.tgt_grid_coord) self.regrid_schemes = [ 'linear', 'linear_extrapolate', 'nearest', 'area_weighted', 'unstructured_nearest' @@ -111,8 +111,8 @@ def test_invalid_scheme__unknown(self): regrid(self.src_cube, self.src_cube, 'wibble') def test_horizontal_schemes(self): - self.assertEqual(set(HORIZONTAL_SCHEMES.keys()), - set(self.regrid_schemes)) + self.assertEqual( + set(HORIZONTAL_SCHEMES.keys()), set(self.regrid_schemes)) def test_regrid__horizontal_schemes(self): for scheme in self.regrid_schemes: @@ -270,7 +270,7 @@ def test_rechunk_on_increased_grid(): """Test that an increase in grid size rechunks.""" with dask.config.set({'array.chunk-size': '128 M'}): - time_dim = 200 + time_dim = 246 src_grid_dims = (91, 180) data = da.empty((time_dim, ) + src_grid_dims, dtype=np.float32) @@ -279,7 +279,7 @@ def test_rechunk_on_increased_grid(): result = _rechunk(iris.cube.Cube(data), iris.cube.Cube(tgt_grid)) - assert result.core_data().chunks == ((100, 100), (91, ), (180, )) + assert result.core_data().chunks == ((123, 123), (91, ), (180, )) def test_no_rechunk_on_decreased_grid():
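The chunking heuristic behind `_rechunk` can also be exercised standalone, without cubes. A minimal sketch using the illustrative shapes from `test_rechunk_on_increased_grid` above (dask and numpy only; the shapes and the '128 M' chunk-size setting are taken from that test):

```python
# Standalone sketch of the _rechunk heuristic: choose "auto" chunks for the
# leading (non-spatial) dimensions as if the array already had the target
# grid's shape, then reuse those leading chunks for the source array.
import dask
import dask.array as da
import numpy as np

with dask.config.set({'array.chunk-size': '128 M'}):
    src = da.empty((246, 91, 180), dtype=np.float32)  # (time, lat, lon)
    tgt_shape = (361, 720)  # target lat/lon grid, > 2x larger than source

    # Lay out chunks as if the array already had the target grid size ...
    tgt = da.empty(
        src.shape[:-2] + tgt_shape,
        dtype=src.dtype,
        chunks=src.chunks[:-2] + tgt_shape,
    )
    tgt = tgt.rechunk({i: 'auto' for i in range(src.ndim - 2)})

    # ... then reuse the resulting leading chunks for the source array.
    src = src.rechunk(tgt.chunks[:-2] + src.shape[-2:])
    print(src.chunks)  # ((123, 123), (91,), (180,)) per the unit test above
```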