From 72876406de0bdd8dff97d127c7ed7bacb5d32df4 Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sat, 22 Apr 2023 08:27:40 +0200 Subject: [PATCH 01/10] added xarray test in feature detection test function --- tobac/tests/test_feature_detection.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index 9826c82e..d9e937aa 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -6,11 +6,13 @@ @pytest.mark.parametrize( - "test_threshs, dxy, wavelength_filtering", - [([1.5], -1, None), ([1.5], 10000, (100 * 1000, 500 * 1000))], + "test_threshs, dxy, wavelength_filtering, data_type", + [ ([1.5], -1, None, "iris"), + ([1.5], -1, None, "xarray"), + ([1.5], 10000, (100 * 1000, 500 * 1000), "iris")], ) def test_feature_detection_multithreshold_timestep( - test_threshs, dxy, wavelength_filtering + test_threshs, dxy, wavelength_filtering, data_type ): """ Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep``` @@ -36,7 +38,7 @@ def test_feature_detection_multithreshold_timestep( h2_size=test_hdim_2_sz, amplitude=test_amp, ) - test_data_iris = tbtest.make_dataset_from_arr(test_data, data_type="iris") + test_data_iris = tbtest.make_dataset_from_arr(test_data, data_type) fd_output = feat_detect.feature_detection_multithreshold_timestep( test_data_iris, 0, From 7071a9aa9ec6b5b2a090a8b65fdf757b0e6201d8 Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sat, 22 Apr 2023 08:51:43 +0200 Subject: [PATCH 02/10] bugfix in feature detection test and included xarray test again --- tobac/tests/test_feature_detection.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index 0428c1ec..9f23d1ee 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -9,13 +9,14 @@ "test_threshs, n_min_threshold, 
dxy, wavelength_filtering, data_type", [ ([1.5], 2, -1, None, "iris"), + ([1.5], 2, -1, None, "xarray"), ([1, 1.5, 2], 2, 10000, (100 * 1000, 500 * 1000), "iris"), ([1, 2, 1.5], [3, 1, 2], -1, None, "iris"), ([1, 1.5, 2], {1.5: 2, 1: 3, 2: 1}, -1, None, "iris"), ], ) def test_feature_detection_multithreshold_timestep( - test_threshs, n_min_threshold, dxy, wavelength_filtering + test_threshs, n_min_threshold, dxy, wavelength_filtering, data_type, ): """ Tests ```tobac.feature_detection.feature_detection_multithreshold_timestep``` From 44ec91686dbbc031f5e186409c2bf8368bbfb095 Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sat, 22 Apr 2023 09:00:11 +0200 Subject: [PATCH 03/10] added decorator to feature_detection part and made nice formatting with black --- tobac/feature_detection.py | 10 +++++++++- tobac/tests/test_feature_detection.py | 6 +++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index d36b83bc..baf9cf31 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -22,6 +22,14 @@ import pandas as pd from .utils import internal as internal_utils from tobac.utils.general import spectral_filtering +from tobac.utils.internal import ( + xarray_to_iris, + iris_to_xarray, + xarray_to_irispandas, + irispandas_to_xarray, +) + + import warnings @@ -522,7 +530,7 @@ def feature_detection_threshold( return features_threshold, regions - +@xarray_to_iris def feature_detection_multithreshold_timestep( data_i, i_time, diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index 9f23d1ee..27b6d9a0 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -16,7 +16,11 @@ ], ) def test_feature_detection_multithreshold_timestep( - test_threshs, n_min_threshold, dxy, wavelength_filtering, data_type, + test_threshs, + n_min_threshold, + dxy, + wavelength_filtering, + data_type, ): """ Tests 
```tobac.feature_detection.feature_detection_multithreshold_timestep``` From 6aefd66de1a06223af54a306ef4d80d9d813ee61 Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sun, 7 May 2023 11:03:31 +0200 Subject: [PATCH 04/10] adjusted data access in `def feature_detection_multithreshold_timestep`; should now work with xarray as input --- tobac/feature_detection.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index baf9cf31..0d38008c 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -530,7 +530,7 @@ def feature_detection_threshold( return features_threshold, regions -@xarray_to_iris +@irispandas_to_xarray def feature_detection_multithreshold_timestep( data_i, i_time, @@ -619,7 +619,8 @@ def feature_detection_multithreshold_timestep( ) # get actual numpy array - track_data = data_i.core_data() +# track_data = data_i.core_data() + track_data = data_i.data track_data = gaussian_filter( track_data, sigma=sigma_threshold From ffbbdefbce710f281049fc884fa7c7af2545a55d Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sun, 7 May 2023 11:14:52 +0200 Subject: [PATCH 05/10] `def feature_detection_threshold` needed no adjustment -> numpy array is considered as input type --- tobac/feature_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 0d38008c..034fda3e 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -274,7 +274,7 @@ def feature_detection_threshold( Parameters ---------- - data_i : iris.cube.Cube + data_i : numpy array 2D field to perform the feature detection (single timestep) on. 
i_time : int From 97118cdd96ed2f0e82f61d18bab51b4494c289d8 Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sun, 7 May 2023 12:25:28 +0200 Subject: [PATCH 06/10] added data type testing for function `feature_detection_multithreshold` --- tobac/tests/test_feature_detection.py | 32 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index 27b6d9a0..e8e65c00 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -375,17 +375,26 @@ def test_filter_min_distance( @pytest.mark.parametrize( "test_dset_size, vertical_axis_num, " - "vertical_coord_name," - " vertical_coord_opt, expected_raise", + "vertical_coord_name, " + "vertical_coord_opt, expected_raise, " + "data_type", [ - ((1, 20, 30, 40), 1, "altitude", "auto", False), - ((1, 20, 30, 40), 2, "altitude", "auto", False), - ((1, 20, 30, 40), 3, "altitude", "auto", False), - ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False), - ((1, 20, 30, 40), 1, "air_pressure", "auto", True), - ((1, 20, 30, 40), 1, "model_level_number", "auto", False), - ((1, 20, 30, 40), 1, "altitude", "auto", False), - ((1, 20, 30, 40), 1, "geopotential_height", "auto", False), + ((1, 20, 30, 40), 1, "altitude", "auto", False, 'iris'), + ((1, 20, 30, 40), 2, "altitude", "auto", False, 'iris'), + ((1, 20, 30, 40), 3, "altitude", "auto", False, 'iris'), + ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, 'iris'), + ((1, 20, 30, 40), 1, "air_pressure", "auto", True, 'iris'), + ((1, 20, 30, 40), 1, "model_level_number", "auto", False, 'iris'), + ((1, 20, 30, 40), 1, "altitude", "auto", False, 'iris'), + ((1, 20, 30, 40), 1, "geopotential_height", "auto", False, 'iris'), + ((1, 20, 30, 40), 1, "altitude", "auto", False, 'xarray'), + ((1, 20, 30, 40), 2, "altitude", "auto", False, 'xarray'), + ((1, 20, 30, 40), 3, "altitude", "auto", False, 'xarray'), + ((1, 20, 30, 40), 1, 
"air_pressure", "air_pressure", False, 'xarray'), + ((1, 20, 30, 40), 1, "air_pressure", "auto", True, 'xarray'), + ((1, 20, 30, 40), 1, "model_level_number", "auto", False, 'xarray'), + ((1, 20, 30, 40), 1, "altitude", "auto", False, 'xarray'), + ((1, 20, 30, 40), 1, "geopotential_height", "auto", False, 'xarray'), ], ) def test_feature_detection_multiple_z_coords( @@ -394,6 +403,7 @@ def test_feature_detection_multiple_z_coords( vertical_coord_name, vertical_coord_opt, expected_raise, + data_type ): """Tests ```tobac.feature_detection.feature_detection_multithreshold``` with different axes @@ -422,7 +432,7 @@ def test_feature_detection_multiple_z_coords( test_data[0, 0:5, 0:5, 0:5] = 3 common_dset_opts = { "in_arr": test_data, - "data_type": "iris", + "data_type": data_type, "z_dim_name": vertical_coord_name, } if vertical_axis_num == 1: From 26e6dcf5c2686e83730f4658f716ac4c4523f4ac Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Sun, 7 May 2023 12:27:19 +0200 Subject: [PATCH 07/10] decorated `feature_detection_multithreshold` and updated docstring for `feature_detection_multithreshold_timestep` --- tobac/feature_detection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 034fda3e..243b8c71 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -556,7 +556,7 @@ def feature_detection_multithreshold_timestep( Parameters ---------- - data_i : iris.cube.Cube + data_i : xarray.Dataset (or for legacy: iris.cube.Cube) 2D field to perform the feature detection (single timestep) on. 
threshold : float, optional @@ -721,7 +721,7 @@ def feature_detection_multithreshold_timestep( ) return features_thresholds - +@xarray_to_iris def feature_detection_multithreshold( field_in, dxy=None, From 43f5197844471d15b96ed7265300d917552e4d8e Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Wed, 10 May 2023 16:45:37 +0200 Subject: [PATCH 08/10] added coordinate support for xarray output in `make_dataset_from_arr` --- tobac/testing.py | 60 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/tobac/testing.py b/tobac/testing.py index 8d259f59..c929cd6e 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -518,39 +518,73 @@ def make_dataset_from_arr( import xarray as xr import iris + # dimension handling has_time = time_dim_num is not None - is_3D = z_dim_num is not None - output_arr = xr.DataArray(in_arr) + + + dims = [] + for idim in range( in_arr.ndim ): + dims += [f'dim{idim}'] + + + t_dim_name = "time" + + if has_time: + dims[time_dim_num] = t_dim_name + + if is_3D: + dims[z_dim_num] = z_dim_name + + + # coordinates handling if is_3D: z_max = in_arr.shape[z_dim_num] + z_coordinate = np.arange(0, z_max) + z_attrs = dict(standard_name=z_dim_name) if has_time: time_min = datetime.datetime(2022, 1, 1) time_num = in_arr.shape[time_dim_num] - + time_coordinate = ( + pd.date_range(start=time_min, periods=time_num) + .values.astype("datetime64[s]") + .astype(int) + ) + time_attrs = dict( + standard_name=t_dim_name, + units="seconds since epoch", + ) + + # setup data structures if data_type == "xarray": - return output_arr + coords = {} + + if is_3D: + coords[z_dim_name] = ([z_dim_name], z_coordinate, z_attrs) + + if has_time: + coords[t_dim_name] = ([t_dim_name], time_coordinate, time_attrs) + + output_xarray = xr.DataArray(data = in_arr, coords = coords, dims = dims) + + return output_xarray + elif data_type == "iris": - out_arr_iris = output_arr.to_iris() + out_arr_iris = xr.DataArray(data = 
in_arr).to_iris() if is_3D: out_arr_iris.add_dim_coord( - iris.coords.DimCoord(np.arange(0, z_max), standard_name=z_dim_name), + iris.coords.DimCoord(z_coordinate, **z_attrs), z_dim_num, ) if has_time: out_arr_iris.add_dim_coord( - iris.coords.DimCoord( - pd.date_range(start=time_min, periods=time_num) - .values.astype("datetime64[s]") - .astype(int), - standard_name="time", - units="seconds since epoch", - ), + iris.coords.DimCoord(time_coordinate, **time_attrs), time_dim_num, ) return out_arr_iris + else: raise ValueError("data_type must be 'xarray' or 'iris'") From 6978451dd719d708ba2ce2d544d963e78253a967 Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Wed, 10 May 2023 16:49:55 +0200 Subject: [PATCH 09/10] black formatting --- tobac/feature_detection.py | 4 +++- tobac/testing.py | 23 ++++++++---------- tobac/tests/test_feature_detection.py | 34 +++++++++++++-------------- 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 243b8c71..5b47a0b7 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -530,6 +530,7 @@ def feature_detection_threshold( return features_threshold, regions + @irispandas_to_xarray def feature_detection_multithreshold_timestep( data_i, @@ -619,7 +620,7 @@ def feature_detection_multithreshold_timestep( ) # get actual numpy array -# track_data = data_i.core_data() + # track_data = data_i.core_data() track_data = data_i.data track_data = gaussian_filter( @@ -721,6 +722,7 @@ def feature_detection_multithreshold_timestep( ) return features_thresholds + @xarray_to_iris def feature_detection_multithreshold( field_in, diff --git a/tobac/testing.py b/tobac/testing.py index c929cd6e..a5fc0c1f 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -521,22 +521,19 @@ def make_dataset_from_arr( # dimension handling has_time = time_dim_num is not None is_3D = z_dim_num is not None - - + dims = [] - for idim in range( in_arr.ndim ): - dims += [f'dim{idim}'] - - 
+ for idim in range(in_arr.ndim): + dims += [f"dim{idim}"] + t_dim_name = "time" if has_time: dims[time_dim_num] = t_dim_name - + if is_3D: dims[z_dim_num] = z_dim_name - - + # coordinates handling if is_3D: z_max = in_arr.shape[z_dim_num] @@ -555,7 +552,7 @@ def make_dataset_from_arr( standard_name=t_dim_name, units="seconds since epoch", ) - + # setup data structures if data_type == "xarray": coords = {} @@ -565,13 +562,13 @@ def make_dataset_from_arr( if has_time: coords[t_dim_name] = ([t_dim_name], time_coordinate, time_attrs) - - output_xarray = xr.DataArray(data = in_arr, coords = coords, dims = dims) + + output_xarray = xr.DataArray(data=in_arr, coords=coords, dims=dims) return output_xarray elif data_type == "iris": - out_arr_iris = xr.DataArray(data = in_arr).to_iris() + out_arr_iris = xr.DataArray(data=in_arr).to_iris() if is_3D: out_arr_iris.add_dim_coord( diff --git a/tobac/tests/test_feature_detection.py b/tobac/tests/test_feature_detection.py index e8e65c00..968aa9f1 100644 --- a/tobac/tests/test_feature_detection.py +++ b/tobac/tests/test_feature_detection.py @@ -379,22 +379,22 @@ def test_filter_min_distance( "vertical_coord_opt, expected_raise, " "data_type", [ - ((1, 20, 30, 40), 1, "altitude", "auto", False, 'iris'), - ((1, 20, 30, 40), 2, "altitude", "auto", False, 'iris'), - ((1, 20, 30, 40), 3, "altitude", "auto", False, 'iris'), - ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, 'iris'), - ((1, 20, 30, 40), 1, "air_pressure", "auto", True, 'iris'), - ((1, 20, 30, 40), 1, "model_level_number", "auto", False, 'iris'), - ((1, 20, 30, 40), 1, "altitude", "auto", False, 'iris'), - ((1, 20, 30, 40), 1, "geopotential_height", "auto", False, 'iris'), - ((1, 20, 30, 40), 1, "altitude", "auto", False, 'xarray'), - ((1, 20, 30, 40), 2, "altitude", "auto", False, 'xarray'), - ((1, 20, 30, 40), 3, "altitude", "auto", False, 'xarray'), - ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, 'xarray'), - ((1, 20, 30, 40), 1, "air_pressure", 
"auto", True, 'xarray'), - ((1, 20, 30, 40), 1, "model_level_number", "auto", False, 'xarray'), - ((1, 20, 30, 40), 1, "altitude", "auto", False, 'xarray'), - ((1, 20, 30, 40), 1, "geopotential_height", "auto", False, 'xarray'), + ((1, 20, 30, 40), 1, "altitude", "auto", False, "iris"), + ((1, 20, 30, 40), 2, "altitude", "auto", False, "iris"), + ((1, 20, 30, 40), 3, "altitude", "auto", False, "iris"), + ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, "iris"), + ((1, 20, 30, 40), 1, "air_pressure", "auto", True, "iris"), + ((1, 20, 30, 40), 1, "model_level_number", "auto", False, "iris"), + ((1, 20, 30, 40), 1, "altitude", "auto", False, "iris"), + ((1, 20, 30, 40), 1, "geopotential_height", "auto", False, "iris"), + ((1, 20, 30, 40), 1, "altitude", "auto", False, "xarray"), + ((1, 20, 30, 40), 2, "altitude", "auto", False, "xarray"), + ((1, 20, 30, 40), 3, "altitude", "auto", False, "xarray"), + ((1, 20, 30, 40), 1, "air_pressure", "air_pressure", False, "xarray"), + ((1, 20, 30, 40), 1, "air_pressure", "auto", True, "xarray"), + ((1, 20, 30, 40), 1, "model_level_number", "auto", False, "xarray"), + ((1, 20, 30, 40), 1, "altitude", "auto", False, "xarray"), + ((1, 20, 30, 40), 1, "geopotential_height", "auto", False, "xarray"), ], ) def test_feature_detection_multiple_z_coords( @@ -403,7 +403,7 @@ def test_feature_detection_multiple_z_coords( vertical_coord_name, vertical_coord_opt, expected_raise, - data_type + data_type, ): """Tests ```tobac.feature_detection.feature_detection_multithreshold``` with different axes From 7e9b32881577ced2375bf434875d24e4e747d3fb Mon Sep 17 00:00:00 2001 From: Fabian Senf Date: Thu, 11 May 2023 10:21:34 +0200 Subject: [PATCH 10/10] restructured test data generation again to have Iris Cube as base first and then go to xarray in a conversion step; this is consistent now for `make_dataset_from_arr` with the other approaches in the module so that the planned conversion to xarray internals can always use the same scheme --- 
tobac/testing.py | 45 ++++++++++++++++++++------------------------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/tobac/testing.py b/tobac/testing.py index a5fc0c1f..34786cfa 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -516,8 +516,10 @@ def make_dataset_from_arr( """ import xarray as xr + from iris.cube import Cube import iris + # dimension handling has_time = time_dim_num is not None is_3D = z_dim_num is not None @@ -554,36 +556,29 @@ def make_dataset_from_arr( ) # setup data structures - if data_type == "xarray": - coords = {} - - if is_3D: - coords[z_dim_name] = ([z_dim_name], z_coordinate, z_attrs) - - if has_time: - coords[t_dim_name] = ([t_dim_name], time_coordinate, time_attrs) + sample_data = Cube( in_arr, ) - output_xarray = xr.DataArray(data=in_arr, coords=coords, dims=dims) + # out_arr_iris = xr.DataArray(data=in_arr).to_iris() - return output_xarray + if is_3D: + sample_data.add_dim_coord( + iris.coords.DimCoord(z_coordinate, **z_attrs), + z_dim_num, + ) + if has_time: + sample_data.add_dim_coord( + iris.coords.DimCoord(time_coordinate, **time_attrs), + time_dim_num, + ) - elif data_type == "iris": - out_arr_iris = xr.DataArray(data=in_arr).to_iris() + if data_type == "xarray": + sample_data = DataArray.from_iris(sample_data) + + elif not data_type == "iris": + raise ValueError("data_type must be 'xarray' or 'iris'") - if is_3D: - out_arr_iris.add_dim_coord( - iris.coords.DimCoord(z_coordinate, **z_attrs), - z_dim_num, - ) - if has_time: - out_arr_iris.add_dim_coord( - iris.coords.DimCoord(time_coordinate, **time_attrs), - time_dim_num, - ) - return out_arr_iris - else: - raise ValueError("data_type must be 'xarray' or 'iris'") + return sample_data def make_feature_blob(