From 604ec7ce81b456120bfcb5e543276acfa9c7de63 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 18 Dec 2024 13:21:45 +0300 Subject: [PATCH 1/3] feature: rework FoldMask to ignore tail nans --- etna/pipeline/base.py | 8 -------- tests/test_pipeline/test_base.py | 20 -------------------- 2 files changed, 28 deletions(-) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index c5b5dba3f..28173086f 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -184,8 +184,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int): Some of target timestamps aren't present in a given dataset ValueError: First train timestamp should be later than minimal dataset timestamp - ValueError: - Last train timestamp should be not later than the ending of the shortest segment ValueError: Last target timestamp should be not later than horizon steps after last train timestamp """ @@ -201,12 +199,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int): diff = set(self.target_timestamps).difference(set(timestamps)) raise ValueError(f"Some target timestamps aren't present in a given dataset: {reprlib.repr(diff)}") - dataset_description = ts.describe() - - dataset_min_last_timestamp = dataset_description["end_timestamp"].min() - if self.last_train_timestamp > dataset_min_last_timestamp: - raise ValueError(f"Last train timestamp should be not later than {dataset_min_last_timestamp}!") - dataset_horizon_border_timestamp = timestamps[timestamps.index(self.last_train_timestamp) + horizon] mask_last_target_timestamp = self.target_timestamps[-1] if dataset_horizon_border_timestamp < mask_last_target_timestamp: diff --git a/tests/test_pipeline/test_base.py b/tests/test_pipeline/test_base.py index 535eb9094..f42b85c8f 100644 --- a/tests/test_pipeline/test_base.py +++ b/tests/test_pipeline/test_base.py @@ -351,26 +351,6 @@ def test_fold_mask_validate_on_dataset_fail_not_present_some_target_timestamps(t fold_mask.validate_on_dataset(ts=ts, horizon=horizon) -@pytest.mark.parametrize( - "ts_name, horizon, fold_mask", - [ - ( - "ts_with_nans_in_tails", - 1, - FoldMask( - first_train_timestamp=None, - last_train_timestamp="2020-01-31 22:00", - target_timestamps=["2020-01-31 23:00"], - ), - ), - ], -) -def test_fold_mask_validate_on_dataset_fail_not_enough_future(ts_name, fold_mask, horizon, request): - ts = request.getfixturevalue(ts_name) - with pytest.raises(ValueError, match="Last train timestamp should be not later than"): - fold_mask.validate_on_dataset(ts=ts, horizon=horizon) - - @pytest.mark.parametrize( "ts_name, horizon, fold_mask", [ From 24f6f11b03c9ed1edef920f2c1d296471cee5c38 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 18 Dec 2024 13:23:38 +0300 Subject: [PATCH 2/3] chore: update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5262a3151..de7ea997e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,7 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522)) - - -- +- Rework validation of `FoldMask` to not fail on tail nans ([#536](https://github.com/etna-team/etna/pull/536)) - - From b99b730059684088bad6e7430b1cbd4234041497 Mon Sep 17 00:00:00 2001 From: Dmitry Bunin Date: Wed, 18 Dec 2024 18:17:01 +0300 Subject: [PATCH 3/3] fix: remove redundant patches --- tests/test_auto/test_auto.py | 4 ---- tests/test_auto/test_tune.py | 4 ---- 2 files changed, 8 deletions(-) diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py index 76f8e44e4..b59e89176 100644 --- a/tests/test_auto/test_auto.py +++ b/tests/test_auto/test_auto.py @@ -47,7 +47,6 @@ def pool_list(): ] -@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix @pytest.mark.parametrize( "ts_name", [ @@ -59,7 +58,6 @@ def pool_list(): ], ) def test_objective( - validate_on_dataset_mock, ts_name, request, target_metric=MAE(missing_mode="ignore"), @@ -93,10 +91,8 @@ def test_objective( callback.assert_called_once() -@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix @pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"]) def test_objective_fail_none( - validate_on_dataset_mock, ts_name, request, target_metric=MAE(missing_mode="ignore"), diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py index 41562994d..388eba917 100644 --- a/tests/test_auto/test_tune.py +++ b/tests/test_auto/test_tune.py @@ -24,7 +24,6 @@ from etna.transforms import TimeSeriesImputerTransform -@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix @pytest.mark.parametrize( "ts_name", [ @@ -36,7 +35,6 @@ ], ) def test_objective( - validate_on_dataset_mock, ts_name, request, target_metric=MAE(missing_mode="ignore"), @@ -68,10 +66,8 @@ def test_objective( callback.assert_called_once() -@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix @pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"]) def test_objective_fail_none( - validate_on_dataset_mock, ts_name, request, target_metric=MAE(missing_mode="ignore"),