etna-team · d-a-bunin · Dec 20, 2024 · Dec 18, 2024 · Dec 18, 2024 · Dec 18, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -37,7 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
 - 
 - 
-- 
+- Rework validation of `FoldMask` to not fail on tail NaNs ([#536](https://github.com/etna-team/etna/pull/536))
 - 
 - 
 

diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py
@@ -184,8 +184,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
             Some of target timestamps aren't present in a given dataset
         ValueError:
             First train timestamp should be later than minimal dataset timestamp
-        ValueError:
-            Last train timestamp should be not later than the ending of the shortest segment
         ValueError:
             Last target timestamp should be not later than horizon steps after last train timestamp
         """
@@ -201,12 +199,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
             diff = set(self.target_timestamps).difference(set(timestamps))
             raise ValueError(f"Some target timestamps aren't present in a given dataset: {reprlib.repr(diff)}")
 
-        dataset_description = ts.describe()
-
-        dataset_min_last_timestamp = dataset_description["end_timestamp"].min()
-        if self.last_train_timestamp > dataset_min_last_timestamp:
-            raise ValueError(f"Last train timestamp should be not later than {dataset_min_last_timestamp}!")
-
         dataset_horizon_border_timestamp = timestamps[timestamps.index(self.last_train_timestamp) + horizon]
         mask_last_target_timestamp = self.target_timestamps[-1]
         if dataset_horizon_border_timestamp < mask_last_target_timestamp:

diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py
@@ -47,7 +47,6 @@ def pool_list():
     ]
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize(
     "ts_name",
     [
@@ -59,7 +58,6 @@ def pool_list():
     ],
 )
 def test_objective(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -93,10 +91,8 @@ def test_objective(
     callback.assert_called_once()
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
 def test_objective_fail_none(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),

diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py
@@ -24,7 +24,6 @@
 from etna.transforms import TimeSeriesImputerTransform
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize(
     "ts_name",
     [
@@ -36,7 +35,6 @@
     ],
 )
 def test_objective(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -68,10 +66,8 @@ def test_objective(
     callback.assert_called_once()
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
 def test_objective_fail_none(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),

diff --git a/tests/test_pipeline/test_base.py b/tests/test_pipeline/test_base.py
@@ -351,26 +351,6 @@ def test_fold_mask_validate_on_dataset_fail_not_present_some_target_timestamps(t
         fold_mask.validate_on_dataset(ts=ts, horizon=horizon)
 
 
-@pytest.mark.parametrize(
-    "ts_name, horizon, fold_mask",
-    [
-        (
-            "ts_with_nans_in_tails",
-            1,
-            FoldMask(
-                first_train_timestamp=None,
-                last_train_timestamp="2020-01-31 22:00",
-                target_timestamps=["2020-01-31 23:00"],
-            ),
-        ),
-    ],
-)
-def test_fold_mask_validate_on_dataset_fail_not_enough_future(ts_name, fold_mask, horizon, request):
-    ts = request.getfixturevalue(ts_name)
-    with pytest.raises(ValueError, match="Last train timestamp should be not later than"):
-        fold_mask.validate_on_dataset(ts=ts, horizon=horizon)
-
-
 @pytest.mark.parametrize(
     "ts_name, horizon, fold_mask",
     [
-Original file line number
+Diff line change
@@ Expand Up @@
     - Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
     -
     -
-    -
+    - Rework validation of `FoldMask` to not fail on tail NaNs ([#536](https://github.com/etna-team/etna/pull/536))
     -
     -
@@ Expand Down @@