From 604ec7ce81b456120bfcb5e543276acfa9c7de63 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Wed, 18 Dec 2024 13:21:45 +0300
Subject: [PATCH 1/3] feature: rework FoldMask validation to ignore tail NaNs

---
 etna/pipeline/base.py            |  8 --------
 tests/test_pipeline/test_base.py | 20 --------------------
 2 files changed, 28 deletions(-)

diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py
index c5b5dba3f..28173086f 100644
--- a/etna/pipeline/base.py
+++ b/etna/pipeline/base.py
@@ -184,8 +184,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
             Some of target timestamps aren't present in a given dataset
         ValueError:
             First train timestamp should be later than minimal dataset timestamp
-        ValueError:
-            Last train timestamp should be not later than the ending of the shortest segment
         ValueError:
             Last target timestamp should be not later than horizon steps after last train timestamp
         """
@@ -201,12 +199,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
             diff = set(self.target_timestamps).difference(set(timestamps))
             raise ValueError(f"Some target timestamps aren't present in a given dataset: {reprlib.repr(diff)}")
 
-        dataset_description = ts.describe()
-
-        dataset_min_last_timestamp = dataset_description["end_timestamp"].min()
-        if self.last_train_timestamp > dataset_min_last_timestamp:
-            raise ValueError(f"Last train timestamp should be not later than {dataset_min_last_timestamp}!")
-
         dataset_horizon_border_timestamp = timestamps[timestamps.index(self.last_train_timestamp) + horizon]
         mask_last_target_timestamp = self.target_timestamps[-1]
         if dataset_horizon_border_timestamp < mask_last_target_timestamp:
diff --git a/tests/test_pipeline/test_base.py b/tests/test_pipeline/test_base.py
index 535eb9094..f42b85c8f 100644
--- a/tests/test_pipeline/test_base.py
+++ b/tests/test_pipeline/test_base.py
@@ -351,26 +351,6 @@ def test_fold_mask_validate_on_dataset_fail_not_present_some_target_timestamps(t
         fold_mask.validate_on_dataset(ts=ts, horizon=horizon)
 
 
-@pytest.mark.parametrize(
-    "ts_name, horizon, fold_mask",
-    [
-        (
-            "ts_with_nans_in_tails",
-            1,
-            FoldMask(
-                first_train_timestamp=None,
-                last_train_timestamp="2020-01-31 22:00",
-                target_timestamps=["2020-01-31 23:00"],
-            ),
-        ),
-    ],
-)
-def test_fold_mask_validate_on_dataset_fail_not_enough_future(ts_name, fold_mask, horizon, request):
-    ts = request.getfixturevalue(ts_name)
-    with pytest.raises(ValueError, match="Last train timestamp should be not later than"):
-        fold_mask.validate_on_dataset(ts=ts, horizon=horizon)
-
-
 @pytest.mark.parametrize(
     "ts_name, horizon, fold_mask",
     [

From 24f6f11b03c9ed1edef920f2c1d296471cee5c38 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Wed, 18 Dec 2024 13:23:38 +0300
Subject: [PATCH 2/3] chore: update changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5262a3151..de7ea997e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,7 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
 - 
 - 
-- 
+- Rework validation of `FoldMask` so that it does not fail on tail NaNs ([#536](https://github.com/etna-team/etna/pull/536))
 - 
 - 
 

From b99b730059684088bad6e7430b1cbd4234041497 Mon Sep 17 00:00:00 2001
From: Dmitry Bunin <bunin260200@gmail.com>
Date: Wed, 18 Dec 2024 18:17:01 +0300
Subject: [PATCH 3/3] fix: remove redundant FoldMask.validate_on_dataset mock patches from tests

---
 tests/test_auto/test_auto.py | 4 ----
 tests/test_auto/test_tune.py | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/tests/test_auto/test_auto.py b/tests/test_auto/test_auto.py
index 76f8e44e4..b59e89176 100644
--- a/tests/test_auto/test_auto.py
+++ b/tests/test_auto/test_auto.py
@@ -47,7 +47,6 @@ def pool_list():
     ]
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize(
     "ts_name",
     [
@@ -59,7 +58,6 @@ def pool_list():
     ],
 )
 def test_objective(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -93,10 +91,8 @@ def test_objective(
     callback.assert_called_once()
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
 def test_objective_fail_none(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
diff --git a/tests/test_auto/test_tune.py b/tests/test_auto/test_tune.py
index 41562994d..388eba917 100644
--- a/tests/test_auto/test_tune.py
+++ b/tests/test_auto/test_tune.py
@@ -24,7 +24,6 @@
 from etna.transforms import TimeSeriesImputerTransform
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize(
     "ts_name",
     [
@@ -36,7 +35,6 @@
     ],
 )
 def test_objective(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),
@@ -68,10 +66,8 @@ def test_objective(
     callback.assert_called_once()
 
 
-@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock())  # TODO: remove after fix
 @pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
 def test_objective_fail_none(
-    validate_on_dataset_mock,
     ts_name,
     request,
     target_metric=MAE(missing_mode="ignore"),