Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework validation of FoldMask to not fail on tail nans #536

Merged
3 commits merged on Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Update `aggregate_metrics_df` to work with `None` values ([#522](https://github.com/etna-team/etna/pull/522))
-
-
-
- Rework validation of `FoldMask` to not fail on tail nans ([#536](https://github.com/etna-team/etna/pull/536))
-
-

Expand Down
8 changes: 0 additions & 8 deletions etna/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
Some of target timestamps aren't present in a given dataset
ValueError:
First train timestamp should be later than minimal dataset timestamp
ValueError:
Last train timestamp should be not later than the ending of the shortest segment
ValueError:
Last target timestamp should be not later than horizon steps after last train timestamp
"""
Expand All @@ -201,12 +199,6 @@ def validate_on_dataset(self, ts: TSDataset, horizon: int):
diff = set(self.target_timestamps).difference(set(timestamps))
raise ValueError(f"Some target timestamps aren't present in a given dataset: {reprlib.repr(diff)}")

dataset_description = ts.describe()

dataset_min_last_timestamp = dataset_description["end_timestamp"].min()
brsnw250 marked this conversation as resolved.
Show resolved Hide resolved
if self.last_train_timestamp > dataset_min_last_timestamp:
raise ValueError(f"Last train timestamp should be not later than {dataset_min_last_timestamp}!")

dataset_horizon_border_timestamp = timestamps[timestamps.index(self.last_train_timestamp) + horizon]
mask_last_target_timestamp = self.target_timestamps[-1]
if dataset_horizon_border_timestamp < mask_last_target_timestamp:
Expand Down
4 changes: 0 additions & 4 deletions tests/test_auto/test_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ def pool_list():
]


@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix
@pytest.mark.parametrize(
"ts_name",
[
Expand All @@ -59,7 +58,6 @@ def pool_list():
],
)
def test_objective(
validate_on_dataset_mock,
ts_name,
request,
target_metric=MAE(missing_mode="ignore"),
Expand Down Expand Up @@ -93,10 +91,8 @@ def test_objective(
callback.assert_called_once()


@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix
@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
def test_objective_fail_none(
validate_on_dataset_mock,
ts_name,
request,
target_metric=MAE(missing_mode="ignore"),
Expand Down
4 changes: 0 additions & 4 deletions tests/test_auto/test_tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from etna.transforms import TimeSeriesImputerTransform


@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix
@pytest.mark.parametrize(
"ts_name",
[
Expand All @@ -36,7 +35,6 @@
],
)
def test_objective(
validate_on_dataset_mock,
ts_name,
request,
target_metric=MAE(missing_mode="ignore"),
Expand Down Expand Up @@ -68,10 +66,8 @@ def test_objective(
callback.assert_called_once()


@patch("etna.pipeline.FoldMask.validate_on_dataset", return_value=MagicMock()) # TODO: remove after fix
@pytest.mark.parametrize("ts_name", ["ts_with_all_folds_missing_all_segments"])
def test_objective_fail_none(
validate_on_dataset_mock,
ts_name,
request,
target_metric=MAE(missing_mode="ignore"),
Expand Down
20 changes: 0 additions & 20 deletions tests/test_pipeline/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,26 +351,6 @@ def test_fold_mask_validate_on_dataset_fail_not_present_some_target_timestamps(t
fold_mask.validate_on_dataset(ts=ts, horizon=horizon)


@pytest.mark.parametrize(
"ts_name, horizon, fold_mask",
[
(
"ts_with_nans_in_tails",
1,
FoldMask(
first_train_timestamp=None,
last_train_timestamp="2020-01-31 22:00",
target_timestamps=["2020-01-31 23:00"],
),
),
],
)
def test_fold_mask_validate_on_dataset_fail_not_enough_future(ts_name, fold_mask, horizon, request):
ts = request.getfixturevalue(ts_name)
with pytest.raises(ValueError, match="Last train timestamp should be not later than"):
fold_mask.validate_on_dataset(ts=ts, horizon=horizon)


@pytest.mark.parametrize(
"ts_name, horizon, fold_mask",
[
Expand Down
Loading