Skip to content

Commit

Permalink
Implement params_to_tune properly for StackingEnsemble (#561)
Browse files Browse the repository at this point in the history
  • Loading branch information
DanilSmorchkov authored Jan 16, 2025
1 parent 620e096 commit 3f185f9
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 6 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
- Implement non-empty `params_to_tune` for `VotingEnsemble` ([#557](https://github.com/etna-team/etna/pull/557))
- Implement non-empty `params_to_tune` for `DirectEnsemble` ([#559](https://github.com/etna-team/etna/pull/559))
-
- Implement non-empty `params_to_tune` for `StackingEnsemble` ([#561](https://github.com/etna-team/etna/pull/561))
-
-
-
Expand Down
9 changes: 7 additions & 2 deletions etna/ensembles/stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,16 @@ def _predict(
def params_to_tune(self) -> Dict[str, BaseDistribution]:
    """Get hyperparameter grid to tune.

    The grid is the union of the grids of the base pipelines. Parameters for
    pipelines have prefix "pipelines.idx.", e.g. "pipelines.0.model.alpha".

    Returns
    -------
    :
        Grid with hyperparameters.
    """
    all_params = {}
    for ind, pipeline in enumerate(self.pipelines):
        for key, value in pipeline.params_to_tune().items():
            # Namespace each key by the pipeline's position so identical
            # parameter names from different pipelines do not collide.
            new_key = f"pipelines.{ind}.{key}"
            all_params[new_key] = value
    return all_params
102 changes: 99 additions & 3 deletions tests/test_ensembles/test_stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,18 @@
from typing_extensions import Literal

from etna.datasets import TSDataset
from etna.distributions import CategoricalDistribution
from etna.distributions import FloatDistribution
from etna.distributions import IntDistribution
from etna.ensembles.stacking_ensemble import StackingEnsemble
from etna.metrics import MAE
from etna.models import CatBoostPerSegmentModel
from etna.models import NaiveModel
from etna.models import ProphetModel
from etna.pipeline import Pipeline
from etna.transforms import DateFlagsTransform
from etna.transforms import LagTransform
from etna.transforms import StandardScalerTransform
from tests.test_pipeline.utils import assert_pipeline_equals_loaded_original
from tests.test_pipeline.utils import assert_pipeline_forecast_raise_error_if_no_ts
from tests.test_pipeline.utils import assert_pipeline_forecasts_given_ts
Expand Down Expand Up @@ -453,6 +462,93 @@ def test_ts_with_segment_named_target(
assert isinstance(df, pd.DataFrame)


# NOTE(review): this is the pre-#561 test and asserts an empty grid, which the
# parametrized `test_params_to_tune` later in this file (same module-level name,
# so it re-binds this one) supersedes — presumably intended for removal; confirm.
def test_params_to_tune(stacking_ensemble_pipeline):
    result = stacking_ensemble_pipeline.params_to_tune()
    assert result == {}
@pytest.mark.parametrize(
    "pipeline_0_tune_params, pipeline_1_tune_params, expected_tune_params",
    [
        (
            {
                "model.alpha": [0, 3, 5],
                "model.beta": [0.1, 0.2, 0.3],
                "transforms.0.param_1": ["option_1", "option_2"],
                "transforms.0.param_2": [False, True],
                "transforms.1.param_1": [1, 2],
            },
            {
                "model.alpha": [0, 3, 5],
                "model.beta": [0.1, 0.2, 0.3],
                "transforms.0.param_1": ["option_1", "option_2"],
                "transforms.0.param_2": [False, True],
                "transforms.1.param_1": [1, 2],
            },
            {
                "pipelines.0.model.alpha": [0, 3, 5],
                "pipelines.0.model.beta": [0.1, 0.2, 0.3],
                "pipelines.0.transforms.0.param_1": ["option_1", "option_2"],
                "pipelines.0.transforms.0.param_2": [False, True],
                "pipelines.0.transforms.1.param_1": [1, 2],
                "pipelines.1.model.alpha": [0, 3, 5],
                "pipelines.1.model.beta": [0.1, 0.2, 0.3],
                "pipelines.1.transforms.0.param_1": ["option_1", "option_2"],
                "pipelines.1.transforms.0.param_2": [False, True],
                "pipelines.1.transforms.1.param_1": [1, 2],
            },
        )
    ],
)
def test_params_to_tune_mocked(pipeline_0_tune_params, pipeline_1_tune_params, expected_tune_params):
    """Check that the ensemble prefixes each mocked pipeline's grid with ``pipelines.<idx>.``."""
    mocked_pipelines = []
    for tune_params in (pipeline_0_tune_params, pipeline_1_tune_params):
        mocked = MagicMock()
        mocked.params_to_tune.return_value = tune_params
        # StackingEnsemble validates that all pipelines share one horizon.
        mocked.horizon = 5
        mocked_pipelines.append(mocked)

    ensemble_pipeline = StackingEnsemble(pipelines=mocked_pipelines)

    assert ensemble_pipeline.params_to_tune() == expected_tune_params


@pytest.mark.parametrize(
    "pipelines, expected_params_to_tune",
    [
        (
            [
                Pipeline(
                    model=CatBoostPerSegmentModel(iterations=100),
                    transforms=[DateFlagsTransform(), LagTransform(in_column="target", lags=[1, 2, 3])],
                    horizon=5,
                ),
                Pipeline(model=ProphetModel(), transforms=[StandardScalerTransform()], horizon=5),
                Pipeline(model=NaiveModel(lag=3), horizon=5),
            ],
            {
                "pipelines.0.model.learning_rate": FloatDistribution(low=1e-4, high=0.5, log=True),
                "pipelines.0.model.depth": IntDistribution(low=1, high=11, step=1),
                "pipelines.0.model.l2_leaf_reg": FloatDistribution(low=0.1, high=200.0, log=True),
                "pipelines.0.model.random_strength": FloatDistribution(low=1e-05, high=10.0, log=True),
                "pipelines.0.transforms.0.day_number_in_week": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.day_number_in_month": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.day_number_in_year": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.week_number_in_month": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.week_number_in_year": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.month_number_in_year": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.season_number": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.year_number": CategoricalDistribution([False, True]),
                "pipelines.0.transforms.0.is_weekend": CategoricalDistribution([False, True]),
                "pipelines.1.model.seasonality_mode": CategoricalDistribution(["additive", "multiplicative"]),
                "pipelines.1.model.seasonality_prior_scale": FloatDistribution(low=1e-2, high=10, log=True),
                "pipelines.1.model.changepoint_prior_scale": FloatDistribution(low=1e-3, high=0.5, log=True),
                "pipelines.1.model.changepoint_range": FloatDistribution(low=0.8, high=0.95),
                "pipelines.1.model.holidays_prior_scale": FloatDistribution(low=1e-2, high=10, log=True),
                "pipelines.1.transforms.0.mode": CategoricalDistribution(["per-segment", "macro"]),
                "pipelines.1.transforms.0.with_mean": CategoricalDistribution([False, True]),
                "pipelines.1.transforms.0.with_std": CategoricalDistribution([False, True]),
            },
        )
    ],
)
def test_params_to_tune(pipelines, expected_params_to_tune):
    """Check the grid built from real pipelines: per-pipeline grids namespaced by index.

    The third pipeline (NaiveModel) contributes nothing, so no ``pipelines.2.*`` keys appear.
    """
    assert StackingEnsemble(pipelines=pipelines).params_to_tune() == expected_params_to_tune

0 comments on commit 3f185f9

Please sign in to comment.