Skip to content

Commit

Permalink
Add IQROutlierTransform (#387)
Browse files Browse the repository at this point in the history
* added implementation

* updated documentation

* added fixture

* added tests

* added inference tests

* bugfix

* added changelog
  • Loading branch information
brsnw250 authored Jun 14, 2024
1 parent 901acca commit 1318ff3
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 17 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add `get_anomalies_iqr` function for anomaly detection ([#374](https://github.com/etna-team/etna/pull/374))
- Add `get_anomalies_isolation_forest` method for anomaly detection ([#375](https://github.com/etna-team/etna/pull/375))
- Add `IForestOutlierTransform` ([#381](https://github.com/etna-team/etna/pull/381))
-
- Add `IQROutlierTransform` ([#387](https://github.com/etna-team/etna/pull/387))
-
-
-
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Transforms to detect outliers:
MedianOutliersTransform
PredictionIntervalOutliersTransform
IForestOutlierTransform
IQROutlierTransform

Transforms to work with time-related features:

Expand Down
2 changes: 1 addition & 1 deletion etna/analysis/outliers/rolling_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def iqr_method(
if iqr_scale <= 0:
raise ValueError("Scaling parameter must be positive!")

window = series[indices]
window = series.iloc[indices]

if trend or seasonality:
if stl_params is None:
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from etna.transforms.missing_values import TimeSeriesImputerTransform
from etna.transforms.outliers import DensityOutliersTransform
from etna.transforms.outliers import IForestOutlierTransform
from etna.transforms.outliers import IQROutlierTransform
from etna.transforms.outliers import MedianOutliersTransform
from etna.transforms.outliers import PredictionIntervalOutliersTransform
from etna.transforms.timestamp import DateFlagsTransform
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/outliers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from etna.transforms.outliers.base import OutliersTransform
from etna.transforms.outliers.point_outliers import DensityOutliersTransform
from etna.transforms.outliers.point_outliers import IForestOutlierTransform
from etna.transforms.outliers.point_outliers import IQROutlierTransform
from etna.transforms.outliers.point_outliers import MedianOutliersTransform
from etna.transforms.outliers.point_outliers import PredictionIntervalOutliersTransform
2 changes: 1 addition & 1 deletion etna/transforms/outliers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def _inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
return df

@abstractmethod
def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
def detect_outliers(self, ts: TSDataset) -> Dict[str, pd.Series]:
"""Call function for detection outliers with self parameters.
Parameters
Expand Down
99 changes: 97 additions & 2 deletions etna/transforms/outliers/point_outliers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Any
from typing import Callable
from typing import Dict
from typing import Optional
Expand All @@ -11,9 +12,10 @@

from etna import SETTINGS
from etna.analysis import get_anomalies_density
from etna.analysis import get_anomalies_iqr
from etna.analysis import get_anomalies_isolation_forest
from etna.analysis import get_anomalies_median
from etna.analysis import get_anomalies_prediction_interval
from etna.analysis.outliers import get_anomalies_isolation_forest
from etna.datasets import TSDataset
from etna.distributions import BaseDistribution
from etna.distributions import CategoricalDistribution
Expand Down Expand Up @@ -379,7 +381,8 @@ def detect_outliers(self, ts: TSDataset) -> Dict[str, pd.Series]:
def params_to_tune(self) -> Dict[str, BaseDistribution]:
"""Get default grid for tuning hyperparameters.
This grid tunes parameters: ``interval_width``, ``model``. Other parameters are expected to be set by the user.
This grid tunes parameters: ``n_estimators``, ``max_samples``, ``contamination``, ``max_features``, ``bootstrap``.
Other parameters are expected to be set by the user.
Returns
-------
Expand All @@ -395,9 +398,101 @@ def params_to_tune(self) -> Dict[str, BaseDistribution]:
}


class IQROutlierTransform(OutliersTransform):
    """Transform that uses :py:func:`~etna.analysis.outliers.rolling_statistics.get_anomalies_iqr` to find anomalies in data."""

    def __init__(
        self,
        in_column: str = "target",
        ignore_flag_column: Optional[str] = None,
        window_size: int = 10,
        stride: int = 1,
        iqr_scale: float = 1.5,
        trend: bool = False,
        seasonality: bool = False,
        period: Optional[int] = None,
        stl_params: Optional[Dict[str, Any]] = None,
    ):
        """Create instance of ``IQROutlierTransform``.

        Parameters
        ----------
        in_column:
            Name of the column in which anomalies are searched for
        ignore_flag_column:
            Column name for skipping values from outlier check
        window_size:
            Number of points in the window
        stride:
            Offset between neighboring windows
        iqr_scale:
            Scaling parameter of the estimated interval
        trend:
            Whether to remove trend from the series
        seasonality:
            Whether to remove seasonality from the series
        period:
            Periodicity of the sequence for STL
        stl_params:
            Other parameters for STL. See :py:class:`statsmodels.tsa.seasonal.STL`
        """
        # Detection settings are stored as-is and forwarded to ``get_anomalies_iqr``
        # by ``detect_outliers``; no validation is performed here.
        self.window_size = window_size
        self.stride = stride
        self.iqr_scale = iqr_scale
        self.trend = trend
        self.seasonality = seasonality
        self.period = period
        self.stl_params = stl_params
        super().__init__(in_column=in_column, ignore_flag_column=ignore_flag_column)

    def detect_outliers(self, ts: TSDataset) -> Dict[str, pd.Series]:
        """Call :py:func:`~etna.analysis.outliers.rolling_statistics.get_anomalies_iqr` function with self parameters.

        Parameters
        ----------
        ts:
            Dataset to process

        Returns
        -------
        :
            Dict of outliers in format {segment: series_with_outliers}
        """
        return get_anomalies_iqr(
            ts=ts,
            in_column=self.in_column,
            window_size=self.window_size,
            stride=self.stride,
            iqr_scale=self.iqr_scale,
            trend=self.trend,
            seasonality=self.seasonality,
            period=self.period,
            stl_params=self.stl_params,
            # Request the non-index-only result so that the value matches
            # the ``Dict[str, pd.Series]`` return annotation.
            index_only=False,
        )

    def params_to_tune(self) -> Dict[str, BaseDistribution]:
        """Get default grid for tuning hyperparameters.

        This grid tunes parameters: ``iqr_scale``, ``trend``, ``seasonality``.
        Other parameters are expected to be set by the user.

        Returns
        -------
        :
            Grid to tune.
        """
        return {
            "iqr_scale": FloatDistribution(low=0.5, high=10),
            "trend": CategoricalDistribution([True, False]),
            "seasonality": CategoricalDistribution([True, False]),
        }


__all__ = [
"MedianOutliersTransform",
"DensityOutliersTransform",
"PredictionIntervalOutliersTransform",
"IForestOutlierTransform",
"IQROutlierTransform",
]
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,12 @@ def outliers_tsds():
return tsds


@pytest.fixture()
def outliers_tsds_without_missing(outliers_tsds):
    """Rebuild ``outliers_tsds`` from its ``target`` data and exogenous data after ``dropna()``."""
    target_df = outliers_tsds[..., "target"].dropna()
    exog_df = outliers_tsds.df_exog.dropna()
    return TSDataset(df=target_df, freq="1d", df_exog=exog_df)


@pytest.fixture
def outliers_df_with_two_columns() -> TSDataset:
timestamp1 = np.arange(np.datetime64("2021-01-01"), np.datetime64("2021-02-10"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from etna.transforms import GaleShapleyFeatureSelectionTransform
from etna.transforms import HolidayTransform
from etna.transforms import IForestOutlierTransform
from etna.transforms import IQROutlierTransform
from etna.transforms import LabelEncoderTransform
from etna.transforms import LagTransform
from etna.transforms import LambdaTransform
Expand Down Expand Up @@ -465,6 +466,7 @@ def _test_inverse_transform_train(self, ts, transform, expected_changes):
{"change": {"target"}},
),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -912,6 +914,7 @@ def test_inverse_transform_train_fail_resample(self, transform, dataset_name, ex
(DensityOutliersTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res", in_column="external_timestamp"),
Expand Down Expand Up @@ -1240,6 +1243,7 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1546,6 +1550,7 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1997,6 +2002,7 @@ def test_inverse_transform_train_new_segments(self, transform, dataset_name, exp
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2441,6 +2447,7 @@ def test_inverse_transform_future_new_segments(self, transform, dataset_name, ex
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2885,6 +2892,7 @@ def _test_inverse_transform_future_with_target(
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -3412,6 +3420,7 @@ def _test_inverse_transform_future_without_target(
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down
9 changes: 9 additions & 0 deletions tests/test_transforms/test_inference/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from etna.transforms import GaleShapleyFeatureSelectionTransform
from etna.transforms import HolidayTransform
from etna.transforms import IForestOutlierTransform
from etna.transforms import IQROutlierTransform
from etna.transforms import LabelEncoderTransform
from etna.transforms import LagTransform
from etna.transforms import LambdaTransform
Expand Down Expand Up @@ -428,6 +429,7 @@ def _test_transform_train(self, ts, transform, expected_changes):
{"change": {"target"}},
),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -868,6 +870,7 @@ def test_transform_train_datetime_timestamp(self, transform, dataset_name, expec
(DensityOutliersTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {"change": {"target"}}),
# timestamp
(
DateFlagsTransform(out_column="res", in_column="external_timestamp"),
Expand Down Expand Up @@ -1191,6 +1194,7 @@ def _test_transform_train_subset_segments(self, ts, transform, segments):
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1484,6 +1488,7 @@ def _test_transform_future_subset_segments(self, ts, transform, segments, horizo
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(DateFlagsTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -1900,6 +1905,7 @@ def test_transform_train_new_segments(self, transform, dataset_name, expected_ch
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(SpecialDaysTransform(in_column="external_timestamp"), "ts_with_external_timestamp"),
Expand Down Expand Up @@ -2334,6 +2340,7 @@ def test_transform_future_new_segments(self, transform, dataset_name, expected_c
(MedianOutliersTransform(in_column="target"), "ts_with_outliers"),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers"),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers"),
(IQROutlierTransform(in_column="target"), "ts_with_outliers"),
# timestamp
(SpecialDaysTransform(), "regular_ts"),
(
Expand Down Expand Up @@ -2694,6 +2701,7 @@ def _test_transform_future_with_target(self, ts, transform, expected_changes, ga
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down Expand Up @@ -3191,6 +3199,7 @@ def _test_transform_future_without_target(self, ts, transform, expected_changes,
(MedianOutliersTransform(in_column="target"), "ts_with_outliers", {}),
(PredictionIntervalOutliersTransform(in_column="target", model=ProphetModel), "ts_with_outliers", {}),
(IForestOutlierTransform(in_column="target"), "ts_with_outliers", {}),
(IQROutlierTransform(in_column="target"), "ts_with_outliers", {}),
# timestamp
(
DateFlagsTransform(out_column="res"),
Expand Down
Loading

0 comments on commit 1318ff3

Please sign in to comment.