From b844595535a6340eb8d0bbbfc5de307771cb0d54 Mon Sep 17 00:00:00 2001 From: Nick Knize Date: Fri, 3 Nov 2023 08:08:15 -0700 Subject: [PATCH] fixing problem in remove seasonality CUSUM Summary: We used to have a problem with the seasonality definition in cusum: on the cusum level we map the string to a day-based value, but SeasonalityHandler expects to receive an hour-based one. Fortunately, there is no usage of this property in 1D. A new test for SeasonalityHandler was added. Differential Revision: D50120272 fbshipit-source-id: 287c0636a719ffbe4a0b21d0784513ef5c8989ad --- kats/detectors/cusum_model.py | 10 ++------- kats/tests/utils/test_decomposition.py | 13 ++++++++++- kats/utils/decomposition.py | 31 ++++++++++++++++---------- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/kats/detectors/cusum_model.py b/kats/detectors/cusum_model.py index 6507ce521..e67ef4067 100644 --- a/kats/detectors/cusum_model.py +++ b/kats/detectors/cusum_model.py @@ -56,12 +56,6 @@ NORMAL_TOLERENCE = 1 # number of window CHANGEPOINT_RETENTION: int = 7 * 24 * 60 * 60 # in seconds MAX_CHANGEPOINT = 10 -SEASON_PERIOD_FREQ_MAP: Dict[str, int] = { - "daily": 1, - "weekly": 7, - "monthly": 30, - "yearly": 365, -} _log: logging.Logger = logging.getLogger("cusum_model") @@ -197,7 +191,7 @@ def __init__( ] = CUSUMDefaultArgs.change_directions, score_func: Union[str, CusumScoreFunction] = DEFAULT_SCORE_FUNCTION, remove_seasonality: bool = CUSUMDefaultArgs.remove_seasonality, - season_period_freq: str = "daily", + season_period_freq: Union[str, int] = "daily", vectorized: Optional[bool] = None, adapted_pre_mean: Optional[bool] = None, ) -> None: @@ -229,7 +223,7 @@ def __init__( else: self.remove_seasonality: bool = remove_seasonality - self.season_period_freq: str = previous_model.get( + self.season_period_freq: Union[str, int] = previous_model.get( "season_period_freq", "daily" ) diff --git a/kats/tests/utils/test_decomposition.py b/kats/tests/utils/test_decomposition.py index 6452a79a7..90583ce93 100644 --- 
a/kats/tests/utils/test_decomposition.py +++ b/kats/tests/utils/test_decomposition.py @@ -12,7 +12,7 @@ from kats.consts import TimeSeriesData from kats.data.utils import load_air_passengers, load_data from kats.detectors.residual_translation import KDEResidualTranslator -from kats.utils.decomposition import TimeSeriesDecomposition +from kats.utils.decomposition import SeasonalityHandler, TimeSeriesDecomposition from kats.utils.simulator import Simulator from scipy.stats import ks_2samp from statsmodels.tsa.seasonal import seasonal_decompose, STL @@ -293,6 +293,17 @@ def test_plot(self) -> None: m.plot() + def test_seasnality_handler(self) -> None: + sh_data = SeasonalityHandler( + data=self.ts_data_daily, seasonal_period=24 * 60 * 60 + ) + historical_data = sh_data.remove_seasonality() + self.assertNotEqual(self.ts_data_daily, historical_data) + + sh_data = SeasonalityHandler(data=self.ts_data_daily, seasonal_period="daily") + historical_data = sh_data.remove_seasonality() + self.assertNotEqual(self.ts_data_daily, historical_data) + def test_multiplicative_assert(self) -> None: data_new = self.ts_data.to_dataframe().copy() data_new["y"] = -1.0 * data_new["y"] diff --git a/kats/utils/decomposition.py b/kats/utils/decomposition.py index 5b7cf5e35..5f7792bbe 100644 --- a/kats/utils/decomposition.py +++ b/kats/utils/decomposition.py @@ -278,7 +278,7 @@ class SeasonalityHandler: SeasonalityHandler is a class that do timeseries STL decomposition for detecors Attributes: data: TimeSeriesData that need to be decomposed - seasonal_period: str, default value is 'daily'. Other possible values: 'hourly', 'weekly', 'biweekly', 'monthly', 'yearly' + seasonal_period: str, default value is 'daily'. 
Other possible values: 'hourly', 'weekly', 'biweekly', 'monthly', 'yearly' or integer which represent amoutn of seconds >>> # Example usage: >>> from kats.utils.simulator import Simulator @@ -290,18 +290,18 @@ class SeasonalityHandler: """ PERIOD_MAP: Dict[str, int] = { - "hourly": 1, - "daily": 24, - "weekly": 7 * 24, - "biweekly": 14 * 24, - "monthly": 30 * 24, - "yearly": 365 * 24, + "hourly": 1 * 60 * 60, + "daily": 24 * 60 * 60, + "weekly": 7 * 24 * 60 * 60, + "biweekly": 14 * 24 * 60 * 60, + "monthly": 30 * 24 * 60 * 60, + "yearly": 365 * 24 * 60 * 60, } def __init__( self, data: TimeSeriesData, - seasonal_period: str = "daily", + seasonal_period: Union[str, int] = "daily", ignore_irregular_freq: bool = False, **kwargs: Any, ) -> None: @@ -312,11 +312,18 @@ def __init__( self.data = data - if seasonal_period not in SeasonalityHandler.PERIOD_MAP: - msg = "Invalid seasonal_period, possible values are 'hourly', 'daily', 'weekly', 'biweekly', 'monthly', and 'yearly'" + if isinstance(seasonal_period, str): + if seasonal_period not in SeasonalityHandler.PERIOD_MAP: + msg = "Invalid seasonal_period str value, possible values are integer or 'hourly', 'daily', 'weekly', 'biweekly', 'monthly', and 'yearly'" + logging.error(msg) + raise ParameterError(msg) + self.seasonal_period: int = SeasonalityHandler.PERIOD_MAP[seasonal_period] + elif type(seasonal_period) is int: + self.seasonal_period: int = seasonal_period + else: + msg = "Invalid seasonal_period type, possible values are integer or 'hourly', 'daily', 'weekly', 'biweekly', 'monthly', and 'yearly'" logging.error(msg) raise ParameterError(msg) - self.seasonal_period: int = SeasonalityHandler.PERIOD_MAP[seasonal_period] self.low_pass_jump_factor: float = kwargs.get("lpj_factor", 0.15) self.trend_jump_factor: float = kwargs.get("tj_factor", 0.15) @@ -360,7 +367,7 @@ def __init__( raise DataIrregularGranularityError(IRREGULAR_GRANULARITY_ERROR) self.period: int = min( - int(self.seasonal_period * 60 * 60 / 
self.frequency.total_seconds()), + int(self.seasonal_period / self.frequency.total_seconds()), len(self.data) // 2, )