Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update for package with fixes #6

Merged
merged 6 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion forecastout/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ models:
autoarima:
"seasonality": "True"
"freq": 12
"alpha_intervals": 0.05
"alpha_intervals": 95
prophet:
holtwinters:
"freq": 12
Expand Down
7 changes: 4 additions & 3 deletions forecastout/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,8 @@ def __get_daily_disaggregation_shares(self):
df=df.copy(),
current_month=(
self.df_monthly.loc[
~self.df_monthly['value'].isna(), 'date'
].max()
self.df_monthly['value'].isna(), 'date'
].min()
),
granularity_month=False
)
Expand Down Expand Up @@ -192,5 +192,6 @@ def __make_daily_forecast(self):
def __remake_monthly_forecast(self):
self.df_monthly_forecast = remake_monthly_forecast_current_month(
df_daily_forecast=self.df_daily_forecast.copy(),
df_actuals=self.df.copy()
df_actuals=self.df.copy(),
sum_aggregation=self.sum_aggregation
)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

def remake_monthly_forecast_current_month(
df_daily_forecast: pd.DataFrame,
df_actuals: pd.DataFrame
df_actuals: pd.DataFrame,
sum_aggregation: bool
) -> pd.DataFrame:
# -- Get actuals of unfinished month
first_forecast_day = df_daily_forecast['date'].min()
Expand All @@ -26,13 +27,22 @@ def remake_monthly_forecast_current_month(
df_daily_forecast["date"] = (
df_daily_forecast["date"].astype(str).str[0:7]
)
df_monthly_forecast = (
df_daily_forecast
.groupby(["date", "model"])
[["forecast", "forecast_lower", "forecast_upper"]]
.sum()
.reset_index()
)
if sum_aggregation:
df_monthly_forecast = (
df_daily_forecast
.groupby(["date", "model"])
[["forecast", "forecast_lower", "forecast_upper"]]
.sum()
.reset_index()
)
else:
df_monthly_forecast = (
df_daily_forecast
.groupby(["date", "model"])
[["forecast", "forecast_lower", "forecast_upper"]]
.mean()
.reset_index()
)
df_monthly_forecast['date'] = pd.to_datetime(
df_monthly_forecast['date'] + '-01'
)
Expand Down
3 changes: 2 additions & 1 deletion forecastout/forecast_engine/forecast_model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def get_model(
if model == "autoarima":
return AutoArimaModel(
df_train_y=df_train_y,
dict_config=dict_config
dict_config=dict_config,
series_train_dates=series_train_dates
)
if model == "prophet":
return ProphetModel(
Expand Down
7 changes: 5 additions & 2 deletions forecastout/forecast_models/abstract_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
from abc import ABC, abstractmethod
import pandas as pd


class ForecastModel(ABC):

def __init__(
self,
df_train_y:pd.DataFrame,
dict_config:dict
df_train_y: pd.DataFrame,
dict_config: dict,
series_train_dates: pd.Series
):
self.df_train_y = df_train_y
self.dict_config = dict_config
self.series_train_dates = series_train_dates

@abstractmethod
def do_forecast(self, list_dates: list):
Expand Down
66 changes: 41 additions & 25 deletions forecastout/forecast_models/autoarima_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from forecastout.forecast_models.abstract_model import ForecastModel
from pmdarima.arima import auto_arima
from statsforecast.models import AutoARIMA
from statsforecast import StatsForecast
import pandas as pd
import numpy as np


class AutoArimaModel(ForecastModel):
Expand All @@ -13,34 +13,50 @@ def __init__(self,
**kwargs
):
super(AutoArimaModel, self).__init__(*args, **kwargs)
self.arima_model = (
auto_arima(self.df_train_y,
seasonal=self.dict_config["seasonality"],
m=self.dict_config["freq"]
)
# -- Constructor
# -- Initialize autoarima
self.autoarima_model = StatsForecast(models=[
AutoARIMA(
season_length=self.dict_config['freq'],
seasonal=self.dict_config['seasonality'])],
freq='MS',
n_jobs=-1
)
# -- Transform data
df_train_y_autoarima = (
pd.concat([self.series_train_dates, self.df_train_y], axis=1)
)
df_train_y_autoarima.columns = ['ds', 'y']
df_train_y_autoarima['ds'] = (
pd.to_datetime(df_train_y_autoarima['ds'])
)
df_train_y_autoarima['ds'] = (
df_train_y_autoarima['ds'].dt.tz_localize(None)
)
df_train_y_autoarima['unique_id'] = 'id'
# -- Fit forecast_models
self.autoarima_model.fit(df_train_y_autoarima)

def do_forecast(self, list_dates: list) -> pd.DataFrame:
arima_prediction, arima_conf_int = self.arima_model.predict(
len(list_dates),
return_conf_int=True,
alpha=self.dict_config["alpha_intervals"]
# -- Predict
autoarima_prediction = self.autoarima_model.predict(
h=len(list_dates),
level=[self.dict_config['alpha_intervals']]
).reset_index()
# -- Transform data for correct output
autoarima_prediction = (
autoarima_prediction.drop(['unique_id'], axis=1)
)
arima_prediction = pd.concat(
[
pd.DataFrame(
np.array(arima_prediction.reset_index()[0]),
columns=['forecast']),
pd.DataFrame(
arima_conf_int,
columns=['forecast_lower', 'forecast_upper'])
],
axis=1)
arima_prediction['model'] = 'autoarima'
arima_prediction['date'] = list_dates
autoarima_prediction.columns = [
'date', 'forecast', 'forecast_lower', 'forecast_upper'
]
autoarima_prediction['model'] = 'autoarima'
autoarima_prediction['date'] = list_dates
# -- Ensure date
arima_prediction['date'] = pd.to_datetime(arima_prediction['date'])
return arima_prediction
autoarima_prediction['date'] = (
pd.to_datetime(autoarima_prediction['date'])
)
return autoarima_prediction

def get_feature_importance(self):
return None
3 changes: 1 addition & 2 deletions forecastout/forecast_models/holtwinters_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

class HoltWintersModel(ForecastModel):
def __init__(self,
series_train_dates: pd.Series,
*args,
**kwargs
):
Expand All @@ -33,7 +32,7 @@ def __init__(self,
)
# -- Transform data
df_train_y_holtwinters = pd.concat(
[series_train_dates, self.df_train_y],
[self.series_train_dates, self.df_train_y],
axis=1
)
df_train_y_holtwinters.columns = ['ds', 'y']
Expand Down
3 changes: 1 addition & 2 deletions forecastout/forecast_models/naive_seasonal.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

class NaiveSeasonalModel(ForecastModel):
def __init__(self,
series_train_dates: pd.Series,
*args,
**kwargs
):
Expand All @@ -24,7 +23,7 @@ def __init__(self,
)
# -- Transform data
df_train_y_seasonalnaive = (
pd.concat([series_train_dates, self.df_train_y], axis=1)
pd.concat([self.series_train_dates, self.df_train_y], axis=1)
)
df_train_y_seasonalnaive.columns = ['ds', 'y']
df_train_y_seasonalnaive['ds'] = (
Expand Down
3 changes: 1 addition & 2 deletions forecastout/forecast_models/prophet_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

class ProphetModel(ForecastModel):
def __init__(self,
series_train_dates: pd.Series,
*args,
**kwargs
):
Expand All @@ -16,7 +15,7 @@ def __init__(self,
# -- Prophet
# -- Constructor
df_train_y_prophet = pd.concat(
[series_train_dates, self.df_train_y],
[self.series_train_dates, self.df_train_y],
axis=1)
df_train_y_prophet.columns = ['ds', 'y']
df_train_y_prophet['ds'] = pd.to_datetime(df_train_y_prophet['ds'])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ def test_remake_monthly_forecast_current_month():
pd.testing.assert_frame_equal(
remake_monthly_forecast_current_month(
df_daily_forecast=input_df_daily_forecast(),
df_actuals=input_df_actuals()
df_actuals=input_df_actuals(),
sum_aggregation=True
),
expected_df()
)
Expand Down
Loading
Loading