diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 0f8934c57..44157ceb9 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -40,4 +40,4 @@ jobs:
uv pip install --system "numpy<2" ".[dev]"
- name: Tests
- run: nbdev_test --do_print --timing --n_workers 0 --flags polars
+ run: nbdev_test --do_print --timing --n_workers 0 --flags polars
\ No newline at end of file
diff --git a/action_files/test_models/src/evaluation.py b/action_files/test_models/src/evaluation.py
index e93d0d9e9..cda6e059b 100644
--- a/action_files/test_models/src/evaluation.py
+++ b/action_files/test_models/src/evaluation.py
@@ -41,9 +41,12 @@ def evaluate(model: str, dataset: str, group: str):
if __name__ == '__main__':
groups = ['Monthly']
- models = ['AutoDilatedRNN', 'RNN', 'TCN', 'DeepAR',
+ models = ['AutoDilatedRNN', 'RNN',
+ 'TCN',
+ 'DeepAR',
'NHITS', 'TFT', 'AutoMLP', 'DLinear', 'VanillaTransformer',
- 'BiTCN', 'TiDE', 'DeepNPTS', 'NBEATS', 'KAN']
+ 'BiTCN', 'TiDE', 'DeepNPTS', 'NBEATS', 'KAN'
+ ]
datasets = ['M3']
evaluation = [evaluate(model, dataset, group) for model, group in product(models, groups) for dataset in datasets]
evaluation = [eval_ for eval_ in evaluation if eval_ is not None]
diff --git a/action_files/test_models/src/models.py b/action_files/test_models/src/models.py
index ec32b5a82..cc56e46c9 100644
--- a/action_files/test_models/src/models.py
+++ b/action_files/test_models/src/models.py
@@ -1,30 +1,17 @@
-import os
import time
import fire
-# import numpy as np
import pandas as pd
-# import pytorch_lightning as pl
-# import torch
-# import neuralforecast
from neuralforecast.core import NeuralForecast
-# from neuralforecast.models.gru import GRU
from neuralforecast.models.rnn import RNN
from neuralforecast.models.tcn import TCN
-# from neuralforecast.models.lstm import LSTM
-# from neuralforecast.models.dilated_rnn import DilatedRNN
from neuralforecast.models.deepar import DeepAR
-# from neuralforecast.models.mlp import MLP
from neuralforecast.models.nhits import NHITS
from neuralforecast.models.nbeats import NBEATS
-# from neuralforecast.models.nbeatsx import NBEATSx
from neuralforecast.models.tft import TFT
from neuralforecast.models.vanillatransformer import VanillaTransformer
-# from neuralforecast.models.informer import Informer
-# from neuralforecast.models.autoformer import Autoformer
-# from neuralforecast.models.patchtst import PatchTST
from neuralforecast.models.dlinear import DLinear
from neuralforecast.models.bitcn import BiTCN
from neuralforecast.models.tide import TiDE
@@ -33,10 +20,7 @@
from neuralforecast.auto import (
AutoMLP,
- # AutoNHITS,
- # AutoNBEATS,
AutoDilatedRNN,
- # AutoTFT
)
from neuralforecast.losses.pytorch import SMAPE, MAE, IQLoss
@@ -44,9 +28,6 @@
from src.data import get_data
-os.environ['NIXTLA_ID_AS_COL'] = '1'
-
-
def main(dataset: str = 'M3', group: str = 'Monthly') -> None:
train, horizon, freq, seasonality = get_data('data/', dataset, group)
train['ds'] = pd.to_datetime(train['ds'])
@@ -61,21 +42,22 @@ def main(dataset: str = 'M3', group: str = 'Monthly') -> None:
"random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
}
config_drnn = {'input_size': tune.choice([2 * horizon]),
- 'encoder_hidden_size': tune.choice([124]),
+ 'encoder_hidden_size': tune.choice([16]),
"max_steps": 300,
"val_check_steps": 100,
- "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),}
+ "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
+ "scaler_type": "minmax1"}
models = [
AutoDilatedRNN(h=horizon, loss=MAE(), config=config_drnn, num_samples=2, cpus=1),
- RNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
- TCN(h=horizon, input_size=2 * horizon, encoder_hidden_size=20, max_steps=300),
+ RNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
+ TCN(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
NHITS(h=horizon, input_size=2 * horizon, dropout_prob_theta=0.5, loss=MAE(), max_steps=1000, val_check_steps=500),
AutoMLP(h=horizon, loss=MAE(), config=config, num_samples=2, cpus=1),
DLinear(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=2000, val_check_steps=500),
TFT(h=horizon, input_size=2 * horizon, loss=SMAPE(), hidden_size=64, scaler_type='robust', windows_batch_size=512, max_steps=1500, val_check_steps=500),
VanillaTransformer(h=horizon, input_size=2 * horizon, loss=MAE(), hidden_size=64, scaler_type='minmax1', windows_batch_size=512, max_steps=1500, val_check_steps=500),
- DeepAR(h=horizon, input_size=2 * horizon, scaler_type='minmax1', max_steps=1000),
+ DeepAR(h=horizon, input_size=2 * horizon, scaler_type='minmax1', max_steps=500),
BiTCN(h=horizon, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
TiDE(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
DeepNPTS(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
diff --git a/action_files/test_models/src/models2.py b/action_files/test_models/src/models2.py
index b309003fb..2f5f26cd2 100644
--- a/action_files/test_models/src/models2.py
+++ b/action_files/test_models/src/models2.py
@@ -1,43 +1,25 @@
-import os
import time
import fire
-import numpy as np
import pandas as pd
-import pytorch_lightning as pl
-import torch
-import neuralforecast
from neuralforecast.core import NeuralForecast
from neuralforecast.models.gru import GRU
-from neuralforecast.models.rnn import RNN
-from neuralforecast.models.tcn import TCN
from neuralforecast.models.lstm import LSTM
from neuralforecast.models.dilated_rnn import DilatedRNN
-from neuralforecast.models.deepar import DeepAR
-from neuralforecast.models.mlp import MLP
-from neuralforecast.models.nhits import NHITS
-from neuralforecast.models.nbeats import NBEATS
from neuralforecast.models.nbeatsx import NBEATSx
-from neuralforecast.models.tft import TFT
-from neuralforecast.models.vanillatransformer import VanillaTransformer
-from neuralforecast.models.informer import Informer
-from neuralforecast.models.autoformer import Autoformer
-from neuralforecast.models.patchtst import PatchTST
from neuralforecast.auto import (
- AutoMLP, AutoNHITS, AutoNBEATS, AutoDilatedRNN, AutoTFT
+ AutoNHITS,
+ AutoNBEATS,
)
-from neuralforecast.losses.pytorch import SMAPE, MAE
+from neuralforecast.losses.pytorch import MAE
from ray import tune
from src.data import get_data
-os.environ['NIXTLA_ID_AS_COL'] = '1'
-
-
def main(dataset: str = 'M3', group: str = 'Monthly') -> None:
train, horizon, freq, seasonality = get_data('data/', dataset, group)
train['ds'] = pd.to_datetime(train['ds'])
@@ -49,32 +31,17 @@ def main(dataset: str = 'M3', group: str = 'Monthly') -> None:
"scaler_type": "minmax1",
"random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
}
- config = {
- "hidden_size": tune.choice([256, 512]),
- "num_layers": tune.choice([2, 4]),
- "input_size": tune.choice([2 * horizon]),
- "max_steps": 1000,
- "val_check_steps": 300,
- "scaler_type": "minmax1",
- "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
- }
- config_drnn = {'input_size': tune.choice([2 * horizon]),
- 'encoder_hidden_size': tune.choice([124]),
- "max_steps": 300,
- "val_check_steps": 100,
- "random_seed": tune.choice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),}
models = [
- LSTM(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
- DilatedRNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
- GRU(h=horizon, input_size=2 * horizon, encoder_hidden_size=50, max_steps=300),
+ LSTM(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
+ DilatedRNN(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
+ GRU(h=horizon, input_size=2 * horizon, encoder_hidden_size=64, max_steps=300),
AutoNBEATS(h=horizon, loss=MAE(), config=config_nbeats, num_samples=2, cpus=1),
AutoNHITS(h=horizon, loss=MAE(), config=config_nbeats, num_samples=2, cpus=1),
NBEATSx(h=horizon, input_size=2 * horizon, loss=MAE(), max_steps=1000),
- PatchTST(h=horizon, input_size=2 * horizon, patch_len=4, stride=4, loss=MAE(), scaler_type='minmax1', windows_batch_size=512, max_steps=1000, val_check_steps=500),
]
# Models
- for model in models[:-1]:
+ for model in models:
model_name = type(model).__name__
print(50*'-', model_name, 50*'-')
start = time.time()
diff --git a/action_files/test_models/src/multivariate_evaluation.py b/action_files/test_models/src/multivariate_evaluation.py
index f92346bbc..ec0b9c233 100644
--- a/action_files/test_models/src/multivariate_evaluation.py
+++ b/action_files/test_models/src/multivariate_evaluation.py
@@ -61,6 +61,5 @@ def evaluate(model: str, dataset: str, group: str):
df_evaluation.columns = ['dataset', 'model', 'metric', 'val']
df_evaluation = df_evaluation.set_index(['dataset', 'metric', 'model']).unstack().round(3)
df_evaluation = df_evaluation.droplevel(0, 1).reset_index()
- # df_evaluation['AutoARIMA'] = [666.82, 15.35, 3.000]
df_evaluation.to_csv('data/evaluation.csv')
print(df_evaluation.T)
diff --git a/action_files/test_models/src/multivariate_models.py b/action_files/test_models/src/multivariate_models.py
index 1b1d9593b..e613a1125 100644
--- a/action_files/test_models/src/multivariate_models.py
+++ b/action_files/test_models/src/multivariate_models.py
@@ -10,7 +10,6 @@
from neuralforecast.models.tsmixer import TSMixer
from neuralforecast.models.tsmixerx import TSMixerx
from neuralforecast.models.itransformer import iTransformer
-# from neuralforecast.models.stemgnn import StemGNN
from neuralforecast.models.mlpmultivariate import MLPMultivariate
from neuralforecast.models.timemixer import TimeMixer
@@ -26,13 +25,12 @@ def main(dataset: str = 'multivariate', group: str = 'ETTm2') -> None:
train['ds'] = pd.to_datetime(train['ds'])
models = [
- SOFTS(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- TSMixer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- TSMixerx(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- iTransformer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500),
- # StemGNN(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout_rate=0.0, max_steps=1000, val_check_steps=500),
- MLPMultivariate(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), max_steps=1000, val_check_steps=500),
- TimeMixer(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=500)
+ SOFTS(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=500, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ TSMixer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ TSMixerx(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=1000, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ iTransformer(h=horizon, n_series=7, input_size=2 * horizon, loss=MAE(), dropout=0.0, max_steps=500, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ MLPMultivariate(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), max_steps=1000, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64),
+ TimeMixer(h=horizon, n_series=7, input_size=2*horizon, loss=MAE(), dropout=0.0, max_steps=500, val_check_steps=100, windows_batch_size=64, inference_windows_batch_size=64)
]
# Models
diff --git a/nbs/common.base_auto.ipynb b/nbs/common.base_auto.ipynb
index e120c2f33..16db978b4 100644
--- a/nbs/common.base_auto.ipynb
+++ b/nbs/common.base_auto.ipynb
@@ -238,7 +238,11 @@
" self.callbacks = callbacks\n",
"\n",
" # Base Class attributes\n",
- " self.SAMPLING_TYPE = cls_model.SAMPLING_TYPE\n",
+ " self.EXOGENOUS_FUTR = cls_model.EXOGENOUS_FUTR\n",
+ " self.EXOGENOUS_HIST = cls_model.EXOGENOUS_HIST\n",
+ " self.EXOGENOUS_STAT = cls_model.EXOGENOUS_STAT\n",
+ " self.MULTIVARIATE = cls_model.MULTIVARIATE \n",
+ " self.RECURRENT = cls_model.RECURRENT \n",
"\n",
" def __repr__(self):\n",
" return type(self).__name__ if self.alias is None else self.alias\n",
diff --git a/nbs/common.base_model.ipynb b/nbs/common.base_model.ipynb
index 0f6aaae5a..458136169 100644
--- a/nbs/common.base_model.ipynb
+++ b/nbs/common.base_model.ipynb
@@ -36,19 +36,25 @@
"from contextlib import contextmanager\n",
"from copy import deepcopy\n",
"from dataclasses import dataclass\n",
+ "from typing import List, Dict, Union\n",
"\n",
"import fsspec\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
+ "import torch.nn.functional as F\n",
"import pytorch_lightning as pl\n",
+ "import neuralforecast.losses.pytorch as losses\n",
+ "\n",
+ "from neuralforecast.losses.pytorch import BasePointLoss, DistributionLoss\n",
"from pytorch_lightning.callbacks.early_stopping import EarlyStopping\n",
"from neuralforecast.tsdataset import (\n",
" TimeSeriesDataModule,\n",
" BaseTimeSeriesDataset,\n",
" _DistributedTimeSeriesDataModule,\n",
")\n",
- "from neuralforecast.losses.pytorch import IQLoss"
+ "from neuralforecast.common._scalers import TemporalNorm\n",
+ "from neuralforecast.utils import get_indexer_raise_missing"
]
},
{
@@ -128,27 +134,94 @@
"source": [
"#| export\n",
"class BaseModel(pl.LightningModule):\n",
- " EXOGENOUS_FUTR = True\n",
- " EXOGENOUS_HIST = True\n",
- " EXOGENOUS_STAT = True\n",
+ " EXOGENOUS_FUTR = True # If the model can handle future exogenous variables\n",
+ " EXOGENOUS_HIST = True # If the model can handle historical exogenous variables\n",
+ " EXOGENOUS_STAT = True # If the model can handle static exogenous variables\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(\n",
" self,\n",
- " random_seed,\n",
- " loss,\n",
- " valid_loss,\n",
- " optimizer,\n",
- " optimizer_kwargs,\n",
- " lr_scheduler,\n",
- " lr_scheduler_kwargs,\n",
- " futr_exog_list,\n",
- " hist_exog_list,\n",
- " stat_exog_list,\n",
- " max_steps,\n",
- " early_stop_patience_steps,\n",
+ " h: int,\n",
+ " input_size: int,\n",
+ " loss: Union[BasePointLoss, DistributionLoss, nn.Module],\n",
+ " valid_loss: Union[BasePointLoss, DistributionLoss, nn.Module],\n",
+ " learning_rate: float,\n",
+ " max_steps: int,\n",
+ " val_check_steps: int,\n",
+ " batch_size: int,\n",
+ " valid_batch_size: Union[int, None],\n",
+ " windows_batch_size: int,\n",
+ " inference_windows_batch_size: Union[int, None],\n",
+ " start_padding_enabled: bool,\n",
+ " n_series: Union[int, None] = None,\n",
+ " n_samples: Union[int, None] = 100,\n",
+ " h_train: int = 1,\n",
+ " inference_input_size: Union[int, None] = None,\n",
+ " step_size: int = 1,\n",
+ " num_lr_decays: int = 0,\n",
+ " early_stop_patience_steps: int = -1,\n",
+ " scaler_type: str = 'identity',\n",
+ " futr_exog_list: Union[List, None] = None,\n",
+ " hist_exog_list: Union[List, None] = None,\n",
+ " stat_exog_list: Union[List, None] = None,\n",
+ " exclude_insample_y: Union[bool, None] = False,\n",
+ " drop_last_loader: Union[bool, None] = False,\n",
+ " random_seed: Union[int, None] = 1,\n",
+ " alias: Union[str, None] = None,\n",
+ " optimizer: Union[torch.optim.Optimizer, None] = None,\n",
+ " optimizer_kwargs: Union[Dict, None] = None,\n",
+ " lr_scheduler: Union[torch.optim.lr_scheduler.LRScheduler, None] = None,\n",
+ " lr_scheduler_kwargs: Union[Dict, None] = None,\n",
+ " dataloader_kwargs=None,\n",
" **trainer_kwargs,\n",
" ):\n",
" super().__init__()\n",
+ "\n",
+    "        # Multivariate checks\n",
+ " if self.MULTIVARIATE and n_series is None:\n",
+ " raise Exception(f'{type(self).__name__} is a multivariate model. Please set n_series to the number of unique time series in your dataset.')\n",
+ " if not self.MULTIVARIATE:\n",
+ " if n_series is not None:\n",
+ " warnings.warn(\n",
+ " f'{type(self).__name__} is a univariate model. Parameter n_series is ignored.'\n",
+ " )\n",
+ " n_series = 1\n",
+ " self.n_series = n_series \n",
+ "\n",
+ " # Protections for previous recurrent models\n",
+ " if input_size < 1:\n",
+ " input_size = 3 * h\n",
+ " warnings.warn(\n",
+ " f'Input size too small. Automatically setting input size to 3 * horizon = {input_size}'\n",
+ " )\n",
+ "\n",
+ " if inference_input_size is None:\n",
+ " inference_input_size = input_size \n",
+ " elif inference_input_size is not None and inference_input_size < 1:\n",
+ " inference_input_size = input_size\n",
+ " warnings.warn(\n",
+ " f'Inference input size too small. Automatically setting inference input size to input_size = {input_size}'\n",
+ " )\n",
+ "\n",
+ " # For recurrent models we need one additional input as we need to shift insample_y to use it as input\n",
+ " if self.RECURRENT:\n",
+ " input_size += 1\n",
+ " inference_input_size += 1\n",
+ "\n",
+ " # Attributes needed for recurrent models\n",
+ " self.horizon_backup = h\n",
+ " self.input_size_backup = input_size\n",
+ " self.n_samples = n_samples\n",
+ " if self.RECURRENT:\n",
+ " if hasattr(loss, 'horizon_weight') and loss.horizon_weight is not None and h_train != h:\n",
+ " warnings.warn(f'Setting h_train={h} to match the horizon_weight length.') \n",
+ " h_train = h\n",
+ " self.h_train = h_train\n",
+ " self.inference_input_size = inference_input_size\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = False\n",
+ " \n",
" with warnings.catch_warnings(record=False):\n",
" warnings.filterwarnings('ignore')\n",
" # the following line issues a warning about the loss attribute being saved\n",
@@ -163,8 +236,8 @@
" self.valid_loss = loss\n",
" else:\n",
" self.valid_loss = valid_loss\n",
- " self.train_trajectories = []\n",
- " self.valid_trajectories = []\n",
+ " self.train_trajectories: List = []\n",
+ " self.valid_trajectories: List = []\n",
"\n",
" # Optimization\n",
" if optimizer is not None and not issubclass(optimizer, torch.optim.Optimizer):\n",
@@ -178,7 +251,6 @@
" self.lr_scheduler = lr_scheduler\n",
" self.lr_scheduler_kwargs = lr_scheduler_kwargs if lr_scheduler_kwargs is not None else {}\n",
"\n",
- "\n",
" # Variables\n",
" self.futr_exog_list = list(futr_exog_list) if futr_exog_list is not None else []\n",
" self.hist_exog_list = list(hist_exog_list) if hist_exog_list is not None else []\n",
@@ -197,12 +269,28 @@
" if not self.EXOGENOUS_STAT and self.stat_exog_size > 0:\n",
" raise Exception(f'{type(self).__name__} does not support static exogenous variables.')\n",
"\n",
- " # Implicit Quantile Loss\n",
- " if isinstance(self.loss, IQLoss):\n",
- " if not isinstance(self.valid_loss, IQLoss):\n",
- " raise Exception('Please set valid_loss to IQLoss() when training with IQLoss')\n",
- " if isinstance(self.valid_loss, IQLoss) and not isinstance(self.loss, IQLoss):\n",
- " raise Exception('Please set loss to IQLoss() when validating with IQLoss') \n",
+ " # Protections for loss functions\n",
+ " if isinstance(self.loss, (losses.IQLoss, losses.MQLoss, losses.HuberMQLoss)):\n",
+ " loss_type = type(self.loss)\n",
+ " if not isinstance(self.valid_loss, loss_type):\n",
+ " raise Exception(f'Please set valid_loss={type(self.loss).__name__}() when training with {type(self.loss).__name__}')\n",
+ " if isinstance(self.valid_loss, losses.IQLoss):\n",
+ " valid_loss_type = type(self.valid_loss)\n",
+ " if not isinstance(self.loss, valid_loss_type):\n",
+ " raise Exception(f'Please set loss={type(self.valid_loss).__name__}() when validating with {type(self.valid_loss).__name__}') \n",
+ "\n",
+ " # Deny impossible loss / valid_loss combinations\n",
+ " if isinstance(self.loss, losses.BasePointLoss) and self.valid_loss.is_distribution_output:\n",
+ " raise Exception(f'Validation with distribution loss {type(self.valid_loss).__name__} is not possible when using loss={type(self.loss).__name__}. Please use a point valid_loss (MAE, MSE, ...)')\n",
+ " elif self.valid_loss.is_distribution_output and self.valid_loss is not loss:\n",
+    "            # We could raise a warning or an exception here; for now we silently fall back to the training loss.\n",
+ " self.valid_loss = loss\n",
+ " \n",
+ " if isinstance(self.loss, (losses.relMSE, losses.Accuracy, losses.sCRPS)):\n",
+ " raise Exception(f\"{type(self.loss).__name__} cannot be used for training. Please use another loss function (MAE, MSE, ...)\")\n",
+ " \n",
+ " if isinstance(self.valid_loss, (losses.relMSE)):\n",
+ " raise Exception(f\"{type(self.valid_loss).__name__} cannot be used for validation. Please use another valid_loss (MAE, MSE, ...)\")\n",
"\n",
" ## Trainer arguments ##\n",
" # Max steps, validation steps and check_val_every_n_epoch\n",
@@ -233,7 +321,72 @@
" if trainer_kwargs.get('enable_checkpointing', None) is None:\n",
" trainer_kwargs['enable_checkpointing'] = False\n",
"\n",
+ " # Set other attributes\n",
" self.trainer_kwargs = trainer_kwargs\n",
+ " self.h = h\n",
+ " self.input_size = input_size\n",
+ " self.windows_batch_size = windows_batch_size\n",
+ " self.start_padding_enabled = start_padding_enabled\n",
+ "\n",
+ " # Padder to complete train windows, \n",
+ " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
+ " if start_padding_enabled:\n",
+ " self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0.0)\n",
+ " else:\n",
+ " self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
+ "\n",
+ " # Batch sizes\n",
+ " if self.MULTIVARIATE and n_series is not None:\n",
+ " self.batch_size = max(batch_size, n_series)\n",
+ " else:\n",
+ " self.batch_size = batch_size\n",
+ " if valid_batch_size is None:\n",
+ " self.valid_batch_size = batch_size\n",
+ " else:\n",
+ " self.valid_batch_size = valid_batch_size\n",
+ " if inference_windows_batch_size is None:\n",
+ " self.inference_windows_batch_size = windows_batch_size\n",
+ " else:\n",
+ " self.inference_windows_batch_size = inference_windows_batch_size\n",
+ "\n",
+ " # Optimization \n",
+ " self.learning_rate = learning_rate\n",
+ " self.max_steps = max_steps\n",
+ " self.num_lr_decays = num_lr_decays\n",
+ " self.lr_decay_steps = (\n",
+ " max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
+ " )\n",
+ " self.early_stop_patience_steps = early_stop_patience_steps\n",
+ " self.val_check_steps = val_check_steps\n",
+ " self.windows_batch_size = windows_batch_size\n",
+ " self.step_size = step_size\n",
+ " \n",
+ " # If the model does not support exogenous, it can't support exclude_insample_y\n",
+ " if exclude_insample_y and not (self.EXOGENOUS_FUTR or self.EXOGENOUS_HIST or self.EXOGENOUS_STAT):\n",
+ " raise Exception(f'{type(self).__name__} does not support `exclude_insample_y=True`. Please set `exclude_insample_y=False`')\n",
+ "\n",
+ " self.exclude_insample_y = exclude_insample_y\n",
+ "\n",
+ " # Scaler\n",
+ " self.scaler = TemporalNorm(\n",
+ " scaler_type=scaler_type,\n",
+ " dim=1, # Time dimension is 1.\n",
+    "            num_features=1 + len(self.hist_exog_list) + len(self.futr_exog_list)\n",
+ " )\n",
+ "\n",
+ " # Fit arguments\n",
+ " self.val_size = 0\n",
+ " self.test_size = 0\n",
+ "\n",
+ " # Model state\n",
+ " self.decompose_forecast = False\n",
+ "\n",
+ " # DataModule arguments\n",
+ " self.dataloader_kwargs = dataloader_kwargs\n",
+ " self.drop_last_loader = drop_last_loader\n",
+ " # used by on_validation_epoch_end hook\n",
+ " self.validation_step_outputs: List = []\n",
+ " self.alias = alias\n",
"\n",
" def __repr__(self):\n",
" return type(self).__name__ if self.alias is None else self.alias\n",
@@ -262,21 +415,11 @@
" set(temporal_cols.tolist()) & set(self.hist_exog_list + self.futr_exog_list)\n",
" )\n",
" \n",
- " def _set_quantile_for_iqloss(self, **data_module_kwargs):\n",
- " if \"quantile\" in data_module_kwargs:\n",
- " if not isinstance(self.loss, IQLoss):\n",
- " raise Exception(\n",
- " \"Please train with loss=IQLoss() to make use of the quantile argument.\"\n",
- " )\n",
- " else:\n",
- " self.quantile = data_module_kwargs[\"quantile\"]\n",
- " data_module_kwargs.pop(\"quantile\")\n",
- " self.loss.update_quantile(q=self.quantile)\n",
- " elif isinstance(self.loss, IQLoss):\n",
- " self.quantile = 0.5\n",
- " self.loss.update_quantile(q=self.quantile)\n",
- "\n",
- " return data_module_kwargs\n",
+ " def _set_quantiles(self, quantiles=None):\n",
+ " if quantiles is None and isinstance(self.loss, losses.IQLoss):\n",
+ " self.loss.update_quantile(q=[0.5])\n",
+ " elif hasattr(self.loss, 'update_quantile') and callable(self.loss.update_quantile):\n",
+ " self.loss.update_quantile(q=quantiles)\n",
"\n",
" def _fit_distributed(\n",
" self,\n",
@@ -490,7 +633,793 @@
" model.load_state_dict(content[\"state_dict\"], strict=True, assign=True)\n",
" else: # pytorch<2.1\n",
" model.load_state_dict(content[\"state_dict\"], strict=True)\n",
- " return model"
+ " return model\n",
+ "\n",
+ " def _create_windows(self, batch, step, w_idxs=None):\n",
+ " # Parse common data\n",
+ " window_size = self.input_size + self.h\n",
+ " temporal_cols = batch['temporal_cols']\n",
+ " temporal = batch['temporal'] \n",
+ "\n",
+ " if step == 'train':\n",
+ " if self.val_size + self.test_size > 0:\n",
+ " cutoff = -self.val_size - self.test_size\n",
+ " temporal = temporal[:, :, :cutoff]\n",
+ "\n",
+ " temporal = self.padder_train(temporal)\n",
+ " \n",
+ " if temporal.shape[-1] < window_size:\n",
+ " raise Exception('Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True')\n",
+ " \n",
+ " windows = temporal.unfold(dimension=-1, \n",
+ " size=window_size, \n",
+ " step=self.step_size)\n",
+ "\n",
+ " if self.MULTIVARIATE:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws, L + h, C, n_series]\n",
+ " windows = windows.permute(2, 3, 1, 0)\n",
+ " else:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws * n_series, L + h, C, 1]\n",
+ " windows_per_serie = windows.shape[2]\n",
+ " windows = windows.permute(0, 2, 3, 1)\n",
+ " windows = windows.flatten(0, 1)\n",
+ " windows = windows.unsqueeze(-1)\n",
+ "\n",
+ " # Sample and Available conditions\n",
+ " available_idx = temporal_cols.get_loc('available_mask') \n",
+ " available_condition = windows[:, :self.input_size, available_idx]\n",
+ " available_condition = torch.sum(available_condition, axis=(1, -1)) # Sum over time & series dimension\n",
+ " final_condition = (available_condition > 0)\n",
+ " \n",
+ " if self.h > 0:\n",
+ " sample_condition = windows[:, self.input_size:, available_idx]\n",
+ " sample_condition = torch.sum(sample_condition, axis=(1, -1)) # Sum over time & series dimension\n",
+ " final_condition = (sample_condition > 0) & (available_condition > 0)\n",
+ " \n",
+ " windows = windows[final_condition]\n",
+ " \n",
+ " # Parse Static data to match windows\n",
+ " static = batch.get('static', None)\n",
+ " static_cols=batch.get('static_cols', None)\n",
+ "\n",
+ " # Repeat static if univariate: [n_series, S] -> [Ws * n_series, S]\n",
+ " if static is not None and not self.MULTIVARIATE:\n",
+ " static = torch.repeat_interleave(static, \n",
+ " repeats=windows_per_serie, dim=0)\n",
+ " static = static[final_condition] \n",
+ "\n",
+ " # Protection of empty windows\n",
+ " if final_condition.sum() == 0:\n",
+ " raise Exception('No windows available for training')\n",
+ "\n",
+ " # Sample windows\n",
+ " if self.windows_batch_size is not None:\n",
+ " n_windows = windows.shape[0]\n",
+ " w_idxs = np.random.choice(n_windows, \n",
+ " size=self.windows_batch_size,\n",
+ " replace=(n_windows < self.windows_batch_size))\n",
+ " windows = windows[w_idxs]\n",
+ " \n",
+ " if static is not None and not self.MULTIVARIATE:\n",
+ " static = static[w_idxs]\n",
+ "\n",
+ " windows_batch = dict(temporal=windows,\n",
+ " temporal_cols=temporal_cols,\n",
+ " static=static,\n",
+ " static_cols=static_cols)\n",
+ " return windows_batch\n",
+ "\n",
+ " elif step in ['predict', 'val']:\n",
+ "\n",
+ " if step == 'predict':\n",
+ " initial_input = temporal.shape[-1] - self.test_size\n",
+ " if initial_input <= self.input_size: # There is not enough data to predict first timestamp\n",
+ " temporal = F.pad(temporal, pad=(self.input_size-initial_input, 0), mode=\"constant\", value=0.0)\n",
+ " predict_step_size = self.predict_step_size\n",
+ " cutoff = - self.input_size - self.test_size\n",
+ " temporal = temporal[:, :, cutoff:]\n",
+ "\n",
+ " elif step == 'val':\n",
+ " predict_step_size = self.step_size\n",
+ " cutoff = -self.input_size - self.val_size - self.test_size\n",
+ " if self.test_size > 0:\n",
+ " temporal = batch['temporal'][:, :, cutoff:-self.test_size]\n",
+ " else:\n",
+ " temporal = batch['temporal'][:, :, cutoff:]\n",
+ " if temporal.shape[-1] < window_size:\n",
+ " initial_input = temporal.shape[-1] - self.val_size\n",
+ " temporal = F.pad(temporal, pad=(self.input_size-initial_input, 0), mode=\"constant\", value=0.0)\n",
+ "\n",
+ " if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
+ " temporal = F.pad(temporal, pad=(0, self.h), mode=\"constant\", value=0.0)\n",
+ "\n",
+ " windows = temporal.unfold(dimension=-1,\n",
+ " size=window_size,\n",
+ " step=predict_step_size)\n",
+ "\n",
+ " static = batch.get('static', None)\n",
+ " static_cols=batch.get('static_cols', None)\n",
+ "\n",
+ " if self.MULTIVARIATE:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws, L + h, C, n_series]\n",
+ " windows = windows.permute(2, 3, 1, 0)\n",
+ " else:\n",
+ " # [n_series, C, Ws, L + h] -> [Ws * n_series, L + h, C, 1]\n",
+ " windows_per_serie = windows.shape[2]\n",
+ " windows = windows.permute(0, 2, 3, 1)\n",
+ " windows = windows.flatten(0, 1)\n",
+ " windows = windows.unsqueeze(-1)\n",
+ " if static is not None:\n",
+ " static = torch.repeat_interleave(static, \n",
+ " repeats=windows_per_serie, dim=0)\n",
+ "\n",
+ " # Sample windows for batched prediction\n",
+ " if w_idxs is not None:\n",
+ " windows = windows[w_idxs]\n",
+ " if static is not None and not self.MULTIVARIATE:\n",
+ " static = static[w_idxs]\n",
+ "\n",
+ " windows_batch = dict(temporal=windows,\n",
+ " temporal_cols=temporal_cols,\n",
+ " static=static,\n",
+ " static_cols=static_cols)\n",
+ " return windows_batch\n",
+ " else:\n",
+ " raise ValueError(f'Unknown step {step}') \n",
+ "\n",
+ " def _normalization(self, windows, y_idx):\n",
+ " # windows are already filtered by train/validation/test\n",
+    "        # by the `_create_windows` method, so there is no leakage risk\n",
+ " temporal = windows['temporal'] # [Ws, L + h, C, n_series]\n",
+ " temporal_cols = windows['temporal_cols'].copy() # [Ws, L + h, C, n_series]\n",
+ "\n",
+ " # To avoid leakage uses only the lags\n",
+ " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
+ " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
+ " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
+ " temporal_data = temporal[:, :, temporal_idxs] \n",
+ " temporal_mask = temporal[:, :, temporal_cols.get_loc('available_mask')].clone()\n",
+ " if self.h > 0:\n",
+ " temporal_mask[:, -self.h:] = 0.0\n",
+ "\n",
+ " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
+ " temporal_mask = temporal_mask.unsqueeze(2) # Add channel dimension for scaler.transform.\n",
+ " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
+ "\n",
+ " # Replace values in windows dict\n",
+ " temporal[:, :, temporal_idxs] = temporal_data\n",
+ " windows['temporal'] = temporal\n",
+ "\n",
+ " return windows\n",
+ "\n",
+ " def _inv_normalization(self, y_hat, y_idx):\n",
+ " # Receives window predictions [Ws, h, output, n_series]\n",
+ " # Broadcasts scale if necessary and inverts normalization\n",
+ " add_channel_dim = y_hat.ndim > 3\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx, add_channel_dim=add_channel_dim)\n",
+ " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
+ "\n",
+ " return y_hat\n",
+ "\n",
+ " def _parse_windows(self, batch, windows):\n",
+ " # windows: [Ws, L + h, C, n_series]\n",
+ "\n",
+ " # Filter insample lags from outsample horizon\n",
+ " y_idx = batch['y_idx']\n",
+ " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
+ "\n",
+ " insample_y = windows['temporal'][:, :self.input_size, y_idx]\n",
+ " insample_mask = windows['temporal'][:, :self.input_size, mask_idx]\n",
+ "\n",
+ " # Declare additional information\n",
+ " outsample_y = None\n",
+ " outsample_mask = None\n",
+ " hist_exog = None\n",
+ " futr_exog = None\n",
+ " stat_exog = None\n",
+ "\n",
+ " if self.h > 0:\n",
+ " outsample_y = windows['temporal'][:, self.input_size:, y_idx]\n",
+ " outsample_mask = windows['temporal'][:, self.input_size:, mask_idx]\n",
+ "\n",
+ " # Recurrent models at t predict t+1, so we shift the input (insample_y) by one\n",
+ " if self.RECURRENT:\n",
+ " insample_y = torch.cat((insample_y, outsample_y[:, :-1]), dim=1)\n",
+ " insample_mask = torch.cat((insample_mask, outsample_mask[:, :-1]), dim=1)\n",
+ " self.maintain_state = False\n",
+ "\n",
+ " if len(self.hist_exog_list):\n",
+ " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
+ " if self.RECURRENT:\n",
+ " hist_exog = windows['temporal'][:, :, hist_exog_idx]\n",
+ " hist_exog[:, self.input_size:] = 0.0\n",
+ " hist_exog = hist_exog[:, 1:]\n",
+ " else:\n",
+ " hist_exog = windows['temporal'][:, :self.input_size, hist_exog_idx]\n",
+ " if not self.MULTIVARIATE:\n",
+ " hist_exog = hist_exog.squeeze(-1)\n",
+ " else:\n",
+ " hist_exog = hist_exog.swapaxes(1, 2)\n",
+ "\n",
+ " if len(self.futr_exog_list):\n",
+ " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
+ " futr_exog = windows['temporal'][:, :, futr_exog_idx]\n",
+ " if self.RECURRENT:\n",
+ " futr_exog = futr_exog[:, 1:]\n",
+ " if not self.MULTIVARIATE:\n",
+ " futr_exog = futr_exog.squeeze(-1)\n",
+ " else:\n",
+ " futr_exog = futr_exog.swapaxes(1, 2) \n",
+ "\n",
+ " if len(self.stat_exog_list):\n",
+ " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
+ " stat_exog = windows['static'][:, static_idx]\n",
+ "\n",
+ " # TODO: think a better way of removing insample_y features\n",
+ " if self.exclude_insample_y:\n",
+ " insample_y = insample_y * 0\n",
+ "\n",
+ " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
+ " hist_exog, futr_exog, stat_exog \n",
+ "\n",
+ " def _get_loc_scale(self, y_idx, add_channel_dim=False):\n",
+ " # [B, L, C, n_series] -> [B, L, n_series]\n",
+ " y_scale = self.scaler.x_scale[:, :, y_idx]\n",
+ " y_loc = self.scaler.x_shift[:, :, y_idx]\n",
+ " \n",
+ " # [B, L, n_series] -> [B, L, n_series, 1]\n",
+ " if add_channel_dim:\n",
+ " y_scale = y_scale.unsqueeze(-1)\n",
+ " y_loc = y_loc.unsqueeze(-1)\n",
+ "\n",
+ " return y_loc, y_scale\n",
+ "\n",
+ " def _compute_valid_loss(self, insample_y, outsample_y, output, outsample_mask, y_idx):\n",
+ " if self.loss.is_distribution_output:\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
+ " if isinstance(self.valid_loss, (losses.sCRPS, losses.MQLoss, losses.HuberMQLoss)):\n",
+ " _, _, quants = self.loss.sample(distr_args=distr_args) \n",
+ " output = quants\n",
+ " elif isinstance(self.valid_loss, losses.BasePointLoss):\n",
+ " distr = self.loss.get_distribution(distr_args=distr_args)\n",
+ " output = distr.mean\n",
+ "\n",
+ " # Validation Loss evaluation\n",
+ " if self.valid_loss.is_distribution_output:\n",
+ " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
+ " else:\n",
+ " output = self._inv_normalization(y_hat=output, y_idx=y_idx)\n",
+ " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask)\n",
+ " return valid_loss\n",
+ " \n",
+ " def _validate_step_recurrent_batch(self, insample_y, insample_mask, futr_exog, hist_exog, stat_exog, y_idx):\n",
+ " # Remember state in network and set horizon to 1\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = True\n",
+ " self.h = 1\n",
+ "\n",
+ " # Initialize results array\n",
+ " n_outputs = self.loss.outputsize_multiplier\n",
+ " y_hat = torch.zeros((insample_y.shape[0],\n",
+ " self.horizon_backup,\n",
+ " self.n_series * n_outputs),\n",
+ " device=insample_y.device,\n",
+ " dtype=insample_y.dtype)\n",
+ "\n",
+ " # First step prediction\n",
+ " tau = 0\n",
+ " \n",
+ " # Set exogenous\n",
+ " hist_exog_current = None\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " futr_exog_current = None\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " # First forecast step\n",
+ " y_hat[:, tau], insample_y = self._validate_step_recurrent_single(\n",
+ " insample_y=insample_y[:, :self.input_size + tau - 1],\n",
+ " insample_mask=insample_mask[:, :self.input_size + tau - 1],\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx,\n",
+ " )\n",
+ "\n",
+ " # Horizon prediction recursively\n",
+ " for tau in range(self.horizon_backup):\n",
+ " # Set exogenous\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ "\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ " \n",
+ " y_hat[:, tau], insample_y = self._validate_step_recurrent_single(\n",
+ " insample_y=insample_y,\n",
+ " insample_mask=None,\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx = y_idx,\n",
+ " )\n",
+ " \n",
+ " # Reset state and horizon\n",
+ " self.maintain_state = False\n",
+ " self.rnn_state = None\n",
+ " self.h = self.horizon_backup\n",
+ "\n",
+ " return y_hat \n",
+ "\n",
+ " def _validate_step_recurrent_single(self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx):\n",
+ " # Input sequence\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output_batch_unmapped = self(windows_batch)\n",
+ " output_batch = self.loss.domain_map(output_batch_unmapped)\n",
+ " \n",
+ " # Inverse normalization and sampling\n",
+ " if self.loss.is_distribution_output:\n",
+ " # Sample distribution\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
+ " # When validating, the output is the mean of the distribution which is an attribute\n",
+ " distr = self.loss.get_distribution(distr_args=distr_args)\n",
+ "\n",
+ " # Scale back to feed back as input\n",
+ " insample_y = self.scaler.scaler(distr.mean, y_loc, y_scale)\n",
+ " else:\n",
+ " # Todo: for now, we assume that in case of a BasePointLoss with ndim==4, the last dimension\n",
+ " # contains a set of predictions for the target (e.g. MQLoss multiple quantiles), for which we use the \n",
+ " # mean as feedback signal for the recurrent predictions. A more precise way is to increase the\n",
+ " # insample input size of the recurrent network by the number of outputs so that each output\n",
+ " # can be fed back to a specific input channel. \n",
+ " if output_batch.ndim == 4:\n",
+ " output_batch = output_batch.mean(dim=-1)\n",
+ "\n",
+ " insample_y = output_batch\n",
+ "\n",
+ " # Remove horizon dim: [B, 1, N * n_outputs] -> [B, N * n_outputs]\n",
+ " y_hat = output_batch_unmapped.squeeze(1)\n",
+ " return y_hat, insample_y\n",
+ "\n",
+ " def _predict_step_recurrent_batch(self, insample_y, insample_mask, futr_exog, hist_exog, stat_exog, y_idx):\n",
+ " # Remember state in network and set horizon to 1\n",
+ " self.rnn_state = None\n",
+ " self.maintain_state = True\n",
+ " self.h = 1\n",
+ "\n",
+ " # Initialize results array\n",
+ " n_outputs = len(self.loss.output_names)\n",
+ " y_hat = torch.zeros((insample_y.shape[0],\n",
+ " self.horizon_backup,\n",
+ " self.n_series,\n",
+ " n_outputs),\n",
+ " device=insample_y.device,\n",
+ " dtype=insample_y.dtype)\n",
+ "\n",
+ " # First step prediction\n",
+ " tau = 0\n",
+ " \n",
+ " # Set exogenous\n",
+ " hist_exog_current = None\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " futr_exog_current = None\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, :self.input_size + tau - 1]\n",
+ "\n",
+ " # First forecast step\n",
+ " y_hat[:, tau], insample_y = self._predict_step_recurrent_single(\n",
+ " insample_y=insample_y[:, :self.input_size + tau - 1],\n",
+ " insample_mask=insample_mask[:, :self.input_size + tau - 1],\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx,\n",
+ " )\n",
+ "\n",
+ " # Horizon prediction recursively\n",
+ " for tau in range(self.horizon_backup):\n",
+ " # Set exogenous\n",
+ " if self.hist_exog_size > 0:\n",
+ " hist_exog_current = hist_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ "\n",
+ " if self.futr_exog_size > 0:\n",
+ " futr_exog_current = futr_exog[:, self.input_size + tau - 1].unsqueeze(1)\n",
+ " \n",
+ " y_hat[:, tau], insample_y = self._predict_step_recurrent_single(\n",
+ " insample_y=insample_y,\n",
+ " insample_mask=None,\n",
+ " hist_exog=hist_exog_current,\n",
+ " futr_exog=futr_exog_current,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx = y_idx,\n",
+ " )\n",
+ " \n",
+ " # Reset state and horizon\n",
+ " self.maintain_state = False\n",
+ " self.rnn_state = None\n",
+ " self.h = self.horizon_backup\n",
+ "\n",
+ " # Squeeze for univariate case\n",
+ " if not self.MULTIVARIATE:\n",
+ " y_hat = y_hat.squeeze(2)\n",
+ "\n",
+ " return y_hat \n",
+ "\n",
+ " def _predict_step_recurrent_single(self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx):\n",
+ " # Input sequence\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output_batch_unmapped = self(windows_batch)\n",
+ " output_batch = self.loss.domain_map(output_batch_unmapped)\n",
+ " \n",
+ " # Inverse normalization and sampling\n",
+ " if self.loss.is_distribution_output:\n",
+ " # Sample distribution\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
+ " # When predicting, we need to sample to get the quantiles. The mean is an attribute.\n",
+ " _, _, quants = self.loss.sample(distr_args=distr_args, num_samples=self.n_samples)\n",
+ " mean = self.loss.distr_mean\n",
+ "\n",
+ " # Scale back to feed back as input\n",
+ " insample_y = self.scaler.scaler(mean, y_loc, y_scale)\n",
+ " \n",
+ " # Save predictions\n",
+ " y_hat = torch.concat((mean.unsqueeze(-1), quants), axis=-1)\n",
+ "\n",
+ " if self.loss.return_params:\n",
+ " distr_args = torch.stack(distr_args, dim=-1)\n",
+ " if distr_args.ndim > 4:\n",
+ " distr_args = distr_args.flatten(-2, -1)\n",
+ " y_hat = torch.concat((y_hat, distr_args), axis=-1)\n",
+ " else:\n",
+ " # Todo: for now, we assume that in case of a BasePointLoss with ndim==4, the last dimension\n",
+ " # contains a set of predictions for the target (e.g. MQLoss multiple quantiles), for which we use the \n",
+ " # mean as feedback signal for the recurrent predictions. A more precise way is to increase the\n",
+ " # insample input size of the recurrent network by the number of outputs so that each output\n",
+ " # can be fed back to a specific input channel. \n",
+ " if output_batch.ndim == 4:\n",
+ " output_batch = output_batch.mean(dim=-1)\n",
+ "\n",
+ " insample_y = output_batch\n",
+ " y_hat = self._inv_normalization(y_hat=output_batch, y_idx=y_idx)\n",
+ " y_hat = y_hat.unsqueeze(-1)\n",
+ "\n",
+ " # Remove horizon dim: [B, 1, N, n_outputs] -> [B, N, n_outputs]\n",
+ " y_hat = y_hat.squeeze(1)\n",
+ " return y_hat, insample_y\n",
+ "\n",
+ " def _predict_step_direct_batch(self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx):\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output_batch = self(windows_batch)\n",
+ " output_batch = self.loss.domain_map(output_batch)\n",
+ "\n",
+ " # Inverse normalization and sampling\n",
+ " if self.loss.is_distribution_output:\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
+ " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
+ " y_hat = torch.concat((sample_mean, quants), axis=-1)\n",
+ "\n",
+ " if self.loss.return_params:\n",
+ " distr_args = torch.stack(distr_args, dim=-1)\n",
+ " if distr_args.ndim > 4:\n",
+ " distr_args = distr_args.flatten(-2, -1)\n",
+ " y_hat = torch.concat((y_hat, distr_args), axis=-1) \n",
+ " else:\n",
+ " y_hat = self._inv_normalization(y_hat=output_batch, \n",
+ " y_idx=y_idx)\n",
+ "\n",
+ " return y_hat\n",
+ " \n",
+ " def training_step(self, batch, batch_idx):\n",
+ " # Set horizon to h_train in case of recurrent model to speed up training\n",
+ " if self.RECURRENT:\n",
+ " self.h = self.h_train\n",
+ " \n",
+ " # windows: [Ws, L + h, C, n_series] or [Ws, L + h, C]\n",
+ " y_idx = batch['y_idx']\n",
+ "\n",
+ " windows = self._create_windows(batch, step='train')\n",
+ " original_outsample_y = torch.clone(windows['temporal'][:, self.input_size:, y_idx])\n",
+ " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
+ " \n",
+ " # Parse windows\n",
+ " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
+ " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
+ "\n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ "\n",
+ " # Model Predictions\n",
+ " output = self(windows_batch)\n",
+ " output = self.loss.domain_map(output)\n",
+ " \n",
+ " if self.loss.is_distribution_output:\n",
+ " y_loc, y_scale = self._get_loc_scale(y_idx)\n",
+ " outsample_y = original_outsample_y\n",
+ " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
+ " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
+ " else:\n",
+ " loss = self.loss(y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask)\n",
+ "\n",
+ " if torch.isnan(loss):\n",
+ " print('Model Parameters', self.hparams)\n",
+ " print('insample_y', torch.isnan(insample_y).sum())\n",
+ " print('outsample_y', torch.isnan(outsample_y).sum())\n",
+ " raise Exception('Loss is NaN, training stopped.')\n",
+ "\n",
+ " train_loss_log = loss.detach().item()\n",
+ " self.log(\n",
+ " 'train_loss',\n",
+ " train_loss_log,\n",
+ " batch_size=outsample_y.size(0),\n",
+ " prog_bar=True,\n",
+ " on_epoch=True,\n",
+ " )\n",
+ " self.train_trajectories.append((self.global_step, train_loss_log))\n",
+ "\n",
+ " self.h = self.horizon_backup\n",
+ "\n",
+ " return loss\n",
+ "\n",
+ "\n",
+ " def validation_step(self, batch, batch_idx):\n",
+ " if self.val_size == 0:\n",
+ " return np.nan\n",
+ "\n",
+ " # TODO: Hack to compute number of windows\n",
+ " windows = self._create_windows(batch, step='val')\n",
+ " n_windows = len(windows['temporal'])\n",
+ " y_idx = batch['y_idx']\n",
+ "\n",
+ " # Number of windows in batch\n",
+ " windows_batch_size = self.inference_windows_batch_size\n",
+ " if windows_batch_size < 0:\n",
+ " windows_batch_size = n_windows\n",
+ " n_batches = int(np.ceil(n_windows / windows_batch_size))\n",
+ "\n",
+ " valid_losses = []\n",
+ " batch_sizes = []\n",
+ " for i in range(n_batches):\n",
+ " # Create and normalize windows [Ws, L + h, C, n_series]\n",
+ " w_idxs = np.arange(i*windows_batch_size, \n",
+ " min((i+1)*windows_batch_size, n_windows))\n",
+ " windows = self._create_windows(batch, step='val', w_idxs=w_idxs)\n",
+ " original_outsample_y = torch.clone(windows['temporal'][:, self.input_size:, y_idx])\n",
+ "\n",
+ " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
+ "\n",
+ " # Parse windows\n",
+ " insample_y, insample_mask, _, outsample_mask, \\\n",
+ " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
+ "\n",
+ " if self.RECURRENT:\n",
+ " output_batch = self._validate_step_recurrent_batch(insample_y=insample_y,\n",
+ " insample_mask=insample_mask,\n",
+ " futr_exog=futr_exog,\n",
+ " hist_exog=hist_exog,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx)\n",
+ " else: \n",
+ " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
+ " insample_mask=insample_mask, # [Ws, L, n_series]\n",
+ " futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]\n",
+ " hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]\n",
+ " stat_exog=stat_exog) # univariate: [Ws, S]; multivariate: [n_series, S]\n",
+ " \n",
+ " # Model Predictions\n",
+ " output_batch = self(windows_batch) \n",
+ "\n",
+ " output_batch = self.loss.domain_map(output_batch)\n",
+ " valid_loss_batch = self._compute_valid_loss(insample_y=insample_y,\n",
+ " outsample_y=original_outsample_y,\n",
+ " output=output_batch, \n",
+ " outsample_mask=outsample_mask,\n",
+ " y_idx=batch['y_idx'])\n",
+ " valid_losses.append(valid_loss_batch)\n",
+ " batch_sizes.append(len(output_batch))\n",
+ " \n",
+ " valid_loss = torch.stack(valid_losses)\n",
+ " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n",
+ " batch_size = torch.sum(batch_sizes)\n",
+ " valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size\n",
+ "\n",
+ " if torch.isnan(valid_loss):\n",
+ " raise Exception('Loss is NaN, training stopped.')\n",
+ "\n",
+ " valid_loss_log = valid_loss.detach()\n",
+ " self.log(\n",
+ " 'valid_loss',\n",
+ " valid_loss_log.item(),\n",
+ " batch_size=batch_size,\n",
+ " prog_bar=True,\n",
+ " on_epoch=True,\n",
+ " )\n",
+ " self.validation_step_outputs.append(valid_loss_log)\n",
+ " return valid_loss\n",
+ "\n",
+ " def predict_step(self, batch, batch_idx):\n",
+ " if self.RECURRENT:\n",
+ " self.input_size = self.inference_input_size\n",
+ "\n",
+ " # TODO: Hack to compute number of windows\n",
+ " windows = self._create_windows(batch, step='predict')\n",
+ " n_windows = len(windows['temporal'])\n",
+ " y_idx = batch['y_idx']\n",
+ "\n",
+ " # Number of windows in batch\n",
+ " windows_batch_size = self.inference_windows_batch_size\n",
+ " if windows_batch_size < 0:\n",
+ " windows_batch_size = n_windows\n",
+ " n_batches = int(np.ceil(n_windows / windows_batch_size))\n",
+ " y_hats = []\n",
+ " for i in range(n_batches):\n",
+ " # Create and normalize windows [Ws, L+H, C]\n",
+ " w_idxs = np.arange(i*windows_batch_size, \n",
+ " min((i+1)*windows_batch_size, n_windows))\n",
+ " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n",
+ " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
+ "\n",
+ " # Parse windows\n",
+ " insample_y, insample_mask, _, _, \\\n",
+ " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
+ "\n",
+ " if self.RECURRENT: \n",
+ " y_hat = self._predict_step_recurrent_batch(insample_y=insample_y,\n",
+ " insample_mask=insample_mask,\n",
+ " futr_exog=futr_exog,\n",
+ " hist_exog=hist_exog,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx)\n",
+ " else:\n",
+ " y_hat = self._predict_step_direct_batch(insample_y=insample_y,\n",
+ " insample_mask=insample_mask,\n",
+ " futr_exog=futr_exog,\n",
+ " hist_exog=hist_exog,\n",
+ " stat_exog=stat_exog,\n",
+ " y_idx=y_idx) \n",
+ "\n",
+ "\n",
+ " y_hats.append(y_hat)\n",
+ " y_hat = torch.cat(y_hats, dim=0)\n",
+ " self.input_size = self.input_size_backup\n",
+ "\n",
+ " return y_hat\n",
+ " \n",
+ " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
+ " \"\"\" Fit.\n",
+ "\n",
+ " The `fit` method, optimizes the neural network's weights using the\n",
+ " initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
+ " and the `loss` function as defined during the initialization. \n",
+ " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
+ " inherits the initialization's `self.trainer_kwargs`, to customize\n",
+ " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
+ "\n",
+    "        The method is designed to be compatible with SKLearn-like classes\n",
+    "        and in particular with the StatsForecast library.\n",
+ "\n",
+    "        By default the `model` does not save training checkpoints to protect \n",
+    "        disk memory; to enable them, set `enable_checkpointing=True` in `__init__`.\n",
+ "\n",
+ " **Parameters:** \n",
+ " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
+ " `val_size`: int, validation size for temporal cross-validation. \n",
+ " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s. \n",
+ " `test_size`: int, test size for temporal cross-validation. \n",
+ " \"\"\"\n",
+ " return self._fit(\n",
+ " dataset=dataset,\n",
+ " batch_size=self.batch_size,\n",
+ " valid_batch_size=self.valid_batch_size,\n",
+ " val_size=val_size,\n",
+ " test_size=test_size,\n",
+ " random_seed=random_seed,\n",
+ " distributed_config=distributed_config,\n",
+ " )\n",
+ "\n",
+ " def predict(self, dataset, test_size=None, step_size=1,\n",
+ " random_seed=None, quantiles=None, **data_module_kwargs):\n",
+ " \"\"\" Predict.\n",
+ "\n",
+ " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
+ "\n",
+ " **Parameters:** \n",
+ " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
+ " `test_size`: int=None, test size for temporal cross-validation. \n",
+ " `step_size`: int=1, Step size between each window. \n",
+ " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s. \n",
+ " `quantiles`: list of floats, optional (default=None), target quantiles to predict. \n",
+ " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
+ " \"\"\"\n",
+ " self._check_exog(dataset)\n",
+ " self._restart_seed(random_seed)\n",
+ " if \"quantile\" in data_module_kwargs:\n",
+ " warnings.warn(\"The 'quantile' argument will be deprecated, use 'quantiles' instead.\")\n",
+ " if quantiles is not None:\n",
+ " raise ValueError(\"You can't specify quantile and quantiles.\")\n",
+ " quantiles = [data_module_kwargs.pop(\"quantile\")]\n",
+ " self._set_quantiles(quantiles)\n",
+ "\n",
+ " self.predict_step_size = step_size\n",
+ " self.decompose_forecast = False\n",
+ " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
+ " valid_batch_size=self.valid_batch_size,\n",
+ " **data_module_kwargs)\n",
+ "\n",
+    "        # Protect against the multi-GPU case: PL does not support returning predictions from multiple GPUs.\n",
+ " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
+ " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
+ " pred_trainer_kwargs['devices'] = [0]\n",
+ "\n",
+ " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
+ " fcsts = trainer.predict(self, datamodule=datamodule) \n",
+ " fcsts = torch.vstack(fcsts)\n",
+ "\n",
+ " if self.MULTIVARIATE:\n",
+ " # [B, h, n_series (, Q)] -> [n_series, B, h (, Q)]\n",
+ " fcsts = fcsts.swapaxes(0, 2)\n",
+ " fcsts = fcsts.swapaxes(1, 2)\n",
+ "\n",
+ " fcsts = tensor_to_numpy(fcsts).flatten()\n",
+ " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
+ " return fcsts\n",
+ "\n",
+ " def decompose(self, dataset, step_size=1, random_seed=None, quantiles=None, **data_module_kwargs):\n",
+ " \"\"\" Decompose Predictions.\n",
+ "\n",
+ " Decompose the predictions through the network's layers.\n",
+ " Available methods are `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx`.\n",
+ "\n",
+ " **Parameters:** \n",
+ " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
+ " `step_size`: int=1, step size between each window of temporal data. \n",
+ " `quantiles`: list of floats, optional (default=None), target quantiles to predict. \n",
+ " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
+ " \"\"\"\n",
+ " # Restart random seed\n",
+ " if random_seed is None:\n",
+ " random_seed = self.random_seed\n",
+ " torch.manual_seed(random_seed)\n",
+ " self._set_quantiles(quantiles)\n",
+ "\n",
+ " self.predict_step_size = step_size\n",
+ " self.decompose_forecast = True\n",
+ " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
+ " valid_batch_size=self.valid_batch_size,\n",
+ " **data_module_kwargs)\n",
+ " trainer = pl.Trainer(**self.trainer_kwargs)\n",
+ " fcsts = trainer.predict(self, datamodule=datamodule)\n",
+ " self.decompose_forecast = False # Default decomposition back to false\n",
+ " fcsts = torch.vstack(fcsts)\n",
+ " return tensor_to_numpy(fcsts) "
]
}
],
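The `fit` and `predict` methods documented in the cell above are usually not called directly; the `core.NeuralForecast` wrapper builds the `TimeSeriesDataset` and forwards these calls. A minimal usage sketch, assuming the public `NeuralForecast` and `NHITS` API plus the bundled `AirPassengersDF` sample data (the model choice and hyperparameters here are illustrative, not part of this diff):

from neuralforecast import NeuralForecast
from neuralforecast.models import NHITS
from neuralforecast.utils import AirPassengersDF

# The wrapper converts the dataframe into a TimeSeriesDataset and then
# dispatches to model.fit(dataset, val_size=..., test_size=...) as shown above.
nf = NeuralForecast(models=[NHITS(h=12, input_size=24, max_steps=10)], freq='M')
nf.fit(df=AirPassengersDF)

# predict() dispatches to model.predict(dataset, step_size=1, quantiles=None, ...)
forecasts = nf.predict()
print(forecasts.head())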
diff --git a/nbs/common.base_multivariate.ipynb b/nbs/common.base_multivariate.ipynb
deleted file mode 100644
index 2096e8350..000000000
--- a/nbs/common.base_multivariate.ipynb
+++ /dev/null
@@ -1,623 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| default_exp common._base_multivariate"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# BaseMultivariate\n",
- "\n",
- "> The `BaseWindows` class contains standard methods shared across window-based multivariate neural networks; in contrast to recurrent neural networks these models commit to a fixed sequence length input."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The standard methods include data preprocessing `_normalization`, optimization utilities like parameter initialization, `training_step`, `validation_step`, and shared `fit` and `predict` methods.These shared methods enable all the `neuralforecast.models` compatibility with the `core.NeuralForecast` wrapper class. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import pytorch_lightning as pl\n",
- "import neuralforecast.losses.pytorch as losses\n",
- "\n",
- "from neuralforecast.common._base_model import BaseModel, tensor_to_numpy\n",
- "from neuralforecast.common._scalers import TemporalNorm\n",
- "from neuralforecast.tsdataset import TimeSeriesDataModule\n",
- "from neuralforecast.utils import get_indexer_raise_missing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class BaseMultivariate(BaseModel):\n",
- " \"\"\" Base Multivariate\n",
- " \n",
- " Base class for all multivariate models. The forecasts for all time-series are produced simultaneously \n",
- " within each window, which are randomly sampled during training.\n",
- " \n",
- " This class implements the basic functionality for all windows-based models, including:\n",
- " - PyTorch Lightning's methods training_step, validation_step, predict_step. \n",
- " - fit and predict methods used by NeuralForecast.core class. \n",
- " - sampling and wrangling methods to generate multivariate windows.\n",
- " \"\"\"\n",
- " def __init__(self, \n",
- " h,\n",
- " input_size,\n",
- " loss,\n",
- " valid_loss,\n",
- " learning_rate,\n",
- " max_steps,\n",
- " val_check_steps,\n",
- " n_series,\n",
- " batch_size,\n",
- " step_size=1,\n",
- " num_lr_decays=0,\n",
- " early_stop_patience_steps=-1,\n",
- " scaler_type='robust',\n",
- " futr_exog_list=None,\n",
- " hist_exog_list=None,\n",
- " stat_exog_list=None,\n",
- " drop_last_loader=False,\n",
- " random_seed=1, \n",
- " alias=None,\n",
- " optimizer=None,\n",
- " optimizer_kwargs=None,\n",
- " lr_scheduler=None,\n",
- " lr_scheduler_kwargs=None,\n",
- " dataloader_kwargs=None,\n",
- " **trainer_kwargs):\n",
- " super().__init__(\n",
- " random_seed=random_seed,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " optimizer=optimizer,\n",
- " optimizer_kwargs=optimizer_kwargs,\n",
- " lr_scheduler=lr_scheduler,\n",
- " lr_scheduler_kwargs=lr_scheduler_kwargs, \n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
- " max_steps=max_steps,\n",
- " early_stop_patience_steps=early_stop_patience_steps,\n",
- " **trainer_kwargs,\n",
- " )\n",
- "\n",
- " # Padder to complete train windows, \n",
- " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
- " self.h = h\n",
- " self.input_size = input_size\n",
- " self.n_series = n_series\n",
- " self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- "\n",
- " # Multivariate models do not support these loss functions yet.\n",
- " unsupported_losses = (\n",
- " losses.sCRPS,\n",
- " losses.MQLoss,\n",
- " losses.DistributionLoss,\n",
- " losses.PMM,\n",
- " losses.GMM,\n",
- " losses.HuberMQLoss,\n",
- " losses.MASE,\n",
- " losses.relMSE,\n",
- " losses.NBMM,\n",
- " )\n",
- " if isinstance(self.loss, unsupported_losses):\n",
- " raise Exception(f\"{self.loss} is not supported in a Multivariate model.\") \n",
- " if isinstance(self.valid_loss, unsupported_losses):\n",
- " raise Exception(f\"{self.valid_loss} is not supported in a Multivariate model.\") \n",
- "\n",
- " self.batch_size = batch_size\n",
- " \n",
- " # Optimization\n",
- " self.learning_rate = learning_rate\n",
- " self.max_steps = max_steps\n",
- " self.num_lr_decays = num_lr_decays\n",
- " self.lr_decay_steps = max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
- " self.early_stop_patience_steps = early_stop_patience_steps\n",
- " self.val_check_steps = val_check_steps\n",
- " self.step_size = step_size\n",
- "\n",
- " # Scaler\n",
- " self.scaler = TemporalNorm(scaler_type=scaler_type, dim=2) # Time dimension is in the second axis\n",
- "\n",
- " # Fit arguments\n",
- " self.val_size = 0\n",
- " self.test_size = 0\n",
- "\n",
- " # Model state\n",
- " self.decompose_forecast = False\n",
- "\n",
- " # DataModule arguments\n",
- " self.dataloader_kwargs = dataloader_kwargs\n",
- " self.drop_last_loader = drop_last_loader\n",
- " # used by on_validation_epoch_end hook\n",
- " self.validation_step_outputs = []\n",
- " self.alias = alias\n",
- "\n",
- " def _create_windows(self, batch, step):\n",
- " # Parse common data\n",
- " window_size = self.input_size + self.h\n",
- " temporal_cols = batch['temporal_cols']\n",
- " temporal = batch['temporal']\n",
- "\n",
- " if step == 'train':\n",
- " if self.val_size + self.test_size > 0:\n",
- " cutoff = -self.val_size - self.test_size\n",
- " temporal = temporal[:, :, :cutoff]\n",
- "\n",
- " temporal = self.padder(temporal)\n",
- " windows = temporal.unfold(dimension=-1, \n",
- " size=window_size, \n",
- " step=self.step_size)\n",
- " # [n_series, C, Ws, L+H] 0, 1, 2, 3\n",
- "\n",
- " # Sample and Available conditions\n",
- " available_idx = temporal_cols.get_loc('available_mask')\n",
- " sample_condition = windows[:, available_idx, :, -self.h:]\n",
- " sample_condition = torch.sum(sample_condition, axis=2) # Sum over time\n",
- " sample_condition = torch.sum(sample_condition, axis=0) # Sum over time-series\n",
- " available_condition = windows[:, available_idx, :, :-self.h]\n",
- " available_condition = torch.sum(available_condition, axis=2) # Sum over time\n",
- " available_condition = torch.sum(available_condition, axis=0) # Sum over time-series\n",
- " final_condition = (sample_condition > 0) & (available_condition > 0) # Of shape [Ws]\n",
- " windows = windows[:, :, final_condition, :]\n",
- "\n",
- " # Get Static data\n",
- " static = batch.get('static', None)\n",
- " static_cols = batch.get('static_cols', None)\n",
- "\n",
- " # Protection of empty windows\n",
- " if final_condition.sum() == 0:\n",
- " raise Exception('No windows available for training')\n",
- "\n",
- " # Sample windows\n",
- " n_windows = windows.shape[2]\n",
- " if self.batch_size is not None:\n",
- " w_idxs = np.random.choice(n_windows, \n",
- " size=self.batch_size,\n",
- " replace=(n_windows < self.batch_size))\n",
- " windows = windows[:, :, w_idxs, :]\n",
- "\n",
- " windows = windows.permute(2, 1, 3, 0) # [Ws, C, L+H, n_series]\n",
- "\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- "\n",
- " return windows_batch\n",
- "\n",
- " elif step in ['predict', 'val']:\n",
- "\n",
- " if step == 'predict':\n",
- " predict_step_size = self.predict_step_size\n",
- " cutoff = - self.input_size - self.test_size\n",
- " temporal = batch['temporal'][:, :, cutoff:]\n",
- "\n",
- " elif step == 'val':\n",
- " predict_step_size = self.step_size\n",
- " cutoff = -self.input_size - self.val_size - self.test_size\n",
- " if self.test_size > 0:\n",
- " temporal = batch['temporal'][:, :, cutoff:-self.test_size]\n",
- " else:\n",
- " temporal = batch['temporal'][:, :, cutoff:]\n",
- "\n",
- " if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " windows = temporal.unfold(dimension=-1,\n",
- " size=window_size,\n",
- " step=predict_step_size)\n",
- " # [n_series, C, Ws, L+H] -> [Ws, C, L+H, n_series]\n",
- " windows = windows.permute(2, 1, 3, 0)\n",
- "\n",
- " # Get Static data\n",
- " static = batch.get('static', None)\n",
- " static_cols=batch.get('static_cols', None)\n",
- "\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- "\n",
- "\n",
- " return windows_batch\n",
- " else:\n",
- " raise ValueError(f'Unknown step {step}') \n",
- "\n",
- " def _normalization(self, windows, y_idx):\n",
- " \n",
- " # windows are already filtered by train/validation/test\n",
- " # from the `create_windows_method` nor leakage risk\n",
- " temporal = windows['temporal'] # [Ws, C, L+H, n_series]\n",
- " temporal_cols = windows['temporal_cols'].copy() # [Ws, C, L+H, n_series]\n",
- "\n",
- " # To avoid leakage uses only the lags\n",
- " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
- " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
- " temporal_data = temporal[:, temporal_idxs, :, :]\n",
- " temporal_mask = temporal[:, temporal_cols.get_loc('available_mask'), :, :].clone()\n",
- " temporal_mask[:, -self.h:, :] = 0.0\n",
- "\n",
- " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
- " temporal_mask = temporal_mask.unsqueeze(1) # Add channel dimension for scaler.transform.\n",
- " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
- " # Replace values in windows dict\n",
- " temporal[:, temporal_idxs, :, :] = temporal_data\n",
- " windows['temporal'] = temporal\n",
- "\n",
- " return windows\n",
- "\n",
- " def _inv_normalization(self, y_hat, temporal_cols, y_idx):\n",
- " # Receives window predictions [Ws, H, n_series]\n",
- " # Broadcasts outputs and inverts normalization\n",
- "\n",
- " # Add C dimension\n",
- " # if y_hat.ndim == 2:\n",
- " # remove_dimension = True\n",
- " # y_hat = y_hat.unsqueeze(-1)\n",
- " # else:\n",
- " # remove_dimension = False\n",
- " \n",
- " y_scale = self.scaler.x_scale[:, [y_idx], :].squeeze(1)\n",
- " y_loc = self.scaler.x_shift[:, [y_idx], :].squeeze(1)\n",
- "\n",
- " # y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1)\n",
- " # y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1)\n",
- "\n",
- " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
- "\n",
- " # if remove_dimension:\n",
- " # y_hat = y_hat.squeeze(-1)\n",
- " # y_loc = y_loc.squeeze(-1)\n",
- " # y_scale = y_scale.squeeze(-1)\n",
- "\n",
- " return y_hat, y_loc, y_scale\n",
- "\n",
- " def _parse_windows(self, batch, windows):\n",
- " # Temporal: [Ws, C, L+H, n_series]\n",
- "\n",
- " # Filter insample lags from outsample horizon\n",
- " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
- " y_idx = batch['y_idx'] \n",
- " insample_y = windows['temporal'][:, y_idx, :-self.h, :]\n",
- " insample_mask = windows['temporal'][:, mask_idx, :-self.h, :]\n",
- " outsample_y = windows['temporal'][:, y_idx, -self.h:, :]\n",
- " outsample_mask = windows['temporal'][:, mask_idx, -self.h:, :]\n",
- "\n",
- " # Filter historic exogenous variables\n",
- " if len(self.hist_exog_list):\n",
- " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
- " hist_exog = windows['temporal'][:, hist_exog_idx, :-self.h, :]\n",
- " else:\n",
- " hist_exog = None\n",
- " \n",
- " # Filter future exogenous variables\n",
- " if len(self.futr_exog_list):\n",
- " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
- " futr_exog = windows['temporal'][:, futr_exog_idx, :, :]\n",
- " else:\n",
- " futr_exog = None\n",
- "\n",
- " # Filter static variables\n",
- " if len(self.stat_exog_list):\n",
- " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
- " stat_exog = windows['static'][:, static_idx]\n",
- " else:\n",
- " stat_exog = None\n",
- "\n",
- " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog\n",
- "\n",
- " def training_step(self, batch, batch_idx): \n",
- " # Create and normalize windows [batch_size, n_series, C, L+H]\n",
- " windows = self._create_windows(batch, step='train')\n",
- " y_idx = batch['y_idx']\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
- " insample_mask=insample_mask, # [Ws, L, n_series]\n",
- " futr_exog=futr_exog, # [Ws, F, L + h, n_series]\n",
- " hist_exog=hist_exog, # [Ws, X, L, n_series]\n",
- " stat_exog=stat_exog) # [n_series, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
- " return loss\n",
- "\n",
- " def validation_step(self, batch, batch_idx):\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- " \n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " windows = self._create_windows(batch, step='val')\n",
- " y_idx = batch['y_idx']\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
- " insample_mask=insample_mask, # [Ws, L, n_series]\n",
- " futr_exog=futr_exog, # [Ws, F, L + h, n_series]\n",
- " hist_exog=hist_exog, # [Ws, X, L, n_series]\n",
- " stat_exog=stat_exog) # [n_series, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- "\n",
- " if str(type(self.valid_loss)) in\\\n",
- " [\"\", \"\"]:\n",
- " _, output = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " # Validation Loss evaluation\n",
- " if self.valid_loss.is_distribution_output:\n",
- " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx): \n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " y_idx = batch['y_idx'] \n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L, n_series]\n",
- " insample_mask=insample_mask, # [Ws, L, n_series]\n",
- " futr_exog=futr_exog, # [Ws, F, L + h, n_series]\n",
- " hist_exog=hist_exog, # [Ws, X, L, n_series]\n",
- " stat_exog=stat_exog) # [n_series, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=torch.empty(size=(insample_y.shape[0], \n",
- " self.h, \n",
- " self.n_series),\n",
- " dtype=output[0].dtype,\n",
- " device=output[0].device),\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, y_hat = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " if self.loss.return_params:\n",
- " distr_args = torch.stack(distr_args, dim=-1)\n",
- " distr_args = torch.reshape(distr_args, (len(windows[\"temporal\"]), self.h, -1))\n",
- " y_hat = torch.concat((y_hat, distr_args), axis=2)\n",
- " else:\n",
- " y_hat, _, _ = self._inv_normalization(y_hat=output,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " return y_hat\n",
- " \n",
- " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
- " \"\"\" Fit.\n",
- "\n",
- " The `fit` method, optimizes the neural network's weights using the\n",
- " initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
- " and the `loss` function as defined during the initialization. \n",
- " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
- " inherits the initialization's `self.trainer_kwargs`, to customize\n",
- " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
- "\n",
- " The method is designed to be compatible with SKLearn-like classes\n",
- " and in particular to be compatible with the StatsForecast library.\n",
- "\n",
- " By default the `model` is not saving training checkpoints to protect \n",
- " disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `val_size`: int, validation size for temporal cross-validation. \n",
- " `test_size`: int, test size for temporal cross-validation. \n",
- " \"\"\"\n",
- " if distributed_config is not None:\n",
- " raise ValueError(\"multivariate models cannot be trained using distributed data parallel.\")\n",
- " return self._fit(\n",
- " dataset=dataset,\n",
- " batch_size=self.n_series,\n",
- " valid_batch_size=self.n_series,\n",
- " val_size=val_size,\n",
- " test_size=test_size,\n",
- " random_seed=random_seed,\n",
- " shuffle_train=False,\n",
- " distributed_config=None,\n",
- " )\n",
- "\n",
- " def predict(self, dataset, test_size=None, step_size=1, random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Predict.\n",
- "\n",
- " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `test_size`: int=None, test size for temporal cross-validation. \n",
- " `step_size`: int=1, Step size between each window. \n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " self._check_exog(dataset)\n",
- " self._restart_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- "\n",
- " self.predict_step_size = step_size\n",
- " self.decompose_forecast = False\n",
- " datamodule = TimeSeriesDataModule(dataset=dataset, \n",
- " valid_batch_size=self.n_series, \n",
- " batch_size=self.n_series,\n",
- " **data_module_kwargs)\n",
- "\n",
- " # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.\n",
- " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
- " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
- " pred_trainer_kwargs['devices'] = [0]\n",
- "\n",
- " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
- " fcsts = trainer.predict(self, datamodule=datamodule)\n",
- " fcsts = tensor_to_numpy(torch.vstack(fcsts))\n",
- "\n",
- " fcsts = np.transpose(fcsts, (2,0,1))\n",
- " fcsts = fcsts.flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " return fcsts\n",
- "\n",
- " def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):\n",
- " raise NotImplementedError('decompose')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_fail"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# test unsupported losses\n",
- "test_fail(\n",
- " lambda: BaseMultivariate(\n",
- " h=1,\n",
- " input_size=1,\n",
- " loss=losses.MQLoss(),\n",
- " valid_loss=losses.RMSE(),\n",
- " learning_rate=1,\n",
- " max_steps=1,\n",
- " val_check_steps=1,\n",
- " n_series=1,\n",
- " batch_size=1,\n",
- " ),\n",
- " contains='MQLoss() is not supported'\n",
- ")\n",
- "\n",
- "test_fail(\n",
- " lambda: BaseMultivariate(\n",
- " h=1,\n",
- " input_size=1,\n",
- " loss=losses.RMSE(),\n",
- " valid_loss=losses.MASE(seasonality=1),\n",
- " learning_rate=1,\n",
- " max_steps=1,\n",
- " val_check_steps=1,\n",
- " n_series=1,\n",
- " batch_size=1,\n",
- " ),\n",
- " contains='MASE() is not supported'\n",
- ")"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
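The `_create_windows` logic in the deleted `BaseMultivariate` class above carves fixed-size windows out of the temporal tensor with `Tensor.unfold` and then permutes them to `[Ws, C, L+H, n_series]`. A standalone sketch of that step with toy dimensions (all sizes below are made up for illustration):

import torch

n_series, channels, T = 3, 2, 10
input_size, h, step_size = 4, 2, 1
temporal = torch.arange(n_series * channels * T, dtype=torch.float32).reshape(n_series, channels, T)

# unfold yields [n_series, C, Ws, L+H]; Ws = T - (L+H) + 1 with step 1
window_size = input_size + h
windows = temporal.unfold(dimension=-1, size=window_size, step=step_size)

# the class then permutes to [Ws, C, L+H, n_series] before sampling windows
windows = windows.permute(2, 1, 3, 0)
print(windows.shape)  # torch.Size([5, 2, 6, 3]) for these toy sizes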
diff --git a/nbs/common.base_recurrent.ipynb b/nbs/common.base_recurrent.ipynb
deleted file mode 100644
index 1796c2584..000000000
--- a/nbs/common.base_recurrent.ipynb
+++ /dev/null
@@ -1,661 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| default_exp common._base_recurrent"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "attachments": {},
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# BaseRecurrent"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "> The `BaseRecurrent` class contains standard methods shared across recurrent neural networks; these models possess the ability to process variable-length sequences of inputs through their internal memory states. The class is represented by `LSTM`, `GRU`, and `RNN`, along with other more sophisticated architectures like `MQCNN`."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "The standard methods include `TemporalNorm` preprocessing, optimization utilities like parameter initialization, `training_step`, `validation_step`, and shared `fit` and `predict` methods.These shared methods enable all the `neuralforecast.models` compatibility with the `core.NeuralForecast` wrapper class."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import pytorch_lightning as pl\n",
- "import neuralforecast.losses.pytorch as losses\n",
- "\n",
- "from neuralforecast.common._base_model import BaseModel, tensor_to_numpy\n",
- "from neuralforecast.common._scalers import TemporalNorm\n",
- "from neuralforecast.tsdataset import TimeSeriesDataModule\n",
- "from neuralforecast.utils import get_indexer_raise_missing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class BaseRecurrent(BaseModel):\n",
- " \"\"\" Base Recurrent\n",
- " \n",
- " Base class for all recurrent-based models. The forecasts are produced sequentially between \n",
- " windows.\n",
- " \n",
- " This class implements the basic functionality for all windows-based models, including:\n",
- " - PyTorch Lightning's methods training_step, validation_step, predict_step. \n",
- " - fit and predict methods used by NeuralForecast.core class. \n",
- " - sampling and wrangling methods to sequential windows. \n",
- " \"\"\"\n",
- " def __init__(self,\n",
- " h,\n",
- " input_size,\n",
- " inference_input_size,\n",
- " loss,\n",
- " valid_loss,\n",
- " learning_rate,\n",
- " max_steps,\n",
- " val_check_steps,\n",
- " batch_size,\n",
- " valid_batch_size,\n",
- " scaler_type='robust',\n",
- " num_lr_decays=0,\n",
- " early_stop_patience_steps=-1,\n",
- " futr_exog_list=None,\n",
- " hist_exog_list=None,\n",
- " stat_exog_list=None,\n",
- " drop_last_loader=False,\n",
- " random_seed=1, \n",
- " alias=None,\n",
- " optimizer=None,\n",
- " optimizer_kwargs=None,\n",
- " lr_scheduler=None,\n",
- " lr_scheduler_kwargs=None,\n",
- " dataloader_kwargs=None,\n",
- " **trainer_kwargs):\n",
- " super().__init__(\n",
- " random_seed=random_seed,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " optimizer=optimizer,\n",
- " optimizer_kwargs=optimizer_kwargs,\n",
- " lr_scheduler=lr_scheduler,\n",
- " lr_scheduler_kwargs=lr_scheduler_kwargs,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
- " max_steps=max_steps,\n",
- " early_stop_patience_steps=early_stop_patience_steps, \n",
- " **trainer_kwargs,\n",
- " )\n",
- "\n",
- " # Padder to complete train windows, \n",
- " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
- " self.h = h\n",
- " self.input_size = input_size\n",
- " self.inference_input_size = inference_input_size\n",
- " self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- "\n",
- " unsupported_distributions = ['Bernoulli', 'ISQF']\n",
- " if isinstance(self.loss, losses.DistributionLoss) and\\\n",
- " self.loss.distribution in unsupported_distributions:\n",
- " raise Exception(f'Distribution {self.loss.distribution} not available for Recurrent-based models. Please choose another distribution.')\n",
- "\n",
- " # Valid batch_size\n",
- " self.batch_size = batch_size\n",
- " if valid_batch_size is None:\n",
- " self.valid_batch_size = batch_size\n",
- " else:\n",
- " self.valid_batch_size = valid_batch_size\n",
- "\n",
- " # Optimization\n",
- " self.learning_rate = learning_rate\n",
- " self.max_steps = max_steps\n",
- " self.num_lr_decays = num_lr_decays\n",
- " self.lr_decay_steps = max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
- " self.early_stop_patience_steps = early_stop_patience_steps\n",
- " self.val_check_steps = val_check_steps\n",
- "\n",
- " # Scaler\n",
- " self.scaler = TemporalNorm(\n",
- " scaler_type=scaler_type,\n",
- " dim=-1, # Time dimension is -1.\n",
- " num_features=1+len(self.hist_exog_list)+len(self.futr_exog_list)\n",
- " )\n",
- "\n",
- " # Fit arguments\n",
- " self.val_size = 0\n",
- " self.test_size = 0\n",
- "\n",
- " # DataModule arguments\n",
- " self.dataloader_kwargs = dataloader_kwargs\n",
- " self.drop_last_loader = drop_last_loader\n",
- " # used by on_validation_epoch_end hook\n",
- " self.validation_step_outputs = []\n",
- " self.alias = alias\n",
- "\n",
- " def _normalization(self, batch, val_size=0, test_size=0):\n",
- " temporal = batch['temporal'] # B, C, T\n",
- " temporal_cols = batch['temporal_cols'].copy()\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Separate data and mask\n",
- " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
- " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
- " temporal_data = temporal[:, temporal_idxs, :]\n",
- " temporal_mask = temporal[:, temporal_cols.get_loc('available_mask'), :].clone()\n",
- "\n",
- " # Remove validation and test set to prevent leakeage\n",
- " if val_size + test_size > 0:\n",
- " cutoff = val_size + test_size\n",
- " temporal_mask[:, -cutoff:] = 0\n",
- "\n",
- " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
- " temporal_mask = temporal_mask.unsqueeze(1) # Add channel dimension for scaler.transform.\n",
- " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
- "\n",
- " # Replace values in windows dict\n",
- " temporal[:, temporal_idxs, :] = temporal_data\n",
- " batch['temporal'] = temporal\n",
- "\n",
- " return batch\n",
- "\n",
- " def _inv_normalization(self, y_hat, temporal_cols, y_idx):\n",
- " # Receives window predictions [B, seq_len, H, output]\n",
- " # Broadcasts outputs and inverts normalization\n",
- "\n",
- " # Get 'y' scale and shift, and add W dimension\n",
- " y_loc = self.scaler.x_shift[:, [y_idx], 0].flatten() #[B,C,T] -> [B] \n",
- " y_scale = self.scaler.x_scale[:, [y_idx], 0].flatten() #[B,C,T] -> [B]\n",
- "\n",
- " # Expand scale and shift to y_hat dimensions\n",
- " y_loc = y_loc.view(*y_loc.shape, *(1,)*(y_hat.ndim-1))#.expand(y_hat) \n",
- " y_scale = y_scale.view(*y_scale.shape, *(1,)*(y_hat.ndim-1))#.expand(y_hat)\n",
- "\n",
- " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
- "\n",
- " return y_hat, y_loc, y_scale\n",
- "\n",
- " def _create_windows(self, batch, step):\n",
- " temporal = batch['temporal']\n",
- " temporal_cols = batch['temporal_cols']\n",
- "\n",
- " if step == 'train':\n",
- " if self.val_size + self.test_size > 0:\n",
- " cutoff = -self.val_size - self.test_size\n",
- " temporal = temporal[:, :, :cutoff]\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " # Truncate batch to shorter time-series \n",
- " av_condition = torch.nonzero(torch.min(temporal[:, temporal_cols.get_loc('available_mask')], axis=0).values)\n",
- " min_time_stamp = int(av_condition.min())\n",
- " \n",
- " available_ts = temporal.shape[-1] - min_time_stamp\n",
- " if available_ts < 1 + self.h:\n",
- " raise Exception(\n",
- " 'Time series too short for given input and output size. \\n'\n",
- " f'Available timestamps: {available_ts}'\n",
- " )\n",
- "\n",
- " temporal = temporal[:, :, min_time_stamp:]\n",
- "\n",
- " if step == 'val':\n",
- " if self.test_size > 0:\n",
- " temporal = temporal[:, :, :-self.test_size]\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " if step == 'predict':\n",
- " if (self.test_size == 0) and (len(self.futr_exog_list)==0):\n",
- " temporal = self.padder(temporal)\n",
- "\n",
- " # Test size covers all data, pad left one timestep with zeros\n",
- " if temporal.shape[-1] == self.test_size:\n",
- " padder_left = nn.ConstantPad1d(padding=(1, 0), value=0.0)\n",
- " temporal = padder_left(temporal)\n",
- "\n",
- " # Parse batch\n",
- " window_size = 1 + self.h # 1 for current t and h for future\n",
- " windows = temporal.unfold(dimension=-1,\n",
- " size=window_size,\n",
- " step=1)\n",
- "\n",
- " # Truncated backprogatation/inference (shorten sequence where RNNs unroll)\n",
- " n_windows = windows.shape[2]\n",
- " input_size = -1\n",
- " if (step == 'train') and (self.input_size>0):\n",
- " input_size = self.input_size\n",
- " if (input_size > 0) and (n_windows > input_size):\n",
- " max_sampleable_time = n_windows-self.input_size+1\n",
- " start = np.random.choice(max_sampleable_time)\n",
- " windows = windows[:, :, start:(start+input_size), :]\n",
- "\n",
- " if (step == 'val') and (self.inference_input_size>0):\n",
- " cutoff = self.inference_input_size + self.val_size\n",
- " windows = windows[:, :, -cutoff:, :]\n",
- "\n",
- " if (step == 'predict') and (self.inference_input_size>0):\n",
- " cutoff = self.inference_input_size + self.test_size\n",
- " windows = windows[:, :, -cutoff:, :]\n",
- " \n",
- " # [B, C, input_size, 1+H]\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=batch.get('static', None),\n",
- " static_cols=batch.get('static_cols', None))\n",
- "\n",
- " return windows_batch\n",
- "\n",
- " def _parse_windows(self, batch, windows):\n",
- " # [B, C, seq_len, 1+H]\n",
- " # Filter insample lags from outsample horizon\n",
- " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
- " y_idx = batch['y_idx'] \n",
- " insample_y = windows['temporal'][:, y_idx, :, :-self.h]\n",
- " insample_mask = windows['temporal'][:, mask_idx, :, :-self.h]\n",
- " outsample_y = windows['temporal'][:, y_idx, :, -self.h:].contiguous()\n",
- " outsample_mask = windows['temporal'][:, mask_idx, :, -self.h:].contiguous()\n",
- "\n",
- " # Filter historic exogenous variables\n",
- " if len(self.hist_exog_list):\n",
- " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
- " hist_exog = windows['temporal'][:, hist_exog_idx, :, :-self.h]\n",
- " else:\n",
- " hist_exog = None\n",
- " \n",
- " # Filter future exogenous variables\n",
- " if len(self.futr_exog_list):\n",
- " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
- " futr_exog = windows['temporal'][:, futr_exog_idx, :, :]\n",
- " else:\n",
- " futr_exog = None\n",
- " # Filter static variables\n",
- " if len(self.stat_exog_list):\n",
- " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
- " stat_exog = windows['static'][:, static_idx]\n",
- " else:\n",
- " stat_exog = None\n",
- "\n",
- " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog\n",
- "\n",
- " def training_step(self, batch, batch_idx):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " batch = self._normalization(batch, val_size=self.val_size, test_size=self.test_size)\n",
- " windows = self._create_windows(batch, step='train')\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [B, seq_len, 1]\n",
- " insample_mask=insample_mask, # [B, seq_len, 1]\n",
- " futr_exog=futr_exog, # [B, F, seq_len, 1+H]\n",
- " hist_exog=hist_exog, # [B, C, seq_len]\n",
- " stat_exog=stat_exog) # [B, S]\n",
- "\n",
- " # Model predictions\n",
- " output = self(windows_batch) # tuple([B, seq_len, H, output])\n",
- " if self.loss.is_distribution_output:\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=batch['y_idx'])\n",
- " B = output[0].size()[0]\n",
- " T = output[0].size()[1]\n",
- " H = output[0].size()[2]\n",
- " output = [arg.view(-1, *(arg.size()[2:])) for arg in output]\n",
- " outsample_y = outsample_y.view(B*T,H)\n",
- " outsample_mask = outsample_mask.view(B*T,H)\n",
- " y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
- " return loss\n",
- "\n",
- " def validation_step(self, batch, batch_idx):\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- "\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " batch = self._normalization(batch, val_size=self.val_size, test_size=self.test_size)\n",
- " windows = self._create_windows(batch, step='val')\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [B, seq_len, 1]\n",
- " insample_mask=insample_mask, # [B, seq_len, 1]\n",
- " futr_exog=futr_exog, # [B, F, seq_len, 1+H]\n",
- " hist_exog=hist_exog, # [B, C, seq_len]\n",
- " stat_exog=stat_exog) # [B, S]\n",
- "\n",
- " # Remove train y_hat (+1 and -1 for padded last window with zeros)\n",
- " # tuple([B, seq_len, H, output]) -> tuple([B, validation_size, H, output])\n",
- " val_windows = (self.val_size) + 1\n",
- " outsample_y = outsample_y[:, -val_windows:-1, :]\n",
- " outsample_mask = outsample_mask[:, -val_windows:-1, :] \n",
- "\n",
- " # Model predictions\n",
- " output = self(windows_batch) # tuple([B, seq_len, H, output])\n",
- " if self.loss.is_distribution_output:\n",
- " output = [arg[:, -val_windows:-1] for arg in output]\n",
- " outsample_y, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " B = output[0].size()[0]\n",
- " T = output[0].size()[1]\n",
- " H = output[0].size()[2]\n",
- " output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]\n",
- " outsample_y = outsample_y.reshape(B*T,H)\n",
- " outsample_mask = outsample_mask.reshape(B*T,H)\n",
- " y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " if str(type(self.valid_loss)) in\\\n",
- " [\"\", \"\"]:\n",
- " output = quants\n",
- " elif str(type(self.valid_loss)) in [\"\"]:\n",
- " output = torch.unsqueeze(sample_mean, dim=-1) # [N,H,1] -> [N,H]\n",
- " \n",
- " else:\n",
- " output = output[:, -val_windows:-1, :]\n",
- "\n",
- " # Validation Loss evaluation\n",
- " if self.valid_loss.is_distribution_output:\n",
- " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " outsample_y, _, _ = self._inv_normalization(y_hat=outsample_y, temporal_cols=batch['temporal_cols'], y_idx=y_idx)\n",
- " output, _, _ = self._inv_normalization(y_hat=output, temporal_cols=batch['temporal_cols'], y_idx=y_idx)\n",
- " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " batch = self._normalization(batch, val_size=0, test_size=self.test_size)\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [B, seq_len, 1]\n",
- " insample_mask=insample_mask, # [B, seq_len, 1]\n",
- " futr_exog=futr_exog, # [B, F, seq_len, 1+H]\n",
- " hist_exog=hist_exog, # [B, C, seq_len]\n",
- " stat_exog=stat_exog) # [B, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch) # tuple([B, seq_len, H], ...)\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=output[0],\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " B = output[0].size()[0]\n",
- " T = output[0].size()[1]\n",
- " H = output[0].size()[2]\n",
- " output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]\n",
- " y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- " y_hat = torch.concat((sample_mean, quants), axis=2)\n",
- " y_hat = y_hat.view(B, T, H, -1)\n",
- "\n",
- " if self.loss.return_params:\n",
- " distr_args = torch.stack(distr_args, dim=-1)\n",
- " distr_args = torch.reshape(distr_args, (B, T, H, -1))\n",
- " y_hat = torch.concat((y_hat, distr_args), axis=3)\n",
- " else:\n",
- " y_hat, _, _ = self._inv_normalization(y_hat=output,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " return y_hat\n",
- "\n",
- " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
- " \"\"\" Fit.\n",
- "\n",
- " The `fit` method, optimizes the neural network's weights using the\n",
- " initialization parameters (`learning_rate`, `batch_size`, ...)\n",
- " and the `loss` function as defined during the initialization. \n",
- " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
- " inherits the initialization's `self.trainer_kwargs`, to customize\n",
- " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
- "\n",
- " The method is designed to be compatible with SKLearn-like classes\n",
- " and in particular to be compatible with the StatsForecast library.\n",
- "\n",
- " By default the `model` is not saving training checkpoints to protect \n",
- " disk memory, to get them change `enable_checkpointing=True` in `__init__`. \n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `val_size`: int, validation size for temporal cross-validation. \n",
- " `test_size`: int, test size for temporal cross-validation. \n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s. \n",
- " \"\"\"\n",
- " return self._fit(\n",
- " dataset=dataset,\n",
- " batch_size=self.batch_size,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " val_size=val_size,\n",
- " test_size=test_size,\n",
- " random_seed=random_seed,\n",
- " distributed_config=distributed_config,\n",
- " )\n",
- "\n",
- " def predict(self, dataset, step_size=1,\n",
- " random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Predict.\n",
- "\n",
- " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `step_size`: int=1, Step size between each window. \n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s. \n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " self._check_exog(dataset)\n",
- " self._restart_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- " \n",
- " if step_size > 1:\n",
- " raise Exception('Recurrent models do not support step_size > 1')\n",
- "\n",
- " # fcsts (window, batch, h)\n",
- " # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.\n",
- " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
- " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
- " pred_trainer_kwargs['devices'] = [0]\n",
- "\n",
- " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
- "\n",
- " datamodule = TimeSeriesDataModule(\n",
- " dataset=dataset,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " **data_module_kwargs\n",
- " )\n",
- " fcsts = trainer.predict(self, datamodule=datamodule)\n",
- " if self.test_size > 0:\n",
- " # Remove warmup windows (from train and validation)\n",
- " # [N,T,H,output], avoid indexing last dim for univariate output compatibility\n",
- " fcsts = torch.vstack([fcst[:, -(1+self.test_size-self.h):,:] for fcst in fcsts])\n",
- " fcsts = tensor_to_numpy(fcsts).flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " else:\n",
- " fcsts = torch.vstack([fcst[:,-1:,:] for fcst in fcsts])\n",
- " fcsts = tensor_to_numpy(fcsts).flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " return fcsts"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseRecurrent, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseRecurrent.fit, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseRecurrent.predict, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.utils import AirPassengersDF\n",
- "from neuralforecast.tsdataset import TimeSeriesDataset, TimeSeriesDataModule"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# add h=0,1 unit test for _parse_windows \n",
- "# Declare batch\n",
- "AirPassengersDF['x'] = np.array(len(AirPassengersDF))\n",
- "AirPassengersDF['x2'] = np.array(len(AirPassengersDF)) * 2\n",
- "dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=AirPassengersDF)\n",
- "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n",
- "\n",
- "train_loader = data.train_dataloader()\n",
- "batch = next(iter(train_loader))\n",
- "\n",
- "# Test that hist_exog_list and futr_exog_list correctly filter data that is sent to scaler.\n",
- "baserecurrent = BaseRecurrent(h=12,\n",
- " input_size=117,\n",
- " hist_exog_list=['x', 'x2'],\n",
- " futr_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=10,\n",
- " inference_input_size=2,\n",
- " start_padding_enabled=True)\n",
- "\n",
- "windows = baserecurrent._create_windows(batch, step='train')\n",
- "\n",
- "temporal_cols = windows['temporal_cols'].copy() # B, L+H, C\n",
- "temporal_data_cols = baserecurrent._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- "\n",
- "test_eq(set(temporal_data_cols), set(['x', 'x2']))\n",
- "test_eq(windows['temporal'].shape, torch.Size([1,len(['y', 'x', 'x2', 'available_mask']),117,12+1]))"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
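Both deleted base classes pair `_normalization` with `_inv_normalization` through a `TemporalNorm` scaler: statistics are computed on the masked in-sample portion, stored as `x_shift` / `x_scale`, and later reused to invert the predictions. A minimal round-trip sketch with random toy data (the shapes and tolerance are illustrative assumptions, not values from this diff):

import torch
from neuralforecast.common._scalers import TemporalNorm

B, C, T, h = 2, 1, 12, 3
x = torch.randn(B, C, T)
mask = torch.ones(B, 1, T)
mask[:, :, -h:] = 0.0  # hide the horizon so it never influences the statistics

# dim=-1: time is the last axis, as in the recurrent base class
scaler = TemporalNorm(scaler_type='robust', dim=-1, num_features=C)
x_norm = scaler.transform(x=x, mask=mask)  # stores x_shift / x_scale on the scaler
x_back = scaler.inverse_transform(z=x_norm, x_scale=scaler.x_scale, x_shift=scaler.x_shift)
print(torch.allclose(x, x_back, atol=1e-5))  # expected True: the transform round-trips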
diff --git a/nbs/common.base_windows.ipynb b/nbs/common.base_windows.ipynb
deleted file mode 100644
index 24bbf251c..000000000
--- a/nbs/common.base_windows.ipynb
+++ /dev/null
@@ -1,897 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "524620c1",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| default_exp common._base_windows"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "15392f6f",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "%load_ext autoreload\n",
- "%autoreload 2"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1e0f9607-d12d-44e5-b2be-91a57a0bca79",
- "metadata": {},
- "source": [
- "# BaseWindows\n",
- "\n",
- "> The `BaseWindows` class contains standard methods shared across window-based neural networks; in contrast to recurrent neural networks these models commit to a fixed sequence length input. The class is represented by `MLP`, and other more sophisticated architectures like `NBEATS`, and `NHITS`."
- ]
- },
- {
- "cell_type": "markdown",
- "id": "1730a556-1574-40ad-92a2-23b924ceb398",
- "metadata": {},
- "source": [
- "The standard methods include data preprocessing `_normalization`, optimization utilities like parameter initialization, `training_step`, `validation_step`, and shared `fit` and `predict` methods.These shared methods enable all the `neuralforecast.models` compatibility with the `core.NeuralForecast` wrapper class. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2508f7a9-1433-4ad8-8f2f-0078c6ed6c3c",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "44065066-e72a-431f-938f-1528adef9fe8",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "import numpy as np\n",
- "import torch\n",
- "import torch.nn as nn\n",
- "import pytorch_lightning as pl\n",
- "\n",
- "from neuralforecast.common._base_model import BaseModel, tensor_to_numpy\n",
- "from neuralforecast.common._scalers import TemporalNorm\n",
- "from neuralforecast.tsdataset import TimeSeriesDataModule\n",
- "from neuralforecast.utils import get_indexer_raise_missing"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ce70cd14-ecb1-4205-8511-fecbd26c8408",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class BaseWindows(BaseModel):\n",
- " \"\"\" Base Windows\n",
- " \n",
- " Base class for all windows-based models. The forecasts are produced separately \n",
- " for each window, which are randomly sampled during training.\n",
- " \n",
- " This class implements the basic functionality for all windows-based models, including:\n",
- " - PyTorch Lightning's methods training_step, validation_step, predict_step. \n",
- " - fit and predict methods used by NeuralForecast.core class. \n",
- " - sampling and wrangling methods to generate windows.\n",
- " \"\"\"\n",
- " def __init__(self,\n",
- " h,\n",
- " input_size,\n",
- " loss,\n",
- " valid_loss,\n",
- " learning_rate,\n",
- " max_steps,\n",
- " val_check_steps,\n",
- " batch_size,\n",
- " valid_batch_size,\n",
- " windows_batch_size,\n",
- " inference_windows_batch_size,\n",
- " start_padding_enabled,\n",
- " step_size=1,\n",
- " num_lr_decays=0,\n",
- " early_stop_patience_steps=-1,\n",
- " scaler_type='identity',\n",
- " futr_exog_list=None,\n",
- " hist_exog_list=None,\n",
- " stat_exog_list=None,\n",
- " exclude_insample_y=False,\n",
- " drop_last_loader=False,\n",
- " random_seed=1,\n",
- " alias=None,\n",
- " optimizer=None,\n",
- " optimizer_kwargs=None,\n",
- " lr_scheduler=None,\n",
- " lr_scheduler_kwargs=None,\n",
- " dataloader_kwargs=None,\n",
- " **trainer_kwargs):\n",
- " super().__init__(\n",
- " random_seed=random_seed,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " optimizer=optimizer,\n",
- " optimizer_kwargs=optimizer_kwargs,\n",
- " lr_scheduler=lr_scheduler,\n",
- " lr_scheduler_kwargs=lr_scheduler_kwargs,\n",
- " futr_exog_list=futr_exog_list,\n",
- " hist_exog_list=hist_exog_list,\n",
- " stat_exog_list=stat_exog_list,\n",
- " max_steps=max_steps,\n",
- " early_stop_patience_steps=early_stop_patience_steps, \n",
- " **trainer_kwargs,\n",
- " )\n",
- "\n",
- " # Padder to complete train windows, \n",
- " # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]\n",
- " self.h = h\n",
- " self.input_size = input_size\n",
- " self.windows_batch_size = windows_batch_size\n",
- " self.start_padding_enabled = start_padding_enabled\n",
- " if start_padding_enabled:\n",
- " self.padder_train = nn.ConstantPad1d(padding=(self.input_size-1, self.h), value=0.0)\n",
- " else:\n",
- " self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- "\n",
- " # Batch sizes\n",
- " self.batch_size = batch_size\n",
- " if valid_batch_size is None:\n",
- " self.valid_batch_size = batch_size\n",
- " else:\n",
- " self.valid_batch_size = valid_batch_size\n",
- " if inference_windows_batch_size is None:\n",
- " self.inference_windows_batch_size = windows_batch_size\n",
- " else:\n",
- " self.inference_windows_batch_size = inference_windows_batch_size\n",
- "\n",
- " # Optimization \n",
- " self.learning_rate = learning_rate\n",
- " self.max_steps = max_steps\n",
- " self.num_lr_decays = num_lr_decays\n",
- " self.lr_decay_steps = (\n",
- " max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7\n",
- " )\n",
- " self.early_stop_patience_steps = early_stop_patience_steps\n",
- " self.val_check_steps = val_check_steps\n",
- " self.windows_batch_size = windows_batch_size\n",
- " self.step_size = step_size\n",
- " \n",
- " self.exclude_insample_y = exclude_insample_y\n",
- "\n",
- " # Scaler\n",
- " self.scaler = TemporalNorm(\n",
- " scaler_type=scaler_type,\n",
- " dim=1, # Time dimension is 1.\n",
- " num_features=1+len(self.hist_exog_list)+len(self.futr_exog_list)\n",
- " )\n",
- "\n",
- " # Fit arguments\n",
- " self.val_size = 0\n",
- " self.test_size = 0\n",
- "\n",
- " # Model state\n",
- " self.decompose_forecast = False\n",
- "\n",
- " # DataModule arguments\n",
- " self.dataloader_kwargs = dataloader_kwargs\n",
- " self.drop_last_loader = drop_last_loader\n",
- " # used by on_validation_epoch_end hook\n",
- " self.validation_step_outputs = []\n",
- " self.alias = alias\n",
- "\n",
- " def _create_windows(self, batch, step, w_idxs=None):\n",
- " # Parse common data\n",
- " window_size = self.input_size + self.h\n",
- " temporal_cols = batch['temporal_cols']\n",
- " temporal = batch['temporal']\n",
- "\n",
- " if step == 'train':\n",
- " if self.val_size + self.test_size > 0:\n",
- " cutoff = -self.val_size - self.test_size\n",
- " temporal = temporal[:, :, :cutoff]\n",
- "\n",
- " temporal = self.padder_train(temporal)\n",
- " if temporal.shape[-1] < window_size:\n",
- " raise Exception('Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True')\n",
- " windows = temporal.unfold(dimension=-1, \n",
- " size=window_size, \n",
- " step=self.step_size)\n",
- "\n",
- " # [B, C, Ws, L+H] 0, 1, 2, 3\n",
- " # -> [B * Ws, L+H, C] 0, 2, 3, 1\n",
- " windows_per_serie = windows.shape[2]\n",
- " windows = windows.permute(0, 2, 3, 1).contiguous()\n",
- " windows = windows.reshape(-1, window_size, len(temporal_cols))\n",
- "\n",
- " # Sample and Available conditions\n",
- " available_idx = temporal_cols.get_loc('available_mask')\n",
- " available_condition = windows[:, :self.input_size, available_idx]\n",
- " available_condition = torch.sum(available_condition, axis=1)\n",
- " final_condition = (available_condition > 0)\n",
- " if self.h > 0:\n",
- " sample_condition = windows[:, self.input_size:, available_idx]\n",
- " sample_condition = torch.sum(sample_condition, axis=1)\n",
- " final_condition = (sample_condition > 0) & (available_condition > 0)\n",
- " windows = windows[final_condition]\n",
- "\n",
- " # Parse Static data to match windows\n",
- " # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]\n",
- " static = batch.get('static', None)\n",
- " static_cols=batch.get('static_cols', None)\n",
- " if static is not None:\n",
- " static = torch.repeat_interleave(static, \n",
- " repeats=windows_per_serie, dim=0)\n",
- " static = static[final_condition]\n",
- "\n",
- " # Protection of empty windows\n",
- " if final_condition.sum() == 0:\n",
- " raise Exception('No windows available for training')\n",
- "\n",
- " # Sample windows\n",
- " n_windows = len(windows)\n",
- " if self.windows_batch_size is not None:\n",
- " w_idxs = np.random.choice(n_windows, \n",
- " size=self.windows_batch_size,\n",
- " replace=(n_windows < self.windows_batch_size))\n",
- " windows = windows[w_idxs]\n",
- " \n",
- " if static is not None:\n",
- " static = static[w_idxs]\n",
- "\n",
- " # think about interaction available * sample mask\n",
- " # [B, C, Ws, L+H]\n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- " return windows_batch\n",
- "\n",
- " elif step in ['predict', 'val']:\n",
- "\n",
- " if step == 'predict':\n",
- " initial_input = temporal.shape[-1] - self.test_size\n",
- " if initial_input <= self.input_size: # There is not enough data to predict first timestamp\n",
- " padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0.0)\n",
- " temporal = padder_left(temporal)\n",
- " predict_step_size = self.predict_step_size\n",
- " cutoff = - self.input_size - self.test_size\n",
- " temporal = temporal[:, :, cutoff:]\n",
- "\n",
- " elif step == 'val':\n",
- " predict_step_size = self.step_size\n",
- " cutoff = -self.input_size - self.val_size - self.test_size\n",
- " if self.test_size > 0:\n",
- " temporal = batch['temporal'][:, :, cutoff:-self.test_size]\n",
- " else:\n",
- " temporal = batch['temporal'][:, :, cutoff:]\n",
- " if temporal.shape[-1] < window_size:\n",
- " initial_input = temporal.shape[-1] - self.val_size\n",
- " padder_left = nn.ConstantPad1d(padding=(self.input_size-initial_input, 0), value=0.0)\n",
- " temporal = padder_left(temporal)\n",
- "\n",
- " if (step=='predict') and (self.test_size==0) and (len(self.futr_exog_list)==0):\n",
- " padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0.0)\n",
- " temporal = padder_right(temporal)\n",
- "\n",
- " windows = temporal.unfold(dimension=-1,\n",
- " size=window_size,\n",
- " step=predict_step_size)\n",
- "\n",
- " # [batch, channels, windows, window_size] 0, 1, 2, 3\n",
- " # -> [batch * windows, window_size, channels] 0, 2, 3, 1\n",
- " windows_per_serie = windows.shape[2]\n",
- " windows = windows.permute(0, 2, 3, 1).contiguous()\n",
- " windows = windows.reshape(-1, window_size, len(temporal_cols))\n",
- "\n",
- " static = batch.get('static', None)\n",
- " static_cols=batch.get('static_cols', None)\n",
- " if static is not None:\n",
- " static = torch.repeat_interleave(static, \n",
- " repeats=windows_per_serie, dim=0)\n",
- " \n",
- " # Sample windows for batched prediction\n",
- " if w_idxs is not None:\n",
- " windows = windows[w_idxs]\n",
- " if static is not None:\n",
- " static = static[w_idxs]\n",
- " \n",
- " windows_batch = dict(temporal=windows,\n",
- " temporal_cols=temporal_cols,\n",
- " static=static,\n",
- " static_cols=static_cols)\n",
- " return windows_batch\n",
- " else:\n",
- " raise ValueError(f'Unknown step {step}')\n",
- "\n",
- " def _normalization(self, windows, y_idx):\n",
- " # windows are already filtered by train/validation/test\n",
- " # from the `create_windows_method` nor leakage risk\n",
- " temporal = windows['temporal'] # B, L+H, C\n",
- " temporal_cols = windows['temporal_cols'].copy() # B, L+H, C\n",
- "\n",
- " # To avoid leakage uses only the lags\n",
- " #temporal_data_cols = temporal_cols.drop('available_mask').tolist()\n",
- " temporal_data_cols = self._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- " temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)\n",
- " temporal_idxs = np.append(y_idx, temporal_idxs)\n",
- " temporal_data = temporal[:, :, temporal_idxs]\n",
- " temporal_mask = temporal[:, :, temporal_cols.get_loc('available_mask')].clone()\n",
- " if self.h > 0:\n",
- " temporal_mask[:, -self.h:] = 0.0\n",
- "\n",
- " # Normalize. self.scaler stores the shift and scale for inverse transform\n",
- " temporal_mask = temporal_mask.unsqueeze(-1) # Add channel dimension for scaler.transform.\n",
- " temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)\n",
- "\n",
- " # Replace values in windows dict\n",
- " temporal[:, :, temporal_idxs] = temporal_data\n",
- " windows['temporal'] = temporal\n",
- "\n",
- " return windows\n",
- "\n",
- " def _inv_normalization(self, y_hat, temporal_cols, y_idx):\n",
- " # Receives window predictions [B, H, output]\n",
- " # Broadcasts outputs and inverts normalization\n",
- "\n",
- " # Add C dimension\n",
- " if y_hat.ndim == 2:\n",
- " remove_dimension = True\n",
- " y_hat = y_hat.unsqueeze(-1)\n",
- " else:\n",
- " remove_dimension = False\n",
- "\n",
- " y_scale = self.scaler.x_scale[:, :, [y_idx]]\n",
- " y_loc = self.scaler.x_shift[:, :, [y_idx]]\n",
- "\n",
- " y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1).to(y_hat.device)\n",
- " y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1).to(y_hat.device)\n",
- "\n",
- " y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)\n",
- " y_loc = y_loc.to(y_hat.device)\n",
- " y_scale = y_scale.to(y_hat.device)\n",
- " \n",
- " if remove_dimension:\n",
- " y_hat = y_hat.squeeze(-1)\n",
- " y_loc = y_loc.squeeze(-1)\n",
- " y_scale = y_scale.squeeze(-1)\n",
- "\n",
- " return y_hat, y_loc, y_scale\n",
- "\n",
- " def _parse_windows(self, batch, windows):\n",
- " # Filter insample lags from outsample horizon\n",
- " y_idx = batch['y_idx']\n",
- " mask_idx = batch['temporal_cols'].get_loc('available_mask')\n",
- "\n",
- " insample_y = windows['temporal'][:, :self.input_size, y_idx]\n",
- " insample_mask = windows['temporal'][:, :self.input_size, mask_idx]\n",
- "\n",
- " # Declare additional information\n",
- " outsample_y = None\n",
- " outsample_mask = None\n",
- " hist_exog = None\n",
- " futr_exog = None\n",
- " stat_exog = None\n",
- "\n",
- " if self.h > 0:\n",
- " outsample_y = windows['temporal'][:, self.input_size:, y_idx]\n",
- " outsample_mask = windows['temporal'][:, self.input_size:, mask_idx]\n",
- "\n",
- " if len(self.hist_exog_list):\n",
- " hist_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.hist_exog_list)\n",
- " hist_exog = windows['temporal'][:, :self.input_size, hist_exog_idx]\n",
- "\n",
- " if len(self.futr_exog_list):\n",
- " futr_exog_idx = get_indexer_raise_missing(windows['temporal_cols'], self.futr_exog_list)\n",
- " futr_exog = windows['temporal'][:, :, futr_exog_idx]\n",
- "\n",
- " if len(self.stat_exog_list):\n",
- " static_idx = get_indexer_raise_missing(windows['static_cols'], self.stat_exog_list)\n",
- " stat_exog = windows['static'][:, static_idx]\n",
- "\n",
- " # TODO: think a better way of removing insample_y features\n",
- " if self.exclude_insample_y:\n",
- " insample_y = insample_y * 0\n",
- "\n",
- " return insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog\n",
- "\n",
- " def training_step(self, batch, batch_idx):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " windows = self._create_windows(batch, step='train')\n",
- " y_idx = batch['y_idx']\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-self.h:,y_idx])\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L + h, F]\n",
- " hist_exog=hist_exog, # [Ws, L, X]\n",
- " stat_exog=stat_exog) # [Ws, S]\n",
- "\n",
- " # Model Predictions\n",
- " output = self(windows_batch)\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " outsample_y = original_outsample_y\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- "\n",
- " if torch.isnan(loss):\n",
- " print('Model Parameters', self.hparams)\n",
- " print('insample_y', torch.isnan(insample_y).sum())\n",
- " print('outsample_y', torch.isnan(outsample_y).sum())\n",
- " print('output', torch.isnan(output).sum())\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'train_loss',\n",
- " loss.detach().item(),\n",
- " batch_size=outsample_y.size(0),\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.train_trajectories.append((self.global_step, loss.detach().item()))\n",
- " return loss\n",
- "\n",
- " def _compute_valid_loss(self, outsample_y, output, outsample_mask, temporal_cols, y_idx):\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=outsample_y,\n",
- " temporal_cols=temporal_cols,\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- "\n",
- " if str(type(self.valid_loss)) in\\\n",
- " [\"\", \"\"]:\n",
- " output = quants\n",
- " elif str(type(self.valid_loss)) in [\"\"]:\n",
- " output = torch.unsqueeze(sample_mean, dim=-1) # [N,H,1] -> [N,H]\n",
- "\n",
- " # Validation Loss evaluation\n",
- " if self.valid_loss.is_distribution_output:\n",
- " valid_loss = self.valid_loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)\n",
- " else:\n",
- " output, _, _ = self._inv_normalization(y_hat=output,\n",
- " temporal_cols=temporal_cols,\n",
- " y_idx=y_idx)\n",
- " valid_loss = self.valid_loss(y=outsample_y, y_hat=output, mask=outsample_mask)\n",
- " return valid_loss\n",
- " \n",
- " def validation_step(self, batch, batch_idx):\n",
- " if self.val_size == 0:\n",
- " return np.nan\n",
- "\n",
- " # TODO: Hack to compute number of windows\n",
- " windows = self._create_windows(batch, step='val')\n",
- " n_windows = len(windows['temporal'])\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Number of windows in batch\n",
- " windows_batch_size = self.inference_windows_batch_size\n",
- " if windows_batch_size < 0:\n",
- " windows_batch_size = n_windows\n",
- " n_batches = int(np.ceil(n_windows/windows_batch_size))\n",
- "\n",
- " valid_losses = []\n",
- " batch_sizes = []\n",
- " for i in range(n_batches):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " w_idxs = np.arange(i*windows_batch_size, \n",
- " min((i+1)*windows_batch_size, n_windows))\n",
- " windows = self._create_windows(batch, step='val', w_idxs=w_idxs)\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-self.h:,y_idx])\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L + h, F]\n",
- " hist_exog=hist_exog, # [Ws, L, X]\n",
- " stat_exog=stat_exog) # [Ws, S]\n",
- " \n",
- " # Model Predictions\n",
- " output_batch = self(windows_batch)\n",
- " valid_loss_batch = self._compute_valid_loss(outsample_y=original_outsample_y,\n",
- " output=output_batch, outsample_mask=outsample_mask,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=batch['y_idx'])\n",
- " valid_losses.append(valid_loss_batch)\n",
- " batch_sizes.append(len(output_batch))\n",
- " \n",
- " valid_loss = torch.stack(valid_losses)\n",
- " batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)\n",
- " batch_size = torch.sum(batch_sizes)\n",
- " valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size\n",
- "\n",
- " if torch.isnan(valid_loss):\n",
- " raise Exception('Loss is NaN, training stopped.')\n",
- "\n",
- " self.log(\n",
- " 'valid_loss',\n",
- " valid_loss.detach().item(),\n",
- " batch_size=batch_size,\n",
- " prog_bar=True,\n",
- " on_epoch=True,\n",
- " )\n",
- " self.validation_step_outputs.append(valid_loss)\n",
- " return valid_loss\n",
- "\n",
- " def predict_step(self, batch, batch_idx):\n",
- "\n",
- " # TODO: Hack to compute number of windows\n",
- " windows = self._create_windows(batch, step='predict')\n",
- " n_windows = len(windows['temporal'])\n",
- " y_idx = batch['y_idx']\n",
- "\n",
- " # Number of windows in batch\n",
- " windows_batch_size = self.inference_windows_batch_size\n",
- " if windows_batch_size < 0:\n",
- " windows_batch_size = n_windows\n",
- " n_batches = int(np.ceil(n_windows/windows_batch_size))\n",
- "\n",
- " y_hats = []\n",
- " for i in range(n_batches):\n",
- " # Create and normalize windows [Ws, L+H, C]\n",
- " w_idxs = np.arange(i*windows_batch_size, \n",
- " min((i+1)*windows_batch_size, n_windows))\n",
- " windows = self._create_windows(batch, step='predict', w_idxs=w_idxs)\n",
- " windows = self._normalization(windows=windows, y_idx=y_idx)\n",
- "\n",
- " # Parse windows\n",
- " insample_y, insample_mask, _, _, \\\n",
- " hist_exog, futr_exog, stat_exog = self._parse_windows(batch, windows)\n",
- "\n",
- " windows_batch = dict(insample_y=insample_y, # [Ws, L]\n",
- " insample_mask=insample_mask, # [Ws, L]\n",
- " futr_exog=futr_exog, # [Ws, L + h, F]\n",
- " hist_exog=hist_exog, # [Ws, L, X]\n",
- " stat_exog=stat_exog) # [Ws, S] \n",
- "\n",
- " # Model Predictions\n",
- " output_batch = self(windows_batch)\n",
- " # Inverse normalization and sampling\n",
- " if self.loss.is_distribution_output:\n",
- " _, y_loc, y_scale = self._inv_normalization(y_hat=torch.empty(size=(insample_y.shape[0], self.h),\n",
- " dtype=output_batch[0].dtype,\n",
- " device=output_batch[0].device),\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " distr_args = self.loss.scale_decouple(output=output_batch, loc=y_loc, scale=y_scale)\n",
- " _, sample_mean, quants = self.loss.sample(distr_args=distr_args)\n",
- " y_hat = torch.concat((sample_mean, quants), axis=2)\n",
- "\n",
- " if self.loss.return_params:\n",
- " distr_args = torch.stack(distr_args, dim=-1)\n",
- " distr_args = torch.reshape(distr_args, (len(windows[\"temporal\"]), self.h, -1))\n",
- " y_hat = torch.concat((y_hat, distr_args), axis=2)\n",
- " else:\n",
- " y_hat, _, _ = self._inv_normalization(y_hat=output_batch,\n",
- " temporal_cols=batch['temporal_cols'],\n",
- " y_idx=y_idx)\n",
- " y_hats.append(y_hat)\n",
- " y_hat = torch.cat(y_hats, dim=0)\n",
- " return y_hat\n",
- " \n",
- " def fit(self, dataset, val_size=0, test_size=0, random_seed=None, distributed_config=None):\n",
- " \"\"\" Fit.\n",
- "\n",
- " The `fit` method, optimizes the neural network's weights using the\n",
- " initialization parameters (`learning_rate`, `windows_batch_size`, ...)\n",
- " and the `loss` function as defined during the initialization. \n",
- " Within `fit` we use a PyTorch Lightning `Trainer` that\n",
- " inherits the initialization's `self.trainer_kwargs`, to customize\n",
- " its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).\n",
- "\n",
- " The method is designed to be compatible with SKLearn-like classes\n",
- " and in particular to be compatible with the StatsForecast library.\n",
- "\n",
- " By default the `model` is not saving training checkpoints to protect \n",
- " disk memory, to get them change `enable_checkpointing=True` in `__init__`.\n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `val_size`: int, validation size for temporal cross-validation. \n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s. \n",
- " `test_size`: int, test size for temporal cross-validation. \n",
- " \"\"\"\n",
- " return self._fit(\n",
- " dataset=dataset,\n",
- " batch_size=self.batch_size,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " val_size=val_size,\n",
- " test_size=test_size,\n",
- " random_seed=random_seed,\n",
- " distributed_config=distributed_config,\n",
- " )\n",
- "\n",
- " def predict(self, dataset, test_size=None, step_size=1,\n",
- " random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Predict.\n",
- "\n",
- " Neural network prediction with PL's `Trainer` execution of `predict_step`.\n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `test_size`: int=None, test size for temporal cross-validation. \n",
- " `step_size`: int=1, Step size between each window. \n",
- " `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s. \n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " self._check_exog(dataset)\n",
- " self._restart_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- "\n",
- " self.predict_step_size = step_size\n",
- " self.decompose_forecast = False\n",
- " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " **data_module_kwargs)\n",
- "\n",
- " # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.\n",
- " pred_trainer_kwargs = self.trainer_kwargs.copy()\n",
- " if (pred_trainer_kwargs.get('accelerator', None) == \"gpu\") and (torch.cuda.device_count() > 1):\n",
- " pred_trainer_kwargs['devices'] = [0]\n",
- "\n",
- " trainer = pl.Trainer(**pred_trainer_kwargs)\n",
- " fcsts = trainer.predict(self, datamodule=datamodule) \n",
- " fcsts = torch.vstack(fcsts)\n",
- " fcsts = tensor_to_numpy(fcsts).flatten()\n",
- " fcsts = fcsts.reshape(-1, len(self.loss.output_names))\n",
- " return fcsts\n",
- "\n",
- " def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):\n",
- " \"\"\" Decompose Predictions.\n",
- "\n",
- " Decompose the predictions through the network's layers.\n",
- " Available methods are `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx`.\n",
- "\n",
- " **Parameters:** \n",
- " `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html). \n",
- " `step_size`: int=1, step size between each window of temporal data. \n",
- " `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).\n",
- " \"\"\"\n",
- " # Restart random seed\n",
- " if random_seed is None:\n",
- " random_seed = self.random_seed\n",
- " torch.manual_seed(random_seed)\n",
- " data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)\n",
- "\n",
- " self.predict_step_size = step_size\n",
- " self.decompose_forecast = True\n",
- " datamodule = TimeSeriesDataModule(dataset=dataset,\n",
- " valid_batch_size=self.valid_batch_size,\n",
- " **data_module_kwargs)\n",
- " trainer = pl.Trainer(**self.trainer_kwargs)\n",
- " fcsts = trainer.predict(self, datamodule=datamodule)\n",
- " self.decompose_forecast = False # Default decomposition back to false\n",
- " fcsts = torch.vstack(fcsts)\n",
- " return tensor_to_numpy(fcsts)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1712ea15",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "48063f70",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows.fit, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "75529be6",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows.predict, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a1f8315d",
- "metadata": {},
- "outputs": [],
- "source": [
- "show_doc(BaseWindows.decompose, title_level=3)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8927f2e5-f376-4c99-bb8f-8cbb73efe01e",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.utils import AirPassengersDF\n",
- "from neuralforecast.tsdataset import TimeSeriesDataset, TimeSeriesDataModule"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "61490e69-f014-4087-83c5-540d5bd7d458",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# add h=0,1 unit test for _parse_windows \n",
- "# Declare batch\n",
- "AirPassengersDF['x'] = np.array(len(AirPassengersDF))\n",
- "AirPassengersDF['x2'] = np.array(len(AirPassengersDF)) * 2\n",
- "dataset, indices, dates, ds = TimeSeriesDataset.from_df(df=AirPassengersDF)\n",
- "data = TimeSeriesDataModule(dataset=dataset, batch_size=1, drop_last=True)\n",
- "\n",
- "train_loader = data.train_dataloader()\n",
- "batch = next(iter(train_loader))\n",
- "\n",
- "# Instantiate BaseWindows to test _parse_windows method h in [0,1]\n",
- "for h in [0, 1]:\n",
- " basewindows = BaseWindows(h=h,\n",
- " input_size=len(AirPassengersDF)-h,\n",
- " hist_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=1,\n",
- " inference_windows_batch_size=1,\n",
- " start_padding_enabled=False)\n",
- "\n",
- " windows = basewindows._create_windows(batch, step='train')\n",
- " original_outsample_y = torch.clone(windows['temporal'][:,-basewindows.h:,0])\n",
- " windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "\n",
- " insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)\n",
- "\n",
- " # Check equality of parsed and original insample_y\n",
- " parsed_insample_y = insample_y.numpy().flatten()\n",
- " original_insample_y = AirPassengersDF.y.values\n",
- " test_eq(parsed_insample_y, original_insample_y[:basewindows.input_size])\n",
- "\n",
- " # Check equality of parsed and original hist_exog\n",
- " parsed_hist_exog = hist_exog.numpy().flatten()\n",
- " original_hist_exog = AirPassengersDF.x.values\n",
- " test_eq(parsed_hist_exog, original_hist_exog[:basewindows.input_size])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "86ab58a9",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test that start_padding_enabled=True solves the problem of short series\n",
- "h = 12\n",
- "basewindows = BaseWindows(h=h,\n",
- " input_size=500,\n",
- " hist_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=10,\n",
- " inference_windows_batch_size=2,\n",
- " start_padding_enabled=True)\n",
- "\n",
- "windows = basewindows._create_windows(batch, step='train')\n",
- "windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)\n",
- "\n",
- "basewindows.val_size = 12\n",
- "windows = basewindows._create_windows(batch, step='val')\n",
- "windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)\n",
- "\n",
- "basewindows.test_size = 12\n",
- "basewindows.predict_step_size = 1\n",
- "windows = basewindows._create_windows(batch, step='predict')\n",
- "windows = basewindows._normalization(windows=windows, y_idx=0)\n",
- "insample_y, insample_mask, outsample_y, outsample_mask, \\\n",
- " hist_exog, futr_exog, stat_exog = basewindows._parse_windows(batch, windows)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "54d2e850",
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "\n",
- "# Test that hist_exog_list and futr_exog_list correctly filter data.\n",
- "# that is sent to scaler.\n",
- "basewindows = BaseWindows(h=12,\n",
- " input_size=500,\n",
- " hist_exog_list=['x', 'x2'],\n",
- " futr_exog_list=['x'],\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " learning_rate=0.001,\n",
- " max_steps=1,\n",
- " val_check_steps=0,\n",
- " batch_size=1,\n",
- " valid_batch_size=1,\n",
- " windows_batch_size=10,\n",
- " inference_windows_batch_size=2,\n",
- " start_padding_enabled=True)\n",
- "\n",
- "windows = basewindows._create_windows(batch, step='train')\n",
- "\n",
- "temporal_cols = windows['temporal_cols'].copy() # B, L+H, C\n",
- "temporal_data_cols = basewindows._get_temporal_exogenous_cols(temporal_cols=temporal_cols)\n",
- "\n",
- "test_eq(set(temporal_data_cols), set(['x', 'x2']))\n",
- "test_eq(windows['temporal'].shape, torch.Size([10,500+12,len(['y', 'x', 'x2', 'available_mask'])]))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bf493ff9",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "python3",
- "language": "python",
- "name": "python3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/nbs/common.model_checks.ipynb b/nbs/common.model_checks.ipynb
new file mode 100644
index 000000000..d618c5c33
--- /dev/null
+++ b/nbs/common.model_checks.ipynb
@@ -0,0 +1,248 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| default_exp common._model_checks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "%load_ext autoreload\n",
+ "%autoreload 2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 1. Checks for models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This file provides a set of unit tests for all models"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "import pandas as pd\n",
+ "import neuralforecast.losses.pytorch as losses\n",
+ "\n",
+ "from neuralforecast import NeuralForecast\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, generate_series"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "seed = 0\n",
+ "test_size = 14\n",
+ "FREQ = \"D\"\n",
+ "\n",
+ "# 1 series, no exogenous\n",
+ "N_SERIES_1 = 1\n",
+ "df = generate_series(n_series=N_SERIES_1, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_1 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_1 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# 5 series, no exogenous\n",
+ "N_SERIES_2 = 5\n",
+ "df = generate_series(n_series=N_SERIES_2, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_2 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_2 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# 1 series, with static and temporal exogenous\n",
+ "N_SERIES_3 = 1\n",
+ "df, STATIC_3 = generate_series(n_series=N_SERIES_3, n_static_features=2, \n",
+ " n_temporal_features=2, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_3 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_3 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# 5 series, with static and temporal exogenous\n",
+ "N_SERIES_4 = 5\n",
+ "df, STATIC_4 = generate_series(n_series=N_SERIES_4, n_static_features=2, \n",
+ " n_temporal_features=2, seed=seed, freq=FREQ, equal_ends=True)\n",
+ "max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)\n",
+ "Y_TRAIN_DF_4 = df[df.ds < max_ds]\n",
+ "Y_TEST_DF_4 = df[df.ds >= max_ds]\n",
+ "\n",
+ "# Generic test for a given config for a model\n",
+ "def _run_model_tests(model_class, config):\n",
+ " if model_class.RECURRENT:\n",
+ " config[\"inference_input_size\"] = config[\"input_size\"]\n",
+ "\n",
+ " # DF_1\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_1\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_1[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_1[\"y\"].values \n",
+ "\n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_1, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_1)\n",
+ " # DF_2\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_2\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_2[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_2[\"y\"].values\n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_2, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_2)\n",
+ "\n",
+ " if model.EXOGENOUS_STAT and model.EXOGENOUS_FUTR:\n",
+ " # DF_3\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_3\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_3[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_3[\"y\"].values\n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_3, static_df=STATIC_3, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_3)\n",
+ "\n",
+ " # DF_4\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = N_SERIES_4\n",
+ " if isinstance(config[\"loss\"], losses.relMSE):\n",
+ " config[\"loss\"].y_train = Y_TRAIN_DF_4[\"y\"].values \n",
+ " if isinstance(config[\"valid_loss\"], losses.relMSE):\n",
+ " config[\"valid_loss\"].y_train = Y_TRAIN_DF_4[\"y\"].values \n",
+ " model = model_class(**config)\n",
+ " fcst = NeuralForecast(models=[model], freq=FREQ)\n",
+ " fcst.fit(df=Y_TRAIN_DF_4, static_df=STATIC_4, val_size=24)\n",
+ " _ = fcst.predict(futr_df=Y_TEST_DF_4) \n",
+ "\n",
+ "# Tests a model against every loss function\n",
+ "def check_loss_functions(model_class):\n",
+ " loss_list = [losses.MAE(), losses.MSE(), losses.RMSE(), losses.MAPE(), losses.SMAPE(), losses.MASE(seasonality=7), \n",
+ " losses.QuantileLoss(q=0.5), losses.MQLoss(), losses.IQLoss(), losses.DistributionLoss(\"Normal\"), \n",
+ " losses.DistributionLoss(\"StudentT\"), losses.DistributionLoss(\"Poisson\"), losses.DistributionLoss(\"NegativeBinomial\"), \n",
+ " losses.DistributionLoss(\"Tweedie\", rho=1.5), losses.DistributionLoss(\"ISQF\"), losses.PMM(), losses.PMM(weighted=True), \n",
+ " losses.GMM(), losses.GMM(weighted=True), losses.NBMM(), losses.NBMM(weighted=True), losses.HuberLoss(), \n",
+ " losses.TukeyLoss(), losses.HuberQLoss(q=0.5), losses.HuberMQLoss()]\n",
+ " for loss in loss_list:\n",
+ " test_name = f\"{model_class.__name__}: checking {loss._get_name()}\"\n",
+ " print(f\"{test_name}\")\n",
+ " config = {'max_steps': 2,\n",
+ " 'h': 7,\n",
+ " 'input_size': 28,\n",
+ " 'loss': loss,\n",
+ " 'valid_loss': None,\n",
+ " 'enable_progress_bar': False,\n",
+ " 'enable_model_summary': False,\n",
+ " 'val_check_steps': 2} \n",
+ " try:\n",
+ " _run_model_tests(model_class, config) \n",
+ " except RuntimeError:\n",
+ " raise Exception(f\"{test_name} failed.\")\n",
+ " except Exception:\n",
+ " print(f\"{test_name} skipped on raised Exception.\")\n",
+ " pass\n",
+ "\n",
+ "# Tests a model against the AirPassengers dataset\n",
+ "def check_airpassengers(model_class):\n",
+ " print(f\"{model_class.__name__}: checking forecast AirPassengers dataset\")\n",
+ " Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
+ "\n",
+ " config = {'max_steps': 2,\n",
+ " 'h': 12,\n",
+ " 'input_size': 24,\n",
+ " 'enable_progress_bar': False,\n",
+ " 'enable_model_summary': False,\n",
+ " 'val_check_steps': 2,\n",
+ " }\n",
+ "\n",
+ " if model_class.MULTIVARIATE:\n",
+ " config[\"n_series\"] = Y_train_df[\"unique_id\"].nunique()\n",
+ " # Normal forecast\n",
+ " fcst = NeuralForecast(models=[model_class(**config)], freq='M')\n",
+ " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic)\n",
+ " _ = fcst.predict(futr_df=Y_test_df) \n",
+ "\n",
+ " # Cross-validation\n",
+ " fcst = NeuralForecast(models=[model_class(**config)], freq='M')\n",
+ " _ = fcst.cross_validation(df=AirPassengersPanel, static_df=AirPassengersStatic, n_windows=2, step_size=12)\n",
+ "\n",
+ "# Add unit test functions to this function\n",
+ "def check_model(model_class, checks=[\"losses\", \"airpassengers\"]):\n",
+ " \"\"\"\n",
+ " Check model with various tests. Options for checks are: \n",
+ " \"losses\": test the model against all loss functions \n",
+ " \"airpassengers\": test the model against the airpassengers dataset for forecasting and cross-validation \n",
+ " \n",
+ " \"\"\"\n",
+ " if \"losses\" in checks:\n",
+ " check_loss_functions(model_class) \n",
+ " if \"airpassengers\" in checks:\n",
+ " try:\n",
+ " check_airpassengers(model_class) \n",
+ " except RuntimeError:\n",
+ " raise Exception(f\"{model_class.__name__}: AirPassengers forecast test failed.\")\n"
+ ]
+ },
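+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A usage sketch, not executed by default: `check_model` can also be run for a single model class. The cell below is illustrative only; it assumes `NHITS` is importable from `neuralforecast.models` and exercises just the loss-function checks."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "#| hide\n",
+ "# Usage sketch: run only the loss-function checks for a single model class.\n",
+ "from neuralforecast.models import NHITS\n",
+ "\n",
+ "check_model(NHITS, checks=[\"losses\"])"
+ ]
+ },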
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| eval: false\n",
+ "#| hide\n",
+ "# Run tests in this file. This is a slow test\n",
+ "import warnings\n",
+ "import logging\n",
+ "from neuralforecast.models import RNN, GRU, TCN, LSTM, DeepAR, DilatedRNN, BiTCN, MLP, NBEATS, NBEATSx, NHITS, DLinear, NLinear, TiDE, DeepNPTS, TFT, VanillaTransformer, Informer, Autoformer, FEDformer, TimesNet, iTransformer, KAN, RMoK, StemGNN, TSMixer, TSMixerx, MLPMultivariate, SOFTS, TimeMixer\n",
+ "\n",
+ "models = [RNN, GRU, TCN, LSTM, DeepAR, DilatedRNN, BiTCN, MLP, NBEATS, NBEATSx, NHITS, DLinear, NLinear, TiDE, DeepNPTS, TFT, VanillaTransformer, Informer, Autoformer, FEDformer, TimesNet, iTransformer, KAN, RMoK, StemGNN, TSMixer, TSMixerx, MLPMultivariate, SOFTS, TimeMixer]\n",
+ "\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " for model in models:\n",
+ " check_model(model, checks=[\"losses\"])"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "python3",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/nbs/common.modules.ipynb b/nbs/common.modules.ipynb
index f90e936da..403a2a5d6 100644
--- a/nbs/common.modules.ipynb
+++ b/nbs/common.modules.ipynb
@@ -691,6 +691,66 @@
" x = x + self.mean\n",
" return x"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "class RevINMultivariate(nn.Module):\n",
+ " \"\"\" \n",
+ " ReversibleInstanceNorm1d for Multivariate models\n",
+ " \"\"\" \n",
+ " def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):\n",
+ " super().__init__()\n",
+ " self.num_features = num_features\n",
+ " self.eps = eps\n",
+ " self.affine = affine\n",
+ " if self.affine:\n",
+ " self._init_params()\n",
+ "\n",
+ " def forward(self, x, mode: str):\n",
+ " if mode == 'norm':\n",
+ " x = self._normalize(x)\n",
+ " elif mode == 'denorm':\n",
+ " x = self._denormalize(x)\n",
+ " else:\n",
+ " raise NotImplementedError\n",
+ " return x\n",
+ "\n",
+ " def _init_params(self):\n",
+ " # initialize RevIN params: (C,)\n",
+ " self.affine_weight = nn.Parameter(torch.ones((1, 1, self.num_features)))\n",
+ " self.affine_bias = nn.Parameter(torch.zeros((1, 1, self.num_features)))\n",
+ "\n",
+ " def _normalize(self, x):\n",
+ " # Batch statistics\n",
+ " self.batch_mean = torch.mean(x, axis=1, keepdim=True).detach()\n",
+ " self.batch_std = torch.sqrt(torch.var(x, axis=1, keepdim=True, unbiased=False) + self.eps).detach()\n",
+ " \n",
+ " # Instance normalization\n",
+ " x = x - self.batch_mean\n",
+ " x = x / self.batch_std\n",
+ " \n",
+ " if self.affine:\n",
+ " x = x * self.affine_weight\n",
+ " x = x + self.affine_bias\n",
+ "\n",
+ " return x\n",
+ "\n",
+ " def _denormalize(self, x):\n",
+ " # Reverse the normalization\n",
+ " if self.affine:\n",
+ " x = x - self.affine_bias\n",
+ " x = x / self.affine_weight \n",
+ " \n",
+ " x = x * self.batch_std\n",
+ " x = x + self.batch_mean \n",
+ "\n",
+ " return x"
+ ]
}
],
"metadata": {
diff --git a/nbs/common.scalers.ipynb b/nbs/common.scalers.ipynb
index 9e6737c3c..f49714a6b 100644
--- a/nbs/common.scalers.ipynb
+++ b/nbs/common.scalers.ipynb
@@ -682,11 +682,11 @@
" def _init_params(self, num_features):\n",
" # Initialize RevIN scaler params to broadcast:\n",
" if self.dim==1: # [B,T,C] [1,1,C]\n",
- " self.revin_bias = nn.Parameter(torch.zeros(1,1,num_features))\n",
- " self.revin_weight = nn.Parameter(torch.ones(1,1,num_features))\n",
+ " self.revin_bias = nn.Parameter(torch.zeros(1, 1, num_features, 1))\n",
+ " self.revin_weight = nn.Parameter(torch.ones(1, 1, num_features, 1))\n",
" elif self.dim==-1: # [B,C,T] [1,C,1]\n",
- " self.revin_bias = nn.Parameter(torch.zeros(1,num_features,1))\n",
- " self.revin_weight = nn.Parameter(torch.ones(1,num_features,1))\n",
+ " self.revin_bias = nn.Parameter(torch.zeros(1, num_features, 1, 1))\n",
+ " self.revin_weight = nn.Parameter(torch.ones(1, num_features, 1, 1))\n",
"\n",
" #@torch.no_grad()\n",
" def transform(self, x, mask):\n",
@@ -863,8 +863,8 @@
"#| hide\n",
"# Validate scalers\n",
"for scaler_type in [None, 'identity', 'standard', 'robust', 'minmax', 'minmax1', 'invariant', 'revin']:\n",
- " x = 1.0*torch.tensor(np_x)\n",
- " mask = torch.tensor(np_mask)\n",
+ " x = 1.0*torch.tensor(np_x).unsqueeze(-1)\n",
+ " mask = torch.tensor(np_mask).unsqueeze(-1)\n",
" scaler = TemporalNorm(scaler_type=scaler_type, dim=1, num_features=np_x.shape[-1])\n",
" x_scaled = scaler.transform(x=x, mask=mask)\n",
" x_recovered = scaler.inverse_transform(x_scaled)\n",
@@ -987,14 +987,6 @@
"nf = NeuralForecast(models=[model], freq='MS')\n",
"Y_hat_df = nf.cross_validation(df=Y_df, val_size=12, n_windows=1)"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "b2f50bd8",
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
diff --git a/nbs/core.ipynb b/nbs/core.ipynb
index 5138b5e68..0ce17b6f7 100644
--- a/nbs/core.ipynb
+++ b/nbs/core.ipynb
@@ -84,6 +84,7 @@
"\n",
"from neuralforecast.common._base_model import DistributedConfig\n",
"from neuralforecast.compat import SparkDataFrame\n",
+ "from neuralforecast.losses.pytorch import IQLoss\n",
"from neuralforecast.tsdataset import _FilesDataset, TimeSeriesDataset, LocalFilesTimeSeriesDataset\n",
"from neuralforecast.models import (\n",
" GRU, LSTM, RNN, TCN, DeepAR, DilatedRNN,\n",
@@ -96,7 +97,7 @@
" TimeMixer, KAN, RMoK\n",
")\n",
"from neuralforecast.common._base_auto import BaseAuto, MockTrial\n",
- "from neuralforecast.utils import PredictionIntervals, get_prediction_interval_method"
+ "from neuralforecast.utils import PredictionIntervals, get_prediction_interval_method, level_to_quantiles, quantiles_to_level"
]
},
{
@@ -319,6 +320,7 @@
" # Flags and attributes\n",
" self._fitted = False\n",
" self._reset_models()\n",
+ " self._add_level = False\n",
"\n",
" def _scalers_fit_transform(self, dataset: TimeSeriesDataset) -> None:\n",
" self.scalers_ = {} \n",
@@ -707,13 +709,14 @@
" names: List[str] = []\n",
" count_names = {'model': 0}\n",
" for model in self.models:\n",
- " if add_level and model.loss.outputsize_multiplier > 1:\n",
- " continue\n",
- "\n",
" model_name = repr(model)\n",
" count_names[model_name] = count_names.get(model_name, -1) + 1\n",
" if count_names[model_name] > 0:\n",
" model_name += str(count_names[model_name])\n",
+ "\n",
+ " if add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
+ " continue\n",
+ "\n",
" names.extend(model_name + n for n in model.loss.output_names)\n",
" return names\n",
"\n",
@@ -829,6 +832,7 @@
" verbose: bool = False,\n",
" engine = None,\n",
" level: Optional[List[Union[int, float]]] = None,\n",
+ " quantiles: Optional[List[float]] = None,\n",
" **data_kwargs\n",
" ):\n",
" \"\"\"Predict with core.NeuralForecast.\n",
@@ -850,6 +854,8 @@
" Distributed engine for inference. Only used if df is a spark dataframe or if fit was called on a spark dataframe.\n",
" level : list of ints or floats, optional (default=None)\n",
" Confidence levels between 0 and 100.\n",
+ " quantiles : list of floats, optional (default=None)\n",
+ " Alternative to level, target quantiles to predict.\n",
" data_kwargs : kwargs\n",
" Extra arguments to be passed to the dataset within each model.\n",
"\n",
@@ -864,6 +870,22 @@
"\n",
" if not self._fitted:\n",
" raise Exception(\"You must fit the model before predicting.\")\n",
+ " \n",
+ " quantiles_ = None\n",
+ " level_ = None\n",
+ " has_level = False \n",
+ " if level is not None:\n",
+ " has_level = True\n",
+ " if quantiles is not None:\n",
+ " raise ValueError(\"You can't set both level and quantiles.\")\n",
+ " level_ = sorted(list(set(level)))\n",
+ " quantiles_ = level_to_quantiles(level_)\n",
+ " \n",
+ " if quantiles is not None:\n",
+ " if level is not None:\n",
+ " raise ValueError(\"You can't set both level and quantiles.\") \n",
+ " quantiles_ = sorted(list(set(quantiles)))\n",
+ " level_ = quantiles_to_level(quantiles_)\n",
"\n",
" needed_futr_exog = self._get_needed_futr_exog()\n",
" if needed_futr_exog:\n",
@@ -912,8 +934,6 @@
" if verbose: print('Using stored dataset.')\n",
" \n",
"\n",
- " cols = self._get_model_names()\n",
- "\n",
" # Placeholder dataframe for predictions with unique_id and ds\n",
" fcsts_df = ufp.make_future_dataframe(\n",
" uids=uids,\n",
@@ -957,49 +977,20 @@
" )\n",
" self._scalers_transform(futr_dataset)\n",
" dataset = dataset.append(futr_dataset)\n",
- "\n",
- " col_idx = 0\n",
- " fcsts = np.full((self.h * len(uids), len(cols)), fill_value=np.nan, dtype=np.float32)\n",
- " for model in self.models:\n",
- " old_test_size = model.get_test_size()\n",
- " model.set_test_size(self.h) # To predict h steps ahead\n",
- " model_fcsts = model.predict(dataset=dataset, **data_kwargs)\n",
- " # Append predictions in memory placeholder\n",
- " output_length = len(model.loss.output_names)\n",
- " fcsts[:, col_idx : col_idx + output_length] = model_fcsts\n",
- " col_idx += output_length\n",
- " model.set_test_size(old_test_size) # Set back to original value\n",
+ " \n",
+ " fcsts, cols = self._generate_forecasts(dataset=dataset, uids=uids, quantiles_=quantiles_, level_=level_, has_level=has_level, **data_kwargs)\n",
+ " \n",
" if self.scalers_:\n",
" indptr = np.append(0, np.full(len(uids), self.h).cumsum())\n",
" fcsts = self._scalers_target_inverse_transform(fcsts, indptr)\n",
"\n",
" # Declare predictions pd.DataFrame\n",
- " cols = self._get_model_names() # Needed for IQLoss as column names may have changed during the call to .predict()\n",
" if isinstance(fcsts_df, pl_DataFrame):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" else:\n",
" fcsts = pd.DataFrame(fcsts, columns=cols)\n",
" fcsts_df = ufp.horizontal_concat([fcsts_df, fcsts])\n",
"\n",
- " # add prediction intervals\n",
- " if level is not None:\n",
- " if self._cs_df is None or self.prediction_intervals is None:\n",
- " raise Exception('You must fit the model with prediction_intervals to use level.')\n",
- " else:\n",
- " level_ = sorted(level)\n",
- " model_names = self._get_model_names(add_level=True)\n",
- " prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)\n",
- "\n",
- " fcsts_df = prediction_interval_method(\n",
- " fcsts_df,\n",
- " self._cs_df,\n",
- " model_names=list(model_names),\n",
- " level=level_,\n",
- " cs_n_windows=self.prediction_intervals.n_windows,\n",
- " n_series=len(uids),\n",
- " horizon=self.h,\n",
- " )\n",
- "\n",
" return fcsts_df\n",
"\n",
" def _reset_models(self):\n",
@@ -1042,15 +1033,6 @@
" if self.dataset.min_size < (val_size+test_size):\n",
" warnings.warn('Validation and test sets are larger than the shorter time-series.')\n",
"\n",
- " cols = []\n",
- " count_names = {'model': 0}\n",
- " for model in self.models:\n",
- " model_name = repr(model)\n",
- " count_names[model_name] = count_names.get(model_name, -1) + 1\n",
- " if count_names[model_name] > 0:\n",
- " model_name += str(count_names[model_name])\n",
- " cols += [model_name + n for n in model.loss.output_names]\n",
- "\n",
" fcsts_df = ufp.cv_times(\n",
" times=self.ds,\n",
" uids=self.uids,\n",
@@ -1064,20 +1046,20 @@
" # the cv_times is sorted by window and then id\n",
" fcsts_df = ufp.sort(fcsts_df, [id_col, 'cutoff', time_col])\n",
"\n",
- " col_idx = 0\n",
- " fcsts = np.full((self.dataset.n_groups * self.h * n_windows, len(cols)),\n",
- " np.nan, dtype=np.float32)\n",
- " \n",
+ " fcsts_list: List = []\n",
" for model in self.models:\n",
+ " if self._add_level and (model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)):\n",
+ " continue\n",
+ "\n",
" model.fit(dataset=self.dataset,\n",
" val_size=val_size, \n",
" test_size=test_size)\n",
" model_fcsts = model.predict(self.dataset, step_size=step_size, **data_kwargs)\n",
"\n",
" # Append predictions in memory placeholder\n",
- " output_length = len(model.loss.output_names)\n",
- " fcsts[:,col_idx:(col_idx + output_length)] = model_fcsts\n",
- " col_idx += output_length\n",
+ " fcsts_list.append(model_fcsts)\n",
+ "\n",
+ " fcsts = np.concatenate(fcsts_list, axis=-1)\n",
" # we may have allocated more space than needed\n",
" # each serie can produce at most (serie.size - 1) // self.h CV windows\n",
" effective_sizes = ufp.counts_by_id(fcsts_df, id_col)['counts'].to_numpy()\n",
@@ -1105,6 +1087,7 @@
" self._fitted = True\n",
"\n",
" # Add predictions to forecasts DataFrame\n",
+ " cols = self._get_model_names(add_level=self._add_level)\n",
" if isinstance(self.uids, pl_Series):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" else:\n",
@@ -1117,7 +1100,7 @@
" df[[id_col, time_col, target_col]],\n",
" how='left',\n",
" on=[id_col, time_col],\n",
- " )\n",
+ " ) \n",
"\n",
" def cross_validation(\n",
" self,\n",
@@ -1135,6 +1118,7 @@
" target_col: str = 'y',\n",
" prediction_intervals: Optional[PredictionIntervals] = None,\n",
" level: Optional[List[Union[int, float]]] = None,\n",
+ " quantiles: Optional[List[float]] = None,\n",
" **data_kwargs\n",
" ) -> DataFrame:\n",
" \"\"\"Temporal Cross-Validation with core.NeuralForecast.\n",
@@ -1174,7 +1158,9 @@
" prediction_intervals : PredictionIntervals, optional (default=None)\n",
" Configuration to calibrate prediction intervals (Conformal Prediction). \n",
" level : list of ints or floats, optional (default=None)\n",
- " Confidence levels between 0 and 100. Use with prediction_intervals. \n",
+ " Confidence levels between 0 and 100.\n",
+ " quantiles : list of floats, optional (default=None)\n",
+ " Alternative to level, target quantiles to predict.\n",
" data_kwargs : kwargs\n",
" Extra arguments to be passed to the dataset within each model.\n",
"\n",
@@ -1201,15 +1187,15 @@
" self._reset_models()\n",
"\n",
" # Checks for prediction intervals\n",
- " if prediction_intervals is not None or level is not None:\n",
- " if level is None:\n",
- " warnings.warn('Level not provided, using level=[90].')\n",
- " level = [90]\n",
- " if prediction_intervals is None:\n",
- " raise Exception('You must set prediction_intervals to use level.')\n",
+ " if prediction_intervals is not None:\n",
+ " if level is None and quantiles is None:\n",
+ " raise Exception('When passing prediction_intervals you need to set the level or quantiles argument.') \n",
" if not refit:\n",
- " raise Exception('Passing prediction_intervals and/or level is only supported with refit=True.') \n",
+ " raise Exception('Passing prediction_intervals is only supported with refit=True.') \n",
"\n",
+ " if level is not None and quantiles is not None:\n",
+ " raise ValueError(\"You can't set both level and quantiles argument.\")\n",
+ " \n",
" if not refit:\n",
"\n",
" return self._no_refit_cross_validation(\n",
@@ -1267,6 +1253,7 @@
" futr_df=futr_df,\n",
" verbose=verbose,\n",
" level=level,\n",
+ " quantiles=quantiles,\n",
" **data_kwargs\n",
" )\n",
" preds = ufp.join(preds, cutoffs, on=id_col, how='left')\n",
@@ -1284,7 +1271,7 @@
" cols_order = first_out_cols + remaining_cols + [target_col]\n",
" return ufp.sort(out[cols_order], by=[id_col, 'cutoff', time_col])\n",
"\n",
- " def predict_insample(self, step_size: int = 1):\n",
+ " def predict_insample(self, step_size: int = 1, **data_kwargs):\n",
" \"\"\"Predict insample with core.NeuralForecast.\n",
"\n",
" `core.NeuralForecast`'s `predict_insample` uses stored fitted `models`\n",
@@ -1302,23 +1289,7 @@
" \"\"\"\n",
" if not self._fitted:\n",
" raise Exception('The models must be fitted first with `fit` or `cross_validation`.')\n",
- "\n",
- " for model in self.models:\n",
- " if model.SAMPLING_TYPE == 'recurrent':\n",
- " warnings.warn(f'Predict insample might not provide accurate predictions for \\\n",
- " recurrent model {repr(model)} class yet due to scaling.')\n",
- " print(f'WARNING: Predict insample might not provide accurate predictions for \\\n",
- " recurrent model {repr(model)} class yet due to scaling.')\n",
" \n",
- " cols = []\n",
- " count_names = {'model': 0}\n",
- " for model in self.models:\n",
- " model_name = repr(model)\n",
- " count_names[model_name] = count_names.get(model_name, -1) + 1\n",
- " if count_names[model_name] > 0:\n",
- " model_name += str(count_names[model_name])\n",
- " cols += [model_name + n for n in model.loss.output_names]\n",
- "\n",
" # Remove test set from dataset and last dates\n",
" test_size = self.models[0].get_test_size()\n",
"\n",
@@ -1354,9 +1325,7 @@
" time_col=self.time_col,\n",
" )\n",
"\n",
- " col_idx = 0\n",
- " fcsts = np.full((len(fcsts_df), len(cols)), np.nan, dtype=np.float32)\n",
- "\n",
+ " fcsts_list: List = []\n",
" for model in self.models:\n",
" # Test size is the number of periods to forecast (full size of trimmed dataset)\n",
" model.set_test_size(test_size=trimmed_dataset.max_size)\n",
@@ -1364,10 +1333,9 @@
" # Predict\n",
" model_fcsts = model.predict(trimmed_dataset, step_size=step_size)\n",
" # Append predictions in memory placeholder\n",
- " output_length = len(model.loss.output_names)\n",
- " fcsts[:,col_idx:(col_idx + output_length)] = model_fcsts\n",
- " col_idx += output_length \n",
+ " fcsts_list.append(model_fcsts) \n",
" model.set_test_size(test_size=test_size) # Set original test_size\n",
+ " fcsts = np.concatenate(fcsts_list, axis=-1)\n",
"\n",
" # original y\n",
" original_y = {\n",
@@ -1377,6 +1345,7 @@
" }\n",
"\n",
" # Add predictions to forecasts DataFrame\n",
+ " cols = self._get_model_names()\n",
" if isinstance(self.uids, pl_Series):\n",
" fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))\n",
" Y_df = pl_DataFrame(original_y)\n",
@@ -1637,6 +1606,7 @@
" \"Please reduce the number of windows, horizon or remove those series.\"\n",
" )\n",
" \n",
+ " self._add_level = True\n",
" cv_results = self.cross_validation(\n",
" df=df,\n",
" static_df=static_df,\n",
@@ -1645,7 +1615,8 @@
" time_col=time_col,\n",
" target_col=target_col,\n",
" )\n",
- " \n",
+ " self._add_level = False\n",
+ "\n",
" kept = [time_col, id_col, 'cutoff']\n",
" # conformity score for each model\n",
" for model in self._get_model_names(add_level=True):\n",
@@ -1655,7 +1626,102 @@
" abs_err = abs(cv_results[model] - cv_results[target_col])\n",
" cv_results = ufp.assign_columns(cv_results, model, abs_err)\n",
" dropped = list(set(cv_results.columns) - set(kept))\n",
- " return ufp.drop_columns(cv_results, dropped) "
+ " return ufp.drop_columns(cv_results, dropped) \n",
+ " \n",
+ " def _generate_forecasts(self, dataset: TimeSeriesDataset, uids: Series, quantiles_: Optional[List[float]] = None, level_: Optional[List[Union[int, float]]] = None, has_level: Optional[bool] = False, **data_kwargs) -> np.array:\n",
+ " fcsts_list: List = []\n",
+ " cols = []\n",
+ " count_names = {'model': 0}\n",
+ " for model in self.models:\n",
+ " old_test_size = model.get_test_size()\n",
+ " model.set_test_size(self.h) # To predict h steps ahead\n",
+ " \n",
+ " # Increment model name if the same model is used more than once\n",
+ " model_name = repr(model)\n",
+ " count_names[model_name] = count_names.get(model_name, -1) + 1\n",
+ " if count_names[model_name] > 0:\n",
+ " model_name += str(count_names[model_name])\n",
+ "\n",
+ " # Predict for every quantile or level if requested and the loss function supports it\n",
+ " # case 1: DistributionLoss and MixtureLosses\n",
+ " if quantiles_ is not None and not isinstance(model.loss, IQLoss) and hasattr(model.loss, 'update_quantile') and callable(model.loss.update_quantile):\n",
+ " model_fcsts = model.predict(dataset=dataset, quantiles = quantiles_, **data_kwargs)\n",
+ " fcsts_list.append(model_fcsts) \n",
+ " col_names = []\n",
+ " for i, quantile in enumerate(quantiles_):\n",
+ " col_name = self._get_column_name(model_name, quantile, has_level)\n",
+ " if i == 0:\n",
+ " col_names.extend([f\"{model_name}\", col_name])\n",
+ " else:\n",
+ " col_names.extend([col_name])\n",
+ " if hasattr(model.loss, 'return_params') and model.loss.return_params:\n",
+ " cols.extend(col_names + [model_name + param_name for param_name in model.loss.param_names])\n",
+ " else:\n",
+ " cols.extend(col_names)\n",
+ " # case 2: IQLoss\n",
+ " elif quantiles_ is not None and isinstance(model.loss, IQLoss):\n",
+ " # IQLoss does not give monotonically increasing quantiles, so we apply a hack: compute all quantiles, and take the quantile over the quantiles\n",
+ " quantiles_iqloss = np.linspace(0.01, 0.99, 20)\n",
+ " fcsts_list_iqloss = []\n",
+ " for i, quantile in enumerate(quantiles_iqloss):\n",
+ " model_fcsts = model.predict(dataset=dataset, quantiles = [quantile], **data_kwargs) \n",
+ " fcsts_list_iqloss.append(model_fcsts) \n",
+ " fcsts_iqloss = np.concatenate(fcsts_list_iqloss, axis=-1)\n",
+ "\n",
+ " # Get the actual requested quantiles\n",
+ " model_fcsts = np.quantile(fcsts_iqloss, quantiles_, axis=-1).T\n",
+ " fcsts_list.append(model_fcsts) \n",
+ "\n",
+ " # Get the right column names\n",
+ " col_names = []\n",
+ " for i, quantile in enumerate(quantiles_):\n",
+ " col_name = self._get_column_name(model_name, quantile, has_level)\n",
+ " col_names.extend([col_name]) \n",
+ " cols.extend(col_names)\n",
+ " # case 3: PointLoss via prediction intervals\n",
+ " elif quantiles_ is not None and model.loss.outputsize_multiplier == 1:\n",
+ " if self.prediction_intervals is None:\n",
+ " raise AttributeError(\n",
+ " f\"You have trained {model_name} with loss={type(model.loss).__name__}(). \\n\"\n",
+ " \" You then must set `prediction_intervals` during fit to use level or quantiles during predict.\") \n",
+ " model_fcsts = model.predict(dataset=dataset, quantiles = quantiles_, **data_kwargs)\n",
+ " prediction_interval_method = get_prediction_interval_method(self.prediction_intervals.method)\n",
+ " fcsts_with_intervals, out_cols = prediction_interval_method(\n",
+ " model_fcsts,\n",
+ " self._cs_df,\n",
+ " model=model_name,\n",
+ " level=level_ if has_level else None,\n",
+ " cs_n_windows=self.prediction_intervals.n_windows,\n",
+ " n_series=len(uids),\n",
+ " horizon=self.h,\n",
+ " quantiles=quantiles_ if not has_level else None,\n",
+ " ) \n",
+ " fcsts_list.append(fcsts_with_intervals) \n",
+ " cols.extend([model_name] + out_cols)\n",
+ " # base case: quantiles or levels are not supported or provided as arguments\n",
+ " else:\n",
+ " model_fcsts = model.predict(dataset=dataset, **data_kwargs)\n",
+ " fcsts_list.append(model_fcsts)\n",
+ " cols.extend(model_name + n for n in model.loss.output_names)\n",
+ " model.set_test_size(old_test_size) # Set back to original value\n",
+ " fcsts = np.concatenate(fcsts_list, axis=-1)\n",
+ "\n",
+ " return fcsts, cols\n",
+ " \n",
+ " @staticmethod\n",
+ " def _get_column_name(model_name, quantile, has_level) -> str:\n",
+ " if not has_level:\n",
+ " col_name = f\"{model_name}_ql{quantile}\" \n",
+ " elif quantile < 0.5:\n",
+ " level_lo = int(round(100 - 200 * quantile))\n",
+ " col_name = f\"{model_name}-lo-{level_lo}\"\n",
+ " elif quantile > 0.5:\n",
+ " level_hi = int(round(100 - 200 * (1 - quantile)))\n",
+ " col_name = f\"{model_name}-hi-{level_hi}\"\n",
+ " else:\n",
+ " col_name = f\"{model_name}-median\"\n",
+ "\n",
+ " return col_name\n"
]
},
{
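A minimal standalone sketch of the quantile-to-column naming rule implemented in `_get_column_name` above (assuming only the mapping shown there: level 90 corresponds to the 0.05/0.95 quantile pair):

    def quantile_column(model_name: str, quantile: float, has_level: bool) -> str:
        # Mirrors the naming rule of _get_column_name above.
        if not has_level:
            return f"{model_name}_ql{quantile}"
        if quantile < 0.5:
            return f"{model_name}-lo-{int(round(100 - 200 * quantile))}"
        if quantile > 0.5:
            return f"{model_name}-hi-{int(round(100 - 200 * (1 - quantile)))}"
        return f"{model_name}-median"

    print(quantile_column("NHITS", 0.05, has_level=True))   # NHITS-lo-90
    print(quantile_column("NHITS", 0.95, has_level=True))   # NHITS-hi-90
    print(quantile_column("NHITS", 0.25, has_level=False))  # NHITS_ql0.25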
@@ -1783,7 +1849,7 @@
"from neuralforecast.models.tsmixer import TSMixer\n",
"from neuralforecast.models.tsmixerx import TSMixerx\n",
"\n",
- "from neuralforecast.losses.pytorch import MQLoss, MAE, MSE\n",
+ "from neuralforecast.losses.pytorch import MQLoss, MAE, MSE, DistributionLoss, IQLoss\n",
"from neuralforecast.utils import AirPassengersDF, AirPassengersPanel, AirPassengersStatic\n",
"\n",
"from datetime import date"
@@ -3359,6 +3425,71 @@
")\n",
"assert all([col in cv2.columns for col in ['NHITS-lo-30', 'NHITS-hi-30']])"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b82e7c70",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Test quantile and level argument in predict for different models and errors\n",
+ "prediction_intervals = PredictionIntervals(method=\"conformal_error\")\n",
+ "\n",
+ "models = []\n",
+ "for nf_model in [NHITS, LSTM, TSMixer]:\n",
+ " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1, \"loss\": MAE()}\n",
+ " if nf_model.__name__ == \"TSMixer\":\n",
+ " params.update({\"n_series\": 2})\n",
+ " models.append(nf_model(**params))\n",
+ "\n",
+ " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1, \"loss\": DistributionLoss(distribution=\"Normal\")}\n",
+ " if nf_model.__name__ == \"TSMixer\":\n",
+ " params.update({\"n_series\": 2})\n",
+ " models.append(nf_model(**params))\n",
+ "\n",
+ " params = {\"h\": 12, \"input_size\": 24, \"max_steps\": 1, \"loss\": IQLoss()}\n",
+ " if nf_model.__name__ == \"TSMixer\":\n",
+ " params.update({\"n_series\": 2})\n",
+ " models.append(nf_model(**params))\n",
+ "\n",
+ "nf = NeuralForecast(models=models, freq='M')\n",
+ "nf.fit(AirPassengersPanel_train, prediction_intervals=prediction_intervals)\n",
+ "# Test default prediction\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test)\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS1', 'NHITS1-median', 'NHITS1-lo-90',\n",
+ " 'NHITS1-lo-80', 'NHITS1-hi-80', 'NHITS1-hi-90', 'NHITS2_ql0.5', 'LSTM',\n",
+ " 'LSTM1', 'LSTM1-median', 'LSTM1-lo-90', 'LSTM1-lo-80', 'LSTM1-hi-80',\n",
+ " 'LSTM1-hi-90', 'LSTM2_ql0.5', 'TSMixer', 'TSMixer1', 'TSMixer1-median',\n",
+ " 'TSMixer1-lo-90', 'TSMixer1-lo-80', 'TSMixer1-hi-80', 'TSMixer1-hi-90',\n",
+ " 'TSMixer2_ql0.5']\n",
+ "# Test quantile prediction\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test, quantiles=[0.2, 0.3])\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS-ql0.2', 'NHITS-ql0.3', 'NHITS1',\n",
+ " 'NHITS1_ql0.2', 'NHITS1_ql0.3', 'NHITS2_ql0.2', 'NHITS2_ql0.3', 'LSTM',\n",
+ " 'LSTM-ql0.2', 'LSTM-ql0.3', 'LSTM1', 'LSTM1_ql0.2', 'LSTM1_ql0.3',\n",
+ " 'LSTM2_ql0.2', 'LSTM2_ql0.3', 'TSMixer', 'TSMixer-ql0.2',\n",
+ " 'TSMixer-ql0.3', 'TSMixer1', 'TSMixer1_ql0.2', 'TSMixer1_ql0.3',\n",
+ " 'TSMixer2_ql0.2', 'TSMixer2_ql0.3']\n",
+ "# Test level prediction\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test, level=[80, 90])\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS-lo-90', 'NHITS-lo-80', 'NHITS-hi-80',\n",
+ " 'NHITS-hi-90', 'NHITS1', 'NHITS1-lo-90', 'NHITS1-lo-80', 'NHITS1-hi-80',\n",
+ " 'NHITS1-hi-90', 'NHITS2-lo-90', 'NHITS2-lo-80', 'NHITS2-hi-80',\n",
+ " 'NHITS2-hi-90', 'LSTM', 'LSTM-lo-90', 'LSTM-lo-80', 'LSTM-hi-80',\n",
+ " 'LSTM-hi-90', 'LSTM1', 'LSTM1-lo-90', 'LSTM1-lo-80', 'LSTM1-hi-80',\n",
+ " 'LSTM1-hi-90', 'LSTM2-lo-90', 'LSTM2-lo-80', 'LSTM2-hi-80',\n",
+ " 'LSTM2-hi-90', 'TSMixer', 'TSMixer-lo-90', 'TSMixer-lo-80',\n",
+ " 'TSMixer-hi-80', 'TSMixer-hi-90', 'TSMixer1', 'TSMixer1-lo-90',\n",
+ " 'TSMixer1-lo-80', 'TSMixer1-hi-80', 'TSMixer1-hi-90', 'TSMixer2-lo-90',\n",
+ " 'TSMixer2-lo-80', 'TSMixer2-hi-80', 'TSMixer2-hi-90']\n",
+ "# Re-Test default prediction - note that they are different from the first test (this is expected)\n",
+ "preds = nf.predict(futr_df=AirPassengersPanel_test)\n",
+ "assert list(preds.columns) == ['unique_id', 'ds', 'NHITS', 'NHITS1', 'NHITS1-median', 'NHITS2_ql0.5',\n",
+ " 'LSTM', 'LSTM1', 'LSTM1-median', 'LSTM2_ql0.5', 'TSMixer', 'TSMixer1',\n",
+ " 'TSMixer1-median', 'TSMixer2_ql0.5']"
+ ]
}
],
"metadata": {
diff --git a/nbs/docs/capabilities/01_overview.ipynb b/nbs/docs/capabilities/01_overview.ipynb
index 11b964a7f..de1f3e374 100644
--- a/nbs/docs/capabilities/01_overview.ipynb
+++ b/nbs/docs/capabilities/01_overview.ipynb
@@ -19,11 +19,11 @@
"|`BiTCN` | `AutoBiTCN` | CNN | Univariate | Direct | F/H/S | \n",
"|`DeepAR` | `AutoDeepAR` | RNN | Univariate | Recursive | F/S | \n",
"|`DeepNPTS` | `AutoDeepNPTS` | MLP | Univariate | Direct | F/H/S | \n",
- "|`DilatedRNN` | `AutoDilatedRNN` | RNN | Univariate | Recursive | F/H/S | \n",
+ "|`DilatedRNN` | `AutoDilatedRNN` | RNN | Univariate | Direct | F/H/S | \n",
"|`FEDformer` | `AutoFEDformer` | Transformer | Univariate | Direct | F | \n",
"|`GRU` | `AutoGRU` | RNN | Univariate | Recursive | F/H/S | \n",
"|`HINT` | `AutoHINT` | Any7 | Both7 | Both7 | F/H/S | \n",
- "|`Informer` | `AutoInformer` | Transformer | Multivariate | Direct | F | \n",
+ "|`Informer` | `AutoInformer` | Transformer | Univariate | Direct | F | \n",
"|`iTransformer` | `AutoiTransformer` | Transformer | Multivariate | Direct | - | \n",
"|`KAN` | `AutoKAN` | KAN | Univariate | Direct | F/H/S | \n",
"|`LSTM` | `AutoLSTM` | RNN | Univariate | Recursive | F/H/S | \n",
@@ -38,7 +38,7 @@
"|`RNN` | `AutoRNN` | RNN | Univariate | Recursive | F/H/S | \n",
"|`SOFTS` | `AutoSOFTS` | MLP | Multivariate | Direct | - | \n",
"|`StemGNN` | `AutoStemGNN` | GNN | Multivariate | Direct | - | \n",
- "|`TCN` | `AutoTCN` | CNN | Univariate | Recursive | F/H/S | \n",
+ "|`TCN` | `AutoTCN` | CNN | Univariate | Direct | F/H/S | \n",
"|`TFT` | `AutoTFT` | Transformer | Univariate | Direct | F/H/S | \n",
"|`TiDE` | `AutoTiDE` | MLP | Univariate | Direct | F/H/S | \n",
"|`TimeMixer` | `AutoTimeMixer` | MLP | Multivariate | Direct | - | \n",
diff --git a/nbs/losses.pytorch.ipynb b/nbs/losses.pytorch.ipynb
index 8840aaef5..e1fe2de67 100644
--- a/nbs/losses.pytorch.ipynb
+++ b/nbs/losses.pytorch.ipynb
@@ -54,9 +54,8 @@
"outputs": [],
"source": [
"#| export\n",
- "from typing import Optional, Union, Tuple\n",
+ "from typing import Optional, Union, Tuple, List\n",
"\n",
- "import math\n",
"import numpy as np\n",
"import torch\n",
"\n",
@@ -70,6 +69,9 @@
" Poisson,\n",
" NegativeBinomial,\n",
" Beta,\n",
+ " Gamma,\n",
+ " MixtureSameFamily,\n",
+ " Categorical,\n",
" AffineTransform, \n",
" TransformedDistribution,\n",
")\n",
@@ -140,7 +142,7 @@
" `outputsize_multiplier`: Multiplier for the output size. \n",
" `output_names`: Names of the outputs. \n",
" \"\"\"\n",
- " def __init__(self, horizon_weight, outputsize_multiplier, output_names):\n",
+ " def __init__(self, horizon_weight=None, outputsize_multiplier=None, output_names=None):\n",
" super(BasePointLoss, self).__init__()\n",
" if horizon_weight is not None:\n",
" horizon_weight = torch.Tensor(horizon_weight.flatten())\n",
@@ -151,10 +153,13 @@
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Univariate loss operates in dimension [B,T,H]/[B,H]\n",
- " This changes the network's output from [B,H,1]->[B,H]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1]\n",
+ " Multivariate: [B, H, N]\n",
+ "\n",
+ " Output: [B, H, N]\n",
" \"\"\"\n",
- " return y_hat.squeeze(-1)\n",
+ " return y_hat\n",
"\n",
" def _compute_weights(self, y, mask):\n",
" \"\"\"\n",
@@ -163,16 +168,17 @@
" If set, check that it has the same length as the horizon in x.\n",
" \"\"\"\n",
" if mask is None:\n",
- " mask = torch.ones_like(y, device=y.device)\n",
+ " mask = torch.ones_like(y)\n",
"\n",
" if self.horizon_weight is None:\n",
- " self.horizon_weight = torch.ones(mask.shape[-1])\n",
+ " weights = torch.ones_like(mask)\n",
" else:\n",
- " assert mask.shape[-1] == len(self.horizon_weight), \\\n",
+ " assert mask.shape[1] == len(self.horizon_weight), \\\n",
" 'horizon_weight must have same length as Y'\n",
- "\n",
- " weights = self.horizon_weight.clone()\n",
- " weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)\n",
+ " weights = self.horizon_weight.clone()\n",
+ " weights = weights[None, :, None].to(mask.device)\n",
+ " weights = torch.ones_like(mask, device=mask.device) * weights\n",
+ " \n",
" return weights * mask"
]
},
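A minimal sketch of the broadcasting performed by `_compute_weights` above, with illustrative shapes (B=2, H=4, N=3); this is a standalone example, not the library code:

    import torch

    B, H, N = 2, 4, 3
    mask = torch.ones(B, H, N)                           # [B, H, N], like the mask above
    horizon_weight = torch.tensor([1.0, 1.0, 2.0, 2.0])  # one weight per forecast step (H entries)

    weights = horizon_weight.clone()[None, :, None].to(mask.device)  # [1, H, 1]
    weights = torch.ones_like(mask, device=mask.device) * weights    # broadcast to [B, H, N]
    print((weights * mask).shape)                        # torch.Size([2, 4, 3])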
@@ -227,7 +233,8 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " y_insample: Union[torch.Tensor, None] = None) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -311,7 +318,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -398,7 +407,8 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " y_insample: Union[torch.Tensor, None] = None) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -498,7 +508,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -590,7 +602,8 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " y_insample: Union[torch.Tensor, None] = None) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -685,12 +698,13 @@
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
" y_insample: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor (batch_size, output_size), Actual values. \n",
" `y_hat`: tensor (batch_size, output_size)), Predicted values. \n",
- " `y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions. \n",
+ " `y_insample`: tensor (batch_size, input_size), Actual insample values. \n",
" `mask`: tensor, Specifies date stamps per serie to consider in loss. \n",
"\n",
" **Returns:** \n",
@@ -699,7 +713,7 @@
" delta_y = torch.abs(y - y_hat)\n",
" scale = torch.mean(torch.abs(y_insample[:, self.seasonality:] - \\\n",
" y_insample[:, :-self.seasonality]), axis=1)\n",
- " losses = _divide_no_nan(delta_y, scale[:, None])\n",
+ " losses = _divide_no_nan(delta_y, scale[:, None, None])\n",
" weights = self._compute_weights(y=y, mask=mask)\n",
" return _weighted_mean(losses=losses, weights=weights)"
]
@@ -754,11 +768,11 @@
" \"\"\"Relative Mean Squared Error\n",
" Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)\n",
" as an alternative to percentage errors, to avoid measure unstability.\n",
- " $$ \\mathrm{relMSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}, \\\\mathbf{\\hat{y}}^{naive1}) =\n",
- " \\\\frac{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}^{naive1})} $$\n",
+ " $$ \\mathrm{relMSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}, \\\\mathbf{\\hat{y}}^{benchmark}) =\n",
+ " \\\\frac{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}})}{\\mathrm{MSE}(\\\\mathbf{y}, \\\\mathbf{\\hat{y}}^{benchmark})} $$\n",
"\n",
" **Parameters:** \n",
- " `y_train`: numpy array, Training values. \n",
+ " `y_train`: numpy array, deprecated. \n",
" `horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window. \n",
"\n",
" **References:** \n",
@@ -769,32 +783,31 @@
" \"Probabilistic Hierarchical Forecasting with Deep Poisson Mixtures. \n",
" Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)\n",
" \"\"\"\n",
- " def __init__(self, y_train, horizon_weight=None):\n",
+ " def __init__(self, y_train=None, horizon_weight=None):\n",
" super(relMSE, self).__init__(horizon_weight=horizon_weight,\n",
" outputsize_multiplier=1,\n",
" output_names=[''])\n",
- " self.y_train = y_train\n",
+ " if y_train is not None:\n",
+ " raise DeprecationWarning(\"y_train will be deprecated in a future release.\")\n",
" self.mse = MSE(horizon_weight=horizon_weight)\n",
"\n",
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_benchmark: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor (batch_size, output_size), Actual values. \n",
" `y_hat`: tensor (batch_size, output_size)), Predicted values. \n",
- " `y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions. \n",
+ " `y_benchmark`: tensor (batch_size, output_size), Benchmark predicted values. \n",
" `mask`: tensor, Specifies date stamps per serie to consider in loss. \n",
"\n",
" **Returns:** \n",
" `relMSE`: tensor (single value).\n",
" \"\"\"\n",
- " horizon = y.shape[-1]\n",
- " last_col = self.y_train[:, -1].unsqueeze(1)\n",
- " y_naive = last_col.repeat(1, horizon)\n",
- "\n",
- " norm = self.mse(y=y, y_hat=y_naive, mask=mask) # Already weighted\n",
+ " norm = self.mse(y=y, y_hat=y_benchmark, mask=mask) # Already weighted\n",
" norm = norm + 1e-5 # Numerical stability\n",
" loss = self.mse(y=y, y_hat=y_hat, mask=mask) # Already weighted\n",
" loss = _divide_no_nan(loss, norm)\n",
@@ -880,7 +893,9 @@
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -1022,35 +1037,47 @@
"\n",
" def domain_map(self, y_hat: torch.Tensor):\n",
" \"\"\"\n",
- " Identity domain map [B,T,H,Q]/[B,H,Q]\n",
+ " Input:\n",
+ " Univariate: [B, H, 1 * Q]\n",
+ " Multivariate: [B, H, N * Q]\n",
+ "\n",
+ " Output: [B, H, N, Q]\n",
" \"\"\"\n",
- " return y_hat\n",
- " \n",
+ " output = y_hat.reshape(y_hat.shape[0],\n",
+ " y_hat.shape[1],\n",
+ " -1,\n",
+ " self.outputsize_multiplier)\n",
+ "\n",
+ " return output\n",
+ "\n",
" def _compute_weights(self, y, mask):\n",
" \"\"\"\n",
" Compute final weights for each datapoint (based on all weights and all masks)\n",
" Set horizon_weight to a ones[H] tensor if not set.\n",
" If set, check that it has the same length as the horizon in x.\n",
+ "\n",
+ " y: [B, h, N, 1]\n",
+ " mask: [B, h, N, 1]\n",
" \"\"\"\n",
- " if mask is None:\n",
- " mask = torch.ones_like(y, device=y.device)\n",
- " else:\n",
- " mask = mask.unsqueeze(1) # Add Q dimension.\n",
"\n",
" if self.horizon_weight is None:\n",
- " self.horizon_weight = torch.ones(mask.shape[-1])\n",
+ " weights = torch.ones_like(mask)\n",
" else:\n",
- " assert mask.shape[-1] == len(self.horizon_weight), \\\n",
- " 'horizon_weight must have same length as Y'\n",
- " \n",
- " weights = self.horizon_weight.clone()\n",
- " weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)\n",
+ " assert mask.shape[1] == len(self.horizon_weight), \\\n",
+ " 'horizon_weight must have same length as Y' \n",
+ " weights = self.horizon_weight.clone()\n",
+ " weights = weights[None, :, None, None]\n",
+ " weights = weights.to(mask.device)\n",
+ " weights = torch.ones_like(mask, device=mask.device) * weights\n",
+ " \n",
" return weights * mask\n",
"\n",
" def __call__(self,\n",
" y: torch.Tensor,\n",
" y_hat: torch.Tensor,\n",
- " mask: Union[torch.Tensor, None] = None):\n",
+ " y_insample: torch.Tensor,\n",
+ " mask: Union[torch.Tensor, None] = None,\n",
+ " ) -> torch.Tensor:\n",
" \"\"\"\n",
" **Parameters:** \n",
" `y`: tensor, Actual values. \n",
@@ -1060,20 +1087,24 @@
" **Returns:** \n",
" `mqloss`: tensor (single value).\n",
" \"\"\"\n",
- " \n",
- " error = y_hat - y.unsqueeze(-1)\n",
- " sq = torch.maximum(-error, torch.zeros_like(error))\n",
- " s1_q = torch.maximum(error, torch.zeros_like(error))\n",
- " losses = (1/len(self.quantiles))*(self.quantiles * sq + (1 - self.quantiles) * s1_q)\n",
+ " # [B, h, N] -> [B, h, N, 1]\n",
+ " if y_hat.ndim == 3:\n",
+ " y_hat = y_hat.unsqueeze(-1)\n",
+ "\n",
+ " y = y.unsqueeze(-1)\n",
+ " if mask is not None:\n",
+ " mask = mask.unsqueeze(-1)\n",
+ " else:\n",
+ " mask = torch.ones_like(y, device=y.device)\n",
"\n",
- " if y_hat.ndim == 3: # BaseWindows\n",
- " losses = losses.swapaxes(-2,-1) # [B,H,Q] -> [B,Q,H] (needed for horizon weighting, H at the end)\n",
- " elif y_hat.ndim == 4: # BaseRecurrent\n",
- " losses = losses.swapaxes(-2,-1)\n",
- " losses = losses.swapaxes(-2,-3) # [B,seq_len,H,Q] -> [B,Q,seq_len,H] (needed for horizon weighting, H at the end)\n",
+ " error = y_hat - y\n",
"\n",
+ " sq = torch.maximum(-error, torch.zeros_like(error))\n",
+ " s1_q = torch.maximum(error, torch.zeros_like(error))\n",
+ " \n",
+ " quantiles = self.quantiles[None, None, None, :]\n",
+ " losses = (1 / len(quantiles)) * (quantiles * sq + (1 - quantiles) * s1_q)\n",
" weights = self._compute_weights(y=losses, mask=mask) # Use losses for extra dim\n",
- " # NOTE: Weights do not have Q dimension.\n",
"\n",
" return _weighted_mean(losses=losses, weights=weights)"
]
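A minimal sketch of the multi-quantile (pinball) loss computed above, with illustrative shapes (B=2, H=4, N=1, Q=3) mirroring the tensor layout in the rewritten `__call__`:

    import torch

    B, H, N = 2, 4, 1
    quantiles = torch.tensor([0.1, 0.5, 0.9])             # Q = 3

    y = torch.rand(B, H, N, 1)                            # actuals with a trailing quantile dim
    y_hat = torch.rand(B, H, N, len(quantiles))           # one forecast per quantile

    error = y_hat - y
    sq = torch.maximum(-error, torch.zeros_like(error))   # penalized by q when under-predicting
    s1_q = torch.maximum(error, torch.zeros_like(error))  # penalized by (1 - q) when over-predicting
    q = quantiles[None, None, None, :]
    losses = (1 / len(quantiles)) * (q * sq + (1 - q) * s1_q)
    print(losses.mean())                                  # unweighted average pinball loss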
@@ -1228,9 +1259,9 @@
" self.sampling_distr = Beta(concentration0 = concentration0,\n",
" concentration1 = concentration1)\n",
"\n",
- " def update_quantile(self, q: float = 0.5):\n",
- " self.q = q\n",
- " self.output_names = [f\"_ql{q}\"]\n",
+ " def update_quantile(self, q: List[float] = [0.5]):\n",
+ " self.q = q[0]\n",
+ " self.output_names = [f\"_ql{q[0]}\"]\n",
" self.has_predicted = True\n",
"\n",
" def domain_map(self, y_hat):\n",
@@ -1239,9 +1270,8 @@
"\n",
" Input shapes to this function:\n",
" \n",
- " base_windows: y_hat = [B, h, 1] \n",
- " base_multivariate: y_hat = [B, h, n_series]\n",
- " base_recurrent: y_hat = [B, seq_len, h, n_series]\n",
+ " Univariate: y_hat = [B, h, 1] \n",
+ " Multivariate: y_hat = [B, h, N]\n",
" \"\"\"\n",
" if self.eval() and self.has_predicted:\n",
" quantiles = torch.full(size=y_hat.shape, \n",
@@ -1259,7 +1289,7 @@
" emb_outputs = self.output_layer(emb_inputs)\n",
" \n",
" # Domain map\n",
- " y_hat = emb_outputs.squeeze(-1).squeeze(-1)\n",
+ " y_hat = emb_outputs.squeeze(-1)\n",
"\n",
" return y_hat\n"
]
@@ -1299,7 +1329,7 @@
"\n",
"# Check that quantiles are correctly updated - prediction\n",
"check = IQLoss()\n",
- "check.update_quantile(0.7)\n",
+ "check.update_quantile([0.7])\n",
"test_eq(check.q, 0.7)"
]
},
@@ -1357,19 +1387,6 @@
"outputs": [],
"source": [
"#| exporti\n",
- "def bernoulli_domain_map(input: torch.Tensor):\n",
- " \"\"\" Bernoulli Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:** \n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta]. \n",
- "\n",
- " **Returns:** \n",
- " `(probs,)`: tuple with tensors of Poisson distribution arguments. \n",
- " \"\"\"\n",
- " return (input.squeeze(-1),)\n",
- "\n",
"def bernoulli_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\" Bernoulli Scale Decouple\n",
"\n",
@@ -1383,21 +1400,6 @@
" probs = F.sigmoid(probs)#.clone()\n",
" return (probs,)\n",
"\n",
- "def student_domain_map(input: torch.Tensor):\n",
- " \"\"\" Student T Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:** \n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta]. \n",
- " `eps`: float, helps the initialization of scale for easier optimization. \n",
- "\n",
- " **Returns:** \n",
- " `(df, loc, scale)`: tuple with tensors of StudentT distribution arguments. \n",
- " \"\"\"\n",
- " df, loc, scale = torch.tensor_split(input, 3, dim=-1)\n",
- " return df.squeeze(-1), loc.squeeze(-1), scale.squeeze(-1)\n",
- "\n",
"def student_scale_decouple(output, loc=None, scale=None, eps: float=0.1):\n",
" \"\"\" Normal Scale Decouple\n",
"\n",
@@ -1413,21 +1415,6 @@
" df = 3.0 + F.softplus(df)\n",
" return (df, mean, tscale)\n",
"\n",
- "def normal_domain_map(input: torch.Tensor):\n",
- " \"\"\" Normal Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:** \n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta]. \n",
- " `eps`: float, helps the initialization of scale for easier optimization. \n",
- "\n",
- " **Returns:** \n",
- " `(mean, std)`: tuple with tensors of Normal distribution arguments. \n",
- " \"\"\"\n",
- " mean, std = torch.tensor_split(input, 2, dim=-1)\n",
- " return mean.squeeze(-1), std.squeeze(-1)\n",
- "\n",
"def normal_scale_decouple(output, loc=None, scale=None, eps: float=0.2):\n",
" \"\"\" Normal Scale Decouple\n",
"\n",
@@ -1442,19 +1429,6 @@
" std = (std + eps) * scale\n",
" return (mean, std)\n",
"\n",
- "def poisson_domain_map(input: torch.Tensor):\n",
- " \"\"\" Poisson Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:** \n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta]. \n",
- "\n",
- " **Returns:** \n",
- " `(rate,)`: tuple with tensors of Poisson distribution arguments. \n",
- " \"\"\"\n",
- " return (input.squeeze(-1),)\n",
- "\n",
"def poisson_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\" Poisson Scale Decouple\n",
"\n",
@@ -1467,21 +1441,7 @@
" if (loc is not None) and (scale is not None):\n",
" rate = (rate * scale) + loc\n",
" rate = F.softplus(rate) + eps\n",
- " return (rate,)\n",
- "\n",
- "def nbinomial_domain_map(input: torch.Tensor):\n",
- " \"\"\" Negative Binomial Domain Map\n",
- " Maps input into distribution constraints, by construction input's \n",
- " last dimension is of matching `distr_args` length.\n",
- "\n",
- " **Parameters:** \n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta]. \n",
- "\n",
- " **Returns:** \n",
- " `(total_count, alpha)`: tuple with tensors of N.Binomial distribution arguments. \n",
- " \"\"\"\n",
- " mu, alpha = torch.tensor_split(input, 2, dim=-1)\n",
- " return mu.squeeze(-1), alpha.squeeze(-1)\n",
+ " return (rate, )\n",
"\n",
"def nbinomial_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\" Negative Binomial Scale Decouple\n",
@@ -1550,10 +1510,12 @@
" - [Jorgensen, B. (1987). Exponential Dispersion Models. Journal of the Royal Statistical Society. \n",
" Series B (Methodological), 49(2), 127–162. http://www.jstor.org/stable/2345415](http://www.jstor.org/stable/2345415) \n",
" \"\"\"\n",
+ " arg_constraints = {'log_mu': constraints.real}\n",
+ " support = constraints.nonnegative\n",
+ "\n",
" def __init__(self, log_mu, rho, validate_args=None):\n",
" # TODO: add sigma2 dispersion\n",
" # TODO add constraints\n",
- " # arg_constraints = {'log_mu': constraints.real, 'rho': constraints.positive}\n",
" # support = constraints.real\n",
" self.log_mu = log_mu\n",
" self.rho = rho\n",
@@ -1587,7 +1549,7 @@
" beta = beta.expand(shape)\n",
"\n",
" N = torch.poisson(rate) + 1e-5\n",
- " gamma = torch.distributions.gamma.Gamma(N * alpha, beta)\n",
+ " gamma = Gamma(N*alpha, beta)\n",
" samples = gamma.sample()\n",
" samples[N==0] = 0\n",
"\n",
@@ -1602,12 +1564,12 @@
"\n",
" return a - b\n",
"\n",
- "def tweedie_domain_map(input: torch.Tensor):\n",
+ "def tweedie_domain_map(input: torch.Tensor, rho: float = 1.5):\n",
" \"\"\"\n",
" Maps output of neural network to domain of distribution loss\n",
"\n",
" \"\"\"\n",
- " return (input.squeeze(-1),)\n",
+ " return (input, rho)\n",
"\n",
"def tweedie_scale_decouple(output, loc=None, scale=None):\n",
" \"\"\"Tweedie Scale Decouple\n",
@@ -1616,14 +1578,14 @@
" count and logits based on anchoring `loc`, `scale`.\n",
" Also adds Tweedie domain protection to the distribution parameters.\n",
" \"\"\"\n",
- " log_mu = output[0]\n",
+ " log_mu, rho = output\n",
" log_mu = F.softplus(log_mu)\n",
" log_mu = torch.clamp(log_mu, 1e-9, 37)\n",
" if (loc is not None) and (scale is not None):\n",
" log_mu += torch.log(loc)\n",
"\n",
" log_mu = torch.clamp(log_mu, 1e-9, 37)\n",
- " return (log_mu,)"
+ " return (log_mu, rho)"
]
},
{
@@ -1687,6 +1649,15 @@
" scale *= t.scale\n",
" p = self.base_dist.crps(z)\n",
" return p * scale\n",
+ " \n",
+ " @property\n",
+ " def mean(self):\n",
+ " \"\"\"\n",
+ " Function used to compute the empirical mean\n",
+ " \"\"\"\n",
+ " samples = self.sample([1000])\n",
+ " return samples.mean(dim=0)\n",
+ " \n",
"\n",
"class BaseISQF(Distribution):\n",
" \"\"\"\n",
@@ -2357,7 +2328,7 @@
" last dimension is of matching `distr_args` length.\n",
"\n",
" **Parameters:** \n",
- " `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta]. \n",
+ " `input`: tensor, of dimensions [B, H, N * n_outputs]. \n",
" `tol`: float, tolerance. \n",
" `quantiles`: tensor, quantiles used for ISQF (i.e. x-positions for the knots). \n",
" `num_pieces`: int, num_pieces used for each quantile spline. \n",
@@ -2371,7 +2342,14 @@
" #\n",
" # Because in this case the spline knots could be squeezed together\n",
" # and cause overflow in spline CRPS computation\n",
- " num_qk = len(quantiles) \n",
+ " num_qk = len(quantiles)\n",
+ " n_outputs = 2 * (num_qk - 1) * num_pieces + 2 + num_qk\n",
+ " \n",
+ " # Reshape: [B, h, N * n_outputs] -> [B, h, N, n_outputs]\n",
+ " input = input.reshape(input.shape[0],\n",
+ " input.shape[1],\n",
+ " -1,\n",
+ " n_outputs)\n",
" start_index = 0\n",
" spline_knots = input[..., start_index: start_index + (num_qk - 1) * num_pieces]\n",
" start_index += (num_qk - 1) * num_pieces\n",
@@ -2381,27 +2359,19 @@
" start_index += 1\n",
" beta_r = input[..., start_index: start_index + 1]\n",
" start_index += 1\n",
- " quantile_knots = input[..., start_index: start_index + num_qk]\n",
- "\n",
- " qk_y = torch.cat(\n",
- " [\n",
- " quantile_knots[..., 0:1],\n",
- " torch.abs(quantile_knots[..., 1:]) + tol,\n",
- " ],\n",
- " dim=-1,\n",
- " )\n",
- " qk_y = torch.cumsum(qk_y, dim=-1)\n",
+ " quantile_knots = F.softplus(input[..., start_index: start_index + num_qk]) + tol\n",
+ "\n",
+ " qk_y = torch.cumsum(quantile_knots, dim=-1)\n",
"\n",
" # Prevent overflow when we compute 1/beta\n",
- " beta_l = torch.abs(beta_l.squeeze(-1)) + tol\n",
- " beta_r = torch.abs(beta_r.squeeze(-1)) + tol\n",
+ " beta_l = F.softplus(beta_l.squeeze(-1)) + tol\n",
+ " beta_r = F.softplus(beta_r.squeeze(-1)) + tol\n",
"\n",
" # Reshape spline arguments\n",
" batch_shape = spline_knots.shape[:-1]\n",
"\n",
" # repeat qk_x from (num_qk,) to (*batch_shape, num_qk)\n",
- " qk_x_repeat = torch.sort(quantiles)\\\n",
- " .values\\\n",
+ " qk_x_repeat = quantiles\\\n",
" .repeat(*batch_shape, 1)\\\n",
" .to(input.device)\n",
"\n",
@@ -2465,7 +2435,8 @@
" `level`: float list [0,100], confidence levels for prediction intervals. \n",
" `quantiles`: float list [0,1], alternative to level list, target quantiles. \n",
" `num_samples`: int=500, number of samples for the empirical quantiles. \n",
- " `return_params`: bool=False, wether or not return the Distribution parameters.
\n",
+ " `return_params`: bool=False, wether or not return the Distribution parameters. \n",
+ " `horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
\n",
" \n",
@@ -2467,17 +2236,17 @@
],
"text/plain": [
" trend y_[lag12] month observed_target \\\n",
- "trend 1.00 -0.45 -0.29 -0.41 \n",
- "y_[lag12] -0.45 1.00 -0.56 -0.18 \n",
- "month -0.29 -0.56 1.00 0.18 \n",
- "observed_target -0.41 -0.18 0.18 1.00 \n",
- "Correlation with Mean Attention -0.43 0.68 -0.38 0.07 \n",
+ "trend 1.00 -0.64 0.31 -0.52 \n",
+ "y_[lag12] -0.64 1.00 -0.80 -0.03 \n",
+ "month 0.31 -0.80 1.00 0.01 \n",
+ "observed_target -0.52 -0.03 0.01 1.00 \n",
+ "Correlation with Mean Attention 0.55 -0.12 -0.04 -0.79 \n",
"\n",
" Correlation with Mean Attention \n",
- "trend -0.43 \n",
- "y_[lag12] 0.68 \n",
- "month -0.38 \n",
- "observed_target 0.07 \n",
+ "trend 0.55 \n",
+ "y_[lag12] -0.12 \n",
+ "month -0.04 \n",
+ "observed_target -0.79 \n",
"Correlation with Mean Attention 1.00 "
]
},
diff --git a/nbs/models.tide.ipynb b/nbs/models.tide.ipynb
index 3b096a26f..834181e69 100644
--- a/nbs/models.tide.ipynb
+++ b/nbs/models.tide.ipynb
@@ -44,8 +44,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -62,7 +65,7 @@
"import torch.nn.functional as F\n",
"\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_windows import BaseWindows"
+ "from neuralforecast.common._base_model import BaseModel"
]
},
{
@@ -134,7 +137,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TiDE(BaseWindows):\n",
+ "class TiDE(BaseModel):\n",
" \"\"\" TiDE\n",
"\n",
" Time-series Dense Encoder (`TiDE`) is a MLP-based univariate time-series forecasting model. `TiDE` uses Multi-layer Perceptrons (MLPs) in an encoder-decoder model for long-term time-series forecasting.\n",
@@ -178,10 +181,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
- " EXOGENOUS_STAT = True \n",
+ " EXOGENOUS_STAT = True \n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -303,7 +307,7 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
- " x = windows_batch['insample_y'].unsqueeze(-1) # [B, L, 1]\n",
+ " x = windows_batch['insample_y'] # [B, L, 1]\n",
" hist_exog = windows_batch['hist_exog'] # [B, L, X]\n",
" futr_exog = windows_batch['futr_exog'] # [B, L + h, F]\n",
" stat_exog = windows_batch['stat_exog'] # [B, S]\n",
@@ -347,8 +351,7 @@
" # Temporal decoder\n",
" x = self.temporal_decoder(x) # [B, h, temporal_width + decoder_output_dim] -> [B, h, n_outputs]\n",
"\n",
- " # Map to output domain\n",
- " forecast = self.loss.domain_map(x + x_skip)\n",
+ " forecast = x + x_skip\n",
" \n",
" return forecast\n"
]
@@ -380,6 +383,21 @@
"show_doc(TiDE.predict, name='TiDE.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TiDE, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -399,7 +417,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TiDE\n",
- "from neuralforecast.losses.pytorch import GMM, DistributionLoss\n",
+ "from neuralforecast.losses.pytorch import GMM\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds 1:\n",
+ " raise Exception('TimeLLM only supports point loss functions (MAE, MSE, etc) as loss function.') \n",
+ " \n",
+ " if valid_loss is not None and not isinstance(valid_loss, losses.BasePointLoss):\n",
+ " raise Exception('TimeLLM only supports point loss functions (MAE, MSE, etc) as valid loss function.') \n",
+ "\n",
+ "\n",
" # Architecture\n",
" self.patch_len = patch_len\n",
" self.stride = stride\n",
@@ -520,13 +530,10 @@
" return lags\n",
" \n",
" def forward(self, windows_batch):\n",
- " insample_y = windows_batch['insample_y']\n",
- "\n",
- " x = insample_y.unsqueeze(-1)\n",
+ " x = windows_batch['insample_y']\n",
"\n",
" y_pred = self.forecast(x)\n",
" y_pred = y_pred[:, -self.h:, :]\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
" \n",
" return y_pred\n"
]
@@ -572,11 +579,12 @@
"outputs": [],
"source": [
"#| eval: false\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TimeLLM\n",
- "from neuralforecast.utils import AirPassengersPanel, augment_calendar_df\n",
- "\n",
- "AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
+ "from neuralforecast.utils import AirPassengersPanel\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
diff --git a/nbs/models.timemixer.ipynb b/nbs/models.timemixer.ipynb
index 1f405c26a..743a1df40 100644
--- a/nbs/models.timemixer.ipynb
+++ b/nbs/models.timemixer.ipynb
@@ -17,8 +17,8 @@
"\n",
"Seasonal and trend components exhibit significantly different characteristics in time series, and different scales of the time series reflect different properties, with seasonal characteristics being more pronounced at a fine-grained micro scale and trend characteristics being more pronounced at a coarse macro scale, it is therefore necessary to decouple seasonal and trend components at different scales. As such, TimeMixer is an MLP-based architecture with Past-Decomposable-Mixing (PDM) and Future-Multipredictor-Mixing (FMM) blocks to take full advantage of disentangled multiscale series in both past extraction and future prediction phases.\n",
"\n",
- "**Reference**\n",
- "- [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)"
+ "**References** \n",
+ "[Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2) "
]
},
{
@@ -41,10 +41,10 @@
"import torch\n",
"import torch.nn as nn\n",
"\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"from neuralforecast.common._modules import PositionalEmbedding, TokenEmbedding, TemporalEmbedding, SeriesDecomp, RevIN\n",
- "\n",
- "from neuralforecast.losses.pytorch import MAE"
+ "from neuralforecast.losses.pytorch import MAE\n",
+ "from typing import Optional"
]
},
{
@@ -54,8 +54,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -324,7 +327,7 @@
"source": [
"#| export\n",
"\n",
- "class TimeMixer(BaseMultivariate):\n",
+ "class TimeMixer(BaseModel):\n",
" \"\"\" TimeMixer\n",
" **Parameters** \n",
" `h`: int, Forecast horizon. \n",
@@ -354,6 +357,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping. \n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check. \n",
" `batch_size`: int=32, number of different series in each batch. \n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size. \n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, default uses all. \n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all. \n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size. \n",
" `step_size`: int=1, step size between each window of temporal data. \n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html). \n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators. \n",
@@ -367,14 +374,15 @@
" `**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer). \n",
"\n",
" **References** \n",
- " [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2)\n",
+ " [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou.\"TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting\"](https://openreview.net/pdf?id=7oLshfEIC2) \n",
" \"\"\"\n",
"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -404,6 +412,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -429,6 +441,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -519,6 +535,9 @@
" for i in range(self.down_sampling_layers + 1)\n",
" ]\n",
" )\n",
+ " \n",
+ " if self.loss.outputsize_multiplier > 1:\n",
+ " self.distr_output = nn.Linear(self.n_series, self.n_series * self.loss.outputsize_multiplier)\n",
"\n",
" def out_projection(self, dec_out, i, out_res):\n",
" dec_out = self.projection_layer(dec_out)\n",
@@ -675,13 +694,10 @@
"\n",
" y_pred = self.forecast(insample_y, x_mark_enc, x_mark_dec)\n",
" y_pred = y_pred[:, -self.h:, :]\n",
- " y_pred = self.loss.domain_map(y_pred)\n",
+ " if self.loss.outputsize_multiplier > 1:\n",
+ " y_pred = self.distr_output(y_pred)\n",
"\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " if y_pred.ndim == 2:\n",
- " return y_pred.unsqueeze(-1)\n",
- " else:\n",
- " return y_pred"
+ " return y_pred\n"
]
},
{
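A minimal sketch of the extra projection added above for multi-parameter losses, with illustrative sizes (B=2, h=12, N=3 and an assumed outputsize_multiplier of 4); it only demonstrates the shape change, not the full model:

    import torch
    import torch.nn as nn

    B, h, N = 2, 12, 3
    outputsize_multiplier = 4                   # e.g. a distribution loss with four parameters (assumed)

    distr_output = nn.Linear(N, N * outputsize_multiplier)
    y_pred = torch.rand(B, h, N)                # point output of the mixing layers, one value per series
    print(distr_output(y_pred).shape)           # torch.Size([2, 12, 12]) -> [B, h, N * outputsize_multiplier]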
@@ -711,6 +727,21 @@
"show_doc(TimeMixer.predict, name='TimeMixer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TimeMixer, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/nbs/models.timesnet.ipynb b/nbs/models.timesnet.ipynb
index bc85e7126..8d88db07b 100644
--- a/nbs/models.timesnet.ipynb
+++ b/nbs/models.timesnet.ipynb
@@ -54,7 +54,7 @@
"import torch.fft\n",
"\n",
"from neuralforecast.common._modules import DataEmbedding\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -66,8 +66,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -200,7 +203,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TimesNet(BaseWindows):\n",
+ "class TimesNet(BaseModel):\n",
" \"\"\" TimesNet\n",
"\n",
" The TimesNet univariate model tackles the challenge of modeling multiple intraperiod and interperiod temporal variations.\n",
@@ -277,10 +280,11 @@
" Haixu Wu and Tengge Hu and Yong Liu and Hang Zhou and Jianmin Wang and Mingsheng Long. TimesNet: Temporal 2D-Variation Modeling for General Time Series Analysis. https://openreview.net/pdf?id=ju_Uqw384Oq\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False \n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -373,13 +377,9 @@
"\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
" # Parse inputs\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
" if self.futr_exog_size > 0:\n",
" x_mark_enc = futr_exog[:,:self.input_size,:]\n",
" else:\n",
@@ -394,7 +394,7 @@
" # porject back\n",
" dec_out = self.projection(enc_out)\n",
"\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
" return forecast"
]
},
@@ -425,6 +425,21 @@
"show_doc(TimesNet.predict, name='TimesNet.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TimesNet, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -444,9 +459,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.losses.pytorch import DistributionLoss\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, augment_calendar_df\n",
- "\n",
- "AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -456,10 +469,9 @@
" hidden_size = 16,\n",
" conv_hidden_size = 32,\n",
" loss=DistributionLoss(distribution='Normal', level=[80, 90]),\n",
- " futr_exog_list=calendar_cols,\n",
" scaler_type='standard',\n",
" learning_rate=1e-3,\n",
- " max_steps=5,\n",
+ " max_steps=100,\n",
" val_check_steps=50,\n",
" early_stop_patience_steps=2)\n",
"\n",
diff --git a/nbs/models.tsmixer.ipynb b/nbs/models.tsmixer.ipynb
index 55080cad9..5b0262cac 100644
--- a/nbs/models.tsmixer.ipynb
+++ b/nbs/models.tsmixer.ipynb
@@ -44,8 +44,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -55,12 +58,13 @@
"outputs": [],
"source": [
"#| export\n",
- "import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate"
+ "from neuralforecast.common._base_model import BaseModel\n",
+ "from neuralforecast.common._modules import RevINMultivariate"
]
},
{
@@ -157,55 +161,6 @@
" return x"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## 1.2 Reversible InstanceNormalization\n",
- "An Instance Normalization Layer that is reversible, based on [this reference implementation](https://github.com/google-research/google-research/blob/master/tsmixer/tsmixer_basic/models/rev_in.py). "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| export\n",
- "class ReversibleInstanceNorm1d(nn.Module):\n",
- " \"\"\" \n",
- " ReversibleInstanceNorm1d\n",
- " \"\"\" \n",
- " def __init__(self, n_series, eps=1e-5):\n",
- " super().__init__()\n",
- " self.weight = nn.Parameter(torch.ones((1, 1, n_series)))\n",
- " self.bias = nn.Parameter(torch.zeros((1, 1, n_series)))\n",
- "\n",
- " self.eps = eps\n",
- "\n",
- " def forward(self, x):\n",
- " # Batch statistics\n",
- " self.batch_mean = torch.mean(x, axis=1, keepdim=True).detach()\n",
- " self.batch_std = torch.sqrt(torch.var(x, axis=1, keepdim=True, unbiased=False) + self.eps).detach()\n",
- " \n",
- " # Instance normalization\n",
- " x = x - self.batch_mean\n",
- " x = x / self.batch_std\n",
- " x = x * self.weight\n",
- " x = x + self.bias\n",
- " \n",
- " return x\n",
- "\n",
- " def reverse(self, x):\n",
- " # Reverse the normalization\n",
- " x = x - self.bias\n",
- " x = x / self.weight \n",
- " x = x * self.batch_std\n",
- " x = x + self.batch_mean \n",
- "\n",
- " return x"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -220,7 +175,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TSMixer(BaseMultivariate):\n",
+ "class TSMixer(BaseModel):\n",
" \"\"\" TSMixer\n",
"\n",
" Time-Series Mixer (`TSMixer`) is a MLP-based multivariate time-series forecasting model. `TSMixer` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).\n",
@@ -244,6 +199,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping. \n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check. \n",
" `batch_size`: int=32, number of different series in each batch. \n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size. \n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, default uses all. \n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all. \n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size. \n",
" `step_size`: int=1, step size between each window of temporal data. \n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html). \n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators. \n",
@@ -261,10 +220,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = False\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -273,6 +233,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" n_block = 2,\n",
" ff_dim = 64,\n",
" dropout = 0.9,\n",
@@ -285,6 +246,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -303,6 +268,7 @@
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
" stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -311,6 +277,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -325,7 +295,7 @@
" # Reversible InstanceNormalization layer\n",
" self.revin = revin\n",
" if self.revin:\n",
- " self.norm = ReversibleInstanceNorm1d(n_series = n_series)\n",
+ " self.norm = RevINMultivariate(num_features = n_series, affine=True)\n",
"\n",
" # Mixing layers\n",
" mixing_layers = [MixingLayer(n_series=n_series, \n",
@@ -346,23 +316,17 @@
"\n",
" # TSMixer: InstanceNorm + Mixing layers + Dense output layer + ReverseInstanceNorm\n",
" if self.revin:\n",
- " x = self.norm(x)\n",
+ " x = self.norm(x, 'norm')\n",
" x = self.mixing_layers(x)\n",
" x = x.permute(0, 2, 1)\n",
" x = self.out(x)\n",
" x = x.permute(0, 2, 1)\n",
" if self.revin:\n",
- " x = self.norm.reverse(x)\n",
+ " x = self.norm(x, 'denorm')\n",
"\n",
" x = x.reshape(batch_size, self.h, self.loss.outputsize_multiplier * self.n_series)\n",
- " forecast = self.loss.domain_map(x)\n",
- "\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.\n",
- " if forecast.ndim == 2:\n",
- " return forecast.unsqueeze(-1)\n",
- " else:\n",
- " return forecast"
+ "\n",
+ " return x"
]
},
{
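A standalone sketch of the reversible instance-normalization round trip that the `norm`/`denorm` calls above rely on (not the library's RevINMultivariate, just the idea: statistics saved on the way in are reused on the way out):

    import torch

    x = torch.rand(2, 24, 3)                              # [batch, time, n_series]
    mean = x.mean(dim=1, keepdim=True)                    # per-series statistics over time
    std = torch.sqrt(x.var(dim=1, keepdim=True, unbiased=False) + 1e-5)

    x_norm = (x - mean) / std                             # 'norm' step before the mixing layers
    x_denorm = x_norm * std + mean                        # 'denorm' step applied to the output
    print(torch.allclose(x_denorm, x, atol=1e-5))         # True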
@@ -398,80 +362,12 @@
"metadata": {},
"outputs": [],
"source": [
- "#| hide\n",
- "import logging\n",
- "import warnings\n",
- "\n",
- "from neuralforecast import NeuralForecast\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss,PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test losses\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
- "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
- "\n",
- "AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic[\"unique_id\"] == 'Airline1']\n",
- "Y_train_df_single = Y_train_df[Y_train_df[\"unique_id\"] == 'Airline1']\n",
- "Y_test_df_single = Y_test_df[Y_test_df[\"unique_id\"] == 'Airline1']\n",
- "\n",
- "losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "\n",
- "for loss, valid_loss in zip(losses, valid_losses):\n",
- " try:\n",
- " model = TSMixer(h=12,\n",
- " input_size=24,\n",
- " n_series=2,\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " batch_size=32\n",
- " )\n",
- "\n",
- " fcst = NeuralForecast(models=[model], freq='M')\n",
- " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
- " forecasts = fcst.predict(futr_df=Y_test_df)\n",
- " except Exception as e:\n",
- " assert str(e) == f\"{loss} is not supported in a Multivariate model.\"\n",
- "\n",
- "\n",
- "# Test n_series = 1\n",
- "model = TSMixer(h=12,\n",
- " input_size=24,\n",
- " n_series=1,\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32\n",
- " )\n",
- "fcst = NeuralForecast(models=[model], freq='M')\n",
- "fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)\n",
- "forecasts = fcst.predict(futr_df=Y_test_df_single)"
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TSMixer, [\"airpassengers\"])"
]
},
{
@@ -501,7 +397,7 @@
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TSMixer\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE\n",
+ "from neuralforecast.losses.pytorch import MAE, MQLoss\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -518,8 +414,7 @@
" early_stop_patience_steps=-1,\n",
" val_check_steps=5,\n",
" learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
+ " loss=MQLoss(),\n",
" batch_size=32\n",
" )\n",
"\n",
@@ -533,9 +428,13 @@
"plot_df = pd.concat([Y_test_df, Y_hat_df], axis=1)\n",
"plot_df = pd.concat([Y_train_df, plot_df])\n",
"\n",
- "plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n",
+ "plot_df = plot_df[plot_df.unique_id=='Airline2'].drop('unique_id', axis=1)\n",
"plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n",
- "plt.plot(plot_df['ds'], plot_df['TSMixer'], c='blue', label='Forecast')\n",
+ "plt.plot(plot_df['ds'], plot_df['TSMixer-median'], c='blue', label='median')\n",
+ "plt.fill_between(x=plot_df['ds'][-12:], \n",
+ " y1=plot_df['TSMixer-lo-90'][-12:].values,\n",
+ " y2=plot_df['TSMixer-hi-90'][-12:].values,\n",
+ " alpha=0.4, label='level 90')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
@@ -566,7 +465,7 @@
"Y_df = AirPassengersPanel[AirPassengersPanel['unique_id']=='Airline1']\n",
"\n",
"plt.plot(Y_df['ds'], Y_df['y'], c='black', label='True')\n",
- "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixer'], c='blue', label='Forecast')\n",
+ "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixer-median'], c='blue', label='Forecast')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
diff --git a/nbs/models.tsmixerx.ipynb b/nbs/models.tsmixerx.ipynb
index 74ba735eb..4c29a7f43 100644
--- a/nbs/models.tsmixerx.ipynb
+++ b/nbs/models.tsmixerx.ipynb
@@ -44,8 +44,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -59,8 +62,10 @@
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
+ "from typing import Optional\n",
"from neuralforecast.losses.pytorch import MAE\n",
- "from neuralforecast.common._base_multivariate import BaseMultivariate"
+ "from neuralforecast.common._base_model import BaseModel\n",
+ "from neuralforecast.common._modules import RevINMultivariate"
]
},
{
@@ -244,7 +249,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class TSMixerx(BaseMultivariate):\n",
+ "class TSMixerx(BaseModel):\n",
" \"\"\" TSMixerx\n",
"\n",
" Time-Series Mixer exogenous (`TSMixerx`) is a MLP-based multivariate time-series forecasting model, with capability for additional exogenous inputs. `TSMixerx` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).\n",
@@ -268,6 +273,10 @@
" `early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping. \n",
" `val_check_steps`: int=100, Number of training steps between every validation loss check. \n",
" `batch_size`: int=32, number of different series in each batch. \n",
+ " `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size. \n",
+ " `windows_batch_size`: int=256, number of windows to sample in each training batch, default uses all. \n",
+ " `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all. \n",
+ " `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size. \n",
" `step_size`: int=1, step size between each window of temporal data. \n",
" `scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html). \n",
" `random_seed`: int=1, random_seed for pytorch initializer and numpy generators. \n",
@@ -285,10 +294,11 @@
"\n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'multivariate'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = True\n",
" EXOGENOUS_STAT = True\n",
+ " MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h,\n",
@@ -297,6 +307,7 @@
" futr_exog_list = None,\n",
" hist_exog_list = None,\n",
" stat_exog_list = None,\n",
+ " exclude_insample_y = False,\n",
" n_block = 2,\n",
" ff_dim = 64,\n",
" dropout = 0.0,\n",
@@ -309,6 +320,10 @@
" early_stop_patience_steps: int =-1,\n",
" val_check_steps: int = 100,\n",
" batch_size: int = 32,\n",
+ " valid_batch_size: Optional[int] = None,\n",
+ " windows_batch_size = 256,\n",
+ " inference_windows_batch_size = 256,\n",
+ " start_padding_enabled = False,\n",
" step_size: int = 1,\n",
" scaler_type: str = 'identity',\n",
" random_seed: int = 1,\n",
@@ -327,6 +342,7 @@
" futr_exog_list=futr_exog_list,\n",
" hist_exog_list=hist_exog_list,\n",
" stat_exog_list=stat_exog_list,\n",
+ " exclude_insample_y = exclude_insample_y,\n",
" loss=loss,\n",
" valid_loss=valid_loss,\n",
" max_steps=max_steps,\n",
@@ -335,6 +351,10 @@
" early_stop_patience_steps=early_stop_patience_steps,\n",
" val_check_steps=val_check_steps,\n",
" batch_size=batch_size,\n",
+ " valid_batch_size=valid_batch_size,\n",
+ " windows_batch_size=windows_batch_size,\n",
+ " inference_windows_batch_size=inference_windows_batch_size,\n",
+ " start_padding_enabled=start_padding_enabled,\n",
" step_size=step_size,\n",
" scaler_type=scaler_type,\n",
" random_seed=random_seed,\n",
@@ -348,7 +368,7 @@
" # Reversible InstanceNormalization layer\n",
" self.revin = revin\n",
" if self.revin:\n",
- " self.norm = ReversibleInstanceNorm1d(n_series = n_series)\n",
+ " self.norm = RevINMultivariate(num_features= n_series, affine=True)\n",
"\n",
" # Forecast horizon\n",
" self.h = h\n",
@@ -414,19 +434,19 @@
"\n",
" def forward(self, windows_batch):\n",
" # Parse batch\n",
- " x = windows_batch['insample_y'] # [batch_size (B), input_size (L), n_series (N)]\n",
- " hist_exog = windows_batch['hist_exog'] # [B, hist_exog_size (X), L, N]\n",
- " futr_exog = windows_batch['futr_exog'] # [B, futr_exog_size (F), L + h, N]\n",
- " stat_exog = windows_batch['stat_exog'] # [N, stat_exog_size (S)]\n",
+ " x = windows_batch['insample_y'] # [batch_size (B), input_size (L), n_series (N)]\n",
+ " hist_exog = windows_batch['hist_exog'] # [B, hist_exog_size (X), L, N]\n",
+ " futr_exog = windows_batch['futr_exog'] # [B, futr_exog_size (F), L + h, N]\n",
+ " stat_exog = windows_batch['stat_exog'] # [N, stat_exog_size (S)]\n",
" batch_size, input_size = x.shape[:2]\n",
"\n",
+ " # Apply revin to x\n",
+ " if self.revin:\n",
+ " x = self.norm(x, mode=\"norm\") # [B, L, N] -> [B, L, N]\n",
+ "\n",
" # Add channel dimension to x\n",
" x = x.unsqueeze(1) # [B, L, N] -> [B, 1, L, N]\n",
"\n",
- " # Apply revin to x\n",
- " if self.revin:\n",
- " x = self.norm(x) # [B, 1, L, N] -> [B, 1, L, N]\n",
- " \n",
" # Concatenate x with historical exogenous\n",
" if self.hist_exog_size > 0:\n",
" x = torch.cat((x, hist_exog), dim=1) # [B, 1, L, N] + [B, X, L, N] -> [B, 1 + X, L, N]\n",
@@ -473,26 +493,17 @@
" x = self.mixing_block(x) # [B, h, ff_dim] -> [B, h, ff_dim] \n",
" \n",
" # Fully connected output layer\n",
- " x = self.out(x) # [B, h, ff_dim] -> [B, h, N * n_outputs]\n",
+ " forecast = self.out(x) # [B, h, ff_dim] -> [B, h, N * n_outputs]\n",
" \n",
" # Reverse Instance Normalization on output\n",
" if self.revin:\n",
- " x = x.reshape(batch_size, \n",
- " self.h, \n",
- " self.loss.outputsize_multiplier,\n",
- " -1) # [B, h, N * n_outputs] -> [B, h, n_outputs, N]\n",
- " x = self.norm.reverse(x)\n",
- " x = x.reshape(batch_size, self.h, -1) # [B, h, n_outputs, N] -> [B, h, n_outputs * N]\n",
- "\n",
- " # Map to loss domain\n",
- " forecast = self.loss.domain_map(x)\n",
- "\n",
- " # domain_map might have squeezed the last dimension in case n_series == 1\n",
- " # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.\n",
- " if forecast.ndim == 2:\n",
- " return forecast.unsqueeze(-1)\n",
- " else:\n",
- " return forecast"
+ " forecast = forecast.reshape(batch_size, \n",
+ " self.h * self.loss.outputsize_multiplier,\n",
+ " -1) # [B, h, N * n_outputs] -> [B, h * n_outputs, N]\n",
+ " forecast = self.norm(forecast, \"denorm\")\n",
+ " forecast = forecast.reshape(batch_size, self.h, -1) # [B, h * n_outputs, N] -> [B, h, n_outputs * N]\n",
+ "\n",
+ " return forecast"
]
},
{
@@ -528,113 +539,12 @@
"metadata": {},
"outputs": [],
"source": [
- "#| hide\n",
- "import logging\n",
- "import warnings\n",
- "import pandas as pd\n",
- "\n",
- "from neuralforecast import NeuralForecast\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, generate_series\n",
- "from neuralforecast.losses.pytorch import MAE, MSE, RMSE, MAPE, SMAPE, MASE, relMSE, QuantileLoss, MQLoss, DistributionLoss,PMM, GMM, NBMM, HuberLoss, TukeyLoss, HuberQLoss, HuberMQLoss\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#| hide\n",
- "# Test losses\n",
+ "# Unit tests for models\n",
"logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
- "warnings.filterwarnings(\"ignore\")\n",
- "\n",
- "Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
- "\n",
- "AirPassengersStatic_single = AirPassengersStatic[AirPassengersStatic[\"unique_id\"] == 'Airline1']\n",
- "Y_train_df_single = Y_train_df[Y_train_df[\"unique_id\"] == 'Airline1']\n",
- "Y_test_df_single = Y_test_df[Y_test_df[\"unique_id\"] == 'Airline1']\n",
- "\n",
- "losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "valid_losses = [MAE(), MSE(), RMSE(), MAPE(), SMAPE(), MASE(seasonality=12), relMSE(y_train=Y_train_df), QuantileLoss(q=0.5), MQLoss(), DistributionLoss(distribution='Bernoulli'), DistributionLoss(distribution='Normal'), DistributionLoss(distribution='Poisson'), DistributionLoss(distribution='StudentT'), DistributionLoss(distribution='NegativeBinomial'), DistributionLoss(distribution='Tweedie'), PMM(), GMM(), NBMM(), HuberLoss(), TukeyLoss(), HuberQLoss(q=0.5), HuberMQLoss()]\n",
- "\n",
- "for loss, valid_loss in zip(losses, valid_losses):\n",
- " try:\n",
- " model = TSMixerx(h=12,\n",
- " input_size=24,\n",
- " n_series=2,\n",
- " stat_exog_list=['airline1'],\n",
- " futr_exog_list=['trend'],\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=loss,\n",
- " valid_loss=valid_loss,\n",
- " batch_size=32\n",
- " )\n",
- "\n",
- " fcst = NeuralForecast(models=[model], freq='M')\n",
- " fcst.fit(df=Y_train_df, static_df=AirPassengersStatic, val_size=12)\n",
- " forecasts = fcst.predict(futr_df=Y_test_df)\n",
- " except Exception as e:\n",
- " assert str(e) == f\"{loss} is not supported in a Multivariate model.\"\n",
- "\n",
- "\n",
- "# Test n_series = 1\n",
- "model = TSMixerx(h=12,\n",
- " input_size=24,\n",
- " n_series=1,\n",
- " stat_exog_list=['airline1'],\n",
- " futr_exog_list=['trend'],\n",
- " n_block=4,\n",
- " ff_dim=4,\n",
- " revin=True,\n",
- " scaler_type='standard',\n",
- " max_steps=2,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32\n",
- " )\n",
- "fcst = NeuralForecast(models=[model], freq='M')\n",
- "fcst.fit(df=Y_train_df_single, static_df=AirPassengersStatic_single, val_size=12)\n",
- "forecasts = fcst.predict(futr_df=Y_test_df_single) \n",
- "\n",
- "# Test n_series > 1024\n",
- "# See issue: https://github.com/Nixtla/neuralforecast/issues/948\n",
- "n_series = 1111\n",
- "Y_df, S_df = generate_series(n_series=n_series, n_temporal_features=2, n_static_features=2)\n",
- "\n",
- "model = TSMixerx(\n",
- " h=12,\n",
- " input_size=24,\n",
- " n_series=n_series,\n",
- " stat_exog_list=['static_0', 'static_1'],\n",
- " hist_exog_list=[\"temporal_0\", \"temporal_1\"],\n",
- " n_block=4,\n",
- " ff_dim=3,\n",
- " revin=True,\n",
- " scaler_type=\"standard\",\n",
- " max_steps=5,\n",
- " early_stop_patience_steps=-1,\n",
- " val_check_steps=5,\n",
- " learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
- " batch_size=32,\n",
- ")\n",
- "\n",
- "fcst = NeuralForecast(models=[model], freq=\"D\")\n",
- "fcst.fit(df=Y_df, static_df=S_df, val_size=12)\n",
- "forecasts = fcst.predict()"
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(TSMixerx, [\"airpassengers\"])"
]
},
{
@@ -664,7 +574,7 @@
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import TSMixerx\n",
"from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
- "from neuralforecast.losses.pytorch import MAE\n",
+ "from neuralforecast.losses.pytorch import GMM\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -677,13 +587,12 @@
" n_block=4,\n",
" ff_dim=4,\n",
" revin=True,\n",
- " scaler_type='standard',\n",
+ " scaler_type='robust',\n",
" max_steps=500,\n",
" early_stop_patience_steps=-1,\n",
" val_check_steps=5,\n",
" learning_rate=1e-3,\n",
- " loss=MAE(),\n",
- " valid_loss=MAE(),\n",
+ " loss = GMM(n_components=10, weighted=True),\n",
" batch_size=32\n",
" )\n",
"\n",
@@ -699,7 +608,11 @@
"\n",
"plot_df = plot_df[plot_df.unique_id=='Airline1'].drop('unique_id', axis=1)\n",
"plt.plot(plot_df['ds'], plot_df['y'], c='black', label='True')\n",
- "plt.plot(plot_df['ds'], plot_df['TSMixerx'], c='blue', label='Forecast')\n",
+ "plt.plot(plot_df['ds'], plot_df['TSMixerx-median'], c='blue', label='median')\n",
+ "plt.fill_between(x=plot_df['ds'][-12:], \n",
+ " y1=plot_df['TSMixerx-lo-90'][-12:].values,\n",
+ " y2=plot_df['TSMixerx-hi-90'][-12:].values,\n",
+ " alpha=0.4, label='level 90')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
@@ -730,7 +643,7 @@
"Y_df = AirPassengersPanel[AirPassengersPanel['unique_id']=='Airline1']\n",
"\n",
"plt.plot(Y_df['ds'], Y_df['y'], c='black', label='True')\n",
- "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixerx'], c='blue', label='Forecast')\n",
+ "plt.plot(Y_hat_df['ds'], Y_hat_df['TSMixerx-median'], c='blue', label='Forecast')\n",
"ax.set_title('AirPassengers Forecast', fontsize=22)\n",
"ax.set_ylabel('Monthly Passengers', fontsize=20)\n",
"ax.set_xlabel('Year', fontsize=20)\n",
diff --git a/nbs/models.vanillatransformer.ipynb b/nbs/models.vanillatransformer.ipynb
index 232de7dfa..f9813efcc 100644
--- a/nbs/models.vanillatransformer.ipynb
+++ b/nbs/models.vanillatransformer.ipynb
@@ -67,7 +67,7 @@
" TransDecoderLayer, TransDecoder,\n",
" DataEmbedding, AttentionLayer,\n",
")\n",
- "from neuralforecast.common._base_windows import BaseWindows\n",
+ "from neuralforecast.common._base_model import BaseModel\n",
"\n",
"from neuralforecast.losses.pytorch import MAE"
]
@@ -79,8 +79,11 @@
"outputs": [],
"source": [
"#| hide\n",
+ "import logging\n",
+ "import warnings\n",
"from fastcore.test import test_eq\n",
- "from nbdev.showdoc import show_doc"
+ "from nbdev.showdoc import show_doc\n",
+ "from neuralforecast.common._model_checks import check_model"
]
},
{
@@ -154,7 +157,7 @@
"outputs": [],
"source": [
"#| export\n",
- "class VanillaTransformer(BaseWindows):\n",
+ "class VanillaTransformer(BaseModel):\n",
" \"\"\" VanillaTransformer\n",
"\n",
" Vanilla Transformer, following implementation of the Informer paper, used as baseline.\n",
@@ -208,10 +211,11 @@
"\t- [Haoyi Zhou, Shanghang Zhang, Jieqi Peng, Shuai Zhang, Jianxin Li, Hui Xiong, Wancai Zhang. \"Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting\"](https://arxiv.org/abs/2012.07436) \n",
" \"\"\"\n",
" # Class attributes\n",
- " SAMPLING_TYPE = 'windows'\n",
" EXOGENOUS_FUTR = True\n",
" EXOGENOUS_HIST = False\n",
" EXOGENOUS_STAT = False\n",
+ " MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)\n",
+ " RECURRENT = False # If the model produces forecasts recursively (True) or direct (False)\n",
"\n",
" def __init__(self,\n",
" h: int, \n",
@@ -343,14 +347,8 @@
" def forward(self, windows_batch):\n",
" # Parse windows_batch\n",
" insample_y = windows_batch['insample_y']\n",
- " #insample_mask = windows_batch['insample_mask']\n",
- " #hist_exog = windows_batch['hist_exog']\n",
- " #stat_exog = windows_batch['stat_exog']\n",
- "\n",
" futr_exog = windows_batch['futr_exog']\n",
"\n",
- " insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]\n",
- "\n",
" if self.futr_exog_size > 0:\n",
" x_mark_enc = futr_exog[:,:self.input_size,:]\n",
" x_mark_dec = futr_exog[:,-(self.label_len+self.h):,:]\n",
@@ -368,7 +366,7 @@
" dec_out = self.decoder(dec_out, enc_out, x_mask=None, \n",
" cross_mask=None)\n",
"\n",
- " forecast = self.loss.domain_map(dec_out[:, -self.h:])\n",
+ " forecast = dec_out[:, -self.h:]\n",
" return forecast"
]
},
@@ -399,6 +397,21 @@
"show_doc(VanillaTransformer.predict, name='VanillaTransformer.predict')"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Unit tests for models\n",
+ "logging.getLogger(\"pytorch_lightning\").setLevel(logging.ERROR)\n",
+ "logging.getLogger(\"lightning_fabric\").setLevel(logging.ERROR)\n",
+ "with warnings.catch_warnings():\n",
+ " warnings.simplefilter(\"ignore\")\n",
+ " check_model(VanillaTransformer, [\"airpassengers\"])"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
@@ -418,9 +431,7 @@
"\n",
"from neuralforecast import NeuralForecast\n",
"from neuralforecast.models import VanillaTransformer\n",
- "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic, augment_calendar_df\n",
- "\n",
- "AirPassengersPanel, calendar_cols = augment_calendar_df(df=AirPassengersPanel, freq='M')\n",
+ "from neuralforecast.utils import AirPassengersPanel, AirPassengersStatic\n",
"\n",
"Y_train_df = AirPassengersPanel[AirPassengersPanel.ds=AirPassengersPanel['ds'].values[-12]].reset_index(drop=True) # 12 test\n",
@@ -431,7 +442,6 @@
" conv_hidden_size=32,\n",
" n_head=2,\n",
" loss=MAE(),\n",
- " futr_exog_list=calendar_cols,\n",
" scaler_type='robust',\n",
" learning_rate=1e-3,\n",
" max_steps=500,\n",
diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb
index 5b056c144..cf4faa82b 100644
--- a/nbs/utils.ipynb
+++ b/nbs/utils.ipynb
@@ -38,12 +38,11 @@
"#| export\n",
"import random\n",
"from itertools import chain\n",
- "from typing import List, Union\n",
+ "from typing import List, Union, Optional, Tuple\n",
"from utilsforecast.compat import DFType\n",
"\n",
"import numpy as np\n",
- "import pandas as pd\n",
- "import utilsforecast.processing as ufp"
+ "import pandas as pd"
]
},
{
@@ -609,41 +608,51 @@
"source": [
"#| export\n",
"def add_conformal_distribution_intervals(\n",
- " fcst_df: DFType, \n",
+ " model_fcsts: np.array, \n",
" cs_df: DFType,\n",
- " model_names: List[str],\n",
- " level: List[Union[int, float]],\n",
+ " model: str,\n",
" cs_n_windows: int,\n",
" n_series: int,\n",
" horizon: int,\n",
- ") -> DFType:\n",
+ " level: Optional[List[Union[int, float]]] = None,\n",
+ " quantiles: Optional[List[float]] = None,\n",
+ ") -> Tuple[np.array, List[str]]:\n",
" \"\"\"\n",
" Adds conformal intervals to a `fcst_df` based on conformal scores `cs_df`.\n",
" `level` should be already sorted. This strategy creates forecasts paths\n",
" based on errors and calculate quantiles using those paths.\n",
" \"\"\"\n",
- " fcst_df = ufp.copy_if_pandas(fcst_df, deep=False)\n",
- " alphas = [100 - lv for lv in level]\n",
- " cuts = [alpha / 200 for alpha in reversed(alphas)]\n",
- " cuts.extend(1 - alpha / 200 for alpha in alphas)\n",
- " for model in model_names:\n",
- " scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)\n",
- " scores = scores.transpose(1, 0, 2)\n",
- " # restrict scores to horizon\n",
- " scores = scores[:,:,:horizon]\n",
- " mean = fcst_df[model].to_numpy().reshape(1, n_series, -1)\n",
- " scores = np.vstack([mean - scores, mean + scores])\n",
- " quantiles = np.quantile(\n",
- " scores,\n",
- " cuts,\n",
- " axis=0,\n",
- " )\n",
- " quantiles = quantiles.reshape(len(cuts), -1).T\n",
+ " assert level is not None or quantiles is not None, \"Either level or quantiles must be provided\"\n",
+ " \n",
+ " if quantiles is None and level is not None:\n",
+ " alphas = [100 - lv for lv in level]\n",
+ " cuts = [alpha / 200 for alpha in reversed(alphas)]\n",
+ " cuts.extend(1 - alpha / 200 for alpha in alphas)\n",
+ " elif quantiles is not None:\n",
+ " cuts = quantiles\n",
+ " \n",
+ " scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)\n",
+ " scores = scores.transpose(1, 0, 2)\n",
+ " # restrict scores to horizon\n",
+ " scores = scores[:,:,:horizon]\n",
+ " mean = model_fcsts.reshape(1, n_series, -1)\n",
+ " scores = np.vstack([mean - scores, mean + scores])\n",
+ " scores_quantiles = np.quantile(\n",
+ " scores,\n",
+ " cuts,\n",
+ " axis=0,\n",
+ " )\n",
+ " scores_quantiles = scores_quantiles.reshape(len(cuts), -1).T\n",
+ " if quantiles is None and level is not None:\n",
" lo_cols = [f\"{model}-lo-{lv}\" for lv in reversed(level)]\n",
" hi_cols = [f\"{model}-hi-{lv}\" for lv in level]\n",
" out_cols = lo_cols + hi_cols\n",
- " fcst_df = ufp.assign_columns(fcst_df, out_cols, quantiles)\n",
- " return fcst_df"
+ " elif quantiles is not None:\n",
+ " out_cols = [f\"{model}-ql{q}\" for q in quantiles]\n",
+ "\n",
+ " fcsts_with_intervals = np.hstack([model_fcsts, scores_quantiles])\n",
+ "\n",
+ " return fcsts_with_intervals, out_cols"
]
},
{
@@ -654,39 +663,59 @@
"source": [
"#| export\n",
"def add_conformal_error_intervals(\n",
- " fcst_df: DFType, \n",
+ " model_fcsts: np.array, \n",
" cs_df: DFType, \n",
- " model_names: List[str],\n",
- " level: List[Union[int, float]],\n",
+ " model: str,\n",
" cs_n_windows: int,\n",
" n_series: int,\n",
" horizon: int,\n",
- ") -> DFType:\n",
+ " level: Optional[List[Union[int, float]]] = None,\n",
+ " quantiles: Optional[List[float]] = None,\n",
+ ") -> Tuple[np.array, List[str]]:\n",
" \"\"\"\n",
" Adds conformal intervals to a `fcst_df` based on conformal scores `cs_df`.\n",
" `level` should be already sorted. This startegy creates prediction intervals\n",
" based on the absolute errors.\n",
" \"\"\"\n",
- " fcst_df = ufp.copy_if_pandas(fcst_df, deep=False)\n",
- " cuts = [lv / 100 for lv in level]\n",
- " for model in model_names:\n",
- " mean = fcst_df[model].to_numpy().ravel()\n",
- " scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)\n",
- " scores = scores.transpose(1, 0, 2)\n",
- " # restrict scores to horizon\n",
- " scores = scores[:,:,:horizon]\n",
- " quantiles = np.quantile(\n",
- " scores,\n",
- " cuts,\n",
- " axis=0,\n",
- " )\n",
- " quantiles = quantiles.reshape(len(cuts), -1)\n",
+ " assert level is not None or quantiles is not None, \"Either level or quantiles must be provided\"\n",
+ "\n",
+ " if quantiles is None and level is not None:\n",
+ " cuts = [lv / 100 for lv in level]\n",
+ " elif quantiles is not None:\n",
+ " cuts = quantiles\n",
+ "\n",
+ " mean = model_fcsts.ravel()\n",
+ " scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)\n",
+ " scores = scores.transpose(1, 0, 2)\n",
+ " # restrict scores to horizon\n",
+ " scores = scores[:,:,:horizon]\n",
+ " scores_quantiles = np.quantile(\n",
+ " scores,\n",
+ " cuts,\n",
+ " axis=0,\n",
+ " )\n",
+ " scores_quantiles = scores_quantiles.reshape(len(cuts), -1)\n",
+ " if quantiles is None and level is not None:\n",
" lo_cols = [f\"{model}-lo-{lv}\" for lv in reversed(level)]\n",
" hi_cols = [f\"{model}-hi-{lv}\" for lv in level]\n",
- " quantiles = np.vstack([mean - quantiles[::-1], mean + quantiles]).T\n",
- " columns = lo_cols + hi_cols\n",
- " fcst_df = ufp.assign_columns(fcst_df, columns, quantiles)\n",
- " return fcst_df"
+ " out_cols = lo_cols + hi_cols\n",
+ " scores_quantiles = np.vstack([mean - scores_quantiles[::-1], mean + scores_quantiles]).T\n",
+ " elif quantiles is not None:\n",
+ " out_cols = []\n",
+ " scores_quantiles_ls = []\n",
+ " for i, q in enumerate(quantiles):\n",
+ " out_cols.append(f\"{model}-ql{q}\")\n",
+ " if q < 0.5:\n",
+ " scores_quantiles_ls.append(mean - scores_quantiles[::-1][i])\n",
+ " elif q > 0.5:\n",
+ " scores_quantiles_ls.append(mean + scores_quantiles[i])\n",
+ " else:\n",
+ " scores_quantiles_ls.append(mean)\n",
+ " scores_quantiles = np.vstack(scores_quantiles_ls).T \n",
+ "\n",
+ " fcsts_with_intervals = np.hstack([model_fcsts, scores_quantiles])\n",
+ "\n",
+ " return fcsts_with_intervals, out_cols"
]
},
{
@@ -708,6 +737,45 @@
" )\n",
" return available_methods[method]"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| export\n",
+ "def level_to_quantiles(level: List[Union[int, float]]) -> List[float]:\n",
+ " \"\"\"\n",
+ " Converts a list of levels to a list of quantiles.\n",
+ " \"\"\"\n",
+ " level_set = set(level)\n",
+ " return sorted(list(set(sum([[(50 - l / 2) / 100, (50 + l / 2) / 100] for l in level_set], []))))\n",
+ "\n",
+ "def quantiles_to_level(quantiles: List[float]) -> List[Union[int, float]]:\n",
+ " \"\"\"\n",
+ " Converts a list of quantiles to a list of levels.\n",
+ " \"\"\"\n",
+ " quantiles_set = set(quantiles)\n",
+ " return sorted(set([int(round(100 - 200 * (q * (q < 0.5) + (1 - q) * (q >= 0.5)), 2)) for q in quantiles_set]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#| hide\n",
+ "# Test level_to_quantiles\n",
+ "level_base = [80, 90]\n",
+ "quantiles_base = [0.05, 0.1, 0.9, 0.95]\n",
+ "quantiles = level_to_quantiles(level_base)\n",
+ "level = quantiles_to_level(quantiles_base)\n",
+ "\n",
+ "assert quantiles == quantiles_base\n",
+ "assert level == level_base"
+ ]
}
],
"metadata": {
diff --git a/neuralforecast/_modidx.py b/neuralforecast/_modidx.py
index 445266155..360522d7c 100644
--- a/neuralforecast/_modidx.py
+++ b/neuralforecast/_modidx.py
@@ -164,6 +164,10 @@
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._conformity_scores': ( 'core.html#neuralforecast._conformity_scores',
'neuralforecast/core.py'),
+ 'neuralforecast.core.NeuralForecast._generate_forecasts': ( 'core.html#neuralforecast._generate_forecasts',
+ 'neuralforecast/core.py'),
+ 'neuralforecast.core.NeuralForecast._get_column_name': ( 'core.html#neuralforecast._get_column_name',
+ 'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._get_model_names': ( 'core.html#neuralforecast._get_model_names',
'neuralforecast/core.py'),
'neuralforecast.core.NeuralForecast._get_needed_exog': ( 'core.html#neuralforecast._get_needed_exog',
@@ -284,10 +288,14 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss._compute_weights': ( 'losses.pytorch.html#distributionloss._compute_weights',
'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.DistributionLoss._domain_map': ( 'losses.pytorch.html#distributionloss._domain_map',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss.get_distribution': ( 'losses.pytorch.html#distributionloss.get_distribution',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.DistributionLoss.sample': ( 'losses.pytorch.html#distributionloss.sample',
'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.DistributionLoss.update_quantile': ( 'losses.pytorch.html#distributionloss.update_quantile',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM': ( 'losses.pytorch.html#gmm',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.__call__': ( 'losses.pytorch.html#gmm.__call__',
@@ -296,12 +304,14 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.domain_map': ( 'losses.pytorch.html#gmm.domain_map',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.GMM.neglog_likelihood': ( 'losses.pytorch.html#gmm.neglog_likelihood',
- 'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.GMM.get_distribution': ( 'losses.pytorch.html#gmm.get_distribution',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.sample': ( 'losses.pytorch.html#gmm.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.GMM.scale_decouple': ( 'losses.pytorch.html#gmm.scale_decouple',
'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.GMM.update_quantile': ( 'losses.pytorch.html#gmm.update_quantile',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberLoss': ( 'losses.pytorch.html#huberloss',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.HuberLoss.__call__': ( 'losses.pytorch.html#huberloss.__call__',
@@ -342,6 +352,8 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.ISQF.crps': ( 'losses.pytorch.html#isqf.crps',
'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.ISQF.mean': ( 'losses.pytorch.html#isqf.mean',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAE': ( 'losses.pytorch.html#mae',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.MAE.__call__': ( 'losses.pytorch.html#mae.__call__',
@@ -384,12 +396,14 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.domain_map': ( 'losses.pytorch.html#nbmm.domain_map',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.NBMM.neglog_likelihood': ( 'losses.pytorch.html#nbmm.neglog_likelihood',
- 'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.NBMM.get_distribution': ( 'losses.pytorch.html#nbmm.get_distribution',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.sample': ( 'losses.pytorch.html#nbmm.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.NBMM.scale_decouple': ( 'losses.pytorch.html#nbmm.scale_decouple',
'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.NBMM.update_quantile': ( 'losses.pytorch.html#nbmm.update_quantile',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM': ( 'losses.pytorch.html#pmm',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.__call__': ( 'losses.pytorch.html#pmm.__call__',
@@ -398,12 +412,14 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.domain_map': ( 'losses.pytorch.html#pmm.domain_map',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.PMM.neglog_likelihood': ( 'losses.pytorch.html#pmm.neglog_likelihood',
- 'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.PMM.get_distribution': ( 'losses.pytorch.html#pmm.get_distribution',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.sample': ( 'losses.pytorch.html#pmm.sample',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.PMM.scale_decouple': ( 'losses.pytorch.html#pmm.scale_decouple',
'neuralforecast/losses/pytorch.py'),
+ 'neuralforecast.losses.pytorch.PMM.update_quantile': ( 'losses.pytorch.html#pmm.update_quantile',
+ 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.QuantileLayer': ( 'losses.pytorch.html#quantilelayer',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.QuantileLayer.__init__': ( 'losses.pytorch.html#quantilelayer.__init__',
@@ -454,8 +470,6 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch._weighted_mean': ( 'losses.pytorch.html#_weighted_mean',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.bernoulli_domain_map': ( 'losses.pytorch.html#bernoulli_domain_map',
- 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.bernoulli_scale_decouple': ( 'losses.pytorch.html#bernoulli_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.est_alpha': ( 'losses.pytorch.html#est_alpha',
@@ -470,16 +484,10 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.level_to_outputs': ( 'losses.pytorch.html#level_to_outputs',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.nbinomial_domain_map': ( 'losses.pytorch.html#nbinomial_domain_map',
- 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.nbinomial_scale_decouple': ( 'losses.pytorch.html#nbinomial_scale_decouple',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.normal_domain_map': ( 'losses.pytorch.html#normal_domain_map',
- 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.normal_scale_decouple': ( 'losses.pytorch.html#normal_scale_decouple',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.poisson_domain_map': ( 'losses.pytorch.html#poisson_domain_map',
- 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.poisson_scale_decouple': ( 'losses.pytorch.html#poisson_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.quantiles_to_outputs': ( 'losses.pytorch.html#quantiles_to_outputs',
@@ -496,8 +504,6 @@
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.sCRPS.__init__': ( 'losses.pytorch.html#scrps.__init__',
'neuralforecast/losses/pytorch.py'),
- 'neuralforecast.losses.pytorch.student_domain_map': ( 'losses.pytorch.html#student_domain_map',
- 'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.student_scale_decouple': ( 'losses.pytorch.html#student_scale_decouple',
'neuralforecast/losses/pytorch.py'),
'neuralforecast.losses.pytorch.tweedie_domain_map': ( 'losses.pytorch.html#tweedie_domain_map',
@@ -589,15 +595,7 @@
'neuralforecast.models.deepar.DeepAR.__init__': ( 'models.deepar.html#deepar.__init__',
'neuralforecast/models/deepar.py'),
'neuralforecast.models.deepar.DeepAR.forward': ( 'models.deepar.html#deepar.forward',
- 'neuralforecast/models/deepar.py'),
- 'neuralforecast.models.deepar.DeepAR.predict_step': ( 'models.deepar.html#deepar.predict_step',
- 'neuralforecast/models/deepar.py'),
- 'neuralforecast.models.deepar.DeepAR.train_forward': ( 'models.deepar.html#deepar.train_forward',
- 'neuralforecast/models/deepar.py'),
- 'neuralforecast.models.deepar.DeepAR.training_step': ( 'models.deepar.html#deepar.training_step',
- 'neuralforecast/models/deepar.py'),
- 'neuralforecast.models.deepar.DeepAR.validation_step': ( 'models.deepar.html#deepar.validation_step',
- 'neuralforecast/models/deepar.py')},
+ 'neuralforecast/models/deepar.py')},
'neuralforecast.models.deepnpts': { 'neuralforecast.models.deepnpts.DeepNPTS': ( 'models.deepnpts.html#deepnpts',
'neuralforecast/models/deepnpts.py'),
'neuralforecast.models.deepnpts.DeepNPTS.__init__': ( 'models.deepnpts.html#deepnpts.__init__',
@@ -1304,14 +1302,6 @@
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.MixingLayer.forward': ( 'models.tsmixer.html#mixinglayer.forward',
'neuralforecast/models/tsmixer.py'),
- 'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d': ( 'models.tsmixer.html#reversibleinstancenorm1d',
- 'neuralforecast/models/tsmixer.py'),
- 'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d.__init__': ( 'models.tsmixer.html#reversibleinstancenorm1d.__init__',
- 'neuralforecast/models/tsmixer.py'),
- 'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d.forward': ( 'models.tsmixer.html#reversibleinstancenorm1d.forward',
- 'neuralforecast/models/tsmixer.py'),
- 'neuralforecast.models.tsmixer.ReversibleInstanceNorm1d.reverse': ( 'models.tsmixer.html#reversibleinstancenorm1d.reverse',
- 'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TSMixer': ( 'models.tsmixer.html#tsmixer',
'neuralforecast/models/tsmixer.py'),
'neuralforecast.models.tsmixer.TSMixer.__init__': ( 'models.tsmixer.html#tsmixer.__init__',
@@ -1494,5 +1484,9 @@
'neuralforecast/utils.py'),
'neuralforecast.utils.get_prediction_interval_method': ( 'utils.html#get_prediction_interval_method',
'neuralforecast/utils.py'),
+ 'neuralforecast.utils.level_to_quantiles': ( 'utils.html#level_to_quantiles',
+ 'neuralforecast/utils.py'),
+ 'neuralforecast.utils.quantiles_to_level': ( 'utils.html#quantiles_to_level',
+ 'neuralforecast/utils.py'),
'neuralforecast.utils.time_features_from_frequency_str': ( 'utils.html#time_features_from_frequency_str',
'neuralforecast/utils.py')}}}
diff --git a/neuralforecast/auto.py b/neuralforecast/auto.py
index b3c85892a..cb69edc49 100644
--- a/neuralforecast/auto.py
+++ b/neuralforecast/auto.py
@@ -63,10 +63,10 @@ class AutoRNN(BaseAuto):
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
- "encoder_hidden_size": tune.choice([50, 100, 200, 300]),
+ "encoder_hidden_size": tune.choice([16, 32, 64, 128]),
"encoder_n_layers": tune.randint(1, 4),
"context_size": tune.choice([5, 10, 50]),
- "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
+ "decoder_hidden_size": tune.choice([16, 32, 64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
@@ -138,10 +138,10 @@ class AutoLSTM(BaseAuto):
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
- "encoder_hidden_size": tune.choice([50, 100, 200, 300]),
+ "encoder_hidden_size": tune.choice([16, 32, 64, 128]),
"encoder_n_layers": tune.randint(1, 4),
"context_size": tune.choice([5, 10, 50]),
- "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
+ "decoder_hidden_size": tune.choice([16, 32, 64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
@@ -209,10 +209,10 @@ class AutoGRU(BaseAuto):
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
- "encoder_hidden_size": tune.choice([50, 100, 200, 300]),
+ "encoder_hidden_size": tune.choice([16, 32, 64, 128]),
"encoder_n_layers": tune.randint(1, 4),
"context_size": tune.choice([5, 10, 50]),
- "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
+ "decoder_hidden_size": tune.choice([16, 32, 64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
@@ -280,9 +280,9 @@ class AutoTCN(BaseAuto):
"input_size_multiplier": [-1, 4, 16, 64],
"inference_input_size_multiplier": [-1],
"h": None,
- "encoder_hidden_size": tune.choice([50, 100, 200, 300]),
+ "encoder_hidden_size": tune.choice([16, 32, 64, 128]),
"context_size": tune.choice([5, 10, 50]),
- "decoder_hidden_size": tune.choice([64, 128]),
+ "decoder_hidden_size": tune.choice([32, 64]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
@@ -422,10 +422,10 @@ class AutoDilatedRNN(BaseAuto):
"inference_input_size_multiplier": [-1],
"h": None,
"cell_type": tune.choice(["LSTM", "GRU"]),
- "encoder_hidden_size": tune.choice([50, 100, 200, 300]),
+ "encoder_hidden_size": tune.choice([16, 32, 64, 128]),
"dilations": tune.choice([[[1, 2], [4, 8]], [[1, 2, 4, 8]]]),
"context_size": tune.choice([5, 10, 50]),
- "decoder_hidden_size": tune.choice([64, 128, 256, 512]),
+ "decoder_hidden_size": tune.choice([16, 32, 64, 128]),
"learning_rate": tune.loguniform(1e-4, 1e-1),
"max_steps": tune.choice([500, 1000]),
"batch_size": tune.choice([16, 32]),
diff --git a/neuralforecast/common/_base_auto.py b/neuralforecast/common/_base_auto.py
index a44f86267..2a306cae9 100644
--- a/neuralforecast/common/_base_auto.py
+++ b/neuralforecast/common/_base_auto.py
@@ -178,7 +178,11 @@ def config_f(trial):
self.callbacks = callbacks
# Base Class attributes
- self.SAMPLING_TYPE = cls_model.SAMPLING_TYPE
+ self.EXOGENOUS_FUTR = cls_model.EXOGENOUS_FUTR
+ self.EXOGENOUS_HIST = cls_model.EXOGENOUS_HIST
+ self.EXOGENOUS_STAT = cls_model.EXOGENOUS_STAT
+ self.MULTIVARIATE = cls_model.MULTIVARIATE
+ self.RECURRENT = cls_model.RECURRENT
def __repr__(self):
return type(self).__name__ if self.alias is None else self.alias
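
`BaseAuto` now mirrors the wrapped model's capability flags instead of `SAMPLING_TYPE`, so downstream code can query them uniformly. An illustrative check, assuming `AutoTSMixerx` wraps the `TSMixerx` class shown earlier in this patch:

from neuralforecast.auto import AutoTSMixerx
from neuralforecast.losses.pytorch import MAE

auto = AutoTSMixerx(h=12, n_series=2, loss=MAE(), num_samples=1)

# Flags are copied from the wrapped model class in BaseAuto.__init__
assert auto.MULTIVARIATE       # TSMixerx is multivariate
assert not auto.RECURRENT      # and produces direct forecasts
assert auto.EXOGENOUS_FUTR     # with support for future exogenous variables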
diff --git a/neuralforecast/common/_base_model.py b/neuralforecast/common/_base_model.py
index 38482c3a9..aec754e15 100644
--- a/neuralforecast/common/_base_model.py
+++ b/neuralforecast/common/_base_model.py
@@ -10,19 +10,25 @@
from contextlib import contextmanager
from copy import deepcopy
from dataclasses import dataclass
+from typing import List, Dict, Union
import fsspec
import numpy as np
import torch
import torch.nn as nn
+import torch.nn.functional as F
import pytorch_lightning as pl
+import neuralforecast.losses.pytorch as losses
+
+from ..losses.pytorch import BasePointLoss, DistributionLoss
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from neuralforecast.tsdataset import (
TimeSeriesDataModule,
BaseTimeSeriesDataset,
_DistributedTimeSeriesDataModule,
)
-from ..losses.pytorch import IQLoss
+from ._scalers import TemporalNorm
+from ..utils import get_indexer_raise_missing
# %% ../../nbs/common.base_model.ipynb 3
@dataclass
@@ -71,27 +77,104 @@ def tensor_to_numpy(tensor: torch.Tensor) -> np.ndarray:
# %% ../../nbs/common.base_model.ipynb 6
class BaseModel(pl.LightningModule):
- EXOGENOUS_FUTR = True
- EXOGENOUS_HIST = True
- EXOGENOUS_STAT = True
+ EXOGENOUS_FUTR = True # If the model can handle future exogenous variables
+ EXOGENOUS_HIST = True # If the model can handle historical exogenous variables
+ EXOGENOUS_STAT = True # If the model can handle static exogenous variables
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
- random_seed,
- loss,
- valid_loss,
- optimizer,
- optimizer_kwargs,
- lr_scheduler,
- lr_scheduler_kwargs,
- futr_exog_list,
- hist_exog_list,
- stat_exog_list,
- max_steps,
- early_stop_patience_steps,
+ h: int,
+ input_size: int,
+ loss: Union[BasePointLoss, DistributionLoss, nn.Module],
+ valid_loss: Union[BasePointLoss, DistributionLoss, nn.Module],
+ learning_rate: float,
+ max_steps: int,
+ val_check_steps: int,
+ batch_size: int,
+ valid_batch_size: Union[int, None],
+ windows_batch_size: int,
+ inference_windows_batch_size: Union[int, None],
+ start_padding_enabled: bool,
+ n_series: Union[int, None] = None,
+ n_samples: Union[int, None] = 100,
+ h_train: int = 1,
+ inference_input_size: Union[int, None] = None,
+ step_size: int = 1,
+ num_lr_decays: int = 0,
+ early_stop_patience_steps: int = -1,
+ scaler_type: str = "identity",
+ futr_exog_list: Union[List, None] = None,
+ hist_exog_list: Union[List, None] = None,
+ stat_exog_list: Union[List, None] = None,
+ exclude_insample_y: Union[bool, None] = False,
+ drop_last_loader: Union[bool, None] = False,
+ random_seed: Union[int, None] = 1,
+ alias: Union[str, None] = None,
+ optimizer: Union[torch.optim.Optimizer, None] = None,
+ optimizer_kwargs: Union[Dict, None] = None,
+ lr_scheduler: Union[torch.optim.lr_scheduler.LRScheduler, None] = None,
+ lr_scheduler_kwargs: Union[Dict, None] = None,
+ dataloader_kwargs=None,
**trainer_kwargs,
):
super().__init__()
+
+ # Multivariate checks
+ if self.MULTIVARIATE and n_series is None:
+ raise Exception(
+ f"{type(self).__name__} is a multivariate model. Please set n_series to the number of unique time series in your dataset."
+ )
+ if not self.MULTIVARIATE:
+ if n_series is not None:
+ warnings.warn(
+ f"{type(self).__name__} is a univariate model. Parameter n_series is ignored."
+ )
+ n_series = 1
+ self.n_series = n_series
+
+ # Protections for previous recurrent models
+ if input_size < 1:
+ input_size = 3 * h
+ warnings.warn(
+ f"Input size too small. Automatically setting input size to 3 * horizon = {input_size}"
+ )
+
+ if inference_input_size is None:
+ inference_input_size = input_size
+ elif inference_input_size is not None and inference_input_size < 1:
+ inference_input_size = input_size
+ warnings.warn(
+ f"Inference input size too small. Automatically setting inference input size to input_size = {input_size}"
+ )
+
+ # For recurrent models we need one additional input as we need to shift insample_y to use it as input
+ if self.RECURRENT:
+ input_size += 1
+ inference_input_size += 1
+
+ # Attributes needed for recurrent models
+ self.horizon_backup = h
+ self.input_size_backup = input_size
+ self.n_samples = n_samples
+ if self.RECURRENT:
+ if (
+ hasattr(loss, "horizon_weight")
+ and loss.horizon_weight is not None
+ and h_train != h
+ ):
+ warnings.warn(
+ f"Setting h_train={h} to match the horizon_weight length."
+ )
+ h_train = h
+ self.h_train = h_train
+ self.inference_input_size = inference_input_size
+ self.rnn_state = None
+ self.maintain_state = False
+
with warnings.catch_warnings(record=False):
warnings.filterwarnings("ignore")
# the following line issues a warning about the loss attribute being saved
@@ -106,8 +189,8 @@ def __init__(
self.valid_loss = loss
else:
self.valid_loss = valid_loss
- self.train_trajectories = []
- self.valid_trajectories = []
+ self.train_trajectories: List = []
+ self.valid_trajectories: List = []
# Optimization
if optimizer is not None and not issubclass(optimizer, torch.optim.Optimizer):
@@ -153,14 +236,41 @@ def __init__(
f"{type(self).__name__} does not support static exogenous variables."
)
- # Implicit Quantile Loss
- if isinstance(self.loss, IQLoss):
- if not isinstance(self.valid_loss, IQLoss):
+ # Protections for loss functions
+ if isinstance(self.loss, (losses.IQLoss, losses.MQLoss, losses.HuberMQLoss)):
+ loss_type = type(self.loss)
+ if not isinstance(self.valid_loss, loss_type):
+ raise Exception(
+ f"Please set valid_loss={type(self.loss).__name__}() when training with {type(self.loss).__name__}"
+ )
+ if isinstance(self.valid_loss, losses.IQLoss):
+ valid_loss_type = type(self.valid_loss)
+ if not isinstance(self.loss, valid_loss_type):
raise Exception(
- "Please set valid_loss to IQLoss() when training with IQLoss"
+ f"Please set loss={type(self.valid_loss).__name__}() when validating with {type(self.valid_loss).__name__}"
)
- if isinstance(self.valid_loss, IQLoss) and not isinstance(self.loss, IQLoss):
- raise Exception("Please set loss to IQLoss() when validating with IQLoss")
+
+ # Deny impossible loss / valid_loss combinations
+ if (
+ isinstance(self.loss, losses.BasePointLoss)
+ and self.valid_loss.is_distribution_output
+ ):
+ raise Exception(
+ f"Validation with distribution loss {type(self.valid_loss).__name__} is not possible when using loss={type(self.loss).__name__}. Please use a point valid_loss (MAE, MSE, ...)"
+ )
+ elif self.valid_loss.is_distribution_output and self.valid_loss is not loss:
+ # A warning or exception might be warranted here; for now, fall back to the training loss.
+ self.valid_loss = loss
+
+ if isinstance(self.loss, (losses.relMSE, losses.Accuracy, losses.sCRPS)):
+ raise Exception(
+ f"{type(self.loss).__name__} cannot be used for training. Please use another loss function (MAE, MSE, ...)"
+ )
+
+ if isinstance(self.valid_loss, (losses.relMSE)):
+ raise Exception(
+ f"{type(self.valid_loss).__name__} cannot be used for validation. Please use another valid_loss (MAE, MSE, ...)"
+ )
## Trainer arguments ##
# Max steps, validation steps and check_val_every_n_epoch
@@ -191,7 +301,78 @@ def __init__(
if trainer_kwargs.get("enable_checkpointing", None) is None:
trainer_kwargs["enable_checkpointing"] = False
+ # Set other attributes
self.trainer_kwargs = trainer_kwargs
+ self.h = h
+ self.input_size = input_size
+ self.windows_batch_size = windows_batch_size
+ self.start_padding_enabled = start_padding_enabled
+
+ # Padder to complete train windows,
+ # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
+ if start_padding_enabled:
+ self.padder_train = nn.ConstantPad1d(
+ padding=(self.input_size - 1, self.h), value=0.0
+ )
+ else:
+ self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)
+
+ # Batch sizes
+ if self.MULTIVARIATE and n_series is not None:
+ self.batch_size = max(batch_size, n_series)
+ else:
+ self.batch_size = batch_size
+ if valid_batch_size is None:
+ self.valid_batch_size = batch_size
+ else:
+ self.valid_batch_size = valid_batch_size
+ if inference_windows_batch_size is None:
+ self.inference_windows_batch_size = windows_batch_size
+ else:
+ self.inference_windows_batch_size = inference_windows_batch_size
+
+ # Optimization
+ self.learning_rate = learning_rate
+ self.max_steps = max_steps
+ self.num_lr_decays = num_lr_decays
+ self.lr_decay_steps = (
+ max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
+ )
+ self.early_stop_patience_steps = early_stop_patience_steps
+ self.val_check_steps = val_check_steps
+ self.windows_batch_size = windows_batch_size
+ self.step_size = step_size
+
+ # If the model does not support exogenous, it can't support exclude_insample_y
+ if exclude_insample_y and not (
+ self.EXOGENOUS_FUTR or self.EXOGENOUS_HIST or self.EXOGENOUS_STAT
+ ):
+ raise Exception(
+ f"{type(self).__name__} does not support `exclude_insample_y=True`. Please set `exclude_insample_y=False`"
+ )
+
+ self.exclude_insample_y = exclude_insample_y
+
+ # Scaler
+ self.scaler = TemporalNorm(
+ scaler_type=scaler_type,
+ dim=1, # Time dimension is 1.
+ num_features=1 + len(self.hist_exog_list) + len(self.futr_exog_list),
+ )
+
+ # Fit arguments
+ self.val_size = 0
+ self.test_size = 0
+
+ # Model state
+ self.decompose_forecast = False
+
+ # DataModule arguments
+ self.dataloader_kwargs = dataloader_kwargs
+ self.drop_last_loader = drop_last_loader
+ # used by on_validation_epoch_end hook
+ self.validation_step_outputs: List = []
+ self.alias = alias
def __repr__(self):
return type(self).__name__ if self.alias is None else self.alias
@@ -228,21 +409,13 @@ def _get_temporal_exogenous_cols(self, temporal_cols):
set(temporal_cols.tolist()) & set(self.hist_exog_list + self.futr_exog_list)
)
- def _set_quantile_for_iqloss(self, **data_module_kwargs):
- if "quantile" in data_module_kwargs:
- if not isinstance(self.loss, IQLoss):
- raise Exception(
- "Please train with loss=IQLoss() to make use of the quantile argument."
- )
- else:
- self.quantile = data_module_kwargs["quantile"]
- data_module_kwargs.pop("quantile")
- self.loss.update_quantile(q=self.quantile)
- elif isinstance(self.loss, IQLoss):
- self.quantile = 0.5
- self.loss.update_quantile(q=self.quantile)
-
- return data_module_kwargs
+ def _set_quantiles(self, quantiles=None):
+ if quantiles is None and isinstance(self.loss, losses.IQLoss):
+ self.loss.update_quantile(q=[0.5])
+ elif hasattr(self.loss, "update_quantile") and callable(
+ self.loss.update_quantile
+ ):
+ self.loss.update_quantile(q=quantiles)
def _fit_distributed(
self,
@@ -465,3 +638,932 @@ def load(cls, path, **kwargs):
else: # pytorch<2.1
model.load_state_dict(content["state_dict"], strict=True)
return model
+
+ def _create_windows(self, batch, step, w_idxs=None):
+ # Parse common data
+ window_size = self.input_size + self.h
+ temporal_cols = batch["temporal_cols"]
+ temporal = batch["temporal"]
+
+ if step == "train":
+ if self.val_size + self.test_size > 0:
+ cutoff = -self.val_size - self.test_size
+ temporal = temporal[:, :, :cutoff]
+
+ temporal = self.padder_train(temporal)
+
+ if temporal.shape[-1] < window_size:
+ raise Exception(
+ "Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True"
+ )
+
+ windows = temporal.unfold(
+ dimension=-1, size=window_size, step=self.step_size
+ )
+
+ if self.MULTIVARIATE:
+ # [n_series, C, Ws, L + h] -> [Ws, L + h, C, n_series]
+ windows = windows.permute(2, 3, 1, 0)
+ else:
+ # [n_series, C, Ws, L + h] -> [Ws * n_series, L + h, C, 1]
+ windows_per_serie = windows.shape[2]
+ windows = windows.permute(0, 2, 3, 1)
+ windows = windows.flatten(0, 1)
+ windows = windows.unsqueeze(-1)
+
+ # Sample and Available conditions
+ available_idx = temporal_cols.get_loc("available_mask")
+ available_condition = windows[:, : self.input_size, available_idx]
+ available_condition = torch.sum(
+ available_condition, axis=(1, -1)
+ ) # Sum over time & series dimension
+ final_condition = available_condition > 0
+
+ if self.h > 0:
+ sample_condition = windows[:, self.input_size :, available_idx]
+ sample_condition = torch.sum(
+ sample_condition, axis=(1, -1)
+ ) # Sum over time & series dimension
+ final_condition = (sample_condition > 0) & (available_condition > 0)
+
+ windows = windows[final_condition]
+
+ # Parse Static data to match windows
+ static = batch.get("static", None)
+ static_cols = batch.get("static_cols", None)
+
+ # Repeat static if univariate: [n_series, S] -> [Ws * n_series, S]
+ if static is not None and not self.MULTIVARIATE:
+ static = torch.repeat_interleave(
+ static, repeats=windows_per_serie, dim=0
+ )
+ static = static[final_condition]
+
+ # Protection of empty windows
+ if final_condition.sum() == 0:
+ raise Exception("No windows available for training")
+
+ # Sample windows
+ if self.windows_batch_size is not None:
+ n_windows = windows.shape[0]
+ w_idxs = np.random.choice(
+ n_windows,
+ size=self.windows_batch_size,
+ replace=(n_windows < self.windows_batch_size),
+ )
+ windows = windows[w_idxs]
+
+ if static is not None and not self.MULTIVARIATE:
+ static = static[w_idxs]
+
+ windows_batch = dict(
+ temporal=windows,
+ temporal_cols=temporal_cols,
+ static=static,
+ static_cols=static_cols,
+ )
+ return windows_batch
+
+ elif step in ["predict", "val"]:
+
+ if step == "predict":
+ initial_input = temporal.shape[-1] - self.test_size
+ if (
+ initial_input <= self.input_size
+ ): # There is not enough data to predict first timestamp
+ temporal = F.pad(
+ temporal,
+ pad=(self.input_size - initial_input, 0),
+ mode="constant",
+ value=0.0,
+ )
+ predict_step_size = self.predict_step_size
+ cutoff = -self.input_size - self.test_size
+ temporal = temporal[:, :, cutoff:]
+
+ elif step == "val":
+ predict_step_size = self.step_size
+ cutoff = -self.input_size - self.val_size - self.test_size
+ if self.test_size > 0:
+ temporal = batch["temporal"][:, :, cutoff : -self.test_size]
+ else:
+ temporal = batch["temporal"][:, :, cutoff:]
+ if temporal.shape[-1] < window_size:
+ initial_input = temporal.shape[-1] - self.val_size
+ temporal = F.pad(
+ temporal,
+ pad=(self.input_size - initial_input, 0),
+ mode="constant",
+ value=0.0,
+ )
+
+ if (
+ (step == "predict")
+ and (self.test_size == 0)
+ and (len(self.futr_exog_list) == 0)
+ ):
+ temporal = F.pad(temporal, pad=(0, self.h), mode="constant", value=0.0)
+
+ windows = temporal.unfold(
+ dimension=-1, size=window_size, step=predict_step_size
+ )
+
+ static = batch.get("static", None)
+ static_cols = batch.get("static_cols", None)
+
+ if self.MULTIVARIATE:
+ # [n_series, C, Ws, L + h] -> [Ws, L + h, C, n_series]
+ windows = windows.permute(2, 3, 1, 0)
+ else:
+ # [n_series, C, Ws, L + h] -> [Ws * n_series, L + h, C, 1]
+ windows_per_serie = windows.shape[2]
+ windows = windows.permute(0, 2, 3, 1)
+ windows = windows.flatten(0, 1)
+ windows = windows.unsqueeze(-1)
+ if static is not None:
+ static = torch.repeat_interleave(
+ static, repeats=windows_per_serie, dim=0
+ )
+
+ # Sample windows for batched prediction
+ if w_idxs is not None:
+ windows = windows[w_idxs]
+ if static is not None and not self.MULTIVARIATE:
+ static = static[w_idxs]
+
+ windows_batch = dict(
+ temporal=windows,
+ temporal_cols=temporal_cols,
+ static=static,
+ static_cols=static_cols,
+ )
+ return windows_batch
+ else:
+ raise ValueError(f"Unknown step {step}")
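+    # Illustrative sketch (not part of this change): how `unfold` slices a series
+    # into overlapping windows of size input_size + h and how the univariate
+    # reshape above flattens them. Shapes and values here are example assumptions.
+    #   import torch
+    #   temporal = torch.arange(2 * 3 * 10.0).reshape(2, 3, 10)  # [n_series=2, C=3, T=10]
+    #   windows = temporal.unfold(dimension=-1, size=6, step=1)  # [2, 3, 5, 6] = [n_series, C, Ws, L+h]
+    #   windows = windows.permute(0, 2, 3, 1).flatten(0, 1).unsqueeze(-1)
+    #   print(windows.shape)  # torch.Size([10, 6, 3, 1]) = [Ws * n_series, L+h, C, 1]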
+
+ def _normalization(self, windows, y_idx):
+        # windows are already filtered by train/validation/test
+        # by the `_create_windows` method, so there is no leakage risk
+ temporal = windows["temporal"] # [Ws, L + h, C, n_series]
+ temporal_cols = windows["temporal_cols"].copy() # [Ws, L + h, C, n_series]
+
+ # To avoid leakage uses only the lags
+ temporal_data_cols = self._get_temporal_exogenous_cols(
+ temporal_cols=temporal_cols
+ )
+ temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
+ temporal_idxs = np.append(y_idx, temporal_idxs)
+ temporal_data = temporal[:, :, temporal_idxs]
+ temporal_mask = temporal[:, :, temporal_cols.get_loc("available_mask")].clone()
+ if self.h > 0:
+ temporal_mask[:, -self.h :] = 0.0
+
+ # Normalize. self.scaler stores the shift and scale for inverse transform
+ temporal_mask = temporal_mask.unsqueeze(
+ 2
+ ) # Add channel dimension for scaler.transform.
+ temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
+
+ # Replace values in windows dict
+ temporal[:, :, temporal_idxs] = temporal_data
+ windows["temporal"] = temporal
+
+ return windows
+
+ def _inv_normalization(self, y_hat, y_idx):
+ # Receives window predictions [Ws, h, output, n_series]
+ # Broadcasts scale if necessary and inverts normalization
+ add_channel_dim = y_hat.ndim > 3
+ y_loc, y_scale = self._get_loc_scale(y_idx, add_channel_dim=add_channel_dim)
+ y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
+
+ return y_hat
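+    # Minimal sketch of the shift/scale round trip that `self.scaler` performs,
+    # assuming a plain standard scaler; the real TemporalNorm also handles masks
+    # and several scaler types.
+    #   import torch
+    #   x = torch.randn(4, 24, 1)                # [Ws, L+h, 1]
+    #   shift = x.mean(dim=1, keepdim=True)
+    #   scale = x.std(dim=1, keepdim=True)
+    #   z = (x - shift) / scale                  # conceptually, `transform`
+    #   x_rec = z * scale + shift                # conceptually, `inverse_transform`
+    #   assert torch.allclose(x, x_rec, atol=1e-5)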
+
+ def _parse_windows(self, batch, windows):
+ # windows: [Ws, L + h, C, n_series]
+
+ # Filter insample lags from outsample horizon
+ y_idx = batch["y_idx"]
+ mask_idx = batch["temporal_cols"].get_loc("available_mask")
+
+ insample_y = windows["temporal"][:, : self.input_size, y_idx]
+ insample_mask = windows["temporal"][:, : self.input_size, mask_idx]
+
+ # Declare additional information
+ outsample_y = None
+ outsample_mask = None
+ hist_exog = None
+ futr_exog = None
+ stat_exog = None
+
+ if self.h > 0:
+ outsample_y = windows["temporal"][:, self.input_size :, y_idx]
+ outsample_mask = windows["temporal"][:, self.input_size :, mask_idx]
+
+ # Recurrent models at t predict t+1, so we shift the input (insample_y) by one
+ if self.RECURRENT:
+ insample_y = torch.cat((insample_y, outsample_y[:, :-1]), dim=1)
+ insample_mask = torch.cat((insample_mask, outsample_mask[:, :-1]), dim=1)
+ self.maintain_state = False
+
+ if len(self.hist_exog_list):
+ hist_exog_idx = get_indexer_raise_missing(
+ windows["temporal_cols"], self.hist_exog_list
+ )
+ if self.RECURRENT:
+ hist_exog = windows["temporal"][:, :, hist_exog_idx]
+ hist_exog[:, self.input_size :] = 0.0
+ hist_exog = hist_exog[:, 1:]
+ else:
+ hist_exog = windows["temporal"][:, : self.input_size, hist_exog_idx]
+ if not self.MULTIVARIATE:
+ hist_exog = hist_exog.squeeze(-1)
+ else:
+ hist_exog = hist_exog.swapaxes(1, 2)
+
+ if len(self.futr_exog_list):
+ futr_exog_idx = get_indexer_raise_missing(
+ windows["temporal_cols"], self.futr_exog_list
+ )
+ futr_exog = windows["temporal"][:, :, futr_exog_idx]
+ if self.RECURRENT:
+ futr_exog = futr_exog[:, 1:]
+ if not self.MULTIVARIATE:
+ futr_exog = futr_exog.squeeze(-1)
+ else:
+ futr_exog = futr_exog.swapaxes(1, 2)
+
+ if len(self.stat_exog_list):
+ static_idx = get_indexer_raise_missing(
+ windows["static_cols"], self.stat_exog_list
+ )
+ stat_exog = windows["static"][:, static_idx]
+
+        # TODO: think of a better way to remove insample_y features
+ if self.exclude_insample_y:
+ insample_y = insample_y * 0
+
+ return (
+ insample_y,
+ insample_mask,
+ outsample_y,
+ outsample_mask,
+ hist_exog,
+ futr_exog,
+ stat_exog,
+ )
+
+ def _get_loc_scale(self, y_idx, add_channel_dim=False):
+ # [B, L, C, n_series] -> [B, L, n_series]
+ y_scale = self.scaler.x_scale[:, :, y_idx]
+ y_loc = self.scaler.x_shift[:, :, y_idx]
+
+ # [B, L, n_series] -> [B, L, n_series, 1]
+ if add_channel_dim:
+ y_scale = y_scale.unsqueeze(-1)
+ y_loc = y_loc.unsqueeze(-1)
+
+ return y_loc, y_scale
+
+ def _compute_valid_loss(
+ self, insample_y, outsample_y, output, outsample_mask, y_idx
+ ):
+ if self.loss.is_distribution_output:
+ y_loc, y_scale = self._get_loc_scale(y_idx)
+ distr_args = self.loss.scale_decouple(
+ output=output, loc=y_loc, scale=y_scale
+ )
+ if isinstance(
+ self.valid_loss, (losses.sCRPS, losses.MQLoss, losses.HuberMQLoss)
+ ):
+ _, _, quants = self.loss.sample(distr_args=distr_args)
+ output = quants
+ elif isinstance(self.valid_loss, losses.BasePointLoss):
+ distr = self.loss.get_distribution(distr_args=distr_args)
+ output = distr.mean
+
+ # Validation Loss evaluation
+ if self.valid_loss.is_distribution_output:
+ valid_loss = self.valid_loss(
+ y=outsample_y, distr_args=distr_args, mask=outsample_mask
+ )
+ else:
+ output = self._inv_normalization(y_hat=output, y_idx=y_idx)
+ valid_loss = self.valid_loss(
+ y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask
+ )
+ return valid_loss
+
+ def _validate_step_recurrent_batch(
+ self, insample_y, insample_mask, futr_exog, hist_exog, stat_exog, y_idx
+ ):
+ # Remember state in network and set horizon to 1
+ self.rnn_state = None
+ self.maintain_state = True
+ self.h = 1
+
+ # Initialize results array
+ n_outputs = self.loss.outputsize_multiplier
+ y_hat = torch.zeros(
+ (insample_y.shape[0], self.horizon_backup, self.n_series * n_outputs),
+ device=insample_y.device,
+ dtype=insample_y.dtype,
+ )
+
+ # First step prediction
+ tau = 0
+
+ # Set exogenous
+ hist_exog_current = None
+ if self.hist_exog_size > 0:
+ hist_exog_current = hist_exog[:, : self.input_size + tau - 1]
+
+ futr_exog_current = None
+ if self.futr_exog_size > 0:
+ futr_exog_current = futr_exog[:, : self.input_size + tau - 1]
+
+ # First forecast step
+ y_hat[:, tau], insample_y = self._validate_step_recurrent_single(
+ insample_y=insample_y[:, : self.input_size + tau - 1],
+ insample_mask=insample_mask[:, : self.input_size + tau - 1],
+ hist_exog=hist_exog_current,
+ futr_exog=futr_exog_current,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+
+ # Horizon prediction recursively
+ for tau in range(self.horizon_backup):
+ # Set exogenous
+ if self.hist_exog_size > 0:
+ hist_exog_current = hist_exog[:, self.input_size + tau - 1].unsqueeze(1)
+
+ if self.futr_exog_size > 0:
+ futr_exog_current = futr_exog[:, self.input_size + tau - 1].unsqueeze(1)
+
+ y_hat[:, tau], insample_y = self._validate_step_recurrent_single(
+ insample_y=insample_y,
+ insample_mask=None,
+ hist_exog=hist_exog_current,
+ futr_exog=futr_exog_current,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+
+ # Reset state and horizon
+ self.maintain_state = False
+ self.rnn_state = None
+ self.h = self.horizon_backup
+
+ return y_hat
+
+ def _validate_step_recurrent_single(
+ self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx
+ ):
+ # Input sequence
+ windows_batch = dict(
+ insample_y=insample_y, # [Ws, L, n_series]
+ insample_mask=insample_mask, # [Ws, L, n_series]
+ futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]
+ hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]
+ stat_exog=stat_exog,
+ ) # univariate: [Ws, S]; multivariate: [n_series, S]
+
+ # Model Predictions
+ output_batch_unmapped = self(windows_batch)
+ output_batch = self.loss.domain_map(output_batch_unmapped)
+
+ # Inverse normalization and sampling
+ if self.loss.is_distribution_output:
+ # Sample distribution
+ y_loc, y_scale = self._get_loc_scale(y_idx)
+ distr_args = self.loss.scale_decouple(
+ output=output_batch, loc=y_loc, scale=y_scale
+ )
+            # When validating, the output is the mean of the distribution, which is available as an attribute
+ distr = self.loss.get_distribution(distr_args=distr_args)
+
+ # Scale back to feed back as input
+ insample_y = self.scaler.scaler(distr.mean, y_loc, y_scale)
+ else:
+            # TODO: for now, we assume that in the case of a BasePointLoss with ndim==4, the last dimension
+            # contains a set of predictions for the target (e.g. MQLoss with multiple quantiles), for which we
+            # use the mean as the feedback signal for the recurrent predictions. A more precise approach would
+            # increase the insample input size of the recurrent network by the number of outputs so that each
+            # output can be fed back to a specific input channel.
+ if output_batch.ndim == 4:
+ output_batch = output_batch.mean(dim=-1)
+
+ insample_y = output_batch
+
+ # Remove horizon dim: [B, 1, N * n_outputs] -> [B, N * n_outputs]
+ y_hat = output_batch_unmapped.squeeze(1)
+ return y_hat, insample_y
+
+ def _predict_step_recurrent_batch(
+ self, insample_y, insample_mask, futr_exog, hist_exog, stat_exog, y_idx
+ ):
+ # Remember state in network and set horizon to 1
+ self.rnn_state = None
+ self.maintain_state = True
+ self.h = 1
+
+ # Initialize results array
+ n_outputs = len(self.loss.output_names)
+ y_hat = torch.zeros(
+ (insample_y.shape[0], self.horizon_backup, self.n_series, n_outputs),
+ device=insample_y.device,
+ dtype=insample_y.dtype,
+ )
+
+ # First step prediction
+ tau = 0
+
+ # Set exogenous
+ hist_exog_current = None
+ if self.hist_exog_size > 0:
+ hist_exog_current = hist_exog[:, : self.input_size + tau - 1]
+
+ futr_exog_current = None
+ if self.futr_exog_size > 0:
+ futr_exog_current = futr_exog[:, : self.input_size + tau - 1]
+
+ # First forecast step
+ y_hat[:, tau], insample_y = self._predict_step_recurrent_single(
+ insample_y=insample_y[:, : self.input_size + tau - 1],
+ insample_mask=insample_mask[:, : self.input_size + tau - 1],
+ hist_exog=hist_exog_current,
+ futr_exog=futr_exog_current,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+
+ # Horizon prediction recursively
+ for tau in range(self.horizon_backup):
+ # Set exogenous
+ if self.hist_exog_size > 0:
+ hist_exog_current = hist_exog[:, self.input_size + tau - 1].unsqueeze(1)
+
+ if self.futr_exog_size > 0:
+ futr_exog_current = futr_exog[:, self.input_size + tau - 1].unsqueeze(1)
+
+ y_hat[:, tau], insample_y = self._predict_step_recurrent_single(
+ insample_y=insample_y,
+ insample_mask=None,
+ hist_exog=hist_exog_current,
+ futr_exog=futr_exog_current,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+
+ # Reset state and horizon
+ self.maintain_state = False
+ self.rnn_state = None
+ self.h = self.horizon_backup
+
+ # Squeeze for univariate case
+ if not self.MULTIVARIATE:
+ y_hat = y_hat.squeeze(2)
+
+ return y_hat
+
+ def _predict_step_recurrent_single(
+ self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx
+ ):
+ # Input sequence
+ windows_batch = dict(
+ insample_y=insample_y, # [Ws, L, n_series]
+ insample_mask=insample_mask, # [Ws, L, n_series]
+ futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]
+ hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]
+ stat_exog=stat_exog,
+ ) # univariate: [Ws, S]; multivariate: [n_series, S]
+
+ # Model Predictions
+ output_batch_unmapped = self(windows_batch)
+ output_batch = self.loss.domain_map(output_batch_unmapped)
+
+ # Inverse normalization and sampling
+ if self.loss.is_distribution_output:
+ # Sample distribution
+ y_loc, y_scale = self._get_loc_scale(y_idx)
+ distr_args = self.loss.scale_decouple(
+ output=output_batch, loc=y_loc, scale=y_scale
+ )
+ # When predicting, we need to sample to get the quantiles. The mean is an attribute.
+ _, _, quants = self.loss.sample(
+ distr_args=distr_args, num_samples=self.n_samples
+ )
+ mean = self.loss.distr_mean
+
+ # Scale back to feed back as input
+ insample_y = self.scaler.scaler(mean, y_loc, y_scale)
+
+ # Save predictions
+ y_hat = torch.concat((mean.unsqueeze(-1), quants), axis=-1)
+
+ if self.loss.return_params:
+ distr_args = torch.stack(distr_args, dim=-1)
+ if distr_args.ndim > 4:
+ distr_args = distr_args.flatten(-2, -1)
+ y_hat = torch.concat((y_hat, distr_args), axis=-1)
+ else:
+            # TODO: for now, we assume that in the case of a BasePointLoss with ndim==4, the last dimension
+            # contains a set of predictions for the target (e.g. MQLoss with multiple quantiles), for which we
+            # use the mean as the feedback signal for the recurrent predictions. A more precise approach would
+            # increase the insample input size of the recurrent network by the number of outputs so that each
+            # output can be fed back to a specific input channel.
+ if output_batch.ndim == 4:
+ output_batch = output_batch.mean(dim=-1)
+
+ insample_y = output_batch
+ y_hat = self._inv_normalization(y_hat=output_batch, y_idx=y_idx)
+ y_hat = y_hat.unsqueeze(-1)
+
+ # Remove horizon dim: [B, 1, N, n_outputs] -> [B, N, n_outputs]
+ y_hat = y_hat.squeeze(1)
+ return y_hat, insample_y
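+    # Toy sketch of the recursive decoding pattern used above, not the library's
+    # API: predict one step, feed the prediction back as input, repeat over the
+    # horizon. `one_step_model` is a hypothetical placeholder.
+    #   import torch
+    #   def one_step_model(y):                   # naive: repeat the last value
+    #       return y[:, -1:]
+    #   y = torch.randn(8, 24)                   # [batch, input_size]
+    #   preds = []
+    #   for _ in range(12):                      # horizon
+    #       y_next = one_step_model(y)           # [batch, 1]
+    #       preds.append(y_next)
+    #       y = torch.cat([y, y_next], dim=1)    # feed prediction back
+    #   y_hat = torch.cat(preds, dim=1)          # [batch, 12]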
+
+ def _predict_step_direct_batch(
+ self, insample_y, insample_mask, hist_exog, futr_exog, stat_exog, y_idx
+ ):
+ windows_batch = dict(
+ insample_y=insample_y, # [Ws, L, n_series]
+ insample_mask=insample_mask, # [Ws, L, n_series]
+ futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]
+ hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]
+ stat_exog=stat_exog,
+ ) # univariate: [Ws, S]; multivariate: [n_series, S]
+
+ # Model Predictions
+ output_batch = self(windows_batch)
+ output_batch = self.loss.domain_map(output_batch)
+
+ # Inverse normalization and sampling
+ if self.loss.is_distribution_output:
+ y_loc, y_scale = self._get_loc_scale(y_idx)
+ distr_args = self.loss.scale_decouple(
+ output=output_batch, loc=y_loc, scale=y_scale
+ )
+ _, sample_mean, quants = self.loss.sample(distr_args=distr_args)
+ y_hat = torch.concat((sample_mean, quants), axis=-1)
+
+ if self.loss.return_params:
+ distr_args = torch.stack(distr_args, dim=-1)
+ if distr_args.ndim > 4:
+ distr_args = distr_args.flatten(-2, -1)
+ y_hat = torch.concat((y_hat, distr_args), axis=-1)
+ else:
+ y_hat = self._inv_normalization(y_hat=output_batch, y_idx=y_idx)
+
+ return y_hat
+
+ def training_step(self, batch, batch_idx):
+        # Set horizon to h_train for recurrent models to speed up training
+ if self.RECURRENT:
+ self.h = self.h_train
+
+ # windows: [Ws, L + h, C, n_series] or [Ws, L + h, C]
+ y_idx = batch["y_idx"]
+
+ windows = self._create_windows(batch, step="train")
+ original_outsample_y = torch.clone(
+ windows["temporal"][:, self.input_size :, y_idx]
+ )
+ windows = self._normalization(windows=windows, y_idx=y_idx)
+
+ # Parse windows
+ (
+ insample_y,
+ insample_mask,
+ outsample_y,
+ outsample_mask,
+ hist_exog,
+ futr_exog,
+ stat_exog,
+ ) = self._parse_windows(batch, windows)
+
+ windows_batch = dict(
+ insample_y=insample_y, # [Ws, L, n_series]
+ insample_mask=insample_mask, # [Ws, L, n_series]
+ futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]
+ hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]
+ stat_exog=stat_exog,
+ ) # univariate: [Ws, S]; multivariate: [n_series, S]
+
+ # Model Predictions
+ output = self(windows_batch)
+ output = self.loss.domain_map(output)
+
+ if self.loss.is_distribution_output:
+ y_loc, y_scale = self._get_loc_scale(y_idx)
+ outsample_y = original_outsample_y
+ distr_args = self.loss.scale_decouple(
+ output=output, loc=y_loc, scale=y_scale
+ )
+ loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
+ else:
+ loss = self.loss(
+ y=outsample_y, y_hat=output, y_insample=insample_y, mask=outsample_mask
+ )
+
+ if torch.isnan(loss):
+ print("Model Parameters", self.hparams)
+ print("insample_y", torch.isnan(insample_y).sum())
+ print("outsample_y", torch.isnan(outsample_y).sum())
+ raise Exception("Loss is NaN, training stopped.")
+
+ train_loss_log = loss.detach().item()
+ self.log(
+ "train_loss",
+ train_loss_log,
+ batch_size=outsample_y.size(0),
+ prog_bar=True,
+ on_epoch=True,
+ )
+ self.train_trajectories.append((self.global_step, train_loss_log))
+
+ self.h = self.horizon_backup
+
+ return loss
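+    # Minimal sketch of how the outsample mask zeroes out padded horizon steps in
+    # the loss above, assuming an MAE-style point loss for illustration.
+    #   import torch
+    #   y = torch.tensor([[1.0, 2.0, 0.0]])      # last step is padding
+    #   y_hat = torch.tensor([[1.5, 1.5, 9.0]])
+    #   mask = torch.tensor([[1.0, 1.0, 0.0]])
+    #   masked_mae = (mask * (y - y_hat).abs()).sum() / mask.sum()
+    #   print(masked_mae)                        # tensor(0.5000); padded step ignored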
+
+ def validation_step(self, batch, batch_idx):
+ if self.val_size == 0:
+ return np.nan
+
+ # TODO: Hack to compute number of windows
+ windows = self._create_windows(batch, step="val")
+ n_windows = len(windows["temporal"])
+ y_idx = batch["y_idx"]
+
+ # Number of windows in batch
+ windows_batch_size = self.inference_windows_batch_size
+ if windows_batch_size < 0:
+ windows_batch_size = n_windows
+ n_batches = int(np.ceil(n_windows / windows_batch_size))
+
+ valid_losses = []
+ batch_sizes = []
+ for i in range(n_batches):
+ # Create and normalize windows [Ws, L + h, C, n_series]
+ w_idxs = np.arange(
+ i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
+ )
+ windows = self._create_windows(batch, step="val", w_idxs=w_idxs)
+ original_outsample_y = torch.clone(
+ windows["temporal"][:, self.input_size :, y_idx]
+ )
+
+ windows = self._normalization(windows=windows, y_idx=y_idx)
+
+ # Parse windows
+ (
+ insample_y,
+ insample_mask,
+ _,
+ outsample_mask,
+ hist_exog,
+ futr_exog,
+ stat_exog,
+ ) = self._parse_windows(batch, windows)
+
+ if self.RECURRENT:
+ output_batch = self._validate_step_recurrent_batch(
+ insample_y=insample_y,
+ insample_mask=insample_mask,
+ futr_exog=futr_exog,
+ hist_exog=hist_exog,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+ else:
+ windows_batch = dict(
+ insample_y=insample_y, # [Ws, L, n_series]
+ insample_mask=insample_mask, # [Ws, L, n_series]
+ futr_exog=futr_exog, # univariate: [Ws, L, F]; multivariate: [Ws, F, L, n_series]
+ hist_exog=hist_exog, # univariate: [Ws, L, X]; multivariate: [Ws, X, L, n_series]
+ stat_exog=stat_exog,
+ ) # univariate: [Ws, S]; multivariate: [n_series, S]
+
+ # Model Predictions
+ output_batch = self(windows_batch)
+
+ output_batch = self.loss.domain_map(output_batch)
+ valid_loss_batch = self._compute_valid_loss(
+ insample_y=insample_y,
+ outsample_y=original_outsample_y,
+ output=output_batch,
+ outsample_mask=outsample_mask,
+ y_idx=batch["y_idx"],
+ )
+ valid_losses.append(valid_loss_batch)
+ batch_sizes.append(len(output_batch))
+
+ valid_loss = torch.stack(valid_losses)
+ batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)
+ batch_size = torch.sum(batch_sizes)
+ valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size
+
+ if torch.isnan(valid_loss):
+ raise Exception("Loss is NaN, training stopped.")
+
+ valid_loss_log = valid_loss.detach()
+ self.log(
+ "valid_loss",
+ valid_loss_log.item(),
+ batch_size=batch_size,
+ prog_bar=True,
+ on_epoch=True,
+ )
+ self.validation_step_outputs.append(valid_loss_log)
+ return valid_loss
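+    # Worked example of the inference window batching above, assuming n_windows=10
+    # and inference_windows_batch_size=4: n_batches = ceil(10 / 4) = 3 with index
+    # ranges [0..3], [4..7], [8..9].
+    #   import numpy as np
+    #   n_windows, bs = 10, 4
+    #   n_batches = int(np.ceil(n_windows / bs))  # 3
+    #   for i in range(n_batches):
+    #       w_idxs = np.arange(i * bs, min((i + 1) * bs, n_windows))
+    #       print(w_idxs)                         # [0 1 2 3], [4 5 6 7], [8 9]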
+
+ def predict_step(self, batch, batch_idx):
+ if self.RECURRENT:
+ self.input_size = self.inference_input_size
+
+ # TODO: Hack to compute number of windows
+ windows = self._create_windows(batch, step="predict")
+ n_windows = len(windows["temporal"])
+ y_idx = batch["y_idx"]
+
+ # Number of windows in batch
+ windows_batch_size = self.inference_windows_batch_size
+ if windows_batch_size < 0:
+ windows_batch_size = n_windows
+ n_batches = int(np.ceil(n_windows / windows_batch_size))
+ y_hats = []
+ for i in range(n_batches):
+ # Create and normalize windows [Ws, L+H, C]
+ w_idxs = np.arange(
+ i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
+ )
+ windows = self._create_windows(batch, step="predict", w_idxs=w_idxs)
+ windows = self._normalization(windows=windows, y_idx=y_idx)
+
+ # Parse windows
+ insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
+ self._parse_windows(batch, windows)
+ )
+
+ if self.RECURRENT:
+ y_hat = self._predict_step_recurrent_batch(
+ insample_y=insample_y,
+ insample_mask=insample_mask,
+ futr_exog=futr_exog,
+ hist_exog=hist_exog,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+ else:
+ y_hat = self._predict_step_direct_batch(
+ insample_y=insample_y,
+ insample_mask=insample_mask,
+ futr_exog=futr_exog,
+ hist_exog=hist_exog,
+ stat_exog=stat_exog,
+ y_idx=y_idx,
+ )
+
+ y_hats.append(y_hat)
+ y_hat = torch.cat(y_hats, dim=0)
+ self.input_size = self.input_size_backup
+
+ return y_hat
+
+ def fit(
+ self,
+ dataset,
+ val_size=0,
+ test_size=0,
+ random_seed=None,
+ distributed_config=None,
+ ):
+        """Fit.
+
+        The `fit` method optimizes the neural network's weights using the
+        initialization parameters (`learning_rate`, `windows_batch_size`, ...)
+        and the `loss` function defined during initialization.
+        Within `fit` we use a PyTorch Lightning `Trainer` that inherits the
+        initialization's `self.trainer_kwargs` to customize its inputs; see
+        [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
+
+        The method is designed to be compatible with SKLearn-like classes,
+        and in particular with the StatsForecast library.
+
+        By default the `model` does not save training checkpoints to protect
+        disk memory; to keep them, set `enable_checkpointing=True` in `__init__`.
+        An illustrative usage sketch follows this method.
+
+        **Parameters:**
+        `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
+        `val_size`: int, validation size for temporal cross-validation.
+        `test_size`: int, test size for temporal cross-validation.
+        `random_seed`: int=None, random seed for PyTorch initializer and NumPy generators; overwrites model.__init__'s.
+        `distributed_config`: optional configuration for distributed training with DDP.
+        """
+ return self._fit(
+ dataset=dataset,
+ batch_size=self.batch_size,
+ valid_batch_size=self.valid_batch_size,
+ val_size=val_size,
+ test_size=test_size,
+ random_seed=random_seed,
+ distributed_config=distributed_config,
+ )
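+    # Illustrative usage sketch, assuming a pandas DataFrame `df` with
+    # ['unique_id', 'ds', 'y'] columns and monthly data; in practice `fit` and
+    # `predict` are usually reached through the NeuralForecast core wrapper.
+    #   from neuralforecast import NeuralForecast
+    #   from neuralforecast.models import NHITS
+    #   nf = NeuralForecast(models=[NHITS(h=12, input_size=24, max_steps=100)], freq='M')
+    #   nf.fit(df=df, val_size=12)               # calls this `fit` internally
+    #   forecasts = nf.predict()                 # calls this `predict` internally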
+
+ def predict(
+ self,
+ dataset,
+ test_size=None,
+ step_size=1,
+ random_seed=None,
+ quantiles=None,
+ **data_module_kwargs,
+ ):
+ """Predict.
+
+ Neural network prediction with PL's `Trainer` execution of `predict_step`.
+
+ **Parameters:**
+ `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
+        `test_size`: int=None, test size for temporal cross-validation.
+        `step_size`: int=1, step size between each window.
+        `random_seed`: int=None, random seed for PyTorch initializer and NumPy generators; overwrites model.__init__'s.
+        `quantiles`: list of floats, optional (default=None), target quantiles to predict.
+        `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
+ """
+ self._check_exog(dataset)
+ self._restart_seed(random_seed)
+ if "quantile" in data_module_kwargs:
+ warnings.warn(
+ "The 'quantile' argument will be deprecated, use 'quantiles' instead."
+ )
+ if quantiles is not None:
+                raise ValueError("You can't specify both 'quantile' and 'quantiles'.")
+ quantiles = [data_module_kwargs.pop("quantile")]
+ self._set_quantiles(quantiles)
+
+ self.predict_step_size = step_size
+ self.decompose_forecast = False
+ datamodule = TimeSeriesDataModule(
+ dataset=dataset,
+ valid_batch_size=self.valid_batch_size,
+ **data_module_kwargs,
+ )
+
+        # Protect against the multi-GPU case: PL does not support returning predictions with multiple GPUs.
+ pred_trainer_kwargs = self.trainer_kwargs.copy()
+ if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
+ torch.cuda.device_count() > 1
+ ):
+ pred_trainer_kwargs["devices"] = [0]
+
+ trainer = pl.Trainer(**pred_trainer_kwargs)
+ fcsts = trainer.predict(self, datamodule=datamodule)
+ fcsts = torch.vstack(fcsts)
+
+ if self.MULTIVARIATE:
+ # [B, h, n_series (, Q)] -> [n_series, B, h (, Q)]
+ fcsts = fcsts.swapaxes(0, 2)
+ fcsts = fcsts.swapaxes(1, 2)
+
+ fcsts = tensor_to_numpy(fcsts).flatten()
+ fcsts = fcsts.reshape(-1, len(self.loss.output_names))
+ return fcsts
+
+ def decompose(
+ self,
+ dataset,
+ step_size=1,
+ random_seed=None,
+ quantiles=None,
+ **data_module_kwargs,
+ ):
+ """Decompose Predictions.
+
+ Decompose the predictions through the network's layers.
+        Available models are `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx`.
+
+ **Parameters:**
+ `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html).
+        `step_size`: int=1, step size between each window of temporal data.
+        `random_seed`: int=None, random seed for PyTorch initializer and NumPy generators; overwrites model.__init__'s.
+        `quantiles`: list of floats, optional (default=None), target quantiles to predict.
+ `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
+ """
+ # Restart random seed
+ if random_seed is None:
+ random_seed = self.random_seed
+ torch.manual_seed(random_seed)
+ self._set_quantiles(quantiles)
+
+ self.predict_step_size = step_size
+ self.decompose_forecast = True
+ datamodule = TimeSeriesDataModule(
+ dataset=dataset,
+ valid_batch_size=self.valid_batch_size,
+ **data_module_kwargs,
+ )
+ trainer = pl.Trainer(**self.trainer_kwargs)
+ fcsts = trainer.predict(self, datamodule=datamodule)
+ self.decompose_forecast = False # Default decomposition back to false
+ fcsts = torch.vstack(fcsts)
+ return tensor_to_numpy(fcsts)
diff --git a/neuralforecast/common/_base_multivariate.py b/neuralforecast/common/_base_multivariate.py
deleted file mode 100644
index 0f68918da..000000000
--- a/neuralforecast/common/_base_multivariate.py
+++ /dev/null
@@ -1,606 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_multivariate.ipynb.
-
-# %% auto 0
-__all__ = ['BaseMultivariate']
-
-# %% ../../nbs/common.base_multivariate.ipynb 5
-import numpy as np
-import torch
-import torch.nn as nn
-import pytorch_lightning as pl
-import neuralforecast.losses.pytorch as losses
-
-from ._base_model import BaseModel, tensor_to_numpy
-from ._scalers import TemporalNorm
-from ..tsdataset import TimeSeriesDataModule
-from ..utils import get_indexer_raise_missing
-
-# %% ../../nbs/common.base_multivariate.ipynb 6
-class BaseMultivariate(BaseModel):
- """Base Multivariate
-
- Base class for all multivariate models. The forecasts for all time-series are produced simultaneously
- within each window, which are randomly sampled during training.
-
- This class implements the basic functionality for all windows-based models, including:
- - PyTorch Lightning's methods training_step, validation_step, predict_step.
- - fit and predict methods used by NeuralForecast.core class.
- - sampling and wrangling methods to generate multivariate windows.
- """
-
- def __init__(
- self,
- h,
- input_size,
- loss,
- valid_loss,
- learning_rate,
- max_steps,
- val_check_steps,
- n_series,
- batch_size,
- step_size=1,
- num_lr_decays=0,
- early_stop_patience_steps=-1,
- scaler_type="robust",
- futr_exog_list=None,
- hist_exog_list=None,
- stat_exog_list=None,
- drop_last_loader=False,
- random_seed=1,
- alias=None,
- optimizer=None,
- optimizer_kwargs=None,
- lr_scheduler=None,
- lr_scheduler_kwargs=None,
- dataloader_kwargs=None,
- **trainer_kwargs,
- ):
- super().__init__(
- random_seed=random_seed,
- loss=loss,
- valid_loss=valid_loss,
- optimizer=optimizer,
- optimizer_kwargs=optimizer_kwargs,
- lr_scheduler=lr_scheduler,
- lr_scheduler_kwargs=lr_scheduler_kwargs,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- max_steps=max_steps,
- early_stop_patience_steps=early_stop_patience_steps,
- **trainer_kwargs,
- )
-
- # Padder to complete train windows,
- # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
- self.h = h
- self.input_size = input_size
- self.n_series = n_series
- self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)
-
- # Multivariate models do not support these loss functions yet.
- unsupported_losses = (
- losses.sCRPS,
- losses.MQLoss,
- losses.DistributionLoss,
- losses.PMM,
- losses.GMM,
- losses.HuberMQLoss,
- losses.MASE,
- losses.relMSE,
- losses.NBMM,
- )
- if isinstance(self.loss, unsupported_losses):
- raise Exception(f"{self.loss} is not supported in a Multivariate model.")
- if isinstance(self.valid_loss, unsupported_losses):
- raise Exception(
- f"{self.valid_loss} is not supported in a Multivariate model."
- )
-
- self.batch_size = batch_size
-
- # Optimization
- self.learning_rate = learning_rate
- self.max_steps = max_steps
- self.num_lr_decays = num_lr_decays
- self.lr_decay_steps = (
- max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
- )
- self.early_stop_patience_steps = early_stop_patience_steps
- self.val_check_steps = val_check_steps
- self.step_size = step_size
-
- # Scaler
- self.scaler = TemporalNorm(
- scaler_type=scaler_type, dim=2
- ) # Time dimension is in the second axis
-
- # Fit arguments
- self.val_size = 0
- self.test_size = 0
-
- # Model state
- self.decompose_forecast = False
-
- # DataModule arguments
- self.dataloader_kwargs = dataloader_kwargs
- self.drop_last_loader = drop_last_loader
- # used by on_validation_epoch_end hook
- self.validation_step_outputs = []
- self.alias = alias
-
- def _create_windows(self, batch, step):
- # Parse common data
- window_size = self.input_size + self.h
- temporal_cols = batch["temporal_cols"]
- temporal = batch["temporal"]
-
- if step == "train":
- if self.val_size + self.test_size > 0:
- cutoff = -self.val_size - self.test_size
- temporal = temporal[:, :, :cutoff]
-
- temporal = self.padder(temporal)
- windows = temporal.unfold(
- dimension=-1, size=window_size, step=self.step_size
- )
- # [n_series, C, Ws, L+H] 0, 1, 2, 3
-
- # Sample and Available conditions
- available_idx = temporal_cols.get_loc("available_mask")
- sample_condition = windows[:, available_idx, :, -self.h :]
- sample_condition = torch.sum(sample_condition, axis=2) # Sum over time
- sample_condition = torch.sum(
- sample_condition, axis=0
- ) # Sum over time-series
- available_condition = windows[:, available_idx, :, : -self.h]
- available_condition = torch.sum(
- available_condition, axis=2
- ) # Sum over time
- available_condition = torch.sum(
- available_condition, axis=0
- ) # Sum over time-series
- final_condition = (sample_condition > 0) & (
- available_condition > 0
- ) # Of shape [Ws]
- windows = windows[:, :, final_condition, :]
-
- # Get Static data
- static = batch.get("static", None)
- static_cols = batch.get("static_cols", None)
-
- # Protection of empty windows
- if final_condition.sum() == 0:
- raise Exception("No windows available for training")
-
- # Sample windows
- n_windows = windows.shape[2]
- if self.batch_size is not None:
- w_idxs = np.random.choice(
- n_windows,
- size=self.batch_size,
- replace=(n_windows < self.batch_size),
- )
- windows = windows[:, :, w_idxs, :]
-
- windows = windows.permute(2, 1, 3, 0) # [Ws, C, L+H, n_series]
-
- windows_batch = dict(
- temporal=windows,
- temporal_cols=temporal_cols,
- static=static,
- static_cols=static_cols,
- )
-
- return windows_batch
-
- elif step in ["predict", "val"]:
-
- if step == "predict":
- predict_step_size = self.predict_step_size
- cutoff = -self.input_size - self.test_size
- temporal = batch["temporal"][:, :, cutoff:]
-
- elif step == "val":
- predict_step_size = self.step_size
- cutoff = -self.input_size - self.val_size - self.test_size
- if self.test_size > 0:
- temporal = batch["temporal"][:, :, cutoff : -self.test_size]
- else:
- temporal = batch["temporal"][:, :, cutoff:]
-
- if (
- (step == "predict")
- and (self.test_size == 0)
- and (len(self.futr_exog_list) == 0)
- ):
- temporal = self.padder(temporal)
-
- windows = temporal.unfold(
- dimension=-1, size=window_size, step=predict_step_size
- )
- # [n_series, C, Ws, L+H] -> [Ws, C, L+H, n_series]
- windows = windows.permute(2, 1, 3, 0)
-
- # Get Static data
- static = batch.get("static", None)
- static_cols = batch.get("static_cols", None)
-
- windows_batch = dict(
- temporal=windows,
- temporal_cols=temporal_cols,
- static=static,
- static_cols=static_cols,
- )
-
- return windows_batch
- else:
- raise ValueError(f"Unknown step {step}")
-
- def _normalization(self, windows, y_idx):
-
- # windows are already filtered by train/validation/test
- # from the `create_windows_method` nor leakage risk
- temporal = windows["temporal"] # [Ws, C, L+H, n_series]
- temporal_cols = windows["temporal_cols"].copy() # [Ws, C, L+H, n_series]
-
- # To avoid leakage uses only the lags
- temporal_data_cols = self._get_temporal_exogenous_cols(
- temporal_cols=temporal_cols
- )
- temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
- temporal_idxs = np.append(y_idx, temporal_idxs)
- temporal_data = temporal[:, temporal_idxs, :, :]
- temporal_mask = temporal[
- :, temporal_cols.get_loc("available_mask"), :, :
- ].clone()
- temporal_mask[:, -self.h :, :] = 0.0
-
- # Normalize. self.scaler stores the shift and scale for inverse transform
- temporal_mask = temporal_mask.unsqueeze(
- 1
- ) # Add channel dimension for scaler.transform.
- temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
- # Replace values in windows dict
- temporal[:, temporal_idxs, :, :] = temporal_data
- windows["temporal"] = temporal
-
- return windows
-
- def _inv_normalization(self, y_hat, temporal_cols, y_idx):
- # Receives window predictions [Ws, H, n_series]
- # Broadcasts outputs and inverts normalization
-
- # Add C dimension
- # if y_hat.ndim == 2:
- # remove_dimension = True
- # y_hat = y_hat.unsqueeze(-1)
- # else:
- # remove_dimension = False
-
- y_scale = self.scaler.x_scale[:, [y_idx], :].squeeze(1)
- y_loc = self.scaler.x_shift[:, [y_idx], :].squeeze(1)
-
- # y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1)
- # y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1)
-
- y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
-
- # if remove_dimension:
- # y_hat = y_hat.squeeze(-1)
- # y_loc = y_loc.squeeze(-1)
- # y_scale = y_scale.squeeze(-1)
-
- return y_hat, y_loc, y_scale
-
- def _parse_windows(self, batch, windows):
- # Temporal: [Ws, C, L+H, n_series]
-
- # Filter insample lags from outsample horizon
- mask_idx = batch["temporal_cols"].get_loc("available_mask")
- y_idx = batch["y_idx"]
- insample_y = windows["temporal"][:, y_idx, : -self.h, :]
- insample_mask = windows["temporal"][:, mask_idx, : -self.h, :]
- outsample_y = windows["temporal"][:, y_idx, -self.h :, :]
- outsample_mask = windows["temporal"][:, mask_idx, -self.h :, :]
-
- # Filter historic exogenous variables
- if len(self.hist_exog_list):
- hist_exog_idx = get_indexer_raise_missing(
- windows["temporal_cols"], self.hist_exog_list
- )
- hist_exog = windows["temporal"][:, hist_exog_idx, : -self.h, :]
- else:
- hist_exog = None
-
- # Filter future exogenous variables
- if len(self.futr_exog_list):
- futr_exog_idx = get_indexer_raise_missing(
- windows["temporal_cols"], self.futr_exog_list
- )
- futr_exog = windows["temporal"][:, futr_exog_idx, :, :]
- else:
- futr_exog = None
-
- # Filter static variables
- if len(self.stat_exog_list):
- static_idx = get_indexer_raise_missing(
- windows["static_cols"], self.stat_exog_list
- )
- stat_exog = windows["static"][:, static_idx]
- else:
- stat_exog = None
-
- return (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- )
-
- def training_step(self, batch, batch_idx):
- # Create and normalize windows [batch_size, n_series, C, L+H]
- windows = self._create_windows(batch, step="train")
- y_idx = batch["y_idx"]
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- ) = self._parse_windows(batch, windows)
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L, n_series]
- insample_mask=insample_mask, # [Ws, L, n_series]
- futr_exog=futr_exog, # [Ws, F, L + h, n_series]
- hist_exog=hist_exog, # [Ws, X, L, n_series]
- stat_exog=stat_exog,
- ) # [n_series, S]
-
- # Model Predictions
- output = self(windows_batch)
- if self.loss.is_distribution_output:
- outsample_y, y_loc, y_scale = self._inv_normalization(
- y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
- else:
- loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)
-
- if torch.isnan(loss):
- print("Model Parameters", self.hparams)
- print("insample_y", torch.isnan(insample_y).sum())
- print("outsample_y", torch.isnan(outsample_y).sum())
- print("output", torch.isnan(output).sum())
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "train_loss",
- loss.detach().item(),
- batch_size=outsample_y.size(0),
- prog_bar=True,
- on_epoch=True,
- )
- self.train_trajectories.append((self.global_step, loss.detach().item()))
- return loss
-
- def validation_step(self, batch, batch_idx):
- if self.val_size == 0:
- return np.nan
-
- # Create and normalize windows [Ws, L+H, C]
- windows = self._create_windows(batch, step="val")
- y_idx = batch["y_idx"]
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- ) = self._parse_windows(batch, windows)
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L, n_series]
- insample_mask=insample_mask, # [Ws, L, n_series]
- futr_exog=futr_exog, # [Ws, F, L + h, n_series]
- hist_exog=hist_exog, # [Ws, X, L, n_series]
- stat_exog=stat_exog,
- ) # [n_series, S]
-
- # Model Predictions
- output = self(windows_batch)
- if self.loss.is_distribution_output:
- outsample_y, y_loc, y_scale = self._inv_normalization(
- y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
-
- if str(type(self.valid_loss)) in [
-                "<class 'neuralforecast.losses.pytorch.sCRPS'>",
-                "<class 'neuralforecast.losses.pytorch.MQLoss'>",
- ]:
- _, output = self.loss.sample(distr_args=distr_args)
-
- # Validation Loss evaluation
- if self.valid_loss.is_distribution_output:
- valid_loss = self.valid_loss(
- y=outsample_y, distr_args=distr_args, mask=outsample_mask
- )
- else:
- valid_loss = self.valid_loss(
- y=outsample_y, y_hat=output, mask=outsample_mask
- )
-
- if torch.isnan(valid_loss):
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "valid_loss",
- valid_loss.detach().item(),
- batch_size=outsample_y.size(0),
- prog_bar=True,
- on_epoch=True,
- )
- self.validation_step_outputs.append(valid_loss)
- return valid_loss
-
- def predict_step(self, batch, batch_idx):
- # Create and normalize windows [Ws, L+H, C]
- windows = self._create_windows(batch, step="predict")
- y_idx = batch["y_idx"]
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
- self._parse_windows(batch, windows)
- )
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L, n_series]
- insample_mask=insample_mask, # [Ws, L, n_series]
- futr_exog=futr_exog, # [Ws, F, L + h, n_series]
- hist_exog=hist_exog, # [Ws, X, L, n_series]
- stat_exog=stat_exog,
- ) # [n_series, S]
-
- # Model Predictions
- output = self(windows_batch)
- if self.loss.is_distribution_output:
- _, y_loc, y_scale = self._inv_normalization(
- y_hat=torch.empty(
- size=(insample_y.shape[0], self.h, self.n_series),
- dtype=output[0].dtype,
- device=output[0].device,
- ),
- temporal_cols=batch["temporal_cols"],
- y_idx=y_idx,
- )
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- _, y_hat = self.loss.sample(distr_args=distr_args)
-
- if self.loss.return_params:
- distr_args = torch.stack(distr_args, dim=-1)
- distr_args = torch.reshape(
- distr_args, (len(windows["temporal"]), self.h, -1)
- )
- y_hat = torch.concat((y_hat, distr_args), axis=2)
- else:
- y_hat, _, _ = self._inv_normalization(
- y_hat=output, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- return y_hat
-
- def fit(
- self,
- dataset,
- val_size=0,
- test_size=0,
- random_seed=None,
- distributed_config=None,
- ):
- """Fit.
-
- The `fit` method, optimizes the neural network's weights using the
- initialization parameters (`learning_rate`, `windows_batch_size`, ...)
- and the `loss` function as defined during the initialization.
- Within `fit` we use a PyTorch Lightning `Trainer` that
- inherits the initialization's `self.trainer_kwargs`, to customize
- its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
-
- The method is designed to be compatible with SKLearn-like classes
- and in particular to be compatible with the StatsForecast library.
-
- By default the `model` is not saving training checkpoints to protect
- disk memory, to get them change `enable_checkpointing=True` in `__init__`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `val_size`: int, validation size for temporal cross-validation.
- `test_size`: int, test size for temporal cross-validation.
- """
- if distributed_config is not None:
- raise ValueError(
- "multivariate models cannot be trained using distributed data parallel."
- )
- return self._fit(
- dataset=dataset,
- batch_size=self.n_series,
- valid_batch_size=self.n_series,
- val_size=val_size,
- test_size=test_size,
- random_seed=random_seed,
- shuffle_train=False,
- distributed_config=None,
- )
-
- def predict(
- self,
- dataset,
- test_size=None,
- step_size=1,
- random_seed=None,
- **data_module_kwargs,
- ):
- """Predict.
-
- Neural network prediction with PL's `Trainer` execution of `predict_step`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `test_size`: int=None, test size for temporal cross-validation.
- `step_size`: int=1, Step size between each window.
- `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
- """
- self._check_exog(dataset)
- self._restart_seed(random_seed)
- data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)
-
- self.predict_step_size = step_size
- self.decompose_forecast = False
- datamodule = TimeSeriesDataModule(
- dataset=dataset,
- valid_batch_size=self.n_series,
- batch_size=self.n_series,
- **data_module_kwargs,
- )
-
- # Protect when case of multiple gpu. PL does not support return preds with multiple gpu.
- pred_trainer_kwargs = self.trainer_kwargs.copy()
- if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
- torch.cuda.device_count() > 1
- ):
- pred_trainer_kwargs["devices"] = [0]
-
- trainer = pl.Trainer(**pred_trainer_kwargs)
- fcsts = trainer.predict(self, datamodule=datamodule)
- fcsts = tensor_to_numpy(torch.vstack(fcsts))
-
- fcsts = np.transpose(fcsts, (2, 0, 1))
- fcsts = fcsts.flatten()
- fcsts = fcsts.reshape(-1, len(self.loss.output_names))
- return fcsts
-
- def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):
- raise NotImplementedError("decompose")
diff --git a/neuralforecast/common/_base_recurrent.py b/neuralforecast/common/_base_recurrent.py
deleted file mode 100644
index 59d25bc9f..000000000
--- a/neuralforecast/common/_base_recurrent.py
+++ /dev/null
@@ -1,591 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_recurrent.ipynb.
-
-# %% auto 0
-__all__ = ['BaseRecurrent']
-
-# %% ../../nbs/common.base_recurrent.ipynb 6
-import numpy as np
-import torch
-import torch.nn as nn
-import pytorch_lightning as pl
-import neuralforecast.losses.pytorch as losses
-
-from ._base_model import BaseModel, tensor_to_numpy
-from ._scalers import TemporalNorm
-from ..tsdataset import TimeSeriesDataModule
-from ..utils import get_indexer_raise_missing
-
-# %% ../../nbs/common.base_recurrent.ipynb 7
-class BaseRecurrent(BaseModel):
- """Base Recurrent
-
- Base class for all recurrent-based models. The forecasts are produced sequentially between
- windows.
-
- This class implements the basic functionality for all windows-based models, including:
- - PyTorch Lightning's methods training_step, validation_step, predict_step.
- - fit and predict methods used by NeuralForecast.core class.
- - sampling and wrangling methods to sequential windows.
- """
-
- def __init__(
- self,
- h,
- input_size,
- inference_input_size,
- loss,
- valid_loss,
- learning_rate,
- max_steps,
- val_check_steps,
- batch_size,
- valid_batch_size,
- scaler_type="robust",
- num_lr_decays=0,
- early_stop_patience_steps=-1,
- futr_exog_list=None,
- hist_exog_list=None,
- stat_exog_list=None,
- drop_last_loader=False,
- random_seed=1,
- alias=None,
- optimizer=None,
- optimizer_kwargs=None,
- lr_scheduler=None,
- lr_scheduler_kwargs=None,
- dataloader_kwargs=None,
- **trainer_kwargs,
- ):
- super().__init__(
- random_seed=random_seed,
- loss=loss,
- valid_loss=valid_loss,
- optimizer=optimizer,
- optimizer_kwargs=optimizer_kwargs,
- lr_scheduler=lr_scheduler,
- lr_scheduler_kwargs=lr_scheduler_kwargs,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- max_steps=max_steps,
- early_stop_patience_steps=early_stop_patience_steps,
- **trainer_kwargs,
- )
-
- # Padder to complete train windows,
- # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
- self.h = h
- self.input_size = input_size
- self.inference_input_size = inference_input_size
- self.padder = nn.ConstantPad1d(padding=(0, self.h), value=0.0)
-
- unsupported_distributions = ["Bernoulli", "ISQF"]
- if (
- isinstance(self.loss, losses.DistributionLoss)
- and self.loss.distribution in unsupported_distributions
- ):
- raise Exception(
- f"Distribution {self.loss.distribution} not available for Recurrent-based models. Please choose another distribution."
- )
-
- # Valid batch_size
- self.batch_size = batch_size
- if valid_batch_size is None:
- self.valid_batch_size = batch_size
- else:
- self.valid_batch_size = valid_batch_size
-
- # Optimization
- self.learning_rate = learning_rate
- self.max_steps = max_steps
- self.num_lr_decays = num_lr_decays
- self.lr_decay_steps = (
- max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
- )
- self.early_stop_patience_steps = early_stop_patience_steps
- self.val_check_steps = val_check_steps
-
- # Scaler
- self.scaler = TemporalNorm(
- scaler_type=scaler_type,
- dim=-1, # Time dimension is -1.
- num_features=1 + len(self.hist_exog_list) + len(self.futr_exog_list),
- )
-
- # Fit arguments
- self.val_size = 0
- self.test_size = 0
-
- # DataModule arguments
- self.dataloader_kwargs = dataloader_kwargs
- self.drop_last_loader = drop_last_loader
- # used by on_validation_epoch_end hook
- self.validation_step_outputs = []
- self.alias = alias
-
- def _normalization(self, batch, val_size=0, test_size=0):
- temporal = batch["temporal"] # B, C, T
- temporal_cols = batch["temporal_cols"].copy()
- y_idx = batch["y_idx"]
-
- # Separate data and mask
- temporal_data_cols = self._get_temporal_exogenous_cols(
- temporal_cols=temporal_cols
- )
- temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
- temporal_idxs = np.append(y_idx, temporal_idxs)
- temporal_data = temporal[:, temporal_idxs, :]
- temporal_mask = temporal[:, temporal_cols.get_loc("available_mask"), :].clone()
-
- # Remove validation and test set to prevent leakeage
- if val_size + test_size > 0:
- cutoff = val_size + test_size
- temporal_mask[:, -cutoff:] = 0
-
- # Normalize. self.scaler stores the shift and scale for inverse transform
- temporal_mask = temporal_mask.unsqueeze(
- 1
- ) # Add channel dimension for scaler.transform.
- temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
-
- # Replace values in windows dict
- temporal[:, temporal_idxs, :] = temporal_data
- batch["temporal"] = temporal
-
- return batch
-
- def _inv_normalization(self, y_hat, temporal_cols, y_idx):
- # Receives window predictions [B, seq_len, H, output]
- # Broadcasts outputs and inverts normalization
-
- # Get 'y' scale and shift, and add W dimension
- y_loc = self.scaler.x_shift[:, [y_idx], 0].flatten() # [B,C,T] -> [B]
- y_scale = self.scaler.x_scale[:, [y_idx], 0].flatten() # [B,C,T] -> [B]
-
- # Expand scale and shift to y_hat dimensions
- y_loc = y_loc.view(*y_loc.shape, *(1,) * (y_hat.ndim - 1)) # .expand(y_hat)
- y_scale = y_scale.view(
- *y_scale.shape, *(1,) * (y_hat.ndim - 1)
- ) # .expand(y_hat)
-
- y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
-
- return y_hat, y_loc, y_scale
-
- def _create_windows(self, batch, step):
- temporal = batch["temporal"]
- temporal_cols = batch["temporal_cols"]
-
- if step == "train":
- if self.val_size + self.test_size > 0:
- cutoff = -self.val_size - self.test_size
- temporal = temporal[:, :, :cutoff]
- temporal = self.padder(temporal)
-
- # Truncate batch to shorter time-series
- av_condition = torch.nonzero(
- torch.min(
- temporal[:, temporal_cols.get_loc("available_mask")], axis=0
- ).values
- )
- min_time_stamp = int(av_condition.min())
-
- available_ts = temporal.shape[-1] - min_time_stamp
- if available_ts < 1 + self.h:
- raise Exception(
- "Time series too short for given input and output size. \n"
- f"Available timestamps: {available_ts}"
- )
-
- temporal = temporal[:, :, min_time_stamp:]
-
- if step == "val":
- if self.test_size > 0:
- temporal = temporal[:, :, : -self.test_size]
- temporal = self.padder(temporal)
-
- if step == "predict":
- if (self.test_size == 0) and (len(self.futr_exog_list) == 0):
- temporal = self.padder(temporal)
-
- # Test size covers all data, pad left one timestep with zeros
- if temporal.shape[-1] == self.test_size:
- padder_left = nn.ConstantPad1d(padding=(1, 0), value=0.0)
- temporal = padder_left(temporal)
-
- # Parse batch
- window_size = 1 + self.h # 1 for current t and h for future
- windows = temporal.unfold(dimension=-1, size=window_size, step=1)
-
- # Truncated backprogatation/inference (shorten sequence where RNNs unroll)
- n_windows = windows.shape[2]
- input_size = -1
- if (step == "train") and (self.input_size > 0):
- input_size = self.input_size
- if (input_size > 0) and (n_windows > input_size):
- max_sampleable_time = n_windows - self.input_size + 1
- start = np.random.choice(max_sampleable_time)
- windows = windows[:, :, start : (start + input_size), :]
-
- if (step == "val") and (self.inference_input_size > 0):
- cutoff = self.inference_input_size + self.val_size
- windows = windows[:, :, -cutoff:, :]
-
- if (step == "predict") and (self.inference_input_size > 0):
- cutoff = self.inference_input_size + self.test_size
- windows = windows[:, :, -cutoff:, :]
-
- # [B, C, input_size, 1+H]
- windows_batch = dict(
- temporal=windows,
- temporal_cols=temporal_cols,
- static=batch.get("static", None),
- static_cols=batch.get("static_cols", None),
- )
-
- return windows_batch
-
- def _parse_windows(self, batch, windows):
- # [B, C, seq_len, 1+H]
- # Filter insample lags from outsample horizon
- mask_idx = batch["temporal_cols"].get_loc("available_mask")
- y_idx = batch["y_idx"]
- insample_y = windows["temporal"][:, y_idx, :, : -self.h]
- insample_mask = windows["temporal"][:, mask_idx, :, : -self.h]
- outsample_y = windows["temporal"][:, y_idx, :, -self.h :].contiguous()
- outsample_mask = windows["temporal"][:, mask_idx, :, -self.h :].contiguous()
-
- # Filter historic exogenous variables
- if len(self.hist_exog_list):
- hist_exog_idx = get_indexer_raise_missing(
- windows["temporal_cols"], self.hist_exog_list
- )
- hist_exog = windows["temporal"][:, hist_exog_idx, :, : -self.h]
- else:
- hist_exog = None
-
- # Filter future exogenous variables
- if len(self.futr_exog_list):
- futr_exog_idx = get_indexer_raise_missing(
- windows["temporal_cols"], self.futr_exog_list
- )
- futr_exog = windows["temporal"][:, futr_exog_idx, :, :]
- else:
- futr_exog = None
- # Filter static variables
- if len(self.stat_exog_list):
- static_idx = get_indexer_raise_missing(
- windows["static_cols"], self.stat_exog_list
- )
- stat_exog = windows["static"][:, static_idx]
- else:
- stat_exog = None
-
- return (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- )
-
- def training_step(self, batch, batch_idx):
- # Create and normalize windows [Ws, L+H, C]
- batch = self._normalization(
- batch, val_size=self.val_size, test_size=self.test_size
- )
- windows = self._create_windows(batch, step="train")
-
- # Parse windows
- (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- ) = self._parse_windows(batch, windows)
-
- windows_batch = dict(
- insample_y=insample_y, # [B, seq_len, 1]
- insample_mask=insample_mask, # [B, seq_len, 1]
- futr_exog=futr_exog, # [B, F, seq_len, 1+H]
- hist_exog=hist_exog, # [B, C, seq_len]
- stat_exog=stat_exog,
- ) # [B, S]
-
- # Model predictions
- output = self(windows_batch) # tuple([B, seq_len, H, output])
- if self.loss.is_distribution_output:
- outsample_y, y_loc, y_scale = self._inv_normalization(
- y_hat=outsample_y,
- temporal_cols=batch["temporal_cols"],
- y_idx=batch["y_idx"],
- )
- B = output[0].size()[0]
- T = output[0].size()[1]
- H = output[0].size()[2]
- output = [arg.view(-1, *(arg.size()[2:])) for arg in output]
- outsample_y = outsample_y.view(B * T, H)
- outsample_mask = outsample_mask.view(B * T, H)
- y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)
- y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
- else:
- loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)
-
- if torch.isnan(loss):
- print("Model Parameters", self.hparams)
- print("insample_y", torch.isnan(insample_y).sum())
- print("outsample_y", torch.isnan(outsample_y).sum())
- print("output", torch.isnan(output).sum())
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "train_loss",
- loss.detach().item(),
- batch_size=outsample_y.size(0),
- prog_bar=True,
- on_epoch=True,
- )
- self.train_trajectories.append((self.global_step, loss.detach().item()))
- return loss
-
- def validation_step(self, batch, batch_idx):
- if self.val_size == 0:
- return np.nan
-
- # Create and normalize windows [Ws, L+H, C]
- batch = self._normalization(
- batch, val_size=self.val_size, test_size=self.test_size
- )
- windows = self._create_windows(batch, step="val")
- y_idx = batch["y_idx"]
-
- # Parse windows
- (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- ) = self._parse_windows(batch, windows)
-
- windows_batch = dict(
- insample_y=insample_y, # [B, seq_len, 1]
- insample_mask=insample_mask, # [B, seq_len, 1]
- futr_exog=futr_exog, # [B, F, seq_len, 1+H]
- hist_exog=hist_exog, # [B, C, seq_len]
- stat_exog=stat_exog,
- ) # [B, S]
-
- # Remove train y_hat (+1 and -1 for padded last window with zeros)
- # tuple([B, seq_len, H, output]) -> tuple([B, validation_size, H, output])
- val_windows = (self.val_size) + 1
- outsample_y = outsample_y[:, -val_windows:-1, :]
- outsample_mask = outsample_mask[:, -val_windows:-1, :]
-
- # Model predictions
- output = self(windows_batch) # tuple([B, seq_len, H, output])
- if self.loss.is_distribution_output:
- output = [arg[:, -val_windows:-1] for arg in output]
- outsample_y, y_loc, y_scale = self._inv_normalization(
- y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- B = output[0].size()[0]
- T = output[0].size()[1]
- H = output[0].size()[2]
- output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]
- outsample_y = outsample_y.reshape(B * T, H)
- outsample_mask = outsample_mask.reshape(B * T, H)
- y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)
- y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- _, sample_mean, quants = self.loss.sample(distr_args=distr_args)
-
- if str(type(self.valid_loss)) in [
-                "<class 'neuralforecast.losses.pytorch.sCRPS'>",
-                "<class 'neuralforecast.losses.pytorch.MQLoss'>",
- ]:
- output = quants
- elif str(type(self.valid_loss)) in [
-                "<class 'neuralforecast.losses.pytorch.relMSE'>"
- ]:
- output = torch.unsqueeze(sample_mean, dim=-1) # [N,H,1] -> [N,H]
-
- else:
- output = output[:, -val_windows:-1, :]
-
- # Validation Loss evaluation
- if self.valid_loss.is_distribution_output:
- valid_loss = self.valid_loss(
- y=outsample_y, distr_args=distr_args, mask=outsample_mask
- )
- else:
- outsample_y, _, _ = self._inv_normalization(
- y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- output, _, _ = self._inv_normalization(
- y_hat=output, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- valid_loss = self.valid_loss(
- y=outsample_y, y_hat=output, mask=outsample_mask
- )
-
- if torch.isnan(valid_loss):
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "valid_loss",
- valid_loss.detach().item(),
- batch_size=outsample_y.size(0),
- prog_bar=True,
- on_epoch=True,
- )
- self.validation_step_outputs.append(valid_loss)
- return valid_loss
-
- def predict_step(self, batch, batch_idx):
- # Create and normalize windows [Ws, L+H, C]
- batch = self._normalization(batch, val_size=0, test_size=self.test_size)
- windows = self._create_windows(batch, step="predict")
- y_idx = batch["y_idx"]
-
- # Parse windows
- insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
- self._parse_windows(batch, windows)
- )
-
- windows_batch = dict(
- insample_y=insample_y, # [B, seq_len, 1]
- insample_mask=insample_mask, # [B, seq_len, 1]
- futr_exog=futr_exog, # [B, F, seq_len, 1+H]
- hist_exog=hist_exog, # [B, C, seq_len]
- stat_exog=stat_exog,
- ) # [B, S]
-
- # Model Predictions
- output = self(windows_batch) # tuple([B, seq_len, H], ...)
- if self.loss.is_distribution_output:
- _, y_loc, y_scale = self._inv_normalization(
- y_hat=output[0], temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- B = output[0].size()[0]
- T = output[0].size()[1]
- H = output[0].size()[2]
- output = [arg.reshape(-1, *(arg.size()[2:])) for arg in output]
- y_loc = y_loc.repeat_interleave(repeats=T, dim=0).squeeze(-1)
- y_scale = y_scale.repeat_interleave(repeats=T, dim=0).squeeze(-1)
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- _, sample_mean, quants = self.loss.sample(distr_args=distr_args)
- y_hat = torch.concat((sample_mean, quants), axis=2)
- y_hat = y_hat.view(B, T, H, -1)
-
- if self.loss.return_params:
- distr_args = torch.stack(distr_args, dim=-1)
- distr_args = torch.reshape(distr_args, (B, T, H, -1))
- y_hat = torch.concat((y_hat, distr_args), axis=3)
- else:
- y_hat, _, _ = self._inv_normalization(
- y_hat=output, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- return y_hat
-
- def fit(
- self,
- dataset,
- val_size=0,
- test_size=0,
- random_seed=None,
- distributed_config=None,
- ):
- """Fit.
-
-        The `fit` method optimizes the neural network's weights using the
- initialization parameters (`learning_rate`, `batch_size`, ...)
- and the `loss` function as defined during the initialization.
- Within `fit` we use a PyTorch Lightning `Trainer` that
- inherits the initialization's `self.trainer_kwargs`, to customize
- its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
-
- The method is designed to be compatible with SKLearn-like classes
- and in particular to be compatible with the StatsForecast library.
-
-        By default the `model` does not save training checkpoints, to protect
-        disk memory; to keep them, set `enable_checkpointing=True` in `__init__`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `val_size`: int, validation size for temporal cross-validation.
- `test_size`: int, test size for temporal cross-validation.
- `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
- """
- return self._fit(
- dataset=dataset,
- batch_size=self.batch_size,
- valid_batch_size=self.valid_batch_size,
- val_size=val_size,
- test_size=test_size,
- random_seed=random_seed,
- distributed_config=distributed_config,
- )
-
- def predict(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):
- """Predict.
-
- Neural network prediction with PL's `Trainer` execution of `predict_step`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `step_size`: int=1, Step size between each window.
- `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
- `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
- """
- self._check_exog(dataset)
- self._restart_seed(random_seed)
- data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)
-
- if step_size > 1:
- raise Exception("Recurrent models do not support step_size > 1")
-
- # fcsts (window, batch, h)
-        # Protect against the multiple-GPU case: PL does not support returning preds with multiple GPUs.
- pred_trainer_kwargs = self.trainer_kwargs.copy()
- if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
- torch.cuda.device_count() > 1
- ):
- pred_trainer_kwargs["devices"] = [0]
-
- trainer = pl.Trainer(**pred_trainer_kwargs)
-
- datamodule = TimeSeriesDataModule(
- dataset=dataset,
- valid_batch_size=self.valid_batch_size,
- **data_module_kwargs,
- )
- fcsts = trainer.predict(self, datamodule=datamodule)
- if self.test_size > 0:
- # Remove warmup windows (from train and validation)
- # [N,T,H,output], avoid indexing last dim for univariate output compatibility
- fcsts = torch.vstack(
- [fcst[:, -(1 + self.test_size - self.h) :, :] for fcst in fcsts]
- )
- fcsts = tensor_to_numpy(fcsts).flatten()
- fcsts = fcsts.reshape(-1, len(self.loss.output_names))
- else:
- fcsts = torch.vstack([fcst[:, -1:, :] for fcst in fcsts])
- fcsts = tensor_to_numpy(fcsts).flatten()
- fcsts = fcsts.reshape(-1, len(self.loss.output_names))
- return fcsts
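
The deleted `fit`/`predict` docstrings above describe an sklearn-style API that is normally driven through the `NeuralForecast` wrapper rather than by calling the base class directly. A minimal, illustrative sketch of that flow (the model choice and hyperparameters are ours, not part of this diff):

```python
# Hedged usage sketch: exercising the fit/predict API described above through
# the NeuralForecast wrapper; hyperparameters are illustrative only.
from neuralforecast import NeuralForecast
from neuralforecast.models import RNN
from neuralforecast.utils import AirPassengersDF

nf = NeuralForecast(models=[RNN(h=12, input_size=24, max_steps=10)], freq="M")
nf.fit(df=AirPassengersDF)   # builds a TimeSeriesDataset and calls the model's fit
preds = nf.predict()         # runs predict_step through a PL Trainer under the hood
print(preds.head())
```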
diff --git a/neuralforecast/common/_base_windows.py b/neuralforecast/common/_base_windows.py
deleted file mode 100644
index 3d948f1ea..000000000
--- a/neuralforecast/common/_base_windows.py
+++ /dev/null
@@ -1,744 +0,0 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.base_windows.ipynb.
-
-# %% auto 0
-__all__ = ['BaseWindows']
-
-# %% ../../nbs/common.base_windows.ipynb 5
-import numpy as np
-import torch
-import torch.nn as nn
-import pytorch_lightning as pl
-
-from ._base_model import BaseModel, tensor_to_numpy
-from ._scalers import TemporalNorm
-from ..tsdataset import TimeSeriesDataModule
-from ..utils import get_indexer_raise_missing
-
-# %% ../../nbs/common.base_windows.ipynb 6
-class BaseWindows(BaseModel):
- """Base Windows
-
-    Base class for all windows-based models. Forecasts are produced separately
-    for each window; windows are randomly sampled during training.
-
- This class implements the basic functionality for all windows-based models, including:
- - PyTorch Lightning's methods training_step, validation_step, predict_step.
- - fit and predict methods used by NeuralForecast.core class.
- - sampling and wrangling methods to generate windows.
- """
-
- def __init__(
- self,
- h,
- input_size,
- loss,
- valid_loss,
- learning_rate,
- max_steps,
- val_check_steps,
- batch_size,
- valid_batch_size,
- windows_batch_size,
- inference_windows_batch_size,
- start_padding_enabled,
- step_size=1,
- num_lr_decays=0,
- early_stop_patience_steps=-1,
- scaler_type="identity",
- futr_exog_list=None,
- hist_exog_list=None,
- stat_exog_list=None,
- exclude_insample_y=False,
- drop_last_loader=False,
- random_seed=1,
- alias=None,
- optimizer=None,
- optimizer_kwargs=None,
- lr_scheduler=None,
- lr_scheduler_kwargs=None,
- dataloader_kwargs=None,
- **trainer_kwargs,
- ):
- super().__init__(
- random_seed=random_seed,
- loss=loss,
- valid_loss=valid_loss,
- optimizer=optimizer,
- optimizer_kwargs=optimizer_kwargs,
- lr_scheduler=lr_scheduler,
- lr_scheduler_kwargs=lr_scheduler_kwargs,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- max_steps=max_steps,
- early_stop_patience_steps=early_stop_patience_steps,
- **trainer_kwargs,
- )
-
- # Padder to complete train windows,
- # example y=[1,2,3,4,5] h=3 -> last y_output = [5,0,0]
- self.h = h
- self.input_size = input_size
- self.windows_batch_size = windows_batch_size
- self.start_padding_enabled = start_padding_enabled
- if start_padding_enabled:
- self.padder_train = nn.ConstantPad1d(
- padding=(self.input_size - 1, self.h), value=0.0
- )
- else:
- self.padder_train = nn.ConstantPad1d(padding=(0, self.h), value=0.0)
-
- # Batch sizes
- self.batch_size = batch_size
- if valid_batch_size is None:
- self.valid_batch_size = batch_size
- else:
- self.valid_batch_size = valid_batch_size
- if inference_windows_batch_size is None:
- self.inference_windows_batch_size = windows_batch_size
- else:
- self.inference_windows_batch_size = inference_windows_batch_size
-
- # Optimization
- self.learning_rate = learning_rate
- self.max_steps = max_steps
- self.num_lr_decays = num_lr_decays
- self.lr_decay_steps = (
- max(max_steps // self.num_lr_decays, 1) if self.num_lr_decays > 0 else 10e7
- )
- self.early_stop_patience_steps = early_stop_patience_steps
- self.val_check_steps = val_check_steps
- self.windows_batch_size = windows_batch_size
- self.step_size = step_size
-
- self.exclude_insample_y = exclude_insample_y
-
- # Scaler
- self.scaler = TemporalNorm(
- scaler_type=scaler_type,
- dim=1, # Time dimension is 1.
- num_features=1 + len(self.hist_exog_list) + len(self.futr_exog_list),
- )
-
- # Fit arguments
- self.val_size = 0
- self.test_size = 0
-
- # Model state
- self.decompose_forecast = False
-
- # DataModule arguments
- self.dataloader_kwargs = dataloader_kwargs
- self.drop_last_loader = drop_last_loader
- # used by on_validation_epoch_end hook
- self.validation_step_outputs = []
- self.alias = alias
-
- def _create_windows(self, batch, step, w_idxs=None):
- # Parse common data
- window_size = self.input_size + self.h
- temporal_cols = batch["temporal_cols"]
- temporal = batch["temporal"]
-
- if step == "train":
- if self.val_size + self.test_size > 0:
- cutoff = -self.val_size - self.test_size
- temporal = temporal[:, :, :cutoff]
-
- temporal = self.padder_train(temporal)
- if temporal.shape[-1] < window_size:
- raise Exception(
- "Time series is too short for training, consider setting a smaller input size or set start_padding_enabled=True"
- )
- windows = temporal.unfold(
- dimension=-1, size=window_size, step=self.step_size
- )
-
- # [B, C, Ws, L+H] 0, 1, 2, 3
- # -> [B * Ws, L+H, C] 0, 2, 3, 1
- windows_per_serie = windows.shape[2]
- windows = windows.permute(0, 2, 3, 1).contiguous()
- windows = windows.reshape(-1, window_size, len(temporal_cols))
-
- # Sample and Available conditions
- available_idx = temporal_cols.get_loc("available_mask")
- available_condition = windows[:, : self.input_size, available_idx]
- available_condition = torch.sum(available_condition, axis=1)
- final_condition = available_condition > 0
- if self.h > 0:
- sample_condition = windows[:, self.input_size :, available_idx]
- sample_condition = torch.sum(sample_condition, axis=1)
- final_condition = (sample_condition > 0) & (available_condition > 0)
- windows = windows[final_condition]
-
- # Parse Static data to match windows
- # [B, S_in] -> [B, Ws, S_in] -> [B*Ws, S_in]
- static = batch.get("static", None)
- static_cols = batch.get("static_cols", None)
- if static is not None:
- static = torch.repeat_interleave(
- static, repeats=windows_per_serie, dim=0
- )
- static = static[final_condition]
-
- # Protection of empty windows
- if final_condition.sum() == 0:
- raise Exception("No windows available for training")
-
- # Sample windows
- n_windows = len(windows)
- if self.windows_batch_size is not None:
- w_idxs = np.random.choice(
- n_windows,
- size=self.windows_batch_size,
- replace=(n_windows < self.windows_batch_size),
- )
- windows = windows[w_idxs]
-
- if static is not None:
- static = static[w_idxs]
-
- # think about interaction available * sample mask
- # [B, C, Ws, L+H]
- windows_batch = dict(
- temporal=windows,
- temporal_cols=temporal_cols,
- static=static,
- static_cols=static_cols,
- )
- return windows_batch
-
- elif step in ["predict", "val"]:
-
- if step == "predict":
- initial_input = temporal.shape[-1] - self.test_size
- if (
- initial_input <= self.input_size
- ): # There is not enough data to predict first timestamp
- padder_left = nn.ConstantPad1d(
- padding=(self.input_size - initial_input, 0), value=0.0
- )
- temporal = padder_left(temporal)
- predict_step_size = self.predict_step_size
- cutoff = -self.input_size - self.test_size
- temporal = temporal[:, :, cutoff:]
-
- elif step == "val":
- predict_step_size = self.step_size
- cutoff = -self.input_size - self.val_size - self.test_size
- if self.test_size > 0:
- temporal = batch["temporal"][:, :, cutoff : -self.test_size]
- else:
- temporal = batch["temporal"][:, :, cutoff:]
- if temporal.shape[-1] < window_size:
- initial_input = temporal.shape[-1] - self.val_size
- padder_left = nn.ConstantPad1d(
- padding=(self.input_size - initial_input, 0), value=0.0
- )
- temporal = padder_left(temporal)
-
- if (
- (step == "predict")
- and (self.test_size == 0)
- and (len(self.futr_exog_list) == 0)
- ):
- padder_right = nn.ConstantPad1d(padding=(0, self.h), value=0.0)
- temporal = padder_right(temporal)
-
- windows = temporal.unfold(
- dimension=-1, size=window_size, step=predict_step_size
- )
-
- # [batch, channels, windows, window_size] 0, 1, 2, 3
- # -> [batch * windows, window_size, channels] 0, 2, 3, 1
- windows_per_serie = windows.shape[2]
- windows = windows.permute(0, 2, 3, 1).contiguous()
- windows = windows.reshape(-1, window_size, len(temporal_cols))
-
- static = batch.get("static", None)
- static_cols = batch.get("static_cols", None)
- if static is not None:
- static = torch.repeat_interleave(
- static, repeats=windows_per_serie, dim=0
- )
-
- # Sample windows for batched prediction
- if w_idxs is not None:
- windows = windows[w_idxs]
- if static is not None:
- static = static[w_idxs]
-
- windows_batch = dict(
- temporal=windows,
- temporal_cols=temporal_cols,
- static=static,
- static_cols=static_cols,
- )
- return windows_batch
- else:
- raise ValueError(f"Unknown step {step}")
-
- def _normalization(self, windows, y_idx):
- # windows are already filtered by train/validation/test
-        # from the `_create_windows` method, so there is no leakage risk
- temporal = windows["temporal"] # B, L+H, C
- temporal_cols = windows["temporal_cols"].copy() # B, L+H, C
-
- # To avoid leakage uses only the lags
- # temporal_data_cols = temporal_cols.drop('available_mask').tolist()
- temporal_data_cols = self._get_temporal_exogenous_cols(
- temporal_cols=temporal_cols
- )
- temporal_idxs = get_indexer_raise_missing(temporal_cols, temporal_data_cols)
- temporal_idxs = np.append(y_idx, temporal_idxs)
- temporal_data = temporal[:, :, temporal_idxs]
- temporal_mask = temporal[:, :, temporal_cols.get_loc("available_mask")].clone()
- if self.h > 0:
- temporal_mask[:, -self.h :] = 0.0
-
- # Normalize. self.scaler stores the shift and scale for inverse transform
- temporal_mask = temporal_mask.unsqueeze(
- -1
- ) # Add channel dimension for scaler.transform.
- temporal_data = self.scaler.transform(x=temporal_data, mask=temporal_mask)
-
- # Replace values in windows dict
- temporal[:, :, temporal_idxs] = temporal_data
- windows["temporal"] = temporal
-
- return windows
-
- def _inv_normalization(self, y_hat, temporal_cols, y_idx):
- # Receives window predictions [B, H, output]
- # Broadcasts outputs and inverts normalization
-
- # Add C dimension
- if y_hat.ndim == 2:
- remove_dimension = True
- y_hat = y_hat.unsqueeze(-1)
- else:
- remove_dimension = False
-
- y_scale = self.scaler.x_scale[:, :, [y_idx]]
- y_loc = self.scaler.x_shift[:, :, [y_idx]]
-
- y_scale = torch.repeat_interleave(y_scale, repeats=y_hat.shape[-1], dim=-1).to(
- y_hat.device
- )
- y_loc = torch.repeat_interleave(y_loc, repeats=y_hat.shape[-1], dim=-1).to(
- y_hat.device
- )
-
- y_hat = self.scaler.inverse_transform(z=y_hat, x_scale=y_scale, x_shift=y_loc)
- y_loc = y_loc.to(y_hat.device)
- y_scale = y_scale.to(y_hat.device)
-
- if remove_dimension:
- y_hat = y_hat.squeeze(-1)
- y_loc = y_loc.squeeze(-1)
- y_scale = y_scale.squeeze(-1)
-
- return y_hat, y_loc, y_scale
-
- def _parse_windows(self, batch, windows):
- # Filter insample lags from outsample horizon
- y_idx = batch["y_idx"]
- mask_idx = batch["temporal_cols"].get_loc("available_mask")
-
- insample_y = windows["temporal"][:, : self.input_size, y_idx]
- insample_mask = windows["temporal"][:, : self.input_size, mask_idx]
-
- # Declare additional information
- outsample_y = None
- outsample_mask = None
- hist_exog = None
- futr_exog = None
- stat_exog = None
-
- if self.h > 0:
- outsample_y = windows["temporal"][:, self.input_size :, y_idx]
- outsample_mask = windows["temporal"][:, self.input_size :, mask_idx]
-
- if len(self.hist_exog_list):
- hist_exog_idx = get_indexer_raise_missing(
- windows["temporal_cols"], self.hist_exog_list
- )
- hist_exog = windows["temporal"][:, : self.input_size, hist_exog_idx]
-
- if len(self.futr_exog_list):
- futr_exog_idx = get_indexer_raise_missing(
- windows["temporal_cols"], self.futr_exog_list
- )
- futr_exog = windows["temporal"][:, :, futr_exog_idx]
-
- if len(self.stat_exog_list):
- static_idx = get_indexer_raise_missing(
- windows["static_cols"], self.stat_exog_list
- )
- stat_exog = windows["static"][:, static_idx]
-
- # TODO: think a better way of removing insample_y features
- if self.exclude_insample_y:
- insample_y = insample_y * 0
-
- return (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- )
-
- def training_step(self, batch, batch_idx):
- # Create and normalize windows [Ws, L+H, C]
- windows = self._create_windows(batch, step="train")
- y_idx = batch["y_idx"]
- original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx])
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- (
- insample_y,
- insample_mask,
- outsample_y,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- ) = self._parse_windows(batch, windows)
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L]
- insample_mask=insample_mask, # [Ws, L]
- futr_exog=futr_exog, # [Ws, L + h, F]
- hist_exog=hist_exog, # [Ws, L, X]
- stat_exog=stat_exog,
- ) # [Ws, S]
-
- # Model Predictions
- output = self(windows_batch)
- if self.loss.is_distribution_output:
- _, y_loc, y_scale = self._inv_normalization(
- y_hat=outsample_y, temporal_cols=batch["temporal_cols"], y_idx=y_idx
- )
- outsample_y = original_outsample_y
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- loss = self.loss(y=outsample_y, distr_args=distr_args, mask=outsample_mask)
- else:
- loss = self.loss(y=outsample_y, y_hat=output, mask=outsample_mask)
-
- if torch.isnan(loss):
- print("Model Parameters", self.hparams)
- print("insample_y", torch.isnan(insample_y).sum())
- print("outsample_y", torch.isnan(outsample_y).sum())
- print("output", torch.isnan(output).sum())
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "train_loss",
- loss.detach().item(),
- batch_size=outsample_y.size(0),
- prog_bar=True,
- on_epoch=True,
- )
- self.train_trajectories.append((self.global_step, loss.detach().item()))
- return loss
-
- def _compute_valid_loss(
- self, outsample_y, output, outsample_mask, temporal_cols, y_idx
- ):
- if self.loss.is_distribution_output:
- _, y_loc, y_scale = self._inv_normalization(
- y_hat=outsample_y, temporal_cols=temporal_cols, y_idx=y_idx
- )
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- _, sample_mean, quants = self.loss.sample(distr_args=distr_args)
-
- if str(type(self.valid_loss)) in [
-                "<class 'neuralforecast.losses.pytorch.sCRPS'>",
-                "<class 'neuralforecast.losses.pytorch.MQLoss'>",
- ]:
- output = quants
- elif str(type(self.valid_loss)) in [
-                "<class 'neuralforecast.losses.pytorch.relMSE'>"
- ]:
- output = torch.unsqueeze(sample_mean, dim=-1) # [N,H,1] -> [N,H]
-
- # Validation Loss evaluation
- if self.valid_loss.is_distribution_output:
- valid_loss = self.valid_loss(
- y=outsample_y, distr_args=distr_args, mask=outsample_mask
- )
- else:
- output, _, _ = self._inv_normalization(
- y_hat=output, temporal_cols=temporal_cols, y_idx=y_idx
- )
- valid_loss = self.valid_loss(
- y=outsample_y, y_hat=output, mask=outsample_mask
- )
- return valid_loss
-
- def validation_step(self, batch, batch_idx):
- if self.val_size == 0:
- return np.nan
-
- # TODO: Hack to compute number of windows
- windows = self._create_windows(batch, step="val")
- n_windows = len(windows["temporal"])
- y_idx = batch["y_idx"]
-
- # Number of windows in batch
- windows_batch_size = self.inference_windows_batch_size
- if windows_batch_size < 0:
- windows_batch_size = n_windows
- n_batches = int(np.ceil(n_windows / windows_batch_size))
-
- valid_losses = []
- batch_sizes = []
- for i in range(n_batches):
- # Create and normalize windows [Ws, L+H, C]
- w_idxs = np.arange(
- i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
- )
- windows = self._create_windows(batch, step="val", w_idxs=w_idxs)
- original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, y_idx])
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- (
- insample_y,
- insample_mask,
- _,
- outsample_mask,
- hist_exog,
- futr_exog,
- stat_exog,
- ) = self._parse_windows(batch, windows)
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L]
- insample_mask=insample_mask, # [Ws, L]
- futr_exog=futr_exog, # [Ws, L + h, F]
- hist_exog=hist_exog, # [Ws, L, X]
- stat_exog=stat_exog,
- ) # [Ws, S]
-
- # Model Predictions
- output_batch = self(windows_batch)
- valid_loss_batch = self._compute_valid_loss(
- outsample_y=original_outsample_y,
- output=output_batch,
- outsample_mask=outsample_mask,
- temporal_cols=batch["temporal_cols"],
- y_idx=batch["y_idx"],
- )
- valid_losses.append(valid_loss_batch)
- batch_sizes.append(len(output_batch))
-
- valid_loss = torch.stack(valid_losses)
- batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)
- batch_size = torch.sum(batch_sizes)
- valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size
-
- if torch.isnan(valid_loss):
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "valid_loss",
- valid_loss.detach().item(),
- batch_size=batch_size,
- prog_bar=True,
- on_epoch=True,
- )
- self.validation_step_outputs.append(valid_loss)
- return valid_loss
-
- def predict_step(self, batch, batch_idx):
-
- # TODO: Hack to compute number of windows
- windows = self._create_windows(batch, step="predict")
- n_windows = len(windows["temporal"])
- y_idx = batch["y_idx"]
-
- # Number of windows in batch
- windows_batch_size = self.inference_windows_batch_size
- if windows_batch_size < 0:
- windows_batch_size = n_windows
- n_batches = int(np.ceil(n_windows / windows_batch_size))
-
- y_hats = []
- for i in range(n_batches):
- # Create and normalize windows [Ws, L+H, C]
- w_idxs = np.arange(
- i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
- )
- windows = self._create_windows(batch, step="predict", w_idxs=w_idxs)
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- insample_y, insample_mask, _, _, hist_exog, futr_exog, stat_exog = (
- self._parse_windows(batch, windows)
- )
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L]
- insample_mask=insample_mask, # [Ws, L]
- futr_exog=futr_exog, # [Ws, L + h, F]
- hist_exog=hist_exog, # [Ws, L, X]
- stat_exog=stat_exog,
- ) # [Ws, S]
-
- # Model Predictions
- output_batch = self(windows_batch)
- # Inverse normalization and sampling
- if self.loss.is_distribution_output:
- _, y_loc, y_scale = self._inv_normalization(
- y_hat=torch.empty(
- size=(insample_y.shape[0], self.h),
- dtype=output_batch[0].dtype,
- device=output_batch[0].device,
- ),
- temporal_cols=batch["temporal_cols"],
- y_idx=y_idx,
- )
- distr_args = self.loss.scale_decouple(
- output=output_batch, loc=y_loc, scale=y_scale
- )
- _, sample_mean, quants = self.loss.sample(distr_args=distr_args)
- y_hat = torch.concat((sample_mean, quants), axis=2)
-
- if self.loss.return_params:
- distr_args = torch.stack(distr_args, dim=-1)
- distr_args = torch.reshape(
- distr_args, (len(windows["temporal"]), self.h, -1)
- )
- y_hat = torch.concat((y_hat, distr_args), axis=2)
- else:
- y_hat, _, _ = self._inv_normalization(
- y_hat=output_batch,
- temporal_cols=batch["temporal_cols"],
- y_idx=y_idx,
- )
- y_hats.append(y_hat)
- y_hat = torch.cat(y_hats, dim=0)
- return y_hat
-
- def fit(
- self,
- dataset,
- val_size=0,
- test_size=0,
- random_seed=None,
- distributed_config=None,
- ):
- """Fit.
-
-        The `fit` method optimizes the neural network's weights using the
- initialization parameters (`learning_rate`, `windows_batch_size`, ...)
- and the `loss` function as defined during the initialization.
- Within `fit` we use a PyTorch Lightning `Trainer` that
- inherits the initialization's `self.trainer_kwargs`, to customize
- its inputs, see [PL's trainer arguments](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
-
- The method is designed to be compatible with SKLearn-like classes
- and in particular to be compatible with the StatsForecast library.
-
-        By default the `model` does not save training checkpoints, to protect
-        disk memory; to keep them, set `enable_checkpointing=True` in `__init__`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `val_size`: int, validation size for temporal cross-validation.
- `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
- `test_size`: int, test size for temporal cross-validation.
- """
- return self._fit(
- dataset=dataset,
- batch_size=self.batch_size,
- valid_batch_size=self.valid_batch_size,
- val_size=val_size,
- test_size=test_size,
- random_seed=random_seed,
- distributed_config=distributed_config,
- )
-
- def predict(
- self,
- dataset,
- test_size=None,
- step_size=1,
- random_seed=None,
- **data_module_kwargs,
- ):
- """Predict.
-
- Neural network prediction with PL's `Trainer` execution of `predict_step`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `test_size`: int=None, test size for temporal cross-validation.
- `step_size`: int=1, Step size between each window.
- `random_seed`: int=None, random_seed for pytorch initializer and numpy generators, overwrites model.__init__'s.
- `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
- """
- self._check_exog(dataset)
- self._restart_seed(random_seed)
- data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)
-
- self.predict_step_size = step_size
- self.decompose_forecast = False
- datamodule = TimeSeriesDataModule(
- dataset=dataset,
- valid_batch_size=self.valid_batch_size,
- **data_module_kwargs,
- )
-
-        # Protect against the multiple-GPU case: PL does not support returning preds with multiple GPUs.
- pred_trainer_kwargs = self.trainer_kwargs.copy()
- if (pred_trainer_kwargs.get("accelerator", None) == "gpu") and (
- torch.cuda.device_count() > 1
- ):
- pred_trainer_kwargs["devices"] = [0]
-
- trainer = pl.Trainer(**pred_trainer_kwargs)
- fcsts = trainer.predict(self, datamodule=datamodule)
- fcsts = torch.vstack(fcsts)
- fcsts = tensor_to_numpy(fcsts).flatten()
- fcsts = fcsts.reshape(-1, len(self.loss.output_names))
- return fcsts
-
- def decompose(self, dataset, step_size=1, random_seed=None, **data_module_kwargs):
- """Decompose Predictions.
-
- Decompose the predictions through the network's layers.
- Available methods are `ESRNN`, `NHITS`, `NBEATS`, and `NBEATSx`.
-
- **Parameters:**
- `dataset`: NeuralForecast's `TimeSeriesDataset`, see [documentation here](https://nixtla.github.io/neuralforecast/tsdataset.html).
- `step_size`: int=1, step size between each window of temporal data.
- `**data_module_kwargs`: PL's TimeSeriesDataModule args, see [documentation](https://pytorch-lightning.readthedocs.io/en/1.6.1/extensions/datamodules.html#using-a-datamodule).
- """
- # Restart random seed
- if random_seed is None:
- random_seed = self.random_seed
- torch.manual_seed(random_seed)
- data_module_kwargs = self._set_quantile_for_iqloss(**data_module_kwargs)
-
- self.predict_step_size = step_size
- self.decompose_forecast = True
- datamodule = TimeSeriesDataModule(
- dataset=dataset,
- valid_batch_size=self.valid_batch_size,
- **data_module_kwargs,
- )
- trainer = pl.Trainer(**self.trainer_kwargs)
- fcsts = trainer.predict(self, datamodule=datamodule)
- self.decompose_forecast = False # Default decomposition back to false
- fcsts = torch.vstack(fcsts)
- return tensor_to_numpy(fcsts)
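
The deleted `_create_windows` above slices each padded series into overlapping `input_size + h` windows with `Tensor.unfold`, then permutes and reshapes so that series and windows share a single batch dimension. A self-contained toy sketch of that shape bookkeeping (sizes are made up for illustration):

```python
# Toy sketch of the unfold/permute/reshape windowing used in _create_windows.
import torch

B, C, T = 2, 3, 10            # series, channels (y + exog + mask), time steps
input_size, h, step = 4, 2, 1
window_size = input_size + h  # L + H

temporal = torch.randn(B, C, T)
windows = temporal.unfold(dimension=-1, size=window_size, step=step)  # [B, C, Ws, L+H]
windows = windows.permute(0, 2, 3, 1).contiguous()                    # [B, Ws, L+H, C]
windows = windows.reshape(-1, window_size, C)                         # [B*Ws, L+H, C]

# Ws = (T - window_size) // step + 1 = 5 windows per serie
print(windows.shape)  # torch.Size([10, 6, 3])
```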
diff --git a/neuralforecast/common/_model_checks.py b/neuralforecast/common/_model_checks.py
new file mode 100644
index 000000000..ab387c0ff
--- /dev/null
+++ b/neuralforecast/common/_model_checks.py
@@ -0,0 +1,224 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/common.model_checks.ipynb.
+
+# %% auto 0
+__all__ = ['seed', 'test_size', 'FREQ', 'N_SERIES_1', 'df', 'max_ds', 'Y_TRAIN_DF_1', 'Y_TEST_DF_1', 'N_SERIES_2', 'Y_TRAIN_DF_2',
+ 'Y_TEST_DF_2', 'N_SERIES_3', 'STATIC_3', 'Y_TRAIN_DF_3', 'Y_TEST_DF_3', 'N_SERIES_4', 'STATIC_4',
+ 'Y_TRAIN_DF_4', 'Y_TEST_DF_4', 'check_loss_functions', 'check_airpassengers', 'check_model']
+
+# %% ../../nbs/common.model_checks.ipynb 4
+import pandas as pd
+import neuralforecast.losses.pytorch as losses
+
+from .. import NeuralForecast
+from neuralforecast.utils import (
+ AirPassengersPanel,
+ AirPassengersStatic,
+ generate_series,
+)
+
+# %% ../../nbs/common.model_checks.ipynb 5
+seed = 0
+test_size = 14
+FREQ = "D"
+
+# 1 series, no exogenous
+N_SERIES_1 = 1
+df = generate_series(n_series=N_SERIES_1, seed=seed, freq=FREQ, equal_ends=True)
+max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)
+Y_TRAIN_DF_1 = df[df.ds < max_ds]
+Y_TEST_DF_1 = df[df.ds >= max_ds]
+
+# 5 series, no exogenous
+N_SERIES_2 = 5
+df = generate_series(n_series=N_SERIES_2, seed=seed, freq=FREQ, equal_ends=True)
+max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)
+Y_TRAIN_DF_2 = df[df.ds < max_ds]
+Y_TEST_DF_2 = df[df.ds >= max_ds]
+
+# 1 series, with static and temporal exogenous
+N_SERIES_3 = 1
+df, STATIC_3 = generate_series(
+ n_series=N_SERIES_3,
+ n_static_features=2,
+ n_temporal_features=2,
+ seed=seed,
+ freq=FREQ,
+ equal_ends=True,
+)
+max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)
+Y_TRAIN_DF_3 = df[df.ds < max_ds]
+Y_TEST_DF_3 = df[df.ds >= max_ds]
+
+# 5 series, with static and temporal exogenous
+N_SERIES_4 = 5
+df, STATIC_4 = generate_series(
+ n_series=N_SERIES_4,
+ n_static_features=2,
+ n_temporal_features=2,
+ seed=seed,
+ freq=FREQ,
+ equal_ends=True,
+)
+max_ds = df.ds.max() - pd.Timedelta(test_size, FREQ)
+Y_TRAIN_DF_4 = df[df.ds < max_ds]
+Y_TEST_DF_4 = df[df.ds >= max_ds]
+
+
+# Generic test that fits and predicts a model class with a given config on several datasets
+def _run_model_tests(model_class, config):
+ if model_class.RECURRENT:
+ config["inference_input_size"] = config["input_size"]
+
+ # DF_1
+ if model_class.MULTIVARIATE:
+ config["n_series"] = N_SERIES_1
+ if isinstance(config["loss"], losses.relMSE):
+ config["loss"].y_train = Y_TRAIN_DF_1["y"].values
+ if isinstance(config["valid_loss"], losses.relMSE):
+ config["valid_loss"].y_train = Y_TRAIN_DF_1["y"].values
+
+ model = model_class(**config)
+ fcst = NeuralForecast(models=[model], freq=FREQ)
+ fcst.fit(df=Y_TRAIN_DF_1, val_size=24)
+ _ = fcst.predict(futr_df=Y_TEST_DF_1)
+ # DF_2
+ if model_class.MULTIVARIATE:
+ config["n_series"] = N_SERIES_2
+ if isinstance(config["loss"], losses.relMSE):
+ config["loss"].y_train = Y_TRAIN_DF_2["y"].values
+ if isinstance(config["valid_loss"], losses.relMSE):
+ config["valid_loss"].y_train = Y_TRAIN_DF_2["y"].values
+ model = model_class(**config)
+ fcst = NeuralForecast(models=[model], freq=FREQ)
+ fcst.fit(df=Y_TRAIN_DF_2, val_size=24)
+ _ = fcst.predict(futr_df=Y_TEST_DF_2)
+
+ if model.EXOGENOUS_STAT and model.EXOGENOUS_FUTR:
+ # DF_3
+ if model_class.MULTIVARIATE:
+ config["n_series"] = N_SERIES_3
+ if isinstance(config["loss"], losses.relMSE):
+ config["loss"].y_train = Y_TRAIN_DF_3["y"].values
+ if isinstance(config["valid_loss"], losses.relMSE):
+ config["valid_loss"].y_train = Y_TRAIN_DF_3["y"].values
+ model = model_class(**config)
+ fcst = NeuralForecast(models=[model], freq=FREQ)
+ fcst.fit(df=Y_TRAIN_DF_3, static_df=STATIC_3, val_size=24)
+ _ = fcst.predict(futr_df=Y_TEST_DF_3)
+
+ # DF_4
+ if model_class.MULTIVARIATE:
+ config["n_series"] = N_SERIES_4
+ if isinstance(config["loss"], losses.relMSE):
+ config["loss"].y_train = Y_TRAIN_DF_4["y"].values
+ if isinstance(config["valid_loss"], losses.relMSE):
+ config["valid_loss"].y_train = Y_TRAIN_DF_4["y"].values
+ model = model_class(**config)
+ fcst = NeuralForecast(models=[model], freq=FREQ)
+ fcst.fit(df=Y_TRAIN_DF_4, static_df=STATIC_4, val_size=24)
+ _ = fcst.predict(futr_df=Y_TEST_DF_4)
+
+
+# Tests a model against every loss function
+def check_loss_functions(model_class):
+ loss_list = [
+ losses.MAE(),
+ losses.MSE(),
+ losses.RMSE(),
+ losses.MAPE(),
+ losses.SMAPE(),
+ losses.MASE(seasonality=7),
+ losses.QuantileLoss(q=0.5),
+ losses.MQLoss(),
+ losses.IQLoss(),
+ losses.DistributionLoss("Normal"),
+ losses.DistributionLoss("StudentT"),
+ losses.DistributionLoss("Poisson"),
+ losses.DistributionLoss("NegativeBinomial"),
+ losses.DistributionLoss("Tweedie", rho=1.5),
+ losses.DistributionLoss("ISQF"),
+ losses.PMM(),
+ losses.PMM(weighted=True),
+ losses.GMM(),
+ losses.GMM(weighted=True),
+ losses.NBMM(),
+ losses.NBMM(weighted=True),
+ losses.HuberLoss(),
+ losses.TukeyLoss(),
+ losses.HuberQLoss(q=0.5),
+ losses.HuberMQLoss(),
+ ]
+ for loss in loss_list:
+ test_name = f"{model_class.__name__}: checking {loss._get_name()}"
+ print(f"{test_name}")
+ config = {
+ "max_steps": 2,
+ "h": 7,
+ "input_size": 28,
+ "loss": loss,
+ "valid_loss": None,
+ "enable_progress_bar": False,
+ "enable_model_summary": False,
+ "val_check_steps": 2,
+ }
+ try:
+ _run_model_tests(model_class, config)
+ except RuntimeError:
+ raise Exception(f"{test_name} failed.")
+ except Exception:
+ print(f"{test_name} skipped on raised Exception.")
+ pass
+
+
+# Tests a model against the AirPassengers dataset
+def check_airpassengers(model_class):
+ print(f"{model_class.__name__}: checking forecast AirPassengers dataset")
+ Y_train_df = AirPassengersPanel[
+ AirPassengersPanel.ds < AirPassengersPanel["ds"].values[-12]
+ ] # 132 train
+ Y_test_df = AirPassengersPanel[
+ AirPassengersPanel.ds >= AirPassengersPanel["ds"].values[-12]
+ ].reset_index(
+ drop=True
+ ) # 12 test
+
+ config = {
+ "max_steps": 2,
+ "h": 12,
+ "input_size": 24,
+ "enable_progress_bar": False,
+ "enable_model_summary": False,
+ "val_check_steps": 2,
+ }
+
+ if model_class.MULTIVARIATE:
+ config["n_series"] = Y_train_df["unique_id"].nunique()
+ # Normal forecast
+ fcst = NeuralForecast(models=[model_class(**config)], freq="M")
+ fcst.fit(df=Y_train_df, static_df=AirPassengersStatic)
+ _ = fcst.predict(futr_df=Y_test_df)
+
+ # Cross-validation
+ fcst = NeuralForecast(models=[model_class(**config)], freq="M")
+ _ = fcst.cross_validation(
+ df=AirPassengersPanel, static_df=AirPassengersStatic, n_windows=2, step_size=12
+ )
+
+
+# Entry point: register additional unit-test checks in this function
+def check_model(model_class, checks=["losses", "airpassengers"]):
+ """
+ Check model with various tests. Options for checks are:
+ "losses": test the model against all loss functions
+ "airpassengers": test the model against the airpassengers dataset for forecasting and cross-validation
+
+ """
+ if "losses" in checks:
+ check_loss_functions(model_class)
+ if "airpassengers" in checks:
+ try:
+ check_airpassengers(model_class)
+ except RuntimeError:
+ raise Exception(
+ f"{model_class.__name__}: AirPassengers forecast test failed."
+ )
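
The new `_model_checks` module exposes `check_model` as a single entry point for these smoke tests. A hedged usage sketch (NHITS is only an example; any class exported by `neuralforecast.models` should work the same way):

```python
# Usage sketch for the new check_model helper.
from neuralforecast.models import NHITS
from neuralforecast.common._model_checks import check_model

# Quick check: forecasting and cross-validation on AirPassengers only
check_model(NHITS, checks=["airpassengers"])

# Full battery: every supported loss function plus AirPassengers
check_model(NHITS, checks=["losses", "airpassengers"])
```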
diff --git a/neuralforecast/common/_modules.py b/neuralforecast/common/_modules.py
index d50228b87..852968bd0 100644
--- a/neuralforecast/common/_modules.py
+++ b/neuralforecast/common/_modules.py
@@ -4,7 +4,7 @@
__all__ = ['ACTIVATIONS', 'MLP', 'Chomp1d', 'CausalConv1d', 'TemporalConvolutionEncoder', 'TransEncoderLayer', 'TransEncoder',
'TransDecoderLayer', 'TransDecoder', 'AttentionLayer', 'PositionalEmbedding', 'TokenEmbedding',
'TimeFeatureEmbedding', 'FixedEmbedding', 'TemporalEmbedding', 'DataEmbedding', 'MovingAvg', 'SeriesDecomp',
- 'RevIN']
+ 'RevIN', 'RevINMultivariate']
# %% ../../nbs/common.modules.ipynb 3
import math
@@ -601,3 +601,66 @@ def _denormalize(self, x):
else:
x = x + self.mean
return x
+
+# %% ../../nbs/common.modules.ipynb 21
+class RevINMultivariate(nn.Module):
+ """
+ ReversibleInstanceNorm1d for Multivariate models
+ """
+
+ def __init__(
+ self,
+ num_features: int,
+ eps=1e-5,
+ affine=False,
+ subtract_last=False,
+ non_norm=False,
+ ):
+ super().__init__()
+ self.num_features = num_features
+ self.eps = eps
+ self.affine = affine
+ if self.affine:
+ self._init_params()
+
+ def forward(self, x, mode: str):
+ if mode == "norm":
+ x = self._normalize(x)
+ elif mode == "denorm":
+ x = self._denormalize(x)
+ else:
+ raise NotImplementedError
+ return x
+
+ def _init_params(self):
+ # initialize RevIN params: (C,)
+ self.affine_weight = nn.Parameter(torch.ones((1, 1, self.num_features)))
+ self.affine_bias = nn.Parameter(torch.zeros((1, 1, self.num_features)))
+
+ def _normalize(self, x):
+ # Batch statistics
+ self.batch_mean = torch.mean(x, axis=1, keepdim=True).detach()
+ self.batch_std = torch.sqrt(
+ torch.var(x, axis=1, keepdim=True, unbiased=False) + self.eps
+ ).detach()
+
+ # Instance normalization
+ x = x - self.batch_mean
+ x = x / self.batch_std
+
+ if self.affine:
+ x = x * self.affine_weight
+ x = x + self.affine_bias
+
+ return x
+
+ def _denormalize(self, x):
+ # Reverse the normalization
+ if self.affine:
+ x = x - self.affine_bias
+ x = x / self.affine_weight
+
+ x = x * self.batch_std
+ x = x + self.batch_mean
+
+ return x
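
A quick round-trip check for the `RevINMultivariate` module added above, in its default non-affine mode (shapes are illustrative):

```python
# Round-trip sketch: "norm" followed by "denorm" should reconstruct the input.
import torch
from neuralforecast.common._modules import RevINMultivariate

B, T, N = 4, 24, 7                     # batch, time steps, series
x = torch.randn(B, T, N)

revin = RevINMultivariate(num_features=N)
x_norm = revin(x, mode="norm")         # per-series instance normalization over time
x_back = revin(x_norm, mode="denorm")  # reverses it with the stored batch statistics

print(torch.allclose(x, x_back, atol=1e-5))  # True
```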
diff --git a/neuralforecast/common/_scalers.py b/neuralforecast/common/_scalers.py
index c45b58d62..f11187d21 100644
--- a/neuralforecast/common/_scalers.py
+++ b/neuralforecast/common/_scalers.py
@@ -402,11 +402,11 @@ def __init__(self, scaler_type="robust", dim=-1, eps=1e-6, num_features=None):
def _init_params(self, num_features):
# Initialize RevIN scaler params to broadcast:
if self.dim == 1: # [B,T,C] [1,1,C]
- self.revin_bias = nn.Parameter(torch.zeros(1, 1, num_features))
- self.revin_weight = nn.Parameter(torch.ones(1, 1, num_features))
+ self.revin_bias = nn.Parameter(torch.zeros(1, 1, num_features, 1))
+ self.revin_weight = nn.Parameter(torch.ones(1, 1, num_features, 1))
elif self.dim == -1: # [B,C,T] [1,C,1]
- self.revin_bias = nn.Parameter(torch.zeros(1, num_features, 1))
- self.revin_weight = nn.Parameter(torch.ones(1, num_features, 1))
+ self.revin_bias = nn.Parameter(torch.zeros(1, num_features, 1, 1))
+ self.revin_weight = nn.Parameter(torch.ones(1, num_features, 1, 1))
# @torch.no_grad()
def transform(self, x, mask):
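
The extra trailing singleton dimension added to the RevIN scaler parameters above lets them broadcast against an additional trailing axis. A small sketch, assuming a [B, T, C, N] input layout (the layout is an assumption, used only to show why the old [1, 1, C] shape no longer lines up):

```python
# Broadcasting sketch for the reshaped RevIN parameters; [B, T, C, N] is assumed.
import torch

B, T, C, N = 2, 8, 3, 5
x = torch.randn(B, T, C, N)

weight_new = torch.ones(1, 1, C, 1)  # broadcasts against [B, T, C, N]
print((x * weight_new).shape)        # torch.Size([2, 8, 3, 5])

# The old [1, 1, C] shape aligns its last axis with N (=5), not C (=3),
# so x * torch.ones(1, 1, C) raises a shape mismatch for this input.
```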
diff --git a/neuralforecast/core.py b/neuralforecast/core.py
index f8b254745..d21ffba4e 100644
--- a/neuralforecast/core.py
+++ b/neuralforecast/core.py
@@ -28,6 +28,7 @@
from .common._base_model import DistributedConfig
from .compat import SparkDataFrame
+from .losses.pytorch import IQLoss
from neuralforecast.tsdataset import (
_FilesDataset,
TimeSeriesDataset,
@@ -68,7 +69,12 @@
RMoK,
)
from .common._base_auto import BaseAuto, MockTrial
-from .utils import PredictionIntervals, get_prediction_interval_method
+from neuralforecast.utils import (
+ PredictionIntervals,
+ get_prediction_interval_method,
+ level_to_quantiles,
+ quantiles_to_level,
+)
# %% ../nbs/core.ipynb 5
# this disables warnings about the number of workers in the dataloaders
@@ -250,6 +256,7 @@ def __init__(
# Flags and attributes
self._fitted = False
self._reset_models()
+ self._add_level = False
def _scalers_fit_transform(self, dataset: TimeSeriesDataset) -> None:
self.scalers_ = {}
@@ -652,13 +659,16 @@ def _get_model_names(self, add_level=False) -> List[str]:
names: List[str] = []
count_names = {"model": 0}
for model in self.models:
- if add_level and model.loss.outputsize_multiplier > 1:
- continue
-
model_name = repr(model)
count_names[model_name] = count_names.get(model_name, -1) + 1
if count_names[model_name] > 0:
model_name += str(count_names[model_name])
+
+ if add_level and (
+ model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)
+ ):
+ continue
+
names.extend(model_name + n for n in model.loss.output_names)
return names
@@ -782,6 +792,7 @@ def predict(
verbose: bool = False,
engine=None,
level: Optional[List[Union[int, float]]] = None,
+ quantiles: Optional[List[float]] = None,
**data_kwargs,
):
"""Predict with core.NeuralForecast.
@@ -803,6 +814,8 @@ def predict(
Distributed engine for inference. Only used if df is a spark dataframe or if fit was called on a spark dataframe.
level : list of ints or floats, optional (default=None)
Confidence levels between 0 and 100.
+ quantiles : list of floats, optional (default=None)
+ Alternative to level, target quantiles to predict.
data_kwargs : kwargs
Extra arguments to be passed to the dataset within each model.
@@ -818,6 +831,22 @@ def predict(
if not self._fitted:
raise Exception("You must fit the model before predicting.")
+ quantiles_ = None
+ level_ = None
+ has_level = False
+ if level is not None:
+ has_level = True
+ if quantiles is not None:
+ raise ValueError("You can't set both level and quantiles.")
+ level_ = sorted(list(set(level)))
+ quantiles_ = level_to_quantiles(level_)
+
+ if quantiles is not None:
+ if level is not None:
+ raise ValueError("You can't set both level and quantiles.")
+ quantiles_ = sorted(list(set(quantiles)))
+ level_ = quantiles_to_level(quantiles_)
+
needed_futr_exog = self._get_needed_futr_exog()
if needed_futr_exog:
if futr_df is None:
@@ -869,8 +898,6 @@ def predict(
if verbose:
print("Using stored dataset.")
- cols = self._get_model_names()
-
# Placeholder dataframe for predictions with unique_id and ds
fcsts_df = ufp.make_future_dataframe(
uids=uids,
@@ -913,56 +940,26 @@ def predict(
self._scalers_transform(futr_dataset)
dataset = dataset.append(futr_dataset)
- col_idx = 0
- fcsts = np.full(
- (self.h * len(uids), len(cols)), fill_value=np.nan, dtype=np.float32
+ fcsts, cols = self._generate_forecasts(
+ dataset=dataset,
+ uids=uids,
+ quantiles_=quantiles_,
+ level_=level_,
+ has_level=has_level,
+ **data_kwargs,
)
- for model in self.models:
- old_test_size = model.get_test_size()
- model.set_test_size(self.h) # To predict h steps ahead
- model_fcsts = model.predict(dataset=dataset, **data_kwargs)
- # Append predictions in memory placeholder
- output_length = len(model.loss.output_names)
- fcsts[:, col_idx : col_idx + output_length] = model_fcsts
- col_idx += output_length
- model.set_test_size(old_test_size) # Set back to original value
+
if self.scalers_:
indptr = np.append(0, np.full(len(uids), self.h).cumsum())
fcsts = self._scalers_target_inverse_transform(fcsts, indptr)
# Declare predictions pd.DataFrame
- cols = (
- self._get_model_names()
- ) # Needed for IQLoss as column names may have changed during the call to .predict()
if isinstance(fcsts_df, pl_DataFrame):
fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))
else:
fcsts = pd.DataFrame(fcsts, columns=cols)
fcsts_df = ufp.horizontal_concat([fcsts_df, fcsts])
- # add prediction intervals
- if level is not None:
- if self._cs_df is None or self.prediction_intervals is None:
- raise Exception(
- "You must fit the model with prediction_intervals to use level."
- )
- else:
- level_ = sorted(level)
- model_names = self._get_model_names(add_level=True)
- prediction_interval_method = get_prediction_interval_method(
- self.prediction_intervals.method
- )
-
- fcsts_df = prediction_interval_method(
- fcsts_df,
- self._cs_df,
- model_names=list(model_names),
- level=level_,
- cs_n_windows=self.prediction_intervals.n_windows,
- n_series=len(uids),
- horizon=self.h,
- )
-
return fcsts_df
def _reset_models(self):
@@ -1008,15 +1005,6 @@ def _no_refit_cross_validation(
"Validation and test sets are larger than the shorter time-series."
)
- cols = []
- count_names = {"model": 0}
- for model in self.models:
- model_name = repr(model)
- count_names[model_name] = count_names.get(model_name, -1) + 1
- if count_names[model_name] > 0:
- model_name += str(count_names[model_name])
- cols += [model_name + n for n in model.loss.output_names]
-
fcsts_df = ufp.cv_times(
times=self.ds,
uids=self.uids,
@@ -1030,23 +1018,22 @@ def _no_refit_cross_validation(
# the cv_times is sorted by window and then id
fcsts_df = ufp.sort(fcsts_df, [id_col, "cutoff", time_col])
- col_idx = 0
- fcsts = np.full(
- (self.dataset.n_groups * self.h * n_windows, len(cols)),
- np.nan,
- dtype=np.float32,
- )
-
+ fcsts_list: List = []
for model in self.models:
+ if self._add_level and (
+ model.loss.outputsize_multiplier > 1 or isinstance(model.loss, IQLoss)
+ ):
+ continue
+
model.fit(dataset=self.dataset, val_size=val_size, test_size=test_size)
model_fcsts = model.predict(
self.dataset, step_size=step_size, **data_kwargs
)
# Append predictions in memory placeholder
- output_length = len(model.loss.output_names)
- fcsts[:, col_idx : (col_idx + output_length)] = model_fcsts
- col_idx += output_length
+ fcsts_list.append(model_fcsts)
+
+ fcsts = np.concatenate(fcsts_list, axis=-1)
# we may have allocated more space than needed
# each serie can produce at most (serie.size - 1) // self.h CV windows
effective_sizes = ufp.counts_by_id(fcsts_df, id_col)["counts"].to_numpy()
@@ -1074,6 +1061,7 @@ def _no_refit_cross_validation(
self._fitted = True
# Add predictions to forecasts DataFrame
+ cols = self._get_model_names(add_level=self._add_level)
if isinstance(self.uids, pl_Series):
fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))
else:
@@ -1104,6 +1092,7 @@ def cross_validation(
target_col: str = "y",
prediction_intervals: Optional[PredictionIntervals] = None,
level: Optional[List[Union[int, float]]] = None,
+ quantiles: Optional[List[float]] = None,
**data_kwargs,
) -> DataFrame:
"""Temporal Cross-Validation with core.NeuralForecast.
@@ -1143,7 +1132,9 @@ def cross_validation(
prediction_intervals : PredictionIntervals, optional (default=None)
Configuration to calibrate prediction intervals (Conformal Prediction).
level : list of ints or floats, optional (default=None)
- Confidence levels between 0 and 100. Use with prediction_intervals.
+ Confidence levels between 0 and 100.
+ quantiles : list of floats, optional (default=None)
+ Alternative to level, target quantiles to predict.
data_kwargs : kwargs
Extra arguments to be passed to the dataset within each model.
@@ -1170,17 +1161,19 @@ def cross_validation(
self._reset_models()
# Checks for prediction intervals
- if prediction_intervals is not None or level is not None:
- if level is None:
- warnings.warn("Level not provided, using level=[90].")
- level = [90]
- if prediction_intervals is None:
- raise Exception("You must set prediction_intervals to use level.")
+ if prediction_intervals is not None:
+ if level is None and quantiles is None:
+ raise Exception(
+ "When passing prediction_intervals you need to set the level or quantiles argument."
+ )
if not refit:
raise Exception(
- "Passing prediction_intervals and/or level is only supported with refit=True."
+ "Passing prediction_intervals is only supported with refit=True."
)
+ if level is not None and quantiles is not None:
+ raise ValueError("You can't set both level and quantiles argument.")
+
if not refit:
return self._no_refit_cross_validation(
@@ -1238,6 +1231,7 @@ def cross_validation(
futr_df=futr_df,
verbose=verbose,
level=level,
+ quantiles=quantiles,
**data_kwargs,
)
preds = ufp.join(preds, cutoffs, on=id_col, how="left")
@@ -1255,7 +1249,7 @@ def cross_validation(
cols_order = first_out_cols + remaining_cols + [target_col]
return ufp.sort(out[cols_order], by=[id_col, "cutoff", time_col])
- def predict_insample(self, step_size: int = 1):
+ def predict_insample(self, step_size: int = 1, **data_kwargs):
"""Predict insample with core.NeuralForecast.
`core.NeuralForecast`'s `predict_insample` uses stored fitted `models`
@@ -1276,26 +1270,6 @@ def predict_insample(self, step_size: int = 1):
"The models must be fitted first with `fit` or `cross_validation`."
)
- for model in self.models:
- if model.SAMPLING_TYPE == "recurrent":
- warnings.warn(
- f"Predict insample might not provide accurate predictions for \
- recurrent model {repr(model)} class yet due to scaling."
- )
- print(
- f"WARNING: Predict insample might not provide accurate predictions for \
- recurrent model {repr(model)} class yet due to scaling."
- )
-
- cols = []
- count_names = {"model": 0}
- for model in self.models:
- model_name = repr(model)
- count_names[model_name] = count_names.get(model_name, -1) + 1
- if count_names[model_name] > 0:
- model_name += str(count_names[model_name])
- cols += [model_name + n for n in model.loss.output_names]
-
# Remove test set from dataset and last dates
test_size = self.models[0].get_test_size()
@@ -1334,9 +1308,7 @@ def predict_insample(self, step_size: int = 1):
time_col=self.time_col,
)
- col_idx = 0
- fcsts = np.full((len(fcsts_df), len(cols)), np.nan, dtype=np.float32)
-
+ fcsts_list: List = []
for model in self.models:
# Test size is the number of periods to forecast (full size of trimmed dataset)
model.set_test_size(test_size=trimmed_dataset.max_size)
@@ -1344,10 +1316,9 @@ def predict_insample(self, step_size: int = 1):
# Predict
model_fcsts = model.predict(trimmed_dataset, step_size=step_size)
# Append predictions in memory placeholder
- output_length = len(model.loss.output_names)
- fcsts[:, col_idx : (col_idx + output_length)] = model_fcsts
- col_idx += output_length
+ fcsts_list.append(model_fcsts)
model.set_test_size(test_size=test_size) # Set original test_size
+ fcsts = np.concatenate(fcsts_list, axis=-1)
# original y
original_y = {
@@ -1357,6 +1328,7 @@ def predict_insample(self, step_size: int = 1):
}
# Add predictions to forecasts DataFrame
+ cols = self._get_model_names()
if isinstance(self.uids, pl_Series):
fcsts = pl_DataFrame(dict(zip(cols, fcsts.T)))
Y_df = pl_DataFrame(original_y)
@@ -1632,6 +1604,7 @@ def _conformity_scores(
"Please reduce the number of windows, horizon or remove those series."
)
+ self._add_level = True
cv_results = self.cross_validation(
df=df,
static_df=static_df,
@@ -1640,6 +1613,7 @@ def _conformity_scores(
time_col=time_col,
target_col=target_col,
)
+ self._add_level = False
kept = [time_col, id_col, "cutoff"]
# conformity score for each model
@@ -1651,3 +1625,126 @@ def _conformity_scores(
cv_results = ufp.assign_columns(cv_results, model, abs_err)
dropped = list(set(cv_results.columns) - set(kept))
return ufp.drop_columns(cv_results, dropped)
+
+ def _generate_forecasts(
+ self,
+ dataset: TimeSeriesDataset,
+ uids: Series,
+ quantiles_: Optional[List[float]] = None,
+ level_: Optional[List[Union[int, float]]] = None,
+ has_level: Optional[bool] = False,
+ **data_kwargs,
+ ) -> np.array:
+ fcsts_list: List = []
+ cols = []
+ count_names = {"model": 0}
+ for model in self.models:
+ old_test_size = model.get_test_size()
+ model.set_test_size(self.h) # To predict h steps ahead
+
+ # Increment model name if the same model is used more than once
+ model_name = repr(model)
+ count_names[model_name] = count_names.get(model_name, -1) + 1
+ if count_names[model_name] > 0:
+ model_name += str(count_names[model_name])
+
+ # Predict for every quantile or level if requested and the loss function supports it
+ # case 1: DistributionLoss and MixtureLosses
+ if (
+ quantiles_ is not None
+ and not isinstance(model.loss, IQLoss)
+ and hasattr(model.loss, "update_quantile")
+ and callable(model.loss.update_quantile)
+ ):
+ model_fcsts = model.predict(
+ dataset=dataset, quantiles=quantiles_, **data_kwargs
+ )
+ fcsts_list.append(model_fcsts)
+ col_names = []
+ for i, quantile in enumerate(quantiles_):
+ col_name = self._get_column_name(model_name, quantile, has_level)
+ if i == 0:
+ col_names.extend([f"{model_name}", col_name])
+ else:
+ col_names.extend([col_name])
+ if hasattr(model.loss, "return_params") and model.loss.return_params:
+ cols.extend(
+ col_names
+ + [
+ model_name + param_name
+ for param_name in model.loss.param_names
+ ]
+ )
+ else:
+ cols.extend(col_names)
+ # case 2: IQLoss
+ elif quantiles_ is not None and isinstance(model.loss, IQLoss):
+                # IQLoss does not guarantee monotonically increasing quantiles, so we apply a hack: predict a dense grid of quantiles and take the requested quantiles over that grid
+ quantiles_iqloss = np.linspace(0.01, 0.99, 20)
+ fcsts_list_iqloss = []
+ for i, quantile in enumerate(quantiles_iqloss):
+ model_fcsts = model.predict(
+ dataset=dataset, quantiles=[quantile], **data_kwargs
+ )
+ fcsts_list_iqloss.append(model_fcsts)
+ fcsts_iqloss = np.concatenate(fcsts_list_iqloss, axis=-1)
+
+ # Get the actual requested quantiles
+ model_fcsts = np.quantile(fcsts_iqloss, quantiles_, axis=-1).T
+ fcsts_list.append(model_fcsts)
+
+ # Get the right column names
+ col_names = []
+ for i, quantile in enumerate(quantiles_):
+ col_name = self._get_column_name(model_name, quantile, has_level)
+ col_names.extend([col_name])
+ cols.extend(col_names)
+ # case 3: PointLoss via prediction intervals
+ elif quantiles_ is not None and model.loss.outputsize_multiplier == 1:
+ if self.prediction_intervals is None:
+ raise AttributeError(
+ f"You have trained {model_name} with loss={type(model.loss).__name__}(). \n"
+ " You then must set `prediction_intervals` during fit to use level or quantiles during predict."
+ )
+ model_fcsts = model.predict(
+ dataset=dataset, quantiles=quantiles_, **data_kwargs
+ )
+ prediction_interval_method = get_prediction_interval_method(
+ self.prediction_intervals.method
+ )
+ fcsts_with_intervals, out_cols = prediction_interval_method(
+ model_fcsts,
+ self._cs_df,
+ model=model_name,
+ level=level_ if has_level else None,
+ cs_n_windows=self.prediction_intervals.n_windows,
+ n_series=len(uids),
+ horizon=self.h,
+ quantiles=quantiles_ if not has_level else None,
+ )
+ fcsts_list.append(fcsts_with_intervals)
+ cols.extend([model_name] + out_cols)
+            # base case: quantiles/levels were not requested, or the loss does not support them
+ else:
+ model_fcsts = model.predict(dataset=dataset, **data_kwargs)
+ fcsts_list.append(model_fcsts)
+ cols.extend(model_name + n for n in model.loss.output_names)
+ model.set_test_size(old_test_size) # Set back to original value
+ fcsts = np.concatenate(fcsts_list, axis=-1)
+
+ return fcsts, cols
+
+ @staticmethod
+ def _get_column_name(model_name, quantile, has_level) -> str:
+ if not has_level:
+ col_name = f"{model_name}_ql{quantile}"
+ elif quantile < 0.5:
+ level_lo = int(round(100 - 200 * quantile))
+ col_name = f"{model_name}-lo-{level_lo}"
+ elif quantile > 0.5:
+ level_hi = int(round(100 - 200 * (1 - quantile)))
+ col_name = f"{model_name}-hi-{level_hi}"
+ else:
+ col_name = f"{model_name}-median"
+
+ return col_name
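
The rules in `_get_column_name` above map each requested quantile to the familiar `-lo-`/`-hi-`/`-median` labels when a confidence level was given, and to a `_ql` suffix when raw quantiles were requested. A standalone re-implementation for a quick sanity check (the helper below is local to this sketch, not the library function):

```python
# Local re-implementation of the quantile -> column-name mapping shown above.
def column_name(model_name: str, quantile: float, has_level: bool) -> str:
    if not has_level:
        return f"{model_name}_ql{quantile}"
    if quantile < 0.5:
        return f"{model_name}-lo-{int(round(100 - 200 * quantile))}"
    if quantile > 0.5:
        return f"{model_name}-hi-{int(round(100 - 200 * (1 - quantile)))}"
    return f"{model_name}-median"

# Quantiles matching level=[80, 95], plus the median for illustration
for q in [0.025, 0.1, 0.5, 0.9, 0.975]:
    print(column_name("NHITS", q, has_level=True))
# NHITS-lo-95, NHITS-lo-80, NHITS-median, NHITS-hi-80, NHITS-hi-95

print(column_name("NHITS", 0.25, has_level=False))  # NHITS_ql0.25
```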
diff --git a/neuralforecast/losses/pytorch.py b/neuralforecast/losses/pytorch.py
index 53e61ebfc..2841a5e45 100644
--- a/neuralforecast/losses/pytorch.py
+++ b/neuralforecast/losses/pytorch.py
@@ -6,9 +6,8 @@
'Accuracy', 'sCRPS']
# %% ../../nbs/losses.pytorch.ipynb 4
-from typing import Optional, Union, Tuple
+from typing import Optional, Union, Tuple, List
-import math
import numpy as np
import torch
@@ -22,6 +21,9 @@
Poisson,
NegativeBinomial,
Beta,
+ Gamma,
+ MixtureSameFamily,
+ Categorical,
AffineTransform,
TransformedDistribution,
)
@@ -55,7 +57,9 @@ class BasePointLoss(torch.nn.Module):
`output_names`: Names of the outputs.
"""
- def __init__(self, horizon_weight, outputsize_multiplier, output_names):
+ def __init__(
+ self, horizon_weight=None, outputsize_multiplier=None, output_names=None
+ ):
super(BasePointLoss, self).__init__()
if horizon_weight is not None:
horizon_weight = torch.Tensor(horizon_weight.flatten())
@@ -66,10 +70,13 @@ def __init__(self, horizon_weight, outputsize_multiplier, output_names):
def domain_map(self, y_hat: torch.Tensor):
"""
- Univariate loss operates in dimension [B,T,H]/[B,H]
- This changes the network's output from [B,H,1]->[B,H]
+ Input:
+ Univariate: [B, H, 1]
+ Multivariate: [B, H, N]
+
+ Output: [B, H, N]
"""
- return y_hat.squeeze(-1)
+ return y_hat
def _compute_weights(self, y, mask):
"""
@@ -78,17 +85,18 @@ def _compute_weights(self, y, mask):
If set, check that it has the same length as the horizon in x.
"""
if mask is None:
- mask = torch.ones_like(y, device=y.device)
+ mask = torch.ones_like(y)
if self.horizon_weight is None:
- self.horizon_weight = torch.ones(mask.shape[-1])
+ weights = torch.ones_like(mask)
else:
- assert mask.shape[-1] == len(
+ assert mask.shape[1] == len(
self.horizon_weight
), "horizon_weight must have same length as Y"
+ weights = self.horizon_weight.clone()
+ weights = weights[None, :, None].to(mask.device)
+ weights = torch.ones_like(mask, device=mask.device) * weights
- weights = self.horizon_weight.clone()
- weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)
return weights * mask
# %% ../../nbs/losses.pytorch.ipynb 11
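A minimal standalone sketch (illustrative shapes only) of how the reworked weighting broadcasts `horizon_weight` over the horizon axis of a [B, H, N] mask, instead of mutating `self.horizon_weight`:

import torch

B, H, N = 4, 3, 2
mask = torch.ones(B, H, N)
horizon_weight = torch.tensor([1.0, 1.0, 2.0])             # one weight per horizon step

assert mask.shape[1] == len(horizon_weight)
weights = horizon_weight[None, :, None]                    # [1, H, 1]
weights = torch.ones_like(mask) * weights.to(mask.device)  # [B, H, N]
print((weights * mask)[0, :, 0])                           # tensor([1., 1., 2.])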
@@ -118,7 +126,8 @@ def __call__(
y: torch.Tensor,
y_hat: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ y_insample: Union[torch.Tensor, None] = None,
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -158,8 +167,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -203,7 +213,8 @@ def __call__(
y: torch.Tensor,
y_hat: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ y_insample: Union[torch.Tensor, None] = None,
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -248,8 +259,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -298,7 +310,8 @@ def __call__(
y: torch.Tensor,
y_hat: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ y_insample: Union[torch.Tensor, None] = None,
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -348,12 +361,12 @@ def __call__(
y_hat: torch.Tensor,
y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor (batch_size, output_size), Actual values.
`y_hat`: tensor (batch_size, output_size)), Predicted values.
- `y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
+ `y_insample`: tensor (batch_size, input_size), Actual insample values.
`mask`: tensor, Specifies date stamps per serie to consider in loss.
**Returns:**
@@ -366,7 +379,7 @@ def __call__(
),
axis=1,
)
- losses = _divide_no_nan(delta_y, scale[:, None])
+ losses = _divide_no_nan(delta_y, scale[:, None, None])
weights = self._compute_weights(y=y, mask=mask)
return _weighted_mean(losses=losses, weights=weights)
@@ -375,11 +388,11 @@ class relMSE(BasePointLoss):
"""Relative Mean Squared Error
Computes Relative Mean Squared Error (relMSE), as proposed by Hyndman & Koehler (2006)
as an alternative to percentage errors, to avoid measure instability.
- $$ \mathrm{relMSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{naive1}) =
- \\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{naive1})} $$
+ $$ \mathrm{relMSE}(\\mathbf{y}, \\mathbf{\hat{y}}, \\mathbf{\hat{y}}^{benchmark}) =
+ \\frac{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}})}{\mathrm{MSE}(\\mathbf{y}, \\mathbf{\hat{y}}^{benchmark})} $$
**Parameters:**
- `y_train`: numpy array, Training values.
+ `y_train`: numpy array, deprecated.
`horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
**References:**
@@ -391,34 +404,32 @@ class relMSE(BasePointLoss):
Submitted to the International Journal Forecasting, Working paper available at arxiv.](https://arxiv.org/pdf/2110.13179.pdf)
"""
- def __init__(self, y_train, horizon_weight=None):
+ def __init__(self, y_train=None, horizon_weight=None):
super(relMSE, self).__init__(
horizon_weight=horizon_weight, outputsize_multiplier=1, output_names=[""]
)
- self.y_train = y_train
+ if y_train is not None:
+ raise DeprecationWarning("y_train is no longer used and will be removed in a future release.")
self.mse = MSE(horizon_weight=horizon_weight)
def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_benchmark: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor (batch_size, output_size), Actual values.
`y_hat`: tensor (batch_size, output_size)), Predicted values.
- `y_insample`: tensor (batch_size, input_size), Actual insample Seasonal Naive predictions.
+ `y_benchmark`: tensor (batch_size, output_size), Benchmark predicted values.
`mask`: tensor, Specifies date stamps per serie to consider in loss.
**Returns:**
`relMSE`: tensor (single value).
"""
- horizon = y.shape[-1]
- last_col = self.y_train[:, -1].unsqueeze(1)
- y_naive = last_col.repeat(1, horizon)
-
- norm = self.mse(y=y, y_hat=y_naive, mask=mask) # Already weighted
+ norm = self.mse(y=y, y_hat=y_benchmark, mask=mask) # Already weighted
norm = norm + 1e-5 # Numerical stability
loss = self.mse(y=y, y_hat=y_hat, mask=mask) # Already weighted
loss = _divide_no_nan(loss, norm)
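A toy tensor check of the ratio the rewritten loss computes, now with a caller-built benchmark instead of an internal naive forecast (names here are illustrative, not library code): relMSE = MSE(y, y_hat) / MSE(y, y_benchmark).

import torch

y           = torch.tensor([[2.0, 3.0, 4.0]])
y_hat       = torch.tensor([[2.5, 3.5, 4.5]])
y_insample  = torch.tensor([[0.0, 1.0, 2.0]])
y_benchmark = y_insample[:, -1:].repeat(1, y.shape[1])     # naive1: carry the last value forward

mse       = torch.mean((y - y_hat) ** 2)                   # 0.25
mse_bench = torch.mean((y - y_benchmark) ** 2)             # 5/3
print(mse / (mse_bench + 1e-5))                            # ~0.15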
@@ -456,8 +467,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -549,38 +561,48 @@ def __init__(self, level=[80, 90], quantiles=None, horizon_weight=None):
def domain_map(self, y_hat: torch.Tensor):
"""
- Identity domain map [B,T,H,Q]/[B,H,Q]
+ Input:
+ Univariate: [B, H, 1 * Q]
+ Multivariate: [B, H, N * Q]
+
+ Output: [B, H, N, Q]
"""
- return y_hat
+ output = y_hat.reshape(
+ y_hat.shape[0], y_hat.shape[1], -1, self.outputsize_multiplier
+ )
+
+ return output
def _compute_weights(self, y, mask):
"""
Compute final weights for each datapoint (based on all weights and all masks)
Set horizon_weight to a ones[H] tensor if not set.
If set, check that it has the same length as the horizon in x.
+
+ y: [B, h, N, 1]
+ mask: [B, h, N, 1]
"""
- if mask is None:
- mask = torch.ones_like(y, device=y.device)
- else:
- mask = mask.unsqueeze(1) # Add Q dimension.
if self.horizon_weight is None:
- self.horizon_weight = torch.ones(mask.shape[-1])
+ weights = torch.ones_like(mask)
else:
- assert mask.shape[-1] == len(
+ assert mask.shape[1] == len(
self.horizon_weight
), "horizon_weight must have same length as Y"
+ weights = self.horizon_weight.clone()
+ weights = weights[None, :, None, None]
+ weights = weights.to(mask.device)
+ weights = torch.ones_like(mask, device=mask.device) * weights
- weights = self.horizon_weight.clone()
- weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)
return weights * mask
def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -590,26 +612,24 @@ def __call__(
**Returns:**
`mqloss`: tensor (single value).
"""
+ # [B, h, N] -> [B, h, N, 1]
+ if y_hat.ndim == 3:
+ y_hat = y_hat.unsqueeze(-1)
+
+ y = y.unsqueeze(-1)
+ if mask is not None:
+ mask = mask.unsqueeze(-1)
+ else:
+ mask = torch.ones_like(y, device=y.device)
+
+ error = y_hat - y
- error = y_hat - y.unsqueeze(-1)
sq = torch.maximum(-error, torch.zeros_like(error))
s1_q = torch.maximum(error, torch.zeros_like(error))
- losses = (1 / len(self.quantiles)) * (
- self.quantiles * sq + (1 - self.quantiles) * s1_q
- )
-
- if y_hat.ndim == 3: # BaseWindows
- losses = losses.swapaxes(
- -2, -1
- ) # [B,H,Q] -> [B,Q,H] (needed for horizon weighting, H at the end)
- elif y_hat.ndim == 4: # BaseRecurrent
- losses = losses.swapaxes(-2, -1)
- losses = losses.swapaxes(
- -2, -3
- ) # [B,seq_len,H,Q] -> [B,Q,seq_len,H] (needed for horizon weighting, H at the end)
+ quantiles = self.quantiles[None, None, None, :]
+ losses = (1 / len(self.quantiles)) * (quantiles * sq + (1 - quantiles) * s1_q)
weights = self._compute_weights(y=losses, mask=mask) # Use losses for extra dim
- # NOTE: Weights do not have Q dimension.
return _weighted_mean(losses=losses, weights=weights)
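A toy pinball-loss check for the new [B, H, N, Q] layout, with the quantile vector broadcast on the last axis (shapes illustrative, not library code):

import torch

B, H, N = 2, 3, 1
quantiles = torch.tensor([0.1, 0.5, 0.9])                  # Q = 3
y_hat = torch.randn(B, H, N, len(quantiles))               # per-quantile forecasts
y = torch.randn(B, H, N, 1)                                # actuals, broadcast over Q

error = y_hat - y
sq   = torch.clamp(-error, min=0.0)                        # under-prediction part
s1_q = torch.clamp(error, min=0.0)                         # over-prediction part
q = quantiles[None, None, None, :]
losses = (q * sq + (1 - q) * s1_q) / len(quantiles)        # mean over Q handled here
print(losses.shape)                                         # torch.Size([2, 3, 1, 3])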
@@ -700,9 +720,9 @@ def _init_sampling_distribution(self, device):
concentration0=concentration0, concentration1=concentration1
)
- def update_quantile(self, q: float = 0.5):
- self.q = q
- self.output_names = [f"_ql{q}"]
+ def update_quantile(self, q: List[float] = [0.5]):
+ self.q = q[0]
+ self.output_names = [f"_ql{q[0]}"]
self.has_predicted = True
def domain_map(self, y_hat):
@@ -711,9 +731,8 @@ def domain_map(self, y_hat):
Input shapes to this function:
- base_windows: y_hat = [B, h, 1]
- base_multivariate: y_hat = [B, h, n_series]
- base_recurrent: y_hat = [B, seq_len, h, n_series]
+ Univariate: y_hat = [B, h, 1]
+ Multivariate: y_hat = [B, h, N]
"""
if self.eval() and self.has_predicted:
quantiles = torch.full(
@@ -734,7 +753,7 @@ def domain_map(self, y_hat):
emb_outputs = self.output_layer(emb_inputs)
# Domain map
- y_hat = emb_outputs.squeeze(-1).squeeze(-1)
+ y_hat = emb_outputs.squeeze(-1)
return y_hat
@@ -767,20 +786,6 @@ def weighted_average(
return x.mean(dim=dim)
# %% ../../nbs/losses.pytorch.ipynb 65
-def bernoulli_domain_map(input: torch.Tensor):
- """Bernoulli Domain Map
- Maps input into distribution constraints, by construction input's
- last dimension is of matching `distr_args` length.
-
- **Parameters:**
- `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
-
- **Returns:**
- `(probs,)`: tuple with tensors of Poisson distribution arguments.
- """
- return (input.squeeze(-1),)
-
-
def bernoulli_scale_decouple(output, loc=None, scale=None):
"""Bernoulli Scale Decouple
@@ -795,22 +800,6 @@ def bernoulli_scale_decouple(output, loc=None, scale=None):
return (probs,)
-def student_domain_map(input: torch.Tensor):
- """Student T Domain Map
- Maps input into distribution constraints, by construction input's
- last dimension is of matching `distr_args` length.
-
- **Parameters:**
- `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
- `eps`: float, helps the initialization of scale for easier optimization.
-
- **Returns:**
- `(df, loc, scale)`: tuple with tensors of StudentT distribution arguments.
- """
- df, loc, scale = torch.tensor_split(input, 3, dim=-1)
- return df.squeeze(-1), loc.squeeze(-1), scale.squeeze(-1)
-
-
def student_scale_decouple(output, loc=None, scale=None, eps: float = 0.1):
"""Normal Scale Decouple
@@ -827,22 +816,6 @@ def student_scale_decouple(output, loc=None, scale=None, eps: float = 0.1):
return (df, mean, tscale)
-def normal_domain_map(input: torch.Tensor):
- """Normal Domain Map
- Maps input into distribution constraints, by construction input's
- last dimension is of matching `distr_args` length.
-
- **Parameters:**
- `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
- `eps`: float, helps the initialization of scale for easier optimization.
-
- **Returns:**
- `(mean, std)`: tuple with tensors of Normal distribution arguments.
- """
- mean, std = torch.tensor_split(input, 2, dim=-1)
- return mean.squeeze(-1), std.squeeze(-1)
-
-
def normal_scale_decouple(output, loc=None, scale=None, eps: float = 0.2):
"""Normal Scale Decouple
@@ -858,20 +831,6 @@ def normal_scale_decouple(output, loc=None, scale=None, eps: float = 0.2):
return (mean, std)
-def poisson_domain_map(input: torch.Tensor):
- """Poisson Domain Map
- Maps input into distribution constraints, by construction input's
- last dimension is of matching `distr_args` length.
-
- **Parameters:**
- `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
-
- **Returns:**
- `(rate,)`: tuple with tensors of Poisson distribution arguments.
- """
- return (input.squeeze(-1),)
-
-
def poisson_scale_decouple(output, loc=None, scale=None):
"""Poisson Scale Decouple
@@ -887,21 +846,6 @@ def poisson_scale_decouple(output, loc=None, scale=None):
return (rate,)
-def nbinomial_domain_map(input: torch.Tensor):
- """Negative Binomial Domain Map
- Maps input into distribution constraints, by construction input's
- last dimension is of matching `distr_args` length.
-
- **Parameters:**
- `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
-
- **Returns:**
- `(total_count, alpha)`: tuple with tensors of N.Binomial distribution arguments.
- """
- mu, alpha = torch.tensor_split(input, 2, dim=-1)
- return mu.squeeze(-1), alpha.squeeze(-1)
-
-
def nbinomial_scale_decouple(output, loc=None, scale=None):
"""Negative Binomial Scale Decouple
@@ -964,10 +908,12 @@ class Tweedie(Distribution):
Series B (Methodological), 49(2), 127–162. http://www.jstor.org/stable/2345415](http://www.jstor.org/stable/2345415)
"""
+ arg_constraints = {"log_mu": constraints.real}
+ support = constraints.nonnegative
+
def __init__(self, log_mu, rho, validate_args=None):
# TODO: add sigma2 dispersion
# TODO add constraints
- # arg_constraints = {'log_mu': constraints.real, 'rho': constraints.positive}
# support = constraints.real
self.log_mu = log_mu
self.rho = rho
@@ -1001,7 +947,7 @@ def sample(self, sample_shape=torch.Size()):
beta = beta.expand(shape)
N = torch.poisson(rate) + 1e-5
- gamma = torch.distributions.gamma.Gamma(N * alpha, beta)
+ gamma = Gamma(N * alpha, beta)
samples = gamma.sample()
samples[N == 0] = 0
@@ -1017,12 +963,12 @@ def log_prob(self, y_true):
return a - b
-def tweedie_domain_map(input: torch.Tensor):
+def tweedie_domain_map(input: torch.Tensor, rho: float = 1.5):
"""
Maps output of neural network to domain of distribution loss
"""
- return (input.squeeze(-1),)
+ return (input, rho)
def tweedie_scale_decouple(output, loc=None, scale=None):
@@ -1032,14 +978,14 @@ def tweedie_scale_decouple(output, loc=None, scale=None):
count and logits based on anchoring `loc`, `scale`.
Also adds Tweedie domain protection to the distribution parameters.
"""
- log_mu = output[0]
+ log_mu, rho = output
log_mu = F.softplus(log_mu)
log_mu = torch.clamp(log_mu, 1e-9, 37)
if (loc is not None) and (scale is not None):
log_mu += torch.log(loc)
log_mu = torch.clamp(log_mu, 1e-9, 37)
- return (log_mu,)
+ return (log_mu, rho)
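Since `rho` is now fixed at construction and threaded through the domain map, it would presumably be supplied via the distribution kwargs; a hedged usage sketch, assuming the constructor forwards extra keyword arguments as `distribution_kwargs` (only the "rho" key is confirmed by the hunk above):

from neuralforecast.losses.pytorch import DistributionLoss

loss = DistributionLoss(distribution="Tweedie", level=[80, 90], rho=1.5)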
# %% ../../nbs/losses.pytorch.ipynb 67
# Code adapted from: https://github.com/awslabs/gluonts/blob/61133ef6e2d88177b32ace4afc6843ab9a7bc8cd/src/gluonts/torch/distributions/isqf.py
@@ -1097,6 +1043,14 @@ def crps(self, y: torch.Tensor) -> torch.Tensor:
p = self.base_dist.crps(z)
return p * scale
+ @property
+ def mean(self):
+ """
+ Empirical mean of the transformed distribution, estimated from 1000 samples.
+ """
+ samples = self.sample([1000])
+ return samples.mean(dim=0)
+
class BaseISQF(Distribution):
"""
@@ -1753,7 +1707,7 @@ def isqf_domain_map(
last dimension is of matching `distr_args` length.
**Parameters:**
- `input`: tensor, of dimensions [B,T,H,theta] or [B,H,theta].
+ `input`: tensor, of dimensions [B, H, N * n_outputs].
`tol`: float, tolerance.
`quantiles`: tensor, quantiles used for ISQF (i.e. x-positions for the knots).
`num_pieces`: int, num_pieces used for each quantile spline.
@@ -1768,6 +1722,10 @@ def isqf_domain_map(
# Because in this case the spline knots could be squeezed together
# and cause overflow in spline CRPS computation
num_qk = len(quantiles)
+ n_outputs = 2 * (num_qk - 1) * num_pieces + 2 + num_qk
+
+ # Reshape: [B, h, N * n_outputs] -> [B, h, N, n_outputs]
+ input = input.reshape(input.shape[0], input.shape[1], -1, n_outputs)
start_index = 0
spline_knots = input[..., start_index : start_index + (num_qk - 1) * num_pieces]
start_index += (num_qk - 1) * num_pieces
@@ -1777,26 +1735,19 @@ def isqf_domain_map(
start_index += 1
beta_r = input[..., start_index : start_index + 1]
start_index += 1
- quantile_knots = input[..., start_index : start_index + num_qk]
-
- qk_y = torch.cat(
- [
- quantile_knots[..., 0:1],
- torch.abs(quantile_knots[..., 1:]) + tol,
- ],
- dim=-1,
- )
- qk_y = torch.cumsum(qk_y, dim=-1)
+ quantile_knots = F.softplus(input[..., start_index : start_index + num_qk]) + tol
+
+ qk_y = torch.cumsum(quantile_knots, dim=-1)
# Prevent overflow when we compute 1/beta
- beta_l = torch.abs(beta_l.squeeze(-1)) + tol
- beta_r = torch.abs(beta_r.squeeze(-1)) + tol
+ beta_l = F.softplus(beta_l.squeeze(-1)) + tol
+ beta_r = F.softplus(beta_r.squeeze(-1)) + tol
# Reshape spline arguments
batch_shape = spline_knots.shape[:-1]
# repeat qk_x from (num_qk,) to (*batch_shape, num_qk)
- qk_x_repeat = torch.sort(quantiles).values.repeat(*batch_shape, 1).to(input.device)
+ qk_x_repeat = quantiles.repeat(*batch_shape, 1).to(input.device)
# knots and heights have shape (*batch_shape, (num_qk-1)*num_pieces)
# reshape them to (*batch_shape, (num_qk-1), num_pieces)
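A quick worked count of the per-series output size used by the reshape above: the spline knots and heights each contribute (num_qk - 1) * num_pieces parameters, the two tail slopes contribute 2, and the quantile knots contribute num_qk.

# e.g. with 5 quantile knots and 5 spline pieces per segment (illustrative values)
num_qk, num_pieces = 5, 5
n_outputs = 2 * (num_qk - 1) * num_pieces + 2 + num_qk
print(n_outputs)  # 47 outputs per series and horizon step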
@@ -1856,7 +1807,8 @@ class DistributionLoss(torch.nn.Module):
`level`: float list [0,100], confidence levels for prediction intervals.
`quantiles`: float list [0,1], alternative to level list, target quantiles.
`num_samples`: int=500, number of samples for the empirical quantiles.
- `return_params`: bool=False, wether or not return the Distribution parameters.
+ `return_params`: bool=False, whether or not to return the Distribution parameters.
+ `horizon_weight`: Tensor of size h, weight for each timestamp of the forecasting window.
**References:**
- [PyTorch Probability Distributions Package: StudentT.](https://pytorch.org/docs/stable/distributions.html#studentt)
@@ -1908,15 +1860,6 @@ def __init__(
Tweedie=Tweedie,
ISQF=ISQF,
)
- domain_maps = dict(
- Bernoulli=bernoulli_domain_map,
- Normal=normal_domain_map,
- Poisson=poisson_domain_map,
- StudentT=student_domain_map,
- NegativeBinomial=nbinomial_domain_map,
- Tweedie=tweedie_domain_map,
- ISQF=partial(isqf_domain_map, quantiles=qs, num_pieces=num_pieces),
- )
scale_decouples = dict(
Bernoulli=bernoulli_scale_decouple,
Normal=normal_scale_decouple,
@@ -1941,9 +1884,23 @@ def __init__(
assert (
distribution in available_distributions.keys()
), f"{distribution} not available"
+ if distribution == "ISQF":
+ quantiles = torch.sort(qs).values
+ self.domain_map = partial(
+ isqf_domain_map, quantiles=quantiles, num_pieces=num_pieces
+ )
+ if return_params:
+ raise Exception("ISQF does not support 'return_params=True'")
+ elif distribution == "Tweedie":
+ rho = distribution_kwargs.pop("rho")
+ self.domain_map = partial(tweedie_domain_map, rho=rho)
+ if return_params:
+ raise Exception("Tweedie does not support 'return_params=True'")
+ else:
+ self.domain_map = self._domain_map
+
self.distribution = distribution
self._base_distribution = available_distributions[distribution]
- self.domain_map = domain_maps[distribution]
self.scale_decouple = scale_decouples[distribution]
self.distribution_kwargs = distribution_kwargs
self.num_samples = num_samples
@@ -1959,6 +1916,16 @@ def __init__(
self.outputsize_multiplier = len(self.param_names)
self.is_distribution_output = True
+ self.has_predicted = False
+
+ def _domain_map(self, input: torch.Tensor):
+ """
+ Maps output of neural network to domain of distribution loss
+
+ """
+ output = torch.tensor_split(input, self.outputsize_multiplier, dim=2)
+
+ return output
def get_distribution(self, distr_args, **distribution_kwargs) -> Distribution:
"""
@@ -1971,10 +1938,10 @@ def get_distribution(self, distr_args, **distribution_kwargs) -> Distribution:
**Returns**
`Distribution`: AffineTransformed distribution.
"""
- # TransformedDistribution(distr, [AffineTransform(loc=loc, scale=scale)])
distr = self._base_distribution(*distr_args, **distribution_kwargs)
+ self.distr_mean = distr.mean
- if self.distribution == "Poisson":
+ if self.distribution in ("Poisson", "NegativeBinomial"):
distr.support = constraints.nonnegative
return distr
@@ -1985,7 +1952,7 @@ def sample(self, distr_args: torch.Tensor, num_samples: Optional[int] = None):
**Parameters**
`distr_args`: Constructor arguments for the underlying Distribution type.
- `num_samples`: int=500, overwrite number of samples for the empirical quantiles.
+ `num_samples`: int, overwrite number of samples for the empirical quantiles.
**Returns**
`samples`: tensor, shape [B,H,`num_samples`].
@@ -1994,29 +1961,39 @@ def sample(self, distr_args: torch.Tensor, num_samples: Optional[int] = None):
if num_samples is None:
num_samples = self.num_samples
- # print(distr_args[0].size())
- B, H = distr_args[0].shape[:2]
- Q = len(self.quantiles)
-
# Instantiate Scaled Decoupled Distribution
distr = self.get_distribution(distr_args=distr_args, **self.distribution_kwargs)
samples = distr.sample(sample_shape=(num_samples,))
- samples = samples.permute(1, 2, 0) # [samples,B,H] -> [B,H,samples]
- samples = samples.view(B * H, num_samples)
- sample_mean = torch.mean(samples, dim=-1)
+ samples = samples.permute(
+ 1, 2, 3, 0
+ ) # [samples, B, H, N] -> [B, H, N, samples]
+
+ sample_mean = torch.mean(samples, dim=-1, keepdim=True)
# Compute quantiles
quantiles_device = self.quantiles.to(distr_args[0].device)
- quants = torch.quantile(input=samples, q=quantiles_device, dim=1)
- quants = quants.permute((1, 0)) # [Q, B*H] -> [B*H, Q]
-
- # Final reshapes
- samples = samples.view(B, H, num_samples)
- sample_mean = sample_mean.view(B, H, 1)
- quants = quants.view(B, H, Q)
+ quants = torch.quantile(input=samples, q=quantiles_device, dim=-1)
+ quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]
return samples, sample_mean, quants
+ def update_quantile(self, q: Optional[List[float]] = None):
+ if q is not None:
+ self.quantiles = nn.Parameter(
+ torch.tensor(q, dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = (
+ [""]
+ + [f"_ql{q_i}" for q_i in q]
+ + self.return_params * self.param_names
+ )
+ self.has_predicted = True
+ elif q is None and self.has_predicted:
+ self.quantiles = nn.Parameter(
+ torch.tensor([0.5], dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = ["", "-median"] + self.return_params * self.param_names
+
def _compute_weights(self, y, mask):
"""
Compute final weights for each datapoint (based on all weights and all masks)
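A standalone sketch (not the library class) of the bookkeeping `update_quantile` performs at predict time, and of the new sample layout that keeps the series axis:

import torch

q = [0.2, 0.8]
quantiles = torch.tensor(q)                                 # stored above as a frozen Parameter
output_names = [""] + [f"_ql{q_i}" for q_i in q]
print(output_names)                                         # ['', '_ql0.2', '_ql0.8']

# sample(): [num_samples, B, H, N] -> [B, H, N, num_samples] -> quantiles [B, H, N, Q]
samples = torch.randn(500, 4, 6, 3).permute(1, 2, 3, 0)
quants = torch.quantile(samples, q=quantiles, dim=-1).permute(1, 2, 3, 0)
print(quants.shape)                                         # torch.Size([4, 6, 3, 2])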
@@ -2024,19 +2001,18 @@ def _compute_weights(self, y, mask):
If set, check that it has the same length as the horizon in x.
"""
if mask is None:
- mask = torch.ones_like(y, device=y.device)
- else:
- mask = mask.unsqueeze(1) # Add Q dimension.
+ mask = torch.ones_like(y)
- # get uniform weights if none
if self.horizon_weight is None:
- self.horizon_weight = torch.ones(mask.shape[-1])
+ weights = torch.ones_like(mask)
else:
- assert mask.shape[-1] == len(
+ assert mask.shape[1] == len(
self.horizon_weight
), "horizon_weight must have same length as Y"
- weights = self.horizon_weight.clone()
- weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)
+ weights = self.horizon_weight.clone()
+ weights = weights[None, :, None].to(mask.device)
+ weights = torch.ones_like(mask, device=mask.device) * weights
+
return weights * mask
def __call__(
@@ -2072,7 +2048,7 @@ def __call__(
loss_weights = self._compute_weights(y=y, mask=mask)
return weighted_average(loss_values, weights=loss_weights)
-# %% ../../nbs/losses.pytorch.ipynb 74
+# %% ../../nbs/losses.pytorch.ipynb 75
class PMM(torch.nn.Module):
"""Poisson Mixture Mesh
@@ -2107,6 +2083,7 @@ def __init__(
return_params=False,
batch_correlation=False,
horizon_correlation=False,
+ weighted=False,
):
super(PMM, self).__init__()
# Transform level to MQLoss parameters
@@ -2121,21 +2098,36 @@ def __init__(
self.num_samples = num_samples
self.batch_correlation = batch_correlation
self.horizon_correlation = horizon_correlation
+ self.weighted = weighted
# If True, predict_step will return Distribution's parameters
self.return_params = return_params
+
+ lambda_names = [f"-lambda-{i}" for i in range(1, n_components + 1)]
+ if weighted:
+ weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)]
+ self.param_names = [i for j in zip(lambda_names, weight_names) for i in j]
+ else:
+ self.param_names = lambda_names
+
if self.return_params:
- self.param_names = [f"-lambda-{i}" for i in range(1, n_components + 1)]
self.output_names = self.output_names + self.param_names
# Add first output entry for the sample_mean
self.output_names.insert(0, "")
- self.outputsize_multiplier = n_components
+ self.n_outputs = 1 + weighted
+ self.n_components = n_components
+ self.outputsize_multiplier = self.n_outputs * n_components
self.is_distribution_output = True
+ self.has_predicted = False
def domain_map(self, output: torch.Tensor):
- return (output,) # , weights
+ output = output.reshape(
+ output.shape[0], output.shape[1], -1, self.outputsize_multiplier
+ )
+
+ return torch.tensor_split(output, self.n_outputs, dim=-1)
def scale_decouple(
self,
@@ -2149,26 +2141,61 @@ def scale_decouple(
variance and residual location based on anchoring `loc`, `scale`.
Also adds domain protection to the distribution parameters.
"""
- lambdas = output[0]
+ if self.weighted:
+ lambdas, weights = output
+ weights = F.softmax(weights, dim=-1)
+ else:
+ lambdas = output[0]
+
if (loc is not None) and (scale is not None):
- loc = loc.view(lambdas.size(dim=0), 1, -1)
- scale = scale.view(lambdas.size(dim=0), 1, -1)
+ if loc.ndim == 3:
+ loc = loc.unsqueeze(-1)
+ scale = scale.unsqueeze(-1)
lambdas = (lambdas * scale) + loc
- lambdas = F.softplus(lambdas)
- return (lambdas,)
- def sample(self, distr_args, num_samples=None):
+ lambdas = F.softplus(lambdas) + 1e-3
+
+ if self.weighted:
+ return (lambdas, weights)
+ else:
+ return (lambdas,)
+
+ def get_distribution(self, distr_args) -> Distribution:
+ """
+ Construct the associated Pytorch Distribution, given the collection of
+ constructor arguments and, optionally, location and scale tensors.
+
+ **Parameters**
+ `distr_args`: Constructor arguments for the underlying Distribution type.
+
+ **Returns**
+ `Distribution`: MixtureSameFamily distribution.
+ """
+ if self.weighted:
+ lambdas, weights = distr_args
+ else:
+ lambdas = distr_args[0]
+ weights = torch.full_like(lambdas, fill_value=1 / self.n_components)
+
+ mix = Categorical(weights)
+ components = Poisson(rate=lambdas)
+ components.support = constraints.nonnegative
+ distr = MixtureSameFamily(
+ mixture_distribution=mix, component_distribution=components
+ )
+
+ self.distr_mean = distr.mean
+
+ return distr
+
+ def sample(self, distr_args: torch.Tensor, num_samples: Optional[int] = None):
"""
Construct the empirical quantiles from the estimated Distribution,
sampling from it `num_samples` independently.
**Parameters**
`distr_args`: Constructor arguments for the underlying Distribution type.
- `loc`: Optional tensor, of the same shape as the batch_shape + event_shape
- of the resulting distribution.
- `scale`: Optional tensor, of the same shape as the batch_shape+event_shape
- of the resulting distribution.
- `num_samples`: int=500, overwrites number of samples for the empirical quantiles.
+ `num_samples`: int, overwrite number of samples for the empirical quantiles.
**Returns**
`samples`: tensor, shape [B,H,`num_samples`].
@@ -2177,102 +2204,77 @@ def sample(self, distr_args, num_samples=None):
if num_samples is None:
num_samples = self.num_samples
- lambdas = distr_args[0]
- B, H, K = lambdas.size()
- Q = len(self.quantiles)
-
- # Sample K ~ Mult(weights)
- # shared across B, H
- # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)
- weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device)
-
- # Avoid loop, vectorize
- weights = weights.reshape(-1, K)
- lambdas = lambdas.flatten()
-
- # Vectorization trick to recover row_idx
- sample_idxs = torch.multinomial(
- input=weights, num_samples=num_samples, replacement=True
- )
- aux_col_idx = (
- torch.unsqueeze(torch.arange(B * H, device=lambdas.device), -1) * K
- )
-
- # To device
- sample_idxs = sample_idxs.to(lambdas.device)
-
- sample_idxs = sample_idxs + aux_col_idx
- sample_idxs = sample_idxs.flatten()
-
- sample_lambdas = lambdas[sample_idxs]
+ # Instantiate Scaled Decoupled Distribution
+ distr = self.get_distribution(distr_args=distr_args)
+ samples = distr.sample(sample_shape=(num_samples,))
+ samples = samples.permute(
+ 1, 2, 3, 0
+ ) # [samples, B, H, N] -> [B, H, N, samples]
- # Sample y ~ Poisson(lambda) independently
- samples = torch.poisson(sample_lambdas).to(lambdas.device)
- samples = samples.view(B * H, num_samples)
- sample_mean = torch.mean(samples, dim=-1)
+ sample_mean = torch.mean(samples, dim=-1, keepdim=True)
# Compute quantiles
- quantiles_device = self.quantiles.to(lambdas.device)
- quants = torch.quantile(input=samples, q=quantiles_device, dim=1)
- quants = quants.permute((1, 0)) # Q, B*H
-
- # Final reshapes
- samples = samples.view(B, H, num_samples)
- sample_mean = sample_mean.view(B, H, 1)
- quants = quants.view(B, H, Q)
+ quantiles_device = self.quantiles.to(distr_args[0].device)
+ quants = torch.quantile(input=samples, q=quantiles_device, dim=-1)
+ quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]
return samples, sample_mean, quants
- def neglog_likelihood(
+ def update_quantile(self, q: Optional[List[float]] = None):
+ if q is not None:
+ self.quantiles = nn.Parameter(
+ torch.tensor(q, dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = (
+ [""]
+ + [f"_ql{q_i}" for q_i in q]
+ + self.return_params * self.param_names
+ )
+ self.has_predicted = True
+ elif q is None and self.has_predicted:
+ self.quantiles = nn.Parameter(
+ torch.tensor([0.5], dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = ["", "-median"] + self.return_params * self.param_names
+
+ def __call__(
self,
y: torch.Tensor,
- distr_args: Tuple[torch.Tensor],
+ distr_args: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
):
- if mask is None:
- mask = (y > 0) * 1
- else:
- mask = mask * ((y > 0) * 1)
-
- eps = 1e-10
- lambdas = distr_args[0]
- B, H, K = lambdas.size()
-
- weights = (1 / K) * torch.ones_like(lambdas, device=lambdas.device)
+ """
+ Computes the negative log-likelihood objective function
+ used to estimate the following predictive distribution:
- y = y[:, :, None]
- mask = mask[:, :, None]
+ $$\mathrm{P}(\mathbf{y}_{\\tau}\,|\,\\theta) \\quad \mathrm{and} \\quad -\log(\mathrm{P}(\mathbf{y}_{\\tau}\,|\,\\theta))$$
- y = y * mask # Protect y negative entries
+ where $\\theta$ represents the distribution's parameters. It additionally
+ summarizes the objective signal with a weighted average over the `mask` tensor.
- # Single Poisson likelihood
- log_pi = y.xlogy(lambdas + eps) - lambdas - (y + 1).lgamma()
+ **Parameters**
+ `y`: tensor, Actual values.
+ `distr_args`: Constructor arguments for the underlying Distribution type.
+ `mask`: tensor, Specifies date stamps per serie to consider in loss.
+ **Returns**
+ `loss`: scalar, weighted loss function against which backpropagation will be performed.
+ """
+ # Instantiate Scaled Decoupled Distribution
+ distr = self.get_distribution(distr_args=distr_args)
+ x = distr._pad(y)
+ log_prob_x = distr.component_distribution.log_prob(x)
+ log_mix_prob = torch.log_softmax(distr.mixture_distribution.logits, dim=-1)
if self.batch_correlation:
- log_pi = torch.sum(log_pi, dim=0, keepdim=True)
-
+ log_prob_x = torch.sum(log_prob_x, dim=0, keepdim=True)
if self.horizon_correlation:
- log_pi = torch.sum(log_pi, dim=1, keepdim=True)
+ log_prob_x = torch.sum(log_prob_x, dim=1, keepdim=True)
- # Numerically Stable Mixture loglikelihood
- loglik = torch.logsumexp((torch.log(weights) + log_pi), dim=2, keepdim=True)
- loglik = loglik * mask
+ loss_values = -torch.logsumexp(log_prob_x + log_mix_prob, dim=-1)
- mean = torch.sum(weights * lambdas, axis=-1, keepdims=True)
- reglrz = torch.mean(torch.square(y - mean) * mask)
- loss = -torch.mean(loglik) + 0.001 * reglrz
- return loss
-
- def __call__(
- self,
- y: torch.Tensor,
- distr_args: Tuple[torch.Tensor],
- mask: Union[torch.Tensor, None] = None,
- ):
-
- return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)
+ return weighted_average(loss_values, weights=mask)
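The rewritten objective is the standard mixture negative log-likelihood, -log sum_k w_k * Poisson(y; lambda_k), evaluated through `MixtureSameFamily`; a toy check that the logsumexp formulation above matches the built-in `log_prob` (illustrative values):

import torch
from torch.distributions import Categorical, MixtureSameFamily, Poisson

lambdas = torch.tensor([[2.0, 10.0]])                       # two components, equal weights
mix = MixtureSameFamily(Categorical(torch.tensor([[0.5, 0.5]])), Poisson(rate=lambdas))

y = torch.tensor([4.0])
x = mix._pad(y)                                             # add the component axis
log_prob_k = mix.component_distribution.log_prob(x)         # per-component log-likelihood
log_w = torch.log_softmax(mix.mixture_distribution.logits, dim=-1)
nll = -torch.logsumexp(log_prob_k + log_w, dim=-1)

assert torch.allclose(nll, -mix.log_prob(y))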
-# %% ../../nbs/losses.pytorch.ipynb 82
+# %% ../../nbs/losses.pytorch.ipynb 83
class GMM(torch.nn.Module):
"""Gaussian Mixture Mesh
@@ -2308,6 +2310,7 @@ def __init__(
return_params=False,
batch_correlation=False,
horizon_correlation=False,
+ weighted=False,
):
super(GMM, self).__init__()
# Transform level to MQLoss parameters
@@ -2322,24 +2325,39 @@ def __init__(
self.num_samples = num_samples
self.batch_correlation = batch_correlation
self.horizon_correlation = horizon_correlation
+ self.weighted = weighted
# If True, predict_step will return Distribution's parameters
self.return_params = return_params
+
+ mu_names = [f"-mu-{i}" for i in range(1, n_components + 1)]
+ std_names = [f"-std-{i}" for i in range(1, n_components + 1)]
+ if weighted:
+ weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)]
+ self.param_names = [
+ i for j in zip(mu_names, std_names, weight_names) for i in j
+ ]
+ else:
+ self.param_names = [i for j in zip(mu_names, std_names) for i in j]
+
if self.return_params:
- mu_names = [f"-mu-{i}" for i in range(1, n_components + 1)]
- std_names = [f"-std-{i}" for i in range(1, n_components + 1)]
- mu_std_names = [i for j in zip(mu_names, std_names) for i in j]
- self.output_names = self.output_names + mu_std_names
+ self.output_names = self.output_names + self.param_names
# Add first output entry for the sample_mean
self.output_names.insert(0, "")
- self.outputsize_multiplier = 2 * n_components
+ self.n_outputs = 2 + weighted
+ self.n_components = n_components
+ self.outputsize_multiplier = self.n_outputs * n_components
self.is_distribution_output = True
+ self.has_predicted = False
def domain_map(self, output: torch.Tensor):
- means, stds = torch.tensor_split(output, 2, dim=-1)
- return (means, stds)
+ output = output.reshape(
+ output.shape[0], output.shape[1], -1, self.outputsize_multiplier
+ )
+
+ return torch.tensor_split(output, self.n_outputs, dim=-1)
def scale_decouple(
self,
@@ -2354,132 +2372,138 @@ def scale_decouple(
variance and residual location based on anchoring `loc`, `scale`.
Also adds domain protection to the distribution parameters.
"""
- means, stds = output
+ if self.weighted:
+ means, stds, weights = output
+ weights = F.softmax(weights, dim=-1)
+ else:
+ means, stds = output
+
stds = F.softplus(stds)
if (loc is not None) and (scale is not None):
- loc = loc.view(means.size(dim=0), 1, -1)
- scale = scale.view(means.size(dim=0), 1, -1)
+ if loc.ndim == 3:
+ loc = loc.unsqueeze(-1)
+ scale = scale.unsqueeze(-1)
means = (means * scale) + loc
stds = (stds + eps) * scale
- return (means, stds)
- def sample(self, distr_args, num_samples=None):
+ if self.weighted:
+ return (means, stds, weights)
+ else:
+ return (means, stds)
+
+ def get_distribution(self, distr_args) -> Distribution:
"""
- Construct the empirical quantiles from the estimated Distribution,
- sampling from it `num_samples` independently.
+ Construct the associated Pytorch Distribution, given the collection of
+ constructor arguments and, optionally, location and scale tensors.
**Parameters**
`distr_args`: Constructor arguments for the underlying Distribution type.
- `loc`: Optional tensor, of the same shape as the batch_shape + event_shape
- of the resulting distribution.
- `scale`: Optional tensor, of the same shape as the batch_shape+event_shape
- of the resulting distribution.
- `num_samples`: int=500, number of samples for the empirical quantiles.
**Returns**
- `samples`: tensor, shape [B,H,`num_samples`].
- `quantiles`: tensor, empirical quantiles defined by `levels`.
+ `Distribution`: MixtureSameFamily distribution.
"""
- if num_samples is None:
- num_samples = self.num_samples
-
- means, stds = distr_args
- B, H, K = means.size()
- Q = len(self.quantiles)
- assert means.shape == stds.shape
+ if self.weighted:
+ means, stds, weights = distr_args
+ else:
+ means, stds = distr_args
+ weights = torch.full_like(means, fill_value=1 / self.n_components)
- # Sample K ~ Mult(weights)
- # shared across B, H
- # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)
+ mix = Categorical(weights)
+ components = Normal(loc=means, scale=stds)
+ distr = MixtureSameFamily(
+ mixture_distribution=mix, component_distribution=components
+ )
- weights = (1 / K) * torch.ones_like(means, device=means.device)
+ self.distr_mean = distr.mean
- # Avoid loop, vectorize
- weights = weights.reshape(-1, K)
- means = means.flatten()
- stds = stds.flatten()
+ return distr
- # Vectorization trick to recover row_idx
- sample_idxs = torch.multinomial(
- input=weights, num_samples=num_samples, replacement=True
- )
- aux_col_idx = torch.unsqueeze(torch.arange(B * H, device=means.device), -1) * K
+ def sample(self, distr_args: torch.Tensor, num_samples: Optional[int] = None):
+ """
+ Construct the empirical quantiles from the estimated Distribution,
+ sampling from it `num_samples` independently.
- # To device
- sample_idxs = sample_idxs.to(means.device)
+ **Parameters**
+ `distr_args`: Constructor arguments for the underlying Distribution type.
+ `num_samples`: int, overwrite number of samples for the empirical quantiles.
- sample_idxs = sample_idxs + aux_col_idx
- sample_idxs = sample_idxs.flatten()
+ **Returns**
+ `samples`: tensor, shape [B,H,`num_samples`].
+ `quantiles`: tensor, empirical quantiles defined by `levels`.
+ """
+ if num_samples is None:
+ num_samples = self.num_samples
- sample_means = means[sample_idxs]
- sample_stds = stds[sample_idxs]
+ # Instantiate Scaled Decoupled Distribution
+ distr = self.get_distribution(distr_args=distr_args)
+ samples = distr.sample(sample_shape=(num_samples,))
+ samples = samples.permute(
+ 1, 2, 3, 0
+ ) # [samples, B, H, N] -> [B, H, N, samples]
- # Sample y ~ Normal(mu, std) independently
- samples = torch.normal(sample_means, sample_stds).to(means.device)
- samples = samples.view(B * H, num_samples)
- sample_mean = torch.mean(samples, dim=-1)
+ sample_mean = torch.mean(samples, dim=-1, keepdim=True)
# Compute quantiles
- quantiles_device = self.quantiles.to(means.device)
- quants = torch.quantile(input=samples, q=quantiles_device, dim=1)
- quants = quants.permute((1, 0)) # Q, B*H
-
- # Final reshapes
- samples = samples.view(B, H, num_samples)
- sample_mean = sample_mean.view(B, H, 1)
- quants = quants.view(B, H, Q)
+ quantiles_device = self.quantiles.to(distr_args[0].device)
+ quants = torch.quantile(input=samples, q=quantiles_device, dim=-1)
+ quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]
return samples, sample_mean, quants
- def neglog_likelihood(
+ def update_quantile(self, q: Optional[List[float]] = None):
+ if q is not None:
+ self.quantiles = nn.Parameter(
+ torch.tensor(q, dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = (
+ [""]
+ + [f"_ql{q_i}" for q_i in q]
+ + self.return_params * self.param_names
+ )
+ self.has_predicted = True
+ elif q is None and self.has_predicted:
+ self.quantiles = nn.Parameter(
+ torch.tensor([0.5], dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = ["", "-median"] + self.return_params * self.param_names
+
+ def __call__(
self,
y: torch.Tensor,
- distr_args: Tuple[torch.Tensor, torch.Tensor],
+ distr_args: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
):
+ """
+ Computes the negative log-likelihood objective function
+ used to estimate the following predictive distribution:
- if mask is None:
- mask = torch.ones_like(y)
-
- means, stds = distr_args
- B, H, K = means.size()
-
- weights = (1 / K) * torch.ones_like(means, device=means.device)
+ $$\mathrm{P}(\mathbf{y}_{\\tau}\,|\,\\theta) \\quad \mathrm{and} \\quad -\log(\mathrm{P}(\mathbf{y}_{\\tau}\,|\,\\theta))$$
- y = y[:, :, None]
- mask = mask[:, :, None]
+ where $\\theta$ represents the distribution's parameters. It additionally
+ summarizes the objective signal with a weighted average over the `mask` tensor.
- var = stds**2
- log_stds = torch.log(stds)
- log_pi = (
- -((y - means) ** 2 / (2 * var))
- - log_stds
- - math.log(math.sqrt(2 * math.pi))
- )
+ **Parameters**
+ `y`: tensor, Actual values.
+ `distr_args`: Constructor arguments for the underlying Distribution type.
+ `mask`: tensor, Specifies date stamps per serie to consider in loss.
+ **Returns**
+ `loss`: scalar, weighted loss function against which backpropagation will be performed.
+ """
+ # Instantiate Scaled Decoupled Distribution
+ distr = self.get_distribution(distr_args=distr_args)
+ x = distr._pad(y)
+ log_prob_x = distr.component_distribution.log_prob(x)
+ log_mix_prob = torch.log_softmax(distr.mixture_distribution.logits, dim=-1)
if self.batch_correlation:
- log_pi = torch.sum(log_pi, dim=0, keepdim=True)
-
+ log_prob_x = torch.sum(log_prob_x, dim=0, keepdim=True)
if self.horizon_correlation:
- log_pi = torch.sum(log_pi, dim=1, keepdim=True)
-
- # Numerically Stable Mixture loglikelihood
- loglik = torch.logsumexp((torch.log(weights) + log_pi), dim=2, keepdim=True)
- loglik = loglik * mask
-
- loss = -torch.mean(loglik)
- return loss
-
- def __call__(
- self,
- y: torch.Tensor,
- distr_args: Tuple[torch.Tensor, torch.Tensor],
- mask: Union[torch.Tensor, None] = None,
- ):
+ log_prob_x = torch.sum(log_prob_x, dim=1, keepdim=True)
+ loss_values = -torch.logsumexp(log_prob_x + log_mix_prob, dim=-1)
- return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)
+ return weighted_average(loss_values, weights=mask)
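With `weighted=True` the network emits one extra head per component, so the output grows from 2*K to 3*K values per series; a shapes-only illustration of the split performed by `domain_map` above (standalone, not library code):

import torch

B, H, N, K = 2, 4, 3, 5                                     # batch, horizon, series, components
n_outputs = 3                                               # means, stds, weights
output = torch.randn(B, H, N * n_outputs * K)               # raw network output

# [B, H, N*3K] -> [B, H, N, 3K] -> three tensors of shape [B, H, N, K]
output = output.reshape(B, H, -1, n_outputs * K)
means, stds, weights = torch.tensor_split(output, n_outputs, dim=-1)
weights = torch.softmax(weights, dim=-1)                    # mixture weights sum to 1 over K
print(means.shape, weights.sum(-1).allclose(torch.ones(B, H, N)))  # torch.Size([2, 4, 3, 5]) True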
-# %% ../../nbs/losses.pytorch.ipynb 90
+# %% ../../nbs/losses.pytorch.ipynb 91
class NBMM(torch.nn.Module):
"""Negative Binomial Mixture Mesh
@@ -2511,6 +2535,7 @@ def __init__(
quantiles=None,
num_samples=1000,
return_params=False,
+ weighted=False,
):
super(NBMM, self).__init__()
# Transform level to MQLoss parameters
@@ -2523,26 +2548,41 @@ def __init__(
qs = torch.Tensor(quantiles)
self.quantiles = torch.nn.Parameter(qs, requires_grad=False)
self.num_samples = num_samples
+ self.weighted = weighted
# If True, predict_step will return Distribution's parameters
self.return_params = return_params
- if self.return_params:
- total_count_names = [
- f"-total_count-{i}" for i in range(1, n_components + 1)
+
+ total_count_names = [f"-total_count-{i}" for i in range(1, n_components + 1)]
+ probs_names = [f"-probs-{i}" for i in range(1, n_components + 1)]
+ if weighted:
+ weight_names = [f"-weight-{i}" for i in range(1, n_components + 1)]
+ self.param_names = [
+ i for j in zip(total_count_names, probs_names, weight_names) for i in j
+ ]
+ else:
+ self.param_names = [
+ i for j in zip(total_count_names, probs_names) for i in j
]
- probs_names = [f"-probs-{i}" for i in range(1, n_components + 1)]
- param_names = [i for j in zip(total_count_names, probs_names) for i in j]
- self.output_names = self.output_names + param_names
+
+ if self.return_params:
+ self.output_names = self.output_names + self.param_names
# Add first output entry for the sample_mean
self.output_names.insert(0, "")
- self.outputsize_multiplier = 2 * n_components
+ self.n_outputs = 2 + weighted
+ self.n_components = n_components
+ self.outputsize_multiplier = self.n_outputs * n_components
self.is_distribution_output = True
+ self.has_predicted = False
def domain_map(self, output: torch.Tensor):
- mu, alpha = torch.tensor_split(output, 2, dim=-1)
- return (mu, alpha)
+ output = output.reshape(
+ output.shape[0], output.shape[1], -1, self.outputsize_multiplier
+ )
+
+ return torch.tensor_split(output, self.n_outputs, dim=-1)
def scale_decouple(
self,
@@ -2558,11 +2598,18 @@ def scale_decouple(
Also adds domain protection to the distribution parameters.
"""
# Efficient NBinomial parametrization
- mu, alpha = output
+ if self.weighted:
+ mu, alpha, weights = output
+ weights = F.softmax(weights, dim=-1)
+ else:
+ mu, alpha = output
+
mu = F.softplus(mu) + 1e-8
alpha = F.softplus(alpha) + 1e-8 # alpha = 1/total_counts
if (loc is not None) and (scale is not None):
- loc = loc.view(mu.size(dim=0), 1, -1)
+ if loc.ndim == 3:
+ loc = loc.unsqueeze(-1)
+ scale = scale.unsqueeze(-1)
mu *= loc
alpha /= loc + 1.0
@@ -2571,20 +2618,47 @@ def scale_decouple(
# => probs = mu / [total_count * (1 + mu * (1/total_count))]
total_count = 1.0 / alpha
probs = (mu * alpha / (1.0 + mu * alpha)) + 1e-8
- return (total_count, probs)
+ if self.weighted:
+ return (total_count, probs, weights)
+ else:
+ return (total_count, probs)
+
+ def get_distribution(self, distr_args) -> Distribution:
+ """
+ Construct the associated Pytorch Distribution, given the collection of
+ constructor arguments and, optionally, location and scale tensors.
+
+ **Parameters**
+ `distr_args`: Constructor arguments for the underlying Distribution type.
+
+ **Returns**
+ `Distribution`: MixtureSameFamily distribution.
+ """
+ if self.weighted:
+ total_count, probs, weights = distr_args
+ else:
+ total_count, probs = distr_args
+ weights = torch.full_like(total_count, fill_value=1 / self.n_components)
+
+ mix = Categorical(weights)
+ components = NegativeBinomial(total_count, probs)
+ components.support = constraints.nonnegative
+ distr = MixtureSameFamily(
+ mixture_distribution=mix, component_distribution=components
+ )
- def sample(self, distr_args, num_samples=None):
+ self.distr_mean = distr.mean
+
+ return distr
+
+ def sample(self, distr_args: torch.Tensor, num_samples: Optional[int] = None):
"""
Construct the empirical quantiles from the estimated Distribution,
sampling from it `num_samples` independently.
**Parameters**
`distr_args`: Constructor arguments for the underlying Distribution type.
- `loc`: Optional tensor, of the same shape as the batch_shape + event_shape
- of the resulting distribution.
- `scale`: Optional tensor, of the same shape as the batch_shape+event_shape
- of the resulting distribution.
- `num_samples`: int=500, number of samples for the empirical quantiles.
+ `num_samples`: int, overwrite number of samples for the empirical quantiles.
**Returns**
`samples`: tensor, shape [B,H,`num_samples`].
@@ -2593,107 +2667,70 @@ def sample(self, distr_args, num_samples=None):
if num_samples is None:
num_samples = self.num_samples
- total_count, probs = distr_args
- B, H, K = total_count.size()
- Q = len(self.quantiles)
- assert total_count.shape == probs.shape
-
- # Sample K ~ Mult(weights)
- # shared across B, H
- # weights = torch.repeat_interleave(input=weights, repeats=H, dim=2)
-
- weights = (1 / K) * torch.ones_like(probs, device=probs.device)
-
- # Avoid loop, vectorize
- weights = weights.reshape(-1, K)
- total_count = total_count.flatten()
- probs = probs.flatten()
-
- # Vectorization trick to recover row_idx
- sample_idxs = torch.multinomial(
- input=weights, num_samples=num_samples, replacement=True
- )
- aux_col_idx = torch.unsqueeze(torch.arange(B * H, device=probs.device), -1) * K
-
- # To device
- sample_idxs = sample_idxs.to(probs.device)
-
- sample_idxs = sample_idxs + aux_col_idx
- sample_idxs = sample_idxs.flatten()
-
- sample_total_count = total_count[sample_idxs]
- sample_probs = probs[sample_idxs]
+ # Instantiate Scaled Decoupled Distribution
+ distr = self.get_distribution(distr_args=distr_args)
+ samples = distr.sample(sample_shape=(num_samples,))
+ samples = samples.permute(
+ 1, 2, 3, 0
+ ) # [samples, B, H, N] -> [B, H, N, samples]
- # Sample y ~ NBinomial(total_count, probs) independently
- dist = NegativeBinomial(total_count=sample_total_count, probs=sample_probs)
- samples = dist.sample(sample_shape=(1,)).to(probs.device)[0]
- samples = samples.view(B * H, num_samples)
- sample_mean = torch.mean(samples, dim=-1)
+ sample_mean = torch.mean(samples, dim=-1, keepdim=True)
# Compute quantiles
- quantiles_device = self.quantiles.to(probs.device)
- quants = torch.quantile(input=samples, q=quantiles_device, dim=1)
- quants = quants.permute((1, 0)) # Q, B*H
-
- # Final reshapes
- samples = samples.view(B, H, num_samples)
- sample_mean = sample_mean.view(B, H, 1)
- quants = quants.view(B, H, Q)
+ quantiles_device = self.quantiles.to(distr_args[0].device)
+ quants = torch.quantile(input=samples, q=quantiles_device, dim=-1)
+ quants = quants.permute(1, 2, 3, 0) # [Q, B, H, N] -> [B, H, N, Q]
return samples, sample_mean, quants
- def neglog_likelihood(
+ def update_quantile(self, q: Optional[List[float]] = None):
+ if q is not None:
+ self.quantiles = nn.Parameter(
+ torch.tensor(q, dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = (
+ [""]
+ + [f"_ql{q_i}" for q_i in q]
+ + self.return_params * self.param_names
+ )
+ self.has_predicted = True
+ elif q is None and self.has_predicted:
+ self.quantiles = nn.Parameter(
+ torch.tensor([0.5], dtype=torch.float32), requires_grad=False
+ )
+ self.output_names = ["", "-median"] + self.return_params * self.param_names
+
+ def __call__(
self,
y: torch.Tensor,
- distr_args: Tuple[torch.Tensor, torch.Tensor],
+ distr_args: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
):
+ """
+ Computes the negative log-likelihood objective function
+ used to estimate the following predictive distribution:
- if mask is None:
- mask = torch.ones_like(y)
-
- total_count, probs = distr_args
- B, H, K = total_count.size()
-
- weights = (1 / K) * torch.ones_like(probs, device=probs.device)
-
- y = y[:, :, None]
- mask = mask[:, :, None]
-
- log_unnormalized_prob = total_count * torch.log(1.0 - probs) + y * torch.log(
- probs
- )
- log_normalization = (
- -torch.lgamma(total_count + y)
- + torch.lgamma(1.0 + y)
- + torch.lgamma(total_count)
- )
- log_normalization[total_count + y == 0.0] = 0.0
- log = log_unnormalized_prob - log_normalization
-
- # log = torch.sum(log, dim=0, keepdim=True) # Joint within batch/group
- # log = torch.sum(log, dim=1, keepdim=True) # Joint within horizon
-
- # Numerical stability mixture and loglik
- log_max = torch.amax(log, dim=2, keepdim=True) # [1,1,K] (collapsed joints)
- lik = weights * torch.exp(log - log_max) # Take max
- loglik = torch.log(torch.sum(lik, dim=2, keepdim=True)) + log_max # Return max
+ $$\mathrm{P}(\mathbf{y}_{\\tau}\,|\,\\theta) \\quad \mathrm{and} \\quad -\log(\mathrm{P}(\mathbf{y}_{\\tau}\,|\,\\theta))$$
- loglik = loglik * mask # replace with mask
+ where $\\theta$ represents the distribution's parameters. It additionally
+ summarizes the objective signal with a weighted average over the `mask` tensor.
- loss = -torch.mean(loglik)
- return loss
+ **Parameters**
+ `y`: tensor, Actual values.
+ `distr_args`: Constructor arguments for the underlying Distribution type.
+ `mask`: tensor, Specifies date stamps per serie to consider in loss.
- def __call__(
- self,
- y: torch.Tensor,
- distr_args: Tuple[torch.Tensor, torch.Tensor],
- mask: Union[torch.Tensor, None] = None,
- ):
+ **Returns**
+ `loss`: scalar, weighted loss function against which backpropagation will be performed.
+ """
+ # Instantiate Scaled Decoupled Distribution
+ distr = self.get_distribution(distr_args=distr_args)
+ loss_values = -distr.log_prob(y)
+ loss_weights = mask
- return self.neglog_likelihood(y=y, distr_args=distr_args, mask=mask)
+ return weighted_average(loss_values, weights=loss_weights)
-# %% ../../nbs/losses.pytorch.ipynb 97
+# %% ../../nbs/losses.pytorch.ipynb 98
class HuberLoss(BasePointLoss):
""" Huber Loss
@@ -2730,8 +2767,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -2745,8 +2783,8 @@ def __call__(
weights = self._compute_weights(y=y, mask=mask)
return _weighted_mean(losses=losses, weights=weights)
-# %% ../../nbs/losses.pytorch.ipynb 102
-class TukeyLoss(torch.nn.Module):
+# %% ../../nbs/losses.pytorch.ipynb 103
+class TukeyLoss(BasePointLoss):
""" Tukey Loss
The Tukey loss function, also known as Tukey's biweight function, is a
@@ -2786,10 +2824,14 @@ def __init__(self, c: float = 4.685, normalize: bool = True):
def domain_map(self, y_hat: torch.Tensor):
"""
- Univariate loss operates in dimension [B,T,H]/[B,H]
- This changes the network's output from [B,H,1]->[B,H]
+ Input:
+ Univariate: [B, H, 1]
+ Multivariate: [B, H, N]
+
+ Output: [B, H, N]
"""
- return y_hat.squeeze(-1)
+
+ return y_hat
def masked_mean(self, x, mask, dim):
x_nan = x.masked_fill(mask < 1, float("nan"))
@@ -2801,8 +2843,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -2833,7 +2876,7 @@ def __call__(
tukey_loss = (self.c**2 / 6) * torch.mean(tukey_loss)
return tukey_loss
-# %% ../../nbs/losses.pytorch.ipynb 107
+# %% ../../nbs/losses.pytorch.ipynb 108
class HuberQLoss(BasePointLoss):
"""Huberized Quantile Loss
@@ -2872,8 +2915,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -2883,6 +2927,7 @@ def __call__(
**Returns:**
`huber_qloss`: tensor (single value).
"""
+
error = y_hat - y
zero_error = torch.zeros_like(error)
sq = torch.maximum(-error, zero_error)
@@ -2896,7 +2941,7 @@ def __call__(
weights = self._compute_weights(y=y, mask=mask)
return _weighted_mean(losses=losses, weights=weights)
-# %% ../../nbs/losses.pytorch.ipynb 112
+# %% ../../nbs/losses.pytorch.ipynb 113
class HuberMQLoss(BasePointLoss):
"""Huberized Multi-Quantile loss
@@ -2942,9 +2987,17 @@ def __init__(
def domain_map(self, y_hat: torch.Tensor):
"""
- Identity domain map [B,T,H,Q]/[B,H,Q]
+ Input:
+ Univariate: [B, H, 1 * Q]
+ Multivariate: [B, H, N * Q]
+
+ Output: [B, H, N, Q]
"""
- return y_hat
+ output = y_hat.reshape(
+ y_hat.shape[0], y_hat.shape[1], -1, self.outputsize_multiplier
+ )
+
+ return output
def _compute_weights(self, y, mask):
"""
@@ -2952,28 +3005,26 @@ def _compute_weights(self, y, mask):
Set horizon_weight to a ones[H] tensor if not set.
If set, check that it has the same length as the horizon in x.
"""
- if mask is None:
- mask = torch.ones_like(y, device=y.device)
- else:
- mask = mask.unsqueeze(1) # Add Q dimension.
if self.horizon_weight is None:
- self.horizon_weight = torch.ones(mask.shape[-1])
+ weights = torch.ones_like(mask)
else:
- assert mask.shape[-1] == len(
+ assert mask.shape[1] == len(
self.horizon_weight
), "horizon_weight must have same length as Y"
+ weights = self.horizon_weight.clone()
+ weights = weights[None, :, None, None].to(mask.device)
+ weights = torch.ones_like(mask, device=mask.device) * weights
- weights = self.horizon_weight.clone()
- weights = torch.ones_like(mask, device=mask.device) * weights.to(mask.device)
return weights * mask
def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -2983,35 +3034,33 @@ def __call__(
**Returns:**
`hmqloss`: tensor (single value).
"""
+ y = y.unsqueeze(-1)
+
+ if mask is not None:
+ mask = mask.unsqueeze(-1)
+ else:
+ mask = torch.ones_like(y, device=y.device)
+
+ error = y_hat - y
- error = y_hat - y.unsqueeze(-1)
zero_error = torch.zeros_like(error)
sq = torch.maximum(-error, torch.zeros_like(error))
s1_q = torch.maximum(error, torch.zeros_like(error))
+
+ quantiles = self.quantiles[None, None, None, :]
losses = F.huber_loss(
- self.quantiles * sq, zero_error, reduction="none", delta=self.delta
+ quantiles * sq, zero_error, reduction="none", delta=self.delta
) + F.huber_loss(
- (1 - self.quantiles) * s1_q, zero_error, reduction="none", delta=self.delta
+ (1 - quantiles) * s1_q, zero_error, reduction="none", delta=self.delta
)
- losses = (1 / len(self.quantiles)) * losses
-
- if y_hat.ndim == 3: # BaseWindows
- losses = losses.swapaxes(
- -2, -1
- ) # [B,H,Q] -> [B,Q,H] (needed for horizon weighting, H at the end)
- elif y_hat.ndim == 4: # BaseRecurrent
- losses = losses.swapaxes(-2, -1)
- losses = losses.swapaxes(
- -2, -3
- ) # [B,seq_len,H,Q] -> [B,Q,seq_len,H] (needed for horizon weighting, H at the end)
+ losses = (1 / len(self.quantiles)) * losses
- weights = self._compute_weights(y=losses, mask=mask) # Use losses for extra dim
- # NOTE: Weights do not have Q dimension.
+ weights = self._compute_weights(y=losses, mask=mask)
return _weighted_mean(losses=losses, weights=weights)
-# %% ../../nbs/losses.pytorch.ipynb 118
-class Accuracy(torch.nn.Module):
+# %% ../../nbs/losses.pytorch.ipynb 119
+class Accuracy(BasePointLoss):
"""Accuracy
Computes the accuracy between categorical `y` and `y_hat`.
@@ -3027,20 +3076,26 @@ def __init__(
):
super(Accuracy, self).__init__()
self.is_distribution_output = False
+ self.outputsize_multiplier = 1
def domain_map(self, y_hat: torch.Tensor):
"""
- Univariate loss operates in dimension [B,T,H]/[B,H]
- This changes the network's output from [B,H,1]->[B,H]
+ Input:
+ Univariate: [B, H, 1]
+ Multivariate: [B, H, N]
+
+ Output: [B, H, N]
"""
- return y_hat.squeeze(-1)
+
+ return y_hat
def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -3050,15 +3105,16 @@ def __call__(
**Returns:**
`accuracy`: tensor (single value).
"""
+
if mask is None:
mask = torch.ones_like(y_hat)
- measure = (y.unsqueeze(-1) == y_hat) * mask.unsqueeze(-1)
+ measure = (y == y_hat) * mask
accuracy = torch.mean(measure)
return accuracy
-# %% ../../nbs/losses.pytorch.ipynb 122
-class sCRPS(torch.nn.Module):
+# %% ../../nbs/losses.pytorch.ipynb 123
+class sCRPS(BasePointLoss):
"""Scaled Continues Ranked Probability Score
Calculates a scaled variation of the CRPS, as proposed by Rangapuram (2021),
@@ -3098,8 +3154,9 @@ def __call__(
self,
y: torch.Tensor,
y_hat: torch.Tensor,
+ y_insample: torch.Tensor,
mask: Union[torch.Tensor, None] = None,
- ):
+ ) -> torch.Tensor:
"""
**Parameters:**
`y`: tensor, Actual values.
@@ -3109,7 +3166,7 @@ def __call__(
**Returns:**
`scrps`: tensor (single value).
"""
- mql = self.mql(y=y, y_hat=y_hat, mask=mask)
+ mql = self.mql(y=y, y_hat=y_hat, mask=mask, y_insample=y_insample)
norm = torch.sum(torch.abs(y))
unmean = torch.sum(mask)
scrps = 2 * mql * unmean / (norm + 1e-5)
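The HuberMQLoss hunks above move the quantile reshaping into domain_map ([B, H, N*Q] -> [B, H, N, Q]) and broadcast the optional horizon_weight over the new 4-D layout. A rough standalone sketch of those two shape manipulations; all sizes below are illustrative and not taken from the patch:

    import torch

    B, H, N, Q = 4, 6, 3, 5                      # batch, horizon, series, quantiles
    y_hat = torch.randn(B, H, N * Q)             # flattened network output

    # domain_map-style reshape: [B, H, N * Q] -> [B, H, N, Q]
    y_hat = y_hat.reshape(y_hat.shape[0], y_hat.shape[1], -1, Q)

    # horizon weighting: an [H] vector broadcast against a [B, H, N, 1] mask
    horizon_weight = torch.linspace(1.0, 2.0, H)
    mask = torch.ones(B, H, N, 1)
    weights = torch.ones_like(mask) * horizon_weight[None, :, None, None]
    print(y_hat.shape, (weights * mask).shape)   # [4, 6, 3, 5] and [4, 6, 3, 1]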
diff --git a/neuralforecast/models/autoformer.py b/neuralforecast/models/autoformer.py
index 069e3641d..715f73694 100644
--- a/neuralforecast/models/autoformer.py
+++ b/neuralforecast/models/autoformer.py
@@ -14,7 +14,7 @@
import torch.nn.functional as F
from ..common._modules import DataEmbedding, SeriesDecomp
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
@@ -394,7 +394,7 @@ def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
return x, trend
# %% ../../nbs/models.autoformer.ipynb 10
-class Autoformer(BaseWindows):
+class Autoformer(BaseModel):
"""Autoformer
The Autoformer model tackles the challenge of finding reliable dependencies on intricate temporal patterns of long-horizon forecasting.
@@ -453,10 +453,13 @@ class Autoformer(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -628,13 +631,9 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
futr_exog = windows_batch["futr_exog"]
# Parse inputs
- insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]
if self.futr_exog_size > 0:
x_mark_enc = futr_exog[:, : self.input_size, :]
x_mark_dec = futr_exog[:, -(self.label_len + self.h) :, :]
@@ -667,5 +666,6 @@ def forward(self, windows_batch):
# final
dec_out = trend_part + seasonal_part
- forecast = self.loss.domain_map(dec_out[:, -self.h :])
+ forecast = dec_out[:, -self.h :]
+
return forecast
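Across the model files in this patch the same pattern repeats: SAMPLING_TYPE disappears in favour of the MULTIVARIATE / RECURRENT class flags, insample_y already arrives as [B, L, 1], and forward returns the raw [B, h, n_outputs] tensor instead of calling self.loss.domain_map. A hypothetical, minimal illustration of that contract (TinyLoss and TinyModel are invented here, not part of the patch, and the base class is assumed to apply domain_map afterwards):

    import torch

    class TinyLoss:
        outputsize_multiplier = 1
        def domain_map(self, y_hat):                      # identity for point losses
            return y_hat

    class TinyModel(torch.nn.Module):
        MULTIVARIATE = False                              # univariate forecasts
        RECURRENT = False                                 # direct (non-recursive) forecasts
        def __init__(self, input_size, h, loss):
            super().__init__()
            self.h, self.loss = h, loss
            self.lin = torch.nn.Linear(input_size, h * loss.outputsize_multiplier)
        def forward(self, windows_batch):
            x = windows_batch["insample_y"].squeeze(-1)   # [B, L, 1] -> [B, L]
            out = self.lin(x).reshape(len(x), self.h, -1) # raw [B, h, n_outputs]
            return out                                    # no self.loss.domain_map here

    model = TinyModel(input_size=24, h=12, loss=TinyLoss())
    y_hat = model({"insample_y": torch.randn(8, 24, 1)})
    y_hat = model.loss.domain_map(y_hat)                  # assumed base-class responsibility
    print(y_hat.shape)                                    # torch.Size([8, 12, 1])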
diff --git a/neuralforecast/models/bitcn.py b/neuralforecast/models/bitcn.py
index cf4fc91df..bd656b775 100644
--- a/neuralforecast/models/bitcn.py
+++ b/neuralforecast/models/bitcn.py
@@ -12,7 +12,7 @@
import numpy as np
from neuralforecast.losses.pytorch import MAE
-from neuralforecast.common._base_windows import BaseWindows
+from neuralforecast.common._base_model import BaseModel
# %% ../../nbs/models.bitcn.ipynb 8
class CustomConv1d(nn.Module):
@@ -84,7 +84,7 @@ def forward(self, x):
return (h_prev + h_next, out_prev + out_next)
# %% ../../nbs/models.bitcn.ipynb 10
-class BiTCN(BaseWindows):
+class BiTCN(BaseModel):
"""BiTCN
Bidirectional Temporal Convolutional Network (BiTCN) is a forecasting architecture based on two temporal convolutional networks (TCNs). The first network ('forward') encodes future covariates of the time series, whereas the second network ('backward') encodes past observations and covariates. This is a univariate model.
@@ -108,7 +108,7 @@ class BiTCN(BaseWindows):
`batch_size`: int=32, number of different series in each batch.
`valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
`windows_batch_size`: int=1024, number of windows to sample in each training batch, default uses all.
- `inference_windows_batch_size`: int=-1, number of windows to sample in each inference batch, -1 uses all.
+ `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
`start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
@@ -128,10 +128,13 @@ class BiTCN(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -274,7 +277,7 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- x = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1]
+ x = windows_batch["insample_y"].contiguous() # [B, L, 1]
hist_exog = windows_batch["hist_exog"] # [B, L, X]
futr_exog = windows_batch["futr_exog"] # [B, L + h, F]
stat_exog = windows_batch["stat_exog"] # [B, S]
@@ -345,9 +348,6 @@ def forward(self, windows_batch):
# Output layer to create forecasts
x = x.permute(0, 2, 1) # [B, 3 * hidden_size, h] -> [B, h, 3 * hidden_size]
- x = self.output_lin(x) # [B, h, 3 * hidden_size] -> [B, h, n_outputs]
-
- # Map to output domain
- forecast = self.loss.domain_map(x)
+ forecast = self.output_lin(x) # [B, h, 3 * hidden_size] -> [B, h, n_outputs]
return forecast
diff --git a/neuralforecast/models/deepar.py b/neuralforecast/models/deepar.py
index 6b16f51d1..15fe40b3f 100644
--- a/neuralforecast/models/deepar.py
+++ b/neuralforecast/models/deepar.py
@@ -4,15 +4,13 @@
__all__ = ['Decoder', 'DeepAR']
# %% ../../nbs/models.deepar.ipynb 4
-import numpy as np
-
import torch
import torch.nn as nn
from typing import Optional
-from ..common._base_windows import BaseWindows
-from ..losses.pytorch import DistributionLoss, MQLoss
+from ..common._base_model import BaseModel
+from ..losses.pytorch import DistributionLoss, MAE
# %% ../../nbs/models.deepar.ipynb 7
class Decoder(nn.Module):
@@ -53,7 +51,7 @@ def forward(self, x):
return self.layers(x)
# %% ../../nbs/models.deepar.ipynb 8
-class DeepAR(BaseWindows):
+class DeepAR(BaseModel):
"""DeepAR
**Parameters:**
@@ -100,10 +98,11 @@ class DeepAR(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = False
EXOGENOUS_STAT = True
+ MULTIVARIATE = False
+ RECURRENT = True
def __init__(
self,
@@ -122,7 +121,7 @@ def __init__(
loss=DistributionLoss(
distribution="StudentT", level=[80, 90], return_params=False
),
- valid_loss=MQLoss(level=[80, 90]),
+ valid_loss=MAE(),
max_steps: int = 1000,
learning_rate: float = 1e-3,
num_lr_decays: int = 3,
@@ -148,19 +147,6 @@ def __init__(
if exclude_insample_y:
raise Exception("DeepAR has no possibility for excluding y.")
- if not loss.is_distribution_output:
- raise Exception("DeepAR only supports distributional outputs.")
-
- if str(type(valid_loss)) not in [
- ""
- ]:
- raise Exception("DeepAR only supports MQLoss as validation loss.")
-
- if loss.return_params:
- raise Exception(
- "DeepAR does not return distribution parameters due to Monte Carlo sampling."
- )
-
# Inherit BaseWindows class
super(DeepAR, self).__init__(
h=h,
@@ -193,8 +179,7 @@ def __init__(
**trainer_kwargs
)
- self.horizon_backup = self.h # Used because h=0 during training
- self.trajectory_samples = trajectory_samples
+ self.n_samples = trajectory_samples
# LSTM
self.encoder_n_layers = lstm_n_layers
@@ -205,6 +190,8 @@ def __init__(
input_encoder = 1 + self.futr_exog_size + self.stat_exog_size
# Instantiate model
+ self.rnn_state = None
+ self.maintain_state = False
self.hist_encoder = nn.LSTM(
input_size=input_encoder,
hidden_size=self.encoder_hidden_size,
@@ -221,206 +208,17 @@ def __init__(
hidden_layers=decoder_hidden_layers,
)
- # Override BaseWindows method
- def training_step(self, batch, batch_idx):
-
- # During training h=0
- self.h = 0
- y_idx = batch["y_idx"]
-
- # Create and normalize windows [Ws, L, C]
- windows = self._create_windows(batch, step="train")
- original_insample_y = windows["temporal"][
- :, :, y_idx
- ].clone() # windows: [B, L, Feature] -> [B, L]
- original_insample_y = original_insample_y[
- :, 1:
- ] # Remove first (shift in DeepAr, cell at t outputs t+1)
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- insample_y, insample_mask, _, _, _, futr_exog, stat_exog = self._parse_windows(
- batch, windows
- )
-
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L]
- insample_mask=insample_mask, # [Ws, L]
- futr_exog=futr_exog, # [Ws, L+H]
- hist_exog=None, # None
- stat_exog=stat_exog,
- y_idx=y_idx,
- ) # [Ws, 1]
-
- # Model Predictions
- output = self.train_forward(windows_batch)
-
- if self.loss.is_distribution_output:
- _, y_loc, y_scale = self._inv_normalization(
- y_hat=original_insample_y,
- temporal_cols=batch["temporal_cols"],
- y_idx=y_idx,
- )
- outsample_y = original_insample_y
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- mask = insample_mask[
- :, 1:
- ].clone() # Remove first (shift in DeepAr, cell at t outputs t+1)
- loss = self.loss(y=outsample_y, distr_args=distr_args, mask=mask)
- else:
- raise Exception("DeepAR only supports distributional outputs.")
-
- if torch.isnan(loss):
- print("Model Parameters", self.hparams)
- print("insample_y", torch.isnan(insample_y).sum())
- print("outsample_y", torch.isnan(outsample_y).sum())
- print("output", torch.isnan(output).sum())
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "train_loss",
- loss.item(),
- batch_size=outsample_y.size(0),
- prog_bar=True,
- on_epoch=True,
- )
- self.train_trajectories.append((self.global_step, loss.item()))
-
- self.h = self.horizon_backup # Restore horizon
- return loss
-
- def validation_step(self, batch, batch_idx):
-
- self.h == self.horizon_backup
-
- if self.val_size == 0:
- return np.nan
-
- # TODO: Hack to compute number of windows
- windows = self._create_windows(batch, step="val")
- n_windows = len(windows["temporal"])
- y_idx = batch["y_idx"]
-
- # Number of windows in batch
- windows_batch_size = self.inference_windows_batch_size
- if windows_batch_size < 0:
- windows_batch_size = n_windows
- n_batches = int(np.ceil(n_windows / windows_batch_size))
-
- valid_losses = []
- batch_sizes = []
- for i in range(n_batches):
- # Create and normalize windows [Ws, L+H, C]
- w_idxs = np.arange(
- i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
- )
- windows = self._create_windows(batch, step="val", w_idxs=w_idxs)
- original_outsample_y = torch.clone(windows["temporal"][:, -self.h :, 0])
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- insample_y, insample_mask, _, outsample_mask, _, futr_exog, stat_exog = (
- self._parse_windows(batch, windows)
- )
- windows_batch = dict(
- insample_y=insample_y,
- insample_mask=insample_mask,
- futr_exog=futr_exog,
- hist_exog=None,
- stat_exog=stat_exog,
- temporal_cols=batch["temporal_cols"],
- y_idx=y_idx,
- )
-
- # Model Predictions
- output_batch = self(windows_batch)
- # Monte Carlo already returns y_hat with mean and quantiles
- output_batch = output_batch[:, :, 1:] # Remove mean
- valid_loss_batch = self.valid_loss(
- y=original_outsample_y, y_hat=output_batch, mask=outsample_mask
- )
- valid_losses.append(valid_loss_batch)
- batch_sizes.append(len(output_batch))
-
- valid_loss = torch.stack(valid_losses)
- batch_sizes = torch.tensor(batch_sizes, device=valid_loss.device)
- batch_size = torch.sum(batch_sizes)
- valid_loss = torch.sum(valid_loss * batch_sizes) / batch_size
-
- if torch.isnan(valid_loss):
- raise Exception("Loss is NaN, training stopped.")
-
- self.log(
- "valid_loss",
- valid_loss.item(),
- batch_size=batch_size,
- prog_bar=True,
- on_epoch=True,
- )
- self.validation_step_outputs.append(valid_loss)
- return valid_loss
-
- def predict_step(self, batch, batch_idx):
-
- self.h == self.horizon_backup
-
- # TODO: Hack to compute number of windows
- windows = self._create_windows(batch, step="predict")
- n_windows = len(windows["temporal"])
- y_idx = batch["y_idx"]
-
- # Number of windows in batch
- windows_batch_size = self.inference_windows_batch_size
- if windows_batch_size < 0:
- windows_batch_size = n_windows
- n_batches = int(np.ceil(n_windows / windows_batch_size))
-
- y_hats = []
- for i in range(n_batches):
- # Create and normalize windows [Ws, L+H, C]
- w_idxs = np.arange(
- i * windows_batch_size, min((i + 1) * windows_batch_size, n_windows)
- )
- windows = self._create_windows(batch, step="predict", w_idxs=w_idxs)
- windows = self._normalization(windows=windows, y_idx=y_idx)
-
- # Parse windows
- insample_y, insample_mask, _, _, _, futr_exog, stat_exog = (
- self._parse_windows(batch, windows)
- )
- windows_batch = dict(
- insample_y=insample_y, # [Ws, L]
- insample_mask=insample_mask, # [Ws, L]
- futr_exog=futr_exog, # [Ws, L+H]
- stat_exog=stat_exog,
- temporal_cols=batch["temporal_cols"],
- y_idx=y_idx,
- )
-
- # Model Predictions
- y_hat = self(windows_batch)
- # Monte Carlo already returns y_hat with mean and quantiles
- y_hats.append(y_hat)
- y_hat = torch.cat(y_hats, dim=0)
- return y_hat
-
- def train_forward(self, windows_batch):
+ def forward(self, windows_batch):
# Parse windows_batch
- encoder_input = windows_batch["insample_y"][:, :, None] # <- [B,T,1]
+ encoder_input = windows_batch["insample_y"] # <- [B, T, 1]
futr_exog = windows_batch["futr_exog"]
stat_exog = windows_batch["stat_exog"]
- # [B, input_size-1, X]
- encoder_input = encoder_input[
- :, :-1, :
- ] # Remove last (shift in DeepAr, cell at t outputs t+1)
_, input_size = encoder_input.shape[:2]
if self.futr_exog_size > 0:
- # Shift futr_exog (t predicts t+1, last output is outside insample_y)
- encoder_input = torch.cat((encoder_input, futr_exog[:, 1:, :]), dim=2)
+ encoder_input = torch.cat((encoder_input, futr_exog), dim=2)
+
if self.stat_exog_size > 0:
stat_exog = stat_exog.unsqueeze(1).repeat(
1, input_size, 1
@@ -428,114 +226,20 @@ def train_forward(self, windows_batch):
encoder_input = torch.cat((encoder_input, stat_exog), dim=2)
# RNN forward
- hidden_state, _ = self.hist_encoder(
- encoder_input
+ if self.maintain_state:
+ rnn_state = self.rnn_state
+ else:
+ rnn_state = None
+
+ hidden_state, rnn_state = self.hist_encoder(
+ encoder_input, rnn_state
) # [B, input_size-1, rnn_hidden_state]
+ if self.maintain_state:
+ self.rnn_state = rnn_state
+
# Decoder forward
output = self.decoder(hidden_state) # [B, input_size-1, output_size]
- output = self.loss.domain_map(output)
- return output
-
- def forward(self, windows_batch):
-
- # Parse windows_batch
- encoder_input = windows_batch["insample_y"][:, :, None] # <- [B,L,1]
- futr_exog = windows_batch["futr_exog"] # <- [B,L+H, n_f]
- stat_exog = windows_batch["stat_exog"]
- y_idx = windows_batch["y_idx"]
- # [B, seq_len, X]
- batch_size, input_size = encoder_input.shape[:2]
- if self.futr_exog_size > 0:
- futr_exog_input_window = futr_exog[
- :, 1 : input_size + 1, :
- ] # Align y_t with futr_exog_t+1
- encoder_input = torch.cat((encoder_input, futr_exog_input_window), dim=2)
- if self.stat_exog_size > 0:
- stat_exog_input_window = stat_exog.unsqueeze(1).repeat(
- 1, input_size, 1
- ) # [B, S] -> [B, input_size, S]
- encoder_input = torch.cat((encoder_input, stat_exog_input_window), dim=2)
-
- # Use input_size history to predict first h of the forecasting window
- _, h_c_tuple = self.hist_encoder(encoder_input)
- h_n = h_c_tuple[0] # [n_layers, B, lstm_hidden_state]
- c_n = h_c_tuple[1] # [n_layers, B, lstm_hidden_state]
-
- # Vectorizes trajectory samples in batch dimension [1]
- h_n = torch.repeat_interleave(
- h_n, self.trajectory_samples, 1
- ) # [n_layers, B*trajectory_samples, rnn_hidden_state]
- c_n = torch.repeat_interleave(
- c_n, self.trajectory_samples, 1
- ) # [n_layers, B*trajectory_samples, rnn_hidden_state]
-
- # Scales for inverse normalization
- y_scale = (
- self.scaler.x_scale[:, 0, [y_idx]].squeeze(-1).to(encoder_input.device)
- )
- y_loc = self.scaler.x_shift[:, 0, [y_idx]].squeeze(-1).to(encoder_input.device)
- y_scale = torch.repeat_interleave(y_scale, self.trajectory_samples, 0)
- y_loc = torch.repeat_interleave(y_loc, self.trajectory_samples, 0)
-
- # Recursive strategy prediction
- quantiles = self.loss.quantiles.to(encoder_input.device)
- y_hat = torch.zeros(
- batch_size, self.h, len(quantiles) + 1, device=encoder_input.device
- )
- for tau in range(self.h):
- # Decoder forward
- last_layer_h = h_n[-1] # [B*trajectory_samples, lstm_hidden_state]
- output = self.decoder(last_layer_h)
- output = self.loss.domain_map(output)
-
- # Inverse normalization
- distr_args = self.loss.scale_decouple(
- output=output, loc=y_loc, scale=y_scale
- )
- # Add horizon (1) dimension
- distr_args = list(distr_args)
- for i in range(len(distr_args)):
- distr_args[i] = distr_args[i].unsqueeze(-1)
- distr_args = tuple(distr_args)
- samples_tau, _, _ = self.loss.sample(distr_args=distr_args, num_samples=1)
- samples_tau = samples_tau.reshape(batch_size, self.trajectory_samples)
- sample_mean = torch.mean(samples_tau, dim=-1).to(encoder_input.device)
- quants = torch.quantile(input=samples_tau, q=quantiles, dim=-1).to(
- encoder_input.device
- )
- y_hat[:, tau, 0] = sample_mean
- y_hat[:, tau, 1:] = quants.permute((1, 0)) # [Q, B] -> [B, Q]
-
- # Stop if already in the last step (no need to predict next step)
- if tau + 1 == self.h:
- continue
- # Normalize to use as input
- encoder_input = self.scaler.scaler(
- samples_tau.flatten(), y_loc, y_scale
- ) # [B*n_samples]
- encoder_input = encoder_input[:, None, None] # [B*n_samples, 1, 1]
-
- # Update input
- if self.futr_exog_size > 0:
- futr_exog_tau = futr_exog[:, [input_size + tau + 1], :] # [B, 1, n_f]
- futr_exog_tau = torch.repeat_interleave(
- futr_exog_tau, self.trajectory_samples, 0
- ) # [B*n_samples, 1, n_f]
- encoder_input = torch.cat(
- (encoder_input, futr_exog_tau), dim=2
- ) # [B*n_samples, 1, 1+n_f]
- if self.stat_exog_size > 0:
- stat_exog_tau = torch.repeat_interleave(
- stat_exog, self.trajectory_samples, 0
- ) # [B*n_samples, n_s]
- encoder_input = torch.cat(
- (encoder_input, stat_exog_tau[:, None, :]), dim=2
- ) # [B*n_samples, 1, 1+n_f+n_s]
-
- _, h_c_tuple = self.hist_encoder(encoder_input, (h_n, c_n))
- h_n = h_c_tuple[0] # [n_layers, B, rnn_hidden_state]
- c_n = h_c_tuple[1] # [n_layers, B, rnn_hidden_state]
-
- return y_hat
+ # Return only horizon part
+ return output[:, -self.h :]
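DeepAR's bespoke training, validation, and predict steps are gone; what remains is a forward whose LSTM can optionally carry its state between calls (maintain_state / rnn_state) so that the base class can roll predictions forward recursively. A small self-contained sketch of that pattern, with invented sizes:

    import torch
    import torch.nn as nn

    class StatefulEncoder(nn.Module):
        def __init__(self, input_size=1, hidden_size=16):
            super().__init__()
            self.rnn = nn.LSTM(input_size, hidden_size, batch_first=True)
            self.rnn_state = None
            self.maintain_state = False

        def forward(self, x):                      # x: [B, T, input_size]
            state = self.rnn_state if self.maintain_state else None
            out, state = self.rnn(x, state)
            if self.maintain_state:
                self.rnn_state = state             # reuse on the next call
            return out

    enc = StatefulEncoder()
    enc.maintain_state = True
    chunk1 = enc(torch.randn(2, 10, 1))            # state is kept here...
    chunk2 = enc(torch.randn(2, 1, 1))             # ...and consumed here
    print(chunk1.shape, chunk2.shape)              # [2, 10, 16] and [2, 1, 16]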
diff --git a/neuralforecast/models/deepnpts.py b/neuralforecast/models/deepnpts.py
index 3edeb0596..016c7e1eb 100644
--- a/neuralforecast/models/deepnpts.py
+++ b/neuralforecast/models/deepnpts.py
@@ -11,11 +11,11 @@
from typing import Optional
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
# %% ../../nbs/models.deepnpts.ipynb 6
-class DeepNPTS(BaseWindows):
+class DeepNPTS(BaseModel):
"""DeepNPTS
Deep Non-Parametric Time Series Forecaster (`DeepNPTS`) is a baseline model for time-series forecasting. This model generates predictions by (weighted) sampling from the empirical distribution according to a learnable strategy. The strategy is learned by exploiting the information across multiple related time series.
@@ -61,10 +61,13 @@ class DeepNPTS(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -105,12 +108,12 @@ def __init__(
if exclude_insample_y:
raise Exception("DeepNPTS has no possibility for excluding y.")
- if not isinstance(loss, losses.BasePointLoss):
+ if loss.outputsize_multiplier > 1:
raise Exception(
"DeepNPTS only supports point loss functions (MAE, MSE, etc) as loss function."
)
- if not isinstance(valid_loss, losses.BasePointLoss):
+ if valid_loss is not None and not isinstance(valid_loss, losses.BasePointLoss):
raise Exception(
"DeepNPTS only supports point loss functions (MAE, MSE, etc) as valid loss function."
)
@@ -172,13 +175,13 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- x = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1]
+ x = windows_batch["insample_y"] # [B, L, 1]
hist_exog = windows_batch["hist_exog"] # [B, L, X]
futr_exog = windows_batch["futr_exog"] # [B, L + h, F]
stat_exog = windows_batch["stat_exog"] # [B, S]
batch_size, seq_len = x.shape[:2] # B = batch_size, L = seq_len
- insample_y = windows_batch["insample_y"].unsqueeze(-1)
+ insample_y = windows_batch["insample_y"]
# Concatenate x_t with future exogenous of input
if self.futr_exog_size > 0:
@@ -220,8 +223,6 @@ def forward(self, windows_batch):
x = (
F.softmax(weights, dim=1) * insample_y
) # [B, L, h] * [B, L, 1] = [B, L, h]
- output = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]
-
- forecast = self.loss.domain_map(output) # [B, h, 1] -> [B, h, 1]
+ forecast = torch.sum(x, dim=1).unsqueeze(-1) # [B, L, h] -> [B, h, 1]
return forecast
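The DeepNPTS head itself is untouched by this hunk: the network emits one weight per (history step, horizon step), a softmax over the history axis turns those weights into sampling probabilities, and the forecast is the probability-weighted sum of past observations. Illustrated standalone, with random tensors standing in for the real MLP output:

    import torch
    import torch.nn.functional as F

    B, L, h = 2, 8, 3
    weights = torch.randn(B, L, h)                 # unnormalized weights from the MLP
    insample_y = torch.randn(B, L, 1)              # past target values

    probs = F.softmax(weights, dim=1)              # sums to 1 over the history axis L
    forecast = torch.sum(probs * insample_y, dim=1).unsqueeze(-1)   # [B, h, 1]
    print(forecast.shape)                          # torch.Size([2, 3, 1])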
diff --git a/neuralforecast/models/dilated_rnn.py b/neuralforecast/models/dilated_rnn.py
index 96094c961..8f8504863 100644
--- a/neuralforecast/models/dilated_rnn.py
+++ b/neuralforecast/models/dilated_rnn.py
@@ -10,7 +10,7 @@
import torch.nn as nn
from ..losses.pytorch import MAE
-from ..common._base_recurrent import BaseRecurrent
+from ..common._base_model import BaseModel
from ..common._modules import MLP
# %% ../../nbs/models.dilated_rnn.ipynb 7
@@ -256,8 +256,8 @@ def _split_outputs(self, dilated_outputs, rate):
for i in range(rate)
]
- interleaved = torch.stack((blocks)).transpose(1, 0).contiguous()
- interleaved = interleaved.view(
+ interleaved = torch.stack((blocks)).transpose(1, 0)
+ interleaved = interleaved.reshape(
dilated_outputs.size(0) * rate, batchsize, dilated_outputs.size(2)
)
return interleaved
@@ -286,7 +286,7 @@ def _prepare_inputs(self, inputs, rate):
return dilated_inputs
# %% ../../nbs/models.dilated_rnn.ipynb 12
-class DilatedRNN(BaseRecurrent):
+class DilatedRNN(BaseModel):
"""DilatedRNN
**Parameters:**
@@ -325,25 +325,29 @@ class DilatedRNN(BaseRecurrent):
"""
# Class attributes
- SAMPLING_TYPE = "recurrent"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
h: int,
- input_size: int = -1,
+ input_size: int,
inference_input_size: int = -1,
cell_type: str = "LSTM",
dilations: List[List[int]] = [[1, 2], [4, 8]],
- encoder_hidden_size: int = 200,
+ encoder_hidden_size: int = 128,
context_size: int = 10,
- decoder_hidden_size: int = 200,
+ decoder_hidden_size: int = 128,
decoder_layers: int = 2,
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
loss=MAE(),
valid_loss=None,
max_steps: int = 1000,
@@ -353,6 +357,9 @@ def __init__(
val_check_steps: int = 100,
batch_size=32,
valid_batch_size: Optional[int] = None,
+ windows_batch_size=128,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "robust",
random_seed: int = 1,
@@ -367,7 +374,10 @@ def __init__(
super(DilatedRNN, self).__init__(
h=h,
input_size=input_size,
- inference_input_size=inference_input_size,
+ futr_exog_list=futr_exog_list,
+ hist_exog_list=hist_exog_list,
+ stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -377,12 +387,13 @@ def __init__(
val_check_steps=val_check_steps,
batch_size=batch_size,
valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
+ step_size=step_size,
scaler_type=scaler_type,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- drop_last_loader=drop_last_loader,
random_seed=random_seed,
+ drop_last_loader=drop_last_loader,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
lr_scheduler=lr_scheduler,
@@ -404,14 +415,14 @@ def __init__(
self.decoder_layers = decoder_layers
# RNN input size (1 for target variable y)
- input_encoder = 1 + self.hist_exog_size + self.stat_exog_size
+ input_encoder = (
+ 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size
+ )
# Instantiate model
layers = []
for grp_num in range(len(self.dilations)):
- if grp_num == 0:
- input_encoder = 1 + self.hist_exog_size + self.stat_exog_size
- else:
+ if grp_num > 0:
input_encoder = self.encoder_hidden_size
layer = DRNN(
input_encoder,
@@ -425,14 +436,11 @@ def __init__(
self.rnn_stack = nn.Sequential(*layers)
# Context adapter
- self.context_adapter = nn.Linear(
- in_features=self.encoder_hidden_size + self.futr_exog_size * h,
- out_features=self.context_size * h,
- )
+ self.context_adapter = nn.Linear(in_features=self.input_size, out_features=h)
# Decoder MLP
self.mlp_decoder = MLP(
- in_features=self.context_size + self.futr_exog_size,
+ in_features=self.encoder_hidden_size + self.futr_exog_size,
out_features=self.loss.outputsize_multiplier,
hidden_size=self.decoder_hidden_size,
num_layers=self.decoder_layers,
@@ -443,26 +451,30 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- encoder_input = windows_batch["insample_y"] # [B, seq_len, 1]
- futr_exog = windows_batch["futr_exog"]
- hist_exog = windows_batch["hist_exog"]
- stat_exog = windows_batch["stat_exog"]
+ encoder_input = windows_batch["insample_y"] # [B, L, 1]
+ futr_exog = windows_batch["futr_exog"] # [B, L + h, F]
+ hist_exog = windows_batch["hist_exog"] # [B, L, X]
+ stat_exog = windows_batch["stat_exog"] # [B, S]
# Concatenate y, historic and static inputs
- # [B, C, seq_len, 1] -> [B, seq_len, C]
- # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]
batch_size, seq_len = encoder_input.shape[:2]
if self.hist_exog_size > 0:
- hist_exog = hist_exog.permute(0, 2, 1, 3).squeeze(
- -1
- ) # [B, X, seq_len, 1] -> [B, seq_len, X]
- encoder_input = torch.cat((encoder_input, hist_exog), dim=2)
+ encoder_input = torch.cat(
+ (encoder_input, hist_exog), dim=2
+ ) # [B, L, 1] + [B, L, X] -> [B, L, 1 + X]
if self.stat_exog_size > 0:
stat_exog = stat_exog.unsqueeze(1).repeat(
1, seq_len, 1
- ) # [B, S] -> [B, seq_len, S]
- encoder_input = torch.cat((encoder_input, stat_exog), dim=2)
+ ) # [B, S] -> [B, L, S]
+ encoder_input = torch.cat(
+ (encoder_input, stat_exog), dim=2
+ ) # [B, L, 1 + X] + [B, L, S] -> [B, L, 1 + X + S]
+
+ if self.futr_exog_size > 0:
+ encoder_input = torch.cat(
+ (encoder_input, futr_exog[:, :seq_len]), dim=2
+ ) # [B, L, 1 + X + S] + [B, L, F] -> [B, L, 1 + X + S + F]
# DilatedRNN forward
for layer_num in range(len(self.rnn_stack)):
@@ -472,24 +484,21 @@ def forward(self, windows_batch):
output += residual
encoder_input = output
- if self.futr_exog_size > 0:
- futr_exog = futr_exog.permute(0, 2, 3, 1)[
- :, :, 1:, :
- ] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]
- encoder_input = torch.cat(
- (encoder_input, futr_exog.reshape(batch_size, seq_len, -1)), dim=2
- )
-
# Context adapter
- context = self.context_adapter(encoder_input)
- context = context.reshape(batch_size, seq_len, self.h, self.context_size)
+ output = output.permute(0, 2, 1) # [B, L, C] -> [B, C, L]
+ context = self.context_adapter(output) # [B, C, L] -> [B, C, h]
# Residual connection with futr_exog
if self.futr_exog_size > 0:
- context = torch.cat((context, futr_exog), dim=-1)
+ futr_exog_futr = futr_exog[:, seq_len:].permute(
+ 0, 2, 1
+ ) # [B, h, F] -> [B, F, h]
+ context = torch.cat(
+ (context, futr_exog_futr), dim=1
+ ) # [B, C, h] + [B, F, h] = [B, C + F, h]
# Final forecast
- output = self.mlp_decoder(context)
- output = self.loss.domain_map(output)
+ context = context.permute(0, 2, 1) # [B, C + F, h] -> [B, h, C + F]
+ output = self.mlp_decoder(context) # [B, h, C + F] -> [B, h, n_output]
return output
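The rewritten DilatedRNN decoder replaces the old per-timestamp context tensor with a single Linear over the time axis ([B, C, L] -> [B, C, h]) followed by a channel-wise concatenation of the future exogenous window. A rough shape walk-through under assumed sizes:

    import torch
    import torch.nn as nn

    B, L, h, C, n_futr = 4, 24, 6, 32, 2
    encoder_output = torch.randn(B, L, C)          # output of the dilated RNN stack
    futr_exog = torch.randn(B, L + h, n_futr)      # [B, L + h, F]

    context_adapter = nn.Linear(in_features=L, out_features=h)
    context = context_adapter(encoder_output.permute(0, 2, 1))      # [B, C, L] -> [B, C, h]
    futr_part = futr_exog[:, L:].permute(0, 2, 1)                   # [B, h, F] -> [B, F, h]
    context = torch.cat((context, futr_part), dim=1)                # [B, C + F, h]
    decoder_in = context.permute(0, 2, 1)                           # [B, h, C + F]
    print(decoder_in.shape)                        # torch.Size([4, 6, 34])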
diff --git a/neuralforecast/models/dlinear.py b/neuralforecast/models/dlinear.py
index 3af5f11c0..3b87f267a 100644
--- a/neuralforecast/models/dlinear.py
+++ b/neuralforecast/models/dlinear.py
@@ -9,7 +9,7 @@
import torch
import torch.nn as nn
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
@@ -48,7 +48,7 @@ def forward(self, x):
return res, moving_mean
# %% ../../nbs/models.dlinear.ipynb 10
-class DLinear(BaseWindows):
+class DLinear(BaseModel):
"""DLinear
*Parameters:*
@@ -86,10 +86,13 @@ class DLinear(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -175,11 +178,7 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
- # futr_exog = windows_batch['futr_exog']
+ insample_y = windows_batch["insample_y"].squeeze(-1)
# Parse inputs
batch_size = len(insample_y)
@@ -191,5 +190,4 @@ def forward(self, windows_batch):
# Final
forecast = trend_part + seasonal_part
forecast = forecast.reshape(batch_size, self.h, self.loss.outputsize_multiplier)
- forecast = self.loss.domain_map(forecast)
return forecast
diff --git a/neuralforecast/models/fedformer.py b/neuralforecast/models/fedformer.py
index 7cfe3c5a6..2393a1aae 100644
--- a/neuralforecast/models/fedformer.py
+++ b/neuralforecast/models/fedformer.py
@@ -4,7 +4,7 @@
__all__ = ['LayerNorm', 'AutoCorrelationLayer', 'EncoderLayer', 'Encoder', 'DecoderLayer', 'Decoder', 'get_frequency_modes',
'FourierBlock', 'FourierCrossAttention', 'FEDformer']
-# %% ../../nbs/models.fedformer.ipynb 5
+# %% ../../nbs/models.fedformer.ipynb 6
import numpy as np
from typing import Optional
@@ -14,11 +14,11 @@
from ..common._modules import DataEmbedding
from ..common._modules import SeriesDecomp
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
-# %% ../../nbs/models.fedformer.ipynb 7
+# %% ../../nbs/models.fedformer.ipynb 8
class LayerNorm(nn.Module):
"""
Special designed layernorm for the seasonal part
@@ -66,7 +66,7 @@ def forward(self, queries, keys, values, attn_mask):
return self.out_projection(out), attn
-# %% ../../nbs/models.fedformer.ipynb 8
+# %% ../../nbs/models.fedformer.ipynb 9
class EncoderLayer(nn.Module):
"""
FEDformer encoder layer with the progressive decomposition architecture
@@ -234,7 +234,7 @@ def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
x = self.projection(x)
return x, trend
-# %% ../../nbs/models.fedformer.ipynb 9
+# %% ../../nbs/models.fedformer.ipynb 10
def get_frequency_modes(seq_len, modes=64, mode_select_method="random"):
"""
Get modes on frequency domain:
@@ -390,8 +390,8 @@ def forward(self, q, k, v, mask):
)
return (out, None)
-# %% ../../nbs/models.fedformer.ipynb 11
-class FEDformer(BaseWindows):
+# %% ../../nbs/models.fedformer.ipynb 12
+class FEDformer(BaseModel):
"""FEDformer
The FEDformer model tackles the challenge of finding reliable dependencies on intricate temporal patterns of long-horizon forecasting.
@@ -449,10 +449,13 @@ class FEDformer(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -623,13 +626,9 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
futr_exog = windows_batch["futr_exog"]
# Parse inputs
- insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]
if self.futr_exog_size > 0:
x_mark_enc = futr_exog[:, : self.input_size, :]
x_mark_dec = futr_exog[:, -(self.label_len + self.h) :, :]
@@ -663,6 +662,6 @@ def forward(self, windows_batch):
)
# final
dec_out = trend_part + seasonal_part
+ forecast = dec_out[:, -self.h :]
- forecast = self.loss.domain_map(dec_out[:, -self.h :])
return forecast
diff --git a/neuralforecast/models/gru.py b/neuralforecast/models/gru.py
index f45aa7576..9cb5dd2b2 100644
--- a/neuralforecast/models/gru.py
+++ b/neuralforecast/models/gru.py
@@ -11,11 +11,11 @@
import torch.nn as nn
from ..losses.pytorch import MAE
-from ..common._base_recurrent import BaseRecurrent
+from ..common._base_model import BaseModel
from ..common._modules import MLP
# %% ../../nbs/models.gru.ipynb 8
-class GRU(BaseRecurrent):
+class GRU(BaseModel):
"""GRU
Multi Layer Recurrent Network with Gated Units (GRU), and
@@ -23,7 +23,7 @@ class GRU(BaseRecurrent):
using ADAM stochastic gradient descent. The network accepts static, historic
and future exogenous data, flattens the inputs.
- **Parameters:**
+ **Parameters:**
`h`: int, forecast horizon.
`input_size`: int, maximum sequence length for truncated train backpropagation. Default -1 uses all history.
`inference_input_size`: int, maximum sequence length for truncated inference. Default -1 uses all history.
@@ -32,7 +32,7 @@ class GRU(BaseRecurrent):
`encoder_activation`: Optional[str]=None, Deprecated. Activation function in GRU is frozen in PyTorch.
`encoder_bias`: bool=True, whether or not to use biases b_ih, b_hh within GRU units.
`encoder_dropout`: float=0., dropout regularization applied to GRU outputs.
- `context_size`: int=10, size of context vector for each timestamp on the forecasting window.
+ `context_size`: deprecated.
`decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
`decoder_layers`: int=2, number of layers for the MLP decoder.
`futr_exog_list`: str list, future exogenous columns.
@@ -60,10 +60,13 @@ class GRU(BaseRecurrent):
"""
# Class attributes
- SAMPLING_TYPE = "recurrent"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ True # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -75,12 +78,14 @@ def __init__(
encoder_activation: Optional[str] = None,
encoder_bias: bool = True,
encoder_dropout: float = 0.0,
- context_size: int = 10,
- decoder_hidden_size: int = 200,
+ context_size: Optional[int] = None,
+ decoder_hidden_size: int = 128,
decoder_layers: int = 2,
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
+ recurrent=False,
loss=MAE(),
valid_loss=None,
max_steps: int = 1000,
@@ -90,6 +95,10 @@ def __init__(
val_check_steps: int = 100,
batch_size=32,
valid_batch_size: Optional[int] = None,
+ windows_batch_size=128,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
+ step_size: int = 1,
scaler_type: str = "robust",
random_seed=1,
drop_last_loader=False,
@@ -100,10 +109,16 @@ def __init__(
dataloader_kwargs=None,
**trainer_kwargs
):
+
+ self.RECURRENT = recurrent
+
super(GRU, self).__init__(
h=h,
input_size=input_size,
- inference_input_size=inference_input_size,
+ futr_exog_list=futr_exog_list,
+ hist_exog_list=hist_exog_list,
+ stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -113,12 +128,13 @@ def __init__(
val_check_steps=val_check_steps,
batch_size=batch_size,
valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
+ step_size=step_size,
scaler_type=scaler_type,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- drop_last_loader=drop_last_loader,
random_seed=random_seed,
+ drop_last_loader=drop_last_loader,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
lr_scheduler=lr_scheduler,
@@ -142,16 +158,23 @@ def __init__(
self.encoder_dropout = encoder_dropout
# Context adapter
- self.context_size = context_size
+ if context_size is not None:
+ warnings.warn(
+ "context_size is deprecated and will be removed in future versions."
+ )
# MLP decoder
self.decoder_hidden_size = decoder_hidden_size
self.decoder_layers = decoder_layers
# RNN input size (1 for target variable y)
- input_encoder = 1 + self.hist_exog_size + self.stat_exog_size
+ input_encoder = (
+ 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size
+ )
# Instantiate model
+ self.rnn_state = None
+ self.maintain_state = False
self.hist_encoder = nn.GRU(
input_size=input_encoder,
hidden_size=self.encoder_hidden_size,
@@ -161,69 +184,80 @@ def __init__(
batch_first=True,
)
- # Context adapter
- self.context_adapter = nn.Linear(
- in_features=self.encoder_hidden_size + self.futr_exog_size * h,
- out_features=self.context_size * h,
- )
-
# Decoder MLP
- self.mlp_decoder = MLP(
- in_features=self.context_size + self.futr_exog_size,
- out_features=self.loss.outputsize_multiplier,
- hidden_size=self.decoder_hidden_size,
- num_layers=self.decoder_layers,
- activation="ReLU",
- dropout=0.0,
- )
+ if self.RECURRENT:
+ self.proj = nn.Linear(
+ self.encoder_hidden_size, self.loss.outputsize_multiplier
+ )
+ else:
+ self.mlp_decoder = MLP(
+ in_features=self.encoder_hidden_size + self.futr_exog_size,
+ out_features=self.loss.outputsize_multiplier,
+ hidden_size=self.decoder_hidden_size,
+ num_layers=self.decoder_layers,
+ activation="ReLU",
+ dropout=0.0,
+ )
def forward(self, windows_batch):
# Parse windows_batch
encoder_input = windows_batch["insample_y"] # [B, seq_len, 1]
- futr_exog = windows_batch["futr_exog"]
- hist_exog = windows_batch["hist_exog"]
- stat_exog = windows_batch["stat_exog"]
+ futr_exog = windows_batch["futr_exog"] # [B, seq_len, F]
+ hist_exog = windows_batch["hist_exog"] # [B, seq_len, X]
+ stat_exog = windows_batch["stat_exog"] # [B, S]
# Concatenate y, historic and static inputs
- # [B, C, seq_len, 1] -> [B, seq_len, C]
- # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]
batch_size, seq_len = encoder_input.shape[:2]
if self.hist_exog_size > 0:
- hist_exog = hist_exog.permute(0, 2, 1, 3).squeeze(
- -1
- ) # [B, X, seq_len, 1] -> [B, seq_len, X]
- encoder_input = torch.cat((encoder_input, hist_exog), dim=2)
+ encoder_input = torch.cat(
+ (encoder_input, hist_exog), dim=2
+ ) # [B, seq_len, 1] + [B, seq_len, X] -> [B, seq_len, 1 + X]
if self.stat_exog_size > 0:
+ # print(encoder_input.shape)
stat_exog = stat_exog.unsqueeze(1).repeat(
1, seq_len, 1
) # [B, S] -> [B, seq_len, S]
- encoder_input = torch.cat((encoder_input, stat_exog), dim=2)
-
- # RNN forward
- hidden_state, _ = self.hist_encoder(
- encoder_input
- ) # [B, seq_len, rnn_hidden_state]
+ encoder_input = torch.cat(
+ (encoder_input, stat_exog), dim=2
+ ) # [B, seq_len, 1 + X] + [B, seq_len, S] -> [B, seq_len, 1 + X + S]
if self.futr_exog_size > 0:
- futr_exog = futr_exog.permute(0, 2, 3, 1)[
- :, :, 1:, :
- ] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]
- hidden_state = torch.cat(
- (hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2
- )
+ encoder_input = torch.cat(
+ (encoder_input, futr_exog[:, :seq_len]), dim=2
+ ) # [B, seq_len, 1 + X + S] + [B, seq_len, F] -> [B, seq_len, 1 + X + S + F]
- # Context adapter
- context = self.context_adapter(hidden_state)
- context = context.reshape(batch_size, seq_len, self.h, self.context_size)
+ if self.RECURRENT:
+ if self.maintain_state:
+ rnn_state = self.rnn_state
+ else:
+ rnn_state = None
- # Residual connection with futr_exog
- if self.futr_exog_size > 0:
- context = torch.cat((context, futr_exog), dim=-1)
+ output, rnn_state = self.hist_encoder(
+ encoder_input, rnn_state
+ ) # [B, seq_len, rnn_hidden_state]
+ output = self.proj(
+ output
+ ) # [B, seq_len, rnn_hidden_state] -> [B, seq_len, n_output]
+ if self.maintain_state:
+ self.rnn_state = rnn_state
+ else:
+ hidden_state, _ = self.hist_encoder(
+ encoder_input, None
+ ) # [B, seq_len, rnn_hidden_state]
+ hidden_state = hidden_state[
+ :, -self.h :
+ ] # [B, seq_len, rnn_hidden_state] -> [B, h, rnn_hidden_state]
+
+ if self.futr_exog_size > 0:
+ futr_exog_futr = futr_exog[:, -self.h :] # [B, h, F]
+ hidden_state = torch.cat(
+ (hidden_state, futr_exog_futr), dim=-1
+ ) # [B, h, rnn_hidden_state] + [B, h, F] -> [B, h, rnn_hidden_state + F]
- # Final forecast
- output = self.mlp_decoder(context)
- output = self.loss.domain_map(output)
+ output = self.mlp_decoder(
+ hidden_state
+            ) # [B, h, rnn_hidden_state + F] -> [B, h, n_output]
- return output
+ return output[:, -self.h :]
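With the new recurrent flag the GRU can run either stepwise (project every hidden state and keep rnn_state) or direct: take the last h encoder states, append the future exogenous window, and decode with an MLP. A sketch of the direct path with made-up sizes:

    import torch
    import torch.nn as nn

    B, L, h, hidden, n_futr = 4, 36, 12, 16, 3
    hidden_state = torch.randn(B, L, hidden)       # GRU outputs over the input window
    futr_exog = torch.randn(B, L + h, n_futr)

    tail = hidden_state[:, -h:]                            # [B, h, hidden]
    tail = torch.cat((tail, futr_exog[:, -h:]), dim=-1)    # [B, h, hidden + F]
    mlp_decoder = nn.Sequential(nn.Linear(hidden + n_futr, 32), nn.ReLU(), nn.Linear(32, 1))
    forecast = mlp_decoder(tail)                   # [B, h, 1]
    print(forecast.shape)                          # torch.Size([4, 12, 1])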
diff --git a/neuralforecast/models/informer.py b/neuralforecast/models/informer.py
index cb4ff2622..9782e4d8e 100644
--- a/neuralforecast/models/informer.py
+++ b/neuralforecast/models/informer.py
@@ -19,7 +19,7 @@
DataEmbedding,
AttentionLayer,
)
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
@@ -179,7 +179,7 @@ def forward(self, queries, keys, values, attn_mask):
return context.contiguous(), attn
# %% ../../nbs/models.informer.ipynb 11
-class Informer(BaseWindows):
+class Informer(BaseModel):
"""Informer
The Informer model tackles the vanilla Transformer computational complexity challenges for long-horizon forecasting.
@@ -237,10 +237,11 @@ class Informer(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False
+ RECURRENT = False
def __init__(
self,
@@ -411,14 +412,8 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
-
futr_exog = windows_batch["futr_exog"]
- insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]
-
if self.futr_exog_size > 0:
x_mark_enc = futr_exog[:, : self.input_size, :]
x_mark_dec = futr_exog[:, -(self.label_len + self.h) :, :]
@@ -435,5 +430,5 @@ def forward(self, windows_batch):
dec_out = self.dec_embedding(x_dec, x_mark_dec)
dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)
- forecast = self.loss.domain_map(dec_out[:, -self.h :])
+ forecast = dec_out[:, -self.h :]
return forecast
diff --git a/neuralforecast/models/itransformer.py b/neuralforecast/models/itransformer.py
index 121eac2b5..3d870a022 100644
--- a/neuralforecast/models/itransformer.py
+++ b/neuralforecast/models/itransformer.py
@@ -11,9 +11,9 @@
import numpy as np
from math import sqrt
-
+from typing import Optional
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
from neuralforecast.common._modules import (
TransEncoder,
@@ -102,7 +102,7 @@ def forward(self, x, x_mark):
return self.dropout(x)
# %% ../../nbs/models.itransformer.ipynb 13
-class iTransformer(BaseMultivariate):
+class iTransformer(BaseModel):
"""iTransformer
**Parameters:**
@@ -128,6 +128,10 @@ class iTransformer(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+ `windows_batch_size`: int=128, number of windows to sample in each training batch, default uses all.
+ `inference_windows_batch_size`: int=128, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -145,10 +149,11 @@ class iTransformer(BaseMultivariate):
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = True
+ RECURRENT = False
def __init__(
self,
@@ -158,6 +163,7 @@ def __init__(
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
hidden_size: int = 512,
n_heads: int = 8,
e_layers: int = 2,
@@ -174,6 +180,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=128,
+ inference_windows_batch_size=128,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -193,6 +203,7 @@ def __init__(
stat_exog_list=None,
futr_exog_list=None,
hist_exog_list=None,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -201,6 +212,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
@@ -250,7 +265,9 @@ def __init__(
norm_layer=torch.nn.LayerNorm(self.hidden_size),
)
- self.projector = nn.Linear(self.hidden_size, h, bias=True)
+ self.projector = nn.Linear(
+ self.hidden_size, h * self.loss.outputsize_multiplier, bias=True
+ )
def forecast(self, x_enc):
if self.use_norm:
@@ -284,8 +301,16 @@ def forecast(self, x_enc):
if self.use_norm:
# De-Normalization from Non-stationary Transformer
- dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))
- dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))
+ dec_out = dec_out * (
+ stdev[:, 0, :]
+ .unsqueeze(1)
+ .repeat(1, self.h * self.loss.outputsize_multiplier, 1)
+ )
+ dec_out = dec_out + (
+ means[:, 0, :]
+ .unsqueeze(1)
+ .repeat(1, self.h * self.loss.outputsize_multiplier, 1)
+ )
return dec_out
@@ -293,11 +318,6 @@ def forward(self, windows_batch):
insample_y = windows_batch["insample_y"]
y_pred = self.forecast(insample_y)
- y_pred = y_pred[:, -self.h :, :]
- y_pred = self.loss.domain_map(y_pred)
+ y_pred = y_pred.reshape(insample_y.shape[0], self.h, -1)
- # domain_map might have squeezed the last dimension in case n_series == 1
- if y_pred.ndim == 2:
- return y_pred.unsqueeze(-1)
- else:
- return y_pred
+ return y_pred
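For iTransformer the projector is widened from h to h * loss.outputsize_multiplier outputs per series, and forward reshapes the result back to [B, h, -1] instead of relying on loss.domain_map. A shape-only sketch under assumed sizes:

    import torch
    import torch.nn as nn

    B, N, h, hidden, m = 4, 7, 12, 32, 3           # m stands in for loss.outputsize_multiplier
    enc_out = torch.randn(B, N, hidden)            # one encoder token per series

    projector = nn.Linear(hidden, h * m)
    dec_out = projector(enc_out).permute(0, 2, 1)  # [B, h * m, N]
    y_pred = dec_out.reshape(B, h, -1)             # [B, h, N * m]
    print(y_pred.shape)                            # torch.Size([4, 12, 21])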
diff --git a/neuralforecast/models/kan.py b/neuralforecast/models/kan.py
index e442fdbd4..b61aaae63 100644
--- a/neuralforecast/models/kan.py
+++ b/neuralforecast/models/kan.py
@@ -12,7 +12,7 @@
import torch.nn.functional as F
from ..losses.pytorch import MAE
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
# %% ../../nbs/models.kan.ipynb 8
class KANLinear(torch.nn.Module):
@@ -240,7 +240,7 @@ def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0)
)
# %% ../../nbs/models.kan.ipynb 9
-class KAN(BaseWindows):
+class KAN(BaseModel):
"""KAN
Simple Kolmogorov-Arnold Network (KAN).
@@ -293,10 +293,13 @@ class KAN(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -433,7 +436,7 @@ def regularization_loss(self, regularize_activation=1.0, regularize_entropy=1.0)
def forward(self, windows_batch, update_grid=False):
- insample_y = windows_batch["insample_y"]
+ insample_y = windows_batch["insample_y"].squeeze(-1)
futr_exog = windows_batch["futr_exog"]
hist_exog = windows_batch["hist_exog"]
stat_exog = windows_batch["stat_exog"]
@@ -463,5 +466,4 @@ def forward(self, windows_batch, update_grid=False):
y_pred = layer(y_pred)
y_pred = y_pred.reshape(batch_size, self.h, self.loss.outputsize_multiplier)
- y_pred = self.loss.domain_map(y_pred)
return y_pred
diff --git a/neuralforecast/models/lstm.py b/neuralforecast/models/lstm.py
index bb8906b8d..67fa49373 100644
--- a/neuralforecast/models/lstm.py
+++ b/neuralforecast/models/lstm.py
@@ -8,13 +8,14 @@
import torch
import torch.nn as nn
+import warnings
from ..losses.pytorch import MAE
-from ..common._base_recurrent import BaseRecurrent
+from ..common._base_model import BaseModel
from ..common._modules import MLP
# %% ../../nbs/models.lstm.ipynb 7
-class LSTM(BaseRecurrent):
+class LSTM(BaseModel):
"""LSTM
LSTM encoder, with MLP decoder.
@@ -30,7 +31,7 @@ class LSTM(BaseRecurrent):
`encoder_hidden_size`: int=200, units for the LSTM's hidden state size.
`encoder_bias`: bool=True, whether or not to use biases b_ih, b_hh within LSTM units.
`encoder_dropout`: float=0., dropout regularization applied to LSTM outputs.
- `context_size`: int=10, size of context vector for each timestamp on the forecasting window.
+ `context_size`: deprecated.
`decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
`decoder_layers`: int=2, number of layers for the MLP decoder.
`futr_exog_list`: str list, future exogenous columns.
@@ -58,26 +59,30 @@ class LSTM(BaseRecurrent):
"""
# Class attributes
- SAMPLING_TYPE = "recurrent"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ True # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
h: int,
- input_size: int = -1,
- inference_input_size: int = -1,
+ input_size: int,
encoder_n_layers: int = 2,
- encoder_hidden_size: int = 200,
+ encoder_hidden_size: int = 128,
encoder_bias: bool = True,
encoder_dropout: float = 0.0,
- context_size: int = 10,
- decoder_hidden_size: int = 200,
+ context_size: Optional[int] = None,
+ decoder_hidden_size: int = 128,
decoder_layers: int = 2,
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
+ recurrent=False,
loss=MAE(),
valid_loss=None,
max_steps: int = 1000,
@@ -87,6 +92,10 @@ def __init__(
val_check_steps: int = 100,
batch_size=32,
valid_batch_size: Optional[int] = None,
+ windows_batch_size=128,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
+ step_size: int = 1,
scaler_type: str = "robust",
random_seed=1,
drop_last_loader=False,
@@ -97,10 +106,16 @@ def __init__(
dataloader_kwargs=None,
**trainer_kwargs
):
+
+ self.RECURRENT = recurrent
+
super(LSTM, self).__init__(
h=h,
input_size=input_size,
- inference_input_size=inference_input_size,
+ futr_exog_list=futr_exog_list,
+ hist_exog_list=hist_exog_list,
+ stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -110,12 +125,13 @@ def __init__(
val_check_steps=val_check_steps,
batch_size=batch_size,
valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
+ step_size=step_size,
scaler_type=scaler_type,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- drop_last_loader=drop_last_loader,
random_seed=random_seed,
+ drop_last_loader=drop_last_loader,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
lr_scheduler=lr_scheduler,
@@ -131,16 +147,23 @@ def __init__(
self.encoder_dropout = encoder_dropout
# Context adapter
- self.context_size = context_size
+ if context_size is not None:
+ warnings.warn(
+ "context_size is deprecated and will be removed in future versions."
+ )
# MLP decoder
self.decoder_hidden_size = decoder_hidden_size
self.decoder_layers = decoder_layers
# LSTM input size (1 for target variable y)
- input_encoder = 1 + self.hist_exog_size + self.stat_exog_size
+ input_encoder = (
+ 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size
+ )
# Instantiate model
+ self.rnn_state = None
+ self.maintain_state = False
self.hist_encoder = nn.LSTM(
input_size=input_encoder,
hidden_size=self.encoder_hidden_size,
@@ -148,71 +171,76 @@ def __init__(
bias=self.encoder_bias,
dropout=self.encoder_dropout,
batch_first=True,
- )
-
- # Context adapter
- self.context_adapter = nn.Linear(
- in_features=self.encoder_hidden_size + self.futr_exog_size * h,
- out_features=self.context_size * h,
+ proj_size=self.loss.outputsize_multiplier if self.RECURRENT else 0,
)
# Decoder MLP
- self.mlp_decoder = MLP(
- in_features=self.context_size + self.futr_exog_size,
- out_features=self.loss.outputsize_multiplier,
- hidden_size=self.decoder_hidden_size,
- num_layers=self.decoder_layers,
- activation="ReLU",
- dropout=0.0,
- )
+ if not self.RECURRENT:
+ self.mlp_decoder = MLP(
+ in_features=self.encoder_hidden_size + self.futr_exog_size,
+ out_features=self.loss.outputsize_multiplier,
+ hidden_size=self.decoder_hidden_size,
+ num_layers=self.decoder_layers,
+ activation="ReLU",
+ dropout=0.0,
+ )
def forward(self, windows_batch):
# Parse windows_batch
encoder_input = windows_batch["insample_y"] # [B, seq_len, 1]
- futr_exog = windows_batch["futr_exog"]
- hist_exog = windows_batch["hist_exog"]
- stat_exog = windows_batch["stat_exog"]
+ futr_exog = windows_batch["futr_exog"] # [B, seq_len, F]
+ hist_exog = windows_batch["hist_exog"] # [B, seq_len, X]
+ stat_exog = windows_batch["stat_exog"] # [B, S]
# Concatenate y, historic and static inputs
- # [B, C, seq_len, 1] -> [B, seq_len, C]
- # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]
batch_size, seq_len = encoder_input.shape[:2]
if self.hist_exog_size > 0:
- hist_exog = hist_exog.permute(0, 2, 1, 3).squeeze(
- -1
- ) # [B, X, seq_len, 1] -> [B, seq_len, X]
- encoder_input = torch.cat((encoder_input, hist_exog), dim=2)
+ encoder_input = torch.cat(
+ (encoder_input, hist_exog), dim=2
+ ) # [B, seq_len, 1] + [B, seq_len, X] -> [B, seq_len, 1 + X]
if self.stat_exog_size > 0:
+ # print(encoder_input.shape)
stat_exog = stat_exog.unsqueeze(1).repeat(
1, seq_len, 1
) # [B, S] -> [B, seq_len, S]
- encoder_input = torch.cat((encoder_input, stat_exog), dim=2)
-
- # RNN forward
- hidden_state, _ = self.hist_encoder(
- encoder_input
- ) # [B, seq_len, rnn_hidden_state]
-
- if self.futr_exog_size > 0:
- futr_exog = futr_exog.permute(0, 2, 3, 1)[
- :, :, 1:, :
- ] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]
- hidden_state = torch.cat(
- (hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2
- )
-
- # Context adapter
- context = self.context_adapter(hidden_state)
- context = context.reshape(batch_size, seq_len, self.h, self.context_size)
+ encoder_input = torch.cat(
+ (encoder_input, stat_exog), dim=2
+ ) # [B, seq_len, 1 + X] + [B, seq_len, S] -> [B, seq_len, 1 + X + S]
- # Residual connection with futr_exog
if self.futr_exog_size > 0:
- context = torch.cat((context, futr_exog), dim=-1)
-
- # Final forecast
- output = self.mlp_decoder(context)
- output = self.loss.domain_map(output)
-
- return output
+ encoder_input = torch.cat(
+ (encoder_input, futr_exog[:, :seq_len]), dim=2
+ ) # [B, seq_len, 1 + X + S] + [B, seq_len, F] -> [B, seq_len, 1 + X + S + F]
+
+ if self.RECURRENT:
+ if self.maintain_state:
+ rnn_state = self.rnn_state
+ else:
+ rnn_state = None
+
+ output, rnn_state = self.hist_encoder(
+ encoder_input, rnn_state
+ ) # [B, seq_len, n_output]
+ if self.maintain_state:
+ self.rnn_state = rnn_state
+ else:
+ hidden_state, _ = self.hist_encoder(
+ encoder_input, None
+ ) # [B, seq_len, rnn_hidden_state]
+ hidden_state = hidden_state[
+ :, -self.h :
+ ] # [B, seq_len, rnn_hidden_state] -> [B, h, rnn_hidden_state]
+
+ if self.futr_exog_size > 0:
+ futr_exog_futr = futr_exog[:, -self.h :] # [B, h, F]
+ hidden_state = torch.cat(
+ (hidden_state, futr_exog_futr), dim=-1
+ ) # [B, h, rnn_hidden_state] + [B, h, F] -> [B, h, rnn_hidden_state + F]
+
+ output = self.mlp_decoder(
+ hidden_state
+            )  # [B, h, rnn_hidden_state + F] -> [B, h, n_output]
+
+ return output[:, -self.h :]
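For orientation, the non-recurrent branch introduced above reduces to: encode the full window, keep only the last h hidden states, append the horizon slice of the future covariates, and decode each step with the MLP. A minimal standalone sketch of that shape flow, with purely illustrative sizes and names (nothing here is the library API):

```python
import torch
import torch.nn as nn

# Illustrative sizes only
B, L, h, hidden, n_futr, n_out = 4, 24, 6, 16, 3, 1

encoder = nn.LSTM(input_size=1 + n_futr, hidden_size=hidden, batch_first=True)
decoder = nn.Sequential(nn.Linear(hidden + n_futr, 32), nn.ReLU(), nn.Linear(32, n_out))

insample_y = torch.randn(B, L, 1)           # target history, [B, L, 1]
futr_exog = torch.randn(B, L + h, n_futr)   # covariates over history + horizon

# Encoder sees y plus the historical slice of the future covariates
encoder_input = torch.cat([insample_y, futr_exog[:, :L]], dim=2)  # [B, L, 1 + F]
hidden_state, _ = encoder(encoder_input)                          # [B, L, hidden]

# Direct decoding: last h states + the horizon slice of the covariates
hidden_state = hidden_state[:, -h:]                               # [B, h, hidden]
context = torch.cat([hidden_state, futr_exog[:, -h:]], dim=-1)    # [B, h, hidden + F]
forecast = decoder(context)                                       # [B, h, n_out]
print(forecast.shape)  # torch.Size([4, 6, 1])
```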
diff --git a/neuralforecast/models/mlp.py b/neuralforecast/models/mlp.py
index 535c41424..fbede7623 100644
--- a/neuralforecast/models/mlp.py
+++ b/neuralforecast/models/mlp.py
@@ -10,10 +10,10 @@
import torch.nn as nn
from ..losses.pytorch import MAE
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
# %% ../../nbs/models.mlp.ipynb 6
-class MLP(BaseWindows):
+class MLP(BaseModel):
"""MLP
Simple Multi Layer Perceptron architecture (MLP).
@@ -57,10 +57,13 @@ class MLP(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -155,7 +158,7 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
+ insample_y = windows_batch["insample_y"].squeeze(-1)
futr_exog = windows_batch["futr_exog"]
hist_exog = windows_batch["hist_exog"]
stat_exog = windows_batch["stat_exog"]
@@ -184,5 +187,4 @@ def forward(self, windows_batch):
y_pred = self.out(y_pred)
y_pred = y_pred.reshape(batch_size, self.h, self.loss.outputsize_multiplier)
- y_pred = self.loss.domain_map(y_pred)
return y_pred
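A tiny sketch of the input convention the new `.squeeze(-1)` calls assume: under the `BaseModel` refactor, `insample_y` apparently arrives with an explicit trailing channel axis, which window-flattening models drop before their dense layers; the `domain_map` call disappears from `forward` and is presumably applied outside the model now. Names and sizes below are illustrative only:

```python
import torch
import torch.nn as nn

B, L, h = 8, 36, 12
windows_batch = {"insample_y": torch.randn(B, L, 1)}  # assumed [B, L, 1] layout

insample_y = windows_batch["insample_y"].squeeze(-1)  # [B, L]
head = nn.Linear(L, h)                                # stand-in for the MLP stack
y_pred = head(insample_y).reshape(B, h, 1)            # [B, h, n_output], no domain_map here
print(y_pred.shape)  # torch.Size([8, 12, 1])
```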
diff --git a/neuralforecast/models/mlpmultivariate.py b/neuralforecast/models/mlpmultivariate.py
index f03ec7222..729901d5a 100644
--- a/neuralforecast/models/mlpmultivariate.py
+++ b/neuralforecast/models/mlpmultivariate.py
@@ -7,11 +7,12 @@
import torch
import torch.nn as nn
+from typing import Optional
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
# %% ../../nbs/models.mlpmultivariate.ipynb 6
-class MLPMultivariate(BaseMultivariate):
+class MLPMultivariate(BaseModel):
"""MLPMultivariate
Simple Multi Layer Perceptron architecture (MLP) for multivariate forecasting.
@@ -37,6 +38,10 @@ class MLPMultivariate(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+    `windows_batch_size`: int=256, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -51,10 +56,13 @@ class MLPMultivariate(BaseMultivariate):
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -64,6 +72,7 @@ def __init__(
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
num_layers=2,
hidden_size=1024,
loss=MAE(),
@@ -74,6 +83,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=256,
+ inference_windows_batch_size=256,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -94,6 +107,7 @@ def __init__(
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -102,6 +116,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
drop_last_loader=drop_last_loader,
@@ -170,12 +188,6 @@ def forward(self, windows_batch):
x = torch.relu(layer(x))
x = self.out(x)
- x = x.reshape(batch_size, self.h, -1)
- forecast = self.loss.domain_map(x)
+ forecast = x.reshape(batch_size, self.h, -1)
- # domain_map might have squeezed the last dimension in case n_series == 1
- # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.
- if forecast.ndim == 2:
- return forecast.unsqueeze(-1)
- else:
- return forecast
+ return forecast
diff --git a/neuralforecast/models/nbeats.py b/neuralforecast/models/nbeats.py
index 1fb4f07b8..9041007a0 100644
--- a/neuralforecast/models/nbeats.py
+++ b/neuralforecast/models/nbeats.py
@@ -11,7 +11,7 @@
import torch.nn as nn
from ..losses.pytorch import MAE
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
# %% ../../nbs/models.nbeats.ipynb 7
class IdentityBasis(nn.Module):
@@ -189,7 +189,7 @@ def forward(self, insample_y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]
return backcast, forecast
# %% ../../nbs/models.nbeats.ipynb 9
-class NBEATS(BaseWindows):
+class NBEATS(BaseModel):
"""NBEATS
The Neural Basis Expansion Analysis for Time Series (NBEATS), is a simple and yet
@@ -240,10 +240,13 @@ class NBEATS(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -403,8 +406,8 @@ def create_stack(
def forward(self, windows_batch):
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
- insample_mask = windows_batch["insample_mask"]
+ insample_y = windows_batch["insample_y"].squeeze(-1)
+ insample_mask = windows_batch["insample_mask"].squeeze(-1)
# NBEATS' forward
residuals = insample_y.flip(dims=(-1,)) # backcast init
@@ -420,9 +423,6 @@ def forward(self, windows_batch):
if self.decompose_forecast:
block_forecasts.append(block_forecast)
- # Adapting output's domain
- forecast = self.loss.domain_map(forecast)
-
if self.decompose_forecast:
# (n_batch, n_blocks, h, out_features)
block_forecasts = torch.stack(block_forecasts)
diff --git a/neuralforecast/models/nbeatsx.py b/neuralforecast/models/nbeatsx.py
index 10e37f608..111082f83 100644
--- a/neuralforecast/models/nbeatsx.py
+++ b/neuralforecast/models/nbeatsx.py
@@ -11,7 +11,7 @@
import torch.nn as nn
from ..losses.pytorch import MAE
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
# %% ../../nbs/models.nbeatsx.ipynb 8
class IdentityBasis(nn.Module):
@@ -274,7 +274,7 @@ def forward(
return backcast, forecast
# %% ../../nbs/models.nbeatsx.ipynb 10
-class NBEATSx(BaseWindows):
+class NBEATSx(BaseModel):
"""NBEATSx
The Neural Basis Expansion Analysis with Exogenous variables (NBEATSx) is a simple
@@ -327,10 +327,13 @@ class NBEATSx(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -510,8 +513,8 @@ def create_stack(
def forward(self, windows_batch):
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
- insample_mask = windows_batch["insample_mask"]
+ insample_y = windows_batch["insample_y"].squeeze(-1)
+ insample_mask = windows_batch["insample_mask"].squeeze(-1)
futr_exog = windows_batch["futr_exog"]
hist_exog = windows_batch["hist_exog"]
stat_exog = windows_batch["stat_exog"]
@@ -535,9 +538,6 @@ def forward(self, windows_batch):
if self.decompose_forecast:
block_forecasts.append(block_forecast)
- # Adapting output's domain
- forecast = self.loss.domain_map(forecast)
-
if self.decompose_forecast:
# (n_batch, n_blocks, h)
block_forecasts = torch.stack(block_forecasts)
diff --git a/neuralforecast/models/nhits.py b/neuralforecast/models/nhits.py
index f16db81a3..09fa7920a 100644
--- a/neuralforecast/models/nhits.py
+++ b/neuralforecast/models/nhits.py
@@ -12,7 +12,7 @@
import torch.nn.functional as F
from ..losses.pytorch import MAE
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
# %% ../../nbs/models.nhits.ipynb 8
class _IdentityBasis(nn.Module):
@@ -184,7 +184,7 @@ def forward(
return backcast, forecast
# %% ../../nbs/models.nhits.ipynb 10
-class NHITS(BaseWindows):
+class NHITS(BaseModel):
"""NHITS
The Neural Hierarchical Interpolation for Time Series (NHITS), is an MLP-based deep
@@ -239,10 +239,13 @@ class NHITS(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -395,8 +398,8 @@ def create_stack(
def forward(self, windows_batch):
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
- insample_mask = windows_batch["insample_mask"]
+ insample_y = windows_batch["insample_y"].squeeze(-1).contiguous()
+ insample_mask = windows_batch["insample_mask"].squeeze(-1).contiguous()
futr_exog = windows_batch["futr_exog"]
hist_exog = windows_batch["hist_exog"]
stat_exog = windows_batch["stat_exog"]
@@ -420,9 +423,6 @@ def forward(self, windows_batch):
if self.decompose_forecast:
block_forecasts.append(block_forecast)
- # Adapting output's domain
- forecast = self.loss.domain_map(forecast)
-
if self.decompose_forecast:
# (n_batch, n_blocks, h, output_size)
block_forecasts = torch.stack(block_forecasts)
diff --git a/neuralforecast/models/nlinear.py b/neuralforecast/models/nlinear.py
index 4bad929b1..a60b735d6 100644
--- a/neuralforecast/models/nlinear.py
+++ b/neuralforecast/models/nlinear.py
@@ -8,12 +8,12 @@
import torch.nn as nn
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
# %% ../../nbs/models.nlinear.ipynb 7
-class NLinear(BaseWindows):
+class NLinear(BaseModel):
"""NLinear
*Parameters:*
@@ -50,10 +50,13 @@ class NLinear(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -129,11 +132,7 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
- # futr_exog = windows_batch['futr_exog']
+ insample_y = windows_batch["insample_y"].squeeze(-1)
# Parse inputs
batch_size = len(insample_y)
@@ -145,5 +144,4 @@ def forward(self, windows_batch):
# Final
forecast = self.linear(norm_insample_y) + last_value
forecast = forecast.reshape(batch_size, self.h, self.loss.outputsize_multiplier)
- forecast = self.loss.domain_map(forecast)
return forecast
diff --git a/neuralforecast/models/patchtst.py b/neuralforecast/models/patchtst.py
index 25770b71c..096ccd28f 100644
--- a/neuralforecast/models/patchtst.py
+++ b/neuralforecast/models/patchtst.py
@@ -14,7 +14,7 @@
import torch.nn as nn
import torch.nn.functional as F
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..common._modules import RevIN
from ..losses.pytorch import MAE
@@ -785,7 +785,7 @@ def forward(
return output, attn_weights
# %% ../../nbs/models.patchtst.ipynb 15
-class PatchTST(BaseWindows):
+class PatchTST(BaseModel):
"""PatchTST
The PatchTST model is an efficient Transformer-based model for multivariate time series forecasting.
@@ -847,10 +847,13 @@ class PatchTST(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -992,20 +995,10 @@ def __init__(
def forward(self, windows_batch): # x: [batch, input_size]
# Parse windows_batch
- insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
- # futr_exog = windows_batch['futr_exog']
-
- # Add dimension for channel
- x = insample_y.unsqueeze(-1) # [Ws,L,1]
+ x = windows_batch["insample_y"]
x = x.permute(0, 2, 1) # x: [Batch, 1, input_size]
x = self.model(x)
- x = x.reshape(x.shape[0], self.h, -1) # x: [Batch, h, c_out]
-
- # Domain map
- forecast = self.loss.domain_map(x)
+ forecast = x.reshape(x.shape[0], self.h, -1) # x: [Batch, h, c_out]
return forecast
diff --git a/neuralforecast/models/rmok.py b/neuralforecast/models/rmok.py
index fc66483d6..4bbee8523 100644
--- a/neuralforecast/models/rmok.py
+++ b/neuralforecast/models/rmok.py
@@ -11,8 +11,9 @@
import torch.nn.functional as F
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
-from ..common._modules import RevIN
+from ..common._base_model import BaseModel
+from ..common._modules import RevINMultivariate
+from typing import Optional
# %% ../../nbs/models.rmok.ipynb 8
class WaveKANLayer(nn.Module):
@@ -256,9 +257,11 @@ def forward(self, x):
return y
# %% ../../nbs/models.rmok.ipynb 14
-class RMoK(BaseMultivariate):
+class RMoK(BaseModel):
"""Reversible Mixture of KAN
- **Parameters**
+
+
+ **Parameters:**
`h`: int, Forecast horizon.
`input_size`: int, autorregresive inputs size, y=[1,2,3,4] input_size=2 -> y_[t-2:t]=[1,2].
`n_series`: int, number of time-series.
@@ -278,6 +281,10 @@ class RMoK(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+    `windows_batch_size`: int=1024, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -290,21 +297,24 @@ class RMoK(BaseMultivariate):
`dataloader_kwargs`: dict, optional, list of parameters passed into the PyTorch Lightning dataloader by the `TimeSeriesDataLoader`.
`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
- Reference
- [Xiao Han, Xinfeng Zhang, Yiling Wu, Zhenduo Zhang, Zhe Wu."KAN4TSF: Are KAN and KAN-based models Effective for Time Series Forecasting?"](https://arxiv.org/abs/2408.11306)
+ **References**
+ - [Xiao Han, Xinfeng Zhang, Yiling Wu, Zhenduo Zhang, Zhe Wu."KAN4TSF: Are KAN and KAN-based models Effective for Time Series Forecasting?". arXiv.](https://arxiv.org/abs/2408.11306)
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
h,
input_size,
- n_series,
+ n_series: int,
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
@@ -321,6 +331,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=1024,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -348,6 +362,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
@@ -373,25 +391,34 @@ def __init__(
self.experts = nn.ModuleList(
[
TaylorKANLayer(
- self.input_size, self.h, order=self.taylor_order, addbias=True
+ self.input_size,
+ self.h * self.loss.outputsize_multiplier,
+ order=self.taylor_order,
+ addbias=True,
+ ),
+ JacobiKANLayer(
+ self.input_size,
+ self.h * self.loss.outputsize_multiplier,
+ degree=self.jacobi_degree,
),
- JacobiKANLayer(self.input_size, self.h, degree=self.jacobi_degree),
WaveKANLayer(
- self.input_size, self.h, wavelet_type=self.wavelet_function
+ self.input_size,
+ self.h * self.loss.outputsize_multiplier,
+ wavelet_type=self.wavelet_function,
),
- nn.Linear(self.input_size, self.h),
+ nn.Linear(self.input_size, self.h * self.loss.outputsize_multiplier),
]
)
self.num_experts = len(self.experts)
self.gate = nn.Linear(self.input_size, self.num_experts)
self.softmax = nn.Softmax(dim=-1)
- self.rev = RevIN(self.n_series, affine=self.revin_affine)
+ self.rev = RevINMultivariate(self.n_series, affine=self.revin_affine)
def forward(self, windows_batch):
insample_y = windows_batch["insample_y"]
B, L, N = insample_y.shape
- x = self.rev(insample_y, "norm") if self.rev else insample_y
+ x = self.rev(insample_y, "norm")
x = self.dropout(x).transpose(1, 2).reshape(B * N, L)
score = F.softmax(self.gate(x), dim=-1)
@@ -400,15 +427,11 @@ def forward(self, windows_batch):
)
y_pred = (
- torch.einsum("BLE,BE->BL", expert_outputs, score)
- .reshape(B, N, -1)
+ torch.einsum("BLE, BE -> BL", expert_outputs, score)
+ .reshape(B, N, self.h * self.loss.outputsize_multiplier)
.permute(0, 2, 1)
)
y_pred = self.rev(y_pred, "denorm")
- y_pred = self.loss.domain_map(y_pred)
+ y_pred = y_pred.reshape(B, self.h, -1)
- # domain_map might have squeezed the last dimension in case n_series == 1
- if y_pred.ndim == 2:
- return y_pred.unsqueeze(-1)
- else:
- return y_pred
+ return y_pred
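The RMoK change above widens every expert head from `h` to `h * outputsize_multiplier` so probabilistic losses get enough outputs per step. A reduced sketch of that mixture-of-experts shape flow, with made-up sizes and plain `nn.Linear` experts standing in for the KAN layers:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

B, N, L, h, mult = 2, 3, 24, 6, 2  # batch, series, window, horizon, loss outputs per step

experts = nn.ModuleList([nn.Linear(L, h * mult) for _ in range(4)])
gate = nn.Linear(L, len(experts))

x = torch.randn(B * N, L)                                      # flattened (batch, series) windows
score = F.softmax(gate(x), dim=-1)                             # [B*N, E]
expert_outputs = torch.stack([e(x) for e in experts], dim=-1)  # [B*N, h*mult, E]

y = torch.einsum("BLE,BE->BL", expert_outputs, score)          # weighted mixture, [B*N, h*mult]
y = y.reshape(B, N, h * mult).permute(0, 2, 1)                 # [B, h*mult, N]
y = y.reshape(B, h, -1)                                        # [B, h, N*mult]
print(y.shape)  # torch.Size([2, 6, 6])
```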
diff --git a/neuralforecast/models/rnn.py b/neuralforecast/models/rnn.py
index d3f8b4fff..c5a10ab0b 100644
--- a/neuralforecast/models/rnn.py
+++ b/neuralforecast/models/rnn.py
@@ -8,13 +8,14 @@
import torch
import torch.nn as nn
+import warnings
from ..losses.pytorch import MAE
-from ..common._base_recurrent import BaseRecurrent
+from ..common._base_model import BaseModel
from ..common._modules import MLP
# %% ../../nbs/models.rnn.ipynb 7
-class RNN(BaseRecurrent):
+class RNN(BaseModel):
"""RNN
Multi Layer Elman RNN (RNN), with MLP decoder.
@@ -31,7 +32,7 @@ class RNN(BaseRecurrent):
`encoder_activation`: str=`tanh`, type of RNN activation from `tanh` or `relu`.
`encoder_bias`: bool=True, whether or not to use biases b_ih, b_hh within RNN units.
`encoder_dropout`: float=0., dropout regularization applied to RNN outputs.
- `context_size`: int=10, size of context vector for each timestamp on the forecasting window.
+ `context_size`: deprecated.
`decoder_hidden_size`: int=200, size of hidden layer for the MLP decoder.
`decoder_layers`: int=2, number of layers for the MLP decoder.
`futr_exog_list`: str list, future exogenous columns.
@@ -60,10 +61,13 @@ class RNN(BaseRecurrent):
"""
# Class attributes
- SAMPLING_TYPE = "recurrent"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ True # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -71,16 +75,18 @@ def __init__(
input_size: int = -1,
inference_input_size: int = -1,
encoder_n_layers: int = 2,
- encoder_hidden_size: int = 200,
+ encoder_hidden_size: int = 128,
encoder_activation: str = "tanh",
encoder_bias: bool = True,
encoder_dropout: float = 0.0,
- context_size: int = 10,
- decoder_hidden_size: int = 200,
+ context_size: Optional[int] = None,
+ decoder_hidden_size: int = 128,
decoder_layers: int = 2,
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
+ recurrent=False,
loss=MAE(),
valid_loss=None,
max_steps: int = 1000,
@@ -90,6 +96,10 @@ def __init__(
val_check_steps: int = 100,
batch_size=32,
valid_batch_size: Optional[int] = None,
+ windows_batch_size=128,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
+ step_size: int = 1,
scaler_type: str = "robust",
random_seed=1,
drop_last_loader=False,
@@ -100,10 +110,16 @@ def __init__(
dataloader_kwargs=None,
**trainer_kwargs
):
+
+ self.RECURRENT = recurrent
+
super(RNN, self).__init__(
h=h,
input_size=input_size,
- inference_input_size=inference_input_size,
+ futr_exog_list=futr_exog_list,
+ hist_exog_list=hist_exog_list,
+ stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -113,12 +129,13 @@ def __init__(
val_check_steps=val_check_steps,
batch_size=batch_size,
valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
+ step_size=step_size,
scaler_type=scaler_type,
- futr_exog_list=futr_exog_list,
- hist_exog_list=hist_exog_list,
- stat_exog_list=stat_exog_list,
- drop_last_loader=drop_last_loader,
random_seed=random_seed,
+ drop_last_loader=drop_last_loader,
optimizer=optimizer,
optimizer_kwargs=optimizer_kwargs,
lr_scheduler=lr_scheduler,
@@ -134,6 +151,12 @@ def __init__(
self.encoder_bias = encoder_bias
self.encoder_dropout = encoder_dropout
+        # Deprecation warning for the removed context_size argument
+ if context_size is not None:
+ warnings.warn(
+ "context_size is deprecated and will be removed in future versions."
+ )
+
# Context adapter
self.context_size = context_size
@@ -142,82 +165,96 @@ def __init__(
self.decoder_layers = decoder_layers
# RNN input size (1 for target variable y)
- input_encoder = 1 + self.hist_exog_size + self.stat_exog_size
+ input_encoder = (
+ 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size
+ )
# Instantiate model
+ self.rnn_state = None
+ self.maintain_state = False
self.hist_encoder = nn.RNN(
input_size=input_encoder,
hidden_size=self.encoder_hidden_size,
num_layers=self.encoder_n_layers,
- nonlinearity=self.encoder_activation,
bias=self.encoder_bias,
dropout=self.encoder_dropout,
batch_first=True,
)
- # Context adapter
- self.context_adapter = nn.Linear(
- in_features=self.encoder_hidden_size + self.futr_exog_size * h,
- out_features=self.context_size * h,
- )
-
# Decoder MLP
- self.mlp_decoder = MLP(
- in_features=self.context_size + self.futr_exog_size,
- out_features=self.loss.outputsize_multiplier,
- hidden_size=self.decoder_hidden_size,
- num_layers=self.decoder_layers,
- activation="ReLU",
- dropout=0.0,
- )
+ if self.RECURRENT:
+ self.proj = nn.Linear(
+ self.encoder_hidden_size, self.loss.outputsize_multiplier
+ )
+ else:
+ self.mlp_decoder = MLP(
+ in_features=self.encoder_hidden_size + self.futr_exog_size,
+ out_features=self.loss.outputsize_multiplier,
+ hidden_size=self.decoder_hidden_size,
+ num_layers=self.decoder_layers,
+ activation="ReLU",
+ dropout=0.0,
+ )
def forward(self, windows_batch):
# Parse windows_batch
encoder_input = windows_batch["insample_y"] # [B, seq_len, 1]
- futr_exog = windows_batch["futr_exog"]
- hist_exog = windows_batch["hist_exog"]
- stat_exog = windows_batch["stat_exog"]
+ futr_exog = windows_batch["futr_exog"] # [B, seq_len, F]
+ hist_exog = windows_batch["hist_exog"] # [B, seq_len, X]
+ stat_exog = windows_batch["stat_exog"] # [B, S]
# Concatenate y, historic and static inputs
- # [B, C, seq_len, 1] -> [B, seq_len, C]
- # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]
batch_size, seq_len = encoder_input.shape[:2]
if self.hist_exog_size > 0:
- hist_exog = hist_exog.permute(0, 2, 1, 3).squeeze(
- -1
- ) # [B, X, seq_len, 1] -> [B, seq_len, X]
- encoder_input = torch.cat((encoder_input, hist_exog), dim=2)
+ encoder_input = torch.cat(
+ (encoder_input, hist_exog), dim=2
+ ) # [B, seq_len, 1] + [B, seq_len, X] -> [B, seq_len, 1 + X]
if self.stat_exog_size > 0:
stat_exog = stat_exog.unsqueeze(1).repeat(
1, seq_len, 1
) # [B, S] -> [B, seq_len, S]
- encoder_input = torch.cat((encoder_input, stat_exog), dim=2)
-
- # RNN forward
- hidden_state, _ = self.hist_encoder(
- encoder_input
- ) # [B, seq_len, rnn_hidden_state]
+ encoder_input = torch.cat(
+ (encoder_input, stat_exog), dim=2
+ ) # [B, seq_len, 1 + X] + [B, seq_len, S] -> [B, seq_len, 1 + X + S]
if self.futr_exog_size > 0:
- futr_exog = futr_exog.permute(0, 2, 3, 1)[
- :, :, 1:, :
- ] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]
- hidden_state = torch.cat(
- (hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2
- )
+ encoder_input = torch.cat(
+ (encoder_input, futr_exog[:, :seq_len]), dim=2
+ ) # [B, seq_len, 1 + X + S] + [B, seq_len, F] -> [B, seq_len, 1 + X + S + F]
- # Context adapter
- context = self.context_adapter(hidden_state)
- context = context.reshape(batch_size, seq_len, self.h, self.context_size)
+ if self.RECURRENT:
+ if self.maintain_state:
+ rnn_state = self.rnn_state
+ else:
+ rnn_state = None
- # Residual connection with futr_exog
- if self.futr_exog_size > 0:
- context = torch.cat((context, futr_exog), dim=-1)
+ output, rnn_state = self.hist_encoder(
+ encoder_input, rnn_state
+ ) # [B, seq_len, rnn_hidden_state]
+ output = self.proj(
+ output
+ ) # [B, seq_len, rnn_hidden_state] -> [B, seq_len, n_output]
+ if self.maintain_state:
+ self.rnn_state = rnn_state
+ else:
+ hidden_state, _ = self.hist_encoder(
+ encoder_input, None
+ ) # [B, seq_len, rnn_hidden_state]
+ hidden_state = hidden_state[
+ :, -self.h :
+ ] # [B, seq_len, rnn_hidden_state] -> [B, h, rnn_hidden_state]
+
+ if self.futr_exog_size > 0:
+ futr_exog_futr = futr_exog[:, -self.h :] # [B, h, F]
+ hidden_state = torch.cat(
+ (hidden_state, futr_exog_futr), dim=-1
+ ) # [B, h, rnn_hidden_state] + [B, h, F] -> [B, h, rnn_hidden_state + F]
- # Final forecast
- output = self.mlp_decoder(context)
- output = self.loss.domain_map(output)
+ output = self.mlp_decoder(
+ hidden_state
+            )  # [B, h, rnn_hidden_state + F] -> [B, h, n_output]
- return output
+ return output[:, -self.h :]
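The new `recurrent` flag (stored into `RECURRENT`) switches the RNN between the direct decoder above and a stateful recursive mode that can carry `rnn_state` across calls when `maintain_state` is set. A standalone sketch of that recursive branch, not the library call signature:

```python
import torch
import torch.nn as nn

B, L, n_in, hidden, n_out = 4, 24, 1, 16, 1  # illustrative sizes

encoder = nn.RNN(input_size=n_in, hidden_size=hidden, batch_first=True)
proj = nn.Linear(hidden, n_out)

rnn_state = None        # analogous to self.rnn_state
maintain_state = True   # analogous to self.maintain_state

for step in range(3):                        # successive decoding calls
    x = torch.randn(B, L, n_in)
    out, new_state = encoder(x, rnn_state)   # [B, L, hidden]
    y = proj(out)                            # [B, L, n_out]
    if maintain_state:
        rnn_state = new_state                # warm-start the next call
    print(step, y.shape)
```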
diff --git a/neuralforecast/models/softs.py b/neuralforecast/models/softs.py
index a40f32beb..ef18ba74e 100644
--- a/neuralforecast/models/softs.py
+++ b/neuralforecast/models/softs.py
@@ -8,8 +8,9 @@
import torch.nn as nn
import torch.nn.functional as F
+from typing import Optional
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
from ..common._modules import TransEncoder, TransEncoderLayer
# %% ../../nbs/models.softs.ipynb 6
@@ -57,7 +58,7 @@ def forward(self, input, *args, **kwargs):
# stochastic pooling
if self.training:
- ratio = F.softmax(combined_mean, dim=1)
+ ratio = F.softmax(torch.nan_to_num(combined_mean), dim=1)
ratio = ratio.permute(0, 2, 1)
ratio = ratio.reshape(-1, channels)
indices = torch.multinomial(ratio, 1)
@@ -79,7 +80,7 @@ def forward(self, input, *args, **kwargs):
return output, None
# %% ../../nbs/models.softs.ipynb 10
-class SOFTS(BaseMultivariate):
+class SOFTS(BaseModel):
"""SOFTS
**Parameters:**
@@ -103,6 +104,10 @@ class SOFTS(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+    `windows_batch_size`: int=256, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -120,10 +125,11 @@ class SOFTS(BaseMultivariate):
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = True
+ RECURRENT = False
def __init__(
self,
@@ -133,6 +139,7 @@ def __init__(
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
hidden_size: int = 512,
d_core: int = 512,
e_layers: int = 2,
@@ -147,6 +154,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=256,
+ inference_windows_batch_size=256,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -166,6 +177,7 @@ def __init__(
stat_exog_list=None,
futr_exog_list=None,
hist_exog_list=None,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -174,6 +186,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
@@ -208,7 +224,9 @@ def __init__(
]
)
- self.projection = nn.Linear(hidden_size, self.h, bias=True)
+ self.projection = nn.Linear(
+ hidden_size, self.h * self.loss.outputsize_multiplier, bias=True
+ )
def forecast(self, x_enc):
# Normalization from Non-stationary Transformer
@@ -227,19 +245,22 @@ def forecast(self, x_enc):
# De-Normalization from Non-stationary Transformer
if self.use_norm:
- dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))
- dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.h, 1))
+ dec_out = dec_out * (
+ stdev[:, 0, :]
+ .unsqueeze(1)
+ .repeat(1, self.h * self.loss.outputsize_multiplier, 1)
+ )
+ dec_out = dec_out + (
+ means[:, 0, :]
+ .unsqueeze(1)
+ .repeat(1, self.h * self.loss.outputsize_multiplier, 1)
+ )
return dec_out
def forward(self, windows_batch):
insample_y = windows_batch["insample_y"]
y_pred = self.forecast(insample_y)
- y_pred = y_pred[:, -self.h :, :]
- y_pred = self.loss.domain_map(y_pred)
+ y_pred = y_pred.reshape(insample_y.shape[0], self.h, -1)
- # domain_map might have squeezed the last dimension in case n_series == 1
- if y_pred.ndim == 2:
- return y_pred.unsqueeze(-1)
- else:
- return y_pred
+ return y_pred
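The SOFTS de-normalization now repeats the per-series mean and standard deviation over `h * outputsize_multiplier` steps so that multi-output heads are rescaled as well. A small sketch with illustrative sizes:

```python
import torch

B, L, N, h, mult = 2, 24, 3, 6, 2
x_enc = torch.randn(B, L, N)
means = x_enc.mean(1, keepdim=True)     # [B, 1, N]
stdev = x_enc.std(1, keepdim=True)      # [B, 1, N]

dec_out = torch.randn(B, h * mult, N)   # raw projection output
dec_out = dec_out * stdev.repeat(1, h * mult, 1) + means.repeat(1, h * mult, 1)
print(dec_out.shape)  # torch.Size([2, 12, 3])
```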
diff --git a/neuralforecast/models/stemgnn.py b/neuralforecast/models/stemgnn.py
index 69cdc4ef5..7d72f0a9d 100644
--- a/neuralforecast/models/stemgnn.py
+++ b/neuralforecast/models/stemgnn.py
@@ -8,8 +8,9 @@
import torch.nn as nn
import torch.nn.functional as F
+from typing import Optional
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
# %% ../../nbs/models.stemgnn.ipynb 7
class GLU(nn.Module):
@@ -136,7 +137,7 @@ def forward(self, x, mul_L):
return forecast, backcast_source
# %% ../../nbs/models.stemgnn.ipynb 9
-class StemGNN(BaseMultivariate):
+class StemGNN(BaseModel):
"""StemGNN
The Spectral Temporal Graph Neural Network (`StemGNN`) is a Graph-based multivariate
@@ -163,6 +164,10 @@ class StemGNN(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int, number of windows in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+    `windows_batch_size`: int=1024, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=1024, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='robust', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int, random_seed for pytorch initializer and numpy generators.
@@ -177,10 +182,13 @@ class StemGNN(BaseMultivariate):
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -190,6 +198,7 @@ def __init__(
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
n_stacks=2,
multi_layer: int = 5,
dropout_rate: float = 0.5,
@@ -202,6 +211,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=1024,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "robust",
random_seed: int = 1,
@@ -222,6 +235,7 @@ def __init__(
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -230,6 +244,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
drop_last_loader=drop_last_loader,
@@ -367,11 +385,5 @@ def forward(self, windows_batch):
forecast = forecast.reshape(
batch_size, self.h, self.loss.outputsize_multiplier * self.n_series
)
- forecast = self.loss.domain_map(forecast)
- # domain_map might have squeezed the last dimension in case n_series == 1
- # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.
- if forecast.ndim == 2:
- return forecast.unsqueeze(-1)
- else:
- return forecast
+ return forecast
diff --git a/neuralforecast/models/tcn.py b/neuralforecast/models/tcn.py
index 70dd9c37f..b3113ac7e 100644
--- a/neuralforecast/models/tcn.py
+++ b/neuralforecast/models/tcn.py
@@ -10,11 +10,11 @@
import torch.nn as nn
from ..losses.pytorch import MAE
-from ..common._base_recurrent import BaseRecurrent
+from ..common._base_model import BaseModel
from ..common._modules import MLP, TemporalConvolutionEncoder
# %% ../../nbs/models.tcn.ipynb 7
-class TCN(BaseRecurrent):
+class TCN(BaseModel):
"""TCN
Temporal Convolution Network (TCN), with MLP decoder.
@@ -55,10 +55,13 @@ class TCN(BaseRecurrent):
"""
# Class attributes
- SAMPLING_TYPE = "recurrent"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -67,10 +70,10 @@ def __init__(
inference_input_size: int = -1,
kernel_size: int = 2,
dilations: List[int] = [1, 2, 4, 8, 16],
- encoder_hidden_size: int = 200,
+ encoder_hidden_size: int = 128,
encoder_activation: str = "ReLU",
context_size: int = 10,
- decoder_hidden_size: int = 200,
+ decoder_hidden_size: int = 128,
decoder_layers: int = 2,
futr_exog_list=None,
hist_exog_list=None,
@@ -84,6 +87,10 @@ def __init__(
val_check_steps: int = 100,
batch_size: int = 32,
valid_batch_size: Optional[int] = None,
+ windows_batch_size=128,
+ inference_windows_batch_size=1024,
+ start_padding_enabled=False,
+ step_size: int = 1,
scaler_type: str = "robust",
random_seed: int = 1,
drop_last_loader=False,
@@ -107,6 +114,10 @@ def __init__(
val_check_steps=val_check_steps,
batch_size=batch_size,
valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
+ step_size=step_size,
scaler_type=scaler_type,
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
@@ -136,7 +147,9 @@ def __init__(
self.decoder_layers = decoder_layers
# TCN input size (1 for target variable y)
- input_encoder = 1 + self.hist_exog_size + self.stat_exog_size
+ input_encoder = (
+ 1 + self.hist_exog_size + self.stat_exog_size + self.futr_exog_size
+ )
# ---------------------------------- Instantiate Model -----------------------------------#
# Instantiate historic encoder
@@ -149,14 +162,11 @@ def __init__(
)
# Context adapter
- self.context_adapter = nn.Linear(
- in_features=self.encoder_hidden_size + self.futr_exog_size * h,
- out_features=self.context_size * h,
- )
+ self.context_adapter = nn.Linear(in_features=self.input_size, out_features=h)
# Decoder MLP
self.mlp_decoder = MLP(
- in_features=self.context_size + self.futr_exog_size,
+ in_features=self.encoder_hidden_size + self.futr_exog_size,
out_features=self.loss.outputsize_multiplier,
hidden_size=self.decoder_hidden_size,
num_layers=self.decoder_layers,
@@ -167,50 +177,51 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- encoder_input = windows_batch["insample_y"] # [B, seq_len, 1]
- futr_exog = windows_batch["futr_exog"]
- hist_exog = windows_batch["hist_exog"]
- stat_exog = windows_batch["stat_exog"]
+ encoder_input = windows_batch["insample_y"] # [B, L, 1]
+ futr_exog = windows_batch["futr_exog"] # [B, L + h, F]
+ hist_exog = windows_batch["hist_exog"] # [B, L, X]
+ stat_exog = windows_batch["stat_exog"] # [B, S]
# Concatenate y, historic and static inputs
- # [B, C, seq_len, 1] -> [B, seq_len, C]
- # Contatenate [ Y_t, | X_{t-L},..., X_{t} | S ]
- batch_size, seq_len = encoder_input.shape[:2]
+ batch_size, input_size = encoder_input.shape[:2]
if self.hist_exog_size > 0:
- hist_exog = hist_exog.permute(0, 2, 1, 3).squeeze(
- -1
- ) # [B, X, seq_len, 1] -> [B, seq_len, X]
- encoder_input = torch.cat((encoder_input, hist_exog), dim=2)
+ encoder_input = torch.cat(
+ (encoder_input, hist_exog), dim=2
+ ) # [B, L, 1] + [B, L, X] -> [B, L, 1 + X]
if self.stat_exog_size > 0:
stat_exog = stat_exog.unsqueeze(1).repeat(
- 1, seq_len, 1
- ) # [B, S] -> [B, seq_len, S]
- encoder_input = torch.cat((encoder_input, stat_exog), dim=2)
-
- # TCN forward
- hidden_state = self.hist_encoder(
- encoder_input
- ) # [B, seq_len, tcn_hidden_state]
+ 1, input_size, 1
+ ) # [B, S] -> [B, L, S]
+ encoder_input = torch.cat(
+ (encoder_input, stat_exog), dim=2
+ ) # [B, L, 1 + X] + [B, L, S] -> [B, L, 1 + X + S]
if self.futr_exog_size > 0:
- futr_exog = futr_exog.permute(0, 2, 3, 1)[
- :, :, 1:, :
- ] # [B, F, seq_len, 1+H] -> [B, seq_len, H, F]
- hidden_state = torch.cat(
- (hidden_state, futr_exog.reshape(batch_size, seq_len, -1)), dim=2
- )
+ encoder_input = torch.cat(
+ (encoder_input, futr_exog[:, :input_size]), dim=2
+ ) # [B, L, 1 + X + S] + [B, L, F] -> [B, L, 1 + X + S + F]
+
+ # TCN forward
+ hidden_state = self.hist_encoder(encoder_input) # [B, L, C]
# Context adapter
- context = self.context_adapter(hidden_state)
- context = context.reshape(batch_size, seq_len, self.h, self.context_size)
+ hidden_state = hidden_state.permute(0, 2, 1) # [B, L, C] -> [B, C, L]
+ context = self.context_adapter(hidden_state) # [B, C, L] -> [B, C, h]
# Residual connection with futr_exog
if self.futr_exog_size > 0:
- context = torch.cat((context, futr_exog), dim=-1)
+ futr_exog_futr = futr_exog[:, input_size:].swapaxes(
+ 1, 2
+ ) # [B, L + h, F] -> [B, F, h]
+ context = torch.cat(
+ (context, futr_exog_futr), dim=1
+ ) # [B, C, h] + [B, F, h] = [B, C + F, h]
+
+ context = context.swapaxes(1, 2) # [B, C + F, h] -> [B, h, C + F]
# Final forecast
- output = self.mlp_decoder(context)
- output = self.loss.domain_map(output)
+ output = self.mlp_decoder(context) # [B, h, C + F] -> [B, h, n_output]
return output
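The reworked TCN context adapter no longer builds a per-step context of size `context_size`; it maps the time axis of the encoder output from `input_size` to `h` with a single linear layer, then appends the horizon covariates channel-wise before the MLP decoder. A reduced sketch of that flow with illustrative sizes:

```python
import torch
import torch.nn as nn

B, L, h, C, n_futr, n_out = 4, 24, 6, 16, 3, 1

context_adapter = nn.Linear(in_features=L, out_features=h)  # maps the time axis L -> h
decoder = nn.Sequential(nn.Linear(C + n_futr, 32), nn.ReLU(), nn.Linear(32, n_out))

hidden_state = torch.randn(B, L, C)        # encoder output, [B, L, C]
futr_exog = torch.randn(B, L + h, n_futr)  # covariates over history + horizon

context = context_adapter(hidden_state.permute(0, 2, 1))      # [B, C, L] -> [B, C, h]
futr_h = futr_exog[:, L:].swapaxes(1, 2)                      # [B, F, h]
context = torch.cat([context, futr_h], dim=1).swapaxes(1, 2)  # [B, h, C + F]
output = decoder(context)                                     # [B, h, n_out]
print(output.shape)  # torch.Size([4, 6, 1])
```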
diff --git a/neuralforecast/models/tft.py b/neuralforecast/models/tft.py
index 5590b2f03..aa58703f8 100644
--- a/neuralforecast/models/tft.py
+++ b/neuralforecast/models/tft.py
@@ -10,11 +10,11 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
+
from torch import Tensor
from torch.nn import LayerNorm
-
-from ..common._base_windows import BaseWindows
from ..losses.pytorch import MAE
+from ..common._base_model import BaseModel
# %% ../../nbs/models.tft.ipynb 11
def get_activation_fn(activation_str: str) -> Callable:
@@ -510,7 +510,7 @@ def forward(self, temporal_features, ce):
return x, atten_vect
# %% ../../nbs/models.tft.ipynb 24
-class TFT(BaseWindows):
+class TFT(BaseModel):
"""TFT
The Temporal Fusion Transformer architecture (TFT) is an Sequence-to-Sequence
@@ -563,10 +563,13 @@ class TFT(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -691,8 +694,9 @@ def __init__(
)
def forward(self, windows_batch):
+
# Parsiw windows_batch
- y_insample = windows_batch["insample_y"][:, :, None] # <- [B,T,1]
+ y_insample = windows_batch["insample_y"] # <- [B,T,1]
futr_exog = windows_batch["futr_exog"]
hist_exog = windows_batch["hist_exog"]
stat_exog = windows_batch["stat_exog"]
@@ -764,7 +768,6 @@ def forward(self, windows_batch):
# Adapt output to loss
y_hat = self.output_adapter(temporal_features)
- y_hat = self.loss.domain_map(y_hat)
return y_hat
diff --git a/neuralforecast/models/tide.py b/neuralforecast/models/tide.py
index 1f8f7144f..2930befa3 100644
--- a/neuralforecast/models/tide.py
+++ b/neuralforecast/models/tide.py
@@ -11,7 +11,7 @@
import torch.nn.functional as F
from ..losses.pytorch import MAE
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
# %% ../../nbs/models.tide.ipynb 8
class MLPResidual(nn.Module):
@@ -48,7 +48,7 @@ def forward(self, input):
return x
# %% ../../nbs/models.tide.ipynb 10
-class TiDE(BaseWindows):
+class TiDE(BaseModel):
"""TiDE
Time-series Dense Encoder (`TiDE`) is a MLP-based univariate time-series forecasting model. `TiDE` uses Multi-layer Perceptrons (MLPs) in an encoder-decoder model for long-term time-series forecasting.
@@ -93,10 +93,13 @@ class TiDE(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -240,7 +243,7 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
- x = windows_batch["insample_y"].unsqueeze(-1) # [B, L, 1]
+ x = windows_batch["insample_y"] # [B, L, 1]
hist_exog = windows_batch["hist_exog"] # [B, L, X]
futr_exog = windows_batch["futr_exog"] # [B, L + h, F]
stat_exog = windows_batch["stat_exog"] # [B, S]
@@ -310,7 +313,6 @@ def forward(self, windows_batch):
x
) # [B, h, temporal_width + decoder_output_dim] -> [B, h, n_outputs]
- # Map to output domain
- forecast = self.loss.domain_map(x + x_skip)
+ forecast = x + x_skip
return forecast
diff --git a/neuralforecast/models/timellm.py b/neuralforecast/models/timellm.py
index bec5fb453..c1b627355 100644
--- a/neuralforecast/models/timellm.py
+++ b/neuralforecast/models/timellm.py
@@ -7,12 +7,12 @@
import math
from typing import Optional
+import neuralforecast.losses.pytorch as losses
import torch
import torch.nn as nn
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..common._modules import RevIN
-
from ..losses.pytorch import MAE
try:
@@ -165,7 +165,7 @@ def reprogramming(self, target_embedding, source_embedding, value_embedding):
return reprogramming_embedding
# %% ../../nbs/models.timellm.ipynb 11
-class TimeLLM(BaseWindows):
+class TimeLLM(BaseModel):
"""TimeLLM
Time-LLM is a reprogramming framework to repurpose an off-the-shelf LLM for time series forecasting.
@@ -225,10 +225,13 @@ class TimeLLM(BaseWindows):
"""
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -306,6 +309,15 @@ def __init__(
dataloader_kwargs=dataloader_kwargs,
**trainer_kwargs,
)
+ if loss.outputsize_multiplier > 1:
+ raise Exception(
+ "TimeLLM only supports point loss functions (MAE, MSE, etc) as loss function."
+ )
+
+ if valid_loss is not None and not isinstance(valid_loss, losses.BasePointLoss):
+ raise Exception(
+ "TimeLLM only supports point loss functions (MAE, MSE, etc) as valid loss function."
+ )
# Architecture
self.patch_len = patch_len
@@ -465,12 +477,9 @@ def calcute_lags(self, x_enc):
return lags
def forward(self, windows_batch):
- insample_y = windows_batch["insample_y"]
-
- x = insample_y.unsqueeze(-1)
+ x = windows_batch["insample_y"]
y_pred = self.forecast(x)
y_pred = y_pred[:, -self.h :, :]
- y_pred = self.loss.domain_map(y_pred)
return y_pred
diff --git a/neuralforecast/models/timemixer.py b/neuralforecast/models/timemixer.py
index cdaea20bc..d28f790bd 100644
--- a/neuralforecast/models/timemixer.py
+++ b/neuralforecast/models/timemixer.py
@@ -11,7 +11,7 @@
import torch
import torch.nn as nn
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
from neuralforecast.common._modules import (
PositionalEmbedding,
TokenEmbedding,
@@ -19,8 +19,8 @@
SeriesDecomp,
RevIN,
)
-
from ..losses.pytorch import MAE
+from typing import Optional
# %% ../../nbs/models.timemixer.ipynb 6
class DataEmbedding_wo_pos(nn.Module):
@@ -249,7 +249,7 @@ def forward(self, x_list):
return out_list
# %% ../../nbs/models.timemixer.ipynb 12
-class TimeMixer(BaseMultivariate):
+class TimeMixer(BaseModel):
"""TimeMixer
**Parameters**
`h`: int, Forecast horizon.
@@ -279,6 +279,10 @@ class TimeMixer(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+    `windows_batch_size`: int=256, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning, by input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -292,14 +296,17 @@ class TimeMixer(BaseMultivariate):
`**trainer_kwargs`: int, keyword trainer arguments inherited from [PyTorch Lighning's trainer](https://pytorch-lightning.readthedocs.io/en/stable/api/pytorch_lightning.trainer.trainer.Trainer.html?highlight=trainer).
**References**
- [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou."TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting"](https://openreview.net/pdf?id=7oLshfEIC2)
+ [Shiyu Wang, Haixu Wu, Xiaoming Shi, Tengge Hu, Huakun Luo, Lintao Ma, James Y. Zhang, Jun Zhou."TimeMixer: Decomposable Multiscale Mixing For Time Series Forecasting"](https://openreview.net/pdf?id=7oLshfEIC2)
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -330,6 +337,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=256,
+ inference_windows_batch_size=256,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -357,6 +368,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
@@ -471,6 +486,11 @@ def __init__(
]
)
+ if self.loss.outputsize_multiplier > 1:
+ self.distr_output = nn.Linear(
+ self.n_series, self.n_series * self.loss.outputsize_multiplier
+ )
+
def out_projection(self, dec_out, i, out_res):
dec_out = self.projection_layer(dec_out)
out_res = out_res.permute(0, 2, 1)
@@ -644,10 +664,7 @@ def forward(self, windows_batch):
y_pred = self.forecast(insample_y, x_mark_enc, x_mark_dec)
y_pred = y_pred[:, -self.h :, :]
- y_pred = self.loss.domain_map(y_pred)
+ if self.loss.outputsize_multiplier > 1:
+ y_pred = self.distr_output(y_pred)
- # domain_map might have squeezed the last dimension in case n_series == 1
- if y_pred.ndim == 2:
- return y_pred.unsqueeze(-1)
- else:
- return y_pred
+ return y_pred
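TimeMixer's added `distr_output` head simply widens the channel axis when the loss needs more than one output per series. A minimal sketch of what that linear layer does, with illustrative sizes:

```python
import torch
import torch.nn as nn

B, h, N, mult = 2, 6, 3, 2
distr_output = nn.Linear(N, N * mult)

y_pred = torch.randn(B, h, N)   # one value per series and step
y_pred = distr_output(y_pred)   # [B, h, N * mult]
print(y_pred.shape)  # torch.Size([2, 6, 6])
```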
diff --git a/neuralforecast/models/timesnet.py b/neuralforecast/models/timesnet.py
index 9089a8796..bd99f055e 100644
--- a/neuralforecast/models/timesnet.py
+++ b/neuralforecast/models/timesnet.py
@@ -12,7 +12,7 @@
import torch.fft
from ..common._modules import DataEmbedding
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
@@ -119,7 +119,7 @@ def forward(self, x):
return res
# %% ../../nbs/models.timesnet.ipynb 10
-class TimesNet(BaseWindows):
+class TimesNet(BaseModel):
"""TimesNet
The TimesNet univariate model tackles the challenge of modeling multiple intraperiod and interperiod temporal variations.
@@ -197,10 +197,13 @@ class TimesNet(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -305,13 +308,9 @@ def forward(self, windows_batch):
# Parse windows_batch
insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
futr_exog = windows_batch["futr_exog"]
# Parse inputs
- insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]
if self.futr_exog_size > 0:
x_mark_enc = futr_exog[:, : self.input_size, :]
else:
@@ -328,5 +327,5 @@ def forward(self, windows_batch):
# porject back
dec_out = self.projection(enc_out)
- forecast = self.loss.domain_map(dec_out[:, -self.h :])
+ forecast = dec_out[:, -self.h :]
return forecast
diff --git a/neuralforecast/models/tsmixer.py b/neuralforecast/models/tsmixer.py
index 17fae38be..6165b6782 100644
--- a/neuralforecast/models/tsmixer.py
+++ b/neuralforecast/models/tsmixer.py
@@ -1,15 +1,16 @@
# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/models.tsmixer.ipynb.
# %% auto 0
-__all__ = ['TemporalMixing', 'FeatureMixing', 'MixingLayer', 'ReversibleInstanceNorm1d', 'TSMixer']
+__all__ = ['TemporalMixing', 'FeatureMixing', 'MixingLayer', 'TSMixer']
# %% ../../nbs/models.tsmixer.ipynb 5
-import torch
import torch.nn as nn
import torch.nn.functional as F
+from typing import Optional
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
+from ..common._modules import RevINMultivariate
# %% ../../nbs/models.tsmixer.ipynb 8
class TemporalMixing(nn.Module):
@@ -93,44 +94,7 @@ def forward(self, input):
return x
# %% ../../nbs/models.tsmixer.ipynb 10
-class ReversibleInstanceNorm1d(nn.Module):
- """
- ReversibleInstanceNorm1d
- """
-
- def __init__(self, n_series, eps=1e-5):
- super().__init__()
- self.weight = nn.Parameter(torch.ones((1, 1, n_series)))
- self.bias = nn.Parameter(torch.zeros((1, 1, n_series)))
-
- self.eps = eps
-
- def forward(self, x):
- # Batch statistics
- self.batch_mean = torch.mean(x, axis=1, keepdim=True).detach()
- self.batch_std = torch.sqrt(
- torch.var(x, axis=1, keepdim=True, unbiased=False) + self.eps
- ).detach()
-
- # Instance normalization
- x = x - self.batch_mean
- x = x / self.batch_std
- x = x * self.weight
- x = x + self.bias
-
- return x
-
- def reverse(self, x):
- # Reverse the normalization
- x = x - self.bias
- x = x / self.weight
- x = x * self.batch_std
- x = x + self.batch_mean
-
- return x
-
-# %% ../../nbs/models.tsmixer.ipynb 12
-class TSMixer(BaseMultivariate):
+class TSMixer(BaseModel):
"""TSMixer
Time-Series Mixer (`TSMixer`) is a MLP-based multivariate time-series forecasting model. `TSMixer` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).
@@ -154,6 +118,10 @@ class TSMixer(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+ `windows_batch_size`: int=256, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning by the input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -172,10 +140,13 @@ class TSMixer(BaseMultivariate):
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = False
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -185,6 +156,7 @@ def __init__(
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
n_block=2,
ff_dim=64,
dropout=0.9,
@@ -197,6 +169,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=256,
+ inference_windows_batch_size=256,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -217,6 +193,7 @@ def __init__(
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -225,6 +202,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
@@ -240,7 +221,7 @@ def __init__(
# Reversible InstanceNormalization layer
self.revin = revin
if self.revin:
- self.norm = ReversibleInstanceNorm1d(n_series=n_series)
+ self.norm = RevINMultivariate(num_features=n_series, affine=True)
# Mixing layers
mixing_layers = [
@@ -263,22 +244,16 @@ def forward(self, windows_batch):
# TSMixer: InstanceNorm + Mixing layers + Dense output layer + ReverseInstanceNorm
if self.revin:
- x = self.norm(x)
+ x = self.norm(x, "norm")
x = self.mixing_layers(x)
x = x.permute(0, 2, 1)
x = self.out(x)
x = x.permute(0, 2, 1)
if self.revin:
- x = self.norm.reverse(x)
+ x = self.norm(x, "denorm")
x = x.reshape(
batch_size, self.h, self.loss.outputsize_multiplier * self.n_series
)
- forecast = self.loss.domain_map(x)
-
- # domain_map might have squeezed the last dimension in case n_series == 1
- # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.
- if forecast.ndim == 2:
- return forecast.unsqueeze(-1)
- else:
- return forecast
+
+ return x
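Editor's note: the `ReversibleInstanceNorm1d` module removed above is replaced by `RevINMultivariate` from `neuralforecast/common/_modules.py`, called with explicit `"norm"`/`"denorm"` modes. A rough sketch of that interface, reusing the statistics logic of the deleted class (the actual `RevINMultivariate` may differ in details such as affine handling):

import torch
import torch.nn as nn

class RevINSketch(nn.Module):
    """Reversible instance norm with a mode argument, as assumed by the new forward pass."""

    def __init__(self, num_features, eps=1e-5, affine=True):
        super().__init__()
        self.eps = eps
        self.affine = affine
        if affine:
            self.weight = nn.Parameter(torch.ones(1, 1, num_features))
            self.bias = nn.Parameter(torch.zeros(1, 1, num_features))

    def forward(self, x, mode):
        if mode == "norm":
            # store per-window statistics over the time dimension, then standardize
            self.mean = x.mean(dim=1, keepdim=True).detach()
            self.std = torch.sqrt(x.var(dim=1, keepdim=True, unbiased=False) + self.eps).detach()
            x = (x - self.mean) / self.std
            if self.affine:
                x = x * self.weight + self.bias
            return x
        if mode == "denorm":
            # undo the affine transform and restore the stored statistics
            if self.affine:
                x = (x - self.bias) / self.weight
            return x * self.std + self.mean
        raise ValueError(f"unknown mode: {mode}")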
diff --git a/neuralforecast/models/tsmixerx.py b/neuralforecast/models/tsmixerx.py
index 97747bbb4..e41a88bff 100644
--- a/neuralforecast/models/tsmixerx.py
+++ b/neuralforecast/models/tsmixerx.py
@@ -8,8 +8,10 @@
import torch.nn as nn
import torch.nn.functional as F
+from typing import Optional
from ..losses.pytorch import MAE
-from ..common._base_multivariate import BaseMultivariate
+from ..common._base_model import BaseModel
+from ..common._modules import RevINMultivariate
# %% ../../nbs/models.tsmixerx.ipynb 8
class TemporalMixing(nn.Module):
@@ -158,7 +160,7 @@ def reverse(self, x):
return x
# %% ../../nbs/models.tsmixerx.ipynb 12
-class TSMixerx(BaseMultivariate):
+class TSMixerx(BaseModel):
"""TSMixerx
Time-Series Mixer exogenous (`TSMixerx`) is an MLP-based multivariate time-series forecasting model, with capability for additional exogenous inputs. `TSMixerx` jointly learns temporal and cross-sectional representations of the time-series by repeatedly combining time- and feature information using stacked mixing layers. A mixing layer consists of a sequential time- and feature Multi Layer Perceptron (`MLP`).
@@ -182,6 +184,10 @@ class TSMixerx(BaseMultivariate):
`early_stop_patience_steps`: int=-1, Number of validation iterations before early stopping.
`val_check_steps`: int=100, Number of training steps between every validation loss check.
`batch_size`: int=32, number of different series in each batch.
+ `valid_batch_size`: int=None, number of different series in each validation and test batch, if None uses batch_size.
+ `windows_batch_size`: int=256, number of windows to sample in each training batch.
+ `inference_windows_batch_size`: int=256, number of windows to sample in each inference batch, -1 uses all.
+ `start_padding_enabled`: bool=False, if True, the model will pad the time series with zeros at the beginning by the input size.
`step_size`: int=1, step size between each window of temporal data.
`scaler_type`: str='identity', type of scaler for temporal inputs normalization see [temporal scalers](https://nixtla.github.io/neuralforecast/common.scalers.html).
`random_seed`: int=1, random_seed for pytorch initializer and numpy generators.
@@ -200,10 +206,13 @@ class TSMixerx(BaseMultivariate):
"""
# Class attributes
- SAMPLING_TYPE = "multivariate"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = True
EXOGENOUS_STAT = True
+ MULTIVARIATE = True # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -213,6 +222,7 @@ def __init__(
futr_exog_list=None,
hist_exog_list=None,
stat_exog_list=None,
+ exclude_insample_y=False,
n_block=2,
ff_dim=64,
dropout=0.0,
@@ -225,6 +235,10 @@ def __init__(
early_stop_patience_steps: int = -1,
val_check_steps: int = 100,
batch_size: int = 32,
+ valid_batch_size: Optional[int] = None,
+ windows_batch_size=256,
+ inference_windows_batch_size=256,
+ start_padding_enabled=False,
step_size: int = 1,
scaler_type: str = "identity",
random_seed: int = 1,
@@ -245,6 +259,7 @@ def __init__(
futr_exog_list=futr_exog_list,
hist_exog_list=hist_exog_list,
stat_exog_list=stat_exog_list,
+ exclude_insample_y=exclude_insample_y,
loss=loss,
valid_loss=valid_loss,
max_steps=max_steps,
@@ -253,6 +268,10 @@ def __init__(
early_stop_patience_steps=early_stop_patience_steps,
val_check_steps=val_check_steps,
batch_size=batch_size,
+ valid_batch_size=valid_batch_size,
+ windows_batch_size=windows_batch_size,
+ inference_windows_batch_size=inference_windows_batch_size,
+ start_padding_enabled=start_padding_enabled,
step_size=step_size,
scaler_type=scaler_type,
random_seed=random_seed,
@@ -267,7 +286,7 @@ def __init__(
# Reversible InstanceNormalization layer
self.revin = revin
if self.revin:
- self.norm = ReversibleInstanceNorm1d(n_series=n_series)
+ self.norm = RevINMultivariate(num_features=n_series, affine=True)
# Forecast horizon
self.h = h
@@ -355,12 +374,12 @@ def forward(self, windows_batch):
stat_exog = windows_batch["stat_exog"] # [N, stat_exog_size (S)]
batch_size, input_size = x.shape[:2]
- # Add channel dimension to x
- x = x.unsqueeze(1) # [B, L, N] -> [B, 1, L, N]
-
# Apply revin to x
if self.revin:
- x = self.norm(x) # [B, 1, L, N] -> [B, 1, L, N]
+ x = self.norm(x, mode="norm") # [B, L, N] -> [B, L, N]
+
+ # Add channel dimension to x
+ x = x.unsqueeze(1) # [B, L, N] -> [B, 1, L, N]
# Concatenate x with historical exogenous
if self.hist_exog_size > 0:
@@ -427,24 +446,16 @@ def forward(self, windows_batch):
x = self.mixing_block(x) # [B, h, ff_dim] -> [B, h, ff_dim]
# Fully connected output layer
- x = self.out(x) # [B, h, ff_dim] -> [B, h, N * n_outputs]
+ forecast = self.out(x) # [B, h, ff_dim] -> [B, h, N * n_outputs]
# Reverse Instance Normalization on output
if self.revin:
- x = x.reshape(
- batch_size, self.h, self.loss.outputsize_multiplier, -1
- ) # [B, h, N * n_outputs] -> [B, h, n_outputs, N]
- x = self.norm.reverse(x)
- x = x.reshape(
+ forecast = forecast.reshape(
+ batch_size, self.h * self.loss.outputsize_multiplier, -1
+ ) # [B, h, N * n_outputs] -> [B, h * n_outputs, N]
+ forecast = self.norm(forecast, "denorm")
+ forecast = forecast.reshape(
batch_size, self.h, -1
- ) # [B, h, n_outputs, N] -> [B, h, n_outputs * N]
+ ) # [B, h * n_outputs, N] -> [B, h, n_outputs * N]
- # Map to loss domain
- forecast = self.loss.domain_map(x)
-
- # domain_map might have squeezed the last dimension in case n_series == 1
- # Note that this fails in case of a tuple loss, but Multivariate does not support tuple losses yet.
- if forecast.ndim == 2:
- return forecast.unsqueeze(-1)
- else:
- return forecast
+ return forecast
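Editor's note: because `RevINMultivariate` keeps its statistics per series along the last dimension, the TSMixerx output is reshaped from `[B, h, N * n_outputs]` to `[B, h * n_outputs, N]` before denormalization and back afterwards. A quick sanity check of that round trip (shapes are illustrative only; the assert simply confirms the reshape is lossless):

import torch

B, h, N, n_outputs = 4, 12, 3, 2
forecast = torch.randn(B, h, N * n_outputs)

# put the series axis last so the stored instance-norm statistics broadcast per series
stacked = forecast.reshape(B, h * n_outputs, N)
restored = stacked.reshape(B, h, N * n_outputs)
assert torch.equal(forecast, restored)
print(stacked.shape, restored.shape)  # torch.Size([4, 24, 3]) torch.Size([4, 12, 6])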
diff --git a/neuralforecast/models/vanillatransformer.py b/neuralforecast/models/vanillatransformer.py
index e38c03fc9..aa467f0de 100644
--- a/neuralforecast/models/vanillatransformer.py
+++ b/neuralforecast/models/vanillatransformer.py
@@ -19,7 +19,7 @@
DataEmbedding,
AttentionLayer,
)
-from ..common._base_windows import BaseWindows
+from ..common._base_model import BaseModel
from ..losses.pytorch import MAE
@@ -73,7 +73,7 @@ def forward(self, queries, keys, values, attn_mask):
return (V.contiguous(), None)
# %% ../../nbs/models.vanillatransformer.ipynb 10
-class VanillaTransformer(BaseWindows):
+class VanillaTransformer(BaseModel):
"""VanillaTransformer
Vanilla Transformer, following the implementation of the Informer paper, used as a baseline.
@@ -128,10 +128,13 @@ class VanillaTransformer(BaseWindows):
"""
# Class attributes
- SAMPLING_TYPE = "windows"
EXOGENOUS_FUTR = True
EXOGENOUS_HIST = False
EXOGENOUS_STAT = False
+ MULTIVARIATE = False # If the model produces multivariate forecasts (True) or univariate (False)
+ RECURRENT = (
+ False # If the model produces forecasts recursively (True) or direct (False)
+ )
def __init__(
self,
@@ -290,14 +293,8 @@ def __init__(
def forward(self, windows_batch):
# Parse windows_batch
insample_y = windows_batch["insample_y"]
- # insample_mask = windows_batch['insample_mask']
- # hist_exog = windows_batch['hist_exog']
- # stat_exog = windows_batch['stat_exog']
-
futr_exog = windows_batch["futr_exog"]
- insample_y = insample_y.unsqueeze(-1) # [Ws,L,1]
-
if self.futr_exog_size > 0:
x_mark_enc = futr_exog[:, : self.input_size, :]
x_mark_dec = futr_exog[:, -(self.label_len + self.h) :, :]
@@ -314,5 +311,5 @@ def forward(self, windows_batch):
dec_out = self.dec_embedding(x_dec, x_mark_dec)
dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)
- forecast = self.loss.domain_map(dec_out[:, -self.h :])
+ forecast = dec_out[:, -self.h :]
return forecast
diff --git a/neuralforecast/utils.py b/neuralforecast/utils.py
index 4a272dfcb..ab3ff1d5e 100644
--- a/neuralforecast/utils.py
+++ b/neuralforecast/utils.py
@@ -6,17 +6,16 @@
'HourOfDay', 'DayOfWeek', 'DayOfMonth', 'DayOfYear', 'MonthOfYear', 'WeekOfYear',
'time_features_from_frequency_str', 'augment_calendar_df', 'get_indexer_raise_missing',
'PredictionIntervals', 'add_conformal_distribution_intervals', 'add_conformal_error_intervals',
- 'get_prediction_interval_method']
+ 'get_prediction_interval_method', 'level_to_quantiles', 'quantiles_to_level']
# %% ../nbs/utils.ipynb 3
import random
from itertools import chain
-from typing import List, Union
+from typing import List, Union, Optional, Tuple
from utilsforecast.compat import DFType
import numpy as np
import pandas as pd
-import utilsforecast.processing as ufp
# %% ../nbs/utils.ipynb 6
def generate_series(
@@ -484,77 +483,113 @@ def __repr__(self):
# %% ../nbs/utils.ipynb 32
def add_conformal_distribution_intervals(
- fcst_df: DFType,
+ model_fcsts: np.array,
cs_df: DFType,
- model_names: List[str],
- level: List[Union[int, float]],
+ model: str,
cs_n_windows: int,
n_series: int,
horizon: int,
-) -> DFType:
+ level: Optional[List[Union[int, float]]] = None,
+ quantiles: Optional[List[float]] = None,
+) -> Tuple[np.array, List[str]]:
"""
Adds conformal intervals to a `fcst_df` based on conformal scores `cs_df`.
`level` should already be sorted. This strategy creates forecast paths
based on errors and calculates quantiles using those paths.
"""
- fcst_df = ufp.copy_if_pandas(fcst_df, deep=False)
- alphas = [100 - lv for lv in level]
- cuts = [alpha / 200 for alpha in reversed(alphas)]
- cuts.extend(1 - alpha / 200 for alpha in alphas)
- for model in model_names:
- scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)
- scores = scores.transpose(1, 0, 2)
- # restrict scores to horizon
- scores = scores[:, :, :horizon]
- mean = fcst_df[model].to_numpy().reshape(1, n_series, -1)
- scores = np.vstack([mean - scores, mean + scores])
- quantiles = np.quantile(
- scores,
- cuts,
- axis=0,
- )
- quantiles = quantiles.reshape(len(cuts), -1).T
+ assert (
+ level is not None or quantiles is not None
+ ), "Either level or quantiles must be provided"
+
+ if quantiles is None and level is not None:
+ alphas = [100 - lv for lv in level]
+ cuts = [alpha / 200 for alpha in reversed(alphas)]
+ cuts.extend(1 - alpha / 200 for alpha in alphas)
+ elif quantiles is not None:
+ cuts = quantiles
+
+ scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)
+ scores = scores.transpose(1, 0, 2)
+ # restrict scores to horizon
+ scores = scores[:, :, :horizon]
+ mean = model_fcsts.reshape(1, n_series, -1)
+ scores = np.vstack([mean - scores, mean + scores])
+ scores_quantiles = np.quantile(
+ scores,
+ cuts,
+ axis=0,
+ )
+ scores_quantiles = scores_quantiles.reshape(len(cuts), -1).T
+ if quantiles is None and level is not None:
lo_cols = [f"{model}-lo-{lv}" for lv in reversed(level)]
hi_cols = [f"{model}-hi-{lv}" for lv in level]
out_cols = lo_cols + hi_cols
- fcst_df = ufp.assign_columns(fcst_df, out_cols, quantiles)
- return fcst_df
+ elif quantiles is not None:
+ out_cols = [f"{model}-ql{q}" for q in quantiles]
+
+ fcsts_with_intervals = np.hstack([model_fcsts, scores_quantiles])
+
+ return fcsts_with_intervals, out_cols
# %% ../nbs/utils.ipynb 33
def add_conformal_error_intervals(
- fcst_df: DFType,
+ model_fcsts: np.array,
cs_df: DFType,
- model_names: List[str],
- level: List[Union[int, float]],
+ model: str,
cs_n_windows: int,
n_series: int,
horizon: int,
-) -> DFType:
+ level: Optional[List[Union[int, float]]] = None,
+ quantiles: Optional[List[float]] = None,
+) -> Tuple[np.array, List[str]]:
"""
Adds conformal intervals to a `fcst_df` based on conformal scores `cs_df`.
`level` should already be sorted. This strategy creates prediction intervals
based on the absolute errors.
"""
- fcst_df = ufp.copy_if_pandas(fcst_df, deep=False)
- cuts = [lv / 100 for lv in level]
- for model in model_names:
- mean = fcst_df[model].to_numpy().ravel()
- scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)
- scores = scores.transpose(1, 0, 2)
- # restrict scores to horizon
- scores = scores[:, :, :horizon]
- quantiles = np.quantile(
- scores,
- cuts,
- axis=0,
- )
- quantiles = quantiles.reshape(len(cuts), -1)
+ assert (
+ level is not None or quantiles is not None
+ ), "Either level or quantiles must be provided"
+
+ if quantiles is None and level is not None:
+ cuts = [lv / 100 for lv in level]
+ elif quantiles is not None:
+ cuts = quantiles
+
+ mean = model_fcsts.ravel()
+ scores = cs_df[model].to_numpy().reshape(n_series, cs_n_windows, horizon)
+ scores = scores.transpose(1, 0, 2)
+ # restrict scores to horizon
+ scores = scores[:, :, :horizon]
+ scores_quantiles = np.quantile(
+ scores,
+ cuts,
+ axis=0,
+ )
+ scores_quantiles = scores_quantiles.reshape(len(cuts), -1)
+ if quantiles is None and level is not None:
lo_cols = [f"{model}-lo-{lv}" for lv in reversed(level)]
hi_cols = [f"{model}-hi-{lv}" for lv in level]
- quantiles = np.vstack([mean - quantiles[::-1], mean + quantiles]).T
- columns = lo_cols + hi_cols
- fcst_df = ufp.assign_columns(fcst_df, columns, quantiles)
- return fcst_df
+ out_cols = lo_cols + hi_cols
+ scores_quantiles = np.vstack(
+ [mean - scores_quantiles[::-1], mean + scores_quantiles]
+ ).T
+ elif quantiles is not None:
+ out_cols = []
+ scores_quantiles_ls = []
+ for i, q in enumerate(quantiles):
+ out_cols.append(f"{model}-ql{q}")
+ if q < 0.5:
+ scores_quantiles_ls.append(mean - scores_quantiles[::-1][i])
+ elif q > 0.5:
+ scores_quantiles_ls.append(mean + scores_quantiles[i])
+ else:
+ scores_quantiles_ls.append(mean)
+ scores_quantiles = np.vstack(scores_quantiles_ls).T
+
+ fcsts_with_intervals = np.hstack([model_fcsts, scores_quantiles])
+
+ return fcsts_with_intervals, out_cols
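Editor's note: both conformal helpers now operate on the raw forecast array for a single model and return the stacked forecasts together with the generated column names, instead of assigning columns into `fcst_df`. A small synthetic call against the new distribution-interval signature (the 'NHITS' column name and the random scores are made up for illustration):

import numpy as np
import pandas as pd
from neuralforecast.utils import add_conformal_distribution_intervals

n_series, cs_n_windows, horizon = 2, 3, 4
# point forecasts as a single column, series stacked one after another
model_fcsts = np.arange(n_series * horizon, dtype=float).reshape(-1, 1)
# conformal scores for the (hypothetical) 'NHITS' model
cs_df = pd.DataFrame({'NHITS': np.random.default_rng(0).random(n_series * cs_n_windows * horizon)})

fcsts, cols = add_conformal_distribution_intervals(
    model_fcsts, cs_df, 'NHITS',
    cs_n_windows=cs_n_windows, n_series=n_series, horizon=horizon,
    level=[80, 95],
)
print(fcsts.shape)  # (8, 5): point forecast plus four interval columns
print(cols)         # ['NHITS-lo-95', 'NHITS-lo-80', 'NHITS-hi-80', 'NHITS-hi-95']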
# %% ../nbs/utils.ipynb 34
def get_prediction_interval_method(method: str):
@@ -568,3 +603,30 @@ def get_prediction_interval_method(method: str):
f'please choose one of {", ".join(available_methods.keys())}'
)
return available_methods[method]
+
+# %% ../nbs/utils.ipynb 35
+def level_to_quantiles(level: List[Union[int, float]]) -> List[float]:
+ """
+ Converts a list of levels to a list of quantiles.
+ """
+ level_set = set(level)
+ return sorted(
+ list(
+ set(sum([[(50 - l / 2) / 100, (50 + l / 2) / 100] for l in level_set], []))
+ )
+ )
+
+
+def quantiles_to_level(quantiles: List[float]) -> List[Union[int, float]]:
+ """
+ Converts a list of quantiles to a list of levels.
+ """
+ quantiles_set = set(quantiles)
+ return sorted(
+ set(
+ [
+ int(round(100 - 200 * (q * (q < 0.5) + (1 - q) * (q >= 0.5)), 2))
+ for q in quantiles_set
+ ]
+ )
+ )