
Commit 884106c

Merge pull request #380 from jdb78/feature/simple_models

Add recurrent and mlp models

2 parents 48179d2 + 66cf2e9

File tree: 13 files changed, +823 −32 lines

CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -5,12 +5,15 @@
 ### Added
 
 - Adding a filter functionality to the timeseries datasset (#329)
+- Add simple models such as LSTM, GRU and a MLP on the decoder (#380)
+- Allow usage of any torch optimizer such as SGD (#380)
 
 ### Fixed
 
 - Moving predictions to CPU to avoid running out of memory (#329)
 - Correct determination of `output_size` for multi-target forecasting with the TemporalFusionTransformer (#328)
 - Tqdm autonotebook fix to work outside of Jupyter (#338)
+- Fix issue with yaml serialization for TensorboardLogger (#379)
 
 ### Contributors

README.md

Lines changed: 2 additions & 0 deletions

@@ -49,6 +49,8 @@ documentation with detailed tutorials.
   methods in the M4 competition. The M4 competition is arguably the most important benchmark for univariate time series forecasting.
 - [DeepAR: Probabilistic forecasting with autoregressive recurrent networks](https://www.sciencedirect.com/science/article/pii/S0169207019301888)
   which is the one of the most popular forecasting algorithms and is often used as a baseline
+- A baseline model that always predicts the latest known value
+- Simple standard networks for baselining: LSTM and GRU networks as well as a MLP on the decoder
 
 To implement new models, see the [How to implement new models tutorial](https://pytorch-forecasting.readthedocs.io/en/latest/tutorials/building.html).
 It covers basic as well as advanced architectures.
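A minimal sketch of how the two new baselining networks might be constructed from an existing `TimeSeriesDataSet` (the dataset variable `training` is assumed here, and the `RecurrentNetwork` hyperparameter names are assumptions, since its `rnn.py` module is not shown in this excerpt):

from pytorch_forecasting import DecoderMLP, RecurrentNetwork

# LSTM/GRU baseline: encodes the history with a recurrent cell
rnn_model = RecurrentNetwork.from_dataset(training, cell_type="LSTM", hidden_size=10)

# MLP baseline: predicts from decoder (known future) covariates only
mlp_model = DecoderMLP.from_dataset(training, hidden_size=300, n_hidden_layers=3)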

docs/source/models.rst

Lines changed: 2 additions & 0 deletions

@@ -24,6 +24,8 @@ and you should take into account. Here is an overview over the pros and cons of
 .. csv-table:: Model comparison
    :header: "Name", "Covariates", "Multiple targets", "Regression", "Classification", "Probabilistic", "Uncertainty", "Interactions between series", "Flexible history length", "Cold-start", "Required computational resources (1-5, 5=most)"
 
+   :py:class:`~pytorch_forecasting.models.rnn.RecurrentNetwork`, "x", "x", "x", "", "", "", "", "x", "", 2
+   :py:class:`~pytorch_forecasting.models.mlp.DecoderMLP`, "x", "x", "x", "x", "", "x", "", "x", "x", 1
    :py:class:`~pytorch_forecasting.models.nbeats.NBeats`, "", "", "x", "", "", "", "", "", "", 1
    :py:class:`~pytorch_forecasting.models.deepar.DeepAR`, "x", "x", "x", "", "x", "x", "", "x", "", 3
    :py:class:`~pytorch_forecasting.models.temporal_fusion_transformer.TemporalFusionTransformer`, "x", "x", "x", "x", "", "x", "", "x", "x", 4

pytorch_forecasting/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -36,9 +36,11 @@
     Baseline,
     BaseModel,
     BaseModelWithCovariates,
+    DecoderMLP,
     DeepAR,
     MultiEmbedding,
     NBeats,
+    RecurrentNetwork,
     TemporalFusionTransformer,
     get_rnn,
 )
@@ -85,6 +87,8 @@
     "get_embedding_size",
     "create_mask",
     "to_list",
+    "RecurrentNetwork",
+    "DecoderMLP",
 ]
 
 __version__ = "0.0.0"
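With these new exports in place, both classes become part of the public API and should be importable straight from the package root:

from pytorch_forecasting import DecoderMLP, RecurrentNetwork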

pytorch_forecasting/models/__init__.py

Lines changed: 4 additions & 0 deletions

@@ -9,13 +9,16 @@
 )
 from pytorch_forecasting.models.baseline import Baseline
 from pytorch_forecasting.models.deepar import DeepAR
+from pytorch_forecasting.models.mlp import DecoderMLP
 from pytorch_forecasting.models.nbeats import NBeats
 from pytorch_forecasting.models.nn import GRU, LSTM, MultiEmbedding, get_rnn
+from pytorch_forecasting.models.rnn import RecurrentNetwork
 from pytorch_forecasting.models.temporal_fusion_transformer import TemporalFusionTransformer
 
 __all__ = [
     "NBeats",
     "TemporalFusionTransformer",
+    "RecurrentNetwork",
     "DeepAR",
     "BaseModel",
     "Baseline",
@@ -26,4 +29,5 @@
     "LSTM",
     "GRU",
     "MultiEmbedding",
+    "DecoderMLP",
 ]

pytorch_forecasting/models/base_model.py

Lines changed: 89 additions & 3 deletions

@@ -20,7 +20,16 @@
 
 from pytorch_forecasting.data import TimeSeriesDataSet
 from pytorch_forecasting.data.encoders import EncoderNormalizer, GroupNormalizer, MultiNormalizer, NaNLabelEncoder
-from pytorch_forecasting.metrics import MASE, SMAPE, DistributionLoss, Metric, MultiLoss
+from pytorch_forecasting.metrics import (
+    MAE,
+    MASE,
+    SMAPE,
+    DistributionLoss,
+    Metric,
+    MultiHorizonMetric,
+    MultiLoss,
+    QuantileLoss,
+)
 from pytorch_forecasting.optim import Ranger
 from pytorch_forecasting.utils import apply_to_list, create_mask, get_embedding_size, groupby_apply, to_list
 
@@ -154,6 +163,7 @@ def __init__(
         reduce_on_plateau_patience: int = 1000,
         reduce_on_plateau_min_lr: float = 1e-5,
         weight_decay: float = 0.0,
+        optimizer_params: Dict[str, Any] = None,
         monotone_constaints: Dict[str, int] = {},
         output_transformer: Callable = None,
         optimizer="ranger",
@@ -177,14 +187,16 @@ def __init__(
             reduce_on_plateau_min_lr (float): minimum learning rate for reduce on plateua learning rate scheduler.
                 Defaults to 1e-5
             weight_decay (float): weight decay. Defaults to 0.0.
+            optimizer_params (Dict[str, Any]): additional parameters for the optimizer. Defaults to {}.
             monotone_constaints (Dict[str, int]): dictionary of monotonicity constraints for continuous decoder
                 variables mapping
                 position (e.g. ``"0"`` for first position) to constraint (``-1`` for negative and ``+1`` for positive,
                 larger numbers add more weight to the constraint vs. the loss but are usually not necessary).
                 This constraint significantly slows down training. Defaults to {}.
             output_transformer (Callable): transformer that takes network output and transforms it to prediction space.
                 Defaults to None which is equivalent to ``lambda out: out["prediction"]``.
-            optimizer (str): Optimizer, "ranger", "adam" or "adamw". Defaults to "ranger".
+            optimizer (str): Optimizer, "ranger", "sgd", "adam", "adamw" or class name of optimizer in ``torch.optim``.
+                Defaults to "ranger".
         """
         super().__init__()
         # update hparams
@@ -203,6 +215,21 @@ def __init__(
         if not hasattr(self, "output_transformer"):
             self.output_transformer = output_transformer
 
+    @property
+    def n_targets(self) -> int:
+        """
+        Number of targets to forecast.
+
+        Based on loss function.
+
+        Returns:
+            int: number of targets
+        """
+        if isinstance(self.loss, MultiLoss):
+            return len(self.loss.metrics)
+        else:
+            return 1
+
     def transform_output(self, out: Dict[str, torch.Tensor]) -> torch.Tensor:
         """
         Extract prediction from network output and rescale it to real space / de-normalize it.
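The new `n_targets` property counts one target per metric wrapped in a `MultiLoss` and otherwise reports a single target. A small illustrative sketch (not part of the diff), using only classes imported above:

from pytorch_forecasting.metrics import MAE, SMAPE, MultiLoss

# a plain metric such as MAE() corresponds to n_targets == 1;
# wrapping two metrics for two targets makes a model report n_targets == 2
two_target_loss = MultiLoss([MAE(), SMAPE()])
assert len(two_target_loss.metrics) == 2  # exactly what n_targets returns for this loss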
@@ -251,6 +278,52 @@ def transform_output(self, out: Dict[str, torch.Tensor]) -> torch.Tensor:
             out = self.output_transformer(out)
         return out
 
+    @staticmethod
+    def deduce_default_output_parameters(
+        dataset: TimeSeriesDataSet, kwargs: Dict[str, Any], default_loss: MultiHorizonMetric = None
+    ) -> Dict[str, Any]:
+        """
+        Deduce default parameters for output for `from_dataset()` method.
+
+        Determines ``output_size`` and ``loss`` parameters.
+
+        Args:
+            dataset (TimeSeriesDataSet): timeseries dataset
+            kwargs (Dict[str, Any]): current hyperparameters
+            default_loss (MultiHorizonMetric, optional): default loss function.
+                Defaults to :py:class:`~pytorch_forecasting.metrics.MAE`.
+
+        Returns:
+            Dict[str, Any]: dictionary with ``output_size`` and ``loss``.
+        """
+        # infer output size
+        def get_output_size(normalizer, loss):
+            if isinstance(loss, QuantileLoss):
+                return len(loss.quantiles)
+            elif isinstance(normalizer, NaNLabelEncoder):
+                return len(normalizer.classes_)
+            else:
+                return 1
+
+        # handle multiple targets
+        new_kwargs = {}
+        n_targets = len(dataset.target_names)
+        if default_loss is None:
+            default_loss = MAE()
+        loss = kwargs.get("loss", default_loss)
+        if n_targets > 1:  # try to infer number of ouput sizes
+            if not isinstance(loss, MultiLoss):
+                loss = MultiLoss([deepcopy(loss)] * n_targets)
+                new_kwargs["loss"] = loss
+            if isinstance(loss, MultiLoss) and "output_size" not in kwargs:
+                new_kwargs["output_size"] = [
+                    get_output_size(normalizer, l)
+                    for normalizer, l in zip(dataset.target_normalizer.normalizers, loss.metrics)
+                ]
+        elif "output_size" not in kwargs:
+            new_kwargs["output_size"] = get_output_size(dataset.target_normalizer, loss)
+        return new_kwargs
+
     def size(self) -> int:
         """
         get number of parameters in model
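In effect, the helper returns an `output_size` matching the loss (number of quantiles for `QuantileLoss`, number of classes for a `NaNLabelEncoder` target, otherwise 1) and wraps the loss in a `MultiLoss` for multi-target datasets. A hedged sketch for a single regression target (`training` is an assumed `TimeSeriesDataSet`, not part of this diff):

from pytorch_forecasting import BaseModel
from pytorch_forecasting.metrics import QuantileLoss

# the default QuantileLoss has 7 quantiles, so output_size is inferred as 7;
# a "loss" entry is only added to the returned kwargs when there are multiple targets
defaults = BaseModel.deduce_default_output_parameters(training, kwargs={}, default_loss=QuantileLoss())
# defaults == {"output_size": 7}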
@@ -673,6 +746,10 @@ def configure_optimizers(self):
             Tuple[List]: first entry is list of optimizers and second is list of schedulers
         """
         # either set a schedule of lrs or find it dynamically
+        if self.hparams.optimizer_params is None:
+            optimizer_params = {}
+        else:
+            optimizer_params = self.hparams.optimizer_params
         if isinstance(self.hparams.learning_rate, (list, tuple)):  # set schedule
             lrs = self.hparams.learning_rate
             if self.hparams.optimizer == "adam":
@@ -681,8 +758,17 @@
                 optimizer = torch.optim.AdamW(self.parameters(), lr=lrs[0])
             elif self.hparams.optimizer == "ranger":
                 optimizer = Ranger(self.parameters(), lr=lrs[0], weight_decay=self.hparams.weight_decay)
+            elif self.hparams.optimizer == "sgd":
+                optimizer = torch.optim.SGD(
+                    self.parameters(), lr=lrs[0], weight_decay=self.hparams.weight_decay, **optimizer_params
+                )
             else:
-                raise ValueError(f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown")
+                try:
+                    optimizer = getattr(torch.optim, self.hparams.optimizer)(
+                        self.parameters(), lr=lrs[0], weight_decay=self.hparams.weight_decay, **optimizer_params
+                    )
+                except AttributeError:
+                    raise ValueError(f"Optimizer of self.hparams.optimizer={self.hparams.optimizer} unknown")
         # normalize lrs
         lrs = np.array(lrs) / lrs[0]
         schedulers = [
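Combined with the new `optimizer_params` hyperparameter, the optimizer can now be selected by name: the built-in strings ("ranger", "sgd", "adam", "adamw") or the class name of any optimizer in `torch.optim`, resolved via `getattr`. A hedged usage sketch (`training` is an assumed dataset; the keyword arguments are forwarded to the base model through `from_dataset`):

from pytorch_forecasting import TemporalFusionTransformer

# plain SGD, with momentum passed through optimizer_params
model = TemporalFusionTransformer.from_dataset(
    training, optimizer="sgd", optimizer_params={"momentum": 0.9}
)

# any torch.optim class name also works, e.g. Adagrad
model = TemporalFusionTransformer.from_dataset(
    training, optimizer="Adagrad", optimizer_params={"lr_decay": 0.01}
)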

pytorch_forecasting/models/deepar/__init__.py

Lines changed: 0 additions & 1 deletion

@@ -176,7 +176,6 @@ def from_dataset(
         Returns:
             DeepAR network
         """
-        # assert fixed encoder and decoder length for the moment
         new_kwargs = {}
         if dataset.multi_target:
             new_kwargs.setdefault("loss", MultiLoss([NormalDistributionLoss()] * len(dataset.target_names)))
pytorch_forecasting/models/mlp/__init__.py (new file)

Lines changed: 155 additions & 0 deletions
"""
Simple models based on fully connected networks
"""


from typing import Dict, List, Tuple, Union

import numpy as np
import torch
from torch import nn

from pytorch_forecasting.data import TimeSeriesDataSet
from pytorch_forecasting.metrics import MAE, MAPE, MASE, RMSE, SMAPE, MultiHorizonMetric, QuantileLoss
from pytorch_forecasting.models.base_model import BaseModelWithCovariates
from pytorch_forecasting.models.mlp.submodules import FullyConnectedModule
from pytorch_forecasting.models.nn.embeddings import MultiEmbedding


class DecoderMLP(BaseModelWithCovariates):
    """
    MLP on the decoder.

    MLP that predicts output only based on information available in the decoder.
    """

    def __init__(
        self,
        activation_class: str = "ReLU",
        hidden_size: int = 300,
        n_hidden_layers: int = 3,
        dropout: float = 0.1,
        norm: bool = True,
        static_categoricals: List[str] = [],
        static_reals: List[str] = [],
        time_varying_categoricals_encoder: List[str] = [],
        time_varying_categoricals_decoder: List[str] = [],
        categorical_groups: Dict[str, List[str]] = {},
        time_varying_reals_encoder: List[str] = [],
        time_varying_reals_decoder: List[str] = [],
        embedding_sizes: Dict[str, Tuple[int, int]] = {},
        embedding_paddings: List[str] = [],
        embedding_labels: Dict[str, np.ndarray] = {},
        x_reals: List[str] = [],
        x_categoricals: List[str] = [],
        output_size: Union[int, List[int]] = 1,
        target: Union[str, List[str]] = None,
        loss: MultiHorizonMetric = None,
        logging_metrics: nn.ModuleList = None,
        **kwargs,
    ):
        """
        Args:
            activation_class (str, optional): PyTorch activation class. Defaults to "ReLU".
            hidden_size (int, optional): hidden recurrent size - the most important hyperparameter along with
                ``n_hidden_layers``. Defaults to 10.
            n_hidden_layers (int, optional): Number of hidden layers - important hyperparameter. Defaults to 2.
            dropout (float, optional): Dropout. Defaults to 0.1.
            norm (bool, optional): if to use normalization in the MLP. Defaults to True.
            static_categoricals: integer of positions of static categorical variables
            static_reals: integer of positions of static continuous variables
            time_varying_categoricals_encoder: integer of positions of categorical variables for encoder
            time_varying_categoricals_decoder: integer of positions of categorical variables for decoder
            time_varying_reals_encoder: integer of positions of continuous variables for encoder
            time_varying_reals_decoder: integer of positions of continuous variables for decoder
            categorical_groups: dictionary where values
                are list of categorical variables that are forming together a new categorical
                variable which is the key in the dictionary
            x_reals: order of continuous variables in tensor passed to forward function
            x_categoricals: order of categorical variables in tensor passed to forward function
            embedding_sizes: dictionary mapping (string) indices to tuple of number of categorical classes and
                embedding size
            embedding_paddings: list of indices for embeddings which transform the zero's embedding to a zero vector
            embedding_labels: dictionary mapping (string) indices to list of categorical labels
            output_size (Union[int, List[int]], optional): number of outputs (e.g. number of quantiles for
                QuantileLoss and one target or list of output sizes).
            target (str, optional): Target variable or list of target variables. Defaults to None.
            loss (MultiHorizonMetric, optional): loss: loss function taking prediction and targets.
                Defaults to QuantileLoss.
            logging_metrics (nn.ModuleList, optional): Metrics to log during training.
                Defaults to nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()]).
        """
        if loss is None:
            loss = QuantileLoss()
        if logging_metrics is None:
            logging_metrics = nn.ModuleList([SMAPE(), MAE(), RMSE(), MAPE(), MASE()])
        self.save_hyperparameters()
        # store loss function separately as it is a module
        super().__init__(loss=loss, logging_metrics=logging_metrics, **kwargs)

        self.input_embeddings = MultiEmbedding(
            embedding_sizes={
                name: val
                for name, val in embedding_sizes.items()
                if name in self.decoder_variables + self.static_variables
            },
            embedding_paddings=embedding_paddings,
            categorical_groups=categorical_groups,
            x_categoricals=x_categoricals,
        )
        # define network
        if isinstance(self.hparams.output_size, int):
            mlp_output_size = self.hparams.output_size
        else:
            mlp_output_size = sum(self.hparams.output_size)

        cont_size = len(self.decoder_reals_positions)
        cat_size = sum([emb.embedding_dim for emb in self.input_embeddings.values()])
        input_size = cont_size + cat_size

        self.mlp = FullyConnectedModule(
            dropout=dropout,
            norm=self.hparams.norm,
            activation_class=getattr(nn, self.hparams.activation_class),
            input_size=input_size,
            output_size=mlp_output_size,
            hidden_size=self.hparams.hidden_size,
            n_hidden_layers=self.hparams.n_hidden_layers,
        )

    @property
    def decoder_reals_positions(self) -> List[int]:
        return [
            self.hparams.x_reals.index(name)
            for name in self.reals
            if name in self.decoder_variables + self.static_variables
        ]

    def forward(self, x: Dict[str, torch.Tensor], n_samples: int = None) -> Dict[str, torch.Tensor]:
        """
        Forward network
        """
        # x is a batch generated based on the TimeSeriesDataset
        batch_size = x["decoder_lengths"].size(0)
        embeddings = self.input_embeddings(x["decoder_cat"])  # returns dictionary with embedding tensors
        network_input = torch.cat(
            [x["decoder_cont"][..., self.decoder_reals_positions]] + list(embeddings.values()),
            dim=-1,
        )
        prediction = self.mlp(network_input.view(-1, self.mlp.input_size)).view(
            batch_size, network_input.size(1), self.mlp.output_size
        )

        # cut prediction into pieces for multiple targets
        if self.n_targets > 1:
            prediction = torch.split(prediction, self.hparams.output_size, dim=-1)

        # We need to return a dictionary that at least contains the prediction and the target_scale.
        # The parameter can be directly forwarded from the input.
        return dict(prediction=prediction, target_scale=x["target_scale"])

    @classmethod
    def from_dataset(cls, dataset: TimeSeriesDataSet, **kwargs):
        new_kwargs = cls.deduce_default_output_parameters(dataset, kwargs, QuantileLoss())
        kwargs.update(new_kwargs)
        return super().from_dataset(dataset, **kwargs)
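A minimal end-to-end sketch of training the new `DecoderMLP` (the `TimeSeriesDataSet` and the dataloaders are assumed and not part of this commit; the trainer call follows the PyTorch Lightning API contemporary to it):

import pytorch_lightning as pl

from pytorch_forecasting import DecoderMLP
from pytorch_forecasting.metrics import QuantileLoss

# "training", "train_dataloader" and "val_dataloader" are assumed to exist already
model = DecoderMLP.from_dataset(
    training,
    hidden_size=300,
    n_hidden_layers=3,
    dropout=0.1,
    loss=QuantileLoss(),
)

trainer = pl.Trainer(max_epochs=10, gradient_clip_val=0.1)
trainer.fit(model, train_dataloader, val_dataloader)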
