 from pytorch_forecasting.data.encoders import EncoderNormalizer, GroupNormalizer, NaNLabelEncoder, TorchNormalizer
 
 
+def _find_end_indices(diffs: np.ndarray, max_lengths: np.ndarray, min_length: int) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Identify end indices in series even if some values are missing.
+
+    Args:
+        diffs (np.ndarray): array of differences to the next time step. NaNs should be filled up with ones.
+        max_lengths (np.ndarray): maximum length of sequence by position.
+        min_length (int): minimum length of sequence.
+
+    Returns:
+        Tuple[np.ndarray, np.ndarray]: tuple of arrays where the first contains the end indices and the second
+            the start and end index pairs of shorter sequences that are still missing.
+    """
+    missing_start_ends = []
+    end_indices = []
+    length = 1
+    start_idx = 0
+    max_idx = len(diffs) - 1
+    max_length = max_lengths[start_idx]
+
+    for idx, diff in enumerate(diffs):
+        if length >= max_length:
+            # window is full: record an end index for every exhausted start position
+            while length >= max_length:
+                if length == max_length:
+                    end_indices.append(idx)
+                else:
+                    end_indices.append(idx - 1)
+                length -= diffs[start_idx]
+                if start_idx < max_idx:
+                    start_idx += 1
+                max_length = max_lengths[start_idx]
+        elif length >= min_length:
+            # admissible shorter sequence ending at idx that the branch above will not emit
+            missing_start_ends.append([start_idx, idx])
+        length += diff
+    if len(missing_start_ends) > 0:  # required for numba compliance
+        return np.asarray(end_indices), np.asarray(missing_start_ends)
+    else:
+        return np.asarray(end_indices), np.empty((0, 2), dtype=np.int64)
+
+
+# numba is an optional dependency: jit-compile the helper if it is available
+try:
+    import numba
+
+    _find_end_indices = numba.jit(nopython=True)(_find_end_indices)
+except ImportError:
+    pass
+
+
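A minimal sketch of what the helper returns, on hypothetical inputs: a fully regular series of five steps (all diffs equal to one), a maximum window of 3 clipped towards the series end, and a minimum length of 1.

    import numpy as np

    diffs = np.array([1.0, 1.0, 1.0, 1.0, 1.0])  # no gaps between steps
    max_lengths = np.array([3, 3, 3, 2, 1])      # window of 3, clipped at the end

    end_indices, missing = _find_end_indices(diffs, max_lengths, min_length=1)
    # end_indices -> [2, 3, 4, 4, 4]: longest admissible end per start position
    # missing     -> [[0, 0], [0, 1]]: shorter sequences ending at steps 0 and 1,
    #                which _construct_index appends so that a sequence also
    #                finishes on every time step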
 class TimeSeriesDataSet(Dataset):
     """
     PyTorch Dataset for fitting timeseries models.
@@ -125,12 +173,16 @@ def __init__(
         """
         super().__init__()
         self.max_encoder_length = max_encoder_length
-        self.min_encoder_length = min_encoder_length or max_encoder_length
+        if min_encoder_length is None:
+            min_encoder_length = max_encoder_length
+        self.min_encoder_length = min_encoder_length
         assert (
             self.min_encoder_length <= self.max_encoder_length
         ), "max encoder length has to be larger than or equal to min encoder length"
         self.max_prediction_length = max_prediction_length
-        self.min_prediction_length = min_prediction_length or max_prediction_length
+        if min_prediction_length is None:
+            min_prediction_length = max_prediction_length
+        self.min_prediction_length = min_prediction_length
         assert (
             self.min_prediction_length <= self.max_prediction_length
         ), "max prediction length has to be larger than or equal to min prediction length"
@@ -155,7 +207,9 @@ def __init__(
         else:
             randomize_length = (0.2, 0.05)
         self.randomize_length = randomize_length
-        self.min_prediction_idx = min_prediction_idx or data[self.time_idx].min()
+        if min_prediction_idx is None:
+            min_prediction_idx = data[self.time_idx].min()
+        self.min_prediction_idx = min_prediction_idx
         self.constant_fill_strategy = {} if len(constant_fill_strategy) == 0 else constant_fill_strategy
         self.predict_mode = predict_mode
         self.allow_missings = allow_missings
@@ -623,52 +677,54 @@ def _construct_index(self, data: pd.DataFrame, predict_mode: bool) -> pd.DataFra
         df_index["count"] = (df_index["time_last"] - df_index["time_first"]).astype(int) + 1
         df_index["group_id"] = g.ngroup()
 
+        min_sequence_length = self.min_prediction_length + self.min_encoder_length
+        max_sequence_length = self.max_prediction_length + self.max_encoder_length
+
         # calculate maximum index to include from current index_start
-        max_time = (df_index["time"] + self.max_encoder_length + self.max_prediction_length).clip(
-            upper=df_index["count"] + df_index.time_first
-        )
+        max_time = (df_index["time"] + max_sequence_length - 1).clip(upper=df_index["count"] + df_index.time_first - 1)
 
         # if there are missing timesteps, we cannot say directly what is the last timestep to include
         # therefore we iterate until it is found
         if (df_index["time_diff_to_next"] != 1).any():
             assert (
                 self.allow_missings
             ), "Time difference between steps has been identified as larger than 1 - set allow_missings=True"
-            df_index["index_end"] = df_index["index_start"]
-            for _ in range(df_index["count"].max()):
-                new_end_time = (
-                    df_index[["time", "time_diff_to_next"]].iloc[df_index["index_end"]].sum(axis=1).to_numpy()
-                )
-                df_index["index_end"] = df_index["index_end"].where(
-                    new_end_time + 1 > max_time, df_index["index_end"] + 1
-                )
-        else:
-            # direct calculation of end index if there are no missing timesteps in the data
-            df_index["index_end"] = df_index["index_start"] + (max_time - df_index["time"] - 1)
+
+        df_index["index_end"], missing_sequences = _find_end_indices(
+            diffs=df_index.time_diff_to_next.to_numpy(),
+            max_lengths=(max_time - df_index.time).to_numpy() + 1,
+            min_length=min_sequence_length,
+        )
+        # add duplicates, mostly with a shorter sequence length, for the start of the timeseries:
+        # the steps above ensure that a sequence starts on every time step, while the missing_sequences
+        # ensure that a sequence also finishes on every time step
+        if len(missing_sequences) > 0:
+            shortened_sequences = df_index.iloc[missing_sequences[:, 0]].assign(index_end=missing_sequences[:, 1])
+
+            # concatenate shortened sequences
+            df_index = pd.concat([df_index, shortened_sequences], axis=0, ignore_index=True)
 
         # filter out where encode and decode length are not satisfied
         df_index["sequence_length"] = df_index["time"].iloc[df_index["index_end"]].to_numpy() - df_index["time"] + 1
 
         # filter too short sequences
         df_index = df_index[
             # sequence must be at least of minimal prediction length
-            lambda x: (x.sequence_length >= self.min_prediction_length + self.min_encoder_length)
+            lambda x: (x.sequence_length >= min_sequence_length)
             &
             # prediction must be for after minimal prediction index + length of prediction
-            (x["sequence_length"] + x["time"] - 1 >= self.min_prediction_idx - 1 + self.min_prediction_length)
+            (x["sequence_length"] + x["time"] >= self.min_prediction_idx + self.min_prediction_length)
         ]
-        # todo: add duplicates for
-        # (x.sequence length > self.min_prediction_length + self.min_encoder_length) &
-        # (x.time - x.time_start < self.max_prediction_length + self.max_encoder_length)
 
         if predict_mode:  # keep longest element per series (i.e. the first element that spans to the end of the series)
             # filter all elements that are longer than the allowed maximum sequence length
             df_index = df_index[
-                lambda x: (x["time_last"] - x["time"] + 1 <= self.max_prediction_length + self.max_encoder_length)
-                & (x["sequence_length"] >= self.min_prediction_length + self.min_encoder_length)
+                lambda x: (x["time_last"] - x["time"] + 1 <= max_sequence_length)
+                & (x["sequence_length"] >= min_sequence_length)
             ]
             # choose longest sequence
             df_index = df_index.loc[df_index.groupby("group_id").sequence_length.idxmax()]
+
         assert len(df_index) > 0, "filters should not remove entries"
 
         return df_index
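A worked check of the inclusive end-index arithmetic above, using hypothetical numbers: a group with 10 gapless observations starting at time 0.

    time_first, count = 0, 10
    max_sequence_length, min_sequence_length = 5, 3

    for time in (3, 8):
        # inclusive last time index a sequence starting at this position may cover
        max_time = min(time + max_sequence_length - 1, count + time_first - 1)
        sequence_length = max_time - time + 1
        print(time, max_time, sequence_length, sequence_length >= min_sequence_length)
    # 3 7 5 True   -> a full-length window fits before the series end
    # 8 9 2 False  -> clipped to length 2 and removed by the sequence_length filter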
@@ -690,8 +746,10 @@ def plot_randomization(
         """
         if betas is None:
             betas = self.randomize_length
-        length = length or self.max_encoder_length
-        min_length = min_length or self.min_encoder_length
+        if length is None:
+            length = self.max_encoder_length
+        if min_length is None:
+            min_length = self.min_encoder_length
         probabilities = Beta(betas[0], betas[1]).sample((1000,))
 
         lengths = ((length - min_length) * probabilities).round() + min_length
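For intuition on the sampling that plot_randomization visualises (unchanged by this commit): with the default betas of (0.2, 0.05), the Beta distribution has mean 0.8 and is U-shaped, so most sampled encoder lengths sit at or near the maximum. A sketch with hypothetical bounds:

    from torch.distributions import Beta

    length, min_length = 24, 12  # hypothetical encoder length bounds
    probabilities = Beta(0.2, 0.05).sample((1000,))
    lengths = ((length - min_length) * probabilities).round() + min_length
    print(lengths.mean())  # around 21.6 = 12 + 12 * 0.8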
@@ -1050,27 +1108,19 @@ def to_dataloader(
             **kwargs,
         )
 
-    def get_index(self) -> pd.DataFrame:
+    def x_to_index(self, x) -> pd.DataFrame:
         """
-        Data index / order in which items are returned in train=False mode by dataloader.
+        Decode dataframe index from x.
 
         Returns:
             dataframe with time index column for first prediction and group ids
         """
-        decoder_length = pd.DataFrame(
-            dict(
-                prediction_idx=self.data["time"][self.index.index_end.to_numpy()] - (self.min_prediction_idx - 1),
-                sequence_length=self.index.sequence_length,
-                max_prediction_length=self.max_prediction_length,
-            )
-        ).min(axis=1)
-        encoder_lengths = self.index.sequence_length - decoder_length
-        index_data = {self.time_idx: self.index.time + encoder_lengths}
+        index_data = {self.time_idx: x["decoder_time_idx"][:, 0]}
         for id in self.group_ids:
-            index_data[id] = self.data["groups"][:, self.group_ids.index(id)][self.index.index_start.to_numpy()]
+            index_data[id] = x["groups"][:, self.group_ids.index(id)]
             # decode if possible
             index_data[id] = self.transform_values(id, index_data[id], inverse=True)
-        index = pd.DataFrame(index_data, index=self.index.index)
+        index = pd.DataFrame(index_data)
         return index
 
 
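A sketch of the intended use of x_to_index, assuming a fitted TimeSeriesDataSet named dataset (hypothetical variable names):

    dataloader = dataset.to_dataloader(train=False, batch_size=64)
    x, y = next(iter(dataloader))  # x is the dictionary of model inputs
    index = dataset.x_to_index(x)
    # -> DataFrame with one row per sample in the batch: the first decoder
    #    time step (x["decoder_time_idx"][:, 0]) plus the decoded group ids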