Commit c3992ee

Merge pull request #108 from jdb78/fix/stack_variable_lengths_tensors
Enable stacking of variable lengths tensors
2 parents d5e6fa8 + 0d299c1

2 files changed: +40 -6


pytorch_forecasting/models/temporal_fusion_transformer/__init__.py

Lines changed: 3 additions & 2 deletions
@@ -21,7 +21,7 @@
     InterpretableMultiHeadAttention,
     VariableSelectionNetwork,
 )
-from pytorch_forecasting.utils import autocorrelation, get_embedding_size, integer_histogram
+from pytorch_forecasting.utils import autocorrelation, get_embedding_size, integer_histogram, padded_stack


 class TemporalFusionTransformer(BaseModel, CovariatesMixin):
@@ -791,7 +791,8 @@ def _log_interpretation(self, outputs, label="train"):
         """
         # extract interpretations
         interpretation = {
-            name: torch.stack([x["interpretation"][name] for x in outputs]).sum(0)
+            # use padded_stack because decoder length histogram can be of different length
+            name: padded_stack([x["interpretation"][name] for x in outputs], side="right", value=0).sum(0)
             for name in outputs[0]["interpretation"].keys()
         }
         # normalize attention with length histogram squared to account for: 1. zeros in attention and
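Context for this change: torch.stack requires all tensors to share the same shape, but the decoder-length histograms collected in the interpretation outputs can differ in length between steps. The new padded_stack helper (added to utils.py below) right-pads them with zeros before stacking. A minimal sketch of the behaviour, using made-up histogram values for illustration:

import torch
from pytorch_forecasting.utils import padded_stack

# hypothetical histograms from two steps with different maximum decoder lengths
hist_a = torch.tensor([4, 3, 1])      # decoder lengths up to 3
hist_b = torch.tensor([5, 2, 2, 1])   # decoder lengths up to 4

# torch.stack([hist_a, hist_b]) would raise a RuntimeError because the sizes differ
stacked = padded_stack([hist_a, hist_b], side="right", value=0)
# tensor([[4, 3, 1, 0],
#         [5, 2, 2, 1]])
aggregated = stacked.sum(0)
# tensor([9, 5, 3, 1])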

pytorch_forecasting/utils.py

Lines changed: 37 additions & 4 deletions
@@ -2,13 +2,11 @@
 Helper functions for PyTorch forecasting
 """
 from contextlib import redirect_stdout
-import functools
-import inspect
 import os
-import re
-from typing import Callable, Tuple, Union
+from typing import Callable, List, Tuple, Union

 import torch
+import torch.nn.functional as F
 from torch.nn.utils import rnn


@@ -202,3 +200,38 @@ def unpack_sequence(sequence: Union[torch.Tensor, rnn.PackedSequence]) -> Tuple[
     else:
         lengths = torch.ones(sequence.size(0), device=sequence.device, dtype=torch.long) * sequence.size(1)
     return sequence, lengths
+
+
+def padded_stack(
+    tensors: List[torch.Tensor], side: str = "right", mode: str = "constant", value: Union[int, float] = 0
+) -> torch.Tensor:
+    """
+    Stack tensors along first dimension and pad them along last dimension to ensure their size is equal.
+
+    Args:
+        tensors (List[torch.Tensor]): list of tensors to stack
+        side (str): side on which to pad - "left" or "right". Defaults to "right".
+        mode (str): 'constant', 'reflect', 'replicate' or 'circular'. Default: 'constant'
+        value (Union[int, float]): value to use for constant padding
+
+    Returns:
+        torch.Tensor: stacked tensor
+    """
+    full_size = max([x.size(-1) for x in tensors])
+
+    def make_padding(pad):
+        if side == "left":
+            return (pad, 0)
+        elif side == "right":
+            return (0, pad)
+        else:
+            raise ValueError(f"side for padding '{side}' is unknown")
+
+    out = torch.stack(
+        [
+            F.pad(x, make_padding(full_size - x.size(-1)), mode=mode, value=value) if full_size - x.size(-1) > 0 else x
+            for x in tensors
+        ],
+        dim=0,
+    )
+    return out
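For reference, a small usage sketch of the new helper with illustrative values, showing both padding sides:

import torch
from pytorch_forecasting.utils import padded_stack

short = torch.tensor([1.0, 2.0])
long = torch.tensor([3.0, 4.0, 5.0, 6.0])

# default: pad on the right with zeros
padded_stack([short, long])
# tensor([[1., 2., 0., 0.],
#         [3., 4., 5., 6.]])

# pad on the left with a custom fill value
padded_stack([short, long], side="left", value=-1)
# tensor([[-1., -1.,  1.,  2.],
#         [ 3.,  4.,  5.,  6.]])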
