
Commit c07d5ac

Merge pull request #96 from jdb78/feature/minimal_categorical_prediction
Minimal categorical prediction
2 parents: a64ed20 + 5696ff8

File tree

6 files changed: 46 additions & 9 deletions


pytorch_forecasting/data/encoders.py

Lines changed: 14 additions & 0 deletions
@@ -130,6 +130,20 @@ def inverse_transform(self, y: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
         decoded = self.classes_vector_[y]
         return decoded
 
+    def __call__(self, data: Dict[str, torch.Tensor]) -> torch.Tensor:
+        """
+        Extract prediction from network output. Does not map back to input
+        categories, as this would require a numpy array, which cannot carry gradients.
+
+        Args:
+            data (Dict[str, torch.Tensor]): Dictionary with entries
+                * prediction: data to de-scale
+
+        Returns:
+            torch.Tensor: prediction
+        """
+        return data["prediction"]
+
 
 class TorchNormalizer(BaseEstimator, TransformerMixin):
     """

pytorch_forecasting/data/timeseries.py

Lines changed: 1 addition & 1 deletion
@@ -127,7 +127,7 @@ def __init__(
         Args:
             data: dataframe with sequence data - each row can be identified with ``time_idx`` and the ``group_ids``
             time_idx: integer column denoting the time index
-            target: column denoting the target or list of columns denoting the target
+            target: column denoting the target or list of columns denoting the target - categorical or continuous.
             group_ids: list of column names identifying a timeseries
             weight: column name for weights or list of column names corresponding to each target
             max_encoder_length: maximum length to encode
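
To illustrate the documented change, a hedged sketch of a dataset with a categorical target; the toy columns and lengths are invented for the example, and additional constructor arguments may be needed in practice:

    import pandas as pd

    from pytorch_forecasting import TimeSeriesDataSet
    from pytorch_forecasting.data import NaNLabelEncoder

    data = pd.DataFrame(
        dict(
            time_idx=list(range(20)) * 2,
            series=["a"] * 20 + ["b"] * 20,
            status=["up", "down"] * 20,  # categorical target column
        )
    )

    dataset = TimeSeriesDataSet(
        data,
        time_idx="time_idx",
        target="status",  # categorical rather than continuous
        target_normalizer=NaNLabelEncoder(),  # label-encodes the target
        group_ids=["series"],
        max_encoder_length=4,
        max_prediction_length=2,
    )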

pytorch_forecasting/metrics.py

Lines changed: 13 additions & 0 deletions
@@ -493,6 +493,19 @@ def loss(self, y_pred, target):
         return loss
 
 
+class CrossEntropy(MultiHorizonMetric):
+    """
+    Cross entropy loss for classification.
+    """
+
+    def loss(self, y_pred, target):
+
+        loss = F.cross_entropy(y_pred.view(-1, y_pred.size(-1)), target.view(-1), reduction="none").view(
+            -1, target.size(-1)
+        )
+        return loss
+
+
 class RMSE(MultiHorizonMetric):
     """
     Root mean square error
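
The reshaping in `CrossEntropy.loss` is worth spelling out: logits are flattened to (batch * horizon, classes) for `F.cross_entropy`, and the per-element losses are folded back into one value per time step. A standalone shape check under those assumptions:

    import torch
    import torch.nn.functional as F

    batch, horizon, n_classes = 2, 6, 5
    y_pred = torch.randn(batch, horizon, n_classes)      # class logits
    target = torch.randint(n_classes, (batch, horizon))  # class indices

    # same computation as CrossEntropy.loss above
    loss = F.cross_entropy(
        y_pred.view(-1, y_pred.size(-1)), target.view(-1), reduction="none"
    ).view(-1, target.size(-1))

    assert loss.shape == (batch, horizon)  # one loss value per horizon step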

pytorch_forecasting/models/base_model.py

Lines changed: 3 additions & 3 deletions
@@ -14,12 +14,12 @@
 import torch
 import torch.nn as nn
 from torch.nn.utils import rnn
-from torch.optim.lr_scheduler import LambdaLR, OneCycleLR, ReduceLROnPlateau
+from torch.optim.lr_scheduler import LambdaLR, ReduceLROnPlateau
 from torch.utils.data import DataLoader
 from tqdm.notebook import tqdm
 
 from pytorch_forecasting.data import TimeSeriesDataSet
-from pytorch_forecasting.data.encoders import GroupNormalizer
+from pytorch_forecasting.data.encoders import EncoderNormalizer, GroupNormalizer
 from pytorch_forecasting.metrics import MASE, SMAPE, Metric
 from pytorch_forecasting.optim import Ranger
 from pytorch_forecasting.utils import groupby_apply
@@ -908,7 +908,7 @@ def plot_prediction_actual_by_variable(
         scaler = self.dataset_parameters["scalers"][name]
         x = np.linspace(-data["std"], data["std"], bins)
         # reversing normalization for group normalizer is not possible without sample level information
-        if not isinstance(scaler, GroupNormalizer):
+        if not isinstance(scaler, (GroupNormalizer, EncoderNormalizer)):
             x = scaler.inverse_transform(x)
         ax.set_xlabel(f"Normalized {name}")

tests/test_models/conftest.py

Lines changed: 5 additions & 2 deletions
@@ -79,23 +79,26 @@ def data_with_covariates():
         dict(target_normalizer=EncoderNormalizer(), min_encoder_length=2),
         dict(target_normalizer=GroupNormalizer(log_scale=True)),
         dict(target_normalizer=GroupNormalizer(groups=["agency", "sku"], coerce_positive=1.0)),
+        dict(target="agency"),
     ]
 )
 def multiple_dataloaders_with_coveratiates(data_with_covariates, request):
     training_cutoff = "2016-09-01"
     max_encoder_length = 36
     max_prediction_length = 6
 
+    params = request.param
+    params.setdefault("target", "volume")
+
     training = TimeSeriesDataSet(
         data_with_covariates[lambda x: x.date < training_cutoff],
         time_idx="time_idx",
-        target="volume",
         # weight="weight",
         group_ids=["agency", "sku"],
         max_encoder_length=max_encoder_length,
         max_prediction_length=max_prediction_length,
         add_relative_time_idx=True,
-        **request.param  # fixture parametrization
+        **params  # fixture parametrization
     )
 
     validation = TimeSeriesDataSet.from_dataset(
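
The `setdefault` pattern here, sketched in isolation: each parametrized case may override the target, and "volume" remains the fallback (the dictionaries mirror the cases above):

    params = dict(target="agency")         # the new categorical case
    params.setdefault("target", "volume")  # no-op: target already set
    assert params["target"] == "agency"

    params = dict(min_encoder_length=2)    # a case without an explicit target
    params.setdefault("target", "volume")  # falls back to the continuous default
    assert params["target"] == "volume"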

tests/test_models/test_temporal_fusion_transformer.py

Lines changed: 10 additions & 3 deletions
@@ -9,7 +9,8 @@
 from torch.utils.data import dataloader
 
 from pytorch_forecasting import TimeSeriesDataSet
-from pytorch_forecasting.metrics import PoissonLoss, QuantileLoss
+from pytorch_forecasting.data import NaNLabelEncoder
+from pytorch_forecasting.metrics import CrossEntropy, PoissonLoss, QuantileLoss
 from pytorch_forecasting.models import TemporalFusionTransformer
 from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
@@ -52,18 +53,24 @@ def test_integration(multiple_dataloaders_with_coveratiates, tmp_path, gpus):
     cuda_context = nullcontext()
 
     with cuda_context:
+        if isinstance(train_dataloader.dataset.target_normalizer, NaNLabelEncoder):
+            output_size = len(train_dataloader.dataset.target_normalizer.classes_)
+            loss = CrossEntropy()
+        else:
+            output_size = 7
+            loss = QuantileLoss()
         net = TemporalFusionTransformer.from_dataset(
             train_dataloader.dataset,
             learning_rate=0.15,
             hidden_size=4,
             attention_head_size=1,
             dropout=0.2,
             hidden_continuous_size=2,
-            loss=QuantileLoss(),
-            output_size=7,
+            loss=loss,
             log_interval=5,
             log_val_interval=1,
             log_gradient_flow=True,
+            output_size=output_size,
             monotone_constaints=monotone_constaints,
         )
         net.size()
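
Distilled from the test, the pattern for switching a TemporalFusionTransformer between quantile regression and classification (a sketch; `dataset` stands in for a TimeSeriesDataSet built from either kind of target, and 7 matches the number of default quantiles in QuantileLoss):

    from pytorch_forecasting.data import NaNLabelEncoder
    from pytorch_forecasting.metrics import CrossEntropy, QuantileLoss
    from pytorch_forecasting.models import TemporalFusionTransformer

    if isinstance(dataset.target_normalizer, NaNLabelEncoder):
        # categorical target: one logit per class, scored with cross entropy
        output_size = len(dataset.target_normalizer.classes_)
        loss = CrossEntropy()
    else:
        # continuous target: one output per quantile
        output_size = 7
        loss = QuantileLoss()

    net = TemporalFusionTransformer.from_dataset(dataset, loss=loss, output_size=output_size)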
