
Commit e5c8698

Merge pull request #160 from JakeForsey/feature/beta-distribution-metric
Feature/beta distribution metric
2 parents: caa4362 + 9c4d068

3 files changed: +72 -3 lines changed

pytorch_forecasting/data/encoders.py

Lines changed: 10 additions & 3 deletions
@@ -230,10 +230,17 @@ def preprocess(self, y: Union[pd.Series, np.ndarray, torch.Tensor]) -> Union[np.
         Returns:
             Union[np.ndarray, torch.Tensor]: return rescaled series with type depending on input type
         """
-        y = y + self.eps
         if self.transformation is None:
-            pass
-        elif isinstance(y, torch.Tensor):
+            return y
+
+        # protect against numerical instabilities
+        if isinstance(self.transformation, str) and self.transformation == "logit":
+            # need to apply eps slightly differently
+            y = y / (1 + 2 * self.eps) + self.eps
+        else:
+            y = y + self.eps
+
+        if isinstance(y, torch.Tensor):
             y = self.TRANSFORMATIONS.get(self.transformation, self.transformation)[0](y)
         else:
             # convert first to tensor, then transform and then convert to numpy array
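
Two things are worth noting about this change: when no transformation is configured, preprocess now returns the series untouched instead of adding eps, and for the logit transformation eps is applied multiplicatively as well as additively so that values at both ends of [0, 1] stay strictly inside the open interval. A minimal, illustrative sketch of the second point (not part of the commit; the eps value is chosen for readability, the encoder uses its own self.eps):

    import torch

    eps = 1e-4  # illustrative value; the encoder uses self.eps
    y = torch.tensor([0.0, 0.5, 1.0])

    # a plain shift pushes the upper boundary above 1, where the logit is undefined
    print(torch.logit(y + eps))                  # last entry is nan
    # the committed variant squeezes [0, 1] into roughly (eps, 1 - eps)
    print(torch.logit(y / (1 + 2 * eps) + eps))  # all entries finite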

pytorch_forecasting/metrics.py

Lines changed: 36 additions & 0 deletions
@@ -834,3 +834,39 @@ def rescale_parameters(
         loc = parameters[..., 0] * target_scale[..., 1].unsqueeze(-1) + target_scale[..., 0].unsqueeze(-1)
 
         return torch.stack([loc, scale], dim=-1)
+
+
+class BetaDistributionLoss(DistributionLoss):
+    """
+    Beta distribution loss for unit interval data.
+
+    Requirements for original target normalizer:
+        * logit transformation
+    """
+
+    distribution_class = distributions.Beta
+    distribution_arguments = ["mean", "shape"]
+
+    def map_x_to_distribution(self, x: torch.Tensor) -> distributions.Beta:
+        mean = x[..., 0]
+        shape = x[..., 1]
+        return self.distribution_class(concentration0=(1 - mean) * shape, concentration1=mean * shape)
+
+    def rescale_parameters(
+        self, parameters: torch.Tensor, target_scale: torch.Tensor, encoder: BaseEstimator
+    ) -> torch.Tensor:
+        assert encoder.transformation in ["logit"], "Beta distribution is only compatible with logit transformation"
+        assert encoder.center, "Beta distribution requires normalizer to center data"
+
+        scaled_mean = encoder(dict(prediction=parameters[..., 0], target_scale=target_scale))
+        # need to first transform target scale standard deviation in logit space to real space
+        # we assume a normal distribution in logit space (we used a logit transform and a standard scaler)
+        # and know that the variance of the beta distribution is limited by `scaled_mean * (1 - scaled_mean)`
+        mean_derivative = scaled_mean * (1 - scaled_mean)
+
+        # we can approximate variance as
+        # torch.pow(torch.tanh(target_scale[..., 1].unsqueeze(1) * torch.sqrt(mean_derivative)), 2) * mean_derivative
+        # shape is (positive) parameter * mean_derivative / var
+        shape_scaler = torch.pow(torch.tanh(target_scale[..., 1].unsqueeze(1) * torch.sqrt(mean_derivative)), 2)
+        scaled_shape = F.softplus(parameters[..., 1]) / shape_scaler
+        return torch.stack([scaled_mean, scaled_shape], dim=-1)
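
For orientation (not part of the commit): map_x_to_distribution parametrizes the Beta distribution by a mean and a shape, with concentration1 = mean * shape and concentration0 = (1 - mean) * shape, so the distribution mean equals the mean parameter and the variance is mean * (1 - mean) / (shape + 1). A quick illustrative check with values matching the new test:

    import torch
    from torch import distributions

    mean, shape = torch.tensor(0.1), torch.tensor(10.0)
    dist = distributions.Beta(concentration0=(1 - mean) * shape, concentration1=mean * shape)

    print(dist.mean)              # tensor(0.1000) -> the mean parameter is recovered
    samples = dist.sample((100_000,))
    print(samples.std())          # roughly sqrt(0.1 * 0.9 / 11), i.e. about 0.09

The tanh factor in rescale_parameters keeps the implied variance below mean * (1 - mean), which is the largest variance any random variable supported on the unit interval with that mean can have.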

tests/test_metrics.py

Lines changed: 26 additions & 0 deletions
@@ -10,6 +10,7 @@
     MAE,
     SMAPE,
     AggregationMetric,
+    BetaDistributionLoss,
     CompositeMetric,
     LogNormalDistributionLoss,
     NegativeBinomialDistributionLoss,
@@ -156,3 +157,28 @@ def test_NegativeBinomialDistributionLoss(center, transformation):
         samples = loss.sample_n(rescaled_parameters, 1)
         assert torch.isclose(torch.as_tensor(mean), samples.mean(), atol=0.1, rtol=0.5)
         assert torch.isclose(torch.as_tensor(std), samples.std(), atol=0.1, rtol=0.5)
+
+
+@pytest.mark.parametrize(
+    ["center", "transformation"],
+    itertools.product([True, False], ["log", "log1p", "softplus", "relu", "logit", None]),
+)
+def test_BetaDistributionLoss(center, transformation):
+    initial_mean = 0.1
+    initial_shape = 10
+    n = 100000
+    target = BetaDistributionLoss().map_x_to_distribution(torch.tensor([initial_mean, initial_shape])).sample_n(n)
+    normalizer = TorchNormalizer(center=center, transformation=transformation)
+    normalized_target = normalizer.fit_transform(target).view(1, -1)
+    target_scale = normalizer.get_parameters().unsqueeze(0)
+    parameters = torch.stack([normalized_target, 1.0 * torch.ones_like(normalized_target)], dim=-1)
+    loss = BetaDistributionLoss()
+
+    if transformation not in ["logit"] or not center:
+        with pytest.raises(AssertionError):
+            loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer)
+    else:
+        rescaled_parameters = loss.rescale_parameters(parameters, target_scale=target_scale, encoder=normalizer)
+        samples = loss.sample_n(rescaled_parameters, 1)
+        assert torch.isclose(torch.as_tensor(initial_mean), samples.mean(), atol=0.01, rtol=0.01) # mean=0.1
+        assert torch.isclose(target.std(), samples.std(), atol=0.02, rtol=0.3) # std=0.09
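
The expected statistics in the new test follow directly from the mean/shape parametrization: with initial_mean = 0.1 and initial_shape = 10 the sampled target has mean 0.1 and a theoretical standard deviation of about 0.09, which is what the inline comments on the two assertions refer to. A small arithmetic sketch (illustrative, not part of the commit):

    import math

    initial_mean, initial_shape = 0.1, 10
    # Beta variance in the mean/shape parametrization: mean * (1 - mean) / (shape + 1)
    variance = initial_mean * (1 - initial_mean) / (initial_shape + 1)
    print(round(math.sqrt(variance), 4))  # 0.0905

The parametrize grid deliberately includes non-logit transformations and center=False so that the assertion guards in rescale_parameters are exercised as well.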
