[MNT] Exponential dist in NGBoostRegressor, NGBoostSurvival (#332)

This adds the exponential distribution to `NGBoostRegressor` and `NGBoostSurvival`. It also refactors the distribution adapter logic into a common location.
sktime · May 16, 2024 · 5889075 · 5889075
1 parent f0b899a
commit 5889075
Show file tree

Hide file tree

Showing 3 changed files with 88 additions and 89 deletions.
diff --git a/skpro/regression/adapters/ngboost/_ngboost_proba.py b/skpro/regression/adapters/ngboost/_ngboost_proba.py
@@ -32,14 +32,15 @@ def _dist_to_ngboost_instance(self, dist, survival=False):
         -------
         NGBoost Distribution object.
         """
-        from ngboost.distns import Laplace, LogNormal, Normal, Poisson, T
+        from ngboost.distns import Exponential, Laplace, LogNormal, Normal, Poisson, T
 
         ngboost_dists = {
             "Normal": Normal,
             "Laplace": Laplace,
             "TDistribution": T,
             "Poisson": Poisson,
             "LogNormal": LogNormal,
+            "Exponential": Exponential,
         }
         # default Normal distribution
         dist_ngboost = Normal
@@ -52,6 +53,66 @@ def _dist_to_ngboost_instance(self, dist, survival=False):
 
         return dist_ngboost
 
+    def _ngb_skpro_dist_params(
+        self,
+        pred_dist,
+        index,
+        columns,
+        **kwargs,
+    ):
+        import numpy as np
+
+        # NGBoost stores the predicted parameters of each distribution under
+        # its own names (loc, scale, mu, s), which differ from the skpro
+        # names. NGBoost parameter names per distribution, and their meaning:
+        # Distribution type | Parameters
+        # ------------------|-----------
+        # Normal            | loc = mean, scale = standard deviation
+        # TDistribution     | loc = mean, scale = standard deviation
+        # Poisson           | mu = mean
+        # LogNormal         | s = standard deviation, scale = exp(mean)
+        #                   |     (see scipy.stats.lognorm)
+        # Laplace           | loc = mean, scale = scale parameter
+        # Exponential       | scale = 1/rate
+        # Normal, Laplace, TDistribution and Poisson have not yet
+        # been implemented for Survival analysis.
+
+        dist_params = {
+            "Normal": ["loc", "scale"],
+            "Laplace": ["loc", "scale"],
+            "TDistribution": ["loc", "scale"],
+            "Poisson": ["mu"],
+            "LogNormal": ["scale", "s"],
+            "Exponential": ["scale"],
+        }
+
+        skpro_params = {
+            "Normal": ["mu", "sigma"],
+            "Laplace": ["mu", "scale"],
+            "TDistribution": ["mu", "sigma"],
+            "Poisson": ["mu"],
+            "LogNormal": ["mu", "sigma"],
+            "Exponential": ["rate"],
+        }
+
+        if self.dist in dist_params and self.dist in skpro_params:
+            ngboost_params = dist_params[self.dist]
+            skp_params = skpro_params[self.dist]
+            for ngboost_param, skp_param in zip(ngboost_params, skp_params):
+                kwargs[skp_param] = pred_dist.params[ngboost_param]
+                if self.dist == "LogNormal" and ngboost_param == "scale":
+                    kwargs[skp_param] = np.log(pred_dist.params[ngboost_param])
+                if self.dist == "Exponential" and ngboost_param == "scale":
+                    kwargs[skp_param] = 1 / pred_dist.params[ngboost_param]
+
+                kwargs[skp_param] = self._check_y(y=kwargs[skp_param])
+                # _check_y returns a tuple; keep only its first element
+                kwargs[skp_param] = kwargs[skp_param][0]
+            kwargs["index"] = index
+            kwargs["columns"] = columns
+
+        return kwargs
+
+
     def _ngb_dist_to_skpro(self, **kwargs):
         """Convert NGBoost distribution object to skpro BaseDistribution object.
 
@@ -64,6 +125,7 @@ def _ngb_dist_to_skpro(self, **kwargs):
         skpro_dist (skpro.distributions.BaseDistribution):
         Converted skpro distribution object.
         """
+        from skpro.distributions.exponential import Exponential
         from skpro.distributions.laplace import Laplace
         from skpro.distributions.lognormal import LogNormal
         from skpro.distributions.normal import Normal
@@ -76,6 +138,7 @@ def _ngb_dist_to_skpro(self, **kwargs):
             "TDistribution": TDistribution,
             "Poisson": Poisson,
             "LogNormal": LogNormal,
+            "Exponential": Exponential,
         }
 
         skpro_dist = None

diff --git a/skpro/regression/ensemble/_ngboost.py b/skpro/regression/ensemble/_ngboost.py
@@ -3,8 +3,6 @@
 
 __author__ = ["ShreeshaM07"]
 
-import numpy as np
-
 from skpro.regression.adapters.ngboost._ngboost_proba import NGBoostAdapter
 from skpro.regression.base import BaseProbaRegressor
 
@@ -28,6 +26,7 @@ class NGBoostRegressor(BaseProbaRegressor, NGBoostAdapter):
         3. "LogNormal"
         4. "Poisson"
         5. "TDistribution"
+        6. "Exponential"
     score : string , default = "LogScore"
         A score from ngboost.scores for LogScore
         rule to compare probabilistic
@@ -227,49 +226,13 @@ def _predict_proba(self, X):
         """
         X = self._check_X(X)
 
-        # The returned values of the Distributions from NGBoost
-        # are different. So based on that they are split into these
-        # categories of loc,scale,mu and s.
-        # Distribution type | Parameters
-        # ------------------|-----------
-        # Normal            | loc = mean, scale = standard deviation
-        # TDistribution     | loc = mean, scale = standard deviation
-        # Poisson           | mu = mean
-        # LogNormal         | s = standard deviation, scale = exp(mean)
-        #                   |     (see scipy.stats.lognorm)
-        # Laplace           | loc = mean, scale = scale parameter
-
-        dist_params = {
-            "Normal": ["loc", "scale"],
-            "Laplace": ["loc", "scale"],
-            "TDistribution": ["loc", "scale"],
-            "Poisson": ["mu"],
-            "LogNormal": ["scale", "s"],
-        }
-
-        skpro_params = {
-            "Normal": ["mu", "sigma"],
-            "Laplace": ["mu", "scale"],
-            "TDistribution": ["mu", "sigma"],
-            "Poisson": ["mu"],
-            "LogNormal": ["mu", "sigma"],
-        }
-
         kwargs = {}
+        pred_dist = self._pred_dist(X)
+        index = X.index
+        columns = self._y_cols
 
-        if self.dist in dist_params and self.dist in skpro_params:
-            ngboost_params = dist_params[self.dist]
-            skp_params = skpro_params[self.dist]
-            for ngboost_param, skp_param in zip(ngboost_params, skp_params):
-                kwargs[skp_param] = self._pred_dist(X).params[ngboost_param]
-                if self.dist == "LogNormal" and ngboost_param == "scale":
-                    kwargs[skp_param] = np.log(self._pred_dist(X).params[ngboost_param])
-
-                kwargs[skp_param] = self._check_y(y=kwargs[skp_param])
-                # returns a tuple so taking only first index of the tuple
-                kwargs[skp_param] = kwargs[skp_param][0]
-            kwargs["index"] = X.index
-            kwargs["columns"] = self._y_cols
+        # Convert NGBoost Distribution return params into a dict
+        kwargs = self._ngb_skpro_dist_params(pred_dist, index, columns, **kwargs)
 
         # Convert NGBoost Distribution to skpro BaseDistribution
         pred_dist = self._ngb_dist_to_skpro(**kwargs)
@@ -317,4 +280,10 @@ def get_test_params(cls, parameter_set="default"):
             "verbose": False,
         }
 
-        return [params1, params2, params3, params4, params5, params6]
+        params7 = {
+            "dist": "Exponential",
+            "n_estimators": 800,
+            "verbose_eval": 50,
+        }
+
+        return [params1, params2, params3, params4, params5, params6, params7]
diff --git a/skpro/survival/ensemble/_ngboost_surv.py b/skpro/survival/ensemble/_ngboost_surv.py
@@ -25,6 +25,7 @@ class NGBoostSurvival(BaseSurvReg, NGBoostAdapter):
         A distribution from ngboost.distns, e.g. LogNormal
         Available distribution types
         1. "LogNormal"
+        2. "Exponential"
     score : string , default = "LogScore"
         rule to compare probabilistic predictions P̂ to the observed data y.
         A score from ngboost.scores, e.g. LogScore
@@ -228,51 +229,13 @@ def _predict_proba(self, X):
         """
         X = self._check_X(X)
 
-        # The returned values of the Distributions from NGBoost
-        # are different. So based on that they are split into these
-        # categories of loc,scale,mu and s.
-        # Distribution type | Parameters
-        # ------------------|-----------
-        # Normal            | loc = mean, scale = standard deviation
-        # TDistribution     | loc = mean, scale = standard deviation
-        # Poisson           | mu = mean
-        # LogNormal         | s = standard deviation, scale = exp(mean)
-        #                   |     (see scipy.stats.lognorm)
-        # Laplace           | loc = mean, scale = scale parameter
-        # Normal, Laplace, TDistribution and Poisson have not yet
-        # been implemented for Survival analysis.
-
-        dist_params = {
-            "Normal": ["loc", "scale"],
-            "Laplace": ["loc", "scale"],
-            "TDistribution": ["loc", "scale"],
-            "Poisson": ["mu"],
-            "LogNormal": ["scale", "s"],
-        }
-
-        skpro_params = {
-            "Normal": ["mu", "sigma"],
-            "Laplace": ["mu", "scale"],
-            "TDistribution": ["mu", "sigma"],
-            "Poisson": ["mu"],
-            "LogNormal": ["mu", "sigma"],
-        }
-
         kwargs = {}
+        pred_dist = self._pred_dist(X)
+        index = X.index
+        columns = self._y_cols
 
-        if self.dist in dist_params and self.dist in skpro_params:
-            ngboost_params = dist_params[self.dist]
-            skp_params = skpro_params[self.dist]
-            for ngboost_param, skp_param in zip(ngboost_params, skp_params):
-                kwargs[skp_param] = self._pred_dist(X).params[ngboost_param]
-                if self.dist == "LogNormal" and ngboost_param == "scale":
-                    kwargs[skp_param] = np.log(self._pred_dist(X).params[ngboost_param])
-
-                kwargs[skp_param] = self._check_y(y=kwargs[skp_param])
-                # returns a tuple so taking only first index of the tuple
-                kwargs[skp_param] = kwargs[skp_param][0]
-            kwargs["index"] = X.index
-            kwargs["columns"] = self._y_cols
+        # Convert NGBoost Distribution return params into a dict
+        kwargs = self._ngb_skpro_dist_params(pred_dist, index, columns, **kwargs)
 
         # Convert NGBoost Distribution to skpro BaseDistribution
         pred_dist = self._ngb_dist_to_skpro(**kwargs)
@@ -306,5 +269,9 @@ def get_test_params(cls, parameter_set="default"):
             "n_estimators": 800,
             "minibatch_frac": 0.8,
         }
+        params4 = {
+            "dist": "Exponential",
+            "n_estimators": 600,
+        }
 
-        return [params1, params2, params3]
+        return [params1, params2, params3, params4]