Skip to content

Commit

Permalink
[MNT] Exponential dist in NGBoostRegressor, NGBoostSurvival (#332)
Browse files Browse the repository at this point in the history
This adds exponential distribution to `NGBoostRegressor` and
`NGBoostSurvival`.

Also refactors distribution adapter logic to a common location.
  • Loading branch information
ShreeshaM07 authored May 16, 2024
1 parent f0b899a commit 5889075
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 89 deletions.
65 changes: 64 additions & 1 deletion skpro/regression/adapters/ngboost/_ngboost_proba.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@ def _dist_to_ngboost_instance(self, dist, survival=False):
-------
NGBoost Distribution object.
"""
from ngboost.distns import Laplace, LogNormal, Normal, Poisson, T
from ngboost.distns import Exponential, Laplace, LogNormal, Normal, Poisson, T

ngboost_dists = {
"Normal": Normal,
"Laplace": Laplace,
"TDistribution": T,
"Poisson": Poisson,
"LogNormal": LogNormal,
"Exponential": Exponential,
}
# default Normal distribution
dist_ngboost = Normal
Expand All @@ -52,6 +53,66 @@ def _dist_to_ngboost_instance(self, dist, survival=False):

return dist_ngboost

def _ngb_skpro_dist_params(
    self,
    pred_dist,
    index,
    columns,
    **kwargs,
):
    """Translate NGBoost predicted parameters into skpro distribution kwargs.

    Looks up the parameter names that belong to ``self.dist``, reads the
    corresponding entries from ``pred_dist.params``, converts them to the
    skpro parametrization where the two differ, and stores them in
    ``kwargs`` under the skpro parameter names.

    Parameters
    ----------
    pred_dist : NGBoost distribution object
        Predicted distribution; its ``params`` mapping is indexed by the
        NGBoost parameter names listed below.
    index : object
        Stored unchanged under ``kwargs["index"]``.
    columns : object
        Stored unchanged under ``kwargs["columns"]``.
    **kwargs
        Entries already collected by the caller; they are kept and extended.

    Returns
    -------
    dict
        ``kwargs`` with the skpro parameters, ``"index"`` and ``"columns"``
        added. If ``self.dist`` is not one of the distributions listed
        below, ``kwargs`` is returned without additions.
    """
    import numpy as np

    # NGBoost and skpro name (and partly parametrize) the same quantities
    # differently. Each entry lists (NGBoost name, skpro name) pairs:
    #
    # Distribution  | NGBoost parameters
    # --------------|-------------------
    # Normal        | loc = mean, scale = standard deviation
    # TDistribution | loc = mean, scale = standard deviation
    # Poisson       | mu = mean
    # LogNormal     | s = standard deviation, scale = exp(mean)
    #               | (see scipy.stats.lognorm)
    # Laplace       | loc = mean, scale = scale parameter
    # Exponential   | scale = 1/rate
    #
    # Normal, Laplace, TDistribution and Poisson have not yet
    # been implemented for Survival analysis.
    param_pairs = {
        "Normal": [("loc", "mu"), ("scale", "sigma")],
        "Laplace": [("loc", "mu"), ("scale", "scale")],
        "TDistribution": [("loc", "mu"), ("scale", "sigma")],
        "Poisson": [("mu", "mu")],
        "LogNormal": [("scale", "mu"), ("s", "sigma")],
        "Exponential": [("scale", "rate")],
    }

    # Conversions needed where the NGBoost value is not the skpro value:
    # LogNormal scale is exp(mean), Exponential scale is 1/rate.
    conversions = {
        ("LogNormal", "scale"): np.log,
        ("Exponential", "scale"): lambda scale: 1 / scale,
    }

    if self.dist not in param_pairs:
        return kwargs

    for ngb_name, skpro_name in param_pairs[self.dist]:
        value = pred_dist.params[ngb_name]

        convert = conversions.get((self.dist, ngb_name))
        if convert is not None:
            value = convert(value)

        # _check_y returns a tuple; only its first element is kept
        checked = self._check_y(y=value)
        kwargs[skpro_name] = checked[0]

    # NOTE(review): index/columns are attached only for a recognised
    # ``self.dist`` - confirm this matches the intended nesting.
    kwargs["index"] = index
    kwargs["columns"] = columns

    return kwargs

def _ngb_dist_to_skpro(self, **kwargs):
"""Convert NGBoost distribution object to skpro BaseDistribution object.
Expand All @@ -64,6 +125,7 @@ def _ngb_dist_to_skpro(self, **kwargs):
skpro_dist (skpro.distributions.BaseDistribution):
Converted skpro distribution object.
"""
from skpro.distributions.exponential import Exponential
from skpro.distributions.laplace import Laplace
from skpro.distributions.lognormal import LogNormal
from skpro.distributions.normal import Normal
Expand All @@ -76,6 +138,7 @@ def _ngb_dist_to_skpro(self, **kwargs):
"TDistribution": TDistribution,
"Poisson": Poisson,
"LogNormal": LogNormal,
"Exponential": Exponential,
}

skpro_dist = None
Expand Down
57 changes: 13 additions & 44 deletions skpro/regression/ensemble/_ngboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

__author__ = ["ShreeshaM07"]

import numpy as np

from skpro.regression.adapters.ngboost._ngboost_proba import NGBoostAdapter
from skpro.regression.base import BaseProbaRegressor

Expand All @@ -28,6 +26,7 @@ class NGBoostRegressor(BaseProbaRegressor, NGBoostAdapter):
3. "LogNormal"
4. "Poisson"
5. "TDistribution"
6. "Exponential"
score : string , default = "LogScore"
A score from ngboost.scores for LogScore
rule to compare probabilistic
Expand Down Expand Up @@ -227,49 +226,13 @@ def _predict_proba(self, X):
"""
X = self._check_X(X)

# The returned values of the Distributions from NGBoost
# are different. So based on that they are split into these
# categories of loc,scale,mu and s.
# Distribution type | Parameters
# ------------------|-----------
# Normal | loc = mean, scale = standard deviation
# TDistribution | loc = mean, scale = standard deviation
# Poisson | mu = mean
# LogNormal | s = standard deviation, scale = exp(mean)
# | (see scipy.stats.lognorm)
# Laplace | loc = mean, scale = scale parameter

dist_params = {
"Normal": ["loc", "scale"],
"Laplace": ["loc", "scale"],
"TDistribution": ["loc", "scale"],
"Poisson": ["mu"],
"LogNormal": ["scale", "s"],
}

skpro_params = {
"Normal": ["mu", "sigma"],
"Laplace": ["mu", "scale"],
"TDistribution": ["mu", "sigma"],
"Poisson": ["mu"],
"LogNormal": ["mu", "sigma"],
}

kwargs = {}
pred_dist = self._pred_dist(X)
index = X.index
columns = self._y_cols

if self.dist in dist_params and self.dist in skpro_params:
ngboost_params = dist_params[self.dist]
skp_params = skpro_params[self.dist]
for ngboost_param, skp_param in zip(ngboost_params, skp_params):
kwargs[skp_param] = self._pred_dist(X).params[ngboost_param]
if self.dist == "LogNormal" and ngboost_param == "scale":
kwargs[skp_param] = np.log(self._pred_dist(X).params[ngboost_param])

kwargs[skp_param] = self._check_y(y=kwargs[skp_param])
# returns a tuple so taking only first index of the tuple
kwargs[skp_param] = kwargs[skp_param][0]
kwargs["index"] = X.index
kwargs["columns"] = self._y_cols
# Convert NGBoost Distribution return params into a dict
kwargs = self._ngb_skpro_dist_params(pred_dist, index, columns, **kwargs)

# Convert NGBoost Distribution to skpro BaseDistribution
pred_dist = self._ngb_dist_to_skpro(**kwargs)
Expand Down Expand Up @@ -317,4 +280,10 @@ def get_test_params(cls, parameter_set="default"):
"verbose": False,
}

return [params1, params2, params3, params4, params5, params6]
params7 = {
"dist": "Exponential",
"n_estimators": 800,
"verbose_eval": 50,
}

return [params1, params2, params3, params4, params5, params6, params7]
55 changes: 11 additions & 44 deletions skpro/survival/ensemble/_ngboost_surv.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class NGBoostSurvival(BaseSurvReg, NGBoostAdapter):
A distribution from ngboost.distns, e.g. LogNormal
Available distribution types
1. "LogNormal"
2. "Exponential"
score : string , default = "LogScore"
rule to compare probabilistic predictions P̂ to the observed data y.
A score from ngboost.scores, e.g. LogScore
Expand Down Expand Up @@ -228,51 +229,13 @@ def _predict_proba(self, X):
"""
X = self._check_X(X)

# The returned values of the Distributions from NGBoost
# are different. So based on that they are split into these
# categories of loc,scale,mu and s.
# Distribution type | Parameters
# ------------------|-----------
# Normal | loc = mean, scale = standard deviation
# TDistribution | loc = mean, scale = standard deviation
# Poisson | mu = mean
# LogNormal | s = standard deviation, scale = exp(mean)
# | (see scipy.stats.lognorm)
# Laplace | loc = mean, scale = scale parameter
# Normal, Laplace, TDistribution and Poisson have not yet
# been implemented for Survival analysis.

dist_params = {
"Normal": ["loc", "scale"],
"Laplace": ["loc", "scale"],
"TDistribution": ["loc", "scale"],
"Poisson": ["mu"],
"LogNormal": ["scale", "s"],
}

skpro_params = {
"Normal": ["mu", "sigma"],
"Laplace": ["mu", "scale"],
"TDistribution": ["mu", "sigma"],
"Poisson": ["mu"],
"LogNormal": ["mu", "sigma"],
}

kwargs = {}
pred_dist = self._pred_dist(X)
index = X.index
columns = self._y_cols

if self.dist in dist_params and self.dist in skpro_params:
ngboost_params = dist_params[self.dist]
skp_params = skpro_params[self.dist]
for ngboost_param, skp_param in zip(ngboost_params, skp_params):
kwargs[skp_param] = self._pred_dist(X).params[ngboost_param]
if self.dist == "LogNormal" and ngboost_param == "scale":
kwargs[skp_param] = np.log(self._pred_dist(X).params[ngboost_param])

kwargs[skp_param] = self._check_y(y=kwargs[skp_param])
# returns a tuple so taking only first index of the tuple
kwargs[skp_param] = kwargs[skp_param][0]
kwargs["index"] = X.index
kwargs["columns"] = self._y_cols
# Convert NGBoost Distribution return params into a dict
kwargs = self._ngb_skpro_dist_params(pred_dist, index, columns, **kwargs)

# Convert NGBoost Distribution to skpro BaseDistribution
pred_dist = self._ngb_dist_to_skpro(**kwargs)
Expand Down Expand Up @@ -306,5 +269,9 @@ def get_test_params(cls, parameter_set="default"):
"n_estimators": 800,
"minibatch_frac": 0.8,
}
params4 = {
"dist": "Exponential",
"n_estimators": 600,
}

return [params1, params2, params3]
return [params1, params2, params3, params4]

0 comments on commit 5889075

Please sign in to comment.