diff --git a/skpro/regression/adapters/ngboost/_ngboost_proba.py b/skpro/regression/adapters/ngboost/_ngboost_proba.py index 1de3c3830..2d620b095 100644 --- a/skpro/regression/adapters/ngboost/_ngboost_proba.py +++ b/skpro/regression/adapters/ngboost/_ngboost_proba.py @@ -32,7 +32,7 @@ def _dist_to_ngboost_instance(self, dist, survival=False): ------- NGBoost Distribution object. """ - from ngboost.distns import Laplace, LogNormal, Normal, Poisson, T + from ngboost.distns import Exponential, Laplace, LogNormal, Normal, Poisson, T ngboost_dists = { "Normal": Normal, @@ -40,6 +40,7 @@ def _dist_to_ngboost_instance(self, dist, survival=False): "TDistribution": T, "Poisson": Poisson, "LogNormal": LogNormal, + "Exponential": Exponential, } # default Normal distribution dist_ngboost = Normal @@ -52,6 +53,66 @@ def _dist_to_ngboost_instance(self, dist, survival=False): return dist_ngboost + def _ngb_skpro_dist_params( + self, + pred_dist, + index, + columns, + **kwargs, + ): + import numpy as np + + # The returned values of the Distributions from NGBoost + # are different. So based on that they are split into these + # categories of loc,scale,mu and s. + # Distribution type | Parameters + # ------------------|----------- + # Normal | loc = mean, scale = standard deviation + # TDistribution | loc = mean, scale = standard deviation + # Poisson | mu = mean + # LogNormal | s = standard deviation, scale = exp(mean) + # | (see scipy.stats.lognorm) + # Laplace | loc = mean, scale = scale parameter + # Exponential | scale = 1/rate + # Normal, Laplace, TDistribution and Poisson have not yet + # been implemented for Survival analysis. 
+ + dist_params = { + "Normal": ["loc", "scale"], + "Laplace": ["loc", "scale"], + "TDistribution": ["loc", "scale"], + "Poisson": ["mu"], + "LogNormal": ["scale", "s"], + "Exponential": ["scale"], + } + + skpro_params = { + "Normal": ["mu", "sigma"], + "Laplace": ["mu", "scale"], + "TDistribution": ["mu", "sigma"], + "Poisson": ["mu"], + "LogNormal": ["mu", "sigma"], + "Exponential": ["rate"], + } + + if self.dist in dist_params and self.dist in skpro_params: + ngboost_params = dist_params[self.dist] + skp_params = skpro_params[self.dist] + for ngboost_param, skp_param in zip(ngboost_params, skp_params): + kwargs[skp_param] = pred_dist.params[ngboost_param] + if self.dist == "LogNormal" and ngboost_param == "scale": + kwargs[skp_param] = np.log(pred_dist.params[ngboost_param]) + if self.dist == "Exponential" and ngboost_param == "scale": + kwargs[skp_param] = 1 / pred_dist.params[ngboost_param] + + kwargs[skp_param] = self._check_y(y=kwargs[skp_param]) + # returns a tuple so taking only first index of the tuple + kwargs[skp_param] = kwargs[skp_param][0] + kwargs["index"] = index + kwargs["columns"] = columns + + return kwargs + def _ngb_dist_to_skpro(self, **kwargs): """Convert NGBoost distribution object to skpro BaseDistribution object. @@ -64,6 +125,7 @@ def _ngb_dist_to_skpro(self, **kwargs): skpro_dist (skpro.distributions.BaseDistribution): Converted skpro distribution object. 
""" + from skpro.distributions.exponential import Exponential from skpro.distributions.laplace import Laplace from skpro.distributions.lognormal import LogNormal from skpro.distributions.normal import Normal @@ -76,6 +138,7 @@ def _ngb_dist_to_skpro(self, **kwargs): "TDistribution": TDistribution, "Poisson": Poisson, "LogNormal": LogNormal, + "Exponential": Exponential, } skpro_dist = None diff --git a/skpro/regression/ensemble/_ngboost.py b/skpro/regression/ensemble/_ngboost.py index 4a09871f9..5abc3af6e 100644 --- a/skpro/regression/ensemble/_ngboost.py +++ b/skpro/regression/ensemble/_ngboost.py @@ -3,8 +3,6 @@ __author__ = ["ShreeshaM07"] -import numpy as np - from skpro.regression.adapters.ngboost._ngboost_proba import NGBoostAdapter from skpro.regression.base import BaseProbaRegressor @@ -28,6 +26,7 @@ class NGBoostRegressor(BaseProbaRegressor, NGBoostAdapter): 3. "LogNormal" 4. "Poisson" 5. "TDistribution" + 6. "Exponential" score : string , default = "LogScore" A score from ngboost.scores for LogScore rule to compare probabilistic @@ -227,49 +226,13 @@ def _predict_proba(self, X): """ X = self._check_X(X) - # The returned values of the Distributions from NGBoost - # are different. So based on that they are split into these - # categories of loc,scale,mu and s. 
- # Distribution type | Parameters - # ------------------|----------- - # Normal | loc = mean, scale = standard deviation - # TDistribution | loc = mean, scale = standard deviation - # Poisson | mu = mean - # LogNormal | s = standard deviation, scale = exp(mean) - # | (see scipy.stats.lognorm) - # Laplace | loc = mean, scale = scale parameter - - dist_params = { - "Normal": ["loc", "scale"], - "Laplace": ["loc", "scale"], - "TDistribution": ["loc", "scale"], - "Poisson": ["mu"], - "LogNormal": ["scale", "s"], - } - - skpro_params = { - "Normal": ["mu", "sigma"], - "Laplace": ["mu", "scale"], - "TDistribution": ["mu", "sigma"], - "Poisson": ["mu"], - "LogNormal": ["mu", "sigma"], - } - kwargs = {} + pred_dist = self._pred_dist(X) + index = X.index + columns = self._y_cols - if self.dist in dist_params and self.dist in skpro_params: - ngboost_params = dist_params[self.dist] - skp_params = skpro_params[self.dist] - for ngboost_param, skp_param in zip(ngboost_params, skp_params): - kwargs[skp_param] = self._pred_dist(X).params[ngboost_param] - if self.dist == "LogNormal" and ngboost_param == "scale": - kwargs[skp_param] = np.log(self._pred_dist(X).params[ngboost_param]) - - kwargs[skp_param] = self._check_y(y=kwargs[skp_param]) - # returns a tuple so taking only first index of the tuple - kwargs[skp_param] = kwargs[skp_param][0] - kwargs["index"] = X.index - kwargs["columns"] = self._y_cols + # Convert NGBoost Distribution return params into a dict + kwargs = self._ngb_skpro_dist_params(pred_dist, index, columns, **kwargs) # Convert NGBoost Distribution to skpro BaseDistribution pred_dist = self._ngb_dist_to_skpro(**kwargs) @@ -317,4 +280,10 @@ def get_test_params(cls, parameter_set="default"): "verbose": False, } - return [params1, params2, params3, params4, params5, params6] + params7 = { + "dist": "Exponential", + "n_estimators": 800, + "verbose_eval": 50, + } + + return [params1, params2, params3, params4, params5, params6, params7] diff --git 
a/skpro/survival/ensemble/_ngboost_surv.py b/skpro/survival/ensemble/_ngboost_surv.py index 9a95f741f..6e9ad62b0 100644 --- a/skpro/survival/ensemble/_ngboost_surv.py +++ b/skpro/survival/ensemble/_ngboost_surv.py @@ -25,6 +25,7 @@ class NGBoostSurvival(BaseSurvReg, NGBoostAdapter): A distribution from ngboost.distns, e.g. LogNormal Available distribution types 1. "LogNormal" + 2. "Exponential" score : string , default = "LogScore" rule to compare probabilistic predictions P̂ to the observed data y. A score from ngboost.scores, e.g. LogScore @@ -228,51 +229,13 @@ def _predict_proba(self, X): """ X = self._check_X(X) - # The returned values of the Distributions from NGBoost - # are different. So based on that they are split into these - # categories of loc,scale,mu and s. - # Distribution type | Parameters - # ------------------|----------- - # Normal | loc = mean, scale = standard deviation - # TDistribution | loc = mean, scale = standard deviation - # Poisson | mu = mean - # LogNormal | s = standard deviation, scale = exp(mean) - # | (see scipy.stats.lognorm) - # Laplace | loc = mean, scale = scale parameter - # Normal, Laplace, TDistribution and Poisson have not yet - # been implemented for Survival analysis. 
- - dist_params = { - "Normal": ["loc", "scale"], - "Laplace": ["loc", "scale"], - "TDistribution": ["loc", "scale"], - "Poisson": ["mu"], - "LogNormal": ["scale", "s"], - } - - skpro_params = { - "Normal": ["mu", "sigma"], - "Laplace": ["mu", "scale"], - "TDistribution": ["mu", "sigma"], - "Poisson": ["mu"], - "LogNormal": ["mu", "sigma"], - } - kwargs = {} + pred_dist = self._pred_dist(X) + index = X.index + columns = self._y_cols - if self.dist in dist_params and self.dist in skpro_params: - ngboost_params = dist_params[self.dist] - skp_params = skpro_params[self.dist] - for ngboost_param, skp_param in zip(ngboost_params, skp_params): - kwargs[skp_param] = self._pred_dist(X).params[ngboost_param] - if self.dist == "LogNormal" and ngboost_param == "scale": - kwargs[skp_param] = np.log(self._pred_dist(X).params[ngboost_param]) - - kwargs[skp_param] = self._check_y(y=kwargs[skp_param]) - # returns a tuple so taking only first index of the tuple - kwargs[skp_param] = kwargs[skp_param][0] - kwargs["index"] = X.index - kwargs["columns"] = self._y_cols + # Convert NGBoost Distribution return params into a dict + kwargs = self._ngb_skpro_dist_params(pred_dist, index, columns, **kwargs) # Convert NGBoost Distribution to skpro BaseDistribution pred_dist = self._ngb_dist_to_skpro(**kwargs) @@ -306,5 +269,9 @@ def get_test_params(cls, parameter_set="default"): "n_estimators": 800, "minibatch_frac": 0.8, } + params4 = { + "dist": "Exponential", + "n_estimators": 600, + } - return [params1, params2, params3] + return [params1, params2, params3, params4]