Merge branch 'main' into cyc-boost-deprec

sktime · May 16, 2024 · 01c1531 · 01c1531
2 parents 12acdfd + f0b899a
commit 01c1531
Show file tree

Hide file tree

Showing 4 changed files with 120 additions and 1 deletion.
diff --git a/docs/source/api_reference/distributions.rst b/docs/source/api_reference/distributions.rst
@@ -37,6 +37,7 @@ Continuous support
 
     Beta
     ChiSquared
+    Exponential
     Fisk
     Laplace
     Logistic

diff --git a/skpro/distributions/__init__.py b/skpro/distributions/__init__.py
@@ -8,6 +8,7 @@
     "ChiSquared",
     "Delta",
     "Empirical",
+    "Exponential",
     "Fisk",
     "Laplace",
     "Logistic",
@@ -29,6 +30,7 @@
 from skpro.distributions.chi_squared import ChiSquared
 from skpro.distributions.delta import Delta
 from skpro.distributions.empirical import Empirical
+from skpro.distributions.exponential import Exponential
 from skpro.distributions.fisk import Fisk
 from skpro.distributions.laplace import Laplace
 from skpro.distributions.logistic import Logistic

diff --git a/skpro/distributions/base/_base.py b/skpro/distributions/base/_base.py
@@ -1138,6 +1138,23 @@ def _energy_default(self, x=None):
         2D np.ndarray, same shape as ``self``
             energy values w.r.t. the given points
         """
+        approx_spl_size = self.get_tag("approx_energy_spl")
+        if x is not None and self._has_implementation_of("_ppf"):
+            approx_method = (
+                "by approximating the energy expectation by the integral "
+                "of the absolute difference of x to the ppf,"
+                f"with {approx_spl_size} equidistant nodes"
+            )
+            warn(self._method_error_msg("energy", fill_in=approx_method))
+
+            ps = np.linspace(0, 1, approx_spl_size + 2)[1:-1]
+            qs = [np.abs(self.ppf(p) - x) for p in ps]
+            en3D = np.array(qs)
+            energy = np.mean(en3D, axis=0)
+            if self.ndim > 0:
+                energy = np.sum(energy, axis=1)
+            return energy
+
         # we want to approximate E[abs(X-Y)]
         # if x = None, X,Y are i.i.d. copies of self
         # if x is not None, X=x (constant), Y=self
@@ -1233,6 +1250,20 @@ def _mean(self):
         Private method, to be implemented by subclasses.
         """
         approx_spl_size = self.get_tag("approx_mean_spl")
+        if self._has_implementation_of("_ppf"):
+            approx_method = (
+                "by approximating the expected value by the integral of the ppf, "
+                f"with {approx_spl_size} equidistant nodes"
+            )
+            warn(self._method_error_msg("mean", fill_in=approx_method))
+
+            ps = np.linspace(0, 1, approx_spl_size + 2)[1:-1]
+            qs = [self.ppf(p) for p in ps]
+            np3D = np.array(qs)
+            means = np.mean(np3D, axis=0)
+            return means
+
+        # else we have to rely on samples
         approx_method = (
             "by approximating the expected value by the arithmetic mean of "
             f"{approx_spl_size} samples"
@@ -1262,6 +1293,26 @@ def _var(self):
         Private method, to be implemented by subclasses.
         """
         approx_spl_size = self.get_tag("approx_var_spl")
+        if self._has_implementation_of("_ppf"):
+            approx_method = (
+                "by approximating the variancee integrals of the ppf, "
+                "integral of ppf-squared minus square of integral of ppf, "
+                f"each with {approx_spl_size} equidistant nodes"
+            )
+            warn(self._method_error_msg("var", fill_in=approx_method))
+
+            ps = np.linspace(0, 1, approx_spl_size + 2)[1:-1]
+            qs = [self.ppf(p) for p in ps]
+            qsq = [q**2 for q in qs]
+
+            mean3D = np.array(qs)
+            means = np.mean(mean3D, axis=0)
+
+            mom2s3D = np.array(qsq)
+            mom2s = np.mean(mom2s3D, axis=0)
+
+            return mom2s - means**2
+
         approx_method = (
             "by approximating the variance by the arithmetic mean of "
             f"{approx_spl_size} samples of squared differences"
@@ -1271,7 +1322,7 @@ def _var(self):
         spl1 = self.sample(approx_spl_size)
         spl2 = self.sample(approx_spl_size)
         spl = (spl1 - spl2) ** 2
-        return self._sample_mean(spl)
+        return self._sample_mean(spl) / 2
 
     def pdfnorm(self, a=2):
         r"""a-norm of pdf, defaults to 2-norm.

diff --git a/skpro/distributions/exponential.py b/skpro/distributions/exponential.py
@@ -0,0 +1,65 @@
+# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
+"""Exponential probability distribution."""
+
+__author__ = ["ShreeshaM07"]
+
+import pandas as pd
+from scipy.stats import expon, rv_continuous
+
+from skpro.distributions.adapters.scipy import _ScipyAdapter
+
+
+class Exponential(_ScipyAdapter):
+    r"""Exponential Distribution.
+
+    The Exponential distribution is parametrized by the rate
+    :math:`\lambda`, such that the pdf is
+
+    .. math:: f(x) = \lambda*\exp\left(-\lambda*x\right)
+
+    The rate :math:`\lambda` is represented by the parameter ``rate``.
+
+    Parameters
+    ----------
+    rate : float or array of float (1D or 2D)
+        rate of the distribution
+        rate = 1/scale
+    index : pd.Index, optional, default = RangeIndex
+    columns : pd.Index, optional, default = RangeIndex
+    """
+
+    _tags = {
+        "capabilities:approx": ["ppf", "energy", "pdfnorm"],
+        "capabilities:exact": ["mean", "var", "pdf", "log_pdf", "cdf"],
+        "distr:measuretype": "continuous",
+        "broadcast_init": "on",
+    }
+
+    def __init__(self, rate, index=None, columns=None):
+        self.rate = rate
+
+        super().__init__(index=index, columns=columns)
+
+    def _get_scipy_object(self) -> rv_continuous:
+        return expon
+
+    def _get_scipy_param(self):
+        rate = self._bc_params["rate"]
+        scale = 1 / rate  # expon is passed ``scale``, the reciprocal of rate
+        return [], {"scale": scale}
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the distribution."""
+        params1 = {"rate": [1, 2, 2.5, 3.5, 5]}
+        params2 = {"rate": 2}
+        params3 = {
+            "rate": [
+                [2, 2, 2],
+                [4, 4, 4],
+            ],
+            "index": pd.Index([1, 2]),
+            "columns": pd.Index(["a", "b", "c"]),
+        }
+
+        return [params1, params2, params3]
-Original file line number
+Diff line change
@@ Expand Up / @@ -37,6 +37,7 @@ Continuous support @@
         Beta
         ChiSquared
+        Exponential
         Fisk
         Laplace
         Logistic
@@ Expand Down @@