Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Adapter for Scipy Distributions #287

Merged
merged 11 commits into from
May 3, 2024
3 changes: 2 additions & 1 deletion skpro/distributions/adapters/scipy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Adapters for probability distribution objects, scipy facing."""
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)

from skpro.distributions.adapters.scipy._distribution import _ScipyAdapter
from skpro.distributions.adapters.scipy._empirical import empirical_from_discrete

__all__ = ["empirical_from_discrete"]
__all__ = ["empirical_from_discrete", "_ScipyAdapter"]
109 changes: 109 additions & 0 deletions skpro/distributions/adapters/scipy/_distribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Adapter for Scipy Distributions."""

__author__ = ["malikrafsan"]

from typing import Union

import pandas as pd
from scipy.stats import rv_continuous, rv_discrete

from skpro.distributions.base import BaseDistribution

__all__ = ["_ScipyAdapter"]


class _ScipyAdapter(BaseDistribution):
    """Base class that forwards distribution methods to a scipy object.

    Concrete subclasses supply the scipy distribution through
    ``_get_scipy_object`` and its arguments through ``_get_scipy_param``.
    The private methods defined here call the matching scipy method with
    those arguments. Both hooks raise ``NotImplementedError`` in this class,
    so it is abstract and should not be instantiated directly.
    """

    # name of the instance attribute under which the scipy object is stored
    _distribution_attr = "_dist"
    _tags = {
        "object_type": ["distribution", "scipy_distribution_adapter"],
    }

    def __init__(self, index=None, columns=None):
        # store the scipy object first, then run the base initialiser
        setattr(self, self._distribution_attr, self._get_scipy_object())
        super().__init__(index, columns)

    def _get_scipy_object(self) -> Union[rv_continuous, rv_discrete]:
        """Return the scipy distribution object (abstract hook).

        Subclasses should import the scipy distribution and return it.
        """
        raise NotImplementedError("abstract method")

    def _get_scipy_param(self):
        """Return the scipy distribution parameters (abstract hook).

        Subclasses should return a pair ``(args, kwds)``: a list of
        positional arguments and a dictionary of keyword arguments.
        """
        raise NotImplementedError("abstract method")

    def _call_scipy(self, scipy_method, *leading):
        """Call ``scipy_method`` on the stored scipy object.

        ``leading`` values are passed first, followed by the positional and
        keyword arguments obtained from ``_get_scipy_param``.
        """
        dist = getattr(self, self._distribution_attr)
        args, kwds = self._get_scipy_param()
        return getattr(dist, scipy_method)(*leading, *args, **kwds)

    def _mean(self):
        """Return the result of the scipy ``mean`` method."""
        return self._call_scipy("mean")

    def _var(self):
        """Return the result of the scipy ``var`` method."""
        return self._call_scipy("var")

    def _pdf(self, x: pd.DataFrame):
        """Return the scipy ``pdf`` at ``x``, or 0 for an ``rv_discrete``."""
        if isinstance(getattr(self, self._distribution_attr), rv_discrete):
            return 0
        return self._call_scipy("pdf", x)

    def _log_pdf(self, x: pd.DataFrame):
        """Return the scipy ``logpdf`` at ``x``, or 0 for an ``rv_discrete``."""
        # NOTE(review): 0 is returned here although the log of a zero density
        # would usually be -inf - confirm this is the intended convention
        if isinstance(getattr(self, self._distribution_attr), rv_discrete):
            return 0
        return self._call_scipy("logpdf", x)

    def _cdf(self, x: pd.DataFrame):
        """Return the result of the scipy ``cdf`` method at ``x``."""
        return self._call_scipy("cdf", x)

    def _ppf(self, p: pd.DataFrame):
        """Return the result of the scipy ``ppf`` method at ``p``."""
        return self._call_scipy("ppf", p)

    def _pmf(self, x: pd.DataFrame):
        """Return the scipy ``pmf`` at ``x``, or 0 for an ``rv_continuous``."""
        if isinstance(getattr(self, self._distribution_attr), rv_continuous):
            return 0
        return self._call_scipy("pmf", x)

    def pmf(self, x: pd.DataFrame):
        """Return the probability mass function evaluated at x."""
        return self._boilerplate("_pmf", x=x)

    def _log_pmf(self, x: pd.DataFrame):
        """Return the scipy ``logpmf`` at ``x``, or 0 for an ``rv_continuous``."""
        # NOTE(review): 0 is returned here although the log of a zero mass
        # would usually be -inf - confirm this is the intended convention
        if isinstance(getattr(self, self._distribution_attr), rv_continuous):
            return 0
        return self._call_scipy("logpmf", x)

    def log_pmf(self, x: pd.DataFrame):
        """Return the log of the probability mass function evaluated at x."""
        return self._boilerplate("_log_pmf", x=x)
85 changes: 85 additions & 0 deletions skpro/distributions/adapters/scipy/tests/test_scipy_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

import numpy as np
import pandas as pd
import pytest
from skbase.testing import QuickTester

from skpro.tests.test_all_estimators import BaseFixtureGenerator, PackageConfig

__author__ = ["fkiraly", "malikrafsan"]


def test_empirical_from_discrete():
Expand Down Expand Up @@ -40,3 +46,82 @@ def test_empirical_from_discrete():
)
assert np.all(emp2.spl.index == expected_idx)
assert np.all(emp2.spl.columns == ["abc"])


class ScipyDistributionFixtureGenerator(BaseFixtureGenerator):
    """Generate test fixtures restricted to scipy distribution adapters.

    The ``object_type_filter`` below selects objects by the
    ``"scipy_distribution_adapter"`` object type.

    Fixtures parameterized
    ----------------------
    object_class: object inheriting from BaseObject
        ranges over object classes not excluded by EXCLUDE_OBJECTS, EXCLUDED_TESTS
    object_instance: instance of object inheriting from BaseObject
        ranges over object classes not excluded by EXCLUDE_OBJECTS, EXCLUDED_TESTS
        instances are generated by create_test_instance class method
    """

    object_type_filter = "scipy_distribution_adapter"


class TestScipyAdapter(PackageConfig, ScipyDistributionFixtureGenerator, QuickTester):
    """Compare adapter methods against direct calls on the scipy object."""

    # each entry pairs (adapter method name, scipy method name)
    METHOD_TESTS = {
        "NO_PARAMS": [("mean", "mean"), ("var", "var")],
        "X_PARAMS": [("cdf", "cdf"), ("ppf", "ppf")],
        "CONTINUOUS": [("pdf", "pdf"), ("log_pdf", "logpdf")],
        "DISCRETE": [("pmf", "pmf"), ("log_pmf", "logpmf")],
    }

    # evaluation points passed to the methods that take an argument
    X_VALUES = [0.1, 0.5, 0.99]

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["NO_PARAMS"])
    def test_method_no_params(self, object_instance, method, scipy_method):
        """Check argument-free methods against the scipy result."""
        actual = getattr(object_instance, method)()

        scipy_params = object_instance._get_scipy_param()
        reference_fn = getattr(object_instance._get_scipy_object(), scipy_method)
        expected = reference_fn(*scipy_params[0], **scipy_params[1])

        assert np.allclose(actual, expected)

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["X_PARAMS"])
    @pytest.mark.parametrize("x", X_VALUES)
    def test_method_with_x_params(self, object_instance, method, scipy_method, x):
        """Check methods taking ``x`` against the scipy result."""
        actual = getattr(object_instance, method)(x)

        scipy_params = object_instance._get_scipy_param()
        reference_fn = getattr(object_instance._get_scipy_object(), scipy_method)
        expected = reference_fn(x, *scipy_params[0], **scipy_params[1])

        assert np.allclose(actual, expected)

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["CONTINUOUS"])
    @pytest.mark.parametrize("x", X_VALUES)
    def test_method_continuous_dist(self, object_instance, method, scipy_method, x):
        """Check density methods; 0 is expected unless tagged continuous."""
        actual = getattr(object_instance, method)(x)

        if object_instance._tags["distr:measuretype"] == "continuous":
            scipy_params = object_instance._get_scipy_param()
            reference_fn = getattr(object_instance._get_scipy_object(), scipy_method)
            expected = reference_fn(x, *scipy_params[0], **scipy_params[1])
        else:
            expected = 0

        assert np.allclose(actual, expected)

    @pytest.mark.parametrize("method,scipy_method", METHOD_TESTS["DISCRETE"])
    @pytest.mark.parametrize("x", X_VALUES)
    def test_method_discrete_dist(self, object_instance, method, scipy_method, x):
        """Check mass methods; 0 is expected unless tagged discrete."""
        actual = getattr(object_instance, method)(x)

        if object_instance._tags["distr:measuretype"] == "discrete":
            scipy_params = object_instance._get_scipy_param()
            reference_fn = getattr(object_instance._get_scipy_object(), scipy_method)
            expected = reference_fn(x, *scipy_params[0], **scipy_params[1])
        else:
            expected = 0

        assert np.allclose(actual, expected)
111 changes: 8 additions & 103 deletions skpro/distributions/fisk.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
# copyright: skpro developers, BSD-3-Clause License (see LICENSE file)
"""Log-logistic aka Fisk probability distribution."""

__author__ = ["fkiraly"]
__author__ = ["fkiraly", "malikrafsan"]

import pandas as pd
from scipy.stats import fisk
from scipy.stats import fisk, rv_continuous

from skpro.distributions.base import BaseDistribution
from skpro.distributions.adapters.scipy import _ScipyAdapter


class Fisk(BaseDistribution):
class Fisk(_ScipyAdapter):
r"""Fisk distribution, aka log-logistic distribution.

The Fisk distribution is parametrized by a scale parameter :math:`\alpha`
Expand Down Expand Up @@ -47,109 +47,14 @@ def __init__(self, alpha=1, beta=1, index=None, columns=None):

super().__init__(index=index, columns=columns)

def _mean(self):
"""Return expected value of the distribution.
def _get_scipy_object(self) -> rv_continuous:
return fisk

Returns
-------
2D np.ndarray, same shape as ``self``
expected value of distribution (entry-wise)
"""
def _get_scipy_param(self):
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

mean_arr = fisk.mean(scale=alpha, c=beta)
return mean_arr

def _var(self):
r"""Return element/entry-wise variance of the distribution.

Returns
-------
2D np.ndarray, same shape as ``self``
variance of the distribution (entry-wise)
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

var_arr = fisk.var(scale=alpha, c=beta)
return var_arr

def _pdf(self, x):
"""Probability density function.

Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pdf at

Returns
-------
2D np.ndarray, same shape as ``self``
pdf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

pdf_arr = fisk.pdf(x, scale=alpha, c=beta)
return pdf_arr

def _log_pdf(self, x):
"""Logarithmic probability density function.

Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the pdf at

Returns
-------
2D np.ndarray, same shape as ``self``
log pdf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

lpdf_arr = fisk.logpdf(x, scale=alpha, c=beta)
return lpdf_arr

def _cdf(self, x):
"""Cumulative distribution function.

Parameters
----------
x : 2D np.ndarray, same shape as ``self``
values to evaluate the cdf at

Returns
-------
2D np.ndarray, same shape as ``self``
cdf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

cdf_arr = fisk.cdf(x, scale=alpha, c=beta)
return cdf_arr

def _ppf(self, p):
"""Quantile function = percent point function = inverse cdf.

Parameters
----------
p : 2D np.ndarray, same shape as ``self``
values to evaluate the ppf at

Returns
-------
2D np.ndarray, same shape as ``self``
ppf values at the given points
"""
alpha = self._bc_params["alpha"]
beta = self._bc_params["beta"]

icdf_arr = fisk.ppf(p, scale=alpha, c=beta)
return icdf_arr
return [], {"c": beta, "scale": alpha}

@classmethod
def get_test_params(cls, parameter_set="default"):
Expand Down
Loading
Loading