# Reconstructed from a whitespace-collapsed git patch that deleted two files:
#   skpro/regression/density.py  (module code, first section)
#   skpro/tests/test_density.py  (disabled tests, second section)

# ---------------------------------------------------------------------------
# skpro/regression/density.py
# ---------------------------------------------------------------------------
# LEGACY MODULE - TODO: remove or refactor

import abc

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.neighbors import KernelDensity

from skpro.utils.utils import not_existing

try:
    from scipy.integrate import simpson
except ImportError:  # older SciPy releases only expose the legacy name
    from scipy.integrate import simps as simpson


def ecdf(a):
    """Return the empirical distribution function of a sample.

    Parameters
    ----------
    a : array-like
        Input array representing a sample.

    Returns
    -------
    xs : np.ndarray
        The sample values in ascending order.
    ys : np.ndarray
        Cumulative proportions ``1/n, 2/n, ..., 1`` aligned with ``xs``.
    """
    xs = np.sort(np.asarray(a))
    n = len(xs)
    ys = np.arange(1, n + 1) / float(n)

    return xs, ys


def step_function(xs, ys):
    """Return a step function built from paired x/y samples.

    Parameters
    ----------
    xs : array-like
        x values; must be sorted ascending because they are searched with
        ``np.searchsorted``.
    ys : array-like
        y values corresponding to ``xs``.

    Returns
    -------
    callable
        ``func(x)`` returning the ``ys`` entry at the first position whose
        ``xs`` value is not smaller than ``x``. Queries beyond the last
        ``xs`` value return the last ``ys`` entry. ``x`` may be a scalar or
        an array.
    """
    xs = np.asarray(xs)
    ys = np.asarray(ys)
    last = len(ys) - 1

    def func(x):
        # np.minimum clamps out-of-range positions for scalars and arrays
        # alike, whereas a plain ``if index >= len(ys)`` fails on arrays.
        index = np.minimum(np.searchsorted(xs, x), last)
        return ys[index]

    return func


class DensityAdapter(BaseEstimator, metaclass=abc.ABCMeta):
    """Abstract base class for density adapters.

    An adapter transforms an input into an object exposing a
    ``cdf`` / ``pdf`` interface.
    """

    @abc.abstractmethod
    def __call__(self, inlet):
        """Adapter entry point.

        Parameters
        ----------
        inlet : mixed
            Input for the adapter transformation.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def pdf(self, x):
        """Probability density function.

        Parameters
        ----------
        x
            Point at which to evaluate the density.

        Returns
        -------
        mixed
            Density function evaluated at ``x``.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def cdf(self, x):
        """Cumulative distribution function.

        Parameters
        ----------
        x
            Point at which to evaluate the distribution function.

        Returns
        -------
        mixed
            Cumulative distribution function evaluated at ``x``.
        """
        raise NotImplementedError()


class KernelDensityAdapter(DensityAdapter):
    """DensityAdapter that uses kernel density estimation on a sample.

    Parameters
    ----------
    estimator : estimator with ``fit`` and ``score_samples``, optional
        Density estimator fitted on the sample. Defaults to a fresh
        ``sklearn.neighbors.KernelDensity()`` per adapter instance.
    """

    # Lower integration bound is min(sample) - _TAIL_WIDTH * std(sample).
    _TAIL_WIDTH = 10
    # Number of integration intervals used by ``cdf``.
    _GRID_SIZE = 1000

    def __init__(self, estimator=None):
        # A ``KernelDensity()`` default in the signature would be created
        # once and shared (and refitted) by every adapter instance.
        self.estimator = KernelDensity() if estimator is None else estimator

    def __call__(self, sample):
        """Fit the density estimator on ``sample``.

        Parameters
        ----------
        sample : array-like of length M
            One-dimensional sample.

        Raises
        ------
        ValueError
            If ``sample`` is empty.
        """
        sample = np.asarray(sample, dtype=float).ravel()
        if sample.size == 0:
            raise ValueError("sample must contain at least one observation")

        self._min_sample = np.min(sample)
        self._max_sample = np.max(sample)
        self._std_sample = np.std(sample)

        # fit kernel density estimator on a column vector (M, 1)
        self.estimator.fit(sample.reshape(-1, 1))

    def cdf(self, x):
        """Approximate the cdf at ``x`` by integrating the estimated pdf.

        The density is integrated with Simpson's rule from
        ``min(sample) - 10 * std(sample)`` up to and including ``x``.

        Parameters
        ----------
        x : float
            Upper integration limit.

        Returns
        -------
        float
            Approximate probability mass below ``x``; ``0`` when ``x`` does
            not exceed the lower integration bound.
        """
        lower = self._min_sample - self._TAIL_WIDTH * self._std_sample
        if x <= lower:
            # Nothing to integrate; avoids a zero or negative grid step.
            return np.float64(0.0)

        # linspace includes the endpoint ``x``; an odd number of points
        # (even number of intervals) suits Simpson's rule.
        grid = np.linspace(lower, x, self._GRID_SIZE + 1)
        pdf_estimation = np.exp(
            self.estimator.score_samples(grid.reshape(-1, 1))
        )

        return simpson(y=pdf_estimation, x=grid)

    def pdf(self, x):
        """Evaluate the estimated density at ``x``.

        Parameters
        ----------
        x : float or array-like
            Evaluation point. If several values are given, only the density
            of the first one is returned.

        Returns
        -------
        float
            Estimated density at (the first element of) ``x``.
        """
        points = np.asarray(x).reshape(-1, 1)
        return np.exp(self.estimator.score_samples(points))[0]


class EmpiricalDensityAdapter(DensityAdapter):
    """DensityAdapter that uses the empirical cdf of a sample."""

    def __init__(self):
        self.xs_ = None
        self.ys_ = None
        self.step_function_ = None

    def __call__(self, sample):
        """Build the empirical cdf of ``sample``.

        Parameters
        ----------
        sample : array-like of length M
            Sample from which the empirical cdf is computed.
        """
        self.xs_, self.ys_ = ecdf(sample)
        self.step_function_ = step_function(self.xs_, self.ys_)

    def cdf(self, x):
        """Evaluate the empirical cdf step function at ``x``."""
        return self.step_function_(x)

    # NOTE(review): ``not_existing`` is a project helper; the disabled tests
    # below check ``adapter.pdf.not_existing``, so it presumably flags the
    # method as unavailable - confirm against skpro.utils.utils.
    @not_existing
    def pdf(self, x):
        pass


# ---------------------------------------------------------------------------
# skpro/tests/test_density.py
# ---------------------------------------------------------------------------
# LEGACY MODULE - TODO: remove or refactor

# The tests are deliberately disabled, exactly as in the original file.
if False:
    import numpy as np
    from hypothesis import given
    from hypothesis.extra.numpy import arrays
    from hypothesis.strategies import floats
    from scipy.stats import norm

    # NOTE(review): the original imported from ``skpro.density``; the module
    # in this patch lives at skpro/regression/density.py - confirm the path.
    from skpro.regression.density import (
        EmpiricalDensityAdapter,
        KernelDensityAdapter,
        ecdf,
    )

    np.random.seed(1)

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy, hence ``np.float64`` / ``float`` below.
    @given(arrays(np.float64, 10, elements=floats(0, 100)))
    def test_ecdf_from_sample(sample):
        xs, ys = ecdf(sample)

        # correct mapping?
        assert len(xs) == len(ys)

        # is it monotone?
        assert np.array_equal(ys, sorted(ys))

    @given(floats(-10, 10))
    def test_kernel_density_adapter(x):
        # Bayesian test sample
        loc, scale = 5, 10
        sample = np.random.normal(loc=loc, scale=scale, size=500)

        # Initialise adapter
        adapter = KernelDensityAdapter()
        adapter(sample)

        # PDF
        pdf = adapter.pdf(x)
        assert isinstance(pdf, float)
        assert abs(pdf - norm.pdf(x, loc=loc, scale=scale)) < 0.3

        # CDF
        cdf = adapter.cdf(x)
        assert isinstance(cdf, float)
        assert abs(cdf - norm.cdf(x, loc=loc, scale=scale)) < 0.3

    @given(floats(-10, 10))
    def test_empirical_density_adapter(x):
        # Bayesian test sample
        loc, scale = 5, 10

        sample = np.random.normal(loc=loc, scale=scale, size=5000)

        # Initialise adapter
        adapter = EmpiricalDensityAdapter()
        adapter(sample)

        # CDF
        cdf = adapter.cdf(x)
        assert isinstance(cdf, float)
        assert abs(cdf - norm.cdf(x, loc=loc, scale=scale)) < 0.3

        # PDF
        assert adapter.pdf.not_existing