From d9c3f88531388472632cf47cf8b23925f02f27cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Fri, 19 Apr 2024 13:52:59 +0100 Subject: [PATCH] [DOC] docstring with mathematical description for `QPD_Empirical` (#255) This PR adds a mathematical description in the docstring of `QPD_Empirical`. The docstring also explains why the distribution is quantile parameterized, and its relation to `Empirical`. --- skpro/distributions/qpd_empirical.py | 39 ++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/skpro/distributions/qpd_empirical.py b/skpro/distributions/qpd_empirical.py index fb7a459b5..9f7596478 100644 --- a/skpro/distributions/qpd_empirical.py +++ b/skpro/distributions/qpd_empirical.py @@ -10,11 +10,40 @@ class QPD_Empirical(Empirical): - """Empirical quantile parametrized distribution. - - This distribution is parameterized by a set of quantile points. - - todo: add docstr + r"""Empirical quantile parametrized distribution. + + This distribution is parameterized by a set of quantile points and quantiles, + quantiles :math:`q_1, q_2, \dots, q_N` + at quantile points :math:`p_1, p_2, \dots, p_N`, + with :math:`0 \le p_1 < p_2 < \dots < p_N \le 1`. + + It represents a distribution with piecewise constant CDF and quantile function, + the unique distribution satisfying: + + * the support is :math:`[q_1, q_N]` + * for any quantile point :math:`p \in [p_1, p_N]`, it holds that + :math:`\mbox{ppf}(p)` = :math:`\mbox{ppf}(p_i)`, + where :math:`i` is the index minimizing :math:`|p_i - p|`, + in all cases where this minimizer is unique. + + In vernacular terms, the quantile function agrees with the quantiles prescribed by + :math:`q_i` at the quantile points :math:`p_i`, and for other quantile points + agrees with the value at the nearest quantile point. 
+ + In explicit terms, the distribution is an empirical distribution (sum-of-diracs), + supported at the quantiles :math:`q_1, q_2, \dots, q_N`, + with weights :math:`w_1, w_2, \dots, w_N` + such that :math:`w_i = (p_{i+1} - p_{i-1})/2` for :math:`i = 1, \dots, N`, + where we define :math:`p_0 = -p_1` and :math:`p_{N+1} = 2 - p_N`. + + Formally, the distribution is parameterized by the quantiles :math:`q_i` + and the quantile points :math:`p_i`, not by the quantiles and weights :math:`w_i`, + so it is distinct from the empirical distribution (``skpro`` ``Empirical``), + as a parameterized distribution, + by being quantile parameterized and not sample parameterized. + + However, it is equivalent, as an unparameterized distribution, + to an ``Empirical`` distribution with weights and nodes given as above. Parameters ----------