From 6dca57ed349e7c8e4242d56cde6d5f5b243ecae2 Mon Sep 17 00:00:00 2001
From: David Widmann <devmotion@users.noreply.github.com>
Date: Fri, 9 Oct 2020 07:08:51 +0200
Subject: [PATCH] Improve documentation and implementation of custom
 distributions (#1431)

---
 Project.toml                |   2 +-
 src/Turing.jl               |   1 +
 src/stdlib/distributions.jl | 263 +++++++++++++++++++++++++-----------
 3 files changed, 189 insertions(+), 77 deletions(-)

diff --git a/Project.toml b/Project.toml
index 8a1faf3d5..ec908d3c4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,6 +1,6 @@
 name = "Turing"
 uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
-version = "0.14.7"
+version = "0.14.8"
 
 [deps]
 AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
diff --git a/src/Turing.jl b/src/Turing.jl
index 8fa55a403..3409736e9 100644
--- a/src/Turing.jl
+++ b/src/Turing.jl
@@ -17,6 +17,7 @@ using Tracker: Tracker
 
 import AdvancedVI
 import DynamicPPL: getspace, NoDist, NamedDist
+import Random
 
 const PROGRESS = Ref(true)
 
diff --git a/src/stdlib/distributions.jl b/src/stdlib/distributions.jl
index 63ae2ea77..df6de50e2 100644
--- a/src/stdlib/distributions.jl
+++ b/src/stdlib/distributions.jl
@@ -1,139 +1,250 @@
-import Random: AbstractRNG
-
-# No info
 """
-    Flat <: ContinuousUnivariateDistribution
+    Flat()
+
+The *flat distribution* is the improper distribution of real numbers that has the improper
+probability density function
 
-A distribution with support and density of one
-everywhere.
+```math
+f(x) = 1.
+```
 """
 struct Flat <: ContinuousUnivariateDistribution end
 
-Distributions.rand(rng::AbstractRNG, d::Flat) = rand(rng)
-Distributions.logpdf(d::Flat, x::Real) = zero(x)
-Distributions.minimum(d::Flat) = -Inf
-Distributions.maximum(d::Flat) = +Inf
+Base.minimum(::Flat) = -Inf
+Base.maximum(::Flat) = Inf
+
+Base.rand(rng::Random.AbstractRNG, d::Flat) = rand(rng)
+Distributions.logpdf(::Flat, x::Real) = zero(x)
+
+# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped
+Distributions.pdf(d::Flat, x::Real) = exp(logpdf(d, x))
 
 # For vec support
-Distributions.logpdf(d::Flat, x::AbstractVector{<:Real}) = zero(x)
+Distributions.logpdf(::Flat, x::AbstractVector{<:Real}) = zero(x)
+Distributions.loglikelihood(::Flat, x::AbstractVector{<:Real}) = zero(eltype(x))
 
 """
     FlatPos(l::Real)
 
-A distribution with a lower bound of `l` and a density
-of one at every `x` above `l`.
+The *positive flat distribution* with real-valued parameter `l` is the improper distribution
+of real numbers that has the improper probability density function
+
+```math
+f(x) = \\begin{cases}
+0 & \\text{if } x \\leq l, \\\\
+1 & \\text{otherwise}.
+\\end{cases}
+```
 """
 struct FlatPos{T<:Real} <: ContinuousUnivariateDistribution
     l::T
 end
 
-Distributions.rand(rng::AbstractRNG, d::FlatPos) = rand(rng) + d.l
-Distributions.logpdf(d::FlatPos, x::Real) = x <= d.l ? -Inf : zero(x)
-Distributions.minimum(d::FlatPos) = d.l
-Distributions.maximum(d::FlatPos) = Inf
+Base.minimum(d::FlatPos) = d.l
+Base.maximum(d::FlatPos) = Inf
+
+Base.rand(rng::Random.AbstractRNG, d::FlatPos) = rand(rng) + d.l
+function Distributions.logpdf(d::FlatPos, x::Real)
+    z = float(zero(x))
+    return x <= d.l ? oftype(z, -Inf) : z
+end
+
+# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped
+Distributions.pdf(d::FlatPos, x::Real) = exp(logpdf(d, x))
 
 # For vec support
-function Distributions.logpdf(d::FlatPos, x::AbstractVector{<:Real})
-    return any(x .<= d.l) ? -Inf : zero(x)
+function Distributions.loglikelihood(d::FlatPos, x::AbstractVector{<:Real})
+    lower = d.l
+    T = float(eltype(x))
+    return any(xi <= lower for xi in x) ? T(-Inf) : zero(T)
 end
 
 """
-    BinomialLogit(n<:Real, I<:Integer)
+    BinomialLogit(n, logitp)
+
+The *Binomial distribution* with logit parameterization characterizes the number of
+successes in a sequence of independent trials.
+
+It has two parameters: `n`, the number of trials, and `logitp`, the logit of the probability
+of success in an individual trial, with the distribution
+
+```math
+P(X = k) = {n \\choose k}{(\\text{logistic}(logitp))}^k (1 - \\text{logistic}(logitp))^{n-k}, \\quad \\text{ for } k = 0,1,2, \\ldots, n.
+```
 
-A univariate binomial logit distribution.
+See also: [`Binomial`](@ref)
 """
-struct BinomialLogit{T<:Real, I<:Integer} <: DiscreteUnivariateDistribution
-    n::I
+struct BinomialLogit{T<:Real,S<:Real} <: DiscreteUnivariateDistribution
+    n::Int
     logitp::T
-end
+    logconstant::S
 
-function logpdf_binomial_logit(n, logitp, k)
-    logcomb = -StatsFuns.log1p(n) - SpecialFunctions.logbeta(n - k + 1, k + 1)
-    return logcomb + k * logitp - n * StatsFuns.log1pexp(logitp)
+    function BinomialLogit{T}(n::Int, logitp::T) where T
+        n >= 0 || error("parameter `n` has to be non-negative")
+        logconstant = - (log1p(n) + n * StatsFuns.log1pexp(logitp))
+        return new{T,typeof(logconstant)}(n, logitp, logconstant)
+    end
 end
 
-function Distributions.logpdf(d::BinomialLogit{<:Real}, k::Int)
-    return logpdf_binomial_logit(d.n, d.logitp, k)
+BinomialLogit(n::Int, logitp::Real) = BinomialLogit{typeof(logitp)}(n, logitp)
+
+Base.minimum(::BinomialLogit) = 0
+Base.maximum(d::BinomialLogit) = d.n
+
+# TODO: only implement `logpdf(d, k::Real)` if support for Distributions < 0.24 is dropped
+Distributions.pdf(d::BinomialLogit, k::Real) = exp(logpdf(d, k))
+Distributions.logpdf(d::BinomialLogit, k::Real) = _logpdf(d, k)
+Distributions.logpdf(d::BinomialLogit, k::Integer) = _logpdf(d, k)
+
+function _logpdf(d::BinomialLogit, k::Real)
+    n, logitp, logconstant = d.n, d.logitp, d.logconstant
+    _insupport = insupport(d, k)
+    _k = _insupport ? round(Int, k) : 0
+    result = logconstant + _k * logitp - SpecialFunctions.logbeta(n - _k + 1, _k + 1)
+    return _insupport ? result : oftype(result, -Inf)
 end
 
-function Distributions.pdf(d::BinomialLogit{<:Real}, k::Int)
-    return exp(logpdf_binomial_logit(d.n, d.logitp, k))
+function Base.rand(rng::Random.AbstractRNG, d::BinomialLogit)
+    return rand(rng, Binomial(d.n, logistic(d.logitp)))
 end
+Distributions.sampler(d::BinomialLogit) = sampler(Binomial(d.n, logistic(d.logitp)))
 
 """
-    BernoulliLogit(p<:Real)
+    BernoulliLogit(logitp::Real)
 
-A univariate logit-parameterised Bernoulli distribution.
+Create a univariate logit-parameterised Bernoulli distribution.
 """
-function BernoulliLogit(logitp::Real)
-    return BinomialLogit(1, logitp)
-end
+BernoulliLogit(logitp::Real) = BinomialLogit(1, logitp)
 
 """
-    OrderedLogistic(η::Any, cutpoints<:AbstractVector)
-
-An ordered logistic distribution.
+    OrderedLogistic(η, c::AbstractVector)
+
+The *ordered logistic distribution* with real-valued parameter `η` and cutpoints `c` has the
+probability mass function
+
+```math
+P(X = k) = \\begin{cases}
+    1 - \\text{logistic}(\\eta - c_1) & \\text{if } k = 1, \\\\
+    \\text{logistic}(\\eta - c_{k-1}) - \\text{logistic}(\\eta - c_k) & \\text{if } 1 < k < K, \\\\
+    \\text{logistic}(\\eta - c_{K-1}) & \\text{if } k = K,
+\\end{cases}
+```
+where `K = length(c) + 1`.
 """
 struct OrderedLogistic{T1, T2<:AbstractVector} <: DiscreteUnivariateDistribution
-   η::T1
-   cutpoints::T2
+    η::T1
+    cutpoints::T2
 
-   function OrderedLogistic(η, cutpoints)
-        if !issorted(cutpoints)
-            error("cutpoints are not sorted")
-        end
+    function OrderedLogistic{T1,T2}(η::T1, cutpoints::T2) where {T1,T2}
+        issorted(cutpoints) || error("cutpoints are not sorted")
         return new{typeof(η), typeof(cutpoints)}(η, cutpoints)
-   end
-
-end
-
-function Distributions.logpdf(d::OrderedLogistic, k::Int)
-    K = length(d.cutpoints)+1
-    c =  d.cutpoints
-
-    if k==1
-        logp= -softplus(-(c[k]-d.η))  #logp= log(logistic(c[k]-d.η))
-    elseif k<K
-        logp= log(logistic(c[k]-d.η) - logistic(c[k-1]-d.η))
-    else
-        logp= -softplus(c[k-1]-d.η)  #logp= log(1-logistic(c[k-1]-d.η))
     end
+end
 
-    return logp
+function OrderedLogistic(η, cutpoints::AbstractVector)
+    return OrderedLogistic{typeof(η),typeof(cutpoints)}(η, cutpoints)
 end
 
-Distributions.pdf(d::OrderedLogistic, k::Int) = exp(logpdf(d,k))
+Base.minimum(d::OrderedLogistic) = 0
+Base.maximum(d::OrderedLogistic) = length(d.cutpoints) + 1
 
-function Distributions.rand(rng::AbstractRNG, d::OrderedLogistic)
-    cutpoints = d.cutpoints
-    η = d.η
+# TODO: only implement `logpdf(d, k::Real)` if support for Distributions < 0.24 is dropped
+Distributions.pdf(d::OrderedLogistic, k::Real) = exp(logpdf(d, k))
+Distributions.logpdf(d::OrderedLogistic, k::Real) = _logpdf(d, k)
+Distributions.logpdf(d::OrderedLogistic, k::Integer) = _logpdf(d, k)
 
-    K = length(cutpoints)+1
-    c = vcat(-Inf, cutpoints, Inf)
+function _logpdf(d::OrderedLogistic, k::Real)
+    η, cutpoints = d.η, d.cutpoints
+    K = length(cutpoints) + 1
 
-    ps = [logistic(η - i[1]) - logistic(η - i[2]) for i in zip(c[1:(end-1)],c[2:end])]
+    _insupport = insupport(d, k)
+    _k = _insupport ? round(Int, k) : 1
+    logp = unsafe_logpdf_ordered_logistic(η, cutpoints, K, _k)
 
+    return _insupport ? logp : oftype(logp, -Inf)
+end
+
+function Base.rand(rng::Random.AbstractRNG, d::OrderedLogistic)
+    η, cutpoints = d.η, d.cutpoints
+    K = length(cutpoints) + 1
+    # evaluate probability mass function
+    ps = map(1:K) do k
+        exp(unsafe_logpdf_ordered_logistic(η, cutpoints, K, k))
+    end
     k = rand(rng, Categorical(ps))
+    return k
+end
+function Distributions.sampler(d::OrderedLogistic)
+    η, cutpoints = d.η, d.cutpoints
+    K = length(cutpoints) + 1
+    # evaluate probability mass function
+    ps = map(1:K) do k
+        exp(unsafe_logpdf_ordered_logistic(η, cutpoints, K, k))
+    end
+    return sampler(Categorical(ps))
+end
 
-    if all(x -> x > zero(x), ps)
-        return(k)
-    else
-        return(-Inf)
+# unsafe version without bounds checking
+function unsafe_logpdf_ordered_logistic(η, cutpoints, K, k::Int)
+    @inbounds begin
+        logp = if k == 1
+            -StatsFuns.log1pexp(η - cutpoints[k])
+        elseif k < K
+            tmp = StatsFuns.log1pexp(cutpoints[k-1] - η)
+            -tmp + StatsFuns.log1mexp(tmp - StatsFuns.log1pexp(cutpoints[k] - η))
+        else
+            -StatsFuns.log1pexp(cutpoints[k-1] - η)
+        end
     end
+    return logp
 end
 
 """
-Numerically stable Poisson log likelihood.
-* `logλ`: log of rate parameter
+    LogPoisson(logλ)
+
+The *Poisson distribution* with logarithmic parameterization of the rate parameter
+descibes the number of independent events occurring within a unit time interval, given the
+average rate of occurrence ``exp(logλ)``.
+
+The distribution has the probability mass function
+
+```math
+P(X = k) = \\frac{e^{k \\cdot logλ}{k!} e^{-e^{logλ}}, \\quad \\text{ for } k = 0,1,2,\\ldots.
+```
+
+See also: [`Poisson`](@ref)
 """
-struct LogPoisson{T<:Real} <: DiscreteUnivariateDistribution
+struct LogPoisson{T<:Real,S} <: DiscreteUnivariateDistribution
     logλ::T
+    λ::S
+
+    function LogPoisson{T}(logλ::T) where T
+        λ = exp(logλ)
+        return new{T,typeof(λ)}(logλ, λ)
+    end
 end
 
-function Distributions.logpdf(lp::LogPoisson, k::Int)
-    return k * lp.logλ - exp(lp.logλ) - SpecialFunctions.loggamma(k + 1)
+LogPoisson(logλ::Real) = LogPoisson{typeof(logλ)}(logλ)
+
+Base.minimum(d::LogPoisson) = 0
+Base.maximum(d::LogPoisson) = Inf
+
+function _logpdf(d::LogPoisson, k::Real)
+    _insupport = insupport(d, k)
+    _k = _insupport ? round(Int, k) : 0
+    logp = _k * d.logλ - d.λ - SpecialFunctions.loggamma(_k + 1)
+
+    return _insupport ? logp : oftype(logp, -Inf)
 end
 
+# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped
+Distributions.pdf(d::LogPoisson, k::Real) = exp(logpdf(d, k))
+Distributions.logpdf(d::LogPoisson, k::Integer) = _logpdf(d, k)
+Distributions.logpdf(d::LogPoisson, k::Real) = _logpdf(d, k)
+
+Base.rand(rng::Random.AbstractRNG, d::LogPoisson) = rand(rng, Poisson(d.λ))
+Distributions.sampler(d::LogPoisson) = sampler(Poisson(d.λ))
+
 Bijectors.logpdf_with_trans(d::NoDist{<:Univariate}, ::Real, ::Bool) = 0
 Bijectors.logpdf_with_trans(d::NoDist{<:Multivariate}, ::AbstractVector{<:Real}, ::Bool) = 0
 function Bijectors.logpdf_with_trans(d::NoDist{<:Multivariate}, x::AbstractMatrix{<:Real}, ::Bool)