Skip to content

Commit

Permalink
Improve documentation and implementation of custom distributions (#1431)
Browse files Browse the repository at this point in the history
  • Loading branch information
devmotion authored Oct 9, 2020
1 parent c05472c commit 6dca57e
Show file tree
Hide file tree
Showing 3 changed files with 189 additions and 77 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "Turing"
uuid = "fce5fe82-541a-59a6-adf8-730c64b5f9a0"
version = "0.14.7"
version = "0.14.8"

[deps]
AbstractMCMC = "80f14c24-f653-4e6a-9b94-39d6b0f70001"
Expand Down
1 change: 1 addition & 0 deletions src/Turing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ using Tracker: Tracker

import AdvancedVI
import DynamicPPL: getspace, NoDist, NamedDist
import Random

const PROGRESS = Ref(true)

Expand Down
263 changes: 187 additions & 76 deletions src/stdlib/distributions.jl
Original file line number Diff line number Diff line change
@@ -1,139 +1,250 @@
import Random: AbstractRNG

# No info
"""
Flat <: ContinuousUnivariateDistribution
Flat()
The *flat distribution* is the improper distribution of real numbers that has the improper
probability density function
A distribution with support and density of one
everywhere.
```math
f(x) = 1.
```
"""
struct Flat <: ContinuousUnivariateDistribution end

Distributions.rand(rng::AbstractRNG, d::Flat) = rand(rng)
Distributions.logpdf(d::Flat, x::Real) = zero(x)
Distributions.minimum(d::Flat) = -Inf
Distributions.maximum(d::Flat) = +Inf
Base.minimum(::Flat) = -Inf
Base.maximum(::Flat) = Inf

Base.rand(rng::Random.AbstractRNG, d::Flat) = rand(rng)
Distributions.logpdf(::Flat, x::Real) = zero(x)

# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped
Distributions.pdf(d::Flat, x::Real) = exp(logpdf(d, x))

# For vec support
Distributions.logpdf(d::Flat, x::AbstractVector{<:Real}) = zero(x)
Distributions.logpdf(::Flat, x::AbstractVector{<:Real}) = zero(x)
Distributions.loglikelihood(::Flat, x::AbstractVector{<:Real}) = zero(eltype(x))

"""
FlatPos(l::Real)
A distribution with a lower bound of `l` and a density
of one at every `x` above `l`.
The *positive flat distribution* with real-valued parameter `l` is the improper distribution
of real numbers that has the improper probability density function
```math
f(x) = \\begin{cases}
0 & \\text{if } x \\leq l, \\\\
1 & \\text{otherwise}.
\\end{cases}
```
"""
struct FlatPos{T<:Real} <: ContinuousUnivariateDistribution
l::T
end

Distributions.rand(rng::AbstractRNG, d::FlatPos) = rand(rng) + d.l
Distributions.logpdf(d::FlatPos, x::Real) = x <= d.l ? -Inf : zero(x)
Distributions.minimum(d::FlatPos) = d.l
Distributions.maximum(d::FlatPos) = Inf
Base.minimum(d::FlatPos) = d.l
Base.maximum(d::FlatPos) = Inf

Base.rand(rng::Random.AbstractRNG, d::FlatPos) = rand(rng) + d.l
function Distributions.logpdf(d::FlatPos, x::Real)
z = float(zero(x))
return x <= d.l ? oftype(z, -Inf) : z
end

# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped
Distributions.pdf(d::FlatPos, x::Real) = exp(logpdf(d, x))

# For vec support
function Distributions.logpdf(d::FlatPos, x::AbstractVector{<:Real})
return any(x .<= d.l) ? -Inf : zero(x)
function Distributions.loglikelihood(d::FlatPos, x::AbstractVector{<:Real})
lower = d.l
T = float(eltype(x))
return any(xi <= lower for xi in x) ? T(-Inf) : zero(T)
end

"""
BinomialLogit(n<:Real, I<:Integer)
BinomialLogit(n, logitp)
The *Binomial distribution* with logit parameterization characterizes the number of
successes in a sequence of independent trials.
It has two parameters: `n`, the number of trials, and `logitp`, the logit of the probability
of success in an individual trial, with the distribution
```math
P(X = k) = {n \\choose k}{(\\text{logistic}(logitp))}^k (1 - \\text{logistic}(logitp))^{n-k}, \\quad \\text{ for } k = 0,1,2, \\ldots, n.
```
A univariate binomial logit distribution.
See also: [`Binomial`](@ref)
"""
struct BinomialLogit{T<:Real, I<:Integer} <: DiscreteUnivariateDistribution
n::I
struct BinomialLogit{T<:Real,S<:Real} <: DiscreteUnivariateDistribution
n::Int
logitp::T
end
logconstant::S

function logpdf_binomial_logit(n, logitp, k)
logcomb = -StatsFuns.log1p(n) - SpecialFunctions.logbeta(n - k + 1, k + 1)
return logcomb + k * logitp - n * StatsFuns.log1pexp(logitp)
function BinomialLogit{T}(n::Int, logitp::T) where T
n >= 0 || error("parameter `n` has to be non-negative")
logconstant = - (log1p(n) + n * StatsFuns.log1pexp(logitp))
return new{T,typeof(logconstant)}(n, logitp, logconstant)
end
end

function Distributions.logpdf(d::BinomialLogit{<:Real}, k::Int)
return logpdf_binomial_logit(d.n, d.logitp, k)
BinomialLogit(n::Int, logitp::Real) = BinomialLogit{typeof(logitp)}(n, logitp)

Base.minimum(::BinomialLogit) = 0
Base.maximum(d::BinomialLogit) = d.n

# TODO: only implement `logpdf(d, k::Real)` if support for Distributions < 0.24 is dropped
Distributions.pdf(d::BinomialLogit, k::Real) = exp(logpdf(d, k))
Distributions.logpdf(d::BinomialLogit, k::Real) = _logpdf(d, k)
Distributions.logpdf(d::BinomialLogit, k::Integer) = _logpdf(d, k)

function _logpdf(d::BinomialLogit, k::Real)
n, logitp, logconstant = d.n, d.logitp, d.logconstant
_insupport = insupport(d, k)
_k = _insupport ? round(Int, k) : 0
result = logconstant + _k * logitp - SpecialFunctions.logbeta(n - _k + 1, _k + 1)
return _insupport ? result : oftype(result, -Inf)
end

function Distributions.pdf(d::BinomialLogit{<:Real}, k::Int)
return exp(logpdf_binomial_logit(d.n, d.logitp, k))
function Base.rand(rng::Random.AbstractRNG, d::BinomialLogit)
return rand(rng, Binomial(d.n, logistic(d.logitp)))
end
Distributions.sampler(d::BinomialLogit) = sampler(Binomial(d.n, logistic(d.logitp)))

"""
BernoulliLogit(p<:Real)
BernoulliLogit(logitp::Real)
A univariate logit-parameterised Bernoulli distribution.
Create a univariate logit-parameterised Bernoulli distribution.
"""
function BernoulliLogit(logitp::Real)
return BinomialLogit(1, logitp)
end
BernoulliLogit(logitp::Real) = BinomialLogit(1, logitp)

"""
OrderedLogistic(η::Any, cutpoints<:AbstractVector)
An ordered logistic distribution.
OrderedLogistic(η, c::AbstractVector)
The *ordered logistic distribution* with real-valued parameter `η` and cutpoints `c` has the
probability mass function
```math
P(X = k) = \\begin{cases}
1 - \\text{logistic}(\\eta - c_1) & \\text{if } k = 1, \\\\
\\text{logistic}(\\eta - c_{k-1}) - \\text{logistic}(\\eta - c_k) & \\text{if } 1 < k < K, \\\\
\\text{logistic}(\\eta - c_{K-1}) & \\text{if } k = K,
\\end{cases}
```
where `K = length(c) + 1`.
"""
struct OrderedLogistic{T1, T2<:AbstractVector} <: DiscreteUnivariateDistribution
η::T1
cutpoints::T2
η::T1
cutpoints::T2

function OrderedLogistic(η, cutpoints)
if !issorted(cutpoints)
error("cutpoints are not sorted")
end
function OrderedLogistic{T1,T2}::T1, cutpoints::T2) where {T1,T2}
issorted(cutpoints) || error("cutpoints are not sorted")
return new{typeof(η), typeof(cutpoints)}(η, cutpoints)
end

end

function Distributions.logpdf(d::OrderedLogistic, k::Int)
K = length(d.cutpoints)+1
c = d.cutpoints

if k==1
logp= -softplus(-(c[k]-d.η)) #logp= log(logistic(c[k]-d.η))
elseif k<K
logp= log(logistic(c[k]-d.η) - logistic(c[k-1]-d.η))
else
logp= -softplus(c[k-1]-d.η) #logp= log(1-logistic(c[k-1]-d.η))
end
end

return logp
function OrderedLogistic(η, cutpoints::AbstractVector)
return OrderedLogistic{typeof(η),typeof(cutpoints)}(η, cutpoints)
end

Distributions.pdf(d::OrderedLogistic, k::Int) = exp(logpdf(d,k))
Base.minimum(d::OrderedLogistic) = 0
Base.maximum(d::OrderedLogistic) = length(d.cutpoints) + 1

function Distributions.rand(rng::AbstractRNG, d::OrderedLogistic)
cutpoints = d.cutpoints
η = d.η
# TODO: only implement `logpdf(d, k::Real)` if support for Distributions < 0.24 is dropped
Distributions.pdf(d::OrderedLogistic, k::Real) = exp(logpdf(d, k))
Distributions.logpdf(d::OrderedLogistic, k::Real) = _logpdf(d, k)
Distributions.logpdf(d::OrderedLogistic, k::Integer) = _logpdf(d, k)

K = length(cutpoints)+1
c = vcat(-Inf, cutpoints, Inf)
function _logpdf(d::OrderedLogistic, k::Real)
η, cutpoints = d.η, d.cutpoints
K = length(cutpoints) + 1

ps = [logistic- i[1]) - logistic- i[2]) for i in zip(c[1:(end-1)],c[2:end])]
_insupport = insupport(d, k)
_k = _insupport ? round(Int, k) : 1
logp = unsafe_logpdf_ordered_logistic(η, cutpoints, K, _k)

return _insupport ? logp : oftype(logp, -Inf)
end

function Base.rand(rng::Random.AbstractRNG, d::OrderedLogistic)
η, cutpoints = d.η, d.cutpoints
K = length(cutpoints) + 1
# evaluate probability mass function
ps = map(1:K) do k
exp(unsafe_logpdf_ordered_logistic(η, cutpoints, K, k))
end
k = rand(rng, Categorical(ps))
return k
end
function Distributions.sampler(d::OrderedLogistic)
η, cutpoints = d.η, d.cutpoints
K = length(cutpoints) + 1
# evaluate probability mass function
ps = map(1:K) do k
exp(unsafe_logpdf_ordered_logistic(η, cutpoints, K, k))
end
return sampler(Categorical(ps))
end

if all(x -> x > zero(x), ps)
return(k)
else
return(-Inf)
# unsafe version without bounds checking
function unsafe_logpdf_ordered_logistic(η, cutpoints, K, k::Int)
@inbounds begin
logp = if k == 1
-StatsFuns.log1pexp- cutpoints[k])
elseif k < K
tmp = StatsFuns.log1pexp(cutpoints[k-1] - η)
-tmp + StatsFuns.log1mexp(tmp - StatsFuns.log1pexp(cutpoints[k] - η))
else
-StatsFuns.log1pexp(cutpoints[k-1] - η)
end
end
return logp
end

"""
Numerically stable Poisson log likelihood.
* `logλ`: log of rate parameter
LogPoisson(logλ)
The *Poisson distribution* with logarithmic parameterization of the rate parameter
descibes the number of independent events occurring within a unit time interval, given the
average rate of occurrence ``exp(logλ)``.
The distribution has the probability mass function
```math
P(X = k) = \\frac{e^{k \\cdot logλ}{k!} e^{-e^{logλ}}, \\quad \\text{ for } k = 0,1,2,\\ldots.
```
See also: [`Poisson`](@ref)
"""
struct LogPoisson{T<:Real} <: DiscreteUnivariateDistribution
struct LogPoisson{T<:Real,S} <: DiscreteUnivariateDistribution
logλ::T
λ::S

function LogPoisson{T}(logλ::T) where T
λ = exp(logλ)
return new{T,typeof(λ)}(logλ, λ)
end
end

function Distributions.logpdf(lp::LogPoisson, k::Int)
return k * lp.logλ - exp(lp.logλ) - SpecialFunctions.loggamma(k + 1)
LogPoisson(logλ::Real) = LogPoisson{typeof(logλ)}(logλ)

Base.minimum(d::LogPoisson) = 0
Base.maximum(d::LogPoisson) = Inf

function _logpdf(d::LogPoisson, k::Real)
_insupport = insupport(d, k)
_k = _insupport ? round(Int, k) : 0
logp = _k * d.logλ - d.λ - SpecialFunctions.loggamma(_k + 1)

return _insupport ? logp : oftype(logp, -Inf)
end

# TODO: only implement `logpdf(d, ::Real)` if support for Distributions < 0.24 is dropped
Distributions.pdf(d::LogPoisson, k::Real) = exp(logpdf(d, k))
Distributions.logpdf(d::LogPoisson, k::Integer) = _logpdf(d, k)
Distributions.logpdf(d::LogPoisson, k::Real) = _logpdf(d, k)

Base.rand(rng::Random.AbstractRNG, d::LogPoisson) = rand(rng, Poisson(d.λ))
Distributions.sampler(d::LogPoisson) = sampler(Poisson(d.λ))

Bijectors.logpdf_with_trans(d::NoDist{<:Univariate}, ::Real, ::Bool) = 0
Bijectors.logpdf_with_trans(d::NoDist{<:Multivariate}, ::AbstractVector{<:Real}, ::Bool) = 0
function Bijectors.logpdf_with_trans(d::NoDist{<:Multivariate}, x::AbstractMatrix{<:Real}, ::Bool)
Expand Down

2 comments on commit 6dca57e

@devmotion
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/22645

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.14.8 -m "<description of version>" 6dca57ed349e7c8e4242d56cde6d5f5b243ecae2
git push origin v0.14.8

Please sign in to comment.