From 4dcea021b150b839cc8b9e2494fef8966affbb4b Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Sun, 9 Dec 2018 03:15:35 +0000 Subject: [PATCH] Improve numerical stability (#619) * remove unnecessary lower bound check * make default precond to diagonal * add numerical stable Binomial with logit * fix DA complete adapt * add test of BinomialLogit * turn off pre-cond adapt by default --- REQUIRE | 1 + src/Turing.jl | 3 ++- src/models/distributions.jl | 24 ++++++++++++++++++++++++ src/samplers/adapt/adapt.jl | 12 ++++++++---- src/samplers/adapt/stepsize.jl | 15 +++++++-------- src/samplers/hmc.jl | 2 +- test/models.jl/distributions.jl | 11 +++++++++++ test/models.jl/skip_tests | 1 + 8 files changed, 55 insertions(+), 14 deletions(-) create mode 100644 test/models.jl/distributions.jl create mode 100644 test/models.jl/skip_tests diff --git a/REQUIRE b/REQUIRE index 880f16425..981e65603 100644 --- a/REQUIRE +++ b/REQUIRE @@ -9,6 +9,7 @@ Libtask 0.1.1 Flux 0.6.7 MacroTools StatsFuns 0.7.0 +SpecialFunctions Bijectors ProgressMeter 0.6.0 diff --git a/src/Turing.jl b/src/Turing.jl index 3fcd47217..c0c2d74c4 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -16,6 +16,7 @@ using ForwardDiff using Bijectors @reexport using MCMCChain using StatsFuns +using SpecialFunctions using Statistics using LinearAlgebra using ProgressMeter @@ -169,7 +170,7 @@ export TArray, tzeros, localcopy, IArray export @sym_str -export Flat, FlatPos +export Flat, FlatPos, BinomialLogit, VecBinomialLogit ################## # Inference code # diff --git a/src/models/distributions.jl b/src/models/distributions.jl index 0ac23ea48..bb7003b4c 100644 --- a/src/models/distributions.jl +++ b/src/models/distributions.jl @@ -26,3 +26,27 @@ Distributions.rand(d::FlatPos, n::Int) = Vector([rand() for _ = 1:n] .+ d.l) function Distributions.logpdf(d::FlatPos, x::AbstractVector{<:Real}) return any(x .<= d.l) ? -Inf : zero(x) end + +# Binomial with logit +struct BinomialLogit{T<:Real} <: DiscreteUnivariateDistribution + n::Int64 + logitp::T +end + +struct VecBinomialLogit{T<:Real} <: DiscreteUnivariateDistribution + n::Vector{Int64} + logitp::Vector{T} +end + +function logpdf_binomial_logit(n, logitp, k) + logcomb = -StatsFuns.log1p(n) - SpecialFunctions.lbeta(n - k + 1, k + 1) + return logcomb + k * logitp - n * StatsFuns.log1pexp(logitp) +end + +function Distributions.logpdf(d::BinomialLogit{<:Real}, k::Int64) + return logpdf_binomial_logit(d.n, d.logitp, k) +end + +function Distributions.logpdf(d::VecBinomialLogit{<:Real}, ks::Vector{Int64}) + return sum(logpdf_binomial_logit.(d.n, d.logitp, ks)) +end diff --git a/src/samplers/adapt/adapt.jl b/src/samplers/adapt/adapt.jl index 4cb31a017..d220dd038 100644 --- a/src/samplers/adapt/adapt.jl +++ b/src/samplers/adapt/adapt.jl @@ -45,8 +45,9 @@ end function ThreePhaseAdapter(spl::Sampler{<:AdaptiveHamiltonian}, ϵ::Real, dim::Integer) # Diagonal pre-conditioner - # pc = DiagPreConditioner(dim) - pc = DensePreConditioner(dim) + # pc = UnitPreConditioner() + pc = DiagPreConditioner(dim) + # pc = DensePreConditioner(dim) # Dual averaging for step size ssa = DualAveraging(spl, spl.info[:adapt_conf], ϵ) # Window parameters @@ -84,11 +85,14 @@ function adapt!(tp::ThreePhaseAdapter, stats::Real, θ; adapt_ϵ=false, adapt_M= if tp.state.n < tp.n_adapts tp.state.n += 1 if tp.state.n == tp.n_adapts + if adapt_ϵ + tp.ssa.state.ϵ = exp(tp.ssa.state.x_bar) + end @info " Adapted ϵ = $(getss(tp)), std = $(string(tp.pc)); $(tp.state.n) iterations is used for adaption." else if adapt_ϵ - is_updateϵ = is_windowend(tp) || tp.state.n == tp.n_adapts - adapt!(tp.ssa, stats, is_updateϵ) + is_updateμ = is_windowend(tp)# || tp.state.n == tp.n_adapts + adapt!(tp.ssa, stats, is_updateμ) end # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp diff --git a/src/samplers/adapt/stepsize.jl b/src/samplers/adapt/stepsize.jl index 95beddce3..1ccec105a 100644 --- a/src/samplers/adapt/stepsize.jl +++ b/src/samplers/adapt/stepsize.jl @@ -71,7 +71,7 @@ end function adapt_stepsize!(da::DualAveraging, stats::Real) @debug "adapting step size ϵ..." @debug "current α = $(stats)" - da.state.m = da.state.m + 1 + da.state.m += 1 m = da.state.m # Clip average MH acceptance probability. @@ -90,20 +90,19 @@ function adapt_stepsize!(da::DualAveraging, stats::Real) ϵ = exp(x) @debug "new ϵ = $(ϵ), old ϵ = $(da.state.ϵ)" - if isnan(ϵ) || isinf(ϵ) || ϵ <= 1e-3 + if isnan(ϵ) || isinf(ϵ) @warn "Incorrect ϵ = $ϵ; ϵ_previous = $(da.state.ϵ) is used instead." else da.state.ϵ = ϵ - da.state.x_bar, da.state.H_bar = x_bar, H_bar end + da.state.x_bar = x_bar + da.state.H_bar = H_bar end -function adapt!(da::DualAveraging, stats::Real, is_updateϵ::Bool) +function adapt!(da::DualAveraging, stats::Real, is_updateμ::Bool) adapt_stepsize!(da, stats) - if is_updateϵ - ϵ = exp(da.state.x_bar) - da.state.ϵ = ϵ - da.state.μ = computeμ(ϵ) + if is_updateμ + da.state.μ = computeμ(da.state.ϵ) reset!(da.state) end end diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl index bbf108533..0caa5055e 100644 --- a/src/samplers/hmc.jl +++ b/src/samplers/hmc.jl @@ -245,7 +245,7 @@ function step(model, spl::Sampler{<:Hamiltonian}, vi::VarInfo, is_first::Val{fal end if spl.alg isa AdaptiveHamiltonian - adapt!(spl.info[:wum], α, vi[spl], adapt_M=true, adapt_ϵ=true) + adapt!(spl.info[:wum], α, vi[spl], adapt_M=false, adapt_ϵ=true) end @debug "R -> X..." diff --git a/test/models.jl/distributions.jl b/test/models.jl/distributions.jl new file mode 100644 index 000000000..374d294d0 --- /dev/null +++ b/test/models.jl/distributions.jl @@ -0,0 +1,11 @@ +using Test +using Turing: BinomialLogit +using Distributions: Binomial, logpdf +using StatsFuns: logistic + +n = 10 +logitp = randn() +d1 = BinomialLogit(n, logitp) +d2 = Binomial(n, logistic(logitp)) +k = 3 +@test logpdf(d1, k) ≈ logpdf(d2, k) diff --git a/test/models.jl/skip_tests b/test/models.jl/skip_tests new file mode 100644 index 000000000..3d088bb70 --- /dev/null +++ b/test/models.jl/skip_tests @@ -0,0 +1 @@ +# tests to skip