From 4dcea021b150b839cc8b9e2494fef8966affbb4b Mon Sep 17 00:00:00 2001
From: Kai Xu <xukai921110@gmail.com>
Date: Sun, 9 Dec 2018 03:15:35 +0000
Subject: [PATCH] Improve numerical stability (#619)

* remove unnecessary lower bound check

* make default precond to diagonal

* add numerical stable Binomial with logit

* fix DA complete adapt

* add test of BinomialLogit

* turn off pre-cond adapt by default
---
 REQUIRE                         |  1 +
 src/Turing.jl                   |  3 ++-
 src/models/distributions.jl     | 24 ++++++++++++++++++++++++
 src/samplers/adapt/adapt.jl     | 12 ++++++++----
 src/samplers/adapt/stepsize.jl  | 15 +++++++--------
 src/samplers/hmc.jl             |  2 +-
 test/models.jl/distributions.jl | 11 +++++++++++
 test/models.jl/skip_tests       |  1 +
 8 files changed, 55 insertions(+), 14 deletions(-)
 create mode 100644 test/models.jl/distributions.jl
 create mode 100644 test/models.jl/skip_tests

diff --git a/REQUIRE b/REQUIRE
index 880f16425..981e65603 100644
--- a/REQUIRE
+++ b/REQUIRE
@@ -9,6 +9,7 @@ Libtask 0.1.1
 Flux 0.6.7
 MacroTools
 StatsFuns 0.7.0
+SpecialFunctions
 Bijectors
 
 ProgressMeter 0.6.0
diff --git a/src/Turing.jl b/src/Turing.jl
index 3fcd47217..c0c2d74c4 100644
--- a/src/Turing.jl
+++ b/src/Turing.jl
@@ -16,6 +16,7 @@ using ForwardDiff
 using Bijectors
 @reexport using MCMCChain
 using StatsFuns
+using SpecialFunctions
 using Statistics
 using LinearAlgebra
 using ProgressMeter
@@ -169,7 +170,7 @@ export TArray, tzeros, localcopy, IArray
 
 export @sym_str
 
-export Flat, FlatPos
+export Flat, FlatPos, BinomialLogit, VecBinomialLogit
 
 ##################
 # Inference code #
diff --git a/src/models/distributions.jl b/src/models/distributions.jl
index 0ac23ea48..bb7003b4c 100644
--- a/src/models/distributions.jl
+++ b/src/models/distributions.jl
@@ -26,3 +26,27 @@ Distributions.rand(d::FlatPos, n::Int) = Vector([rand() for _ = 1:n] .+ d.l)
 function Distributions.logpdf(d::FlatPos, x::AbstractVector{<:Real})
     return any(x .<= d.l) ? -Inf : zero(x)
 end
+
+# Binomial with logit
+struct BinomialLogit{T<:Real} <: DiscreteUnivariateDistribution
+    n::Int64
+    logitp::T
+end
+
+struct VecBinomialLogit{T<:Real} <: DiscreteUnivariateDistribution
+    n::Vector{Int64}
+    logitp::Vector{T}
+end
+
+function logpdf_binomial_logit(n, logitp, k)
+    logcomb = -StatsFuns.log1p(n) - SpecialFunctions.lbeta(n - k + 1, k + 1)
+    return logcomb + k * logitp - n * StatsFuns.log1pexp(logitp)
+end
+
+function Distributions.logpdf(d::BinomialLogit{<:Real}, k::Int64)
+    return logpdf_binomial_logit(d.n, d.logitp, k)
+end
+
+function Distributions.logpdf(d::VecBinomialLogit{<:Real}, ks::Vector{Int64})
+    return sum(logpdf_binomial_logit.(d.n, d.logitp, ks))
+end
diff --git a/src/samplers/adapt/adapt.jl b/src/samplers/adapt/adapt.jl
index 4cb31a017..d220dd038 100644
--- a/src/samplers/adapt/adapt.jl
+++ b/src/samplers/adapt/adapt.jl
@@ -45,8 +45,9 @@ end
 
 function ThreePhaseAdapter(spl::Sampler{<:AdaptiveHamiltonian}, ϵ::Real, dim::Integer)
     # Diagonal pre-conditioner
-    # pc = DiagPreConditioner(dim)
-    pc = DensePreConditioner(dim)
+    # pc = UnitPreConditioner()
+    pc = DiagPreConditioner(dim)
+    # pc = DensePreConditioner(dim)
     # Dual averaging for step size
     ssa = DualAveraging(spl, spl.info[:adapt_conf], ϵ)
     # Window parameters
@@ -84,11 +85,14 @@ function adapt!(tp::ThreePhaseAdapter, stats::Real, θ; adapt_ϵ=false, adapt_M=
     if tp.state.n < tp.n_adapts
         tp.state.n += 1
         if tp.state.n == tp.n_adapts
+            if adapt_ϵ
+                tp.ssa.state.ϵ = exp(tp.ssa.state.x_bar)
+            end
             @info " Adapted ϵ = $(getss(tp)), std = $(string(tp.pc)); $(tp.state.n) iterations is used for adaption."
         else
             if adapt_ϵ
-                is_updateϵ = is_windowend(tp) || tp.state.n == tp.n_adapts
-                adapt!(tp.ssa, stats, is_updateϵ)
+                is_updateμ = is_windowend(tp)# || tp.state.n == tp.n_adapts
+                adapt!(tp.ssa, stats, is_updateμ)
             end
 
             # Ref: https://github.com/stan-dev/stan/blob/develop/src/stan/mcmc/hmc/nuts/adapt_diag_e_nuts.hpp
diff --git a/src/samplers/adapt/stepsize.jl b/src/samplers/adapt/stepsize.jl
index 95beddce3..1ccec105a 100644
--- a/src/samplers/adapt/stepsize.jl
+++ b/src/samplers/adapt/stepsize.jl
@@ -71,7 +71,7 @@ end
 function adapt_stepsize!(da::DualAveraging, stats::Real)
     @debug "adapting step size ϵ..."
     @debug "current α = $(stats)"
-    da.state.m = da.state.m + 1
+    da.state.m += 1
     m = da.state.m
 
     # Clip average MH acceptance probability.
@@ -90,20 +90,19 @@ function adapt_stepsize!(da::DualAveraging, stats::Real)
     ϵ = exp(x)
     @debug "new ϵ = $(ϵ), old ϵ = $(da.state.ϵ)"
 
-    if isnan(ϵ) || isinf(ϵ) || ϵ <= 1e-3
+    if isnan(ϵ) || isinf(ϵ)
         @warn "Incorrect ϵ = $ϵ; ϵ_previous = $(da.state.ϵ) is used instead."
     else
         da.state.ϵ = ϵ
-        da.state.x_bar, da.state.H_bar = x_bar, H_bar
     end
+    da.state.x_bar = x_bar
+    da.state.H_bar = H_bar
 end
 
-function adapt!(da::DualAveraging, stats::Real, is_updateϵ::Bool)
+function adapt!(da::DualAveraging, stats::Real, is_updateμ::Bool)
     adapt_stepsize!(da, stats)
-    if is_updateϵ
-        ϵ = exp(da.state.x_bar)
-        da.state.ϵ = ϵ
-        da.state.μ = computeμ(ϵ)
+    if is_updateμ
+        da.state.μ = computeμ(da.state.ϵ)
         reset!(da.state)
     end
 end
diff --git a/src/samplers/hmc.jl b/src/samplers/hmc.jl
index bbf108533..0caa5055e 100644
--- a/src/samplers/hmc.jl
+++ b/src/samplers/hmc.jl
@@ -245,7 +245,7 @@ function step(model, spl::Sampler{<:Hamiltonian}, vi::VarInfo, is_first::Val{fal
     end
 
     if spl.alg isa AdaptiveHamiltonian
-        adapt!(spl.info[:wum], α, vi[spl], adapt_M=true, adapt_ϵ=true)
+        adapt!(spl.info[:wum], α, vi[spl], adapt_M=false, adapt_ϵ=true)
     end
 
     @debug "R -> X..."
diff --git a/test/models.jl/distributions.jl b/test/models.jl/distributions.jl
new file mode 100644
index 000000000..374d294d0
--- /dev/null
+++ b/test/models.jl/distributions.jl
@@ -0,0 +1,11 @@
+using Test
+using Turing: BinomialLogit
+using Distributions: Binomial, logpdf
+using StatsFuns: logistic
+
+n = 10
+logitp = randn()
+d1 = BinomialLogit(n, logitp)
+d2 = Binomial(n, logistic(logitp))
+k = 3
+@test logpdf(d1, k) ≈ logpdf(d2, k)
diff --git a/test/models.jl/skip_tests b/test/models.jl/skip_tests
new file mode 100644
index 000000000..3d088bb70
--- /dev/null
+++ b/test/models.jl/skip_tests
@@ -0,0 +1 @@
+# tests to skip