diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 00000000..1e72b507 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1 @@ +style="blue" diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 00000000..41ea6187 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,28 @@ +name: Format + +on: + push: + branches: + - main + pull_request: + +jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Format code + run: | + using Pkg + Pkg.add(; name="JuliaFormatter", uuid="98e50ef6-434e-11e9-1051-2b60c6c9e899") + using JuliaFormatter + format("."; verbose=true) + shell: julia --color=yes {0} + - uses: reviewdog/action-suggester@v1 + if: github.event_name == 'pull_request' + with: + tool_name: JuliaFormatter + fail_on_error: true diff --git a/src/InferenceDiagnostics.jl b/src/InferenceDiagnostics.jl index ba779918..c992b91f 100644 --- a/src/InferenceDiagnostics.jl +++ b/src/InferenceDiagnostics.jl @@ -1,16 +1,16 @@ module InferenceDiagnostics -import AbstractFFTs -import DataAPI -import Distributions -import MLJModelInterface -import SpecialFunctions -import StatsBase -import Tables +using AbstractFFTs: AbstractFFTs +using DataAPI: DataAPI +using Distributions: Distributions +using MLJModelInterface: MLJModelInterface +using SpecialFunctions: SpecialFunctions +using StatsBase: StatsBase +using Tables: Tables -import LinearAlgebra -import Random -import Statistics +using LinearAlgebra: LinearAlgebra +using Random: Random +using Statistics: Statistics export discretediag export ess_rhat, ESSMethod, FFTESSMethod, BDAESSMethod diff --git a/src/discretediag.jl b/src/discretediag.jl index d693ed49..22f8dc6f 100644 --- a/src/discretediag.jl +++ b/src/discretediag.jl @@ -1,27 +1,26 @@ -function update_hangartner_temp_vars!(u::Matrix{Int}, X::Matrix{Int}, - t::Int) - d = size(X,2) +function update_hangartner_temp_vars!(u::Matrix{Int}, X::Matrix{Int}, t::Int) + d = size(X, 2) - for j in 1:d - u[X[t, j], j] += 1 - end + for j in 1:d + u[X[t, j], j] += 1 + end end function hangartner_inner(Y::AbstractMatrix, m::Int) - ## setup temp vars - n, d = size(Y) + ## setup temp vars + n, d = size(Y) - # Count for each category in each chain - u = zeros(Int, m, d) - v = zeros(Int, m, d) + # Count for each category in each chain + u = zeros(Int, m, d) + v = zeros(Int, m, d) - for t in 1:n - # fill out temp vars - update_hangartner_temp_vars!(u, Y, t) - end - phia, chi_stat, m_tot = weiss_sub(u, v, n) + for t in 1:n + # fill out temp vars + update_hangartner_temp_vars!(u, Y, t) + end + phia, chi_stat, m_tot = weiss_sub(u, v, n) - return (n * sum(chi_stat), m_tot) + return (n * sum(chi_stat), m_tot) end """ @@ -32,366 +31,378 @@ Assess the convergence of the MCMC chains with the Weiss procedure. It computes ``\\frac{X^2}{c}`` and evaluates a p-value from the ``\\chi^2`` distribution with ``(|R| − 1)(s − 1)`` degrees of freedom. 
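Here ``c`` is the correction factor ``\\hat{c} = (1 + \\hat{\\phi})/(1 - \\hat{\\phi})``, where ``\\hat{\\phi}`` is the autocorrelation estimate computed in `weiss_sub`.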
""" function weiss(X::AbstractMatrix) - ## number of iterations, number of chains - n, d = size(X) - - ## mapping of values to integers - v_dict = Dict{eltype(X), Int}() - - ## max unique categories - mc = map(c -> length(unique(X[:,c])), 1:d) - m = length(unique(X)) + ## number of iterations, number of chains + n, d = size(X) + + ## mapping of values to integers + v_dict = Dict{eltype(X),Int}() + + ## max unique categories + mc = map(c -> length(unique(X[:, c])), 1:d) + m = length(unique(X)) + + ## counter of number of unique values in each chain + r0 = 0 + + ## Count for each category in each chain + u = zeros(Int, m, d) + + ## Number of times a category did not transition in each chain + v = zeros(Int, m, d) + + for t in 1:n + for c in 1:d + if !(X[t, c] in keys(v_dict)) + r0 += 1 + v_dict[X[t, c]] = r0 + end + idx1 = v_dict[X[t, c]] + u[idx1, c] += 1 + + if t > 1 + if X[t - 1, c] == X[t, c] + v[idx1, c] += 1 + end + end + end + end + phia, chi_stat, m_tot = weiss_sub(u, v, n) + ca = (1 + phia) / (1 - phia) + stat = (n / ca) * sum(chi_stat) + pval = NaN + if ((m_tot - 1) * (d - 1)) >= 1 + pval = 1 - Distributions.cdf(Distributions.Chisq((m_tot - 1) * (d - 1)), stat) + end - ## counter of number of unique values in each chain - r0 = 0 + return (stat, m_tot, pval, ca) +end - ## Count for each category in each chain - u = zeros(Int, m, d) +function weiss_sub(u::Matrix{Int}, v::Matrix{Int}, t::Int) + m, d = size(u) + nt = 0.0 + dt = 0.0 + m_tot = 0 + + mp = zeros(Float64, m, d) + ma = zeros(Float64, m) + phia = 0.0 + ca = 0.0 + df = 0.0 + + chi_stat = zeros(Float64, d) + + for j in 1:m + p1 = 0.0 + p2 = 0.0 + for l in 1:d + #aggregate + p1 += v[j, l] / (d * (t - 1)) + p2 += u[j, l] / (d * t) - ## Number of times a category did not transition in each chain - v = zeros(Int, m, d) + #per chain + mp[j, l] = u[j, l] / t + ma[j] += u[j, l] / (d * t) + end + nt += p1 + dt += p2^2 + + if ma[j] > 0 + m_tot += 1 + for l in 1:d + chi_stat[l] += (mp[j, l] - ma[j])^2 / ma[j] + end + end + end + phia = 1.0 + (1.0 / t) - ((1 - nt) / (1 - dt)) + phia = min(max(phia, 0.0), 1.0 - eps()) + return (phia, chi_stat, m_tot) +end - for t in 1:n - for c in 1:d - if !(X[t,c] in keys(v_dict)) - r0 += 1 - v_dict[X[t,c]] = r0 - end - idx1 = v_dict[X[t,c]] - u[idx1,c] += 1 +function update_billingsley_temp_vars!(f::Array{Int,3}, X::Matrix{Int}, t::Int) + d = size(X, 2) + for j in 1:d + if t > 1 + f[X[t - 1, j], X[t, j], j] += 1 + end + end +end - if t > 1 - if X[t-1,c] == X[t,c] - v[idx1, c] += 1 +function billingsley_sub(f::Array{Int,3}) + df = 0.0 + stat = 0.0 + + m, d = size(f)[2:3] + + # marginal transitions, i.e. 
+ # number of transitions from each category + mf = mapslices(sum, f; dims=[2]) + + # For each category, number of chains for which + # that category was present + A = vec(mapslices((x) -> sum(x .> 0), mf; dims=[3])) + + # For each category, number of categories it + # transitioned to + B = vec(mapslices((x) -> sum(x .> 0), mapslices(sum, f; dims=[3]); dims=[2])) + + # transition probabilities in each chain + P = f ./ mf + + # transition probabilities + mP = (mapslices(sum, f; dims=[3]) ./ mapslices(sum, mf; dims=[3])) + mP = reshape(mP, size(mP)[1:2]) + + idx = findall((A .* B) .> 0) + for j in idx + #df for billingsley + df += (A[j] - 1) * (B[j] - 1) + + #billingsley + for k in idx + if (mP[j, k] > 0.0) + for l in 1:d + if mf[j, 1, l] > 0 && isfinite(P[j, k, l]) + stat += mf[j, 1, l] * (P[j, k, l] - mP[j, k])^2 / mP[j, k] + end + end + end end - end end - end - phia, chi_stat, m_tot = weiss_sub(u, v, n) - ca = (1 + phia) / (1 - phia) - stat = (n / ca) * sum(chi_stat) - pval = NaN - if ((m_tot - 1) * (d - 1)) >= 1 - pval = 1 - Distributions.cdf(Distributions.Chisq((m_tot - 1) * (d - 1)), stat) - end - - return (stat, m_tot, pval, ca) + return (stat, df, mP) end -function weiss_sub(u::Matrix{Int}, v::Matrix{Int}, t::Int) - m, d = size(u) - nt = 0.0 - dt = 0.0 - m_tot = 0 - - mp = zeros(Float64, m, d) - ma = zeros(Float64, m) - phia = 0.0 - ca = 0.0 - df = 0.0 - - chi_stat = zeros(Float64, d) - - for j in 1:m - p1 = 0.0 - p2 = 0.0 - for l in 1:d - #aggregate - p1 += v[j, l] / (d * (t - 1)) - p2 += u[j, l] / (d * t) - - #per chain - mp[j, l] = u[j, l] / t - ma[j] += u[j, l] / (d * t) +function bd_inner(Y::AbstractMatrix, m::Int) + num_iters, num_chains = size(Y) + # Transition matrix for each chain + f = zeros(Int, m, m, num_chains) + + for t in 1:num_iters + # fill out temp vars + update_billingsley_temp_vars!(f, Y, t) end - nt += p1 - dt += p2 ^ 2 - - if ma[j] > 0 - m_tot += 1 - for l in 1:d - chi_stat[l] += (mp[j,l] - ma[j]) ^ 2 / ma[j] - end + return billingsley_sub(f) +end + +function simulate_NDARMA( + N::Int, p::Int, q::Int, prob::Vector{Float64}, phi::Vector{Float64} +) + sampler1 = Distributions.sampler(Distributions.Multinomial(1, phi)) + sampler2 = Distributions.sampler(Distributions.Categorical(prob)) + + alphabeta = Vector{Int}(undef, length(phi)) + alphabeta_p = @view(alphabeta[1:p]) + alphabeta_q = @view(alphabeta[(end - q):end]) + eps = Vector{Int}(undef, q + 1) + X = Vector{Int}(undef, N) + Random.rand!(sampler2, @view(X[1:p])) + for t in (p + 1):N + Random.rand!(sampler1, alphabeta) + Random.rand!(sampler2, eps) + X[t] = + LinearAlgebra.dot(alphabeta_p, @view(X[(t - p):(t - 1)])) + + LinearAlgebra.dot(alphabeta_q, eps) end - end - phia = 1.0 + (1.0 / t) - ((1 - nt) / (1 - dt)) - phia = min(max(phia,0.0),1.0 - eps()) - return (phia, chi_stat, m_tot) + + return X end -function update_billingsley_temp_vars!(f::Array{Int,3}, - X::Matrix{Int}, - t::Int) - d = size(X,2) - for j in 1:d - if t > 1 - f[X[t-1, j], X[t,j], j] += 1 +function simulate_MC(N::Int, P::Matrix{Float64}) + X = zeros(Int, N) + n, m = size(P) + X[1] = StatsBase.sample(1:n) + for i in 2:N + X[i] = StatsBase.wsample(1:n, vec(P[X[i - 1], :])) end - end + return X end -function billingsley_sub(f::Array{Int,3}) - df = 0.0 - stat = 0.0 +function diag_all( + X::AbstractMatrix, method::Symbol, nsim::Int, start_iter::Int, step_size::Int +) - m, d = size(f)[2:3] + ## number of iterations, number of chains + n, d = size(X) - # marginal transitions, i.e. 
- # number of transitions from each category - mf = mapslices(sum, f, dims = [2]) + ## mapping of values to integers + v_dict = Dict{eltype(X),Int}() - # For each category, number of chains for which - # that category was present - A = vec(mapslices( (x)-> sum(x.>0), mf, dims = [3])) + ## max unique categories + mc = map(c -> length(unique(X[:, c])), 1:d) + m = length(unique(X)) - # For each category, number of categories it - # transitioned to - B = vec(mapslices((x)-> sum(x.>0), mapslices(sum,f, dims = [3]), dims = [2])) + ## counter of number of unique values in each chain + r0 = 0 - # transition probabilities in each chain - P = f ./ mf + ## Count for each category in each chain + u = zeros(Int, m, d) - # transition probabilities - mP = (mapslices(sum,f,dims = [3]) ./ mapslices(sum,mf,dims = [3])) - mP = reshape(mP, size(mP)[1:2]) + ## Number of times a category did not transition in each chain + v = zeros(Int, m, d) - idx = findall((A .* B) .> 0) - for j in idx - #df for billingsley - df += (A[j] - 1) * (B[j] - 1) + ## transition matrix for each chain + f = zeros(Int, m, m, d) - #billingsley - for k in idx - if (mP[j,k] > 0.0) - for l in 1:d - if mf[j,1,l] > 0 && isfinite(P[j,k,l]) - stat += mf[j,1,l] * (P[j,k,l] - mP[j,k]) ^ 2 / mP[j,k] - end + length_result = length(start_iter:step_size:n) + result = ( + stat=Vector{Float64}(undef, length_result), + df=Vector{Float64}(undef, length_result), + pvalue=Vector{Float64}(undef, length_result), + ) + result_iter = 1 + for t in 1:n + for c in 1:d + if !(X[t, c] in keys(v_dict)) + r0 += 1 + v_dict[X[t, c]] = r0 + end + idx1 = v_dict[X[t, c]] + u[idx1, c] += 1 + + if t > 1 + idx2 = v_dict[X[t - 1, c]] + f[idx1, idx2, c] += 1 + + if X[t - 1, c] == X[t, c] + v[idx1, c] += 1 + end + end end - end - end - end - return (stat, df, mP) -end -function bd_inner(Y::AbstractMatrix, m::Int) - num_iters, num_chains = size(Y) - # Transition matrix for each chain - f = zeros(Int, m, m, num_chains) - - for t in 1:num_iters - # fill out temp vars - update_billingsley_temp_vars!(f, Y, t) - end - billingsley_sub(f) + if ((t >= start_iter) && (((t - start_iter) % step_size) == 0)) + phia, chi_stat, m_tot = weiss_sub(u, v, t) + hot_stat, df, mP = billingsley_sub(f) + + phat = mapslices(sum, u; dims=[2])[:, 1] / sum(mapslices(sum, u; dims=[2])) + ca = (1 + phia) / (1 - phia) + stat = NaN + pval = NaN + df0 = NaN + + if method == :hangartner + stat = t * sum(chi_stat) + df0 = (m - 1) * (d - 1) + if m > 1 && !isnan(stat) + pval = + 1 - Distributions.cdf(Distributions.Chisq((m - 1) * (d - 1)), stat) + end + elseif method == :weiss + stat = (t / ca) * sum(chi_stat) + df0 = (m - 1) * (d - 1) + pval = NaN + if m > 1 && !isnan(stat) + pval = + 1 - Distributions.cdf(Distributions.Chisq((m - 1) * (d - 1)), stat) + end + elseif method == :DARBOOT + stat = t * sum(chi_stat) + bstats = zeros(Float64, nsim) + for b in 1:nsim + Y = reduce( + hcat, + [simulate_NDARMA(t, 1, 0, phat, [phia, 1 - phia]) for j in 1:d], + ) + s = hangartner_inner(Y, m)[1] + bstats[b] = s + end + non_nan_bstats = filter(!isnan, bstats) + df0 = Statistics.mean(non_nan_bstats) + pval = Statistics.mean(stat <= x for x in non_nan_bstats) + elseif method == :MCBOOT + bstats = zeros(Float64, nsim) + for b in 1:nsim + Y = reduce(hcat, [simulate_MC(t, mP) for j in 1:d]) + s = hangartner_inner(Y, m)[1] + bstats[b] = s + end + non_nan_bstats = filter(!isnan, bstats) + df0 = Statistics.mean(non_nan_bstats) + pval = Statistics.mean(stat <= x for x in non_nan_bstats) + elseif method == :billingsley + stat = 
hot_stat + df0 = df + if df > 0 && !isnan(hot_stat) + pval = 1 - Distributions.cdf(Distributions.Chisq(df), hot_stat) + end + elseif method == :billingsleyBOOT + stat = hot_stat + bstats = zeros(Float64, nsim) + for b in 1:nsim + Y = reduce(hcat, [simulate_MC(t, mP) for j in 1:d]) + (s, sd) = bd_inner(Y, m)[1:2] + bstats[b] = s / sd + end + non_nan_bstats = filter(!isnan, bstats) + df0 = Statistics.mean(non_nan_bstats) + statodf = stat / df + pval = Statistics.mean(statodf <= x for x in non_nan_bstats) + else + error("Unexpected") + end + result.stat[result_iter] = stat + result.df[result_iter] = df0 + result.pvalue[result_iter] = pval + result_iter += 1 + end + end + return result end -function simulate_NDARMA(N::Int, p::Int, q::Int, prob::Vector{Float64}, - phi::Vector{Float64}) - sampler1 = Distributions.sampler(Distributions.Multinomial(1, phi)) - sampler2 = Distributions.sampler(Distributions.Categorical(prob)) - - alphabeta = Vector{Int}(undef, length(phi)) - alphabeta_p = @view(alphabeta[1:p]) - alphabeta_q = @view(alphabeta[(end-q):end]) - eps = Vector{Int}(undef, q + 1) - X = Vector{Int}(undef, N) - Random.rand!(sampler2, @view(X[1:p])) - for t in (p + 1):N - Random.rand!(sampler1, alphabeta) - Random.rand!(sampler2, eps) - X[t] = LinearAlgebra.dot(alphabeta_p, @view(X[(t - p):(t - 1)])) + - LinearAlgebra.dot(alphabeta_q, eps) - end - - return X -end +function discretediag_sub( + c::AbstractArray{<:Real,3}, + frac::Real, + method::Symbol, + nsim::Int, + start_iter::Int, + step_size::Int, +) + num_iters, num_vars, num_chains = size(c) + + ## Between-chain diagnostic + length_results = length(start_iter:step_size:num_iters) + plot_vals_stat = Matrix{Float64}(undef, length_results, num_vars) + plot_vals_pval = Matrix{Float64}(undef, length_results, num_vars) + between_chain_vals = ( + stat=Vector{Float64}(undef, num_vars), + df=Vector{Float64}(undef, num_vars), + pvalue=Vector{Float64}(undef, num_vars), + ) + for j in 1:num_vars + X = convert(AbstractMatrix{Int}, c[:, j, :]) + result = diag_all(X, method, nsim, start_iter, step_size) -function simulate_MC(N::Int, P::Matrix{Float64}) - X = zeros(Int, N) - n, m = size(P) - X[1] = StatsBase.sample(1:n) - for i in 2:N - X[i] = StatsBase.wsample(1:n, vec(P[X[i-1], :]) ) - end - return X -end + plot_vals_stat[:, j] .= result.stat ./ result.df + plot_vals_pval[:, j] .= result.pvalue -function diag_all(X::AbstractMatrix, method::Symbol, - nsim::Int, start_iter::Int, step_size::Int) - - ## number of iterations, number of chains - n, d = size(X) - - ## mapping of values to integers - v_dict = Dict{eltype(X), Int}() - - ## max unique categories - mc = map(c -> length(unique(X[:,c])), 1:d) - m = length(unique(X)) - - ## counter of number of unique values in each chain - r0 = 0 - - ## Count for each category in each chain - u = zeros(Int, m, d) - - ## Number of times a category did not transition in each chain - v = zeros(Int, m, d) - - ## transition matrix for each chain - f = zeros(Int, m, m, d) - - length_result = length(start_iter:step_size:n) - result = ( - stat = Vector{Float64}(undef, length_result), - df = Vector{Float64}(undef, length_result), - pvalue = Vector{Float64}(undef, length_result), - ) - result_iter = 1 - for t in 1:n - for c in 1:d - if !(X[t,c] in keys(v_dict)) - r0 += 1 - v_dict[X[t,c]] = r0 - end - idx1 = v_dict[X[t,c]] - u[idx1,c] += 1 - - if t > 1 - idx2 = v_dict[X[t-1,c]] - f[idx1, idx2, c] += 1 - - if X[t-1,c] == X[t,c] - v[idx1, c] += 1 - end - end + between_chain_vals.stat[j] = result.stat[end] + 
between_chain_vals.df[j] = result.df[end] + between_chain_vals.pvalue[j] = result.pvalue[end] end - if ((t >= start_iter) && (((t - start_iter) % step_size) == 0)) - phia, chi_stat, m_tot = weiss_sub(u, v, t) - hot_stat, df, mP = billingsley_sub(f) - - phat = mapslices(sum,u,dims = [2])[:,1] / sum(mapslices(sum,u,dims = [2])) - ca = (1 + phia) / (1 - phia) - stat = NaN - pval = NaN - df0 = NaN - - if method == :hangartner - stat = t * sum(chi_stat) - df0 = (m - 1) * (d - 1) - if m > 1 && !isnan(stat) - pval = 1 - Distributions.cdf(Distributions.Chisq( (m - 1) * (d - 1)), stat) - end - elseif method == :weiss - stat = (t / ca) * sum(chi_stat) - df0 = (m - 1) * (d - 1) - pval = NaN - if m > 1 && !isnan(stat) - pval = 1 - Distributions.cdf(Distributions.Chisq( (m - 1) * (d - 1)), stat) - end - elseif method == :DARBOOT - stat = t * sum(chi_stat) - bstats = zeros(Float64, nsim) - for b in 1:nsim - Y = reduce(hcat, [simulate_NDARMA(t, 1, 0, phat, [phia, 1-phia]) for j in 1:d]) - s = hangartner_inner(Y, m)[1] - bstats[b] = s - end - non_nan_bstats = filter(!isnan, bstats) - df0 = Statistics.mean(non_nan_bstats) - pval = Statistics.mean(stat <= x for x in non_nan_bstats) - elseif method == :MCBOOT - bstats = zeros(Float64, nsim) - for b in 1:nsim - Y = reduce(hcat, [simulate_MC(t, mP) for j in 1:d]) - s = hangartner_inner(Y, m)[1] - bstats[b] = s - end - non_nan_bstats = filter(!isnan, bstats) - df0 = Statistics.mean(non_nan_bstats) - pval = Statistics.mean(stat <= x for x in non_nan_bstats) - elseif method == :billingsley - stat = hot_stat - df0 = df - if df > 0 && !isnan(hot_stat) - pval = 1 - Distributions.cdf(Distributions.Chisq(df), hot_stat) - end - elseif method == :billingsleyBOOT - stat = hot_stat - bstats = zeros(Float64, nsim) - for b in 1:nsim - Y = reduce(hcat, [simulate_MC(t, mP) for j in 1:d]) - (s,sd) = bd_inner(Y, m)[1:2] - bstats[b] = s/sd + ## Within-chain diagnostic + within_chain_vals = ( + stat=Matrix{Float64}(undef, num_vars, num_chains), + df=Matrix{Float64}(undef, num_vars, num_chains), + pvalue=Matrix{Float64}(undef, num_vars, num_chains), + ) + for k in 1:num_chains + for j in 1:num_vars + x = convert(AbstractVector{Int}, c[:, j, k]) + + idx1 = 1:round(Int, frac * num_iters) + idx2 = round(Int, num_iters - frac * num_iters + 1):num_iters + x1 = x[idx1] + x2 = x[idx2] + n_min = min(length(x1), length(x2)) + Y = [x1[1:n_min] x2[(end - n_min + 1):end]] + + result = diag_all(Y, method, nsim, n_min, step_size) + within_chain_vals.stat[j, k] = result.stat[end] + within_chain_vals.df[j, k] = result.df[end] + within_chain_vals.pvalue[j, k] = result.pvalue[end] end - non_nan_bstats = filter(!isnan, bstats) - df0 = Statistics.mean(non_nan_bstats) - statodf = stat / df - pval = Statistics.mean(statodf <= x for x in non_nan_bstats) - else - error("Unexpected") - end - result.stat[result_iter] = stat - result.df[result_iter] = df0 - result.pvalue[result_iter] = pval - result_iter += 1 - end - end - return result -end - -function discretediag_sub(c::AbstractArray{<:Real,3}, frac::Real, method::Symbol, - nsim::Int, start_iter::Int, step_size::Int) - num_iters, num_vars, num_chains = size(c) - - ## Between-chain diagnostic - length_results = length(start_iter:step_size:num_iters) - plot_vals_stat = Matrix{Float64}(undef, length_results, num_vars) - plot_vals_pval = Matrix{Float64}(undef, length_results, num_vars) - between_chain_vals = ( - stat = Vector{Float64}(undef, num_vars), - df = Vector{Float64}(undef, num_vars), - pvalue = Vector{Float64}(undef, num_vars), - ) - for j in 
1:num_vars
-    X = convert(AbstractMatrix{Int}, c[:, j, :])
-    result = diag_all(X, method, nsim, start_iter, step_size)
-
-    plot_vals_stat[:,j] .= result.stat ./ result.df
-    plot_vals_pval[:,j] .= result.pvalue
-
-    between_chain_vals.stat[j] = result.stat[end]
-    between_chain_vals.df[j] = result.df[end]
-    between_chain_vals.pvalue[j] = result.pvalue[end]
-  end
-
-  ## Within-chain diagnostic
-  within_chain_vals = (
-    stat = Matrix{Float64}(undef, num_vars, num_chains),
-    df = Matrix{Float64}(undef, num_vars, num_chains),
-    pvalue = Matrix{Float64}(undef, num_vars, num_chains),
-  )
-  for k in 1:num_chains
-    for j in 1:num_vars
-      x = convert(AbstractVector{Int}, c[:, j, k])
-
-      idx1 = 1:round(Int, frac * num_iters)
-      idx2 = round(Int, num_iters - frac * num_iters + 1):num_iters
-      x1 = x[idx1]
-      x2 = x[idx2]
-      n_min = min(length(x1), length(x2))
-      Y = [x1[1:n_min] x2[(end - n_min + 1):end]]
-
-      result = diag_all(Y, method, nsim, n_min, step_size)
-      within_chain_vals.stat[j, k] = result.stat[end]
-      within_chain_vals.df[j, k] = result.df[end]
-      within_chain_vals.pvalue[j, k] = result.pvalue[end]
         end
-  end
-  return between_chain_vals, within_chain_vals, plot_vals_stat, plot_vals_pval
+    return between_chain_vals, within_chain_vals, plot_vals_stat, plot_vals_pval
 end

 """
@@ -401,16 +412,12 @@
Compute discrete diagnostic where `method` can be one of `:weiss`, `:hangartner`,
`:DARBOOT`, `:MCBOOT`, `:billingsley`, and `:billingsleyBOOT`.
"""
 function discretediag(
-    chains::AbstractArray{<:Real,3};
-    frac::Real = 0.3,
-    method::Symbol = :weiss,
-    nsim::Int = 1000
+    chains::AbstractArray{<:Real,3}; frac::Real=0.3, method::Symbol=:weiss, nsim::Int=1000
 )
     valid_methods = (:weiss, :hangartner, :DARBOOT, :MCBOOT, :billingsley, :billingsleyBOOT)
-    method in valid_methods ||
-        throw(ArgumentError(
-            "`method` must be one of :" * join(valid_methods, ", :", " and :"),
-        ))
+    method in valid_methods || throw(
+        ArgumentError("`method` must be one of :" * join(valid_methods, ", :", " and :")),
+    )
     0 < frac < 1 || throw(ArgumentError("`frac` must be in (0,1)"))

     num_iters = size(chains, 1)
diff --git a/src/ess.jl b/src/ess.jl
index fff2a4d0..7d120a1a 100644
--- a/src/ess.jl
+++ b/src/ess.jl
@@ -123,14 +123,14 @@ function update!(cache::FFTESSCache)
     @. samples_cache = abs2(samples_cache)
     cache.invplan * samples_cache

-    nothing
+    return nothing
 end

 function update!(cache::BDAESSCache)
     # recompute mean of within-chain variances
     cache.mean_chain_var = Statistics.mean(cache.chain_var)

-    return
+    return nothing
 end

 function mean_autocov(k::Int, cache::ESSCache)
@@ -202,8 +202,8 @@ Estimate the effective sample size and the potential scale reduction.
 """
 function ess_rhat(
     chains::AbstractArray{<:Union{Missing,Real},3};
-    method::AbstractESSMethod = ESSMethod(),
-    maxlag::Int = 250
+    method::AbstractESSMethod=ESSMethod(),
+    maxlag::Int=250,
 )
     # compute size of matrices (each chain is split!)
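    # (each chain is split into two halves, which doubles the number of chains and
    # halves the iteration count, so the potential scale reduction also picks up
    # trends within a single chain)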
niter = size(chains, 1) ÷ 2
@@ -249,12 +249,14 @@ function ess_rhat(
     # calculate within-chain variance
     @inbounds for j in 1:nchains
-        chain_var[j] = Statistics.var(view(samples, :, j); mean = chain_mean[j], corrected = true)
+        chain_var[j] = Statistics.var(
+            view(samples, :, j); mean=chain_mean[j], corrected=true
+        )
     end
     W = Statistics.mean(chain_var)

     # compute variance estimator var₊, which accounts for between-chain variance as well
-    var₊ = correctionfactor * W + Statistics.var(chain_mean; corrected = true)
+    var₊ = correctionfactor * W + Statistics.var(chain_mean; corrected=true)
     inv_var₊ = inv(var₊)

     # estimate the potential scale reduction
@@ -315,10 +317,16 @@ function copyto_split!(out::AbstractMatrix, x::AbstractMatrix)
     # check dimensions
     nrows_out, ncols_out = size(out)
     nrows_x, ncols_x = size(x)
-    ncols_out == 2 * ncols_x ||
-        throw(DimensionMismatch("the output matrix must have twice as many columns as the input matrix"))
-    nrows_out == nrows_x ÷ 2 ||
-        throw(DimensionMismatch("the output matrix must have half as many rows as as the input matrix"))
+    ncols_out == 2 * ncols_x || throw(
+        DimensionMismatch(
+            "the output matrix must have twice as many columns as the input matrix"
+        ),
+    )
+    nrows_out == nrows_x ÷ 2 || throw(
+        DimensionMismatch(
+            "the output matrix must have half as many rows as the input matrix"
+        ),
+    )

     jout = 0
     offset = iseven(nrows_x) ? nrows_out : nrows_out + 1
@@ -336,5 +344,5 @@ function copyto_split!(out::AbstractMatrix, x::AbstractMatrix)
         end
     end

-    out
+    return out
 end
diff --git a/src/gelmandiag.jl b/src/gelmandiag.jl
index 19a38168..e080ed0e 100644
--- a/src/gelmandiag.jl
+++ b/src/gelmandiag.jl
@@ -1,9 +1,6 @@
 #################### Gelman, Rubin, and Brooks Diagnostics ####################

-function _gelmandiag(
-    psi::AbstractArray{<:Real,3};
-    alpha::Real = 0.05
-)
+function _gelmandiag(psi::AbstractArray{<:Real,3}; alpha::Real=0.05)
     niters, nparams, nchains = size(psi)
     nchains > 1 || error("Gelman diagnostic requires at least 2 chains")

@@ -22,9 +19,12 @@ function _gelmandiag(
     psibar2 = vec(Statistics.mean(psibar; dims=1))

     var_w = vec(Statistics.var(s2; dims=1)) ./ nchains
-    var_b = (2 / (nchains - 1)) .* b.^2
-    var_wb = (niters / nchains) .*
-        (LinearAlgebra.diag(Statistics.cov(s2, psibar.^2)) .- 2 .* psibar2 .* LinearAlgebra.diag(Statistics.cov(s2, psibar)))
+    var_b = (2 / (nchains - 1)) .* b .^ 2
+    var_wb =
+        (niters / nchains) .* (
+            LinearAlgebra.diag(Statistics.cov(s2, psibar .^ 2)) .-
+            2 .* psibar2 .* LinearAlgebra.diag(Statistics.cov(s2, psibar))
+        )

     V = @. rfixed * w + rrandomscale * b
     var_V = rfixed^2 * var_w + rrandomscale^2 * var_b + 2 * rfixed * rrandomscale * var_wb
@@ -61,7 +61,7 @@ Compute the Gelman, Rubin and Brooks diagnostics.
 function gelmandiag(chains::AbstractArray{<:Real,3}; kwargs...)
     estimates, upperlimits = _gelmandiag(chains; kwargs...)

-    return (psrf = estimates, psrfci = upperlimits)
+    return (psrf=estimates, psrfci=upperlimits)
 end

 """
@@ -69,14 +69,13 @@ end

 Compute the multivariate Gelman, Rubin and Brooks diagnostics.
 """
-function gelmandiag_multivariate(
-    chains::AbstractArray{<:Real,3};
-    kwargs...
-)
+function gelmandiag_multivariate(chains::AbstractArray{<:Real,3}; kwargs...)
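+    # multivariate potential scale reduction factor in the spirit of Brooks & Gelman
+    # (1998): rfixed + rrandomscale * λmax, with λmax computed from W and B below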
niters, nparams, nchains = size(chains) if nparams < 2 - error("computation of the multivariate potential scale reduction factor requires ", - "at least two variables") + error( + "computation of the multivariate potential scale reduction factor requires ", + "at least two variables", + ) end estimates, upperlimits, W, B = _gelmandiag(chains; kwargs...) @@ -94,5 +93,5 @@ function gelmandiag_multivariate( λmax = LinearAlgebra.eigmax(LinearAlgebra.Symmetric(Y)) multivariate = rfixed + rrandomscale * λmax - return (psrf = estimates, psrfci = upperlimits, psrfmultivariate = multivariate) + return (psrf=estimates, psrfci=upperlimits, psrfmultivariate=multivariate) end diff --git a/src/gewekediag.jl b/src/gewekediag.jl index efc0127d..80fcc36d 100644 --- a/src/gewekediag.jl +++ b/src/gewekediag.jl @@ -4,16 +4,17 @@ Compute the Geweke diagnostic from the `first` and `last` proportion of samples `x`. """ function gewekediag(x::AbstractVector{<:Real}; first::Real=0.1, last::Real=0.5, kwargs...) - 0 < first < 1 || throw(ArgumentError("`first` is not in (0, 1)")) - 0 < last < 1 || throw(ArgumentError("`last` is not in (0, 1)")) - first + last <= 1 || throw(ArgumentError("`first` and `last` proportions overlap")) + 0 < first < 1 || throw(ArgumentError("`first` is not in (0, 1)")) + 0 < last < 1 || throw(ArgumentError("`last` is not in (0, 1)")) + first + last <= 1 || throw(ArgumentError("`first` and `last` proportions overlap")) - n = length(x) - x1 = x[1:round(Int, first * n)] - x2 = x[round(Int, n - last * n + 1):n] - z = (Statistics.mean(x1) - Statistics.mean(x2)) / - hypot(mcse(x1; kwargs...), mcse(x2; kwargs...)) - p = SpecialFunctions.erfc(abs(z) / sqrt(2)) + n = length(x) + x1 = x[1:round(Int, first * n)] + x2 = x[round(Int, n - last * n + 1):n] + z = + (Statistics.mean(x1) - Statistics.mean(x2)) / + hypot(mcse(x1; kwargs...), mcse(x2; kwargs...)) + p = SpecialFunctions.erfc(abs(z) / sqrt(2)) - return (zscore=z, pvalue=p) + return (zscore=z, pvalue=p) end diff --git a/src/heideldiag.jl b/src/heideldiag.jl index 132d991a..936a9a7f 100644 --- a/src/heideldiag.jl +++ b/src/heideldiag.jl @@ -5,30 +5,38 @@ Compute the Heidelberger and Welch diagnostic. """ -function heideldiag(x::AbstractVector{<:Real}; alpha::Real=0.05, eps::Real=0.1, - start::Int=1, kwargs...) - n = length(x) - delta = trunc(Int, 0.10 * n) - y = x[trunc(Int, n / 2):end] - S0 = length(y) * mcse(y; kwargs...)^2 - i, pvalue, converged, ybar = 1, 1.0, false, NaN - while i < n / 2 - y = x[i:end] - m = length(y) - ybar = Statistics.mean(y) - B = cumsum(y) - ybar * collect(1:m) - Bsq = (B .* B) ./ (m * S0) - I = sum(Bsq) / m - pvalue = 1.0 - pcramer(I) - converged = pvalue > alpha - if converged - break +function heideldiag( + x::AbstractVector{<:Real}; alpha::Real=0.05, eps::Real=0.1, start::Int=1, kwargs... +) + n = length(x) + delta = trunc(Int, 0.10 * n) + y = x[trunc(Int, n / 2):end] + S0 = length(y) * mcse(y; kwargs...)^2 + i, pvalue, converged, ybar = 1, 1.0, false, NaN + while i < n / 2 + y = x[i:end] + m = length(y) + ybar = Statistics.mean(y) + B = cumsum(y) - ybar * collect(1:m) + Bsq = (B .* B) ./ (m * S0) + I = sum(Bsq) / m + pvalue = 1.0 - pcramer(I) + converged = pvalue > alpha + if converged + break + end + i += delta end - i += delta - end - halfwidth = sqrt(2) * SpecialFunctions.erfcinv(alpha) * mcse(y; kwargs...) 
- passed = halfwidth / abs(ybar) <= eps - return (burnin=i + start - 2, stationarity=converged, pvalue=pvalue, mean=ybar, halfwidth=halfwidth, test=passed) + halfwidth = sqrt(2) * SpecialFunctions.erfcinv(alpha) * mcse(y; kwargs...) + passed = halfwidth / abs(ybar) <= eps + return ( + burnin=i + start - 2, + stationarity=converged, + pvalue=pvalue, + mean=ybar, + halfwidth=halfwidth, + test=passed, + ) end ## Csorgo S and Faraway JJ. The exact and asymptotic distributions of the @@ -39,7 +47,11 @@ function pcramer(q::Real) for k in 0:3 c1 = 4.0 * k + 1.0 c2 = c1^2 / (16.0 * q) - p += SpecialFunctions.gamma(k + 0.5) / factorial(k) * sqrt(c1) * exp(-c2) * SpecialFunctions.besselk(0.25, c2) + p += + SpecialFunctions.gamma(k + 0.5) / factorial(k) * + sqrt(c1) * + exp(-c2) * + SpecialFunctions.besselk(0.25, c2) end return p / (pi^1.5 * sqrt(q)) end diff --git a/src/mcse.jl b/src/mcse.jl index 27afbc31..3db98525 100644 --- a/src/mcse.jl +++ b/src/mcse.jl @@ -6,15 +6,15 @@ Compute the Monte Carlo standard error (MCSE) of samples `x`. """ function mcse(x::AbstractVector{<:Real}; method::Symbol=:imse, kwargs...) - return if method === :bm - mcse_bm(x; kwargs...) - elseif method === :imse - mcse_imse(x) - elseif method === :ipse - mcse_ipse(x) - else - throw(ArgumentError("unsupported MCSE method $method")) - end + return if method === :bm + mcse_bm(x; kwargs...) + elseif method === :imse + mcse_imse(x) + elseif method === :ipse + mcse_ipse(x) + else + throw(ArgumentError("unsupported MCSE method $method")) + end end function mcse_bm(x::AbstractVector{<:Real}; size::Int=floor(Int, sqrt(length(x)))) diff --git a/src/rafterydiag.jl b/src/rafterydiag.jl index 79b07f59..f580fd88 100644 --- a/src/rafterydiag.jl +++ b/src/rafterydiag.jl @@ -6,14 +6,8 @@ Compute the Raftery and Lewis diagnostic. """ function rafterydiag( - x::AbstractVector{<:Real}; - q = 0.025, - r = 0.005, - s = 0.95, - eps = 0.001, - range = 1:length(x) - ) - + x::AbstractVector{<:Real}; q=0.025, r=0.005, s=0.95, eps=0.001, range=1:length(x) +) nx = length(x) phi = sqrt(2.0) * SpecialFunctions.erfinv(s) nmin = ceil(Int, q * (1.0 - q) * (phi / r)^2) @@ -25,7 +19,7 @@ function rafterydiag( dichot = Int[(x .<= StatsBase.quantile(x, q))...] 
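    # the `bic >= 0.0` loop below thins the dichotomized chain until BIC prefers a
    # first-order Markov chain over a second-order one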
kthin = 0
     bic = 1.0
     local test, ntest
     while bic >= 0.0
         kthin += 1
         test = dichot[1:kthin:nx]
@@ -36,7 +30,8 @@
             for i1 in 1:2, i2 in 1:2, i3 in 1:2
                 tt = trantest[i1, i2, i3]
                 if tt > 0
-                    fitted = sum(trantest[:, i2, i3]) * sum(trantest[i1, i2, :]) /
+                    fitted =
+                        sum(trantest[:, i2, i3]) * sum(trantest[i1, i2, :]) /
                         sum(trantest[:, i2, :])
                     g2 += 2.0 * tt * log(tt / fitted)
                 end
@@ -54,5 +49,7 @@
         keep = kthin * ceil(n)
         total = burnin + keep
     end
-    return (thinning=kthin, burnin=burnin, total=total, nmin=nmin, dependencefactor=total / nmin)
+    return (
+        thinning=kthin, burnin=burnin, total=total, nmin=nmin, dependencefactor=total / nmin
+    )
 end
diff --git a/src/rstar.jl b/src/rstar.jl
index f5bfc7c3..af195fc7 100644
--- a/src/rstar.jl
+++ b/src/rstar.jl
@@ -31,13 +31,22 @@ R = round(Statistics.mean(Rs); digits=0)
 1.0
 ```
 """
-function rstar(rng::Random.AbstractRNG, classif::MLJModelInterface.Supervised, x::AbstractMatrix, y::AbstractVector{Int}; iterations = 10, subset = 0.8, verbosity = 0)
-
-    size(x,1) != length(y) && throw(DimensionMismatch())
+function rstar(
+    rng::Random.AbstractRNG,
+    classif::MLJModelInterface.Supervised,
+    x::AbstractMatrix,
+    y::AbstractVector{Int};
+    iterations=10,
+    subset=0.8,
+    verbosity=0,
+)
+    size(x, 1) != length(y) && throw(DimensionMismatch())
-    iterations >= 1 && ArgumentError("Number of iterations has to be positive!")
+    iterations >= 1 || throw(ArgumentError("Number of iterations has to be positive!"))

     if iterations > 1 && classif isa MLJModelInterface.Deterministic
-        @warn("Classifier is not a probabilistic classifier but number of iterations is > 1.")
+        @warn(
+            "Classifier is not a probabilistic classifier but number of iterations is > 1."
+        )
     elseif iterations == 1 && classif isa MLJModelInterface.Probabilistic
         @warn("Classifier is probabilistic but number of iterations is equal to one.")
     end
@@ -46,33 +55,58 @@ function rstar(rng::Random.AbstractRNG, classif::MLJModelInterface.Supervised, x
     K = length(unique(y))

     # randomly sub-select training and testing set
-    Ntrain = round(Int, N*subset)
+    Ntrain = round(Int, N * subset)
     Ntest = N - Ntrain

     ids = Random.randperm(rng, N)
     train_ids = view(ids, 1:Ntrain)
-    test_ids = view(ids, (Ntrain+1):N)
+    test_ids = view(ids, (Ntrain + 1):N)

     # train classifier using XGBoost
-    fitresult, _ = MLJModelInterface.fit(classif, verbosity, Tables.table(x[train_ids,:]), MLJModelInterface.categorical(y[train_ids]))
-
-    xtest = Tables.table(x[test_ids,:])
+    fitresult, _ = MLJModelInterface.fit(
+        classif,
+        verbosity,
+        Tables.table(x[train_ids, :]),
+        MLJModelInterface.categorical(y[train_ids]),
+    )
+
+    xtest = Tables.table(x[test_ids, :])
     ytest = view(y, test_ids)

-    Rstats = map(i -> K*rstar_score(rng, classif, fitresult, xtest, ytest), 1:iterations)
+    Rstats = map(i -> K * rstar_score(rng, classif, fitresult, xtest, ytest), 1:iterations)
     return Rstats
 end

-function rstar(classif::MLJModelInterface.Supervised, x::AbstractMatrix, y::AbstractVector{Int}; kwargs...)
-    rstar(Random.GLOBAL_RNG, classif, x, y; kwargs...)
+function rstar(
+    classif::MLJModelInterface.Supervised,
+    x::AbstractMatrix,
+    y::AbstractVector{Int};
+    kwargs...,
+)
+    return rstar(Random.GLOBAL_RNG, classif, x, y; kwargs...)
end -function rstar_score(rng::Random.AbstractRNG, classif::MLJModelInterface.Probabilistic, fitresult, xtest, ytest) - pred = DataAPI.unwrap.(rand.(Ref(rng), MLJModelInterface.predict(classif, fitresult, xtest))) - return Statistics.mean(((p,y),) -> p == y, zip(pred, ytest)) +function rstar_score( + rng::Random.AbstractRNG, + classif::MLJModelInterface.Probabilistic, + fitresult, + xtest, + ytest, +) + pred = + DataAPI.unwrap.( + rand.(Ref(rng), MLJModelInterface.predict(classif, fitresult, xtest)) + ) + return Statistics.mean(((p, y),) -> p == y, zip(pred, ytest)) end -function rstar_score(rng::Random.AbstractRNG, classif::MLJModelInterface.Deterministic, fitresult, xtest, ytest) +function rstar_score( + rng::Random.AbstractRNG, + classif::MLJModelInterface.Deterministic, + fitresult, + xtest, + ytest, +) pred = MLJModelInterface.predict(classif, fitresult, xtest) - return Statistics.mean(((p,y),) -> p == y, zip(pred, ytest)) + return Statistics.mean(((p, y),) -> p == y, zip(pred, ytest)) end diff --git a/test/discretediag.jl b/test/discretediag.jl index c00c2751..8415614f 100644 --- a/test/discretediag.jl +++ b/test/discretediag.jl @@ -4,9 +4,8 @@ samples = rand(-100:100, 100, nparams, nchains) @testset "results" begin - for method in ( - :weiss, :hangartner, :DARBOOT, :MCBOOT, :billingsley, :billingsleyBOOT - ) + for method in + (:weiss, :hangartner, :DARBOOT, :MCBOOT, :billingsley, :billingsleyBOOT) between_chain, within_chain = @inferred(discretediag(samples; method=method)) @test between_chain isa NamedTuple{(:stat, :df, :pvalue)} diff --git a/test/ess.jl b/test/ess.jl index 32d36cf5..91fac763 100644 --- a/test/ess.jl +++ b/test/ess.jl @@ -4,8 +4,12 @@ x = rand(50, 20) # check incompatible sizes - @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!(similar(x, 25, 20), x) - @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!(similar(x, 50, 40), x) + @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!( + similar(x, 25, 20), x + ) + @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!( + similar(x, 50, 40), x + ) y = similar(x, 25, 40) InferenceDiagnostics.copyto_split!(y, x) @@ -15,8 +19,12 @@ x = rand(51, 20) # check incompatible sizes - @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!(similar(x, 25, 20), x) - @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!(similar(x, 51, 40), x) + @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!( + similar(x, 25, 20), x + ) + @test_throws DimensionMismatch InferenceDiagnostics.copyto_split!( + similar(x, 51, 40), x + ) InferenceDiagnostics.copyto_split!(y, x) @test reshape(y, 50, 20) == x[vcat(1:25, 27:51), :] @@ -30,9 +38,9 @@ x = scale * rawx ess_standard, rhat_standard = ess_rhat(x) - ess_standard2, rhat_standard2 = ess_rhat(x; method = ESSMethod()) - ess_fft, rhat_fft = ess_rhat(x; method = FFTESSMethod()) - ess_bda, rhat_bda = ess_rhat(x; method = BDAESSMethod()) + ess_standard2, rhat_standard2 = ess_rhat(x; method=ESSMethod()) + ess_fft, rhat_fft = ess_rhat(x; method=FFTESSMethod()) + ess_bda, rhat_bda = ess_rhat(x; method=BDAESSMethod()) # check that we get (roughly) the same results @test ess_standard == ess_standard2 @@ -40,9 +48,9 @@ @test rhat_standard == rhat_standard2 == rhat_fft == rhat_bda # check that the estimates are reasonable - @test all(x -> isapprox(x, 100_000; rtol = 0.1), ess_standard) - @test all(x -> isapprox(x, 100_000; rtol = 0.1), ess_bda) - @test all(x -> isapprox(x, 1; rtol = 0.1), rhat_standard) + @test all(x -> 
isapprox(x, 100_000; rtol=0.1), ess_standard)
+        @test all(x -> isapprox(x, 100_000; rtol=0.1), ess_bda)
+        @test all(x -> isapprox(x, 1; rtol=0.1), rhat_standard)

         # BDA method fluctuates more
         @test var(ess_standard) < var(ess_bda)
@@ -53,9 +61,9 @@
         x = ones(10_000, 40, 10)

         ess_standard, rhat_standard = ess_rhat(x)
-        ess_standard2, rhat_standard2 = ess_rhat(x; method = ESSMethod())
-        ess_fft, rhat_fft = ess_rhat(x; method = FFTESSMethod())
-        ess_bda, rhat_bda = ess_rhat(x; method = BDAESSMethod())
+        ess_standard2, rhat_standard2 = ess_rhat(x; method=ESSMethod())
+        ess_fft, rhat_fft = ess_rhat(x; method=FFTESSMethod())
+        ess_bda, rhat_bda = ess_rhat(x; method=BDAESSMethod())

         # check that the estimates are all NaN
         for ess in (ess_standard, ess_standard2, ess_fft, ess_bda)
@@ -71,7 +79,7 @@

         for method in (ESSMethod(), FFTESSMethod(), BDAESSMethod())
             # analyze array
-            ess_array, rhat_array = ess_rhat(x; method = method)
+            ess_array, rhat_array = ess_rhat(x; method=method)

             @test length(ess_array) == size(x, 2)
             @test all(ismissing, ess_array) # since min(maxlag, niter - 1) = 0
diff --git a/test/gewekediag.jl b/test/gewekediag.jl
index 66ce4f51..f877ccde 100644
--- a/test/gewekediag.jl
+++ b/test/gewekediag.jl
@@ -3,7 +3,7 @@

     @testset "results" begin
         @test @inferred(gewekediag(samples)) isa
-            NamedTuple{(:zscore, :pvalue), Tuple{Float64,Float64}}
+              NamedTuple{(:zscore, :pvalue),Tuple{Float64,Float64}}
     end

     @testset "exceptions" begin
diff --git a/test/rstar/runtests.jl b/test/rstar/runtests.jl
index 424ff303..e2cc0b7f 100644
--- a/test/rstar/runtests.jl
+++ b/test/rstar/runtests.jl
@@ -14,7 +14,7 @@ using Test
     R = rstar(classif, randn(N, 2), rand(1:3, N))

     # Resulting R value should be close to one, i.e. the classifier does not perform better than random guessing.
-    @test Statistics.mean(R) ≈ 1 rtol=0.15
+    @test Statistics.mean(R) ≈ 1 rtol = 0.15

     # Compute R* statistic for a mixed chain.
     samples = randn(4 * N, 8)
@@ -22,14 +22,16 @@ using Test
     R = rstar(classif, samples, chain_indices)

     # Resulting R value should be close to one, i.e. the classifier does not perform better than random guessing.
-    @test Statistics.mean(R) ≈ 1 rtol=0.15
+    @test Statistics.mean(R) ≈ 1 rtol = 0.15

     # Compute R* statistic for a non-mixed chain.
-    samples = [sin.(1:N) cos.(1:N);
-               100 .* cos.(1:N) 100 .* sin.(1:N)]
+    samples = [
+        sin.(1:N) cos.(1:N)
+        100 .* cos.(1:N) 100 .* sin.(1:N)
+    ]
     chain_indices = repeat(1:2; inner=N)

     # Resulting R value should be close to two, i.e. the classifier should be able to learn an almost perfect decision boundary between chains.
R = rstar(classif, samples, chain_indices) - @test Statistics.mean(R) ≈ 2 rtol=5e-2 + @test Statistics.mean(R) ≈ 2 rtol = 5e-2 end diff --git a/test/runtests.jl b/test/runtests.jl index bdfd5fbd..c1953466 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -17,13 +17,27 @@ using Test Random.seed!(1) @testset "InferenceDiagnostics.jl" begin - @testset "discrete diagnostic" begin include("discretediag.jl") end - @testset "ESS" begin include("ess.jl") end - @testset "Gelman, Rubin and Brooks diagnostic" begin include("gelmandiag.jl") end - @testset "Geweke diagnostic" begin include("gewekediag.jl") end - @testset "Heidelberger and Welch diagnostic" begin include("heideldiag.jl") end - @testset "Monte Carlo standard error" begin include("mcse.jl") end - @testset "Raftery and Lewis diagnostic" begin include("rafterydiag.jl") end + @testset "discrete diagnostic" begin + include("discretediag.jl") + end + @testset "ESS" begin + include("ess.jl") + end + @testset "Gelman, Rubin and Brooks diagnostic" begin + include("gelmandiag.jl") + end + @testset "Geweke diagnostic" begin + include("gewekediag.jl") + end + @testset "Heidelberger and Welch diagnostic" begin + include("heideldiag.jl") + end + @testset "Monte Carlo standard error" begin + include("mcse.jl") + end + @testset "Raftery and Lewis diagnostic" begin + include("rafterydiag.jl") + end @testset "R⋆ diagnostic" begin # MLJXGBoostInterface requires Julia >= 1.3 # XGBoost errors on 32bit systems: https://github.com/dmlc/XGBoost.jl/issues/92
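
Note: the Format workflow added above only installs JuliaFormatter and, on pull
requests, posts the resulting changes as suggestions via reviewdog/action-suggester.
To apply the same formatting locally before pushing, a minimal sketch along the
lines of the workflow's `run` block (assuming it is run from the repository root,
with JuliaFormatter available from the default registry) is:

    using Pkg
    Pkg.add("JuliaFormatter")  # the formatter the Format workflow installs

    using JuliaFormatter
    # rewrites the source files in place, picking up the repository's
    # .JuliaFormatter.toml and hence the blue style configured above
    format("."; verbose=true)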