diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml
index 04dcd5680..5e865d93a 100644
--- a/.JuliaFormatter.toml
+++ b/.JuliaFormatter.toml
@@ -1,5 +1,6 @@
 style="blue"
 format_markdown = true
+import_to_using = false # TODO
 # We ignore these files because when formatting was first put in place they were being worked on.
 # These ignores should be removed once the relevant PRs are merged/closed.
diff --git a/src/variational/VariationalInference.jl b/src/variational/VariationalInference.jl
index d601ae406..189d3f700 100644
--- a/src/variational/VariationalInference.jl
+++ b/src/variational/VariationalInference.jl
@@ -12,16 +12,9 @@ using Random: Random
 import AdvancedVI
 import Bijectors
 
-
 # Reexports
 using AdvancedVI: vi, ADVI, ELBO, elbo, TruncatedADAGrad, DecayedADAGrad
-export
-    vi,
-    ADVI,
-    ELBO,
-    elbo,
-    TruncatedADAGrad,
-    DecayedADAGrad
+export vi, ADVI, ELBO, elbo, TruncatedADAGrad, DecayedADAGrad
 
 """
     make_logjoint(model::Model; weight = 1.0)
@@ -31,17 +24,10 @@ use `DynamicPPL.MiniBatch` context to run the `Model` with a weight `num_total_o
 
 ## Notes
 - For sake of efficiency, the returned function is closes over an instance of `VarInfo`. This means that you *might* run into some weird behaviour if you call this method sequentially using different types; if that's the case, just generate a new one for each type using `make_logjoint`.
 """
-function make_logjoint(model::DynamicPPL.Model; weight = 1.0)
+function make_logjoint(model::DynamicPPL.Model; weight=1.0)
     # setup
-    ctx = DynamicPPL.MiniBatchContext(
-        DynamicPPL.DefaultContext(),
-        weight
-    )
-    f = DynamicPPL.LogDensityFunction(
-        model,
-        DynamicPPL.VarInfo(model),
-        ctx
-    )
+    ctx = DynamicPPL.MiniBatchContext(DynamicPPL.DefaultContext(), weight)
+    f = DynamicPPL.LogDensityFunction(model, DynamicPPL.VarInfo(model), ctx)
     return Base.Fix1(LogDensityProblems.logdensity, f)
 end
@@ -52,10 +38,10 @@ function (elbo::ELBO)(
     q,
     model::DynamicPPL.Model,
     num_samples;
-    weight = 1.0,
-    kwargs...
+    weight=1.0,
+    kwargs...,
 )
-    return elbo(rng, alg, q, make_logjoint(model; weight = weight), num_samples; kwargs...)
+    return elbo(rng, alg, q, make_logjoint(model; weight=weight), num_samples; kwargs...)
 end
 
 # VI algorithms
diff --git a/src/variational/advi.jl b/src/variational/advi.jl
index cf2d4034a..ec3e6552e 100644
--- a/src/variational/advi.jl
+++ b/src/variational/advi.jl
@@ -14,7 +14,6 @@ function wrap_in_vec_reshape(f, in_size)
     return reshape_outer ∘ f ∘ reshape_inner
 end
 
-
 """
     bijector(model::Model[, sym2ranges = Val(false)])
 
@@ -22,26 +21,26 @@ Returns a `Stacked <: Bijector` which maps from the support of the posterior to
 denoting the dimensionality of the latent variables.
 """
 function Bijectors.bijector(
-    model::DynamicPPL.Model,
-    ::Val{sym2ranges} = Val(false);
-    varinfo = DynamicPPL.VarInfo(model)
+    model::DynamicPPL.Model, ::Val{sym2ranges}=Val(false); varinfo=DynamicPPL.VarInfo(model)
 ) where {sym2ranges}
-    num_params = sum([size(varinfo.metadata[sym].vals, 1)
-                      for sym ∈ keys(varinfo.metadata)])
+    num_params = sum([
+        size(varinfo.metadata[sym].vals, 1) for sym in keys(varinfo.metadata)
+    ])
 
-    dists = vcat([varinfo.metadata[sym].dists for sym ∈ keys(varinfo.metadata)]...)
+    dists = vcat([varinfo.metadata[sym].dists for sym in keys(varinfo.metadata)]...)
 
-    num_ranges = sum([length(varinfo.metadata[sym].ranges)
-                      for sym ∈ keys(varinfo.metadata)])
+    num_ranges = sum([
+        length(varinfo.metadata[sym].ranges) for sym in keys(varinfo.metadata)
+    ])
     ranges = Vector{UnitRange{Int}}(undef, num_ranges)
     idx = 0
     range_idx = 1
 
     # ranges might be discontinuous => values are vectors of ranges rather than just ranges
-    sym_lookup = Dict{Symbol, Vector{UnitRange{Int}}}()
-    for sym ∈ keys(varinfo.metadata)
+    sym_lookup = Dict{Symbol,Vector{UnitRange{Int}}}()
+    for sym in keys(varinfo.metadata)
         sym_lookup[sym] = Vector{UnitRange{Int}}()
-        for r ∈ varinfo.metadata[sym].ranges
+        for r in varinfo.metadata[sym].ranges
             ranges[range_idx] = idx .+ r
             push!(sym_lookup[sym], ranges[range_idx])
             range_idx += 1
@@ -117,27 +116,24 @@ function AdvancedVI.update(
 end
 
 function AdvancedVI.vi(
-    model::DynamicPPL.Model,
-    alg::AdvancedVI.ADVI;
-    optimizer = AdvancedVI.TruncatedADAGrad(),
+    model::DynamicPPL.Model, alg::AdvancedVI.ADVI; optimizer=AdvancedVI.TruncatedADAGrad()
 )
     q = meanfield(model)
-    return AdvancedVI.vi(model, alg, q; optimizer = optimizer)
+    return AdvancedVI.vi(model, alg, q; optimizer=optimizer)
 end
 
-
 function AdvancedVI.vi(
     model::DynamicPPL.Model,
     alg::AdvancedVI.ADVI,
     q::Bijectors.TransformedDistribution{<:DistributionsAD.TuringDiagMvNormal};
-    optimizer = AdvancedVI.TruncatedADAGrad(),
+    optimizer=AdvancedVI.TruncatedADAGrad(),
 )
     # Initial parameters for mean-field approx
     μ, σs = StatsBase.params(q)
     θ = vcat(μ, StatsFuns.invsoftplus.(σs))
 
     # Optimize
-    AdvancedVI.optimize!(elbo, alg, q, make_logjoint(model), θ; optimizer = optimizer)
+    AdvancedVI.optimize!(elbo, alg, q, make_logjoint(model), θ; optimizer=optimizer)
 
     # Return updated `Distribution`
     return AdvancedVI.update(q, θ)
diff --git a/test/variational/advi.jl b/test/variational/advi.jl
index 8f12562a5..639df018c 100644
--- a/test/variational/advi.jl
+++ b/test/variational/advi.jl
@@ -27,12 +27,12 @@ using Turing.Essential: TuringDiagMvNormal
             N = 500
 
             alg = ADVI(10, 5000)
-            q = vi(gdemo_default, alg; optimizer = opt)
+            q = vi(gdemo_default, alg; optimizer=opt)
             samples = transpose(rand(q, N))
             chn = Chains(reshape(samples, size(samples)..., 1), ["s", "m"])
 
             # TODO: uhmm, seems like a large `eps` here...
-            check_gdemo(chn, atol = 0.5)
+            check_gdemo(chn; atol=0.5)
         end
     end
 
@@ -52,7 +52,7 @@ using Turing.Essential: TuringDiagMvNormal
 
         # OR: implement `update` and pass a `Distribution`
         function AdvancedVI.update(d::TuringDiagMvNormal, θ::AbstractArray{<:Real})
-            return TuringDiagMvNormal(θ[1:length(q)], exp.(θ[length(q) + 1:end]))
+            return TuringDiagMvNormal(θ[1:length(q)], exp.(θ[(length(q) + 1):end]))
         end
 
         q0 = TuringDiagMvNormal(zeros(2), ones(2))
@@ -66,7 +66,7 @@ using Turing.Essential: TuringDiagMvNormal
     # https://github.com/TuringLang/Turing.jl/issues/2065
     @testset "simplex bijector" begin
         @model function dirichlet()
-            x ~ Dirichlet([1.0,1.0])
+            x ~ Dirichlet([1.0, 1.0])
             return x
         end
 
@@ -82,17 +82,17 @@ using Turing.Essential: TuringDiagMvNormal
         # And regression for https://github.com/TuringLang/Turing.jl/issues/2160.
         q = vi(m, ADVI(10, 1000))
         x = rand(q, 1000)
-        @test mean(eachcol(x)) ≈ [0.5, 0.5] atol=0.1
+        @test mean(eachcol(x)) ≈ [0.5, 0.5] atol = 0.1
     end
 
     # Ref: https://github.com/TuringLang/Turing.jl/issues/2205
     @testset "with `condition` (issue #2205)" begin
         @model function demo_issue2205()
             x ~ Normal()
-            y ~ Normal(x, 1)
+            return y ~ Normal(x, 1)
         end
 
-        model = demo_issue2205() | (y = 1.0,)
+        model = demo_issue2205() | (y=1.0,)
         q = vi(model, ADVI(10, 1000))
         # True mean.
         mean_true = 1 / 2
@@ -101,8 +101,8 @@ using Turing.Essential: TuringDiagMvNormal
         samples = rand(q, 1000)
        mean_est = mean(samples)
         var_est = var(samples)
-        @test mean_est ≈ mean_true atol=0.2
-        @test var_est ≈ var_true atol=0.2
+        @test mean_est ≈ mean_true atol = 0.2
+        @test var_est ≈ var_true atol = 0.2
     end
 end
 
diff --git a/test/variational/optimisers.jl b/test/variational/optimisers.jl
index 8063cdf2e..6f64d5fb1 100644
--- a/test/variational/optimisers.jl
+++ b/test/variational/optimisers.jl
@@ -9,8 +9,8 @@ using Turing
 function test_opt(ADPack, opt)
     θ = randn(10, 10)
     θ_fit = randn(10, 10)
-    loss(x, θ_) = mean(sum(abs2, θ*x - θ_*x; dims = 1))
-    for t = 1:10^4
+    loss(x, θ_) = mean(sum(abs2, θ * x - θ_ * x; dims=1))
+    for t in 1:(10^4)
         x = rand(10)
         Δ = ADPack.gradient(θ_ -> loss(x, θ_), θ_fit)
         Δ = apply!(opt, θ_fit, Δ)
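
Below is a quick usage sketch of the entry points touched by this patch, mainly to illustrate the blue-style keyword syntax (no spaces around `=` in keyword arguments). It is illustrative only: the `coinflip` model and its data are invented for this note, and the qualified `Turing.Variational.TruncatedADAGrad` path is an assumption based on the re-export shown above, not something taken from the diff itself.

using Turing

# Toy model with a single continuous latent variable, so mean-field ADVI applies.
@model function coinflip(y)
    p ~ Beta(1, 1)
    for i in eachindex(y)
        y[i] ~ Bernoulli(p)
    end
end

model = coinflip([1, 0, 1, 1, 0])

# Same call pattern as `AdvancedVI.vi(model, alg; optimizer=...)` above;
# ADVI(10, 1000) uses 10 Monte Carlo samples per gradient step for 1000 steps.
q = vi(model, ADVI(10, 1000); optimizer=Turing.Variational.TruncatedADAGrad())

# `q` is the fitted mean-field approximation (pushed through the model's bijector);
# draw from it like any other distribution.
samples = rand(q, 1000)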