From aa13e0c7955d97358e810f25bd3b55dadeb119c0 Mon Sep 17 00:00:00 2001 From: Markus Hauru Date: Sat, 1 Jun 2024 11:37:52 +0200 Subject: [PATCH] Optimization improvements (#2221) * initial work on interface * Improving the Optimization.jl interface, work in progress * More work on Optimization.jl, still in progress * Add docstrings to Optimisation.jl * Fix OptimizationOptimJL version constraint * Clean up optimisation TODO notes * Relax OptimizationOptimJL version constraints * Simplify optimization imports * Remove commented out code * Small improvements all over in optimisation * Clean up of Optimisation tests * Add a test for OptimizationBBO * Add tests using OptimizationNLopt * Rename/move the optimisation test files The files for Optimisaton.jl and OptimInterface.jl were in the wrong folders: One in `test/optimisation` the other in `test/ext`, but the wrong way around. * Relax compat bounds on OptimizationBBO and OptimizationNLopt * Split a testset to test/optimisation/OptimisationCore.jl * Import AbstractADType from ADTypes, not SciMLBase * Fix Optimization.jl depwarning * Fix seeds in more tests * Merge OptimizationCore into Optimization * In optimisation, rename init_value to initial_params * Optimisation docstring improvements * Code style adjustments in optimisation * Qualify references in optimisation * Simplify creation of ModeResults * Qualified references in optimization tests * Enforce line length in optimization * Simplify optimisation exports * Enforce line legth in Optim.jl interface * Refactor away ModeEstimationProblem * Style and docstring improvements for optimisation * Add := test to optimisation tests. 
* Clarify comment * Simplify generate_initial_params * Fix doc references * Rename testsets * Refactor check_success * Make initial_params a kwarg * Remove unnecessary type constrain on kwarg * Fix broken reference in tests * Fix bug in generate_initial_params * Fix qualified references in optimisation tests * Add hasstats checks to optimisation tests * Extend OptimizationOptimJL compat to 0.3 Co-authored-by: Hong Ge <3279477+yebai@users.noreply.github.com> * Change some `import`s to `using` Co-authored-by: Tor Erlend Fjelde * Change to kwargs... in docstrings * Add a two-argument method to OptimLogDensity as callable --------- Co-authored-by: Tor Erlend Fjelde Co-authored-by: Hong Ge <3279477+yebai@users.noreply.github.com> --- Project.toml | 4 + ext/TuringOptimExt.jl | 193 +++---- src/Turing.jl | 10 +- src/optimisation/Optimisation.jl | 535 +++++++++++------- test/Project.toml | 8 +- test/{optimisation => ext}/OptimInterface.jl | 134 ++--- test/ext/Optimisation.jl | 123 ---- test/optimisation/Optimisation.jl | 554 +++++++++++++++++++ test/runtests.jl | 12 +- 9 files changed, 1024 insertions(+), 549 deletions(-) rename test/{optimisation => ext}/OptimInterface.jl (59%) delete mode 100644 test/ext/Optimisation.jl create mode 100644 test/optimisation/Optimisation.jl diff --git a/Project.toml b/Project.toml index 0b1cf945d..e4b1c0b62 100644 --- a/Project.toml +++ b/Project.toml @@ -25,6 +25,8 @@ LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c" LogDensityProblemsAD = "996a588d-648d-4e1f-a8f0-a84b347e47b1" MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" +Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -68,6 +70,8 @@ LogDensityProblems = "2" LogDensityProblemsAD = "1.7.0" 
MCMCChains = "5, 6" NamedArrays = "0.9, 0.10" +Optimization = "3" +OptimizationOptimJL = "0.1, 0.2, 0.3" OrderedCollections = "1" Optim = "1" Reexport = "0.2, 1" diff --git a/ext/TuringOptimExt.jl b/ext/TuringOptimExt.jl index c30078024..d95bdde33 100644 --- a/ext/TuringOptimExt.jl +++ b/ext/TuringOptimExt.jl @@ -2,105 +2,22 @@ module TuringOptimExt if isdefined(Base, :get_extension) import Turing - import Turing: Distributions, DynamicPPL, ForwardDiff, NamedArrays, Printf, Accessors, Statistics, StatsAPI, StatsBase + import Turing: + DynamicPPL, + NamedArrays, + Accessors, + Optimisation import Optim else import ..Turing - import ..Turing: Distributions, DynamicPPL, ForwardDiff, NamedArrays, Printf, Accessors, Statistics, StatsAPI, StatsBase + import ..Turing: + DynamicPPL, + NamedArrays, + Accessors, + Optimisation import ..Optim end -""" - ModeResult{ - V<:NamedArrays.NamedArray, - M<:NamedArrays.NamedArray, - O<:Optim.MultivariateOptimizationResults, - S<:NamedArrays.NamedArray - } - -A wrapper struct to store various results from a MAP or MLE estimation. -""" -struct ModeResult{ - V<:NamedArrays.NamedArray, - O<:Optim.MultivariateOptimizationResults, - M<:Turing.OptimLogDensity -} <: StatsBase.StatisticalModel - "A vector with the resulting point estimates." - values::V - "The stored Optim.jl results." - optim_result::O - "The final log likelihood or log joint, depending on whether `MAP` or `MLE` was run." - lp::Float64 - "The evaluation function used to calculate the output." - f::M -end -############################# -# Various StatsBase methods # -############################# - - - -function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) - print(io, "ModeResult with maximized lp of ") - Printf.@printf(io, "%.2f", m.lp) - println(io) - show(io, m.values) -end - -function Base.show(io::IO, m::ModeResult) - show(io, m.values.array) -end - -function StatsBase.coeftable(m::ModeResult; level::Real=0.95) - # Get columns for coeftable. 
- terms = string.(StatsBase.coefnames(m)) - estimates = m.values.array[:, 1] - stderrors = StatsBase.stderror(m) - zscore = estimates ./ stderrors - p = map(z -> StatsAPI.pvalue(Distributions.Normal(), z; tail=:both), zscore) - - # Confidence interval (CI) - q = Statistics.quantile(Distributions.Normal(), (1 + level) / 2) - ci_low = estimates .- q .* stderrors - ci_high = estimates .+ q .* stderrors - - level_ = 100*level - level_percentage = isinteger(level_) ? Int(level_) : level_ - - StatsBase.CoefTable( - [estimates, stderrors, zscore, p, ci_low, ci_high], - ["Coef.", "Std. Error", "z", "Pr(>|z|)", "Lower $(level_percentage)%", "Upper $(level_percentage)%"], - terms) -end - -function StatsBase.informationmatrix(m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs...) - # Calculate Hessian and information matrix. - - # Convert the values to their unconstrained states to make sure the - # Hessian is computed with respect to the untransformed parameters. - linked = DynamicPPL.istrans(m.f.varinfo) - if linked - m = Accessors.@set m.f.varinfo = DynamicPPL.invlink!!(m.f.varinfo, m.f.model) - end - - # Calculate the Hessian, which is the information matrix because the negative of the log likelihood was optimized - varnames = StatsBase.coefnames(m) - info = hessian_function(m.f, m.values.array[:, 1]) - - # Link it back if we invlinked it. 
- if linked - m = Accessors.@set m.f.varinfo = DynamicPPL.link!!(m.f.varinfo, m.f.model) - end - - return NamedArrays.NamedArray(info, (varnames, varnames)) -end - -StatsBase.coef(m::ModeResult) = m.values -StatsBase.coefnames(m::ModeResult) = names(m.values)[1] -StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) -StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) -StatsBase.loglikelihood(m::ModeResult) = m.lp - #################### # Optim.jl methods # #################### @@ -125,26 +42,41 @@ mle = optimize(model, MLE()) mle = optimize(model, MLE(), NelderMead()) ``` """ -function Optim.optimize(model::DynamicPPL.Model, ::Turing.MLE, options::Optim.Options=Optim.Options(); kwargs...) - ctx = Turing.OptimizationContext(DynamicPPL.LikelihoodContext()) - f = Turing.OptimLogDensity(model, ctx) +function Optim.optimize( + model::DynamicPPL.Model, ::Optimisation.MLE, options::Optim.Options=Optim.Options(); + kwargs... +) + ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) + f = Optimisation.OptimLogDensity(model, ctx) init_vals = DynamicPPL.getparams(f) optimizer = Optim.LBFGS() return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end -function Optim.optimize(model::DynamicPPL.Model, ::Turing.MLE, init_vals::AbstractArray, options::Optim.Options=Optim.Options(); kwargs...) +function Optim.optimize( + model::DynamicPPL.Model, + ::Optimisation.MLE, + init_vals::AbstractArray, + options::Optim.Options=Optim.Options(); + kwargs... +) optimizer = Optim.LBFGS() return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end -function Optim.optimize(model::DynamicPPL.Model, ::Turing.MLE, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); kwargs...) 
- ctx = Turing.OptimizationContext(DynamicPPL.LikelihoodContext()) - f = Turing.OptimLogDensity(model, ctx) +function Optim.optimize( + model::DynamicPPL.Model, + ::Optimisation.MLE, + optimizer::Optim.AbstractOptimizer, + options::Optim.Options=Optim.Options(); + kwargs... +) + ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) + f = Optimisation.OptimLogDensity(model, ctx) init_vals = DynamicPPL.getparams(f) return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end function Optim.optimize( model::DynamicPPL.Model, - ::Turing.MLE, + ::Optimisation.MLE, init_vals::AbstractArray, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); @@ -154,8 +86,8 @@ function Optim.optimize( end function _mle_optimize(model::DynamicPPL.Model, args...; kwargs...) - ctx = Turing.OptimizationContext(DynamicPPL.LikelihoodContext()) - return _optimize(model, Turing.OptimLogDensity(model, ctx), args...; kwargs...) + ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) + return _optimize(model, Optimisation.OptimLogDensity(model, ctx), args...; kwargs...) end """ @@ -178,26 +110,41 @@ map_est = optimize(model, MAP()) map_est = optimize(model, MAP(), NelderMead()) ``` """ -function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, options::Optim.Options=Optim.Options(); kwargs...) - ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext()) - f = Turing.OptimLogDensity(model, ctx) +function Optim.optimize( + model::DynamicPPL.Model, ::Optimisation.MAP, options::Optim.Options=Optim.Options(); + kwargs... +) + ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) + f = Optimisation.OptimLogDensity(model, ctx) init_vals = DynamicPPL.getparams(f) optimizer = Optim.LBFGS() return _map_optimize(model, init_vals, optimizer, options; kwargs...) end -function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, init_vals::AbstractArray, options::Optim.Options=Optim.Options(); kwargs...) 
+function Optim.optimize( + model::DynamicPPL.Model, + ::Optimisation.MAP, + init_vals::AbstractArray, + options::Optim.Options=Optim.Options(); + kwargs... +) optimizer = Optim.LBFGS() return _map_optimize(model, init_vals, optimizer, options; kwargs...) end -function Optim.optimize(model::DynamicPPL.Model, ::Turing.MAP, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); kwargs...) - ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext()) - f = Turing.OptimLogDensity(model, ctx) +function Optim.optimize( + model::DynamicPPL.Model, + ::Optimisation.MAP, + optimizer::Optim.AbstractOptimizer, + options::Optim.Options=Optim.Options(); + kwargs... +) + ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) + f = Optimisation.OptimLogDensity(model, ctx) init_vals = DynamicPPL.getparams(f) return _map_optimize(model, init_vals, optimizer, options; kwargs...) end function Optim.optimize( model::DynamicPPL.Model, - ::Turing.MAP, + ::Optimisation.MAP, init_vals::AbstractArray, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); @@ -207,8 +154,8 @@ function Optim.optimize( end function _map_optimize(model::DynamicPPL.Model, args...; kwargs...) - ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext()) - return _optimize(model, Turing.OptimLogDensity(model, ctx), args...; kwargs...) + ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) + return _optimize(model, Optimisation.OptimLogDensity(model, ctx), args...; kwargs...) end """ @@ -218,7 +165,7 @@ Estimate a mode, i.e., compute a MLE or MAP estimate. """ function _optimize( model::DynamicPPL.Model, - f::Turing.OptimLogDensity, + f::Optimisation.OptimLogDensity, init_vals::AbstractArray=DynamicPPL.getparams(f), optimizer::Optim.AbstractOptimizer=Optim.LBFGS(), options::Optim.Options=Optim.Options(), @@ -236,25 +183,19 @@ function _optimize( # Warn the user if the optimization did not converge. 
if !Optim.converged(M) - @warn "Optimization did not converge! You may need to correct your model or adjust the Optim parameters." + @warn """ + Optimization did not converge! You may need to correct your model or adjust the + Optim parameters. + """ end - # Get the VarInfo at the MLE/MAP point, and run the model to ensure - # correct dimensionality. + # Get the optimum in unconstrained space. `getparams` does the invlinking. f = Accessors.@set f.varinfo = DynamicPPL.unflatten(f.varinfo, M.minimizer) - f = Accessors.@set f.varinfo = DynamicPPL.invlink(f.varinfo, model) - vals = DynamicPPL.getparams(f) - f = Accessors.@set f.varinfo = DynamicPPL.link(f.varinfo, model) - - # Make one transition to get the parameter names. vns_vals_iter = Turing.Inference.getparams(model, f.varinfo) varnames = map(Symbol ∘ first, vns_vals_iter) vals = map(last, vns_vals_iter) - - # Store the parameters and their names in an array. vmat = NamedArrays.NamedArray(vals, varnames) - - return ModeResult(vmat, M, -M.minimum, f) + return Optimisation.ModeResult(vmat, M, -M.minimum, f) end end # module diff --git a/src/Turing.jl b/src/Turing.jl index 99e9880d2..5562830af 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -138,13 +138,11 @@ export @model, # modelling ordered, # Exports from Bijectors - constrained_space, # optimisation interface + maximum_a_posteriori, + maximum_likelihood, + # The MAP and MLE exports are only needed for the Optim.jl interface. 
MAP, - MLE, - get_parameter_bounds, - optim_objective, - optim_function, - optim_problem + MLE if !isdefined(Base, :get_extension) using Requires diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 639b576c6..faa8a38f3 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -1,34 +1,35 @@ module Optimisation using ..Turing -using Bijectors -using Random -using SciMLBase: OptimizationFunction, OptimizationProblem, AbstractADType, NoAD - +using NamedArrays: NamedArrays +using DynamicPPL: DynamicPPL +using LogDensityProblems: LogDensityProblems +using LogDensityProblemsAD: LogDensityProblemsAD +using Optimization: Optimization +using OptimizationOptimJL: OptimizationOptimJL +using Random: Random +using SciMLBase: SciMLBase +using ADTypes: ADTypes +using StatsBase: StatsBase using Accessors: Accessors -using DynamicPPL -using DynamicPPL: Model, AbstractContext, VarInfo, VarName, - _getindex, getsym, getfield, setorder!, - get_and_set_val!, istrans - -import LogDensityProblems -import LogDensityProblemsAD - -export constrained_space, - MAP, - MLE, - OptimLogDensity, - OptimizationContext, - get_parameter_bounds, - optim_objective, - optim_function, - optim_problem +using Printf: Printf +using ForwardDiff: ForwardDiff +using StatsAPI: StatsAPI +using Statistics: Statistics -struct constrained_space{x} end +export maximum_a_posteriori, maximum_likelihood +# The MAP and MLE exports are only needed for the Optim.jl interface. +export MAP, MLE -struct MLE end -struct MAP end +""" + ModeEstimator +An abstract type to mark whether mode estimation is to be done with maximum a posteriori +(MAP) or maximum likelihood estimation (MLE). 
+""" +abstract type ModeEstimator end +struct MLE <: ModeEstimator end +struct MAP <: ModeEstimator end """ OptimizationContext{C<:AbstractContext} <: AbstractContext @@ -37,25 +38,29 @@ The `OptimizationContext` transforms variables to their constrained space, but does not use the density with respect to the transformation. This context is intended to allow an optimizer to sample in R^n freely. """ -struct OptimizationContext{C<:AbstractContext} <: AbstractContext +struct OptimizationContext{C<:DynamicPPL.AbstractContext} <: DynamicPPL.AbstractContext context::C - function OptimizationContext{C}(context::C) where {C<:AbstractContext} - if !(context isa Union{DefaultContext,LikelihoodContext}) - throw(ArgumentError("`OptimizationContext` supports only leaf contexts of type `DynamicPPL.DefaultContext` and `DynamicPPL.LikelihoodContext` (given: `$(typeof(context)))`")) + function OptimizationContext{C}(context::C) where {C<:DynamicPPL.AbstractContext} + if !(context isa Union{DynamicPPL.DefaultContext,DynamicPPL.LikelihoodContext}) + msg = """ + `OptimizationContext` supports only leaf contexts of type + `DynamicPPL.DefaultContext` and `DynamicPPL.LikelihoodContext` + (given: `$(typeof(context)))` + """ + throw(ArgumentError(msg)) end return new{C}(context) end end -OptimizationContext(context::AbstractContext) = OptimizationContext{typeof(context)}(context) +OptimizationContext(ctx::DynamicPPL.AbstractContext) = OptimizationContext{typeof(ctx)}(ctx) DynamicPPL.NodeTrait(::OptimizationContext) = DynamicPPL.IsLeaf() -# assume function DynamicPPL.tilde_assume(ctx::OptimizationContext, dist, vn, vi) r = vi[vn, dist] - lp = if ctx.context isa DefaultContext + lp = if ctx.context isa DynamicPPL.DefaultContext # MAP Distributions.logpdf(dist, r) else @@ -65,15 +70,20 @@ function DynamicPPL.tilde_assume(ctx::OptimizationContext, dist, vn, vi) return r, lp, vi end -# dot assume -_loglikelihood(dist::Distribution, x) = loglikelihood(dist, x) 
-_loglikelihood(dists::AbstractArray{<:Distribution}, x) = loglikelihood(arraydist(dists), x) +_loglikelihood(dist::Distribution, x) = StatsAPI.loglikelihood(dist, x) + +function _loglikelihood(dists::AbstractArray{<:Distribution}, x) + return StatsAPI.loglikelihood(arraydist(dists), x) +end + function DynamicPPL.dot_tilde_assume(ctx::OptimizationContext, right, left, vns, vi) - # Values should be set and we're using `SampleFromPrior`, hence the `rng` argument shouldn't - # affect anything. + # Values should be set and we're using `SampleFromPrior`, hence the `rng` argument + # shouldn't affect anything. # TODO: Stop using `get_and_set_val!`. - r = DynamicPPL.get_and_set_val!(Random.default_rng(), vi, vns, right, SampleFromPrior()) - lp = if ctx.context isa DefaultContext + r = DynamicPPL.get_and_set_val!( + Random.default_rng(), vi, vns, right, DynamicPPL.SampleFromPrior() + ) + lp = if ctx.context isa DynamicPPL.DefaultContext # MAP _loglikelihood(right, r) else @@ -84,40 +94,51 @@ function DynamicPPL.dot_tilde_assume(ctx::OptimizationContext, right, left, vns, end """ - OptimLogDensity{M<:Model,C<:Context,V<:VarInfo} + OptimLogDensity{M<:DynamicPPL.Model,C<:Context,V<:DynamicPPL.VarInfo} A struct that stores the negative log density function of a `DynamicPPL` model. """ -const OptimLogDensity{M<:Model,C<:OptimizationContext,V<:VarInfo} = Turing.LogDensityFunction{V,M,C} +const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = + Turing.LogDensityFunction{V,M,C} """ - OptimLogDensity(model::Model, context::OptimizationContext) + OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext) Create a callable `OptimLogDensity` struct that evaluates a model using the given `context`. 
""" -function OptimLogDensity(model::Model, context::OptimizationContext) - init = VarInfo(model) +function OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext) + init = DynamicPPL.VarInfo(model) return Turing.LogDensityFunction(init, model, context) end """ - LogDensityProblems.logdensity(f::OptimLogDensity, z) + (f::OptimLogDensity)(z) + (f::OptimLogDensity)(z, _) + +Evaluate the negative log joint or log likelihood at the array `z`. Which one is evaluated +depends on the context of `f`. -Evaluate the negative log joint (with `DefaultContext`) or log likelihood (with `LikelihoodContext`) -at the array `z`. +Any second argument is ignored. The two-argument method only exists to match interface the +required by Optimization.jl. """ function (f::OptimLogDensity)(z::AbstractVector) varinfo = DynamicPPL.unflatten(f.varinfo, z) - return -getlogp(last(DynamicPPL.evaluate!!(f.model, varinfo, f.context))) + return -DynamicPPL.getlogp(last(DynamicPPL.evaluate!!(f.model, varinfo, f.context))) end +(f::OptimLogDensity)(z, _) = f(z) + # NOTE: This seems a bit weird IMO since this is the _negative_ log-likelihood. LogDensityProblems.logdensity(f::OptimLogDensity, z::AbstractVector) = f(z) +# NOTE: The format of this function is dictated by Optim. The first argument sets whether to +# compute the function value, the second whether to compute the gradient (and stores the +# gradient). The last one is the actual argument of the objective function. function (f::OptimLogDensity)(F, G, z) if G !== nothing # Calculate negative log joint and its gradient. - # TODO: Make OptimLogDensity already an LogDensityProblems.ADgradient? Allow to specify AD? + # TODO: Make OptimLogDensity already an LogDensityProblems.ADgradient? Allow to + # specify AD? 
ℓ = LogDensityProblemsAD.ADgradient(f) neglogp, ∇neglogp = LogDensityProblems.logdensity_and_gradient(ℓ, z) @@ -139,186 +160,328 @@ function (f::OptimLogDensity)(F, G, z) return nothing end - - -################################################# -# Generic optimisation objective initialisation # -################################################# - -function transform!!(f::OptimLogDensity) - ## Check link status of vi in OptimLogDensity - linked = DynamicPPL.istrans(f.varinfo) - - ## transform into constrained or unconstrained space depending on current state of vi - f = Accessors.@set f.varinfo = if !linked - DynamicPPL.link!!(f.varinfo, f.model) - else - DynamicPPL.invlink!!(f.varinfo, f.model) - end - - return f +""" + ModeResult{ + V<:NamedArrays.NamedArray, + M<:NamedArrays.NamedArray, + O<:Optim.MultivariateOptimizationResults, + S<:NamedArrays.NamedArray + } + +A wrapper struct to store various results from a MAP or MLE estimation. +""" +struct ModeResult{ + V<:NamedArrays.NamedArray, + O<:Any, + M<:OptimLogDensity +} <: StatsBase.StatisticalModel + "A vector with the resulting point estimates." + values::V + "The stored optimiser results." + optim_result::O + "The final log likelihood or log joint, depending on whether `MAP` or `MLE` was run." + lp::Float64 + "The evaluation function used to calculate the output." + f::M end -function transform!!(p::AbstractArray, vi::DynamicPPL.VarInfo, model::DynamicPPL.Model, ::constrained_space{true}) - linked = DynamicPPL.istrans(vi) - - !linked && return identity(p) # TODO: why do we do `identity` here? - vi = DynamicPPL.unflatten(vi, p) - vi = DynamicPPL.invlink!!(vi, model) - p .= vi[:] +function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) + print(io, "ModeResult with maximized lp of ") + Printf.@printf(io, "%.2f", m.lp) + println(io) + show(io, m.values) +end - # If linking mutated, we need to link once more. 
- linked && DynamicPPL.link!!(vi, model) +function Base.show(io::IO, m::ModeResult) + show(io, m.values.array) +end - return p +# Various StatsBase methods for ModeResult + +function StatsBase.coeftable(m::ModeResult; level::Real=0.95) + # Get columns for coeftable. + terms = string.(StatsBase.coefnames(m)) + estimates = m.values.array[:, 1] + stderrors = StatsBase.stderror(m) + zscore = estimates ./ stderrors + p = map(z -> StatsAPI.pvalue(Distributions.Normal(), z; tail=:both), zscore) + + # Confidence interval (CI) + q = Statistics.quantile(Distributions.Normal(), (1 + level) / 2) + ci_low = estimates .- q .* stderrors + ci_high = estimates .+ q .* stderrors + + level_ = 100 * level + level_percentage = isinteger(level_) ? Int(level_) : level_ + + cols = [estimates, stderrors, zscore, p, ci_low, ci_high] + colnms = [ + "Coef.", + "Std. Error", + "z", + "Pr(>|z|)", + "Lower $(level_percentage)%", + "Upper $(level_percentage)%", + ] + return StatsBase.CoefTable(cols, colnms, terms) end -function transform!!(p::AbstractArray, vi::DynamicPPL.VarInfo, model::DynamicPPL.Model, ::constrained_space{false}) - linked = DynamicPPL.istrans(vi) +function StatsBase.informationmatrix( + m::ModeResult; hessian_function=ForwardDiff.hessian, kwargs... +) + # Calculate Hessian and information matrix. + + # Convert the values to their unconstrained states to make sure the + # Hessian is computed with respect to the untransformed parameters. + linked = DynamicPPL.istrans(m.f.varinfo) if linked - vi = DynamicPPL.invlink!!(vi, model) + m = Accessors.@set m.f.varinfo = DynamicPPL.invlink!!(m.f.varinfo, m.f.model) end - vi = DynamicPPL.unflatten(vi, p) - vi = DynamicPPL.link!!(vi, model) - p .= vi[:] - # If linking mutated, we need to link once more. 
- !linked && DynamicPPL.invlink!!(vi, model) + # Calculate the Hessian, which is the information matrix because the negative of the log + # likelihood was optimized + varnames = StatsBase.coefnames(m) + info = hessian_function(m.f, m.values.array[:, 1]) - return p -end + # Link it back if we invlinked it. + if linked + m = Accessors.@set m.f.varinfo = DynamicPPL.link!!(m.f.varinfo, m.f.model) + end -function transform(p::AbstractArray, vi::DynamicPPL.VarInfo, model::DynamicPPL.Model, con::constrained_space) - return transform!!(copy(p), vi, model, con) + return NamedArrays.NamedArray(info, (varnames, varnames)) end -abstract type AbstractTransform end +StatsBase.coef(m::ModeResult) = m.values +StatsBase.coefnames(m::ModeResult) = names(m.values)[1] +StatsBase.params(m::ModeResult) = StatsBase.coefnames(m) +StatsBase.vcov(m::ModeResult) = inv(StatsBase.informationmatrix(m)) +StatsBase.loglikelihood(m::ModeResult) = m.lp -struct ParameterTransform{T<:DynamicPPL.VarInfo,M<:DynamicPPL.Model, S<:constrained_space} <: AbstractTransform - vi::T - model::M - space::S -end +""" + ModeResult(log_density::OptimLogDensity, solution::SciMLBase.OptimizationSolution) -struct Init{T<:DynamicPPL.VarInfo,M<:DynamicPPL.Model, S<:constrained_space} <: AbstractTransform - vi::T - model::M - space::S -end +Create a `ModeResult` for a given `log_density` objective and a `solution` given by `solve`. -function (t::AbstractTransform)(p::AbstractArray) - return transform(p, t.vi, t.model, t.space) +`Optimization.solve` returns its own result type. This function converts that into the +richer format of `ModeResult`. It also takes care of transforming them back to the original +parameter space in case the optimization was done in a transformed space. 
+""" +function ModeResult(log_density::OptimLogDensity, solution::SciMLBase.OptimizationSolution) + varinfo_new = DynamicPPL.unflatten(log_density.varinfo, solution.u) + # `getparams` performs invlinking if needed + vns_vals_iter = Turing.Inference.getparams(log_density.model, varinfo_new) + syms = map(Symbol ∘ first, vns_vals_iter) + vals = map(last, vns_vals_iter) + return ModeResult( + NamedArrays.NamedArray(vals, syms), solution, -solution.objective, log_density + ) end -function (t::Init)() - return t.vi[DynamicPPL.SampleFromPrior()] -end - -function get_parameter_bounds(model::DynamicPPL.Model) - vi = DynamicPPL.VarInfo(model) +""" + ModeEstimationConstraints - ## Check link status of vi - linked = DynamicPPL.istrans(vi) - - ## transform into unconstrained - if !linked - vi = DynamicPPL.link!!(vi, model) - end +A struct that holds constraints for mode estimation problems. - d = length(vi[:]) - lb = transform(fill(-Inf, d), vi, model, constrained_space{true}()) - ub = transform(fill(Inf, d), vi, model, constrained_space{true}()) +The fields are the same as possible constraints supported by the Optimization.jl: +`ub` and `lb` specify lower and upper bounds of box constraints. `cons` is a function that +takes the parameters of the model and returns a list of derived quantities, which are then +constrained by the lower and upper bounds set by `lcons` and `ucons`. We refer to these +as generic constraints. Please see the documentation of +[Optimization.jl](https://docs.sciml.ai/Optimization/stable/) for more details. - return lb, ub +Any of the fields can be `nothing`, disabling the corresponding constraints. 
+""" +struct ModeEstimationConstraints{ + Lb<:Union{Nothing,AbstractVector}, + Ub<:Union{Nothing,AbstractVector}, + Cons, + LCons<:Union{Nothing,AbstractVector}, + UCons<:Union{Nothing,AbstractVector}, +} + lb::Lb + ub::Ub + cons::Cons + lcons::LCons + ucons::UCons end -function _optim_objective(model::DynamicPPL.Model, ::MAP, ::constrained_space{false}) - ctx = OptimizationContext(DynamicPPL.DefaultContext()) - obj = OptimLogDensity(model, ctx) +has_box_constraints(c::ModeEstimationConstraints) = c.ub !== nothing || c.lb !== nothing +has_generic_constraints(c::ModeEstimationConstraints) = ( + c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing +) +has_constraints(c) = has_box_constraints(c) || has_generic_constraints(c) - obj = transform!!(obj) - init = Init(obj.varinfo, model, constrained_space{false}()) - t = ParameterTransform(obj.varinfo, model, constrained_space{true}()) +""" + generate_initial_params(model::DynamicPPL.Model, initial_params, constraints) - return (obj=obj, init = init, transform=t) -end +Generate an initial value for the optimization problem. -function _optim_objective(model::DynamicPPL.Model, ::MAP, ::constrained_space{true}) - ctx = OptimizationContext(DynamicPPL.DefaultContext()) - obj = OptimLogDensity(model, ctx) - - init = Init(obj.varinfo, model, constrained_space{true}()) - t = ParameterTransform(obj.varinfo, model, constrained_space{true}()) - - return (obj=obj, init = init, transform=t) -end +If `initial_params` is not `nothing`, a copy of it is returned. Otherwise initial parameter +values are generated either by sampling from the prior (if no constraints are present) or +uniformly from the box constraints. If generic constraints are set, an error is thrown. +""" +function generate_initial_params(model::DynamicPPL.Model, initial_params, constraints) + if initial_params === nothing && has_generic_constraints(constraints) + throw(ArgumentError( + "You must provide an initial value when using generic constraints." 
+ )) + end -function _optim_objective(model::DynamicPPL.Model, ::MLE, ::constrained_space{false}) - ctx = OptimizationContext(DynamicPPL.LikelihoodContext()) - obj = OptimLogDensity(model, ctx) - - obj = transform!!(obj) - init = Init(obj.varinfo, model, constrained_space{false}()) - t = ParameterTransform(obj.varinfo, model, constrained_space{true}()) - - return (obj=obj, init = init, transform=t) + return if initial_params !== nothing + copy(initial_params) + elseif has_box_constraints(constraints) + [ + rand(Distributions.Uniform(lower, upper)) + for (lower, upper) in zip(constraints.lb, constraints.ub) + ] + else + rand(Vector, model) + end end -function _optim_objective(model::DynamicPPL.Model, ::MLE, ::constrained_space{true}) - ctx = OptimizationContext(DynamicPPL.LikelihoodContext()) - obj = OptimLogDensity(model, ctx) - - init = Init(obj.varinfo, model, constrained_space{true}()) - t = ParameterTransform(obj.varinfo, model, constrained_space{true}()) - - return (obj=obj, init = init, transform=t) +function default_solver(constraints::ModeEstimationConstraints) + return if has_generic_constraints(constraints) + OptimizationOptimJL.IPNewton() + else + OptimizationOptimJL.LBFGS() + end end -function optim_objective(model::DynamicPPL.Model, estimator::Union{MLE, MAP}; constrained::Bool=true) - return _optim_objective(model, estimator, constrained_space{constrained}()) -end +""" + OptimizationProblem(log_density::OptimLogDensity, adtype, constraints) +Create an `OptimizationProblem` for the objective function defined by `log_density`. +""" +function Optimization.OptimizationProblem(log_density::OptimLogDensity, adtype, constraints) + # Note that OptimLogDensity is a callable that evaluates the model with given + # parameters. Hence we can use it in the objective function as below. 
+ f = Optimization.OptimizationFunction(log_density, adtype; cons=constraints.cons) + initial_params = log_density.varinfo[:] + prob = if !has_constraints(constraints) + Optimization.OptimizationProblem(f, initial_params) + else + Optimization.OptimizationProblem( + f, initial_params; + lcons=constraints.lcons, + ucons=constraints.ucons, + lb=constraints.lb, + ub=constraints.ub + ) + end + return prob +end -function optim_function( - model::Model, - estimator::Union{MLE, MAP}; - constrained::Bool=true, - adtype::Union{Nothing, AbstractADType}=NoAD(), +""" + estimate_mode( + model::DynamicPPL.Model, + estimator::ModeEstimator, + [solver]; + kwargs... + ) + +Find the mode of the probability distribution of a model. + +Under the hood this function calls `Optimization.solve`. + +# Arguments +- `model::DynamicPPL.Model`: The model for which to estimate the mode. +- `estimator::ModeEstimator`: Can be either `MLE()` for maximum likelihood estimation or + `MAP()` for maximum a posteriori estimation. +- `solver=nothing`. The optimization algorithm to use. Optional. Can be any solver + recognised by Optimization.jl. If omitted a default solver is used: LBFGS, or IPNewton + if non-box constraints are present. + +# Keyword arguments +- `initial_params::Union{AbstractVector,Nothing}=nothing`: Initial value for the + optimization. Optional, unless non-box constraints are specified. If omitted it is + generated by either sampling from the prior distribution or uniformly from the box + constraints, if any. +- `adtype::AbstractADType=AutoForwardDiff()`: The automatic differentiation type to use. +- Keyword arguments `lb`, `ub`, `cons`, `lcons`, and `ucons` define constraints for the + optimization problem. Please see [`ModeEstimationConstraints`](@ref) for more details. +- Any extra keyword arguments are passed to `Optimization.solve`. 
+""" +function estimate_mode( + model::DynamicPPL.Model, + estimator::ModeEstimator, + solver=nothing; + initial_params=nothing, + adtype=ADTypes.AutoForwardDiff(), + cons=nothing, + lcons=nothing, + ucons=nothing, + lb=nothing, + ub=nothing, + kwargs... ) - if adtype === nothing - Base.depwarn("the use of `adtype=nothing` is deprecated, please use `adtype=SciMLBase.NoAD()`", :optim_function) + constraints = ModeEstimationConstraints(lb, ub, cons, lcons, ucons) + initial_params = generate_initial_params(model, initial_params, constraints) + if solver === nothing + solver = default_solver(constraints) end - obj, init, t = optim_objective(model, estimator; constrained=constrained) - - l(x, _) = obj(x) - f = if adtype isa AbstractADType && adtype !== NoAD() - OptimizationFunction(l, adtype) - else - OptimizationFunction( - l; - grad = (G,x,p) -> obj(nothing, G, x), - ) + # Create an OptimLogDensity object that can be used to evaluate the objective function, + # i.e. the negative log density. Set its VarInfo to the initial parameters. + log_density = let + inner_context = if estimator isa MAP + DynamicPPL.DefaultContext() + else + DynamicPPL.LikelihoodContext() + end + ctx = OptimizationContext(inner_context) + ld = OptimLogDensity(model, ctx) + Accessors.@set ld.varinfo = DynamicPPL.unflatten(ld.varinfo, initial_params) + end + + # TODO(mhauru) We currently couple together the questions of whether the user specified + # bounds/constraints and whether we transform the objective function to an + # unconstrained space. These should be separate concerns, but for that we need to + # implement getting the bounds of the prior distributions. 
+ optimise_in_unconstrained_space = !has_constraints(constraints) + if optimise_in_unconstrained_space + transformed_varinfo = DynamicPPL.link(log_density.varinfo, log_density.model) + log_density = Accessors.@set log_density.varinfo = transformed_varinfo end - - return (func=f, init=init, transform = t) + + prob = Optimization.OptimizationProblem(log_density, adtype, constraints) + solution = Optimization.solve(prob, solver; kwargs...) + # TODO(mhauru) We return a ModeResult for compatibility with the older Optim.jl + # interface. Might we want to break that and develop a better return type? + return ModeResult(log_density, solution) end +""" + maximum_a_posteriori( + model::DynamicPPL.Model, + [solver]; + kwargs... + ) -function optim_problem( - model::Model, - estimator::Union{MAP, MLE}; - constrained::Bool=true, - init_theta=nothing, - adtype::Union{Nothing, AbstractADType}=NoAD(), - kwargs..., -) - f, init, transform = optim_function(model, estimator; constrained=constrained, adtype=adtype) +Find the maximum a posteriori estimate of a model. + +This is a convenience function that calls `estimate_mode` with `MAP()` as the estimator. +Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for more +details. +""" +function maximum_a_posteriori(model::DynamicPPL.Model, args...; kwargs...) + return estimate_mode(model, MAP(), args...; kwargs...) +end + +""" + maximum_likelihood( + model::DynamicPPL.Model, + [solver]; + kwargs... + ) - u0 = init_theta === nothing ? init() : init(init_theta) - prob = OptimizationProblem(f, u0; kwargs...) +Find the maximum likelihood estimate of a model. - return (; prob, init, transform) +This is a convenience function that calls `estimate_mode` with `MLE()` as the estimator. +Please see the documentation of [`Turing.Optimisation.estimate_mode`](@ref) for more +details. +""" +function maximum_likelihood(model::DynamicPPL.Model, args...; kwargs...) + return estimate_mode(model, MLE(), args...; kwargs...) 
end end diff --git a/test/Project.toml b/test/Project.toml index b9c232dd8..484864590 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -18,6 +18,8 @@ MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d" NamedArrays = "86f7a689-2022-50b4-a561-43c23ac3c673" Optim = "429524aa-4258-5aef-a3af-852621145aeb" Optimization = "7f7a1694-90dd-40f0-9382-eb1efda571ba" +OptimizationBBO = "3e6eede4-6085-4f62-9a71-46d9bc1eb92b" +OptimizationNLopt = "4e6fcdb7-1186-4e1f-a706-475e75c168bb" OptimizationOptimJL = "36348300-93cb-4f02-beb5-3c3902f8871e" PDMats = "90014a1f-27ba-587c-ab20-58faa44d9150" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" @@ -50,8 +52,10 @@ LogDensityProblemsAD = "1.4" MCMCChains = "5, 6" NamedArrays = "0.9.4, 0.10" Optim = "1" -Optimization = "3.5" -OptimizationOptimJL = "0.1" +Optimization = "3" +OptimizationBBO = "0.1, 0.2" +OptimizationNLopt = "0.1, 0.2" +OptimizationOptimJL = "0.1, 0.2" PDMats = "0.10, 0.11" ReverseDiff = "1.4.2" SpecialFunctions = "0.10.3, 1, 2" diff --git a/test/optimisation/OptimInterface.jl b/test/ext/OptimInterface.jl similarity index 59% rename from test/optimisation/OptimInterface.jl rename to test/ext/OptimInterface.jl index ba9e2f78e..0f888f6f6 100644 --- a/test/optimisation/OptimInterface.jl +++ b/test/ext/OptimInterface.jl @@ -1,36 +1,12 @@ -# Used for testing how well it works with nested contexts. -struct OverrideContext{C,T1,T2} <: DynamicPPL.AbstractContext - context::C - logprior_weight::T1 - loglikelihood_weight::T2 -end -DynamicPPL.NodeTrait(::OverrideContext) = DynamicPPL.IsParent() -DynamicPPL.childcontext(parent::OverrideContext) = parent.context -DynamicPPL.setchildcontext(parent::OverrideContext, child) = OverrideContext( - child, - parent.logprior_weight, - parent.loglikelihood_weight -) - -# Only implement what we need for the models above. 
-function DynamicPPL.tilde_assume(context::OverrideContext, right, vn, vi) - value, logp, vi = DynamicPPL.tilde_assume(context.context, right, vn, vi) - return value, context.logprior_weight, vi -end -function DynamicPPL.tilde_observe(context::OverrideContext, right, left, vi) - logp, vi = DynamicPPL.tilde_observe(context.context, right, left, vi) - return context.loglikelihood_weight, vi -end - -@numerical_testset "OptimInterface.jl" begin +@numerical_testset "TuringOptimExt" begin @testset "MLE" begin Random.seed!(222) true_value = [0.0625, 1.75] - m1 = optimize(gdemo_default, MLE()) - m2 = optimize(gdemo_default, MLE(), NelderMead()) - m3 = optimize(gdemo_default, MLE(), true_value, LBFGS()) - m4 = optimize(gdemo_default, MLE(), true_value) + m1 = Optim.optimize(gdemo_default, MLE()) + m2 = Optim.optimize(gdemo_default, MLE(), Optim.NelderMead()) + m3 = Optim.optimize(gdemo_default, MLE(), true_value, Optim.LBFGS()) + m4 = Optim.optimize(gdemo_default, MLE(), true_value) @test all(isapprox.(m1.values.array - true_value, 0.0, atol=0.01)) @test all(isapprox.(m2.values.array - true_value, 0.0, atol=0.01)) @@ -42,10 +18,10 @@ end Random.seed!(222) true_value = [49 / 54, 7 / 6] - m1 = optimize(gdemo_default, MAP()) - m2 = optimize(gdemo_default, MAP(), NelderMead()) - m3 = optimize(gdemo_default, MAP(), true_value, LBFGS()) - m4 = optimize(gdemo_default, MAP(), true_value) + m1 = Optim.optimize(gdemo_default, MAP()) + m2 = Optim.optimize(gdemo_default, MAP(), Optim.NelderMead()) + m3 = Optim.optimize(gdemo_default, MAP(), true_value, Optim.LBFGS()) + m4 = Optim.optimize(gdemo_default, MAP(), true_value) @test all(isapprox.(m1.values.array - true_value, 0.0, atol=0.01)) @test all(isapprox.(m2.values.array - true_value, 0.0, atol=0.01)) @@ -55,7 +31,7 @@ end @testset "StatsBase integration" begin Random.seed!(54321) - mle_est = optimize(gdemo_default, MLE()) + mle_est = Optim.optimize(gdemo_default, MLE()) # Calculated based on the two data points in gdemo_default, 
[1.5, 2.0] true_values = [0.0625, 1.75] @@ -95,7 +71,7 @@ end y = x*true_beta model = regtest(x, y) - mle = optimize(model, MLE()) + mle = Optim.optimize(model, MLE()) vcmat = inv(x'x) vcmat_mle = vcov(mle).array @@ -114,11 +90,11 @@ end model_dot = dot_gdemo([1.5, 2.0]) - mle1 = optimize(gdemo_default, MLE()) - mle2 = optimize(model_dot, MLE()) + mle1 = Optim.optimize(gdemo_default, MLE()) + mle2 = Optim.optimize(model_dot, MLE()) - map1 = optimize(gdemo_default, MAP()) - map2 = optimize(model_dot, MAP()) + map1 = Optim.optimize(gdemo_default, MAP()) + map2 = Optim.optimize(model_dot, MAP()) @test isapprox(mle1.values.array, mle2.values.array) @test isapprox(map1.values.array, map2.values.array) @@ -128,8 +104,9 @@ end @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS result_true = DynamicPPL.TestUtils.posterior_optima(model) - @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(), NelderMead()] - result = optimize(model, MAP(), optimizer) + optimizers = [Optim.LBFGS(), Optim.NelderMead()] + @testset "$(nameof(typeof(optimizer)))" for optimizer in optimizers + result = Optim.optimize(model, MAP(), optimizer) vals = result.values for vn in DynamicPPL.TestUtils.varnames(model) @@ -164,8 +141,9 @@ end result_true = DynamicPPL.TestUtils.likelihood_optima(model) # `NelderMead` seems to struggle with convergence here, so we exclude it. 
- @testset "$(nameof(typeof(optimizer)))" for optimizer in [LBFGS(),] - result = optimize(model, MLE(), optimizer, Optim.Options(g_tol=1e-3, f_tol=1e-3)) + @testset "$(nameof(typeof(optimizer)))" for optimizer in [Optim.LBFGS(),] + options = Optim.Options(g_tol=1e-3, f_tol=1e-3) + result = Optim.optimize(model, MLE(), optimizer, options) vals = result.values for vn in DynamicPPL.TestUtils.varnames(model) @@ -180,68 +158,22 @@ end end end - # Issue: https://discourse.julialang.org/t/two-equivalent-conditioning-syntaxes-giving-different-likelihood-values/100320 - @testset "OptimizationContext" begin - @model function model1(x) - μ ~ Uniform(0, 2) - x ~ LogNormal(μ, 1) - end - - @model function model2() - μ ~ Uniform(0, 2) - x ~ LogNormal(μ, 1) - end - - x = 1.0 - w = [1.0] - - @testset "With ConditionContext" begin - m1 = model1(x) - m2 = model2() | (x = x,) - ctx = Turing.OptimizationContext(DynamicPPL.LikelihoodContext()) - @test Turing.OptimLogDensity(m1, ctx)(w) == Turing.OptimLogDensity(m2, ctx)(w) - end - - @testset "With prefixes" begin - function prefix_μ(model) - return DynamicPPL.contextualize(model, DynamicPPL.PrefixContext{:inner}(model.context)) - end - m1 = prefix_μ(model1(x)) - m2 = prefix_μ(model2() | (var"inner.x" = x,)) - ctx = Turing.OptimizationContext(DynamicPPL.LikelihoodContext()) - @test Turing.OptimLogDensity(m1, ctx)(w) == Turing.OptimLogDensity(m2, ctx)(w) - end - - @testset "Weighted" begin - function override(model) - return DynamicPPL.contextualize( - model, - OverrideContext(model.context, 100, 1) - ) - end - m1 = override(model1(x)) - m2 = override(model2() | (x = x,)) - ctx = Turing.OptimizationContext(DynamicPPL.DefaultContext()) - @test Turing.OptimLogDensity(m1, ctx)(w) == Turing.OptimLogDensity(m2, ctx)(w) - end - - @testset "with :=" begin - @model function demo_track() - x ~ Normal() - y := 100 + x - end - model = demo_track() - result = optimize(model, MAP()) - @test result.values[:x] ≈ 0 atol=1e-1 - @test result.values[:y] ≈ 
100 atol=1e-1 - end - end - # Issue: https://discourse.julialang.org/t/turing-mixture-models-with-dirichlet-weightings/112910 @testset "with different linked dimensionality" begin @model demo_dirichlet() = x ~ Dirichlet(2 * ones(3)) model = demo_dirichlet() - result = optimize(model, MAP()) + result = Optim.optimize(model, MAP()) @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol=0.2 end + + @testset "with :=" begin + @model function demo_track() + x ~ Normal() + y := 100 + x + end + model = demo_track() + result = Optim.optimize(model, MAP()) + @test result.values[:x] ≈ 0 atol=1e-1 + @test result.values[:y] ≈ 100 atol=1e-1 + end end diff --git a/test/ext/Optimisation.jl b/test/ext/Optimisation.jl deleted file mode 100644 index 324a8a7de..000000000 --- a/test/ext/Optimisation.jl +++ /dev/null @@ -1,123 +0,0 @@ -@testset "ext/Optimisation.jl" begin - @testset "gdemo" begin - @testset "MLE" begin - Random.seed!(222) - true_value = [0.0625, 1.75] - - f1 = optim_function(gdemo_default, MLE();constrained=false) - p1 = OptimizationProblem(f1.func, f1.init(true_value)) - - p2 = optim_objective(gdemo_default, MLE();constrained=false) - - p3 = optim_problem(gdemo_default, MLE();constrained=false, init_theta=true_value) - - m1 = solve(p1, NelderMead()) - m2 = solve(p1, LBFGS()) - m3 = solve(p1, BFGS()) - m4 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), p2.init(true_value), NelderMead()) - m5 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), p2.init(true_value), LBFGS()) - m6 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), p2.init(true_value), BFGS()) - m7 = solve(p3.prob, NelderMead()) - m8 = solve(p3.prob, LBFGS()) - m9 = solve(p3.prob, BFGS()) - - @test all(isapprox.(f1.transform(m1.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(f1.transform(m2.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(f1.transform(m3.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m4.minimizer) - true_value, 0.0, atol=0.01)) - 
@test all(isapprox.(p2.transform(m5.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m6.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m7.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m8.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m9.minimizer) - true_value, 0.0, atol=0.01)) - end - - @testset "MAP" begin - Random.seed!(222) - true_value = [49 / 54, 7 / 6] - - f1 = optim_function(gdemo_default, MAP();constrained=false) - p1 = OptimizationProblem(f1.func, f1.init(true_value)) - - p2 = optim_objective(gdemo_default, MAP();constrained=false) - - p3 = optim_problem(gdemo_default, MAP();constrained=false,init_theta=true_value) - - m1 = solve(p1, NelderMead()) - m2 = solve(p1, LBFGS()) - m3 = solve(p1, BFGS()) - m4 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), p2.init(true_value), NelderMead()) - m5 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), p2.init(true_value), LBFGS()) - m6 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), p2.init(true_value), BFGS()) - m7 = solve(p3.prob, NelderMead()) - m8 = solve(p3.prob, LBFGS()) - m9 = solve(p3.prob, BFGS()) - - @test all(isapprox.(f1.transform(m1.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(f1.transform(m2.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(f1.transform(m3.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m4.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m5.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m6.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m7.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m8.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m9.minimizer) - true_value, 0.0, atol=0.01)) - end - - @testset "MLE constrained" begin - Random.seed!(222) - true_value = [0.0625, 
1.75] - lb = [0.0, 0.0] - ub = [2.0, 2.0] - - f1 = optim_function(gdemo_default, MLE();constrained=true) - p1 = OptimizationProblem(f1.func, f1.init(true_value); lb=lb, ub=ub) - - p2 = optim_objective(gdemo_default, MLE();constrained=true) - - p3 = optim_problem(gdemo_default, MLE();constrained=true, init_theta=true_value, lb=lb, ub=ub) - - m1 = solve(p1, Fminbox(LBFGS())) - m2 = solve(p1, Fminbox(BFGS())) - m3 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), lb, ub, p2.init(true_value), Fminbox(LBFGS())) - m4 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), lb, ub, p2.init(true_value), Fminbox(BFGS())) - m5 = solve(p3.prob, Fminbox(LBFGS())) - m6 = solve(p3.prob, Fminbox(BFGS())) - - @test all(isapprox.(f1.transform(m1.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(f1.transform(m2.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m3.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m4.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m5.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m6.minimizer) - true_value, 0.0, atol=0.01)) - end - - @testset "MAP constrained" begin - Random.seed!(222) - true_value = [49 / 54, 7 / 6] - lb = [0.0, 0.0] - ub = [2.0, 2.0] - - f1 = optim_function(gdemo_default, MAP();constrained=true) - p1 = OptimizationProblem(f1.func, f1.init(true_value); lb=lb, ub=ub) - - p2 = optim_objective(gdemo_default, MAP();constrained=true) - - p3 = optim_problem(gdemo_default, MAP();constrained=true, init_theta=true_value, lb=lb, ub=ub) - - m1 = solve(p1, Fminbox(LBFGS())) - m2 = solve(p1, Fminbox(BFGS())) - m3 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), lb, ub, p2.init(true_value), Fminbox(LBFGS())) - m4 = optimize(p2.obj, (G,z) -> p2.obj(nothing,G,z), lb, ub, p2.init(true_value), Fminbox(BFGS())) - m5 = solve(p3.prob, Fminbox(LBFGS())) - m6 = solve(p3.prob, Fminbox(BFGS())) - - @test 
all(isapprox.(f1.transform(m1.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(f1.transform(m2.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m3.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p2.transform(m4.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m5.minimizer) - true_value, 0.0, atol=0.01)) - @test all(isapprox.(p3.transform(m6.minimizer) - true_value, 0.0, atol=0.01)) - end - end -end diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl new file mode 100644 index 000000000..5f38b6ff1 --- /dev/null +++ b/test/optimisation/Optimisation.jl @@ -0,0 +1,554 @@ +@testset "Optimisation" begin + + # The `stats` field is populated only in newer versions of OptimizationOptimJL and + # similar packages. Hence we end up doing this check a lot + hasstats(result) = result.optim_result.stats !== nothing + + # Issue: https://discourse.julialang.org/t/two-equivalent-conditioning-syntaxes-giving-different-likelihood-values/100320 + @testset "OptimizationContext" begin + # Used for testing how well it works with nested contexts. + struct OverrideContext{C,T1,T2} <: DynamicPPL.AbstractContext + context::C + logprior_weight::T1 + loglikelihood_weight::T2 + end + DynamicPPL.NodeTrait(::OverrideContext) = DynamicPPL.IsParent() + DynamicPPL.childcontext(parent::OverrideContext) = parent.context + DynamicPPL.setchildcontext(parent::OverrideContext, child) = OverrideContext( + child, + parent.logprior_weight, + parent.loglikelihood_weight + ) + + # Only implement what we need for the models above. 
+        function DynamicPPL.tilde_assume(context::OverrideContext, right, vn, vi)
+            value, logp, vi = DynamicPPL.tilde_assume(context.context, right, vn, vi)
+            return value, context.logprior_weight, vi
+        end
+        function DynamicPPL.tilde_observe(context::OverrideContext, right, left, vi)
+            logp, vi = DynamicPPL.tilde_observe(context.context, right, left, vi)
+            return context.loglikelihood_weight, vi
+        end
+
+        @model function model1(x)
+            μ ~ Uniform(0, 2)
+            x ~ LogNormal(μ, 1)
+        end
+
+        @model function model2()
+            μ ~ Uniform(0, 2)
+            x ~ LogNormal(μ, 1)
+        end
+
+        x = 1.0
+        w = [1.0]
+
+        @testset "With ConditionContext" begin
+            m1 = model1(x)
+            m2 = model2() | (x=x,)
+            ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext())
+            @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) ==
+                Turing.Optimisation.OptimLogDensity(m2, ctx)(w)
+        end
+
+        @testset "With prefixes" begin
+            function prefix_μ(model)
+                return DynamicPPL.contextualize(
+                    model, DynamicPPL.PrefixContext{:inner}(model.context)
+                )
+            end
+            m1 = prefix_μ(model1(x))
+            m2 = prefix_μ(model2() | (var"inner.x"=x,))
+            ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext())
+            @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) ==
+                Turing.Optimisation.OptimLogDensity(m2, ctx)(w)
+        end
+
+        @testset "Weighted" begin
+            function override(model)
+                return DynamicPPL.contextualize(
+                    model,
+                    OverrideContext(model.context, 100, 1)
+                )
+            end
+            m1 = override(model1(x))
+            m2 = override(model2() | (x=x,))
+            ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.DefaultContext())
+            @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) ==
+                Turing.Optimisation.OptimLogDensity(m2, ctx)(w)
+        end
+    end
+
+    @numerical_testset "gdemo" begin
+
+        """
+        check_optimisation_result(result, true_value, true_logp, check_retcode=true)
+
+        Check that the `result` returned by optimisation is close to the truth.
+ """ + function check_optimisation_result( + result, true_value, true_logp, check_retcode=true + ) + optimum = result.values.array + @test all(isapprox.(optimum - true_value, 0.0, atol=0.01)) + if check_retcode + @test result.optim_result.retcode == Optimization.ReturnCode.Success + end + @test isapprox(result.lp, true_logp, atol=0.01) + end + + @testset "MLE" begin + Random.seed!(222) + true_value = [0.0625, 1.75] + true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) + check_success(result) = check_optimisation_result(result, true_value, true_logp) + + m1 = Turing.Optimisation.estimate_mode( + gdemo_default, MLE() + ) + m2 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value + ) + m3 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.Newton() + ) + # TODO(mhauru) How can we check that the adtype is actually AutoReverseDiff? + m4 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() + ) + m5 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.NelderMead(); initial_params=true_value + ) + m6 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.NelderMead() + ) + + check_success(m1) + check_success(m2) + check_success(m3) + check_success(m4) + check_success(m5) + check_success(m6) + + @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 + if hasstats(m6) && hasstats(m5) + @test m5.optim_result.stats.iterations < m6.optim_result.stats.iterations + end + + @test !hasstats(m2) || m2.optim_result.stats.gevals > 0 + @test !hasstats(m3) || m3.optim_result.stats.gevals > 0 + @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 + @test !hasstats(m5) || m5.optim_result.stats.gevals == 0 + @test !hasstats(m6) || m6.optim_result.stats.gevals == 0 + end + + @testset "MAP" begin + Random.seed!(222) + true_value = [49 / 54, 7 / 6] + true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) + check_success(result) = 
check_optimisation_result(result, true_value, true_logp) + + m1 = Turing.Optimisation.estimate_mode( + gdemo_default, MAP() + ) + m2 = maximum_a_posteriori( + gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value + ) + m3 = maximum_a_posteriori( + gdemo_default, OptimizationOptimJL.Newton() + ) + m4 = maximum_a_posteriori( + gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() + ) + m5 = maximum_a_posteriori( + gdemo_default, OptimizationOptimJL.NelderMead(); initial_params=true_value + ) + m6 = maximum_a_posteriori(gdemo_default, OptimizationOptimJL.NelderMead()) + + check_success(m1) + check_success(m2) + check_success(m3) + check_success(m4) + check_success(m5) + check_success(m6) + + @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 + if hasstats(m6) && hasstats(m5) + @test m5.optim_result.stats.iterations < m6.optim_result.stats.iterations + end + + @test !hasstats(m2) || m2.optim_result.stats.gevals > 0 + @test !hasstats(m3) || m3.optim_result.stats.gevals > 0 + @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 + @test !hasstats(m5) || m5.optim_result.stats.gevals == 0 + @test !hasstats(m6) || m6.optim_result.stats.gevals == 0 + end + + @testset "MLE with box constraints" begin + Random.seed!(222) + true_value = [0.0625, 1.75] + true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) + check_success(result, check_retcode=true) = check_optimisation_result( + result, true_value, true_logp, check_retcode + ) + + lb = [0.0, 0.0] + ub = [2.0, 2.0] + + m1 = Turing.Optimisation.estimate_mode( + gdemo_default, MLE(); lb=lb, ub=ub + ) + m2 = maximum_likelihood( + gdemo_default, + OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); + initial_params=true_value, lb=lb, ub=ub) + m3 = maximum_likelihood( + gdemo_default, + OptimizationBBO.BBO_separable_nes(); + maxiters=100_000, abstol=1e-5, lb=lb, ub=ub + ) + m4 = maximum_likelihood( + gdemo_default, + 
OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); + adtype=AutoReverseDiff(), lb=lb, ub=ub + ) + m5 = maximum_likelihood( + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=true_value, lb=lb, ub=ub + ) + m6 = maximum_likelihood(gdemo_default; lb=lb, ub=ub) + + check_success(m1) + check_success(m2) + # BBO retcodes are misconfigured, so skip checking the retcode in this case. + # See https://github.com/SciML/Optimization.jl/issues/745 + check_success(m3, false) + check_success(m4) + check_success(m5) + check_success(m6) + + @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 + @test !hasstats(m5) || m5.optim_result.stats.iterations <= 1 + + @test !hasstats(m2) || m2.optim_result.stats.gevals > 0 + @test !hasstats(m3) || m3.optim_result.stats.gevals == 0 + @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 + @test !hasstats(m5) || m5.optim_result.stats.gevals > 0 + end + + @testset "MAP with box constraints" begin + Random.seed!(222) + true_value = [49 / 54, 7 / 6] + true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) + check_success(result, check_retcode=true) = check_optimisation_result( + result, true_value, true_logp, check_retcode + ) + + lb = [0.0, 0.0] + ub = [2.0, 2.0] + + m1 = Turing.Optimisation.estimate_mode( + gdemo_default, MAP(); lb=lb, ub=ub + ) + m2 = maximum_a_posteriori( + gdemo_default, + OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); + initial_params=true_value, lb=lb, ub=ub + ) + m3 = maximum_a_posteriori( + gdemo_default, + OptimizationBBO.BBO_separable_nes(); + maxiters=100_000, abstol=1e-5, lb=lb, ub=ub + ) + m4 = maximum_a_posteriori( + gdemo_default, + OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); + adtype=AutoReverseDiff(), lb=lb, ub=ub + ) + m5 = maximum_a_posteriori( + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=true_value, lb=lb, ub=ub + ) + m6 = maximum_a_posteriori(gdemo_default; lb=lb, ub=ub) + + check_success(m1) + check_success(m2) 
+ # BBO retcodes are misconfigured, so skip checking the retcode in this case. + # See https://github.com/SciML/Optimization.jl/issues/745 + check_success(m3, false) + check_success(m4) + check_success(m5) + check_success(m6) + + @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 + @test !hasstats(m5) || m5.optim_result.stats.iterations <= 1 + + @show m2.optim_result.stats + @test !hasstats(m2) || m2.optim_result.stats.gevals > 0 + @test !hasstats(m3) || m3.optim_result.stats.gevals == 0 + @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 + @test !hasstats(m5) || m5.optim_result.stats.gevals > 0 + end + + @testset "MLE with generic constraints" begin + Random.seed!(222) + true_value = [0.0625, 1.75] + true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) + check_success(result, check_retcode=true) = check_optimisation_result( + result, true_value, true_logp, check_retcode + ) + + # Set two constraints: The first parameter must be non-negative, and the L2 norm + # of the parameters must be between 0.5 and 2. + cons(res, x, _) = (res .= [x[1], sqrt(sum(x .^ 2))]) + lcons = [0, 0.5] + ucons = [Inf, 2.0] + cons_args = (cons=cons, lcons=lcons, ucons=ucons) + initial_params = [0.5, -1.0] + + m1 = Turing.Optimisation.estimate_mode( + gdemo_default, MLE(); initial_params=initial_params, cons_args... + ) + m2 = maximum_likelihood(gdemo_default; initial_params=true_value, cons_args...) + m3 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.IPNewton(); + initial_params=initial_params, cons_args... + ) + m4 = maximum_likelihood( + gdemo_default, OptimizationOptimJL.IPNewton(); + initial_params=initial_params, adtype=AutoReverseDiff(), cons_args... + ) + m5 = maximum_likelihood( + gdemo_default; + initial_params=initial_params, cons_args... 
+ ) + + check_success(m1) + check_success(m2) + check_success(m3) + check_success(m4) + check_success(m5) + + @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 + + @test !hasstats(m3) || m3.optim_result.stats.gevals > 0 + @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 + + expected_error = ArgumentError( + "You must provide an initial value when using generic constraints." + ) + @test_throws expected_error maximum_likelihood(gdemo_default; cons_args...) + end + + @testset "MAP with generic constraints" begin + Random.seed!(222) + true_value = [49 / 54, 7 / 6] + true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) + check_success(result, check_retcode=true) = check_optimisation_result( + result, true_value, true_logp, check_retcode + ) + + # Set two constraints: The first parameter must be non-negative, and the L2 norm + # of the parameters must be between 0.5 and 2. + cons(res, x, _) = (res .= [x[1], sqrt(sum(x .^ 2))]) + lcons = [0, 0.5] + ucons = [Inf, 2.0] + cons_args = (cons=cons, lcons=lcons, ucons=ucons) + initial_params = [0.5, -1.0] + + m1 = Turing.Optimisation.estimate_mode( + gdemo_default, MAP(); initial_params=initial_params, cons_args... + ) + m2 = maximum_a_posteriori( + gdemo_default; initial_params=true_value, cons_args... + ) + m3 = maximum_a_posteriori( + gdemo_default, OptimizationOptimJL.IPNewton(); + initial_params=initial_params, cons_args... + ) + m4 = maximum_a_posteriori( + gdemo_default, OptimizationOptimJL.IPNewton(); + initial_params=initial_params, adtype=AutoReverseDiff(), cons_args... + ) + m5 = maximum_a_posteriori( + gdemo_default; + initial_params=initial_params, cons_args... 
+ ) + + check_success(m1) + check_success(m2) + check_success(m3) + check_success(m4) + check_success(m5) + + @test !hasstats(m2) || m2.optim_result.stats.iterations <= 1 + + @test !hasstats(m3) || m3.optim_result.stats.gevals > 0 + @test !hasstats(m4) || m4.optim_result.stats.gevals > 0 + + expected_error = ArgumentError( + "You must provide an initial value when using generic constraints." + ) + @test_throws expected_error maximum_a_posteriori(gdemo_default; cons_args...) + end + end + + @testset "StatsBase integration" begin + Random.seed!(54321) + mle_est = maximum_likelihood(gdemo_default) + # Calculated based on the two data points in gdemo_default, [1.5, 2.0] + true_values = [0.0625, 1.75] + + @test coefnames(mle_est) == [:s, :m] + + diffs = coef(mle_est).array - [0.0625031; 1.75001] + @test all(isapprox.(diffs, 0.0, atol=0.1)) + + infomat = [2/(2*true_values[1]^2) 0.0; 0.0 2/true_values[1]] + @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) + + vcovmat = [2*true_values[1]^2/2 0.0; 0.0 true_values[1]/2] + @test all(isapprox.(vcovmat - vcov(mle_est), 0.0, atol=0.01)) + + ctable = coeftable(mle_est) + @test ctable isa StatsBase.CoefTable + + s = stderror(mle_est).array + @test all(isapprox.(s - [0.06250415643292194, 0.17677963626053916], 0.0, atol=0.01)) + + @test coefnames(mle_est) == Distributions.params(mle_est) + @test vcov(mle_est) == inv(informationmatrix(mle_est)) + + @test isapprox(loglikelihood(mle_est), -0.0652883561466624, atol=0.01) + end + + @testset "Linear regression test" begin + @model function regtest(x, y) + beta ~ MvNormal(Zeros(2), I) + mu = x * beta + y ~ MvNormal(mu, I) + end + + Random.seed!(987) + true_beta = [1.0, -2.2] + x = rand(40, 2) + y = x * true_beta + + model = regtest(x, y) + mle = maximum_likelihood(model) + + vcmat = inv(x'x) + vcmat_mle = vcov(mle).array + + @test isapprox(mle.values.array, true_beta) + @test isapprox(vcmat, vcmat_mle) + end + + @testset "Dot tilde test" begin + @model function 
dot_gdemo(x)
+        s ~ InverseGamma(2, 3)
+        m ~ Normal(0, sqrt(s))
+
+        (.~)(x, Normal(m, sqrt(s)))
+    end
+
+    model_dot = dot_gdemo([1.5, 2.0])
+
+    mle1 = maximum_likelihood(gdemo_default)
+    mle2 = maximum_likelihood(model_dot)
+
+    map1 = maximum_a_posteriori(gdemo_default)
+    map2 = maximum_a_posteriori(model_dot)
+
+    @test isapprox(mle1.values.array, mle2.values.array)
+    @test isapprox(map1.values.array, map2.values.array)
+    end
+
+    # TODO(mhauru): The corresponding Optim.jl test had a note saying that some models
+    # don't work for Tracker and ReverseDiff. Is that still the case?
+    @testset "MAP for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS
+        Random.seed!(23)
+        result_true = DynamicPPL.TestUtils.posterior_optima(model)
+
+        optimizers = [
+            OptimizationOptimJL.LBFGS(),
+            OptimizationOptimJL.NelderMead(),
+            OptimizationNLopt.NLopt.LD_TNEWTON_PRECOND_RESTART(),
+        ]
+        @testset "$(nameof(typeof(optimizer)))" for optimizer in optimizers
+            result = maximum_a_posteriori(model, optimizer)
+            vals = result.values
+
+            for vn in DynamicPPL.TestUtils.varnames(model)
+                for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn))
+                    @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05
+                end
+            end
+        end
+    end
+
+    # Some of the models have one variance parameter per observation, and so
+    # the MLE should have the variances set to 0. Since we're working in
+    # transformed space, this corresponds to `-Inf`, which is of course not achievable.
+    # In particular, it can result in "early termination" of the optimization process
+    # because we hit NaNs, etc. To avoid this, we set the `reltol` to
+    # something larger than the default.
+ allowed_incorrect_mle = [ + DynamicPPL.TestUtils.demo_dot_assume_dot_observe, + DynamicPPL.TestUtils.demo_assume_index_observe, + DynamicPPL.TestUtils.demo_assume_multivariate_observe, + DynamicPPL.TestUtils.demo_assume_observe_literal, + DynamicPPL.TestUtils.demo_dot_assume_observe_submodel, + DynamicPPL.TestUtils.demo_dot_assume_dot_observe_matrix, + DynamicPPL.TestUtils.demo_dot_assume_matrix_dot_observe_matrix, + DynamicPPL.TestUtils.demo_assume_submodel_observe_index_literal, + DynamicPPL.TestUtils.demo_dot_assume_observe_index, + DynamicPPL.TestUtils.demo_dot_assume_observe_index_literal, + DynamicPPL.TestUtils.demo_assume_matrix_dot_observe_matrix, + ] + @testset "MLE for $(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS + Random.seed!(23) + result_true = DynamicPPL.TestUtils.likelihood_optima(model) + + optimizers = [ + OptimizationOptimJL.LBFGS(), + OptimizationOptimJL.NelderMead(), + OptimizationNLopt.NLopt.LD_TNEWTON_PRECOND_RESTART(), + ] + @testset "$(nameof(typeof(optimizer)))" for optimizer in optimizers + result = maximum_likelihood(model, optimizer; reltol=1e-3) + vals = result.values + + for vn in DynamicPPL.TestUtils.varnames(model) + for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn)) + if model.f in allowed_incorrect_mle + @test isfinite(get(result_true, vn_leaf)) + else + @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05 + end + end + end + end + end + + # Issue: https://discourse.julialang.org/t/turing-mixture-models-with-dirichlet-weightings/112910 + @testset "Optimization with different linked dimensionality" begin + @model demo_dirichlet() = x ~ Dirichlet(2 * ones(3)) + model = demo_dirichlet() + result = maximum_a_posteriori(model) + @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol = 0.2 + end + + @testset "with :=" begin + @model function demo_track() + x ~ Normal() + y := 100 + x + end + model = demo_track() + result = maximum_a_posteriori(model) + @test result.values[:x] ≈ 0 
atol = 1e-1 + @test result.values[:y] ≈ 100 atol = 1e-1 + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 8f9db1d1d..70bd532c1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,9 +9,11 @@ using FiniteDifferences using ForwardDiff using MCMCChains using NamedArrays -using Optim -using Optimization -using OptimizationOptimJL +using Optim: Optim +using Optimization: Optimization +using OptimizationBBO: OptimizationBBO +using OptimizationOptimJL: OptimizationOptimJL +using OptimizationNLopt: OptimizationNLopt using PDMats using ReverseDiff using SpecialFunctions @@ -83,8 +85,8 @@ macro timeit_include(path::AbstractString) :(@timeit TIMEROUTPUT $path include($ end @testset "mode estimation" begin - @timeit_include("optimisation/OptimInterface.jl") - @timeit_include("ext/Optimisation.jl") + @timeit_include("optimisation/Optimisation.jl") + @timeit_include("ext/OptimInterface.jl") end end