diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 000000000..51d6bc633 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,25 @@ +style="blue" +format_markdown = true +# TODO +# We ignore these files because when formatting was first put in place they were being worked on. +# These ignores should be removed once the relevant PRs are merged/closed. +ignore = [ + # https://github.com/TuringLang/Turing.jl/pull/2231/files + "src/experimental/gibbs.jl", + "src/mcmc/abstractmcmc.jl", + "test/experimental/gibbs.jl", + "test/test_utils/numerical_tests.jl", + # https://github.com/TuringLang/Turing.jl/pull/2218/files + "src/mcmc/Inference.jl", + "test/mcmc/Inference.jl", + # https://github.com/TuringLang/Turing.jl/pull/2068 + "src/variational/VariationalInference.jl", + "src/variational/advi.jl", + "test/variational/advi.jl", + "test/variational/optimisers.jl", + # https://github.com/TuringLang/Turing.jl/pull/1887 + "test/mcmc/Inference.jl", + "test/mcmc/hmc.jl", + "test/mcmc/sghmc.jl", + "test/runtests.jl", +] diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 000000000..abeefa989 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,38 @@ +name: Format + +on: + push: + branches: + - master + pull_request: + branches: + - master + merge_group: + types: [checks_requested] + +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Format code + run: | + using Pkg + Pkg.add(; name="JuliaFormatter", uuid="98e50ef6-434e-11e9-1051-2b60c6c9e899") + using JuliaFormatter + format("."; verbose=true) + shell: julia --color=yes {0} + - uses: reviewdog/action-suggester@v1 + if: github.event_name == 'pull_request' + with: + tool_name: JuliaFormatter + fail_on_error: true diff --git a/HISTORY.md b/HISTORY.md index 0b3661739..2a3929878 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,91 +1,100 @@ # Release 0.30.5 -- `essential/ad.jl` is removed, `ForwardDiff` and `ReverseDiff` integrations via `LogDensityProblemsAD` are moved to `DynamicPPL` and live in corresponding package extensions. -- `LogDensityProblemsAD.ADgradient(ℓ::DynamicPPL.LogDensityFunction)` (i.e. the single argument method) is moved to `Inference` module. It will create `ADgradient` using the `adtype` information stored in `context` field of `ℓ`. -- `getADbackend` function is renamed to `getADType`, the interface is preserved, but packages that previously used `getADbackend` should be updated to use `getADType`. -- `TuringTag` for ForwardDiff is also removed, now `DynamicPPLTag` is defined in `DynamicPPL` package and should serve the same [purpose](https://www.stochasticlifestyle.com/improved-forwarddiff-jl-stacktraces-with-package-tags/). + - `essential/ad.jl` is removed, `ForwardDiff` and `ReverseDiff` integrations via `LogDensityProblemsAD` are moved to `DynamicPPL` and live in corresponding package extensions. + - `LogDensityProblemsAD.ADgradient(ℓ::DynamicPPL.LogDensityFunction)` (i.e. the single argument method) is moved to `Inference` module. It will create `ADgradient` using the `adtype` information stored in `context` field of `ℓ`. 
+ - `getADbackend` function is renamed to `getADType`, the interface is preserved, but packages that previously used `getADbackend` should be updated to use `getADType`. + - `TuringTag` for ForwardDiff is also removed, now `DynamicPPLTag` is defined in `DynamicPPL` package and should serve the same [purpose](https://www.stochasticlifestyle.com/improved-forwarddiff-jl-stacktraces-with-package-tags/). # Release 0.30.0 -- [`ADTypes.jl`](https://github.com/SciML/ADTypes.jl) replaced Turing's global AD backend. Users should now specify the desired `ADType` directly in sampler constructors, e.g., `HMC(0.1, 10; adtype=AutoForwardDiff(; chunksize))`, or `HMC(0.1, 10; adtype=AutoReverseDiff(false))` (`false` indicates not to use compiled tape). -- Interface functions such as `ADBackend`, `setadbackend`, `setadsafe`, `setchunksize`, and `setrdcache` are deprecated and will be removed in a future release. -- Removed the outdated `verifygrad` function. -- Updated to a newer version of `LogDensityProblemsAD` (v1.7). + - [`ADTypes.jl`](https://github.com/SciML/ADTypes.jl) replaced Turing's global AD backend. Users should now specify the desired `ADType` directly in sampler constructors, e.g., `HMC(0.1, 10; adtype=AutoForwardDiff(; chunksize))`, or `HMC(0.1, 10; adtype=AutoReverseDiff(false))` (`false` indicates not to use compiled tape). + - Interface functions such as `ADBackend`, `setadbackend`, `setadsafe`, `setchunksize`, and `setrdcache` are deprecated and will be removed in a future release. + - Removed the outdated `verifygrad` function. + - Updated to a newer version of `LogDensityProblemsAD` (v1.7). # Release 0.12.0 -- The interface for defining new distributions with constrained support and making them compatible with `Turing` has changed. To make a custom distribution type `CustomDistribution` compatible with `Turing`, the user needs to define the method `bijector(d::CustomDistribution)` that returns an instance of type `Bijector` implementing the `Bijectors.Bijector` API. -- `~` is now thread-safe when used for observations, but not assumptions (non-observed model parameters) yet. -- There were some performance improvements in the automatic differentiation (AD) of functions in `DistributionsAD` and `Bijectors`, leading to speeds closer to and sometimes faster than Stan's. -- An `HMC` initialization bug was fixed. `HMC` initialization in Turing is now consistent with Stan's. -- Sampling from the prior is now possible using `sample`. -- `psample` is now deprecated in favour of `sample(model, sampler, parallel_method, n_samples, n_chains)` where `parallel_method` can be either `MCMCThreads()` or `MCMCDistributed()`. `MCMCThreads` will use your available threads to sample each chain (ensure that you have the environment variable `JULIA_NUM_THREADS` set to the number of threads you want to use), and `MCMCDistributed` will dispatch chain sampling to each available process (you can add processes with `addprocs()`). -- Turing now uses `AdvancedMH.jl` v0.5, which mostly provides behind-the-scenes restructuring. -- Custom expressions and macros can be interpolated in the `@model` definition with `$`; it is possible to use `@.` also for assumptions (non-observed model parameters) and observations. -- The macros `@varinfo`, `@logpdf`, and `@sampler` are removed. Instead, one can access the internal variables `_varinfo`, `_model`, `_sampler`, and `_context` in the `@model` definition. -- Additional constructors for `SMC` and `PG` make it easier to choose the resampling method and threshold. 
+ - The interface for defining new distributions with constrained support and making them compatible with `Turing` has changed. To make a custom distribution type `CustomDistribution` compatible with `Turing`, the user needs to define the method `bijector(d::CustomDistribution)` that returns an instance of type `Bijector` implementing the `Bijectors.Bijector` API. + - `~` is now thread-safe when used for observations, but not assumptions (non-observed model parameters) yet. + - There were some performance improvements in the automatic differentiation (AD) of functions in `DistributionsAD` and `Bijectors`, leading to speeds closer to and sometimes faster than Stan's. + - An `HMC` initialization bug was fixed. `HMC` initialization in Turing is now consistent with Stan's. + - Sampling from the prior is now possible using `sample`. + - `psample` is now deprecated in favour of `sample(model, sampler, parallel_method, n_samples, n_chains)` where `parallel_method` can be either `MCMCThreads()` or `MCMCDistributed()`. `MCMCThreads` will use your available threads to sample each chain (ensure that you have the environment variable `JULIA_NUM_THREADS` set to the number of threads you want to use), and `MCMCDistributed` will dispatch chain sampling to each available process (you can add processes with `addprocs()`). + - Turing now uses `AdvancedMH.jl` v0.5, which mostly provides behind-the-scenes restructuring. + - Custom expressions and macros can be interpolated in the `@model` definition with `$`; it is possible to use `@.` also for assumptions (non-observed model parameters) and observations. + - The macros `@varinfo`, `@logpdf`, and `@sampler` are removed. Instead, one can access the internal variables `_varinfo`, `_model`, `_sampler`, and `_context` in the `@model` definition. + - Additional constructors for `SMC` and `PG` make it easier to choose the resampling method and threshold. # Release 0.11.0 -- Removed some extraneous imports and dependencies ([#1182](https://github.com/TuringLang/Turing.jl/pull/1182)) -- Minor backend changes to `sample` and `psample`, which now use functions defined upstream in AbstractMCMC.jl ([#1187](https://github.com/TuringLang/Turing.jl/pull/1187)) -- Fix for an AD-related crash ([#1202](https://github.com/TuringLang/Turing.jl/pull/1202)) -- StatsBase compat update to 0.33 ([#1185](https://github.com/TuringLang/Turing.jl/pull/1185)) -- Bugfix for ReverseDiff caching and memoization ([#1208](https://github.com/TuringLang/Turing.jl/pull/1208)) -- BREAKING: `VecBinomialLogit` is now removed. 
Also `BernoulliLogit` is added ([#1214](https://github.com/TuringLang/Turing.jl/pull/1214)) -- Bugfix for cases where dynamic models were breaking with HMC methods ([#1217](https://github.com/TuringLang/Turing.jl/pull/1217)) -- Updates to allow AdvancedHMC 0.2.23 ([#1218](https://github.com/TuringLang/Turing.jl/pull/1218)) -- Add more informative error messages for SMC ([#900](https://github.com/TuringLang/Turing.jl/pull/900)) + + - Removed some extraneous imports and dependencies ([#1182](https://github.com/TuringLang/Turing.jl/pull/1182)) + - Minor backend changes to `sample` and `psample`, which now use functions defined upstream in AbstractMCMC.jl ([#1187](https://github.com/TuringLang/Turing.jl/pull/1187)) + - Fix for an AD-related crash ([#1202](https://github.com/TuringLang/Turing.jl/pull/1202)) + - StatsBase compat update to 0.33 ([#1185](https://github.com/TuringLang/Turing.jl/pull/1185)) + - Bugfix for ReverseDiff caching and memoization ([#1208](https://github.com/TuringLang/Turing.jl/pull/1208)) + - BREAKING: `VecBinomialLogit` is now removed. Also `BernoulliLogit` is added ([#1214](https://github.com/TuringLang/Turing.jl/pull/1214)) + - Bugfix for cases where dynamic models were breaking with HMC methods ([#1217](https://github.com/TuringLang/Turing.jl/pull/1217)) + - Updates to allow AdvancedHMC 0.2.23 ([#1218](https://github.com/TuringLang/Turing.jl/pull/1218)) + - Add more informative error messages for SMC ([#900](https://github.com/TuringLang/Turing.jl/pull/900)) # Release 0.10.1 -- Fix bug where arrays with mixed integers, floats, and missing values were not being passed to the `MCMCChains.Chains` constructor properly [#1180](https://github.com/TuringLang/Turing.jl/pull/1180). + + - Fix bug where arrays with mixed integers, floats, and missing values were not being passed to the `MCMCChains.Chains` constructor properly [#1180](https://github.com/TuringLang/Turing.jl/pull/1180). # Release 0.10.0 -- Update elliptical slice sampling to use [EllipticalSliceSampling.jl](https://github.com/TuringLang/EllipticalSliceSampling.jl) on the backend. [#1145](https://github.com/TuringLang/Turing.jl/pull/1145). Nothing should change from a front-end perspective -- you can still call `sample(model, ESS(), 1000)`. -- Added default progress loggers in [#1149](https://github.com/TuringLang/Turing.jl/pull/1149). -- The symbols used to define the AD backend have changed to be the lowercase form of the package name used for AD. `forward_diff` is now `forwarddiff`, `reverse_diff` is now `tracker`, and `zygote` and `reversediff` are newly supported (see below). `forward_diff` and `reverse_diff` are deprecated and are slated to be removed. -- Turing now has experimental support for Zygote.jl ([#783](https://github.com/TuringLang/Turing.jl/pull/783)) and ReverseDiff.jl ([#1170](https://github.com/TuringLang/Turing.jl/pull/1170)) AD backends. Both backends are experimental, so please report any bugs you find. Zygote does not allow mutation within your model, so please be aware of this issue. You can enable Zygote with `Turing.setadbackend(:zygote)` and you can enable ReverseDiff with `Turing.setadbackend(:reversediff)`, though to use either you must import the package with `using Zygote` or `using ReverseDiff`. `for` loops are not recommended for ReverseDiff or Zygote -- see [performance tips](https://turinglang.org/dev/docs/using-turing/performancetips#special-care-for-codetrackercode-and-codezygotecode) for more information. 
-- Fix MH indexing bug [#1135](https://github.com/TuringLang/Turing.jl/pull/1135). -- Fix MH array sampling [#1167](https://github.com/TuringLang/Turing.jl/pull/1167). -- Fix bug in VI where the bijectors where being inverted incorrectly [#1168](https://github.com/TuringLang/Turing.jl/pull/1168). -- The Gibbs sampler handles state better by passing `Transition` structs to the local samplers ([#1169](https://github.com/TuringLang/Turing.jl/pull/1169) and [#1166](https://github.com/TuringLang/Turing.jl/pull/1166)). + + - Update elliptical slice sampling to use [EllipticalSliceSampling.jl](https://github.com/TuringLang/EllipticalSliceSampling.jl) on the backend. [#1145](https://github.com/TuringLang/Turing.jl/pull/1145). Nothing should change from a front-end perspective -- you can still call `sample(model, ESS(), 1000)`. + - Added default progress loggers in [#1149](https://github.com/TuringLang/Turing.jl/pull/1149). + - The symbols used to define the AD backend have changed to be the lowercase form of the package name used for AD. `forward_diff` is now `forwarddiff`, `reverse_diff` is now `tracker`, and `zygote` and `reversediff` are newly supported (see below). `forward_diff` and `reverse_diff` are deprecated and are slated to be removed. + - Turing now has experimental support for Zygote.jl ([#783](https://github.com/TuringLang/Turing.jl/pull/783)) and ReverseDiff.jl ([#1170](https://github.com/TuringLang/Turing.jl/pull/1170)) AD backends. Both backends are experimental, so please report any bugs you find. Zygote does not allow mutation within your model, so please be aware of this issue. You can enable Zygote with `Turing.setadbackend(:zygote)` and you can enable ReverseDiff with `Turing.setadbackend(:reversediff)`, though to use either you must import the package with `using Zygote` or `using ReverseDiff`. `for` loops are not recommended for ReverseDiff or Zygote -- see [performance tips](https://turinglang.org/dev/docs/using-turing/performancetips#special-care-for-codetrackercode-and-codezygotecode) for more information. + - Fix MH indexing bug [#1135](https://github.com/TuringLang/Turing.jl/pull/1135). + - Fix MH array sampling [#1167](https://github.com/TuringLang/Turing.jl/pull/1167). + - Fix bug in VI where the bijectors where being inverted incorrectly [#1168](https://github.com/TuringLang/Turing.jl/pull/1168). + - The Gibbs sampler handles state better by passing `Transition` structs to the local samplers ([#1169](https://github.com/TuringLang/Turing.jl/pull/1169) and [#1166](https://github.com/TuringLang/Turing.jl/pull/1166)). # Release 0.4.0-alpha -- Fix compatibility with Julia 0.6 [#341, #330, #293] -- Support of Stan interface [#343, #326] -- Fix Binomial distribution for gradients. [#311] -- Stochastic gradient Hamiltonian Monte Carlo [#201]; Stochastic gradient Langevin dynamics [#27] -- More particle MCMC family samplers: PIMH & PMMH [#364, #369] -- Disable adaptive resampling for CSMC [#357] -- Fix resampler for SMC [#338] -- Interactive particle MCMC [#334] -- Add type alias CSMC for PG [#333] -- Fix progress meter [#317] + + - Fix compatibility with Julia 0.6 [#341, #330, #293] + - Support of Stan interface [#343, #326] + - Fix Binomial distribution for gradients. 
[#311] + - Stochastic gradient Hamiltonian Monte Carlo [#201]; Stochastic gradient Langevin dynamics [#27] + - More particle MCMC family samplers: PIMH & PMMH [#364, #369] + - Disable adaptive resampling for CSMC [#357] + - Fix resampler for SMC [#338] + - Interactive particle MCMC [#334] + - Add type alias CSMC for PG [#333] + - Fix progress meter [#317] # Release 0.3 -- NUTS implementation #188 -- HMC: Transforms of ϵ for each variable #67 (replace with introducing mass matrix) -- Finish: Sampler (internal) interface design #107 -- Substantially improve performance of HMC and Gibbs #7 - - Vectorising likelihood computations #117 #255 - - Remove obsolete `randoc`, `randc`? #156 -- Support truncated distribution. #87 -- Refactoring code: Unify VarInfo, Trace, TaskLocalStorage #96 -- Refactoring code: Better gradient interface #97 + + - NUTS implementation #188 + - HMC: Transforms of ϵ for each variable #67 (replace with introducing mass matrix) + - Finish: Sampler (internal) interface design #107 + - Substantially improve performance of HMC and Gibbs #7 + - Vectorising likelihood computations #117 #255 + - Remove obsolete `randoc`, `randc`? #156 + - Support truncated distribution. #87 + - Refactoring code: Unify VarInfo, Trace, TaskLocalStorage #96 + - Refactoring code: Better gradient interface #97 # Release 0.2 -- Gibbs sampler ([#73]) -- HMC for constrained variables ([#66]; no support for varying dimensions) -- Added support for `Mamba.Chain` ([#90]): describe, plot etc. -- New interface design ([#55]), ([#104]) -- Bugfixes and general improvements (e.g. `VarInfo` [#96]) + + - Gibbs sampler ([#73]) + - HMC for constrained variables ([#66]; no support for varying dimensions) + - Added support for `Mamba.Chain` ([#90]): describe, plot etc. + - New interface design ([#55]), ([#104]) + - Bugfixes and general improvements (e.g. `VarInfo` [#96]) # Release 0.1.0 -- Initial support for Hamiltonian Monte Carlo (no support for discrete/constrained variables) -- Require Julia 0.5 -- Bugfixes and general improvements + + - Initial support for Hamiltonian Monte Carlo (no support for discrete/constrained variables) + - Require Julia 0.5 + - Bugfixes and general improvements # Release 0.0.1-0.0.4 -The initial releases of Turing. -- Particle MCMC, SMC, IS -- Implemented [copying for Julia Task](https://github.com/JuliaLang/julia/pull/15078) -- Implemented copy-on-write data structure `TArray` for Tasks + +The initial releases of Turing. 
+ + - Particle MCMC, SMC, IS + - Implemented [copying for Julia Task](https://github.com/JuliaLang/julia/pull/15078) + - Implemented copy-on-write data structure `TArray` for Tasks diff --git a/README.md b/README.md index fa95b3378..4ea12d52e 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ [![Build Status](https://github.com/TuringLang/Turing.jl/workflows/Turing-CI/badge.svg)](https://github.com/TuringLang/Turing.jl/actions?query=workflow%3ATuring-CI+branch%3Amaster) [![Coverage Status](https://coveralls.io/repos/github/TuringLang/Turing.jl/badge.svg?branch=master)](https://coveralls.io/github/TuringLang/Turing.jl?branch=master) [![codecov](https://codecov.io/gh/TuringLang/Turing.jl/branch/master/graph/badge.svg?token=OiUBsnDQqf)](https://codecov.io/gh/TuringLang/Turing.jl) -[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) - +[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor%27s%20Guide-blueviolet)](https://github.com/SciML/ColPrac) ## Getting Started @@ -12,7 +11,6 @@ Turing's home page, with links to everything you'll need to use Turing, is avail https://turinglang.org/docs/ - ## What's changed recently? See [releases](https://github.com/TuringLang/Turing.jl/releases). @@ -25,6 +23,5 @@ You can see the complete list on Github: https://github.com/TuringLang/Turing.jl Turing is an open source project so if you feel you have some relevant skills and are interested in contributing, please get in touch. See the [Contributing](https://turinglang.org/dev/docs/contributing/guide) page for details on the process. You can contribute by opening issues on Github, implementing things yourself, and making a pull request. We would also appreciate example models written using Turing. ## Issues and Discussions -Issues related to bugs and feature requests are welcome on the [issues page](https://github.com/TuringLang/Turing.jl/issues), while discussions and questions about statistical applications and theory should place on the [Discussions page](https://github.com/TuringLang/Turing.jl/discussions) or [our channel](https://julialang.slack.com/messages/turing/) (`#turing`) in the Julia Slack chat. If you do not have an invitation to Julia's Slack, you can get one by going [here](https://julialang.org/slack/). - +Issues related to bugs and feature requests are welcome on the [issues page](https://github.com/TuringLang/Turing.jl/issues), while discussions and questions about statistical applications and theory should place on the [Discussions page](https://github.com/TuringLang/Turing.jl/discussions) or [our channel](https://julialang.slack.com/messages/turing/) (`#turing`) in the Julia Slack chat. If you do not have an invitation to Julia's Slack, you can get one by going [here](https://julialang.org/slack/). 
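The `.JuliaFormatter.toml` and `.github/workflows/Format.yml` files added at the top of this diff configure repository-wide formatting with the `blue` style and enforce it in CI via reviewdog suggestions. A minimal sketch of the equivalent local run is shown below; it is not part of the diff itself and simply mirrors the workflow's Julia step, reusing the JuliaFormatter UUID pinned there.

```julia
# Sketch: format the repository locally, mirroring the CI step in Format.yml.
using Pkg
Pkg.add(; name="JuliaFormatter", uuid="98e50ef6-434e-11e9-1051-2b60c6c9e899")

using JuliaFormatter
# format(".") picks up .JuliaFormatter.toml (style = "blue" plus the ignore list).
format("."; verbose=true)
```

Running this before pushing should leave nothing for the CI formatter to suggest.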
diff --git a/benchmarks/benchmarks_suite.jl b/benchmarks/benchmarks_suite.jl index ef00117ee..200380121 100644 --- a/benchmarks/benchmarks_suite.jl +++ b/benchmarks/benchmarks_suite.jl @@ -16,18 +16,17 @@ BenchmarkSuite["constrained"] = BenchmarkGroup(["constrained"]) data = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1] - @model function constrained_test(obs) - p ~ Beta(2,2) - for i = 1:length(obs) + p ~ Beta(2, 2) + for i in 1:length(obs) obs[i] ~ Bernoulli(p) end - p + return p end - -BenchmarkSuite["constrained"]["constrained"] = @benchmarkable sample($(constrained_test(data)), $(HMC(0.01, 2)), 2000) - +BenchmarkSuite["constrained"]["constrained"] = @benchmarkable sample( + $(constrained_test(data)), $(HMC(0.01, 2)), 2000 +) ## gdemo @@ -41,9 +40,9 @@ BenchmarkSuite["gdemo"] = BenchmarkGroup(["gdemo"]) return s², m end -BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample($(gdemo(1.5, 2.0)), $(HMC(0.01, 2)), 2000) - - +BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample( + $(gdemo(1.5, 2.0)), $(HMC(0.01, 2)), 2000 +) ## MvNormal @@ -52,8 +51,8 @@ BenchmarkSuite["mnormal"] = BenchmarkGroup(["mnormal"]) # Define the target distribution and its gradient @model function target(dim) - Θ = Vector{Real}(undef, dim) - θ ~ MvNormal(zeros(dim), I) + Θ = Vector{Real}(undef, dim) + return θ ~ MvNormal(zeros(dim), I) end # Sampling parameter settings @@ -61,24 +60,29 @@ dim = 10 n_samples = 100_000 n_adapts = 2_000 -BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples) +BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample( + $(target(dim)), $(HMC(0.1, 5)), $n_samples +) ## MvNormal: ForwardDiff vs ReverseDiff @model function mdemo(d, N) Θ = Vector(undef, N) - for n=1:N - Θ[n] ~ d - end + for n in 1:N + Θ[n] ~ d + end end dim2 = 250 -A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2))); -d = MvNormal(zeros(dim2), A) +A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2))); +d = MvNormal(zeros(dim2), A) # ForwardDiff -BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000) - +BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample( + $(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000 +) # ReverseDiff -BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000) +BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample( + $(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000 +) diff --git a/benchmarks/models/hlr.jl b/benchmarks/models/hlr.jl index 36836263f..b40c3b2c2 100644 --- a/benchmarks/models/hlr.jl +++ b/benchmarks/models/hlr.jl @@ -10,15 +10,14 @@ end x, y = readlrdata() @model function hlr_nuts(x, y, θ) + N, D = size(x) - N,D = size(x) - - σ² ~ Exponential(θ) + σ² ~ Exponential(θ) α ~ Normal(0, sqrt(σ²)) β ~ MvNormal(zeros(D), σ² * I) - for n = 1:N - y[n] ~ BinomialLogit(1, dot(x[n,:], β) + α) + for n in 1:N + y[n] ~ BinomialLogit(1, dot(x[n, :], β) + α) end end @@ -26,4 +25,6 @@ end n_samples = 10_000 # Sampling -BenchmarkSuite["nuts"]["hrl"] = @benchmarkable sample(hlr_nuts(x, y, 1/0.1), NUTS(0.65), n_samples) +BenchmarkSuite["nuts"]["hrl"] = @benchmarkable sample( + hlr_nuts(x, y, 1 / 0.1), NUTS(0.65), n_samples +) diff --git a/benchmarks/models/lr.jl b/benchmarks/models/lr.jl index c98963e79..3714a93d0 100644 --- a/benchmarks/models/lr.jl +++ b/benchmarks/models/lr.jl @@ -10,14 +10,13 @@ end X, Y = readlrdata() @model function 
lr_nuts(x, y, σ) - - N,D = size(x) + N, D = size(x) α ~ Normal(0, σ) β ~ MvNormal(zeros(D), σ^2 * I) - for n = 1:N - y[n] ~ BinomialLogit(1, dot(x[n,:], β) + α) + for n in 1:N + y[n] ~ BinomialLogit(1, dot(x[n, :], β) + α) end end @@ -26,5 +25,6 @@ n_samples = 1_000 n_adapts = 1_000 # Sampling -BenchmarkSuite["nuts"]["lr"] = @benchmarkable sample(lr_nuts(X, Y, 100), - NUTS(0.65), n_samples) +BenchmarkSuite["nuts"]["lr"] = @benchmarkable sample( + lr_nuts(X, Y, 100), NUTS(0.65), n_samples +) diff --git a/benchmarks/models/lr_helper.jl b/benchmarks/models/lr_helper.jl index 20f6bf3f1..83de134c8 100644 --- a/benchmarks/models/lr_helper.jl +++ b/benchmarks/models/lr_helper.jl @@ -1,10 +1,9 @@ using DelimitedFiles function readlrdata() - fname = joinpath(dirname(@__FILE__), "lr_nuts.data") z = readdlm(fname) - x = z[:,1:end-1] - y = z[:,end] .- 1 + x = z[:, 1:(end - 1)] + y = z[:, end] .- 1 return x, y end diff --git a/benchmarks/models/sv_nuts.jl b/benchmarks/models/sv_nuts.jl index c54dade69..697625dce 100644 --- a/benchmarks/models/sv_nuts.jl +++ b/benchmarks/models/sv_nuts.jl @@ -6,26 +6,25 @@ if !haskey(BenchmarkSuite, "nuts") end fname = joinpath(dirname(@__FILE__), "sv_nuts.data") -y, header = readdlm(fname, ',', header=true) +y, header = readdlm(fname, ','; header=true) # Stochastic volatility (SV) @model function sv_nuts(y, dy, ::Type{T}=Vector{Float64}) where {T} - N = size(y,1) + N = size(y, 1) - τ ~ Exponential(1/100) - ν ~ Exponential(1/100) + τ ~ Exponential(1 / 100) + ν ~ Exponential(1 / 100) s = T(undef, N) - s[1] ~ Exponential(1/100) + s[1] ~ Exponential(1 / 100) for n in 2:N - s[n] ~ Normal(log(s[n-1]), τ) + s[n] ~ Normal(log(s[n - 1]), τ) s[n] = exp(s[n]) - dy = log(y[n] / y[n-1]) / s[n] - dy ~ TDist(ν) + dy = log(y[n] / y[n - 1]) / s[n] + dy ~ TDist(ν) end end - # Sampling parameter settings n_samples = 10_000 diff --git a/docs/README.md b/docs/README.md index 2bc2ad683..7a1ad9119 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,5 @@ -Turing's documentation in this directory is in markdown format. +Turing's documentation in this directory is in markdown format. If you want to build the doc locally, please refer to the [README](https://github.com/TuringLang/turinglang.github.io) file in [turinglang.github.io](https://github.com/TuringLang/turinglang.github.io). -Please also visit [this repo](https://github.com/TuringLang/TuringTutorials/tree/master/tutorials) for the docs. +Please also visit [this repo](https://github.com/TuringLang/TuringTutorials/tree/master/tutorials) for the docs. 
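The reformatted benchmark models above pass the AD backend to each sampler through the `adtype` keyword (e.g. `HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))` or `AutoReverseDiff(false)`), which is the per-sampler interface described in the Release 0.30.0 notes earlier in this diff. A minimal sketch of the same pattern outside the benchmark harness follows; the model and data are illustrative only (they echo the `constrained_test` benchmark), not part of the diff.

```julia
# Sketch: per-sampler AD selection via `adtype`, as used in the benchmarks above.
# Model and data are illustrative only.
using Turing

@model function coinflip(y)
    p ~ Beta(2, 2)
    for i in eachindex(y)
        y[i] ~ Bernoulli(p)
    end
end

data = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]

# ForwardDiff-based gradients; AutoReverseDiff(false) would instead select
# ReverseDiff without a compiled tape, matching the benchmark entries.
chain = sample(coinflip(data), HMC(0.01, 2; adtype=AutoForwardDiff()), 2_000)
```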
diff --git a/docs/src/library/advancedhmc.md b/docs/src/library/advancedhmc.md index 5742e4cff..84f712f4d 100644 --- a/docs/src/library/advancedhmc.md +++ b/docs/src/library/advancedhmc.md @@ -22,4 +22,4 @@ Order = [:function] ```@autodocs Modules = [AdvancedHMC] Order = [:type] -``` \ No newline at end of file +``` diff --git a/docs/src/library/api.md b/docs/src/library/api.md index 53a946ceb..c598820b7 100644 --- a/docs/src/library/api.md +++ b/docs/src/library/api.md @@ -7,6 +7,7 @@ toc: true ```@meta CurrentModule = Turing ``` + ## Index ```@index diff --git a/docs/src/library/bijectors.md b/docs/src/library/bijectors.md index 9fb4eaecf..471da45fe 100644 --- a/docs/src/library/bijectors.md +++ b/docs/src/library/bijectors.md @@ -22,4 +22,4 @@ Order = [:function] ```@autodocs Modules = [Bijectors] Order = [:type] -``` \ No newline at end of file +``` diff --git a/ext/TuringDynamicHMCExt.jl b/ext/TuringDynamicHMCExt.jl index 27bca72e1..c82f237c0 100644 --- a/ext/TuringDynamicHMCExt.jl +++ b/ext/TuringDynamicHMCExt.jl @@ -3,9 +3,8 @@ module TuringDynamicHMCExt ### DynamicHMC backend - https://github.com/tpapp/DynamicHMC.jl ### - if isdefined(Base, :get_extension) - import DynamicHMC + using DynamicHMC: DynamicHMC using Turing using Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL using Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS @@ -13,7 +12,7 @@ else import ..DynamicHMC using ..Turing using ..Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL - using ..Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS + using ..Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS end """ @@ -25,22 +24,22 @@ To use it, make sure you have DynamicHMC package (version >= 2) loaded: ```julia using DynamicHMC ``` -""" +""" struct DynamicNUTS{AD,space,T<:DynamicHMC.NUTS} <: Turing.Inference.Hamiltonian sampler::T adtype::AD end function DynamicNUTS( - spl::DynamicHMC.NUTS = DynamicHMC.NUTS(), - space::Tuple = (); - adtype::ADTypes.AbstractADType = Turing.DEFAULT_ADTYPE + spl::DynamicHMC.NUTS=DynamicHMC.NUTS(), + space::Tuple=(); + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype) end Turing.externalsampler(spl::DynamicHMC.NUTS) = DynamicNUTS(spl) -DynamicPPL.getspace(::DynamicNUTS{<:Any, space}) where {space} = space +DynamicPPL.getspace(::DynamicNUTS{<:Any,space}) where {space} = space """ DynamicNUTSState @@ -59,14 +58,16 @@ struct DynamicNUTSState{L,V<:DynamicPPL.AbstractVarInfo,C,M,S} stepsize::S end -DynamicPPL.initialsampler(::DynamicPPL.Sampler{<:DynamicNUTS}) = DynamicPPL.SampleFromUniform() +function DynamicPPL.initialsampler(::DynamicPPL.Sampler{<:DynamicNUTS}) + return DynamicPPL.SampleFromUniform() +end function DynamicPPL.initialstep( rng::Random.AbstractRNG, model::DynamicPPL.Model, spl::DynamicPPL.Sampler{<:DynamicNUTS}, vi::DynamicPPL.AbstractVarInfo; - kwargs... + kwargs..., ) # Ensure that initial sample is in unconstrained space. if !DynamicPPL.islinked(vi, spl) @@ -75,15 +76,13 @@ function DynamicPPL.initialstep( end # Define log-density function. - ℓ = LogDensityProblemsAD.ADgradient(Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext())) + ℓ = LogDensityProblemsAD.ADgradient( + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ) # Perform initial step. 
results = DynamicHMC.mcmc_keep_warmup( - rng, - ℓ, - 0; - initialization = (q = vi[spl],), - reporter = DynamicHMC.NoProgressReport(), + rng, ℓ, 0; initialization=(q=vi[spl],), reporter=DynamicHMC.NoProgressReport() ) steps = DynamicHMC.mcmc_steps(results.sampling_logdensity, results.final_warmup_state) Q, _ = DynamicHMC.mcmc_next_step(steps, results.final_warmup_state.Q) @@ -104,18 +103,12 @@ function AbstractMCMC.step( model::DynamicPPL.Model, spl::DynamicPPL.Sampler{<:DynamicNUTS}, state::DynamicNUTSState; - kwargs... + kwargs..., ) # Compute next sample. vi = state.vi ℓ = state.logdensity - steps = DynamicHMC.mcmc_steps( - rng, - spl.alg.sampler, - state.metric, - ℓ, - state.stepsize, - ) + steps = DynamicHMC.mcmc_steps(rng, spl.alg.sampler, state.metric, ℓ, state.stepsize) Q, _ = DynamicHMC.mcmc_next_step(steps, state.cache) # Update the variables. diff --git a/ext/TuringOptimExt.jl b/ext/TuringOptimExt.jl index d95bdde33..3d31af6f2 100644 --- a/ext/TuringOptimExt.jl +++ b/ext/TuringOptimExt.jl @@ -1,20 +1,12 @@ module TuringOptimExt if isdefined(Base, :get_extension) - import Turing - import Turing: - DynamicPPL, - NamedArrays, - Accessors, - Optimisation - import Optim + using Turing: Turing + import Turing: DynamicPPL, NamedArrays, Accessors, Optimisation + using Optim: Optim else import ..Turing - import ..Turing: - DynamicPPL, - NamedArrays, - Accessors, - Optimisation + import ..Turing: DynamicPPL, NamedArrays, Accessors, Optimisation import ..Optim end @@ -43,8 +35,10 @@ mle = optimize(model, MLE(), NelderMead()) ``` """ function Optim.optimize( - model::DynamicPPL.Model, ::Optimisation.MLE, options::Optim.Options=Optim.Options(); - kwargs... + model::DynamicPPL.Model, + ::Optimisation.MLE, + options::Optim.Options=Optim.Options(); + kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -57,7 +51,7 @@ function Optim.optimize( ::Optimisation.MLE, init_vals::AbstractArray, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) optimizer = Optim.LBFGS() return _mle_optimize(model, init_vals, optimizer, options; kwargs...) @@ -67,7 +61,7 @@ function Optim.optimize( ::Optimisation.MLE, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -80,7 +74,7 @@ function Optim.optimize( init_vals::AbstractArray, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end @@ -111,8 +105,10 @@ map_est = optimize(model, MAP(), NelderMead()) ``` """ function Optim.optimize( - model::DynamicPPL.Model, ::Optimisation.MAP, options::Optim.Options=Optim.Options(); - kwargs... + model::DynamicPPL.Model, + ::Optimisation.MAP, + options::Optim.Options=Optim.Options(); + kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -125,7 +121,7 @@ function Optim.optimize( ::Optimisation.MAP, init_vals::AbstractArray, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) optimizer = Optim.LBFGS() return _map_optimize(model, init_vals, optimizer, options; kwargs...) @@ -135,7 +131,7 @@ function Optim.optimize( ::Optimisation.MAP, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... 
+ kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -148,7 +144,7 @@ function Optim.optimize( init_vals::AbstractArray, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) return _map_optimize(model, init_vals, optimizer, options; kwargs...) end @@ -170,7 +166,7 @@ function _optimize( optimizer::Optim.AbstractOptimizer=Optim.LBFGS(), options::Optim.Options=Optim.Options(), args...; - kwargs... + kwargs..., ) # Convert the initial values, since it is assumed that users provide them # in the constrained space. diff --git a/src/Turing.jl b/src/Turing.jl index 4aa2fef2c..e3ba2f81d 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -7,19 +7,19 @@ using Libtask @reexport using Distributions, MCMCChains, Libtask, AbstractMCMC, Bijectors using Compat: pkgversion -import AdvancedVI +using AdvancedVI: AdvancedVI using DynamicPPL: DynamicPPL, LogDensityFunction import DynamicPPL: getspace, NoDist, NamedDist -import LogDensityProblems -import NamedArrays -import Accessors -import StatsAPI -import StatsBase +using LogDensityProblems: LogDensityProblems +using NamedArrays: NamedArrays +using Accessors: Accessors +using StatsAPI: StatsAPI +using StatsBase: StatsBase using Accessors: Accessors -import Printf -import Random +using Printf: Printf +using Random: Random using ADTypes: ADTypes @@ -62,87 +62,82 @@ include("deprecated.jl") # to be removed in the next minor version release # Exports # ########### # `using` statements for stuff to re-export -using DynamicPPL: pointwise_loglikelihoods, generated_quantities, logprior, logjoint, condition, decondition, fix, unfix, conditioned +using DynamicPPL: + pointwise_loglikelihoods, + generated_quantities, + logprior, + logjoint, + condition, + decondition, + fix, + unfix, + conditioned using StatsBase: predict using Bijectors: ordered using OrderedCollections: OrderedDict # Turing essentials - modelling macros and inference algorithms -export @model, # modelling - @varname, - @submodel, - DynamicPPL, - - Prior, # Sampling from the prior - - MH, # classic sampling - Emcee, - ESS, - Gibbs, - GibbsConditional, - - HMC, # Hamiltonian-like sampling - SGLD, - SGHMC, - HMCDA, - NUTS, - DynamicNUTS, - ANUTS, - - PolynomialStepsize, - - IS, # particle-based sampling - SMC, - CSMC, - PG, - - vi, # variational inference - ADVI, - - sample, # inference - @logprob_str, - @prob_str, - externalsampler, - - AutoForwardDiff, # ADTypes - AutoReverseDiff, - AutoZygote, - AutoTracker, - - setprogress!, # debugging - - Flat, - FlatPos, - BinomialLogit, - BernoulliLogit, # Part of Distributions >= 0.25.77 - OrderedLogistic, - LogPoisson, - filldist, - arraydist, - - NamedDist, # Exports from DynamicPPL - predict, - pointwise_loglikelihoods, - elementwise_loglikelihoods, - generated_quantities, - logprior, - logjoint, - LogDensityFunction, - - condition, - decondition, - fix, - unfix, - conditioned, - OrderedDict, - - ordered, # Exports from Bijectors - - maximum_a_posteriori, - maximum_likelihood, - # The MAP and MLE exports are only needed for the Optim.jl interface. 
- MAP, - MLE +export @model, # modelling + @varname, + @submodel, + DynamicPPL, + Prior, # Sampling from the prior + MH, # classic sampling + Emcee, + ESS, + Gibbs, + GibbsConditional, + HMC, # Hamiltonian-like sampling + SGLD, + SGHMC, + HMCDA, + NUTS, + DynamicNUTS, + ANUTS, + PolynomialStepsize, + IS, # particle-based sampling + SMC, + CSMC, + PG, + vi, # variational inference + ADVI, + sample, # inference + @logprob_str, + @prob_str, + externalsampler, + AutoForwardDiff, # ADTypes + AutoReverseDiff, + AutoZygote, + AutoTracker, + setprogress!, # debugging + Flat, + FlatPos, + BinomialLogit, + BernoulliLogit, # Part of Distributions >= 0.25.77 + OrderedLogistic, + LogPoisson, + filldist, + arraydist, + NamedDist, # Exports from DynamicPPL + predict, + pointwise_loglikelihoods, + elementwise_loglikelihoods, + generated_quantities, + logprior, + logjoint, + LogDensityFunction, + condition, + decondition, + fix, + unfix, + conditioned, + OrderedDict, + ordered, # Exports from Bijectors + maximum_a_posteriori, + maximum_likelihood, + # The MAP and MLE exports are only needed for the Optim.jl interface. + MAP, + MLE # AutoTapir is only supported by ADTypes v1.0 and above. @static if VERSION >= v"1.10" && pkgversion(ADTypes) >= v"1" @@ -155,9 +150,13 @@ end function __init__() @static if !isdefined(Base, :get_extension) - @require Optim="429524aa-4258-5aef-a3af-852621145aeb" include("../ext/TuringOptimExt.jl") - @require DynamicHMC="bbc10e6e-7c05-544b-b16e-64fede858acb" include("../ext/TuringDynamicHMCExt.jl") - end + @require Optim = "429524aa-4258-5aef-a3af-852621145aeb" include( + "../ext/TuringOptimExt.jl" + ) + @require DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" include( + "../ext/TuringDynamicHMCExt.jl" + ) + end end end diff --git a/src/deprecated.jl b/src/deprecated.jl index 76f5854cf..34305d442 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,23 +1,39 @@ export setadbackend, setchunksize, setadsafe -function setadbackend(::Union{Symbol, Val}) - Base.depwarn("`ADBACKEND` and `setbackend` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", :setbackend; force=true) - nothing +function setadbackend(::Union{Symbol,Val}) + Base.depwarn( + "`ADBACKEND` and `setbackend` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", + :setbackend; + force=true, + ) + return nothing end function setchunksize(::Int) - Base.depwarn("`CHUNKSIZE` and `setchunksize` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", :setchunksize; force=true) - nothing + Base.depwarn( + "`CHUNKSIZE` and `setchunksize` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", + :setchunksize; + force=true, + ) + return nothing end -function setrdcache(::Union{Bool, Val}) - Base.depwarn("`RDCACHE` and `setrdcache` are deprecated. Please specify if you wish to use compiled tape for ReverseDiff directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoReverseDiff(false))`.\n This function has no effects.", :setrdcache; force=true) - nothing +function setrdcache(::Union{Bool,Val}) + Base.depwarn( + "`RDCACHE` and `setrdcache` are deprecated. 
Please specify if you wish to use compiled tape for ReverseDiff directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoReverseDiff(false))`.\n This function has no effects.", + :setrdcache; + force=true, + ) + return nothing end function setadsafe(::Bool) - Base.depwarn("`ADSAFE` and `setadsafe` are outdated and no longer in use.", :setadsafe; force=true) - nothing + Base.depwarn( + "`ADSAFE` and `setadsafe` are outdated and no longer in use.", + :setadsafe; + force=true, + ) + return nothing end Base.@deprecate_binding Core Essential false diff --git a/src/essential/Essential.jl b/src/essential/Essential.jl index 8cef7a681..12413bef2 100644 --- a/src/essential/Essential.jl +++ b/src/essential/Essential.jl @@ -13,35 +13,35 @@ using StatsFuns: logsumexp, softmax @reexport using DynamicPPL using ADTypes: ADTypes, AutoForwardDiff, AutoTracker, AutoReverseDiff, AutoZygote -import AdvancedPS +using AdvancedPS: AdvancedPS include("container.jl") -export @model, - @varname, - generate_observe, - translate_tilde!, - get_vars, - get_data, - get_default_values, - ParticleContainer, - Particle, - Trace, - fork, - forkr, - current_trace, - getweights, - getweight, - effectiveSampleSize, - sweep!, - ResampleWithESSThreshold, - AutoForwardDiff, - AutoTracker, - AutoZygote, - AutoReverseDiff, - value, - @logprob_str, - @prob_str +export @model, + @varname, + generate_observe, + translate_tilde!, + get_vars, + get_data, + get_default_values, + ParticleContainer, + Particle, + Trace, + fork, + forkr, + current_trace, + getweights, + getweight, + effectiveSampleSize, + sweep!, + ResampleWithESSThreshold, + AutoForwardDiff, + AutoTracker, + AutoZygote, + AutoReverseDiff, + value, + @logprob_str, + @prob_str # AutoTapir is only supported by ADTypes v1.0 and above. @static if VERSION >= v"1.10" && pkgversion(ADTypes) >= v"1" diff --git a/src/essential/container.jl b/src/essential/container.jl index 483270c9f..bd4e21f6b 100644 --- a/src/essential/container.jl +++ b/src/essential/container.jl @@ -1,4 +1,5 @@ -struct TracedModel{S<:AbstractSampler,V<:AbstractVarInfo,M<:Model,E<:Tuple} <: AdvancedPS.AbstractGenericModel +struct TracedModel{S<:AbstractSampler,V<:AbstractVarInfo,M<:Model,E<:Tuple} <: + AdvancedPS.AbstractGenericModel model::M sampler::S varinfo::V @@ -14,26 +15,24 @@ function TracedModel( context = SamplingContext(rng, sampler, DefaultContext()) args, kwargs = DynamicPPL.make_evaluate_args_and_kwargs(model, varinfo, context) if kwargs !== nothing && !isempty(kwargs) - error("Sampling with `$(sampler.alg)` does not support models with keyword arguments. See issue #2007 for more details.") + error( + "Sampling with `$(sampler.alg)` does not support models with keyword arguments. See issue #2007 for more details.", + ) end return TracedModel{AbstractSampler,AbstractVarInfo,Model,Tuple}( - model, - sampler, - varinfo, - (model.f, args...) + model, sampler, varinfo, (model.f, args...) ) end function AdvancedPS.advance!( - trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}, - isref::Bool=false + trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}, isref::Bool=false ) # Make sure we load/reset the rng in the new replaying mechanism DynamicPPL.increment_num_produce!(trace.model.f.varinfo) isref ? 
AdvancedPS.load_state!(trace.rng) : AdvancedPS.save_state!(trace.rng) score = consume(trace.model.ctask) if score === nothing - return + return nothing else return score + DynamicPPL.getlogp(trace.model.f.varinfo) end @@ -54,7 +53,9 @@ function AdvancedPS.reset_logprob!(trace::TracedModel) return trace end -function AdvancedPS.update_rng!(trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}) +function AdvancedPS.update_rng!( + trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}} +) # Extract the `args`. args = trace.model.ctask.args # From `args`, extract the `SamplingContext`, which contains the RNG. diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index 49b9aab57..4ebe49ea5 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -2,7 +2,7 @@ ### Sampler states ### -struct Emcee{space, E<:AMH.Ensemble} <: InferenceAlgorithm +struct Emcee{space,E<:AMH.Ensemble} <: InferenceAlgorithm ensemble::E end @@ -12,7 +12,7 @@ function Emcee(n_walkers::Int, stretch_length=2.0) # ensemble sampling. prop = AMH.StretchProposal(nothing, stretch_length) ensemble = AMH.Ensemble(n_walkers, prop) - return Emcee{(), typeof(ensemble)}(ensemble) + return Emcee{(),typeof(ensemble)}(ensemble) end struct EmceeState{V<:AbstractVarInfo,S} @@ -24,9 +24,9 @@ function AbstractMCMC.step( rng::Random.AbstractRNG, model::Model, spl::Sampler{<:Emcee}; - resume_from = nothing, - initial_params = nothing, - kwargs... + resume_from=nothing, + initial_params=nothing, + kwargs..., ) if resume_from !== nothing state = loadstate(resume_from) @@ -39,9 +39,8 @@ function AbstractMCMC.step( # Update the parameters if provided. if initial_params !== nothing - length(initial_params) == n || throw( - ArgumentError("initial parameters have to be specified for each walker") - ) + length(initial_params) == n || + throw(ArgumentError("initial parameters have to be specified for each walker")) vis = map(vis, initial_params) do vi, init vi = DynamicPPL.initialize_parameters!!(vi, init, spl, model) @@ -59,18 +58,14 @@ function AbstractMCMC.step( map(vis) do vi vi = DynamicPPL.link!!(vi, spl, model) AMH.Transition(vi[spl], getlogp(vi), false) - end + end, ) return transition, state end function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:Emcee}, - state::EmceeState; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:Emcee}, state::EmceeState; kwargs... ) # Generate a log joint function. vi = state.vi @@ -98,11 +93,11 @@ function AbstractMCMC.bundle_samples( spl::Sampler{<:Emcee}, state::EmceeState, chain_type::Type{MCMCChains.Chains}; - save_state = false, - sort_chain = false, - discard_initial = 0, - thinning = 1, - kwargs... + save_state=false, + sort_chain=false, + discard_initial=0, + thinning=1, + kwargs..., ) # Convert transitions to array format. # Also retrieve the variable names. @@ -134,9 +129,9 @@ function AbstractMCMC.bundle_samples( le = getlogevidence(samples, state, spl) # Set up the info tuple. - info = (varname_to_symbol = OrderedDict(zip(varnames, varnames_symbol)),) + info = (varname_to_symbol=OrderedDict(zip(varnames, varnames_symbol)),) if save_state - info = merge(info, (model = model, sampler = spl, samplerstate = state)) + info = merge(info, (model=model, sampler=spl, samplerstate=state)) end # Concretize the array before giving it to MCMCChains. 
@@ -146,7 +141,7 @@ function AbstractMCMC.bundle_samples( chain = MCMCChains.Chains( parray, nms, - (internals = extra_params,); + (internals=extra_params,); evidence=le, info=info, start=discard_initial + 1, diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index a12d683f5..2910a7efd 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -27,11 +27,7 @@ ESS(space::Symbol) = ESS{(space,)}() # always accept in the first step function DynamicPPL.initialstep( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:ESS}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:ESS}, vi::AbstractVarInfo; kwargs... ) # Sanity check vns = _getvns(vi, spl) @@ -47,11 +43,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:ESS}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:ESS}, vi::AbstractVarInfo; kwargs... ) # obtain previous sample f = vi[spl] @@ -64,7 +56,8 @@ function AbstractMCMC.step( sample, state = AbstractMCMC.step( rng, EllipticalSliceSampling.ESSModel( - ESSPrior(model, spl, vi), Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()), + ESSPrior(model, spl, vi), + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()), ), EllipticalSliceSampling.ESS(), oldstate, @@ -83,10 +76,10 @@ struct ESSPrior{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo,T} sampler::S varinfo::V μ::T - - function ESSPrior{M,S,V}(model::M, sampler::S, varinfo::V) where { - M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo - } + + function ESSPrior{M,S,V}( + model::M, sampler::S, varinfo::V + ) where {M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} vns = _getvns(varinfo, sampler) μ = mapreduce(vcat, vns[1]) do vn dist = getdist(varinfo, vn) @@ -99,9 +92,7 @@ struct ESSPrior{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo,T} end function ESSPrior(model::Model, sampler::Sampler{<:ESS}, varinfo::AbstractVarInfo) - return ESSPrior{typeof(model),typeof(sampler),typeof(varinfo)}( - model, sampler, varinfo, - ) + return ESSPrior{typeof(model),typeof(sampler),typeof(varinfo)}(model, sampler, varinfo) end # Ensure that the prior is a Gaussian distribution (checked in the constructor) @@ -124,7 +115,9 @@ end Distributions.mean(p::ESSPrior) = p.μ # Evaluate log-likelihood of proposals -const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = Turing.LogDensityFunction{V,M,<:DynamicPPL.SamplingContext{<:S}} +const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = Turing.LogDensityFunction{ + V,M,<:DynamicPPL.SamplingContext{<:S} +} function (ℓ::ESSLogLikelihood)(f::AbstractVector) sampler = DynamicPPL.getsampler(ℓ) @@ -133,7 +126,9 @@ function (ℓ::ESSLogLikelihood)(f::AbstractVector) return getlogp(varinfo) end -function DynamicPPL.tilde_assume(rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, vn, vi) +function DynamicPPL.tilde_assume( + rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, vn, vi +) return if inspace(vn, sampler) DynamicPPL.tilde_assume(rng, LikelihoodContext(), SampleFromPrior(), right, vn, vi) else @@ -141,19 +136,33 @@ function DynamicPPL.tilde_assume(rng::Random.AbstractRNG, ctx::DefaultContext, s end end -function DynamicPPL.tilde_observe(ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi) +function DynamicPPL.tilde_observe( + ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi +) return DynamicPPL.tilde_observe(ctx, SampleFromPrior(), right, left, vi) 
end -function DynamicPPL.dot_tilde_assume(rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vns, vi) +function DynamicPPL.dot_tilde_assume( + rng::Random.AbstractRNG, + ctx::DefaultContext, + sampler::Sampler{<:ESS}, + right, + left, + vns, + vi, +) # TODO: Or should we do `all(Base.Fix2(inspace, sampler), vns)`? return if inspace(first(vns), sampler) - DynamicPPL.dot_tilde_assume(rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi) + DynamicPPL.dot_tilde_assume( + rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi + ) else DynamicPPL.dot_tilde_assume(rng, ctx, SampleFromPrior(), right, left, vns, vi) end end -function DynamicPPL.dot_tilde_observe(ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi) +function DynamicPPL.dot_tilde_observe( + ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi +) return DynamicPPL.dot_tilde_observe(ctx, SampleFromPrior(), right, left, vi) end diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index 2a7572ffa..815566437 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -2,7 +2,6 @@ ### Gibbs samplers / compositional samplers. ### - """ isgibbscomponent(alg) @@ -16,7 +15,7 @@ isgibbscomponent(::Hamiltonian) = true isgibbscomponent(::MH) = true isgibbscomponent(::PG) = true -const TGIBBS = Union{InferenceAlgorithm, GibbsConditional} +const TGIBBS = Union{InferenceAlgorithm,GibbsConditional} """ Gibbs(algs...) @@ -47,12 +46,15 @@ Tips: methods like Particle Gibbs. You can increase the effectiveness of particle sampling by including more particles in the particle sampler. """ -struct Gibbs{space, N, A<:NTuple{N, TGIBBS}, B<:NTuple{N, Int}} <: InferenceAlgorithm +struct Gibbs{space,N,A<:NTuple{N,TGIBBS},B<:NTuple{N,Int}} <: InferenceAlgorithm algs::A # component sampling algorithms iterations::B - function Gibbs{space, N, A, B}(algs::A, iterations::B) where {space, N, A<:NTuple{N, TGIBBS}, B<:NTuple{N, Int}} - all(isgibbscomponent, algs) || error("all algorithms have to support Gibbs sampling") - return new{space, N, A, B}(algs, iterations) + function Gibbs{space,N,A,B}( + algs::A, iterations::B + ) where {space,N,A<:NTuple{N,TGIBBS},B<:NTuple{N,Int}} + all(isgibbscomponent, algs) || + error("all algorithms have to support Gibbs sampling") + return new{space,N,A,B}(algs, iterations) end end @@ -61,19 +63,18 @@ function Gibbs(alg1::TGIBBS, algrest::Vararg{TGIBBS,N}) where {N} iterations = ntuple(Returns(1), Val(N + 1)) # obtain space for sampling algorithms space = Tuple(union(getspace.(algs)...)) - return Gibbs{space, N + 1, typeof(algs), typeof(iterations)}(algs, iterations) + return Gibbs{space,N + 1,typeof(algs),typeof(iterations)}(algs, iterations) end function Gibbs( - arg1::Tuple{<:TGIBBS,Int}, - argrest::Vararg{<:Tuple{<:TGIBBS,Int}, N}, + arg1::Tuple{<:TGIBBS,Int}, argrest::Vararg{<:Tuple{<:TGIBBS,Int},N} ) where {N} allargs = (arg1, argrest...) algs = map(first, allargs) iterations = map(last, allargs) # obtain space for sampling algorithms space = Tuple(union(getspace.(algs)...)) - return Gibbs{space, N + 1, typeof(algs), typeof(iterations)}(algs, iterations) + return Gibbs{space,N + 1,typeof(algs),typeof(iterations)}(algs, iterations) end """ @@ -110,14 +111,13 @@ Return an updated state, taking into account the variables sampled by other Gibb - `varinfo`: the variables, including the ones sampled by other Gibbs components. 
""" gibbs_state(model, sampler, state::AbstractVarInfo, varinfo::AbstractVarInfo) = varinfo -gibbs_state(model, sampler, state::PGState, varinfo::AbstractVarInfo) = PGState(varinfo, state.rng) +function gibbs_state(model, sampler, state::PGState, varinfo::AbstractVarInfo) + return PGState(varinfo, state.rng) +end # Update state in Gibbs sampling function gibbs_state( - model::Model, - spl::Sampler{<:Hamiltonian}, - state::HMCState, - varinfo::AbstractVarInfo, + model::Model, spl::Sampler{<:Hamiltonian}, state::HMCState, varinfo::AbstractVarInfo ) # Update hamiltonian θ_old = varinfo[spl] @@ -159,11 +159,7 @@ gibbs_rerun(prev_alg, ::PG) = false # Initialize the Gibbs sampler. function DynamicPPL.initialstep( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:Gibbs}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:Gibbs}, vi::AbstractVarInfo; kwargs... ) # TODO: Technically this only works for `VarInfo` or `ThreadSafeVarInfo{<:VarInfo}`. # Should we enforce this? @@ -176,7 +172,7 @@ function DynamicPPL.initialstep( if i == 1 prev_alg = algs[end] else - prev_alg = algs[i-1] + prev_alg = algs[i - 1] end rerun = gibbs_rerun(prev_alg, alg) selector = DynamicPPL.Selector(Symbol(typeof(alg)), rerun) @@ -202,7 +198,11 @@ function DynamicPPL.initialstep( states = map(samplers) do local_spl # Recompute `vi.logp` if needed. if local_spl.selector.rerun - vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, local_spl))) + vi = last( + DynamicPPL.evaluate!!( + model, vi, DynamicPPL.SamplingContext(rng, local_spl) + ), + ) end # Compute initial state. @@ -223,11 +223,7 @@ end # Subsequent steps function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:Gibbs}, - state::GibbsState; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:Gibbs}, state::GibbsState; kwargs... ) # Iterate through each of the samplers. vi = state.vi diff --git a/src/mcmc/gibbs_conditional.jl b/src/mcmc/gibbs_conditional.jl index 5dd1b1787..fda79315b 100644 --- a/src/mcmc/gibbs_conditional.jl +++ b/src/mcmc/gibbs_conditional.jl @@ -49,11 +49,11 @@ m = inverse_gdemo(x) sample(m, Gibbs(GibbsConditional(:λ, cond_λ), GibbsConditional(:m, cond_m)), 10) ``` """ -struct GibbsConditional{S, C} +struct GibbsConditional{S,C} conditional::C function GibbsConditional(sym::Symbol, conditional::C) where {C} - return new{sym, C}(conditional) + return new{sym,C}(conditional) end end @@ -64,7 +64,7 @@ function DynamicPPL.initialstep( model::Model, spl::Sampler{<:GibbsConditional}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) return nothing, vi end @@ -74,7 +74,7 @@ function AbstractMCMC.step( model::Model, spl::Sampler{<:GibbsConditional}, vi::AbstractVarInfo; - kwargs... 
+ kwargs..., ) condvals = DynamicPPL.values_as(DynamicPPL.invlink(vi, model), NamedTuple) conddist = spl.alg.conditional(condvals) diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index c3c90eab3..f17bca2e7 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -61,13 +61,19 @@ sample(gdemo([1.5, 2]), HMC(0.1, 10), 1000) sample(gdemo([1.5, 2]), HMC(0.01, 10), 1000) ``` """ -struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian +struct HMC{AD,space,metricT<:AHMC.AbstractMetric} <: StaticHamiltonian ϵ::Float64 # leapfrog step size n_leapfrog::Int # leapfrog step number adtype::AD end -function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE) where {metricT<:AHMC.AbstractMetric} +function HMC( + ϵ::Float64, + n_leapfrog::Int, + ::Type{metricT}, + space::Tuple; + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, +) where {metricT<:AHMC.AbstractMetric} return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype) end function HMC( @@ -77,7 +83,7 @@ function HMC( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype) + return HMC(ϵ, n_leapfrog, metricT, space; adtype=adtype) end DynamicPPL.initialsampler(::Sampler{<:Hamiltonian}) = SampleFromUniform() @@ -95,7 +101,7 @@ function AbstractMCMC.sample( nadapts=sampler.alg.n_adapts, discard_adapt=true, discard_initial=-1, - kwargs... + kwargs..., ) if resume_from === nothing # If `nadapts` is `-1`, then the user called a convenience @@ -114,15 +120,30 @@ function AbstractMCMC.sample( _discard_initial = discard_initial end - return AbstractMCMC.mcmcsample(rng, model, sampler, N; - chain_type=chain_type, progress=progress, - nadapts=_nadapts, discard_initial=_discard_initial, - kwargs...) + return AbstractMCMC.mcmcsample( + rng, + model, + sampler, + N; + chain_type=chain_type, + progress=progress, + nadapts=_nadapts, + discard_initial=_discard_initial, + kwargs..., + ) else return AbstractMCMC.mcmcsample( - rng, model, sampler, N; - chain_type=chain_type, initial_state=initial_state, progress=progress, - nadapts=0, discard_adapt=false, discard_initial=0, kwargs... + rng, + model, + sampler, + N; + chain_type=chain_type, + initial_state=initial_state, + progress=progress, + nadapts=0, + discard_adapt=false, + discard_initial=0, + kwargs..., ) end end @@ -134,7 +155,7 @@ function DynamicPPL.initialstep( vi_original::AbstractVarInfo; initial_params=nothing, nadapts=0, - kwargs... + kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. vi = DynamicPPL.link(vi_original, spl, model) @@ -152,8 +173,8 @@ function DynamicPPL.initialstep( # Use the leaf-context from the `model` in case the user has # contextualized the model with something like `PriorContext` # to sample from the prior. 
- DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) - ) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), + ), ) logπ = Base.Fix1(LogDensityProblems.logdensity, ℓ) ∂logπ∂θ(x) = LogDensityProblems.logdensity_and_gradient(ℓ, x) @@ -203,9 +224,9 @@ function DynamicPPL.initialstep( # Adaptation adaptor = AHMCAdaptor(spl.alg, hamiltonian.metric; ϵ=ϵ) if spl.alg isa AdaptiveHamiltonian - hamiltonian, kernel, _ = - AHMC.adapt!(hamiltonian, kernel, adaptor, - 1, nadapts, t.z.θ, t.stat.acceptance_rate) + hamiltonian, kernel, _ = AHMC.adapt!( + hamiltonian, kernel, adaptor, 1, nadapts, t.z.θ, t.stat.acceptance_rate + ) end # Update `vi` based on acceptance @@ -229,7 +250,7 @@ function AbstractMCMC.step( spl::Sampler{<:Hamiltonian}, state::HMCState; nadapts=0, - kwargs... + kwargs..., ) # Get step size @debug "current ϵ" getstepsize(spl, state) @@ -242,9 +263,15 @@ function AbstractMCMC.step( # Adaptation i = state.i + 1 if spl.alg isa AdaptiveHamiltonian - hamiltonian, kernel, _ = - AHMC.adapt!(hamiltonian, state.kernel, state.adaptor, - i, nadapts, t.z.θ, t.stat.acceptance_rate) + hamiltonian, kernel, _ = AHMC.adapt!( + hamiltonian, + state.kernel, + state.adaptor, + i, + nadapts, + t.z.θ, + t.stat.acceptance_rate, + ) else kernel = state.kernel end @@ -269,8 +296,8 @@ function get_hamiltonian(model, spl, vi, state, n) Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(spl, DynamicPPL.leafcontext(model.context)) - ) + DynamicPPL.SamplingContext(spl, DynamicPPL.leafcontext(model.context)), + ), ) ℓπ = Base.Fix1(LogDensityProblems.logdensity, ℓ) ∂ℓπ∂θ = Base.Fix1(LogDensityProblems.logdensity_and_gradient, ℓ) @@ -308,15 +335,23 @@ Hoffman, Matthew D., and Andrew Gelman. "The No-U-turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo." Journal of Machine Learning Research 15, no. 1 (2014): 1593-1623. """ -struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian - n_adapts :: Int # number of samples with adaption for ϵ - δ :: Float64 # target accept rate - λ :: Float64 # target leapfrog length - ϵ :: Float64 # (initial) step size +struct HMCDA{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian + n_adapts::Int # number of samples with adaption for ϵ + δ::Float64 # target accept rate + λ::Float64 # target leapfrog length + ϵ::Float64 # (initial) step size adtype::AD end -function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE) where {metricT<:AHMC.AbstractMetric} +function HMCDA( + n_adapts::Int, + δ::Float64, + λ::Float64, + ϵ::Float64, + ::Type{metricT}, + space::Tuple; + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, +) where {metricT<:AHMC.AbstractMetric} return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype) end @@ -327,16 +362,10 @@ function HMCDA( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype) + return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype=adtype) end -function HMCDA( - n_adapts::Int, - δ::Float64, - λ::Float64, - ::Tuple{}; - kwargs... -) +function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ::Tuple{}; kwargs...) return HMCDA(n_adapts, δ, λ; kwargs...) 
end @@ -349,10 +378,9 @@ function HMCDA( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype) + return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype=adtype) end - """ NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0; adtype::ADTypes.AbstractADType=AutoForwardDiff() @@ -398,13 +426,8 @@ function NUTS( return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype) end -function NUTS( - n_adapts::Int, - δ::Float64, - ::Tuple{}; - kwargs... -) - NUTS(n_adapts, δ; kwargs...) +function NUTS(n_adapts::Int, δ::Float64, ::Tuple{}; kwargs...) + return NUTS(n_adapts, δ; kwargs...) end function NUTS( @@ -417,7 +440,7 @@ function NUTS( metricT=AHMC.DiagEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype) + return NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype) end function NUTS( @@ -428,15 +451,15 @@ function NUTS( metricT=AHMC.DiagEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype) + return NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype) end function NUTS(; kwargs...) - NUTS(-1, 0.65; kwargs...) + return NUTS(-1, 0.65; kwargs...) end for alg in (:HMC, :HMCDA, :NUTS) - @eval getmetricT(::$alg{<:Any, <:Any, metricT}) where {metricT} = metricT + @eval getmetricT(::$alg{<:Any,<:Any,metricT}) where {metricT} = metricT end ##### @@ -452,23 +475,28 @@ function gen_metric(dim::Int, spl::Sampler{<:AdaptiveHamiltonian}, state) end function make_ahmc_kernel(alg::HMC, ϵ) - return AHMC.HMCKernel(AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedNSteps(alg.n_leapfrog))) + return AHMC.HMCKernel( + AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedNSteps(alg.n_leapfrog)) + ) end function make_ahmc_kernel(alg::HMCDA, ϵ) - return AHMC.HMCKernel(AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedIntegrationTime(alg.λ))) + return AHMC.HMCKernel( + AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedIntegrationTime(alg.λ)) + ) +end +function make_ahmc_kernel(alg::NUTS, ϵ) + return AHMC.HMCKernel( + AHMC.Trajectory{AHMC.MultinomialTS}( + AHMC.Leapfrog(ϵ), AHMC.GeneralisedNoUTurn(alg.max_depth, alg.Δ_max) + ), + ) end -make_ahmc_kernel(alg::NUTS, ϵ) = - AHMC.HMCKernel(AHMC.Trajectory{AHMC.MultinomialTS}(AHMC.Leapfrog(ϵ), AHMC.GeneralisedNoUTurn(alg.max_depth, alg.Δ_max))) #### #### Compiler interface, i.e. tilde operators. 
#### function DynamicPPL.assume( - rng, - spl::Sampler{<:Hamiltonian}, - dist::Distribution, - vn::VarName, - vi, + rng, spl::Sampler{<:Hamiltonian}, dist::Distribution, vn::VarName, vi ) DynamicPPL.updategid!(vi, vn, spl) return DynamicPPL.assume(dist, vn, vi) @@ -488,7 +516,7 @@ end function DynamicPPL.dot_assume( rng, spl::Sampler{<:Hamiltonian}, - dists::Union{Distribution, AbstractArray{<:Distribution}}, + dists::Union{Distribution,AbstractArray{<:Distribution}}, vns::AbstractArray{<:VarName}, var::AbstractArray, vi, @@ -497,18 +525,13 @@ function DynamicPPL.dot_assume( return DynamicPPL.dot_assume(dists, var, vns, vi) end -function DynamicPPL.observe( - spl::Sampler{<:Hamiltonian}, - d::Distribution, - value, - vi, -) +function DynamicPPL.observe(spl::Sampler{<:Hamiltonian}, d::Distribution, value, vi) return DynamicPPL.observe(d, value, vi) end function DynamicPPL.dot_observe( spl::Sampler{<:Hamiltonian}, - ds::Union{Distribution, AbstractArray{<:Distribution}}, + ds::Union{Distribution,AbstractArray{<:Distribution}}, value::AbstractArray, vi, ) @@ -537,7 +560,9 @@ function AHMCAdaptor(alg::AdaptiveHamiltonian, metric::AHMC.AbstractMetric; ϵ=a return adaptor end -AHMCAdaptor(::Hamiltonian, ::AHMC.AbstractMetric; kwargs...) = AHMC.Adaptation.NoAdaptation() +function AHMCAdaptor(::Hamiltonian, ::AHMC.AbstractMetric; kwargs...) + return AHMC.Adaptation.NoAdaptation() +end ########################## # HMC State Constructors # @@ -548,7 +573,7 @@ function HMCState( model::Model, spl::Sampler{<:Hamiltonian}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # Link everything if needed. waslinked = islinked(vi, spl) @@ -561,10 +586,9 @@ function HMCState( logπ = Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), ) - # Get the metric type. metricT = getmetricT(spl.alg) diff --git a/src/mcmc/is.jl b/src/mcmc/is.jl index ac05e9aaf..0fe6e1053 100644 --- a/src/mcmc/is.jl +++ b/src/mcmc/is.jl @@ -31,21 +31,13 @@ IS() = IS{()}() DynamicPPL.initialsampler(sampler::Sampler{<:IS}) = sampler function DynamicPPL.initialstep( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:IS}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:IS}, vi::AbstractVarInfo; kwargs... ) return Transition(model, vi), nothing end function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::Model, - spl::Sampler{<:IS}, - ::Nothing; - kwargs... + rng::Random.AbstractRNG, model::Model, spl::Sampler{<:IS}, ::Nothing; kwargs... ) vi = VarInfo(rng, model, spl) return Transition(model, vi), nothing diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 6cc9f9a21..9b7f3ff09 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -2,7 +2,7 @@ ### Sampler states ### -struct MH{space, P} <: InferenceAlgorithm +struct MH{space,P} <: InferenceAlgorithm proposals::P end @@ -173,7 +173,7 @@ function MH(space...) prop = proposal(s) # Return early, we got a covariance matrix. - return MH{(), typeof(prop)}(prop) + return MH{(),typeof(prop)}(prop) else # Try to convert it to a proposal anyways, # throw an error if not acceptable. @@ -185,7 +185,7 @@ function MH(space...) proposals = NamedTuple{tuple(prop_syms...)}(tuple(props...)) syms = vcat(syms, prop_syms) - return MH{tuple(syms...), typeof(proposals)}(proposals) + return MH{tuple(syms...),typeof(proposals)}(proposals) end # Some of the proposals require working in unconstrained space. 
@@ -225,11 +225,11 @@ function set_namedtuple!(vi::DynamicPPL.VarInfoOrThreadSafeVarInfo, nt::NamedTup # assign the unpacked values if length(vals) == 1 vi[vns[1]] = [vals[1];] - # otherwise just assign the values + # otherwise just assign the values else vi[vns[1]] = [vals;] end - # if there are multiple variables + # if there are multiple variables elseif vals isa AbstractArray nvals = length(vals) # if values are provided as an array with a single element @@ -238,7 +238,7 @@ function set_namedtuple!(vi::DynamicPPL.VarInfoOrThreadSafeVarInfo, nt::NamedTup for (vn, val) in zip(vns, vals[1]) vi[vn] = [val;] end - # otherwise number of variables and number of values have to be equal + # otherwise number of variables and number of values have to be equal elseif nvals == nvns # iterate over variables and values for (vn, val) in zip(vns, vals) @@ -260,7 +260,9 @@ A log density function for the MH sampler. This variant uses the `set_namedtuple!` function to update the `VarInfo`. """ -const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = Turing.LogDensityFunction{V,M,<:DynamicPPL.SamplingContext{<:S}} +const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = Turing.LogDensityFunction{ + V,M,<:DynamicPPL.SamplingContext{<:S} +} function LogDensityProblems.logdensity(f::MHLogDensityFunction, x::NamedTuple) # TODO: Make this work with immutable `f.varinfo` too. @@ -284,16 +286,10 @@ unvectorize(dists::AbstractVector) = length(dists) == 1 ? first(dists) : dists # possibly unpack and reshape samples according to the prior distribution reconstruct(dist::Distribution, val::AbstractVector) = DynamicPPL.reconstruct(dist, val) -function reconstruct( - dist::AbstractVector{<:UnivariateDistribution}, - val::AbstractVector -) +function reconstruct(dist::AbstractVector{<:UnivariateDistribution}, val::AbstractVector) return val end -function reconstruct( - dist::AbstractVector{<:MultivariateDistribution}, - val::AbstractVector -) +function reconstruct(dist::AbstractVector{<:MultivariateDistribution}, val::AbstractVector) offset = 0 return map(dist) do d n = length(d) @@ -319,23 +315,22 @@ function dist_val_tuple(spl::Sampler{<:MH}, vi::DynamicPPL.VarInfoOrThreadSafeVa return dt, vt end -@generated function _val_tuple( - vi::VarInfo, - vns::NamedTuple{names} -) where {names} +@generated function _val_tuple(vi::VarInfo, vns::NamedTuple{names}) where {names} isempty(names) === 0 && return :(NamedTuple()) expr = Expr(:tuple) expr.args = Any[ - :($name = reconstruct(unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)), - DynamicPPL.getval(vi, vns.$name))) - for name in names] + :( + $name = reconstruct( + unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)), + DynamicPPL.getval(vi, vns.$name), + ) + ) for name in names + ] return expr end @generated function _dist_tuple( - props::NamedTuple{propnames}, - vi::VarInfo, - vns::NamedTuple{names} + props::NamedTuple{propnames}, vi::VarInfo, vns::NamedTuple{names} ) where {names,propnames} isempty(names) === 0 && return :(NamedTuple()) expr = Expr(:tuple) @@ -345,14 +340,19 @@ end :($name = props.$name) else # Otherwise, use the default proposal. 
- :($name = AMH.StaticProposal(unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)))) - end for name in names] + :( + $name = AMH.StaticProposal( + unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)) + ) + ) + end for name in names + ] return expr end # Utility functions to link should_link(varinfo, sampler, proposal) = false -function should_link(varinfo, sampler, proposal::NamedTuple{(), Tuple{}}) +function should_link(varinfo, sampler, proposal::NamedTuple{(),Tuple{}}) # If it's an empty `NamedTuple`, we're using the priors as proposals # in which case we shouldn't link. return false @@ -362,24 +362,22 @@ function should_link(varinfo, sampler, proposal::AdvancedMH.RandomWalkProposal) end # FIXME: This won't be hit unless `vals` are all the exactly same concrete type of `AdvancedMH.RandomWalkProposal`! function should_link( - varinfo, - sampler, - proposal::NamedTuple{names, vals} -) where {names, vals<:NTuple{<:Any, <:AdvancedMH.RandomWalkProposal}} + varinfo, sampler, proposal::NamedTuple{names,vals} +) where {names,vals<:NTuple{<:Any,<:AdvancedMH.RandomWalkProposal}} return true end function maybe_link!!(varinfo, sampler, proposal, model) - return should_link(varinfo, sampler, proposal) ? link!!(varinfo, sampler, model) : varinfo + return if should_link(varinfo, sampler, proposal) + link!!(varinfo, sampler, model) + else + varinfo + end end # Make a proposal if we don't have a covariance proposal matrix (the default). function propose!!( - rng::AbstractRNG, - vi::AbstractVarInfo, - model::Model, - spl::Sampler{<:MH}, - proposal + rng::AbstractRNG, vi::AbstractVarInfo, model::Model, spl::Sampler{<:MH}, proposal ) # Retrieve distribution and value NamedTuples. dt, vt = dist_val_tuple(spl, vi) @@ -395,9 +393,9 @@ function propose!!( Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) - ) - ) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), + ), + ), ) trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) @@ -413,7 +411,7 @@ function propose!!( vi::AbstractVarInfo, model::Model, spl::Sampler{<:MH}, - proposal::AdvancedMH.RandomWalkProposal + proposal::AdvancedMH.RandomWalkProposal, ) # If this is the case, we can just draw directly from the proposal # matrix. @@ -430,9 +428,9 @@ function propose!!( Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) - ) - ) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), + ), + ), ) trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) @@ -444,7 +442,7 @@ function DynamicPPL.initialstep( model::AbstractModel, spl::Sampler{<:MH}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # If we're doing random walk with a covariance matrix, # just link everything before sampling. @@ -454,11 +452,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:MH}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:MH}, vi::AbstractVarInfo; kwargs... ) # Cases: # 1. A covariance proposal matrix @@ -471,13 +465,7 @@ end #### #### Compiler interface, i.e. tilde operators. 
#### -function DynamicPPL.assume( - rng, - spl::Sampler{<:MH}, - dist::Distribution, - vn::VarName, - vi, -) +function DynamicPPL.assume(rng, spl::Sampler{<:MH}, dist::Distribution, vn::VarName, vi) DynamicPPL.updategid!(vi, vn, spl) r = vi[vn] return r, logpdf_with_trans(dist, r, istrans(vi, vn)), vi @@ -502,7 +490,7 @@ end function DynamicPPL.dot_assume( rng, spl::Sampler{<:MH}, - dists::Union{Distribution, AbstractArray{<:Distribution}}, + dists::Union{Distribution,AbstractArray{<:Distribution}}, vn::VarName, var::AbstractArray, vi, @@ -515,18 +503,13 @@ function DynamicPPL.dot_assume( return var, sum(logpdf_with_trans.(dists, r, istrans(vi, vns[1]))), vi end -function DynamicPPL.observe( - spl::Sampler{<:MH}, - d::Distribution, - value, - vi, -) +function DynamicPPL.observe(spl::Sampler{<:MH}, d::Distribution, value, vi) return DynamicPPL.observe(SampleFromPrior(), d, value, vi) end function DynamicPPL.dot_observe( spl::Sampler{<:MH}, - ds::Union{Distribution, AbstractArray{<:Distribution}}, + ds::Union{Distribution,AbstractArray{<:Distribution}}, value::AbstractArray, vi, ) diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index 7d0714c41..d5a5f0847 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -15,7 +15,7 @@ Sequential Monte Carlo sampler. $(TYPEDFIELDS) """ -struct SMC{space, R} <: ParticleInference +struct SMC{space,R} <: ParticleInference resampler::R end @@ -29,15 +29,17 @@ Create a sequential Monte Carlo sampler of type [`SMC`](@ref) for the variables If the algorithm for the resampling step is not specified explicitly, systematic resampling is performed if the estimated effective sample size per particle drops below 0.5. """ -function SMC(resampler = AdvancedPS.ResampleWithESSThreshold(), space::Tuple = ()) - return SMC{space, typeof(resampler)}(resampler) +function SMC(resampler=AdvancedPS.ResampleWithESSThreshold(), space::Tuple=()) + return SMC{space,typeof(resampler)}(resampler) end # Convenient constructors with ESS threshold -function SMC(resampler, threshold::Real, space::Tuple = ()) +function SMC(resampler, threshold::Real, space::Tuple=()) return SMC(AdvancedPS.ResampleWithESSThreshold(resampler, threshold), space) end -SMC(threshold::Real, space::Tuple = ()) = SMC(AdvancedPS.resample_systematic, threshold, space) +function SMC(threshold::Real, space::Tuple=()) + return SMC(AdvancedPS.resample_systematic, threshold, space) +end # If only the space is defined SMC(space::Symbol...) = SMC(space) @@ -62,7 +64,7 @@ function SMCTransition(model::DynamicPPL.Model, vi::AbstractVarInfo, weight) return SMCTransition(theta, lp, weight) end -metadata(t::SMCTransition) = (lp = t.lp, weight = t.weight) +metadata(t::SMCTransition) = (lp=t.lp, weight=t.weight) DynamicPPL.getlogp(t::SMCTransition) = t.lp @@ -86,18 +88,30 @@ function AbstractMCMC.sample( resume_from=nothing, initial_state=DynamicPPL.loadstate(resume_from), progress=PROGRESS[], - kwargs... + kwargs..., ) if resume_from === nothing - return AbstractMCMC.mcmcsample(rng, model, sampler, N; - chain_type=chain_type, - progress=progress, - nparticles=N, - kwargs...) + return AbstractMCMC.mcmcsample( + rng, + model, + sampler, + N; + chain_type=chain_type, + progress=progress, + nparticles=N, + kwargs..., + ) else return AbstractMCMC.mcmcsample( - rng, model, sampler, N; chain_type, initial_state, progress=progress, - nparticles=N, kwargs... 
+ rng, + model, + sampler, + N; + chain_type, + initial_state, + progress=progress, + nparticles=N, + kwargs..., ) end end @@ -108,7 +122,7 @@ function DynamicPPL.initialstep( spl::Sampler{<:SMC}, vi::AbstractVarInfo; nparticles::Int, - kwargs... + kwargs..., ) # Reset the VarInfo. reset_num_produce!(vi) @@ -120,7 +134,7 @@ function DynamicPPL.initialstep( particles = AdvancedPS.ParticleContainer( [AdvancedPS.Trace(model, spl, vi, AdvancedPS.TracedRNG()) for _ in 1:nparticles], AdvancedPS.TracedRNG(), - rng + rng, ) # Perform particle sweep. @@ -138,11 +152,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - ::AbstractRNG, - model::AbstractModel, - spl::Sampler{<:SMC}, - state::SMCState; - kwargs... + ::AbstractRNG, model::AbstractModel, spl::Sampler{<:SMC}, state::SMCState; kwargs... ) # Extract the index of the current particle. index = state.particleindex @@ -191,18 +201,16 @@ If the algorithm for the resampling step is not specified explicitly, systematic is performed if the estimated effective sample size per particle drops below 0.5. """ function PG( - nparticles::Int, - resampler = AdvancedPS.ResampleWithESSThreshold(), - space::Tuple = (), + nparticles::Int, resampler=AdvancedPS.ResampleWithESSThreshold(), space::Tuple=() ) - return PG{space, typeof(resampler)}(nparticles, resampler) + return PG{space,typeof(resampler)}(nparticles, resampler) end # Convenient constructors with ESS threshold -function PG(nparticles::Int, resampler, threshold::Real, space::Tuple = ()) +function PG(nparticles::Int, resampler, threshold::Real, space::Tuple=()) return PG(nparticles, AdvancedPS.ResampleWithESSThreshold(resampler, threshold), space) end -function PG(nparticles::Int, threshold::Real, space::Tuple = ()) +function PG(nparticles::Int, threshold::Real, space::Tuple=()) return PG(nparticles, AdvancedPS.resample_systematic, threshold, space) end @@ -238,7 +246,7 @@ function PGTransition(model::DynamicPPL.Model, vi::AbstractVarInfo, logevidence) return PGTransition(theta, lp, logevidence) end -metadata(t::PGTransition) = (lp = t.lp, logevidence = t.logevidence) +metadata(t::PGTransition) = (lp=t.lp, logevidence=t.logevidence) DynamicPPL.getlogp(t::PGTransition) = t.lp @@ -251,7 +259,7 @@ function DynamicPPL.initialstep( model::AbstractModel, spl::Sampler{<:PG}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # Reset the VarInfo before new sweep reset_num_produce!(vi) @@ -263,7 +271,7 @@ function DynamicPPL.initialstep( particles = AdvancedPS.ParticleContainer( [AdvancedPS.Trace(model, spl, vi, AdvancedPS.TracedRNG()) for _ in 1:num_particles], AdvancedPS.TracedRNG(), - rng + rng, ) # Perform a particle sweep. @@ -282,11 +290,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - rng::AbstractRNG, - model::AbstractModel, - spl::Sampler{<:PG}, - state::PGState; - kwargs... + rng::AbstractRNG, model::AbstractModel, spl::Sampler{<:PG}, state::PGState; kwargs... ) # Reset the VarInfo before new sweep. 
vi = state.vi @@ -325,10 +329,14 @@ function AbstractMCMC.step( return transition, PGState(_vi, newreference.rng) end -DynamicPPL.use_threadsafe_eval(::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, ::AbstractVarInfo) = false +function DynamicPPL.use_threadsafe_eval( + ::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, ::AbstractVarInfo +) + return false +end function trace_local_varinfo_maybe(varinfo) - try + try trace = AdvancedPS.current_trace() return trace.model.f.varinfo catch e @@ -360,7 +368,7 @@ function DynamicPPL.assume( spl::Sampler{<:Union{PG,SMC}}, dist::Distribution, vn::VarName, - _vi::AbstractVarInfo + _vi::AbstractVarInfo, ) vi = trace_local_varinfo_maybe(_vi) trng = trace_local_rng_maybe(rng) @@ -399,18 +407,14 @@ function DynamicPPL.observe(spl::Sampler{<:Union{PG,SMC}}, dist::Distribution, v end function DynamicPPL.acclogp!!( - context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, - varinfo::AbstractVarInfo, - logp + context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, varinfo::AbstractVarInfo, logp ) varinfo_trace = trace_local_varinfo_maybe(varinfo) - DynamicPPL.acclogp!!(DynamicPPL.childcontext(context), varinfo_trace, logp) + return DynamicPPL.acclogp!!(DynamicPPL.childcontext(context), varinfo_trace, logp) end function DynamicPPL.acclogp_observe!!( - context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, - varinfo::AbstractVarInfo, - logp + context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, varinfo::AbstractVarInfo, logp ) Libtask.produce(logp) return trace_local_varinfo_maybe(varinfo) @@ -421,7 +425,7 @@ function AdvancedPS.Trace( model::Model, sampler::Sampler{<:Union{SMC,PG}}, varinfo::AbstractVarInfo, - rng::AdvancedPS.TracedRNG + rng::AdvancedPS.TracedRNG, ) newvarinfo = deepcopy(varinfo) DynamicPPL.reset_num_produce!(newvarinfo) diff --git a/src/mcmc/sghmc.jl b/src/mcmc/sghmc.jl index 61cffa7b3..84a9c18f3 100644 --- a/src/mcmc/sghmc.jl +++ b/src/mcmc/sghmc.jl @@ -44,10 +44,12 @@ function SGHMC( adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) _learning_rate, _momentum_decay = promote(learning_rate, momentum_decay) - return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype) + return SGHMC{typeof(adtype),space,typeof(_learning_rate)}( + _learning_rate, _momentum_decay, adtype + ) end -struct SGHMCState{L,V<:AbstractVarInfo, T<:AbstractVector{<:Real}} +struct SGHMCState{L,V<:AbstractVarInfo,T<:AbstractVector{<:Real}} logdensity::L vi::V velocity::T @@ -68,7 +70,9 @@ function DynamicPPL.initialstep( # Compute initial sample and state. sample = Transition(model, vi) - ℓ = LogDensityProblemsAD.ADgradient(Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext())) + ℓ = LogDensityProblemsAD.ADgradient( + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ) state = SGHMCState(ℓ, vi, zero(vi[spl])) return sample, state @@ -79,7 +83,7 @@ function AbstractMCMC.step( model::Model, spl::Sampler{<:SGHMC}, state::SGHMCState; - kwargs... + kwargs..., ) # Compute gradient of log density. ℓ = state.logdensity @@ -134,7 +138,7 @@ struct PolynomialStepsize{T<:Real} "Decay rate of step size in (0.5, 1]." γ::T - function PolynomialStepsize{T}(a::T, b::T, γ::T) where T + function PolynomialStepsize{T}(a::T, b::T, γ::T) where {T} 0.5 < γ ≤ 1 || error("the decay rate `γ` has to be in (0.5, 1]") return new{T}(a, b, γ) end @@ -153,7 +157,7 @@ a (b + t)^{-γ}. 
function PolynomialStepsize(a::T, b::T, γ::T) where {T<:Real} return PolynomialStepsize{T}(a, b, γ) end -function PolynomialStepsize(a::Real, b::Real = 0, γ::Real = 0.55) +function PolynomialStepsize(a::Real, b::Real=0, γ::Real=0.55) return PolynomialStepsize(promote(a, b, γ)...) end @@ -183,8 +187,8 @@ See also: [`PolynomialStepsize`](@ref) """ function SGLD( space::Symbol...; - stepsize = PolynomialStepsize(0.01), - adtype::ADTypes.AbstractADType = Turing.DEFAULT_ADTYPE, + stepsize=PolynomialStepsize(0.01), + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype) end @@ -204,7 +208,7 @@ function SGLDTransition(model::DynamicPPL.Model, vi::AbstractVarInfo, stepsize) return SGLDTransition(theta, lp, stepsize) end -metadata(t::SGLDTransition) = (lp = t.lp, SGLD_stepsize = t.stepsize) +metadata(t::SGLDTransition) = (lp=t.lp, SGLD_stepsize=t.stepsize) DynamicPPL.getlogp(t::SGLDTransition) = t.lp @@ -219,7 +223,7 @@ function DynamicPPL.initialstep( model::Model, spl::Sampler{<:SGLD}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. if !DynamicPPL.islinked(vi, spl) @@ -229,18 +233,16 @@ function DynamicPPL.initialstep( # Create first sample and state. sample = SGLDTransition(model, vi, zero(spl.alg.stepsize(0))) - ℓ = LogDensityProblemsAD.ADgradient(Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext())) + ℓ = LogDensityProblemsAD.ADgradient( + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ) state = SGLDState(ℓ, vi, 1) return sample, state end function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::Model, - spl::Sampler{<:SGLD}, - state::SGLDState; - kwargs... + rng::Random.AbstractRNG, model::Model, spl::Sampler{<:SGLD}, state::SGLDState; kwargs... ) # Perform gradient step. ℓ = state.logdensity diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 7a700f241..682e664a6 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -42,7 +42,13 @@ struct OptimizationContext{C<:DynamicPPL.AbstractContext} <: DynamicPPL.Abstract context::C function OptimizationContext{C}(context::C) where {C<:DynamicPPL.AbstractContext} - if !(context isa Union{DynamicPPL.DefaultContext,DynamicPPL.LikelihoodContext,DynamicPPL.PriorContext}) + if !( + context isa Union{ + DynamicPPL.DefaultContext, + DynamicPPL.LikelihoodContext, + DynamicPPL.PriorContext, + } + ) msg = """ `OptimizationContext` supports only leaf contexts of type `DynamicPPL.DefaultContext`, `DynamicPPL.LikelihoodContext`, @@ -93,19 +99,26 @@ function DynamicPPL.dot_tilde_assume(ctx::OptimizationContext, right, left, vns, return r, lp, vi end -DynamicPPL.tilde_observe(ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args...) = - DynamicPPL.tilde_observe(ctx.context, args...) +function DynamicPPL.tilde_observe( + ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args... +) + return DynamicPPL.tilde_observe(ctx.context, args...) +end -DynamicPPL.dot_tilde_observe(ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args...) = - DynamicPPL.dot_tilde_observe(ctx.context, args...) +function DynamicPPL.dot_tilde_observe( + ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args... +) + return DynamicPPL.dot_tilde_observe(ctx.context, args...) 
+end """ OptimLogDensity{M<:DynamicPPL.Model,C<:Context,V<:DynamicPPL.VarInfo} A struct that stores the negative log density function of a `DynamicPPL` model. """ -const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = - Turing.LogDensityFunction{V,M,C} +const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = Turing.LogDensityFunction{ + V,M,C +} """ OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext) @@ -176,11 +189,8 @@ end A wrapper struct to store various results from a MAP or MLE estimation. """ -struct ModeResult{ - V<:NamedArrays.NamedArray, - O<:Any, - M<:OptimLogDensity -} <: StatsBase.StatisticalModel +struct ModeResult{V<:NamedArrays.NamedArray,O<:Any,M<:OptimLogDensity} <: + StatsBase.StatisticalModel "A vector with the resulting point estimates." values::V "The stored optimiser results." @@ -195,11 +205,11 @@ function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) print(io, "ModeResult with maximized lp of ") Printf.@printf(io, "%.2f", m.lp) println(io) - show(io, m.values) + return show(io, m.values) end function Base.show(io::IO, m::ModeResult) - show(io, m.values.array) + return show(io, m.values.array) end # Various StatsBase methods for ModeResult @@ -312,9 +322,9 @@ struct ModeEstimationConstraints{ end has_box_constraints(c::ModeEstimationConstraints) = c.ub !== nothing || c.lb !== nothing -has_generic_constraints(c::ModeEstimationConstraints) = ( - c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing -) +function has_generic_constraints(c::ModeEstimationConstraints) + return (c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing) +end has_constraints(c) = has_box_constraints(c) || has_generic_constraints(c) """ @@ -328,17 +338,19 @@ uniformly from the box constraints. If generic constraints are set, an error is """ function generate_initial_params(model::DynamicPPL.Model, initial_params, constraints) if initial_params === nothing && has_generic_constraints(constraints) - throw(ArgumentError( - "You must provide an initial value when using generic constraints." - )) + throw( + ArgumentError( + "You must provide an initial value when using generic constraints." + ), + ) end return if initial_params !== nothing copy(initial_params) elseif has_box_constraints(constraints) [ - rand(Distributions.Uniform(lower, upper)) - for (lower, upper) in zip(constraints.lb, constraints.ub) + rand(Distributions.Uniform(lower, upper)) for + (lower, upper) in zip(constraints.lb, constraints.ub) ] else rand(Vector, model) @@ -367,11 +379,12 @@ function Optimization.OptimizationProblem(log_density::OptimLogDensity, adtype, Optimization.OptimizationProblem(f, initial_params) else Optimization.OptimizationProblem( - f, initial_params; + f, + initial_params; lcons=constraints.lcons, ucons=constraints.ucons, lb=constraints.lb, - ub=constraints.ub + ub=constraints.ub, ) end return prob @@ -418,7 +431,7 @@ function estimate_mode( ucons=nothing, lb=nothing, ub=nothing, - kwargs... + kwargs..., ) constraints = ModeEstimationConstraints(lb, ub, cons, lcons, ucons) initial_params = generate_initial_params(model, initial_params, constraints) diff --git a/src/stdlib/RandomMeasures.jl b/src/stdlib/RandomMeasures.jl index 9858279e6..add452786 100644 --- a/src/stdlib/RandomMeasures.jl +++ b/src/stdlib/RandomMeasures.jl @@ -19,7 +19,8 @@ abstract type AbstractRandomProbabilityMeasure end The *Size-Biased Sampling Process* for random probability measures `rpm` with a surplus mass of `surplus`. 
""" -struct SizeBiasedSamplingProcess{T<:AbstractRandomProbabilityMeasure,V<:AbstractFloat} <: ContinuousUnivariateDistribution +struct SizeBiasedSamplingProcess{T<:AbstractRandomProbabilityMeasure,V<:AbstractFloat} <: + ContinuousUnivariateDistribution rpm::T surplus::V end @@ -34,7 +35,8 @@ maximum(d::SizeBiasedSamplingProcess) = d.surplus The *Stick-Breaking Process* for random probability measures `rpm`. """ -struct StickBreakingProcess{T<:AbstractRandomProbabilityMeasure} <: ContinuousUnivariateDistribution +struct StickBreakingProcess{T<:AbstractRandomProbabilityMeasure} <: + ContinuousUnivariateDistribution rpm::T end @@ -48,12 +50,13 @@ maximum(d::StickBreakingProcess) = 1.0 The *Chinese Restaurant Process* for random probability measures `rpm` with counts `m`. """ -struct ChineseRestaurantProcess{T<:AbstractRandomProbabilityMeasure,V<:AbstractVector{Int}} <: DiscreteUnivariateDistribution +struct ChineseRestaurantProcess{ + T<:AbstractRandomProbabilityMeasure,V<:AbstractVector{Int} +} <: DiscreteUnivariateDistribution rpm::T m::V end - """ _logpdf_table(d::AbstractRandomProbabilityMeasure, m::AbstractVector{Int}) @@ -81,7 +84,7 @@ function rand(rng::AbstractRNG, d::ChineseRestaurantProcess) end minimum(d::ChineseRestaurantProcess) = 1 -maximum(d::ChineseRestaurantProcess) = any(iszero, d.m) ? length(d.m) : length(d.m)+1 +maximum(d::ChineseRestaurantProcess) = any(iszero, d.m) ? length(d.m) : length(d.m) + 1 ## ################# ## ## Random partitions ## @@ -131,7 +134,7 @@ function _logpdf_table(d::DirichletProcess{T}, m::AbstractVector{Int}) where {T< # construct the table first_zero = findfirst(iszero, m) - K = first_zero === nothing ? length(m)+1 : length(m) + K = first_zero === nothing ? length(m) + 1 : length(m) table = fill(T(-Inf), K) # exit if m is empty or contains only zeros @@ -141,7 +144,7 @@ function _logpdf_table(d::DirichletProcess{T}, m::AbstractVector{Int}) where {T< end # compute logpdf for each occupied table - @inbounds for i in 1:(K-1) + @inbounds for i in 1:(K - 1) table[i] = T(log(m[i])) end @@ -187,7 +190,7 @@ end function distribution(d::StickBreakingProcess{<:PitmanYorProcess}) d_rpm = d.rpm d_rpm_d = d.rpm.d - return Beta(one(d_rpm_d)-d_rpm_d, d_rpm.θ + d_rpm.t*d_rpm_d) + return Beta(one(d_rpm_d) - d_rpm_d, d_rpm.θ + d_rpm.t * d_rpm_d) end @doc raw""" @@ -216,14 +219,15 @@ function stickbreak(v) K = length(v) + 1 cumprod_one_minus_v = cumprod(1 .- v) - eta = [if k == 1 - v[1] - elseif k == K - cumprod_one_minus_v[K - 1] - else - v[k] * cumprod_one_minus_v[k - 1] - end - for k in 1:K] + eta = [ + if k == 1 + v[1] + elseif k == K + cumprod_one_minus_v[K - 1] + else + v[k] * cumprod_one_minus_v[k - 1] + end for k in 1:K + ] return eta end @@ -231,7 +235,7 @@ end function distribution(d::SizeBiasedSamplingProcess{<:PitmanYorProcess}) d_rpm = d.rpm d_rpm_d = d.rpm.d - dist = Beta(one(d_rpm_d)-d_rpm_d, d_rpm.θ + d_rpm.t*d_rpm_d) + dist = Beta(one(d_rpm_d) - d_rpm_d, d_rpm.θ + d_rpm.t * d_rpm_d) return LocationScale(zero(d_rpm_d), d.surplus, dist) end @@ -241,7 +245,7 @@ function _logpdf_table(d::PitmanYorProcess{T}, m::AbstractVector{Int}) where {T< # construct table first_zero = findfirst(iszero, m) - K = first_zero === nothing ? length(m)+1 : length(m) + K = first_zero === nothing ? 
length(m) + 1 : length(m) table = fill(T(-Inf), K) # exit if m is empty or contains only zeros @@ -251,8 +255,8 @@ function _logpdf_table(d::PitmanYorProcess{T}, m::AbstractVector{Int}) where {T< end # compute logpdf for each occupied table - @inbounds for i in 1:(K-1) - !iszero(m[i]) && ( table[i] = T(log(m[i] - d.d)) ) + @inbounds for i in 1:(K - 1) + !iszero(m[i]) && (table[i] = T(log(m[i] - d.d))) end # logpdf for new table @@ -269,5 +273,4 @@ end export DirichletProcess, PitmanYorProcess export SizeBiasedSamplingProcess, StickBreakingProcess, ChineseRestaurantProcess - end # end module diff --git a/src/stdlib/distributions.jl b/src/stdlib/distributions.jl index f579876e8..c2b92c29d 100644 --- a/src/stdlib/distributions.jl +++ b/src/stdlib/distributions.jl @@ -79,9 +79,9 @@ struct BinomialLogit{T<:Real,S<:Real} <: DiscreteUnivariateDistribution logitp::T logconstant::S - function BinomialLogit{T}(n::Int, logitp::T) where T + function BinomialLogit{T}(n::Int, logitp::T) where {T} n >= 0 || error("parameter `n` has to be non-negative") - logconstant = - (log1p(n) + n * StatsFuns.log1pexp(logitp)) + logconstant = -(log1p(n) + n * StatsFuns.log1pexp(logitp)) return new{T,typeof(logconstant)}(n, logitp, logconstant) end end @@ -134,13 +134,13 @@ P(X = k) = \\begin{cases} ``` where `K = length(c) + 1`. """ -struct OrderedLogistic{T1, T2<:AbstractVector} <: DiscreteUnivariateDistribution +struct OrderedLogistic{T1,T2<:AbstractVector} <: DiscreteUnivariateDistribution η::T1 cutpoints::T2 function OrderedLogistic{T1,T2}(η::T1, cutpoints::T2) where {T1,T2} issorted(cutpoints) || error("cutpoints are not sorted") - return new{typeof(η), typeof(cutpoints)}(η, cutpoints) + return new{typeof(η),typeof(cutpoints)}(η, cutpoints) end end @@ -193,10 +193,10 @@ function unsafe_logpdf_ordered_logistic(η, cutpoints, K, k::Int) logp = if k == 1 -StatsFuns.log1pexp(η - cutpoints[k]) elseif k < K - tmp = StatsFuns.log1pexp(cutpoints[k-1] - η) + tmp = StatsFuns.log1pexp(cutpoints[k - 1] - η) -tmp + StatsFuns.log1mexp(tmp - StatsFuns.log1pexp(cutpoints[k] - η)) else - -StatsFuns.log1pexp(cutpoints[k-1] - η) + -StatsFuns.log1pexp(cutpoints[k - 1] - η) end end return logp @@ -221,7 +221,7 @@ struct LogPoisson{T<:Real,S} <: DiscreteUnivariateDistribution logλ::T λ::S - function LogPoisson{T}(logλ::T) where T + function LogPoisson{T}(logλ::T) where {T} λ = exp(logλ) return new{T,typeof(λ)}(logλ, λ) end diff --git a/test/essential/ad.jl b/test/essential/ad.jl index b67c8bd56..6583ed911 100644 --- a/test/essential/ad.jl +++ b/test/essential/ad.jl @@ -5,8 +5,8 @@ using Distributions: logpdf using DynamicPPL: getlogp, getval using ForwardDiff using LinearAlgebra -import LogDensityProblems -import LogDensityProblemsAD +using LogDensityProblems: LogDensityProblems +using LogDensityProblemsAD: LogDensityProblemsAD using ReverseDiff using Test: @test, @testset using Turing @@ -42,7 +42,9 @@ function test_model_ad(model, f, syms::Vector{Symbol}) for chunksize in (0, 1, 10), standardtag in (true, false, 0, 3) ℓ = LogDensityProblemsAD.ADgradient( Turing.AutoForwardDiff(; chunksize=chunksize, tag=standardtag), - Turing.LogDensityFunction(vi, model, SampleFromPrior(), DynamicPPL.DefaultContext()), + Turing.LogDensityFunction( + vi, model, SampleFromPrior(), DynamicPPL.DefaultContext() + ), ) l, ∇E = LogDensityProblems.logdensity_and_gradient(ℓ, z) @@ -72,7 +74,7 @@ end lik_dist = Normal(m, sqrt(s)) lp = logpdf(dist_s, s) + logpdf(Normal(0, sqrt(s)), m) lp += logpdf(lik_dist, 1.5) + logpdf(lik_dist, 2.0) - lp + return lp end # 
Call ForwardDiff's AD @@ -81,14 +83,20 @@ end _x = [_m, _s] grad_FWAD = sort(g(_x)) - ℓ = Turing.LogDensityFunction(vi, ad_test_f, SampleFromPrior(), DynamicPPL.DefaultContext()) + ℓ = Turing.LogDensityFunction( + vi, ad_test_f, SampleFromPrior(), DynamicPPL.DefaultContext() + ) x = map(x -> Float64(x), vi[SampleFromPrior()]) trackerℓ = LogDensityProblemsAD.ADgradient(Turing.AutoTracker(), ℓ) if isdefined(Base, :get_extension) - @test trackerℓ isa Base.get_extension(LogDensityProblemsAD, :LogDensityProblemsADTrackerExt).TrackerGradientLogDensity + @test trackerℓ isa + Base.get_extension( + LogDensityProblemsAD, :LogDensityProblemsADTrackerExt + ).TrackerGradientLogDensity else - @test trackerℓ isa LogDensityProblemsAD.LogDensityProblemsADTrackerExt.TrackerGradientLogDensity + @test trackerℓ isa + LogDensityProblemsAD.LogDensityProblemsADTrackerExt.TrackerGradientLogDensity end @test trackerℓ.ℓ === ℓ ∇E1 = LogDensityProblems.logdensity_and_gradient(trackerℓ, x)[2] @@ -96,9 +104,13 @@ end zygoteℓ = LogDensityProblemsAD.ADgradient(Turing.AutoZygote(), ℓ) if isdefined(Base, :get_extension) - @test zygoteℓ isa Base.get_extension(LogDensityProblemsAD, :LogDensityProblemsADZygoteExt).ZygoteGradientLogDensity + @test zygoteℓ isa + Base.get_extension( + LogDensityProblemsAD, :LogDensityProblemsADZygoteExt + ).ZygoteGradientLogDensity else - @test zygoteℓ isa LogDensityProblemsAD.LogDensityProblemsADZygoteExt.ZygoteGradientLogDensity + @test zygoteℓ isa + LogDensityProblemsAD.LogDensityProblemsADZygoteExt.ZygoteGradientLogDensity end @test zygoteℓ.ℓ === ℓ ∇E2 = LogDensityProblems.logdensity_and_gradient(zygoteℓ, x)[2] @@ -112,7 +124,9 @@ end s = x[2] m = x[1] lik_dist = Normal(m, sqrt(s)) - lp = Turing.logpdf_with_trans(dist_s, s, false) + Turing.logpdf_with_trans(Normal(0, sqrt(s)), m, false) + lp = + Turing.logpdf_with_trans(dist_s, s, false) + + Turing.logpdf_with_trans(Normal(0, sqrt(s)), m, false) lp += logpdf(lik_dist, 1.5) + logpdf(lik_dist, 2.0) return lp end @@ -122,7 +136,7 @@ end # Test Wishart AD. @model function wishart_ad() v ~ Wishart(7, [1 0.5; 0.5 1]) - v + return v end # Hand-written logp @@ -137,7 +151,7 @@ end end @testset "Simplex Tracker, Zygote and ReverseDiff (with and without caching) AD" begin @model function dir() - theta ~ Dirichlet(1 ./ fill(4, 4)) + return theta ~ Dirichlet(1 ./ fill(4, 4)) end sample(dir(), HMC(0.01, 1; adtype=AutoZygote()), 1000) sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000) @@ -145,14 +159,14 @@ end end @testset "PDMatDistribution AD" begin @model function wishart() - theta ~ Wishart(4, Matrix{Float64}(I, 4, 4)) + return theta ~ Wishart(4, Matrix{Float64}(I, 4, 4)) end sample(wishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000) sample(wishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000) @model function invwishart() - theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4)) + return theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4)) end sample(invwishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000) @@ -163,7 +177,7 @@ end params = TV(undef, 2) @. 
params ~ Normal(0, 1) - x ~ MvNormal(params, I) + return x ~ MvNormal(params, I) end function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) @@ -180,7 +194,11 @@ end if unlinked varinfo_init = DynamicPPL.invlink!!(varinfo_init, spl, model) end - varinfo = last(DynamicPPL.evaluate!!(model, varinfo, DynamicPPL.SamplingContext(spl, ctx))) + varinfo = last( + DynamicPPL.evaluate!!( + model, varinfo, DynamicPPL.SamplingContext(spl, ctx) + ), + ) if unlinked varinfo_init = DynamicPPL.link!!(varinfo_init, spl, model) end @@ -195,7 +213,7 @@ end model = tst(data) likelihood = make_logjoint(model, DynamicPPL.LikelihoodContext()) - target(x) = likelihood(x, unlinked=true) + target(x) = likelihood(x; unlinked=true) H_f = ForwardDiff.hessian(target, zeros(2)) H_r = ReverseDiff.hessian(target, zeros(2)) @@ -204,10 +222,9 @@ end end @testset "memoization: issue #1393" begin - @model function demo(data) sigma ~ Uniform(0.0, 20.0) - data ~ Normal(0, sigma) + return data ~ Normal(0, sigma) end N = 1000 @@ -217,7 +234,6 @@ end chn = sample(demo(data), NUTS(0.65; adtype=AutoReverseDiff(true)), 1000) @test mean(Array(chn[:sigma])) ≈ std(data) atol = 0.5 end - end @testset "ReverseDiff compiled without linking" begin @@ -225,10 +241,14 @@ end θ = DynamicPPL.getparams(f) f_rd = LogDensityProblemsAD.ADgradient(Turing.AutoReverseDiff(; compile=false), f) - f_rd_compiled = LogDensityProblemsAD.ADgradient(Turing.AutoReverseDiff(; compile=true), f) + f_rd_compiled = LogDensityProblemsAD.ADgradient( + Turing.AutoReverseDiff(; compile=true), f + ) ℓ, ℓ_grad = LogDensityProblems.logdensity_and_gradient(f_rd, θ) - ℓ_compiled, ℓ_grad_compiled = LogDensityProblems.logdensity_and_gradient(f_rd_compiled, θ) + ℓ_compiled, ℓ_grad_compiled = LogDensityProblems.logdensity_and_gradient( + f_rd_compiled, θ + ) @test ℓ == ℓ_compiled @test ℓ_grad == ℓ_grad_compiled diff --git a/test/essential/container.jl b/test/essential/container.jl index 90e56f46e..d1e1b21bd 100644 --- a/test/essential/container.jl +++ b/test/essential/container.jl @@ -1,6 +1,6 @@ module ContainerTests -import AdvancedPS +using AdvancedPS: AdvancedPS using Distributions: Bernoulli, Beta, Gamma, Normal using DynamicPPL: @model, Sampler using Test: @test, @testset @@ -14,46 +14,46 @@ using Turing 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end @testset "constructor" begin - vi = DynamicPPL.VarInfo() - sampler = Sampler(PG(10)) - model = test() - trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) - - # Make sure we link the traces - @test haskey(trace.model.ctask.task.storage, :__trace) - - res = AdvancedPS.advance!(trace, false) - @test DynamicPPL.get_num_produce(trace.model.f.varinfo) == 1 - @test res ≈ -log(2) - - # Catch broken copy, espetially for RNG / VarInfo - newtrace = AdvancedPS.fork(trace) - res2 = AdvancedPS.advance!(trace) - @test DynamicPPL.get_num_produce(trace.model.f.varinfo) == 2 - @test DynamicPPL.get_num_produce(newtrace.model.f.varinfo) == 1 + vi = DynamicPPL.VarInfo() + sampler = Sampler(PG(10)) + model = test() + trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) + + # Make sure we link the traces + @test haskey(trace.model.ctask.task.storage, :__trace) + + res = AdvancedPS.advance!(trace, false) + @test DynamicPPL.get_num_produce(trace.model.f.varinfo) == 1 + @test res ≈ -log(2) + + # Catch broken copy, espetially for RNG / VarInfo + newtrace = AdvancedPS.fork(trace) + res2 = AdvancedPS.advance!(trace) + @test 
DynamicPPL.get_num_produce(trace.model.f.varinfo) == 2 + @test DynamicPPL.get_num_produce(newtrace.model.f.varinfo) == 1 end @testset "fork" begin - @model function normal() - a ~ Normal(0, 1) - 3 ~ Normal(a, 2) - b ~ Normal(a, 1) - 1.5 ~ Normal(b, 2) - a, b - end - vi = DynamicPPL.VarInfo() - sampler = Sampler(PG(10)) - model = normal() - - trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) - - newtrace = AdvancedPS.forkr(trace) - # Catch broken replay mechanism - @test AdvancedPS.advance!(trace) ≈ AdvancedPS.advance!(newtrace) + @model function normal() + a ~ Normal(0, 1) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) + 1.5 ~ Normal(b, 2) + return a, b + end + vi = DynamicPPL.VarInfo() + sampler = Sampler(PG(10)) + model = normal() + + trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) + + newtrace = AdvancedPS.forkr(trace) + # Catch broken replay mechanism + @test AdvancedPS.advance!(trace) ≈ AdvancedPS.advance!(newtrace) end end diff --git a/test/ext/OptimInterface.jl b/test/ext/OptimInterface.jl index 7bb72f3cc..817d7a520 100644 --- a/test/ext/OptimInterface.jl +++ b/test/ext/OptimInterface.jl @@ -3,9 +3,9 @@ module OptimInterfaceTests using ..Models: gdemo_default using Distributions.FillArrays: Zeros using LinearAlgebra: I -import Optim -import Random -import StatsBase +using Optim: Optim +using Random: Random +using StatsBase: StatsBase using StatsBase: coef, coefnames, coeftable, informationmatrix, stderror, vcov using Test: @test, @testset using Turing @@ -55,7 +55,7 @@ using Turing infomat = [2/(2 * true_values[1]^2) 0.0; 0.0 2/true_values[1]] @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) - vcovmat = [2*true_values[1]^2 / 2 0.0; 0.0 true_values[1]/2] + vcovmat = [2 * true_values[1]^2/2 0.0; 0.0 true_values[1]/2] @test all(isapprox.(vcovmat - vcov(mle_est), 0.0, atol=0.01)) ctable = coeftable(mle_est) @@ -73,33 +73,33 @@ using Turing @testset "Linear regression test" begin @model function regtest(x, y) beta ~ MvNormal(Zeros(2), I) - mu = x*beta - y ~ MvNormal(mu, I) + mu = x * beta + return y ~ MvNormal(mu, I) end - + Random.seed!(987) true_beta = [1.0, -2.2] x = rand(40, 2) - y = x*true_beta - + y = x * true_beta + model = regtest(x, y) mle = Optim.optimize(model, MLE()) - + vcmat = inv(x'x) vcmat_mle = vcov(mle).array - + @test isapprox(mle.values.array, true_beta) @test isapprox(vcmat, vcmat_mle) end @testset "Dot tilde test" begin @model function dot_gdemo(x) - s ~ InverseGamma(2,3) + s ~ InverseGamma(2, 3) m ~ Normal(0, sqrt(s)) - - (.~)(x, Normal(m, sqrt(s))) + + return (.~)(x, Normal(m, sqrt(s))) end - + model_dot = dot_gdemo([1.5, 2.0]) mle1 = Optim.optimize(gdemo_default, MLE()) @@ -123,13 +123,12 @@ using Turing for vn in DynamicPPL.TestUtils.varnames(model) for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn)) - @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05 + @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05 end end end end - # Some of the models have one variance parameter per observation, and so # the MLE should have the variances set to 0. Since we're working in # transformed space, this corresponds to `-Inf`, which is of course not achievable. @@ -153,8 +152,8 @@ using Turing result_true = DynamicPPL.TestUtils.likelihood_optima(model) # `NelderMead` seems to struggle with convergence here, so we exclude it. 
- @testset "$(nameof(typeof(optimizer)))" for optimizer in [Optim.LBFGS(),] - options = Optim.Options(g_tol=1e-3, f_tol=1e-3) + @testset "$(nameof(typeof(optimizer)))" for optimizer in [Optim.LBFGS()] + options = Optim.Options(; g_tol=1e-3, f_tol=1e-3) result = Optim.optimize(model, MLE(), optimizer, options) vals = result.values @@ -163,7 +162,7 @@ using Turing if model.f in allowed_incorrect_mle @test isfinite(get(result_true, vn_leaf)) else - @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05 + @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05 end end end @@ -175,18 +174,18 @@ using Turing @model demo_dirichlet() = x ~ Dirichlet(2 * ones(3)) model = demo_dirichlet() result = Optim.optimize(model, MAP()) - @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol=0.2 + @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol = 0.2 end @testset "with :=" begin @model function demo_track() x ~ Normal() - y := 100 + x + return y := 100 + x end model = demo_track() result = Optim.optimize(model, MAP()) - @test result.values[:x] ≈ 0 atol=1e-1 - @test result.values[:y] ≈ 100 atol=1e-1 + @test result.values[:x] ≈ 0 atol = 1e-1 + @test result.values[:y] ≈ 100 atol = 1e-1 end end diff --git a/test/ext/dynamichmc.jl b/test/ext/dynamichmc.jl index d525512ca..aa52093bc 100644 --- a/test/ext/dynamichmc.jl +++ b/test/ext/dynamichmc.jl @@ -4,10 +4,10 @@ using ..Models: gdemo_default using ..NumericalTests: check_gdemo using Test: @test, @testset using Distributions: sample -import DynamicHMC -import DynamicPPL +using DynamicHMC: DynamicHMC +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler -import Random +using Random: Random using Turing @testset "TuringDynamicHMCExt" begin diff --git a/test/mcmc/abstractmcmc.jl b/test/mcmc/abstractmcmc.jl index 57554c0f2..10d202da3 100644 --- a/test/mcmc/abstractmcmc.jl +++ b/test/mcmc/abstractmcmc.jl @@ -1,15 +1,15 @@ module AbstractMCMCTests -import AdvancedMH +using AdvancedMH: AdvancedMH using Distributions: sample using Distributions.FillArrays: Zeros -import DynamicPPL -import ForwardDiff +using DynamicPPL: DynamicPPL +using ForwardDiff: ForwardDiff using LinearAlgebra: I -import LogDensityProblems -import LogDensityProblemsAD -import Random -import ReverseDiff +using LogDensityProblems: LogDensityProblems +using LogDensityProblemsAD: LogDensityProblemsAD +using Random: Random +using ReverseDiff: ReverseDiff using StableRNGs: StableRNG using Test: @test, @test_throws, @testset using Turing @@ -21,7 +21,9 @@ function initialize_nuts(model::Turing.Model) f = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(model)) # Link the varinfo. 
- f = Turing.Inference.setvarinfo(f, DynamicPPL.link!!(Turing.Inference.getvarinfo(f), model)) + f = Turing.Inference.setvarinfo( + f, DynamicPPL.link!!(Turing.Inference.getvarinfo(f), model) + ) # Choose parameter dimensionality and initial parameter value D = LogDensityProblems.dimension(f) @@ -39,16 +41,18 @@ function initialize_nuts(model::Turing.Model) # - multinomial sampling scheme, # - generalised No-U-Turn criteria, and # - windowed adaption for step-size and diagonal mass matrix - proposal = AdvancedHMC.HMCKernel(AdvancedHMC.Trajectory{AdvancedHMC.MultinomialTS}(integrator, AdvancedHMC.GeneralisedNoUTurn())) + proposal = AdvancedHMC.HMCKernel( + AdvancedHMC.Trajectory{AdvancedHMC.MultinomialTS}( + integrator, AdvancedHMC.GeneralisedNoUTurn() + ), + ) adaptor = AdvancedHMC.StanHMCAdaptor( - AdvancedHMC.MassMatrixAdaptor(metric), - AdvancedHMC.StepSizeAdaptor(0.65, integrator) + AdvancedHMC.MassMatrixAdaptor(metric), AdvancedHMC.StepSizeAdaptor(0.65, integrator) ) return AdvancedHMC.HMCSampler(proposal, metric, adaptor) end - function initialize_mh_rw(model) f = DynamicPPL.LogDensityFunction(model) d = LogDensityProblems.dimension(f) @@ -57,17 +61,22 @@ end # TODO: Should this go somewhere else? # Convert a model into a `Distribution` to allow usage as a proposal in AdvancedMH.jl. -struct ModelDistribution{M<:DynamicPPL.Model,V<:DynamicPPL.VarInfo} <: ContinuousMultivariateDistribution +struct ModelDistribution{M<:DynamicPPL.Model,V<:DynamicPPL.VarInfo} <: + ContinuousMultivariateDistribution model::M varinfo::V end -ModelDistribution(model::DynamicPPL.Model) = ModelDistribution(model, DynamicPPL.VarInfo(model)) +function ModelDistribution(model::DynamicPPL.Model) + return ModelDistribution(model, DynamicPPL.VarInfo(model)) +end Base.length(d::ModelDistribution) = length(d.varinfo[:]) function Distributions._logpdf(d::ModelDistribution, x::AbstractVector) return logprior(d.model, DynamicPPL.unflatten(d.varinfo, x)) end -function Distributions._rand!(rng::Random.AbstractRNG, d::ModelDistribution, x::AbstractVector{<:Real}) +function Distributions._rand!( + rng::Random.AbstractRNG, d::ModelDistribution, x::AbstractVector{<:Real} +) model = d.model varinfo = deepcopy(d.varinfo) for vn in keys(varinfo) @@ -79,10 +88,14 @@ function Distributions._rand!(rng::Random.AbstractRNG, d::ModelDistribution, x:: end function initialize_mh_with_prior_proposal(model) - return AdvancedMH.MetropolisHastings(AdvancedMH.StaticProposal(ModelDistribution(model))) + return AdvancedMH.MetropolisHastings( + AdvancedMH.StaticProposal(ModelDistribution(model)) + ) end -function test_initial_params(model, sampler, initial_params=DynamicPPL.VarInfo(model)[:]; kwargs...) +function test_initial_params( + model, sampler, initial_params=DynamicPPL.VarInfo(model)[:]; kwargs... +) # Execute the transition with two different RNGs and check that the resulting # parameter values are the same. rng1 = Random.MersenneTwister(42) @@ -104,8 +117,10 @@ end @testset "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS # Need some functionality to initialize the sampler. # TODO: Remove this once the constructors in the respective packages become "lazy". - sampler = initialize_nuts(model); - sampler_ext = DynamicPPL.Sampler(externalsampler(sampler; adtype, unconstrained=true), model) + sampler = initialize_nuts(model) + sampler_ext = DynamicPPL.Sampler( + externalsampler(sampler; adtype, unconstrained=true), model + ) # FIXME: Once https://github.com/TuringLang/AdvancedHMC.jl/pull/366 goes through, uncomment. 
# @testset "initial_params" begin # test_initial_params(model, sampler_ext; n_adapts=0) @@ -119,9 +134,13 @@ end ) @testset "inference" begin - if adtype isa AutoReverseDiff && model.f === DynamicPPL.TestUtils.demo_assume_index_observe && VERSION < v"1.8" + if adtype isa AutoReverseDiff && + model.f === DynamicPPL.TestUtils.demo_assume_index_observe && + VERSION < v"1.8" # Ref: https://github.com/TuringLang/DynamicPPL.jl/issues/612 - @test_throws UndefRefError sample(model, sampler_ext, 5_000; sample_kwargs...) + @test_throws UndefRefError sample( + model, sampler_ext, 5_000; sample_kwargs... + ) else DynamicPPL.TestUtils.test_sampler( [model], @@ -140,12 +159,20 @@ end rng = Random.default_rng() model = DynamicPPL.TestUtils.DEMO_MODELS[1] sampler = initialize_nuts(model) - sampler_ext = externalsampler(sampler; unconstrained=true, adtype=AutoForwardDiff()) + sampler_ext = externalsampler( + sampler; unconstrained=true, adtype=AutoForwardDiff() + ) # Initial step. - state = last(AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext); n_adapts=0)) + state = last( + AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext); n_adapts=0) + ) @test state.logdensity isa LogDensityProblemsAD.ADGradientWrapper # Subsequent step. - state = last(AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext), state; n_adapts=0)) + state = last( + AbstractMCMC.step( + rng, model, DynamicPPL.Sampler(sampler_ext), state; n_adapts=0 + ), + ) @test state.logdensity isa LogDensityProblemsAD.ADGradientWrapper end end @@ -155,8 +182,10 @@ end @testset "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS # Need some functionality to initialize the sampler. # TODO: Remove this once the constructors in the respective packages become "lazy". - sampler = initialize_mh_rw(model); - sampler_ext = DynamicPPL.Sampler(externalsampler(sampler; unconstrained=true), model) + sampler = initialize_mh_rw(model) + sampler_ext = DynamicPPL.Sampler( + externalsampler(sampler; unconstrained=true), model + ) @testset "initial_params" begin test_initial_params(model, sampler_ext) end @@ -168,7 +197,7 @@ end discard_initial=1_000, thinning=10, rtol=0.2, - sampler_name="AdvancedMH" + sampler_name="AdvancedMH", ) end end diff --git a/test/mcmc/emcee.jl b/test/mcmc/emcee.jl index 04901775e..08ea16d2a 100644 --- a/test/mcmc/emcee.jl +++ b/test/mcmc/emcee.jl @@ -3,9 +3,9 @@ module EmceeTests using ..Models: gdemo_default using ..NumericalTests: check_gdemo using Distributions: sample -import DynamicPPL +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler -import Random +using Random: Random using Test: @test, @test_throws, @testset using Turing @@ -15,7 +15,7 @@ using Turing n_samples = 1000 n_walkers = 250 - + spl = Emcee(n_walkers, 2.0) @test DynamicPPL.alg_str(Sampler(spl, gdemo_default)) == "Emcee" diff --git a/test/mcmc/ess.jl b/test/mcmc/ess.jl index 86029e43f..0a1c23a9e 100644 --- a/test/mcmc/ess.jl +++ b/test/mcmc/ess.jl @@ -3,23 +3,23 @@ module ESSTests using ..Models: MoGtest, MoGtest_default, gdemo, gdemo_default using ..NumericalTests: check_MoGtest_default, check_numerical using Distributions: Normal, sample -import DynamicPPL +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler -import Random +using Random: Random using Test: @test, @testset using Turing @testset "ESS" begin @model function demo(x) m ~ Normal() - x ~ Normal(m, 0.5) + return x ~ Normal(m, 0.5) end demo_default = demo(1.0) @model function demodot(x) m = Vector{Float64}(undef, 2) @. 
m ~ Normal() - x ~ Normal(m[2], 0.5) + return x ~ Normal(m[2], 0.5) end demodot_default = demodot(1.0) @@ -45,26 +45,22 @@ using Turing @testset "ESS inference" begin Random.seed!(1) chain = sample(demo_default, ESS(), 5_000) - check_numerical(chain, [:m], [0.8], atol = 0.1) + check_numerical(chain, [:m], [0.8]; atol=0.1) Random.seed!(1) chain = sample(demodot_default, ESS(), 5_000) - check_numerical(chain, ["m[1]", "m[2]"], [0.0, 0.8], atol = 0.1) + check_numerical(chain, ["m[1]", "m[2]"], [0.0, 0.8]; atol=0.1) Random.seed!(100) - alg = Gibbs( - CSMC(15, :s), - ESS(:m)) + alg = Gibbs(CSMC(15, :s), ESS(:m)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49 / 24, 7 / 6], atol = 0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1) # MoGtest Random.seed!(125) - alg = Gibbs( - CSMC(15, :z1, :z2, :z3, :z4), - ESS(:mu1), ESS(:mu2)) + alg = Gibbs(CSMC(15, :z1, :z2, :z3, :z4), ESS(:mu1), ESS(:mu2)) chain = sample(MoGtest_default, alg, 6000) - check_MoGtest_default(chain, atol = 0.1) + check_MoGtest_default(chain; atol=0.1) # Different "equivalent" models. # NOTE: Because `ESS` only supports "single" variables with @@ -72,13 +68,15 @@ using Turing # on the non-Gaussian variables in `DEMO_MODELS`. models_conditioned = map(DynamicPPL.TestUtils.DEMO_MODELS) do model # Condition on the non-Gaussian random variables. - model | (s = DynamicPPL.TestUtils.posterior_mean(model).s,) + model | (s=DynamicPPL.TestUtils.posterior_mean(model).s,) end DynamicPPL.TestUtils.test_sampler( - models_conditioned, DynamicPPL.Sampler(ESS()), 10_000; + models_conditioned, + DynamicPPL.Sampler(ESS()), + 10_000; # Filter out the varnames we've conditioned on. - varnames_filter=vn -> DynamicPPL.getsym(vn) != :s + varnames_filter=vn -> DynamicPPL.getsym(vn) != :s, ) end end diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index 3e096dde1..4d2053c14 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -4,15 +4,17 @@ using ..Models: MoGtest_default, gdemo, gdemo_default using ..NumericalTests: check_MoGtest_default, check_gdemo, check_numerical using Distributions: InverseGamma, Normal using Distributions: sample -import ForwardDiff -import Random -import ReverseDiff +using ForwardDiff: ForwardDiff +using Random: Random +using ReverseDiff: ReverseDiff using Test: @test, @testset using Turing using Turing: Inference using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess -@testset "Testing gibbs.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false)) +@testset "Testing gibbs.jl with $adbackend" for adbackend in ( + AutoForwardDiff(; chunksize=0), AutoReverseDiff(false) +) @testset "gibbs constructor" begin N = 500 s1 = Gibbs(HMC(0.1, 5, :s, :m; adtype=adbackend)) @@ -48,26 +50,28 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess Random.seed!(100) alg = Gibbs(CSMC(15, :s), HMC(0.2, 4, :m; adtype=adbackend)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.15) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.15) Random.seed!(100) alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1) alg = Gibbs(CSMC(15, :s), ESS(:m)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 
6]; atol=0.1) alg = CSMC(15) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1) Random.seed!(200) - gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2; adtype=adbackend)) + gibbs = Gibbs( + PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2; adtype=adbackend) + ) chain = sample(MoGtest_default, gibbs, 10_000) - check_MoGtest_default(chain, atol=0.15) + check_MoGtest_default(chain; atol=0.15) Random.seed!(200) for alg in [ @@ -95,42 +99,41 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess ::Turing.Sampler{<:Gibbs}, state, ::Type{MCMCChains.Chains}; - kwargs... + kwargs..., ) - samples isa Vector{<:Inference.Transition} || - error("incorrect transitions") - return + samples isa Vector{<:Inference.Transition} || error("incorrect transitions") + return nothing end function callback(rng, model, sampler, sample, state, i; kwargs...) sample isa Inference.Transition || error("incorrect sample") - return + return nothing end alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend)) - sample(model, alg, 100; callback = callback) + sample(model, alg, 100; callback=callback) end @testset "dynamic model" begin @model function imm(y, alpha, ::Type{M}=Vector{Float64}) where {M} N = length(y) rpm = DirichletProcess(alpha) - + z = zeros(Int, N) cluster_counts = zeros(Int, N) fill!(cluster_counts, 0) - + for i in 1:N z[i] ~ ChineseRestaurantProcess(rpm, cluster_counts) cluster_counts[z[i]] += 1 end - + Kmax = findlast(!iszero, cluster_counts) m = M(undef, Kmax) - for k = 1:Kmax + for k in 1:Kmax m[k] ~ Normal(1.0, 1.0) end end - model = imm(randn(100), 1.0); + model = imm(randn(100), 1.0) # https://github.com/TuringLang/Turing.jl/issues/1725 # sample(model, Gibbs(MH(:z), HMC(0.01, 4, :m)), 100); sample(model, Gibbs(PG(10, :z), HMC(0.01, 4, :m; adtype=adbackend)), 100) diff --git a/test/mcmc/gibbs_conditional.jl b/test/mcmc/gibbs_conditional.jl index 2abec68a4..3ba2fdbed 100644 --- a/test/mcmc/gibbs_conditional.jl +++ b/test/mcmc/gibbs_conditional.jl @@ -2,20 +2,23 @@ module GibbsConditionalTests using ..Models: gdemo, gdemo_default using ..NumericalTests: check_gdemo, check_numerical -import Clustering +using Clustering: Clustering using Distributions: Categorical, InverseGamma, Normal, sample -import ForwardDiff +using ForwardDiff: ForwardDiff using LinearAlgebra: Diagonal, I -import Random -import ReverseDiff +using Random: Random +using ReverseDiff: ReverseDiff using StableRNGs: StableRNG using StatsBase: counts -import StatsFuns +using StatsFuns: StatsFuns using Test: @test, @testset using Turing -@testset "Testing gibbs conditionals.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false)) - Random.seed!(1000); rng = StableRNG(123) +@testset "Testing gibbs conditionals.jl with $adbackend" for adbackend in ( + AutoForwardDiff(; chunksize=0), AutoReverseDiff(false) +) + Random.seed!(1000) + rng = StableRNG(123) @testset "gdemo" begin # We consider the model @@ -36,7 +39,7 @@ using Turing # ```math # m | s, x ~ Normal(m_n, sqrt(s / (N + 1))) # ``` - cond_m = let N=N, m_n=m_n + cond_m = let N = N, m_n = m_n c -> Normal(m_n, sqrt(c.s / (N + 1))) end @@ -45,29 +48,31 @@ using Turing # s | m, x ~ InverseGamma(2 + (N + 1) / 2, 3 + (m^2 + ∑ (xᵢ - m)^2) / 2) = # InverseGamma(2 + (N + 1) / 2, 3 + m^2 / 2 + N / 2 * (x_var + (x_mean - m)^2)) # ``` - cond_s = let N=N, x_mean=x_mean, x_var=x_var - c -> InverseGamma(2 + (N + 1) / 
2, 3 + c.m^2 / 2 + N / 2 * (x_var + (x_mean - c.m)^2)) + cond_s = let N = N, x_mean = x_mean, x_var = x_var + c -> InverseGamma( + 2 + (N + 1) / 2, 3 + c.m^2 / 2 + N / 2 * (x_var + (x_mean - c.m)^2) + ) end # Three Gibbs samplers: # one for each variable fixed to the posterior mean - s_posterior_mean = 49/24 + s_posterior_mean = 49 / 24 sampler1 = Gibbs( GibbsConditional(:m, cond_m), GibbsConditional(:s, _ -> Normal(s_posterior_mean, 0)), ) chain = sample(rng, gdemo_default, sampler1, 10_000) - cond_m_mean = mean(cond_m((s = s_posterior_mean,))) + cond_m_mean = mean(cond_m((s=s_posterior_mean,))) check_numerical(chain, [:m, :s], [cond_m_mean, s_posterior_mean]) @test all(==(s_posterior_mean), chain[:s][2:end]) - m_posterior_mean = 7/6 + m_posterior_mean = 7 / 6 sampler2 = Gibbs( GibbsConditional(:m, _ -> Normal(m_posterior_mean, 0)), GibbsConditional(:s, cond_s), ) chain = sample(rng, gdemo_default, sampler2, 10_000) - cond_s_mean = mean(cond_s((m = m_posterior_mean,))) + cond_s_mean = mean(cond_s((m=m_posterior_mean,))) check_numerical(chain, [:m, :s], [m_posterior_mean, cond_s_mean]) @test all(==(m_posterior_mean), chain[:m][2:end]) @@ -78,7 +83,8 @@ using Turing end @testset "GMM" begin - Random.seed!(1000); rng = StableRNG(123) + Random.seed!(1000) + rng = StableRNG(123) # We consider the model # ```math # μₖ ~ Normal(m, σ_μ), k = 1, …, K, @@ -87,7 +93,7 @@ using Turing # ``` # with ``K = 2`` clusters, ``N = 20`` observations, and the following parameters: K = 2 # number of clusters - π = fill(1/K, K) # uniform cluster weights + π = fill(1 / K, K) # uniform cluster weights m = 0.5 # prior mean of μₖ σ²_μ = 4.0 # prior variance of μₖ σ²_x = 0.01 # observation variance @@ -108,7 +114,7 @@ using Turing # Conditional distribution ``z | μ, x`` # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - cond_z = let x=x_data, log_π=log.(π), σ_x=sqrt(σ²_x) + cond_z = let x = x_data, log_π = log.(π), σ_x = sqrt(σ²_x) c -> begin dists = map(x) do xi logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi) @@ -120,7 +126,7 @@ using Turing # Conditional distribution ``μ | z, x`` # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - cond_μ = let K=K, x_data=x_data, inv_σ²_μ=inv(σ²_μ), inv_σ²_x=inv(σ²_x) + cond_μ = let K = K, x_data = x_data, inv_σ²_μ = inv(σ²_μ), inv_σ²_x = inv(σ²_x) c -> begin # Convert cluster assignments to one-hot encodings z_onehot = c.z .== (1:K)' @@ -136,10 +142,10 @@ using Turing end end - estimate(chain, var) = dropdims(mean(Array(group(chain, var)), dims=1), dims=1) + estimate(chain, var) = dropdims(mean(Array(group(chain, var)); dims=1); dims=1) function estimatez(chain, var, range) z = Int.(Array(group(chain, var))) - return map(i -> findmax(counts(z[:,i], range))[2], 1:size(z,2)) + return map(i -> findmax(counts(z[:, i], range))[2], 1:size(z, 2)) end lμ_data, uμ_data = extrema(μ_data) diff --git a/test/mcmc/is.jl b/test/mcmc/is.jl index b2e26bba7..bd3186cd9 100644 --- a/test/mcmc/is.jl +++ b/test/mcmc/is.jl @@ -2,7 +2,7 @@ module ISTests using Distributions: Normal, sample using DynamicPPL: logpdf -import Random +using Random: Random using StatsFuns: logsumexp using Test: @test, @testset using Turing @@ -18,22 +18,22 @@ using Turing end logevidence = logsumexp(logps) - log(n) - return (as = as, bs = bs, logps = logps, logevidence = logevidence) + return (as=as, bs=bs, logps=logps, logevidence=logevidence) end function reference() - x = rand(Normal(4,5)) - y = rand(Normal(x,1)) - loglik = logpdf(Normal(x,2), 3) + 
logpdf(Normal(y,2), 1.5) + x = rand(Normal(4, 5)) + y = rand(Normal(x, 1)) + loglik = logpdf(Normal(x, 2), 3) + logpdf(Normal(y, 2), 1.5) return x, y, loglik end @model function normal() - a ~ Normal(4,5) - 3 ~ Normal(a,2) - b ~ Normal(a,1) - 1.5 ~ Normal(b,2) - a, b + a ~ Normal(4, 5) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) + 1.5 ~ Normal(b, 2) + return a, b end alg = IS() @@ -65,13 +65,13 @@ using Turing 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end chains = sample(test(), IS(), 10000) @test all(isone, chains[:x]) - @test chains.logevidence ≈ - 2 * log(2) + @test chains.logevidence ≈ -2 * log(2) end end diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 3b15d8069..0e3cc91f6 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -1,12 +1,12 @@ module MHTests -import AdvancedMH -using Distributions: Bernoulli, Dirichlet, Exponential, InverseGamma, LogNormal, MvNormal, - Normal, sample -import DynamicPPL +using AdvancedMH: AdvancedMH +using Distributions: + Bernoulli, Dirichlet, Exponential, InverseGamma, LogNormal, MvNormal, Normal, sample +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler using LinearAlgebra: I -import Random +using Random: Random using StableRNGs: StableRNG using Test: @test, @testset using Turing @@ -21,9 +21,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "mh constructor" begin Random.seed!(10) N = 500 - s1 = MH( - (:s, InverseGamma(2,3)), - (:m, GKernel(3.0))) + s1 = MH((:s, InverseGamma(2, 3)), (:m, GKernel(3.0))) s2 = MH(:s, :m) s3 = MH() for s in (s1, s2, s3) @@ -49,30 +47,27 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) Random.seed!(125) alg = MH() chain = sample(gdemo_default, alg, 10_000) - check_gdemo(chain, atol = 0.1) + check_gdemo(chain; atol=0.1) Random.seed!(125) # MH with Gaussian proposal - alg = MH( - (:s, InverseGamma(2,3)), - (:m, GKernel(1.0))) + alg = MH((:s, InverseGamma(2, 3)), (:m, GKernel(1.0))) chain = sample(gdemo_default, alg, 10_000) - check_gdemo(chain, atol = 0.1) + check_gdemo(chain; atol=0.1) Random.seed!(125) # MH within Gibbs alg = Gibbs(MH(:m), MH(:s)) chain = sample(gdemo_default, alg, 10_000) - check_gdemo(chain, atol = 0.1) + check_gdemo(chain; atol=0.1) Random.seed!(125) # MoGtest gibbs = Gibbs( - CSMC(15, :z1, :z2, :z3, :z4), - MH((:mu1,GKernel(1)), (:mu2,GKernel(1))) + CSMC(15, :z1, :z2, :z3, :z4), MH((:mu1, GKernel(1)), (:mu2, GKernel(1))) ) chain = sample(MoGtest_default, gibbs, 500) - check_MoGtest_default(chain, atol = 0.15) + check_MoGtest_default(chain; atol=0.15) end # Test MH shape passing. 
@@ -91,7 +86,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) -1.5 ~ Normal(m[1], m[2]) 1.5 ~ Normal(m[1], s) - 2.0 ~ Normal(m[1], s) + return 2.0 ~ Normal(m[1], s) end model = M(zeros(2), I, 1) @@ -100,7 +95,8 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) dt, vt = Inference.dist_val_tuple(sampler, Turing.VarInfo(model)) @test dt[:z] isa AdvancedMH.StaticProposal{false,<:MvNormal} - @test dt[:m] isa AdvancedMH.StaticProposal{false,Vector{ContinuousUnivariateDistribution}} + @test dt[:m] isa + AdvancedMH.StaticProposal{false,Vector{ContinuousUnivariateDistribution}} @test dt[:m].proposal[1] isa Normal && dt[:m].proposal[2] isa InverseGamma @test dt[:s] isa AdvancedMH.StaticProposal{false,<:InverseGamma} @@ -115,7 +111,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "proposal matrix" begin Random.seed!(100) - + mat = [1.0 -0.05; -0.05 1.0] prop1 = mat # Matrix only constructor @@ -142,42 +138,37 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) # generate data x = rand(Normal(5, 10), 20) y = rand(LogNormal(-3, 2), 20) - + # Turing model @model function twomeans(x, y) # Set Priors μ ~ MvNormal(zeros(2), 9 * I) σ ~ filldist(Exponential(1), 2) - + # Distributions of supplied data x .~ Normal(μ[1], σ[1]) - y .~ LogNormal(μ[2], σ[2]) - + return y .~ LogNormal(μ[2], σ[2]) end mod = twomeans(x, y) - + # generate covariance matrix for RWMH # with small-valued VC matrix to check if we only see very small steps - vc_μ = convert(Array, 1e-4*I(2)) - vc_σ = convert(Array, 1e-4*I(2)) + vc_μ = convert(Array, 1e-4 * I(2)) + vc_σ = convert(Array, 1e-4 * I(2)) - alg = Gibbs( - MH((:μ, vc_μ)), - MH((:σ, vc_σ)), - ) + alg = Gibbs(MH((:μ, vc_μ)), MH((:σ, vc_σ))) chn = sample( mod, alg, - 3_000 # draws + 3_000, # draws ) - - + chn2 = sample(mod, MH(), 3_000) # Test that the small variance version is actually smaller. - v1 = var(diff(Array(chn["μ[1]"]), dims=1)) - v2 = var(diff(Array(chn2["μ[1]"]), dims=1)) + v1 = var(diff(Array(chn["μ[1]"]); dims=1)) + v2 = var(diff(Array(chn2["μ[1]"]); dims=1)) # FIXME: Do this properly. It sometimes fails. # @test v1 < v2 @@ -241,7 +232,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) vi = deepcopy(vi_base) alg = MH( :m => AdvancedMH.StaticProposal(Normal()), - :s => AdvancedMH.RandomWalkProposal(Normal()) + :s => AdvancedMH.RandomWalkProposal(Normal()), ) spl = DynamicPPL.Sampler(alg) vi = Turing.Inference.maybe_link!!(vi, spl, alg.proposals, gdemo_default) @@ -253,21 +244,27 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) # find a non-trivial `atol` where the tests will pass for all seeds. 
Hence we fix it :/ rng = StableRNG(10) alg = MH() - gdemo_default_prior = DynamicPPL.contextualize(gdemo_default, DynamicPPL.PriorContext()) + gdemo_default_prior = DynamicPPL.contextualize( + gdemo_default, DynamicPPL.PriorContext() + ) burnin = 10_000 n = 10_000 - chain = sample(rng, gdemo_default_prior, alg, n; discard_initial = burnin, thinning=10) - check_numerical(chain, [:s, :m], [mean(InverseGamma(2, 3)), 0], atol=0.3) + chain = sample( + rng, gdemo_default_prior, alg, n; discard_initial=burnin, thinning=10 + ) + check_numerical(chain, [:s, :m], [mean(InverseGamma(2, 3)), 0]; atol=0.3) end @testset "`filldist` proposal (issue #2180)" begin @model demo_filldist_issue2180() = x ~ MvNormal(zeros(3), I) chain = sample( - demo_filldist_issue2180(), - MH(AdvancedMH.RandomWalkProposal(filldist(Normal(), 3))), - 10_000 + demo_filldist_issue2180(), + MH(AdvancedMH.RandomWalkProposal(filldist(Normal(), 3))), + 10_000, + ) + check_numerical( + chain, [Symbol("x[1]"), Symbol("x[2]"), Symbol("x[3]")], [0, 0, 0]; atol=0.2 ) - check_numerical(chain, [Symbol("x[1]"), Symbol("x[2]"), Symbol("x[3]")], [0, 0, 0], atol=0.2) end end diff --git a/test/mcmc/particle_mcmc.jl b/test/mcmc/particle_mcmc.jl index 01510f2a8..2e3744fef 100644 --- a/test/mcmc/particle_mcmc.jl +++ b/test/mcmc/particle_mcmc.jl @@ -5,7 +5,7 @@ using ..Models: gdemo_default using AdvancedPS: ResampleWithESSThreshold, resample_systematic, resample_multinomial using Distributions: Bernoulli, Beta, Gamma, Normal, sample using DynamicPPL: getspace -import Random +using Random: Random using Test: @test, @test_throws, @testset using Turing @@ -58,24 +58,24 @@ using Turing @testset "models" begin @model function normal() - a ~ Normal(4,5) - 3 ~ Normal(a,2) - b ~ Normal(a,1) - 1.5 ~ Normal(b,2) - a, b + a ~ Normal(4, 5) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) + 1.5 ~ Normal(b, 2) + return a, b end - tested = sample(normal(), SMC(), 100); + tested = sample(normal(), SMC(), 100) # failing test @model function fail_smc() - a ~ Normal(4,5) - 3 ~ Normal(a,2) - b ~ Normal(a,1) + a ~ Normal(4, 5) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) if a >= 4.0 - 1.5 ~ Normal(b,2) + 1.5 ~ Normal(b, 2) end - a, b + return a, b end @test_throws ErrorException sample(fail_smc(), SMC(), 100) @@ -91,7 +91,7 @@ using Turing 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end chains_smc = sample(test(), SMC(), 100) @@ -169,7 +169,7 @@ end 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end chains_pg = sample(test(), PG(10), 100) @@ -187,7 +187,7 @@ end # https://github.com/TuringLang/Turing.jl/issues/2007 @testset "keyword arguments not supported" begin - @model kwarg_demo(; x = 2) = return x + @model kwarg_demo(; x=2) = return x @test_throws ErrorException sample(kwarg_demo(), PG(1), 10) end end diff --git a/test/mcmc/utilities.jl b/test/mcmc/utilities.jl index 2b5e04b2d..06e92fbf2 100644 --- a/test/mcmc/utilities.jl +++ b/test/mcmc/utilities.jl @@ -3,7 +3,7 @@ module MCMCUtilitiesTests using ..Models: gdemo_default using Distributions: Normal, sample, truncated using LinearAlgebra: I, vec -import Random +using Random: Random using Random: MersenneTwister using Test: @test, @testset using Turing @@ -11,34 +11,36 @@ using Turing @testset "predict" begin Random.seed!(100) - @model function linear_reg(x, y, σ = 0.1) + @model function linear_reg(x, y, σ=0.1) β ~ Normal(0, 1) - for i ∈ eachindex(y) + for i in eachindex(y) y[i] ~ Normal(β * x[i], σ) end end - @model function linear_reg_vec(x, y, σ = 0.1) + @model function 
linear_reg_vec(x, y, σ=0.1) β ~ Normal(0, 1) - y ~ MvNormal(β .* x, σ^2 * I) + return y ~ MvNormal(β .* x, σ^2 * I) end f(x) = 2 * x + 0.1 * randn() Δ = 0.1 - xs_train = 0:Δ:10; ys_train = f.(xs_train); - xs_test = [10 + Δ, 10 + 2 * Δ]; ys_test = f.(xs_test); + xs_train = 0:Δ:10 + ys_train = f.(xs_train) + xs_test = [10 + Δ, 10 + 2 * Δ] + ys_test = f.(xs_test) # Infer - m_lin_reg = linear_reg(xs_train, ys_train); - chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), 200); + m_lin_reg = linear_reg(xs_train, ys_train) + chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), 200) # Predict on two last indices - m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))); + m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))) predictions = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) - ys_pred = vec(mean(Array(group(predictions, :y)); dims = 1)) + ys_pred = vec(mean(Array(group(predictions, :y)); dims=1)) @test sum(abs2, ys_test - ys_pred) ≤ 0.1 @@ -52,91 +54,88 @@ using Turing @test all(Array(predictions1) .== Array(predictions2)) # Predict on two last indices for vectorized - m_lin_reg_test = linear_reg_vec(xs_test, missing); + m_lin_reg_test = linear_reg_vec(xs_test, missing) predictions_vec = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) - ys_pred_vec = vec(mean(Array(group(predictions_vec, :y)); dims = 1)) + ys_pred_vec = vec(mean(Array(group(predictions_vec, :y)); dims=1)) @test sum(abs2, ys_test - ys_pred_vec) ≤ 0.1 # Multiple chains - chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), MCMCThreads(), 200, 2); - m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))); + chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), MCMCThreads(), 200, 2) + m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))) predictions = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) @test size(chain_lin_reg, 3) == size(predictions, 3) for chain_idx in MCMCChains.chains(chain_lin_reg) - ys_pred = vec(mean(Array(group(predictions[:, :, chain_idx], :y)); dims = 1)) + ys_pred = vec(mean(Array(group(predictions[:, :, chain_idx], :y)); dims=1)) @test sum(abs2, ys_test - ys_pred) ≤ 0.1 end # Predict on two last indices for vectorized - m_lin_reg_test = linear_reg_vec(xs_test, missing); + m_lin_reg_test = linear_reg_vec(xs_test, missing) predictions_vec = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) for chain_idx in MCMCChains.chains(chain_lin_reg) - ys_pred_vec = vec(mean( - Array(group(predictions_vec[:, :, chain_idx], :y)); - dims = 1 - )) + ys_pred_vec = vec(mean(Array(group(predictions_vec[:, :, chain_idx], :y)); dims=1)) @test sum(abs2, ys_test - ys_pred_vec) ≤ 0.1 end # https://github.com/TuringLang/Turing.jl/issues/1352 @model function simple_linear1(x, y) - intercept ~ Normal(0,1) + intercept ~ Normal(0, 1) coef ~ MvNormal(zeros(2), I) - coef = reshape(coef, 1, size(x,1)) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end @model function simple_linear2(x, y) - intercept ~ Normal(0,1) - coef ~ filldist(Normal(0,1), 2) - coef = reshape(coef, 1, size(x,1)) + intercept ~ Normal(0, 1) + coef ~ filldist(Normal(0, 1), 2) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + 
error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end @model function simple_linear3(x, y) - intercept ~ Normal(0,1) + intercept ~ Normal(0, 1) coef = Vector(undef, 2) for i in axes(coef, 1) - coef[i] ~ Normal(0,1) + coef[i] ~ Normal(0, 1) end - coef = reshape(coef, 1, size(x,1)) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end @model function simple_linear4(x, y) - intercept ~ Normal(0,1) - coef1 ~ Normal(0,1) - coef2 ~ Normal(0,1) + intercept ~ Normal(0, 1) + coef1 ~ Normal(0, 1) + coef2 ~ Normal(0, 1) coef = [coef1, coef2] - coef = reshape(coef, 1, size(x,1)) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end # Some data - x = randn(2, 100); - y = [1 + 2 * a + 3 * b for (a,b) in eachcol(x)]; + x = randn(2, 100) + y = [1 + 2 * a + 3 * b for (a, b) in eachcol(x)] for model in [simple_linear1, simple_linear2, simple_linear3, simple_linear4] - m = model(x, y); - chain = sample(m, NUTS(), 100); - chain_predict = predict(model(x, missing), chain); - mean_prediction = [chain_predict["y[$i]"].data |> mean for i = 1:length(y)] + m = model(x, y) + chain = sample(m, NUTS(), 100) + chain_predict = predict(model(x, missing), chain) + mean_prediction = [mean(chain_predict["y[$i]"].data) for i in 1:length(y)] @test mean(abs2, mean_prediction - y) ≤ 1e-3 end end diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index 6c4289f78..5e6144e57 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -3,15 +3,15 @@ module OptimisationTests using ..Models: gdemo, gdemo_default using Distributions using Distributions.FillArrays: Zeros -import DynamicPPL +using DynamicPPL: DynamicPPL using LinearAlgebra: I -import Random +using Random: Random using Optimization using Optimization: Optimization using OptimizationBBO: OptimizationBBO using OptimizationNLopt: OptimizationNLopt using OptimizationOptimJL: OptimizationOptimJL -import StatsBase +using StatsBase: StatsBase using StatsBase: coef, coefnames, coeftable, informationmatrix, stderror, vcov using Test: @test, @testset, @test_throws using Turing @@ -32,11 +32,8 @@ using Turing end DynamicPPL.NodeTrait(::OverrideContext) = DynamicPPL.IsParent() DynamicPPL.childcontext(parent::OverrideContext) = parent.context - DynamicPPL.setchildcontext(parent::OverrideContext, child) = OverrideContext( - child, - parent.logprior_weight, - parent.loglikelihood_weight - ) + DynamicPPL.setchildcontext(parent::OverrideContext, child) = + OverrideContext(child, parent.logprior_weight, parent.loglikelihood_weight) # Only implement what we need for the models above. 
function DynamicPPL.tilde_assume(context::OverrideContext, right, vn, vi) @@ -50,12 +47,12 @@ using Turing @model function model1(x) μ ~ Uniform(0, 2) - x ~ LogNormal(μ, 1) + return x ~ LogNormal(μ, 1) end @model function model2() μ ~ Uniform(0, 2) - x ~ LogNormal(μ, 1) + return x ~ LogNormal(μ, 1) end x = 1.0 @@ -66,7 +63,7 @@ using Turing m2 = model2() | (x=x,) ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) == - Turing.Optimisation.OptimLogDensity(m2, ctx)(w) + Turing.Optimisation.OptimLogDensity(m2, ctx)(w) end @testset "With prefixes" begin @@ -79,21 +76,20 @@ using Turing m2 = prefix_μ(model2() | (var"inner.x"=x,)) ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) == - Turing.Optimisation.OptimLogDensity(m2, ctx)(w) + Turing.Optimisation.OptimLogDensity(m2, ctx)(w) end @testset "Weighted" begin function override(model) return DynamicPPL.contextualize( - model, - OverrideContext(model.context, 100, 1) + model, OverrideContext(model.context, 100, 1) ) end m1 = override(model1(x)) m2 = override(model2() | (x=x,)) ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) == - Turing.Optimisation.OptimLogDensity(m2, ctx)(w) + Turing.Optimisation.OptimLogDensity(m2, ctx)(w) end @testset "Default, Likelihood, Prior Contexts" begin @@ -105,16 +101,17 @@ using Turing @test Turing.Optimisation.OptimLogDensity(m1, defctx)(a) == Turing.Optimisation.OptimLogDensity(m1, llhctx)(a) + - Turing.Optimisation.OptimLogDensity(m1, prictx)(a) + Turing.Optimisation.OptimLogDensity(m1, prictx)(a) # test that PriorContext is calculating the right thing - @test Turing.Optimisation.OptimLogDensity(m1, prictx)([0.3]) ≈ -Distributions.logpdf(Uniform(0, 2), 0.3) - @test Turing.Optimisation.OptimLogDensity(m1, prictx)([-0.3]) ≈ -Distributions.logpdf(Uniform(0, 2), -0.3) + @test Turing.Optimisation.OptimLogDensity(m1, prictx)([0.3]) ≈ + -Distributions.logpdf(Uniform(0, 2), 0.3) + @test Turing.Optimisation.OptimLogDensity(m1, prictx)([-0.3]) ≈ + -Distributions.logpdf(Uniform(0, 2), -0.3) end end @testset "gdemo" begin - """ check_success(result, true_value, true_logp, check_retcode=true) @@ -137,15 +134,11 @@ using Turing true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) check_success(result) = check_optimisation_result(result, true_value, true_logp) - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MLE() - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MLE()) m2 = maximum_likelihood( gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value ) - m3 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.Newton() - ) + m3 = maximum_likelihood(gdemo_default, OptimizationOptimJL.Newton()) # TODO(mhauru) How can we check that the adtype is actually AutoReverseDiff? 
m4 = maximum_likelihood( gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() @@ -153,9 +146,7 @@ using Turing m5 = maximum_likelihood( gdemo_default, OptimizationOptimJL.NelderMead(); initial_params=true_value ) - m6 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.NelderMead() - ) + m6 = maximum_likelihood(gdemo_default, OptimizationOptimJL.NelderMead()) check_success(m1) check_success(m2) @@ -182,15 +173,11 @@ using Turing true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) check_success(result) = check_optimisation_result(result, true_value, true_logp) - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MAP() - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MAP()) m2 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value ) - m3 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.Newton() - ) + m3 = maximum_a_posteriori(gdemo_default, OptimizationOptimJL.Newton()) m4 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() ) @@ -222,34 +209,41 @@ using Turing Random.seed!(222) true_value = [0.0625, 1.75] true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) lb = [0.0, 0.0] ub = [2.0, 2.0] - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MLE(); lb=lb, ub=ub - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MLE(); lb=lb, ub=ub) m2 = maximum_likelihood( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); - initial_params=true_value, lb=lb, ub=ub) + initial_params=true_value, + lb=lb, + ub=ub, + ) m3 = maximum_likelihood( gdemo_default, OptimizationBBO.BBO_separable_nes(); - maxiters=100_000, abstol=1e-5, lb=lb, ub=ub + maxiters=100_000, + abstol=1e-5, + lb=lb, + ub=ub, ) m4 = maximum_likelihood( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); - adtype=AutoReverseDiff(), lb=lb, ub=ub + adtype=AutoReverseDiff(), + lb=lb, + ub=ub, ) m5 = maximum_likelihood( gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=true_value, lb=lb, ub=ub + initial_params=true_value, + lb=lb, + ub=ub, ) m6 = maximum_likelihood(gdemo_default; lb=lb, ub=ub) @@ -275,35 +269,41 @@ using Turing Random.seed!(222) true_value = [49 / 54, 7 / 6] true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) lb = [0.0, 0.0] ub = [2.0, 2.0] - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MAP(); lb=lb, ub=ub - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MAP(); lb=lb, ub=ub) m2 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); - initial_params=true_value, lb=lb, ub=ub + initial_params=true_value, + lb=lb, + ub=ub, ) m3 = maximum_a_posteriori( gdemo_default, OptimizationBBO.BBO_separable_nes(); - maxiters=100_000, abstol=1e-5, lb=lb, ub=ub + maxiters=100_000, + abstol=1e-5, + lb=lb, + ub=ub, ) m4 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); - adtype=AutoReverseDiff(), lb=lb, 
ub=ub + adtype=AutoReverseDiff(), + lb=lb, + ub=ub, ) m5 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=true_value, lb=lb, ub=ub + initial_params=true_value, + lb=lb, + ub=ub, ) m6 = maximum_a_posteriori(gdemo_default; lb=lb, ub=ub) @@ -330,9 +330,8 @@ using Turing Random.seed!(222) true_value = [0.0625, 1.75] true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) # Set two constraints: The first parameter must be non-negative, and the L2 norm # of the parameters must be between 0.5 and 2. @@ -347,16 +346,20 @@ using Turing ) m2 = maximum_likelihood(gdemo_default; initial_params=true_value, cons_args...) m3 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + cons_args..., ) m4 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, adtype=AutoReverseDiff(), cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + adtype=AutoReverseDiff(), + cons_args..., ) m5 = maximum_likelihood( - gdemo_default; - initial_params=initial_params, cons_args... + gdemo_default; initial_params=initial_params, cons_args... ) check_success(m1) @@ -380,9 +383,8 @@ using Turing Random.seed!(222) true_value = [49 / 54, 7 / 6] true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) # Set two constraints: The first parameter must be non-negative, and the L2 norm # of the parameters must be between 0.5 and 2. @@ -399,16 +401,20 @@ using Turing gdemo_default; initial_params=true_value, cons_args... ) m3 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + cons_args..., ) m4 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, adtype=AutoReverseDiff(), cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + adtype=AutoReverseDiff(), + cons_args..., ) m5 = maximum_a_posteriori( - gdemo_default; - initial_params=initial_params, cons_args... + gdemo_default; initial_params=initial_params, cons_args... 
) check_success(m1) @@ -440,10 +446,10 @@ using Turing diffs = coef(mle_est).array - [0.0625031; 1.75001] @test all(isapprox.(diffs, 0.0, atol=0.1)) - infomat = [2/(2*true_values[1]^2) 0.0; 0.0 2/true_values[1]] + infomat = [2/(2 * true_values[1]^2) 0.0; 0.0 2/true_values[1]] @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) - vcovmat = [2*true_values[1]^2/2 0.0; 0.0 true_values[1]/2] + vcovmat = [2 * true_values[1]^2/2 0.0; 0.0 true_values[1]/2] @test all(isapprox.(vcovmat - vcov(mle_est), 0.0, atol=0.01)) ctable = coeftable(mle_est) @@ -462,7 +468,7 @@ using Turing @model function regtest(x, y) beta ~ MvNormal(Zeros(2), I) mu = x * beta - y ~ MvNormal(mu, I) + return y ~ MvNormal(mu, I) end Random.seed!(987) @@ -485,7 +491,7 @@ using Turing s ~ InverseGamma(2, 3) m ~ Normal(0, sqrt(s)) - (.~)(x, Normal(m, sqrt(s))) + return (.~)(x, Normal(m, sqrt(s))) end model_dot = dot_gdemo([1.5, 2.0]) @@ -578,7 +584,7 @@ using Turing @testset "with :=" begin @model function demo_track() x ~ Normal() - y := 100 + x + return y := 100 + x end model = demo_track() result = maximum_a_posteriori(model) diff --git a/test/skipped/advi_demo.jl b/test/skipped/advi_demo.jl index d10e140d0..b6a71c4dc 100644 --- a/test/skipped/advi_demo.jl +++ b/test/skipped/advi_demo.jl @@ -20,7 +20,7 @@ using ConjugatePriors s ~ InverseGamma(2, 3) m ~ Normal(0.0, sqrt(s)) # `Normal(μ, σ)` has mean μ and variance σ², i.e. parametrize with std. not variance - for i = 1:length(x) + for i in 1:length(x) x[i] ~ Normal(m, sqrt(s)) end end @@ -28,28 +28,28 @@ end const seeds = [125, 245, 3] const ad_modes = [:forwarddiff, :reversediff, :tracker] -for seed ∈ seeds +for seed in seeds @info seed - - for ad_mode ∈ ad_modes + + for ad_mode in ad_modes @info ad_mode setadbackend(ad_mode) - + # set random seed Random.seed!(seed) # generate data - x = randn(1, 2000); + x = randn(1, 2000) # construct model m = model(x) - + # ADVI opt = Variational.TruncatedADAGrad() # optimizer advi = ADVI(10, 100) # <: VariationalInference q = Variational.meanfield(m) # => <: VariationalPosterior - + elbo = Variational.ELBO() # <: VariationalObjective μ, σs = params(q) @@ -58,16 +58,16 @@ for seed ∈ seeds history = [elbo(advi, q, m, 1000)] # construct animation - anim = @animate for j = 1:25 + anim = @animate for j in 1:25 # global q - Variational.optimize!(elbo, advi, q, m, θ; optimizer = opt) - μ, ω = θ[1:length(q)], θ[length(q) + 1:end] - + Variational.optimize!(elbo, advi, q, m, θ; optimizer=opt) + μ, ω = θ[1:length(q)], θ[(length(q) + 1):end] + q = Variational.update(q, μ, f.(ω)) samples = rand(q, 2000) # quick check - println([mean(samples, dims=2), [var(x), mean(x)]]) + println([mean(samples; dims=2), [var(x), mean(x)]]) # plotting code assumes (samples, dim) shape so we just transpose samples = transpose(samples) @@ -103,54 +103,59 @@ for seed ∈ seeds # numerically more stable but doesn't seem to have effect; issue is probably internal to # `pdf` which needs to compute ≈ Γ(1000) - p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)))) + p_μ_pdf = + z -> exp( + logpdf( + p_μ, + (z - μₙ) * exp(-0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)), + ), + ) # p_μ_pdf1 = z -> pdf(p_μ, (z - μₙ) / √(βₙ / (αₙ * κₙ))) # posterior plots - p1 = plot(); - density!(samples[:, 1], label = "s (ADVI)", color = :blue, linestyle = :dash); - histogram!(samples[:, 1], label = "", normed = true, alpha = 0.3, color = :blue); + p1 = plot() + density!(samples[:, 1]; label="s (ADVI)", color=:blue, linestyle=:dash) 
+ histogram!(samples[:, 1]; label="", normed=true, alpha=0.3, color=:blue) # normalize using Riemann approx. because of (almost certainly) numerical issues Δ = 0.001 r = 0.75:0.001:1.50 norm_const = sum(p_σ²_pdf.(r) .* Δ) - plot!(r, p_σ²_pdf, label = "s (posterior)", color = :red); - vline!([var(x)], label = "s (data)", linewidth = 1.5, color = :black, alpha = 0.7); - xlims!(0.5, 1.5); - title!("$(j * advi.max_iters) steps"); - - p2 = plot(); - density!(samples[:, 2], label = "m (ADVI)", color = :blue, linestyle = :dash); - histogram!(samples[:, 2], label = "", normed = true, alpha = 0.3, color = :blue); + plot!(r, p_σ²_pdf; label="s (posterior)", color=:red) + vline!([var(x)]; label="s (data)", linewidth=1.5, color=:black, alpha=0.7) + xlims!(0.5, 1.5) + title!("$(j * advi.max_iters) steps") + p2 = plot() + density!(samples[:, 2]; label="m (ADVI)", color=:blue, linestyle=:dash) + histogram!(samples[:, 2]; label="", normed=true, alpha=0.3, color=:blue) # normalize using Riemann approx. because of (almost certainly) numerical issues Δ = 0.0001 - r = -0.1 + mean(x):Δ:0.1 + mean(x) + r = (-0.1 + mean(x)):Δ:(0.1 + mean(x)) norm_const = sum(p_μ_pdf.(r) .* Δ) - plot!(r, z -> p_μ_pdf(z) / norm_const, label = "m (posterior)", color = :red); - vline!([mean(x)], label = "m (data)", linewidth = 1.5, color = :black, alpha = 0.7); + plot!(r, z -> p_μ_pdf(z) / norm_const; label="m (posterior)", color=:red) + vline!([mean(x)]; label="m (data)", linewidth=1.5, color=:black, alpha=0.7) - xlims!(-0.25, 0.25); + xlims!(-0.25, 0.25) # visualize evolution of objective wrt. optimization iterations obj = elbo(advi, q, m, 1000) @info "ELBO" obj push!(history, obj) - p3 = plot(); - plot!(1:advi.max_iters:length(history) * advi.max_iters, history, label = "") + p3 = plot() + plot!(1:(advi.max_iters):(length(history) * advi.max_iters), history; label="") title!("ELBO = $obj") # plot the latest 25 objective evaluations to visualize trend - p4 = plot(); - plot!(history[max(1, end - 10):end], label = "") + p4 = plot() + plot!(history[max(1, end - 10):end]; label="") + + p = plot(p1, p2, p3, p4; layout=(4, 1)) - p = plot(p1, p2, p3, p4; layout = (4, 1)) - @info "[$j] Done" p end - gif(anim, "advi_w_elbo_fps15_$(seed)_$(ad_mode).gif", fps = 15) + gif(anim, "advi_w_elbo_fps15_$(seed)_$(ad_mode).gif"; fps=15) end end diff --git a/test/skipped/bayes_lr.jl b/test/skipped/bayes_lr.jl index b426ce0cb..6b9b4d31e 100644 --- a/test/skipped/bayes_lr.jl +++ b/test/skipped/bayes_lr.jl @@ -9,13 +9,13 @@ using Turing s = 1 β ~ Normal(0, 1) - for n = 1:N + for n in 1:N ys[n] ~ Normal(xs[n] * β, sqrt(s)) end end N = 100 -xs = collect(range(-10, stop = 10, length = N)) +xs = collect(range(-10; stop=10, length=N)) s = 1 β = rand(Normal(0, 1)) ys = xs * β + rand(Normal(0, sqrt(s)), N) @@ -29,18 +29,18 @@ println("mean of β: ", mean(chn[1000:end, :β])) θ_dim = 1 function lj_func(θ) - N = length(xs) - _lj = zero(Real) + N = length(xs) + _lj = zero(Real) - s = 1 + s = 1 - β = θ[1] - _lj += logpdf(Normal(0, 1), β) - for n = 1:N - _lj += logpdf(Normal(xs[n] * β, sqrt(s)), ys[n]) - end + β = θ[1] + _lj += logpdf(Normal(0, 1), β) + for n in 1:N + _lj += logpdf(Normal(xs[n] * β, sqrt(s)), ys[n]) + end - return _lj + return _lj end neg_lj_func(θ) = -lj_func(θ) @@ -48,18 +48,16 @@ const f_tape = GradientTape(neg_lj_func, randn(θ_dim)) const compiled_f_tape = compile(f_tape) function grad_func(θ) + inputs = θ + results = similar(θ) + all_results = DiffResults.GradientResult(results) - inputs = θ - results = similar(θ) - all_results = 
DiffResults.GradientResult(results) - - ReverseDiff.gradient!(all_results, compiled_f_tape, inputs) - - neg_lj = all_results.value - grad, = all_results.derivs + ReverseDiff.gradient!(all_results, compiled_f_tape, inputs) - return -neg_lj, grad + neg_lj = all_results.value + grad, = all_results.derivs + return -neg_lj, grad end std = ones(θ_dim) @@ -70,12 +68,12 @@ chn = [] accept_num = 1 total_num = 2000 -for iter = 1:total_num - global θ, chn, lj, lj_func, grad_func, std, accept_num - push!(chn, θ) - θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 3, 0.005, std) - accept_num += is_accept -# if (iter % 50 == 0) println(θ) end +for iter in 1:total_num + global θ, chn, lj, lj_func, grad_func, std, accept_num + push!(chn, θ) + θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 3, 0.005, std) + accept_num += is_accept + # if (iter % 50 == 0) println(θ) end end @show mean(chn[1000:end]), lj diff --git a/test/skipped/dual_averaging.jl b/test/skipped/dual_averaging.jl index 01c1cb401..897abe8d9 100644 --- a/test/skipped/dual_averaging.jl +++ b/test/skipped/dual_averaging.jl @@ -1,6 +1,4 @@ -function _adapt_ϵ(logϵ, Hbar, logϵbar, da_stat, m, M_adapt, δ, μ; - γ=0.05, t0=10, κ=0.75) - +function _adapt_ϵ(logϵ, Hbar, logϵbar, da_stat, m, M_adapt, δ, μ; γ=0.05, t0=10, κ=0.75) if m <= M_adapt Hbar = (1.0 - 1.0 / (m + t0)) * Hbar + (1 / (m + t0)) * (δ - da_stat) logϵ = μ - sqrt(m) / γ * Hbar diff --git a/test/skipped/explicit_ret.jl b/test/skipped/explicit_ret.jl index f0b53d093..c1340464f 100644 --- a/test/skipped/explicit_ret.jl +++ b/test/skipped/explicit_ret.jl @@ -2,17 +2,18 @@ using Turing using Test @model function test_ex_rt() - x ~ Normal(10, 1) - y ~ Normal(x / 2, 1) - z = y + 1 - x = x - 1 - x, y, z + x ~ Normal(10, 1) + y ~ Normal(x / 2, 1) + z = y + 1 + x = x - 1 + return x, y, z end mf = test_ex_rt() -for alg = [HMC(0.2, 3), PG(20, 2000), SMC(), IS(10000), Gibbs(PG(20, 1, :x), HMC(0.2, 3, :y))] - chn = sample(mf, alg) - @test mean(chn[:x]) ≈ 10.0 atol=0.2 - @test mean(chn[:y]) ≈ 5.0 atol=0.2 +for alg in + [HMC(0.2, 3), PG(20, 2000), SMC(), IS(10000), Gibbs(PG(20, 1, :x), HMC(0.2, 3, :y))] + chn = sample(mf, alg) + @test mean(chn[:x]) ≈ 10.0 atol = 0.2 + @test mean(chn[:y]) ≈ 5.0 atol = 0.2 end diff --git a/test/skipped/gdemo.jl b/test/skipped/gdemo.jl index 610e5b6ee..84242067e 100644 --- a/test/skipped/gdemo.jl +++ b/test/skipped/gdemo.jl @@ -6,11 +6,11 @@ using Turing @model function gdemo() s ~ InverseGamma(2, 3) - m ~ Normal(0,sqrt(s)) + m ~ Normal(0, sqrt(s)) 1.5 ~ Normal(m, sqrt(s)) 2.0 ~ Normal(m, sqrt(s)) return s, m - end +end # Plain Julia @@ -19,18 +19,18 @@ using Turing: invlink, logpdf θ_dim = 2 function lj_func(θ) - _lj = zero(Real) + _lj = zero(Real) - d_s = InverseGamma(2, 3) - s = invlink(d_s, θ[1]) - _lj += logpdf(d_s, s, true) - m = θ[2] - _lj += logpdf(Normal(0, sqrt(s)), m) + d_s = InverseGamma(2, 3) + s = invlink(d_s, θ[1]) + _lj += logpdf(d_s, s, true) + m = θ[2] + _lj += logpdf(Normal(0, sqrt(s)), m) - _lj += logpdf(Normal(m, sqrt(s)), 1.5) - _lj += logpdf(Normal(m, sqrt(s)), 2.0) + _lj += logpdf(Normal(m, sqrt(s)), 1.5) + _lj += logpdf(Normal(m, sqrt(s)), 2.0) - return _lj + return _lj end neg_lj_func(θ) = -lj_func(θ) @@ -38,18 +38,16 @@ const f_tape = GradientTape(neg_lj_func, randn(θ_dim)) const compiled_f_tape = compile(f_tape) function grad_func(θ) + inputs = θ + results = similar(θ) + all_results = DiffResults.GradientResult(results) - inputs = θ - results = similar(θ) - all_results = DiffResults.GradientResult(results) - - 
gradient!(all_results, compiled_f_tape, inputs) - - neg_lj = all_results.value - grad, = all_results.derivs + gradient!(all_results, compiled_f_tape, inputs) - return -neg_lj, grad + neg_lj = all_results.value + grad, = all_results.derivs + return -neg_lj, grad end # Unit test for gradient diff --git a/test/skipped/gdemo_hmc.jl b/test/skipped/gdemo_hmc.jl index 577a10230..855b06f89 100644 --- a/test/skipped/gdemo_hmc.jl +++ b/test/skipped/gdemo_hmc.jl @@ -17,18 +17,18 @@ std = ones(θ_dim) θ = randn(θ_dim) lj = lj_func(θ) -chn = Dict(:θ=>Vector{Vector{Float64}}(), :logϵ=>Vector{Float64}()) +chn = Dict(:θ => Vector{Vector{Float64}}(), :logϵ => Vector{Float64}()) accept_num = 0 function dummy_print(args...) - nothing + return nothing end totla_num = 5000 -for iter = 1:totla_num - push!(chn[:θ], θ) - θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 5, 0.05, std) - accept_num += is_accept +for iter in 1:totla_num + push!(chn[:θ], θ) + θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 5, 0.05, std) + accept_num += is_accept end @show lj diff --git a/test/skipped/gdemo_nuts.jl b/test/skipped/gdemo_nuts.jl index 34c4c6f57..ecc4d5c7f 100644 --- a/test/skipped/gdemo_nuts.jl +++ b/test/skipped/gdemo_nuts.jl @@ -24,49 +24,47 @@ Hbar = 0 δ = 0.75 -for test_id = 1:2 - - test_name = "$test_id. NUTS " * (test_id == 1 ? "without DA" : "with DA") - - @testset "$test_name" begin - - std = ones(θ_dim) - θ = randn(θ_dim) - lj = lj_func(θ) - - chn = Dict(:θ=>Vector{Vector{Float64}}(), :logϵ=>Vector{Float64}()) - - function dummy_print(args...) - nothing - end - - println("Start to run NUTS") - - totla_num = 10000 - for iter = 1:totla_num - global logϵ, lj_func, grad_func, M_adapt, δ, μ - θ, da_stat = _nuts_step(θ, exp(logϵ), lj_func, grad_func, std) - if test_id == 1 - logϵ, Hbar, logϵbar = _adapt_ϵ(logϵ, Hbar, logϵbar, da_stat, iter, M_adapt, δ, μ) - end - - push!(chn[:θ], θ) - push!(chn[:logϵ], logϵ) - # if (iter % 50 == 0) println(θ) end +for test_id in 1:2 + test_name = "$test_id. NUTS " * (test_id == 1 ? "without DA" : "with DA") + + @testset "$test_name" begin + std = ones(θ_dim) + θ = randn(θ_dim) + lj = lj_func(θ) + + chn = Dict(:θ => Vector{Vector{Float64}}(), :logϵ => Vector{Float64}()) + + function dummy_print(args...) 
+ return nothing + end + + println("Start to run NUTS") + + totla_num = 10000 + for iter in 1:totla_num + global logϵ, lj_func, grad_func, M_adapt, δ, μ + θ, da_stat = _nuts_step(θ, exp(logϵ), lj_func, grad_func, std) + if test_id == 1 + logϵ, Hbar, logϵbar = _adapt_ϵ( + logϵ, Hbar, logϵbar, da_stat, iter, M_adapt, δ, μ + ) + end + + push!(chn[:θ], θ) + push!(chn[:logϵ], logϵ) + # if (iter % 50 == 0) println(θ) end + end + + samples_s = exp.(map(x -> x[1], chn[:θ])) + samples_m = map(x -> x[2], chn[:θ]) + @show mean(samples_s[1000:end]) + @test mean(samples_s[1000:end]) ≈ 49 / 24 atol = 0.2 + @show mean(samples_m[1000:end]) + @test mean(samples_m[1000:end]) ≈ 7 / 6 atol = 0.2 + + @show std(samples_s[1000:end]) + @show std(samples_m[1000:end]) + + @show mean(exp.(chn[:logϵ])) end - - samples_s = exp.(map(x -> x[1], chn[:θ])) - samples_m = map(x -> x[2], chn[:θ]) - @show mean(samples_s[1000:end]) - @test mean(samples_s[1000:end]) ≈ 49/24 atol=0.2 - @show mean(samples_m[1000:end]) - @test mean(samples_m[1000:end]) ≈ 7/6 atol=0.2 - - @show std(samples_s[1000:end]) - @show std(samples_m[1000:end]) - - @show mean(exp.(chn[:logϵ])) - - end - end diff --git a/test/skipped/hmcda_geweke.jl b/test/skipped/hmcda_geweke.jl index b0e43ecb5..8e2f20986 100644 --- a/test/skipped/hmcda_geweke.jl +++ b/test/skipped/hmcda_geweke.jl @@ -5,29 +5,41 @@ using Gadfly import Gadfly.ElementOrFunction # First add a method to the basic Gadfly.plot function for QQPair types (generated by Distributions.qqbuild()) -Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) = Gadfly.plot(x=qq.qx, y=qq.qy, Geom.point, Theme(highlight_width=0px), elements...) +function Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) + return Gadfly.plot(; + x=qq.qx, y=qq.qy, Geom.point, Theme(; highlight_width=0px), elements... + ) +end # Now some shorthand functions qqplot(x, y, elements::ElementOrFunction...) = Gadfly.plot(qqbuild(x, y), elements...) -qqnorm(x, elements::ElementOrFunction...) = qqplot(Normal(), x, Guide.xlabel("Theoretical Normal quantiles"), Guide.ylabel("Observed quantiles"), elements...) +function qqnorm(x, elements::ElementOrFunction...) 
+ return qqplot( + Normal(), + x, + Guide.xlabel("Theoretical Normal quantiles"), + Guide.ylabel("Observed quantiles"), + elements..., + ) +end NSamples = 5000 @model function gdemo_fw() - # s ~ InverseGamma(2,3) - s = 1 - m ~ Normal(0, sqrt(s)) - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # s ~ InverseGamma(2,3) + s = 1 + m ~ Normal(0, sqrt(s)) + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end @model function gdemo_bk(x) - # Backward Step 1: theta ~ theta | x - # s ~ InverseGamma(2,3) - s = 1 - m ~ Normal(0,sqrt(s)) - x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) - # Backward Step 2: x ~ x | theta - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 1: theta ~ theta | x + # s ~ InverseGamma(2,3) + s = 1 + m ~ Normal(0, sqrt(s)) + x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 2: x ~ x | theta + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end fw = PG(50, NSamples) @@ -44,24 +56,30 @@ s_bk = Array{Turing.Chain}(undef, N) simple_logger = Base.CoreLogging.SimpleLogger(stderr, Base.CoreLogging.Debug) Base.CoreLogging.with_logger(simple_logger) do - global x, bk, s_bk - i = 1 - while i <= N - s_bk[i] = sample(gdemo_bk(x), bk) - x = s_bk[i][end, :y] - i += 1 - end + global x, bk, s_bk + i = 1 + while i <= N + s_bk[i] = sample(gdemo_bk(x), bk) + x = s_bk[i][end, :y] + i += 1 + end end s2 = reduce(vcat, s_bk); # describe(s2) - using UnicodePlots qqm = qqbuild(s[:m], s2[:m]) -show(scatterplot(qqm.qx, qqm.qy, title = "QQ plot for m", canvas = DotCanvas)) -show(scatterplot(qqm.qx[51:end-50], qqm.qy[51:end-50], title = "QQ plot for m (removing first and last 50 quantiles):", canvas = DotCanvas)) +show(scatterplot(qqm.qx, qqm.qy; title="QQ plot for m", canvas=DotCanvas)) +show( + scatterplot( + qqm.qx[51:(end - 50)], + qqm.qy[51:(end - 50)]; + title="QQ plot for m (removing first and last 50 quantiles):", + canvas=DotCanvas, + ), +) qqm = qqbuild(s[:m], s2[:m]) X = qqm.qx @@ -71,10 +89,10 @@ slope = (1 / (transpose(X) * X)[1] * transpose(X) * y)[1] print(" slopeₛ = $slope ≈ 1 (ϵ = 0.1)") ans1 = abs(slope - 1.0) <= 0.1 if ans1 - printstyled(" ✓\n", color=:green) + printstyled(" ✓\n"; color=:green) else - printstyled(" X\n", color=:red) - printstyled(" slope = $slope, diff = $(slope - 1.0)\n", color=:red) + printstyled(" X\n"; color=:red) + printstyled(" slope = $slope, diff = $(slope - 1.0)\n"; color=:red) end # qqs = qqbuild(s[:s], s2[:s]) diff --git a/test/skipped/nuts_geweke.jl b/test/skipped/nuts_geweke.jl index 070b7f552..d312be884 100644 --- a/test/skipped/nuts_geweke.jl +++ b/test/skipped/nuts_geweke.jl @@ -5,29 +5,41 @@ using Gadfly import Gadfly.ElementOrFunction # First add a method to the basic Gadfly.plot function for QQPair types (generated by Distributions.qqbuild()) -Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) = Gadfly.plot(x=qq.qx, y=qq.qy, Geom.point, Theme(highlight_width=0px), elements...) +function Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) + return Gadfly.plot(; + x=qq.qx, y=qq.qy, Geom.point, Theme(; highlight_width=0px), elements... + ) +end # Now some shorthand functions qqplot(x, y, elements::ElementOrFunction...) = Gadfly.plot(qqbuild(x, y), elements...) -qqnorm(x, elements::ElementOrFunction...) = qqplot(Normal(), x, Guide.xlabel("Theoretical Normal quantiles"), Guide.ylabel("Observed quantiles"), elements...) +function qqnorm(x, elements::ElementOrFunction...) 
+ return qqplot( + Normal(), + x, + Guide.xlabel("Theoretical Normal quantiles"), + Guide.ylabel("Observed quantiles"), + elements..., + ) +end NSamples = 5000 @model function gdemo_fw() - s ~ InverseGamma(2,3) - # s = 1 - m ~ Normal(0,sqrt(s)) - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + s ~ InverseGamma(2, 3) + # s = 1 + m ~ Normal(0, sqrt(s)) + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end @model function gdemo_bk(x) - # Backward Step 1: theta ~ theta | x - s ~ InverseGamma(2,3) - # s = 1 - m ~ Normal(0,sqrt(s)) - x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) - # Backward Step 2: x ~ x | theta - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 1: theta ~ theta | x + s ~ InverseGamma(2, 3) + # s = 1 + m ~ Normal(0, sqrt(s)) + x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 2: x ~ x | theta + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end fw = IS(NSamples) @@ -45,18 +57,17 @@ s_bk = Array{MCMCChains.Chains}(undef, N) simple_logger = Base.CoreLogging.SimpleLogger(stderr, Base.CoreLogging.Debug) with_logger(simple_logger) do - i = 1 - while i <= N - s_bk[i] = sample(gdemo_bk(x), bk) - x = s_bk[i][end, :y] - i += 1 - end + i = 1 + while i <= N + s_bk[i] = sample(gdemo_bk(x), bk) + x = s_bk[i][end, :y] + i += 1 + end end s2 = chainscat(s_bk...) # describe(s2) - # qqplot(s[:m], s2[:m]) # qqplot(s[:s], s2[:s]) @@ -65,14 +76,33 @@ qqm = qqbuild(s[:m], s2[:m]) using UnicodePlots qqm = qqbuild(s[:m], s2[:m]) -show(scatterplot(qqm.qx, qqm.qy, title = "QQ plot for m", canvas = DotCanvas)) -show(scatterplot(qqm.qx[51:end-50], qqm.qy[51:end-50], title = "QQ plot for m (removing first and last 50 quantiles):", canvas = DotCanvas)) -show(scatterplot(qqm.qx, qqm.qy, title = "QQ plot for m")) -show(scatterplot(qqm.qx[51:end-50], qqm.qy[51:end-50], title = "QQ plot for m (removing first and last 50 quantiles):")) +show(scatterplot(qqm.qx, qqm.qy; title="QQ plot for m", canvas=DotCanvas)) +show( + scatterplot( + qqm.qx[51:(end - 50)], + qqm.qy[51:(end - 50)]; + title="QQ plot for m (removing first and last 50 quantiles):", + canvas=DotCanvas, + ), +) +show(scatterplot(qqm.qx, qqm.qy; title="QQ plot for m")) +show( + scatterplot( + qqm.qx[51:(end - 50)], + qqm.qy[51:(end - 50)]; + title="QQ plot for m (removing first and last 50 quantiles):", + ), +) qqs = qqbuild(s[:s].value, s2[:s].value) -show(scatterplot(qqs.qx, qqs.qy, title = "QQ plot for s")) -show(scatterplot(qqs.qx[51:end-50], qqs.qy[51:end-50], title = "QQ plot for s (removing first and last 50 quantiles):")) +show(scatterplot(qqs.qx, qqs.qy; title="QQ plot for s")) +show( + scatterplot( + qqs.qx[51:(end - 50)], + qqs.qy[51:(end - 50)]; + title="QQ plot for s (removing first and last 50 quantiles):", + ), +) X = qqm.qx y = qqm.qy @@ -81,10 +111,10 @@ slope = (1 / (transpose(X) * X)[1] * transpose(X) * y)[1] print(" slopeₛ = $slope ≈ 1 (ϵ = 0.1)") ans1 = abs(slope - 1.0) <= 0.1 if ans1 - printstyled(" ✓\n", color=:green) + printstyled(" ✓\n"; color=:green) else - printstyled(" X\n", color=:red) - printstyled(" slope = $slope, diff = $(slope - 1.0)\n", color=:red) + printstyled(" X\n"; color=:red) + printstyled(" slope = $slope, diff = $(slope - 1.0)\n"; color=:red) end # qqs = qqbuild(s[:s], s2[:s]) diff --git a/test/skipped/opt_param_of_dist.jl b/test/skipped/opt_param_of_dist.jl index e750208d8..3c8e4d2dc 100644 --- a/test/skipped/opt_param_of_dist.jl +++ 
b/test/skipped/opt_param_of_dist.jl @@ -2,11 +2,11 @@ using Turing using Test @model testassume begin - x ~ Bernoulli(1; :static = true) - y ~ Bernoulli(x / 2; :param = true) - 1 ~ Normal(0, 1; :static = true) - 2 ~ Normal(0, 1; :param = true) - y, x + x ~ Bernoulli(1; :static=true) + y ~ Bernoulli(x / 2; :param=true) + 1 ~ Normal(0, 1; :static=true) + 2 ~ Normal(0, 1; :param=true) + y, x end s = SMC() @@ -17,4 +17,4 @@ res = sample(testassume, s) @test all(isone, res[:x]) # check that the mean of y is between 0.4 and 0.6 -@test mean(res[:y]) ≈ 0.5 atol=0.1 +@test mean(res[:y]) ≈ 0.5 atol = 0.1 diff --git a/test/skipped/simple_gauss.jl b/test/skipped/simple_gauss.jl index 935ced27b..14f36dce1 100644 --- a/test/skipped/simple_gauss.jl +++ b/test/skipped/simple_gauss.jl @@ -6,9 +6,9 @@ using Turing @model function simple_gauss() s = 1 - m ~ Normal(0,sqrt(s)) + m ~ Normal(0, sqrt(s)) 2.0 ~ Normal(m, sqrt(s)) - 2.5 ~ Normal(m, sqrt(s)) + return 2.5 ~ Normal(m, sqrt(s)) end # Plain Julia @@ -17,17 +17,17 @@ using ReverseDiff: GradientTape, GradientConfig, gradient, gradient!, compile θ_dim = 1 function lj_func(θ) - _lj = zero(Real) + _lj = zero(Real) - s = 1 + s = 1 - m = θ[1] - _lj += logpdf(Normal(0, sqrt(s)), m) + m = θ[1] + _lj += logpdf(Normal(0, sqrt(s)), m) - _lj += logpdf(Normal(m, sqrt(s)), 2.0) - _lj += logpdf(Normal(m, sqrt(s)), 2.5) + _lj += logpdf(Normal(m, sqrt(s)), 2.0) + _lj += logpdf(Normal(m, sqrt(s)), 2.5) - return _lj + return _lj end neg_lj_func(θ) = -lj_func(θ) @@ -35,18 +35,16 @@ const f_tape = GradientTape(neg_lj_func, randn(θ_dim)) const compiled_f_tape = compile(f_tape) function grad_func(θ) + inputs = θ + results = similar(θ) + all_results = DiffResults.GradientResult(results) - inputs = θ - results = similar(θ) - all_results = DiffResults.GradientResult(results) - - gradient!(all_results, compiled_f_tape, inputs) - - neg_lj = all_results.value - grad, = all_results.derivs + gradient!(all_results, compiled_f_tape, inputs) - return -neg_lj, grad + neg_lj = all_results.value + grad, = all_results.derivs + return -neg_lj, grad end # Unit test for gradient diff --git a/test/skipped/simple_gauss_hmc.jl b/test/skipped/simple_gauss_hmc.jl index 8b2e41d7f..ba1f9dd23 100644 --- a/test/skipped/simple_gauss_hmc.jl +++ b/test/skipped/simple_gauss_hmc.jl @@ -16,17 +16,14 @@ std = ones(θ_dim) θ = randn(θ_dim) lj = lj_func(θ) -chn = Dict(:θ=>Vector{Vector{Float64}}(), :logϵ=>Vector{Float64}()) +chn = Dict(:θ => Vector{Vector{Float64}}(), :logϵ => Vector{Float64}()) accept_num = 1 - totla_num = 10000 -for iter = 1:totla_num - - push!(chn[:θ], θ) - θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 10, 0.05, std) - accept_num += is_accept - +for iter in 1:totla_num + push!(chn[:θ], θ) + θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 10, 0.05, std) + accept_num += is_accept end @show lj diff --git a/test/skipped/sv.jl b/test/skipped/sv.jl index 29b301614..b5fe22e71 100644 --- a/test/skipped/sv.jl +++ b/test/skipped/sv.jl @@ -2,37 +2,27 @@ using ReverseDiff: GradientTape, GradientConfig, gradient, gradient!, compile using Turing: _hmc_step using Turing using HDF5, JLD -sv_data = load(TPATH*"/example-models/nips-2017/sv-data.jld.data")["data"] +sv_data = load(TPATH * "/example-models/nips-2017/sv-data.jld.data")["data"] @model function sv_model(T, y) ϕ ~ Uniform(-1, 1) - σ ~ truncated(Cauchy(0,5), 0, Inf) + σ ~ truncated(Cauchy(0, 5), 0, Inf) μ ~ Cauchy(0, 10) h = Vector{Real}(T) h[1] ~ Normal(μ, σ / sqrt(1 - ϕ^2)) y[1] ~ Normal(0, exp.(h[1] / 2)) - 
for t = 2:T - h[t] ~ Normal(μ + ϕ * (h[t-1] - μ) , σ) - y[t] ~ Normal(0, exp.(h[t] / 2)) + for t in 2:T + h[t] ~ Normal(μ + ϕ * (h[t - 1] - μ), σ) + y[t] ~ Normal(0, exp.(h[t] / 2)) end - end +end - -mf = sv_model(data=sv_data[1]) +mf = sv_model(; data=sv_data[1]) chain_nuts = sample(mf, HMC(0.05, 10), 2000) println("mean of m: $(mean(chn[1000:end, :μ]))") - - - - - - - - - # θ_dim = 1 # function lj_func(θ) # _lj = zero(Real) @@ -74,7 +64,6 @@ println("mean of m: $(mean(chn[1000:end, :μ]))") # chn = [] # accept_num = 1 - # totla_num = 5000 # for iter = 1:totla_num # push!(chn, θ) diff --git a/test/skipped/unit_test_helper.jl b/test/skipped/unit_test_helper.jl index 1517459c5..174234480 100644 --- a/test/skipped/unit_test_helper.jl +++ b/test/skipped/unit_test_helper.jl @@ -10,9 +10,11 @@ function test_grad(turing_model, grad_f; trans=Dict()) @testset "Gradient using random inputs" begin ℓ = LogDensityProblemsAD.ADgradient( Turing.AutoTracker(), - Turing.LogDensityFunction(vi, model_f, SampleFromPrior(), DynamicPPL.DefaultContext()), + Turing.LogDensityFunction( + vi, model_f, SampleFromPrior(), DynamicPPL.DefaultContext() + ), ) - for _ = 1:10000 + for _ in 1:10000 theta = rand(d) @test LogDensityProblems.logdensity_and_gradient(ℓ, theta) == grad_f(theta)[2] end diff --git a/test/skipped/vec_assume_mat.jl b/test/skipped/vec_assume_mat.jl index 28ae99d47..b9efc94d0 100644 --- a/test/skipped/vec_assume_mat.jl +++ b/test/skipped/vec_assume_mat.jl @@ -4,17 +4,17 @@ N = 5 alg = HMC(0.2, 4) @model function vdemo(::Type{T}=Float64) where {T} - v = Vector{Matrix{T}}(undef, N) - @. v ~ Wishart(7, [1 0.5; 0.5 1]) + v = Vector{Matrix{T}}(undef, N) + @. v ~ Wishart(7, [1 0.5; 0.5 1]) end t_vec = @elapsed res_vec = sample(vdemo(), alg, 1000) @model function vdemo() - v = Vector{Matrix{Real}}(undef, N) - for i = 1:N - v[i] ~ Wishart(7, [1 0.5; 0.5 1]) - end + v = Vector{Matrix{Real}}(undef, N) + for i in 1:N + v[i] ~ Wishart(7, [1 0.5; 0.5 1]) + end end t_loop = @elapsed res = sample(vdemo(), alg, 1000) diff --git a/test/skipped/vec_assume_mv.jl b/test/skipped/vec_assume_mv.jl index bafa1f096..ba373b124 100644 --- a/test/skipped/vec_assume_mv.jl +++ b/test/skipped/vec_assume_mv.jl @@ -7,24 +7,26 @@ alg = HMC(0.2, 4) # Test for vectorize UnivariateDistribution @model function vdemo() - phi = Vector{Vector{Real}}(undef, N) - @> phi ~ Dirichlet(beta) + phi = Vector{Vector{Real}}(undef, N) + @> phi ~ Dirichlet(beta) end ch_vec, t_vec, m_vec, gctime, memallocs = @timed res_vec = sample(vdemo(), alg) @model function vdemo() - phi = Matrix(undef, 2, N) - @. phi ~ Dirichlet(beta) + phi = Matrix(undef, 2, N) + @. 
phi ~ Dirichlet(beta) end -ch_vec_mat, t_vec_mat, m_vec_mat, gctime, memallocs = @timed res_vec_mat = sample(vdemo(), alg) +ch_vec_mat, t_vec_mat, m_vec_mat, gctime, memallocs = @timed res_vec_mat = sample( + vdemo(), alg +) @model function vdemo() - phi = Vector{Vector{Real}}(undef, N) - for i = 1:N - phi[i] ~ Dirichlet(beta) - end + phi = Vector{Vector{Real}}(undef, N) + for i in 1:N + phi[i] ~ Dirichlet(beta) + end end ch_loop, t_loop, m_loop, gctime, memallocs = @timed res = sample(vdemo(), alg) diff --git a/test/stdlib/RandomMeasures.jl b/test/stdlib/RandomMeasures.jl index e2be8fb2d..cf4ef90ad 100644 --- a/test/stdlib/RandomMeasures.jl +++ b/test/stdlib/RandomMeasures.jl @@ -1,7 +1,7 @@ module RandomMeasuresTests using Distributions: Normal, sample -import Random +using Random: Random using Test: @test, @testset using Turing using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess @@ -31,37 +31,37 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess # Number of clusters. K = maximum(z) nk = Vector{Int}(map(k -> sum(z .== k), 1:K)) - + # Draw the latent assignment. z[i] ~ ChineseRestaurantProcess(rpm, nk) - + # Create a new cluster? if z[i] > K push!(μ, 0.0) - + # Draw location of new cluster. μ[z[i]] ~ H end - + # Draw observation. x[i] ~ Normal(μ[z[i]], 1.0) end end - + # Generate some test data. - Random.seed!(1); - data = vcat(randn(10), randn(10) .- 5, randn(10) .+ 10); - data .-= mean(data); - data /= std(data); - + Random.seed!(1) + data = vcat(randn(10), randn(10) .- 5, randn(10) .+ 10) + data .-= mean(data) + data /= std(data) + # MCMC sampling - Random.seed!(2); - iterations = 500; - model_fun = infiniteGMM(data); - chain = sample(model_fun, SMC(), iterations); + Random.seed!(2) + iterations = 500 + model_fun = infiniteGMM(data) + chain = sample(model_fun, SMC(), iterations) @test chain isa MCMCChains.Chains - @test eltype(chain.value) === Union{Float64, Missing} + @test eltype(chain.value) === Union{Float64,Missing} end # partitions = [ # [[1, 2, 3, 4]], diff --git a/test/stdlib/distributions.jl b/test/stdlib/distributions.jl index f1ce701da..0d42fb76b 100644 --- a/test/stdlib/distributions.jl +++ b/test/stdlib/distributions.jl @@ -3,7 +3,7 @@ module DistributionsTests using ..NumericalTests: check_dist_numerical using Distributions using LinearAlgebra: I -import Random +using Random: Random using StableRNGs: StableRNG using StatsFuns: logistic using Test: @testset, @test @@ -33,11 +33,10 @@ using Turing end @testset "distributions functions" begin - λ = .01:.01:5 - LLp = @. logpdf(Poisson(λ),1) - LLlp = @. logpdf(LogPoisson(log(λ)),1) - @test LLp ≈ LLlp atol = .0001 - + λ = 0.01:0.01:5 + LLp = @. logpdf(Poisson(λ), 1) + LLlp = @. logpdf(LogPoisson(log(λ)), 1) + @test LLp ≈ LLlp atol = 0.0001 end @testset "single distribution correctness" begin @@ -106,31 +105,32 @@ using Turing # 3. 
MatrixDistribution dist_matrix = [ - Wishart(7, [1.0 0.5; 0.5 1.0]), - InverseWishart(7, [1.0 0.5; 0.5 1.0]), + Wishart(7, [1.0 0.5; 0.5 1.0]), InverseWishart(7, [1.0 0.5; 0.5 1.0]) ] @testset "Correctness test for single distributions" begin - for (dist_set, dist_list) ∈ [ - ("UnivariateDistribution", dist_uni), + for (dist_set, dist_list) in [ + ("UnivariateDistribution", dist_uni), ("MultivariateDistribution", dist_multi), - ("MatrixDistribution", dist_matrix) + ("MatrixDistribution", dist_matrix), ] @testset "$(string(dist_set))" begin for dist in dist_list - @testset "$(string(typeof(dist)))" begin - @info "Distribution(params)" dist + @testset "$(string(typeof(dist)))" begin + @info "Distribution(params)" dist - @model m() = x ~ dist + @model m() = x ~ dist - chn = sample(rng, m(), HMC(0.05, 20), n_samples) + chn = sample(rng, m(), HMC(0.05, 20), n_samples) - # Numerical tests. - check_dist_numerical(dist, - chn, - mean_tol=mean_tol, - var_atol=var_atol, - var_tol=var_tol) + # Numerical tests. + check_dist_numerical( + dist, + chn; + mean_tol=mean_tol, + var_atol=var_atol, + var_tol=var_tol, + ) end end end diff --git a/test/test_utils/SelectiveTests.jl b/test/test_utils/SelectiveTests.jl index d4a20bc91..3026cd16d 100644 --- a/test/test_utils/SelectiveTests.jl +++ b/test/test_utils/SelectiveTests.jl @@ -24,7 +24,7 @@ function parse_args(args) excluded_paths = Vector{String}() for (i, arg) in enumerate(args) if arg == "--skip" - append!(excluded_paths, args[i+1:end]) + append!(excluded_paths, args[(i + 1):end]) break else push!(included_paths, arg) diff --git a/test/test_utils/models.jl b/test/test_utils/models.jl index 94790a041..344002c05 100644 --- a/test/test_utils/models.jl +++ b/test/test_utils/models.jl @@ -1,6 +1,11 @@ module Models -export MoGtest, MoGtest_default, MoGtest_default_z_vector, MoGtest_z_vector, gdemo, gdemo_d, +export MoGtest, + MoGtest_default, + MoGtest_default_z_vector, + MoGtest_z_vector, + gdemo, + gdemo_d, gdemo_default using Distributions @@ -8,19 +13,19 @@ using Turing: @model # The old-gdemo model. @model function gdemo(x, y) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - x ~ Normal(m, sqrt(s)) - y ~ Normal(m, sqrt(s)) - return s, m + s ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s)) + x ~ Normal(m, sqrt(s)) + y ~ Normal(m, sqrt(s)) + return s, m end @model function gdemo_d() - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - 1.5 ~ Normal(m, sqrt(s)) - 2.0 ~ Normal(m, sqrt(s)) - return s, m + s ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s)) + 1.5 ~ Normal(m, sqrt(s)) + 2.0 ~ Normal(m, sqrt(s)) + return s, m end gdemo_default = gdemo_d() @@ -52,7 +57,7 @@ gdemo_default = gdemo_d() else D[4] ~ Normal(mu2, 1) end - z1, z2, z3, z4, mu1, mu2 + return z1, z2, z3, z4, mu1, mu2 end MoGtest_default = MoGtest([1.0 1.0 4.0 4.0]) @@ -86,7 +91,7 @@ MoGtest_default = MoGtest([1.0 1.0 4.0 4.0]) else D[4] ~ Normal(mu2, 1) end - z[1], z[2], z[3], z[4], mu1, mu2 + return z[1], z[2], z[3], z[4], mu1, mu2 end MoGtest_default_z_vector = MoGtest_z_vector([1.0 1.0 4.0 4.0])