diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml new file mode 100644 index 000000000..51d6bc633 --- /dev/null +++ b/.JuliaFormatter.toml @@ -0,0 +1,25 @@ +style="blue" +format_markdown = true +# TODO +# We ignore these files because when formatting was first put in place they were being worked on. +# These ignores should be removed once the relevant PRs are merged/closed. +ignore = [ + # https://github.com/TuringLang/Turing.jl/pull/2231/files + "src/experimental/gibbs.jl", + "src/mcmc/abstractmcmc.jl", + "test/experimental/gibbs.jl", + "test/test_utils/numerical_tests.jl", + # https://github.com/TuringLang/Turing.jl/pull/2218/files + "src/mcmc/Inference.jl", + "test/mcmc/Inference.jl", + # https://github.com/TuringLang/Turing.jl/pull/2068 + "src/variational/VariationalInference.jl", + "src/variational/advi.jl", + "test/variational/advi.jl", + "test/variational/optimisers.jl", + # https://github.com/TuringLang/Turing.jl/pull/1887 + "test/mcmc/Inference.jl", + "test/mcmc/hmc.jl", + "test/mcmc/sghmc.jl", + "test/runtests.jl", +] diff --git a/.github/workflows/Format.yml b/.github/workflows/Format.yml new file mode 100644 index 000000000..abeefa989 --- /dev/null +++ b/.github/workflows/Format.yml @@ -0,0 +1,38 @@ +name: Format + +on: + push: + branches: + - master + pull_request: + branches: + - master + merge_group: + types: [checks_requested] + +concurrency: + # Skip intermediate builds: always. + # Cancel intermediate builds: only if it is a pull request build. + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} + +jobs: + format: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Format code + run: | + using Pkg + Pkg.add(; name="JuliaFormatter", uuid="98e50ef6-434e-11e9-1051-2b60c6c9e899") + using JuliaFormatter + format("."; verbose=true) + shell: julia --color=yes {0} + - uses: reviewdog/action-suggester@v1 + if: github.event_name == 'pull_request' + with: + tool_name: JuliaFormatter + fail_on_error: true diff --git a/HISTORY.md b/HISTORY.md index 0b3661739..2a3929878 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,91 +1,100 @@ # Release 0.30.5 -- `essential/ad.jl` is removed, `ForwardDiff` and `ReverseDiff` integrations via `LogDensityProblemsAD` are moved to `DynamicPPL` and live in corresponding package extensions. -- `LogDensityProblemsAD.ADgradient(ℓ::DynamicPPL.LogDensityFunction)` (i.e. the single argument method) is moved to `Inference` module. It will create `ADgradient` using the `adtype` information stored in `context` field of `ℓ`. -- `getADbackend` function is renamed to `getADType`, the interface is preserved, but packages that previously used `getADbackend` should be updated to use `getADType`. -- `TuringTag` for ForwardDiff is also removed, now `DynamicPPLTag` is defined in `DynamicPPL` package and should serve the same [purpose](https://www.stochasticlifestyle.com/improved-forwarddiff-jl-stacktraces-with-package-tags/). + - `essential/ad.jl` is removed, `ForwardDiff` and `ReverseDiff` integrations via `LogDensityProblemsAD` are moved to `DynamicPPL` and live in corresponding package extensions. + - `LogDensityProblemsAD.ADgradient(ℓ::DynamicPPL.LogDensityFunction)` (i.e. the single argument method) is moved to `Inference` module. It will create `ADgradient` using the `adtype` information stored in `context` field of `ℓ`. 
+ - `getADbackend` function is renamed to `getADType`, the interface is preserved, but packages that previously used `getADbackend` should be updated to use `getADType`. + - `TuringTag` for ForwardDiff is also removed, now `DynamicPPLTag` is defined in `DynamicPPL` package and should serve the same [purpose](https://www.stochasticlifestyle.com/improved-forwarddiff-jl-stacktraces-with-package-tags/). # Release 0.30.0 -- [`ADTypes.jl`](https://github.com/SciML/ADTypes.jl) replaced Turing's global AD backend. Users should now specify the desired `ADType` directly in sampler constructors, e.g., `HMC(0.1, 10; adtype=AutoForwardDiff(; chunksize))`, or `HMC(0.1, 10; adtype=AutoReverseDiff(false))` (`false` indicates not to use compiled tape). -- Interface functions such as `ADBackend`, `setadbackend`, `setadsafe`, `setchunksize`, and `setrdcache` are deprecated and will be removed in a future release. -- Removed the outdated `verifygrad` function. -- Updated to a newer version of `LogDensityProblemsAD` (v1.7). + - [`ADTypes.jl`](https://github.com/SciML/ADTypes.jl) replaced Turing's global AD backend. Users should now specify the desired `ADType` directly in sampler constructors, e.g., `HMC(0.1, 10; adtype=AutoForwardDiff(; chunksize))`, or `HMC(0.1, 10; adtype=AutoReverseDiff(false))` (`false` indicates not to use compiled tape). + - Interface functions such as `ADBackend`, `setadbackend`, `setadsafe`, `setchunksize`, and `setrdcache` are deprecated and will be removed in a future release. + - Removed the outdated `verifygrad` function. + - Updated to a newer version of `LogDensityProblemsAD` (v1.7). # Release 0.12.0 -- The interface for defining new distributions with constrained support and making them compatible with `Turing` has changed. To make a custom distribution type `CustomDistribution` compatible with `Turing`, the user needs to define the method `bijector(d::CustomDistribution)` that returns an instance of type `Bijector` implementing the `Bijectors.Bijector` API. -- `~` is now thread-safe when used for observations, but not assumptions (non-observed model parameters) yet. -- There were some performance improvements in the automatic differentiation (AD) of functions in `DistributionsAD` and `Bijectors`, leading to speeds closer to and sometimes faster than Stan's. -- An `HMC` initialization bug was fixed. `HMC` initialization in Turing is now consistent with Stan's. -- Sampling from the prior is now possible using `sample`. -- `psample` is now deprecated in favour of `sample(model, sampler, parallel_method, n_samples, n_chains)` where `parallel_method` can be either `MCMCThreads()` or `MCMCDistributed()`. `MCMCThreads` will use your available threads to sample each chain (ensure that you have the environment variable `JULIA_NUM_THREADS` set to the number of threads you want to use), and `MCMCDistributed` will dispatch chain sampling to each available process (you can add processes with `addprocs()`). -- Turing now uses `AdvancedMH.jl` v0.5, which mostly provides behind-the-scenes restructuring. -- Custom expressions and macros can be interpolated in the `@model` definition with `$`; it is possible to use `@.` also for assumptions (non-observed model parameters) and observations. -- The macros `@varinfo`, `@logpdf`, and `@sampler` are removed. Instead, one can access the internal variables `_varinfo`, `_model`, `_sampler`, and `_context` in the `@model` definition. -- Additional constructors for `SMC` and `PG` make it easier to choose the resampling method and threshold. 
+ - The interface for defining new distributions with constrained support and making them compatible with `Turing` has changed. To make a custom distribution type `CustomDistribution` compatible with `Turing`, the user needs to define the method `bijector(d::CustomDistribution)` that returns an instance of type `Bijector` implementing the `Bijectors.Bijector` API. + - `~` is now thread-safe when used for observations, but not assumptions (non-observed model parameters) yet. + - There were some performance improvements in the automatic differentiation (AD) of functions in `DistributionsAD` and `Bijectors`, leading to speeds closer to and sometimes faster than Stan's. + - An `HMC` initialization bug was fixed. `HMC` initialization in Turing is now consistent with Stan's. + - Sampling from the prior is now possible using `sample`. + - `psample` is now deprecated in favour of `sample(model, sampler, parallel_method, n_samples, n_chains)` where `parallel_method` can be either `MCMCThreads()` or `MCMCDistributed()`. `MCMCThreads` will use your available threads to sample each chain (ensure that you have the environment variable `JULIA_NUM_THREADS` set to the number of threads you want to use), and `MCMCDistributed` will dispatch chain sampling to each available process (you can add processes with `addprocs()`). + - Turing now uses `AdvancedMH.jl` v0.5, which mostly provides behind-the-scenes restructuring. + - Custom expressions and macros can be interpolated in the `@model` definition with `$`; it is possible to use `@.` also for assumptions (non-observed model parameters) and observations. + - The macros `@varinfo`, `@logpdf`, and `@sampler` are removed. Instead, one can access the internal variables `_varinfo`, `_model`, `_sampler`, and `_context` in the `@model` definition. + - Additional constructors for `SMC` and `PG` make it easier to choose the resampling method and threshold. # Release 0.11.0 -- Removed some extraneous imports and dependencies ([#1182](https://github.com/TuringLang/Turing.jl/pull/1182)) -- Minor backend changes to `sample` and `psample`, which now use functions defined upstream in AbstractMCMC.jl ([#1187](https://github.com/TuringLang/Turing.jl/pull/1187)) -- Fix for an AD-related crash ([#1202](https://github.com/TuringLang/Turing.jl/pull/1202)) -- StatsBase compat update to 0.33 ([#1185](https://github.com/TuringLang/Turing.jl/pull/1185)) -- Bugfix for ReverseDiff caching and memoization ([#1208](https://github.com/TuringLang/Turing.jl/pull/1208)) -- BREAKING: `VecBinomialLogit` is now removed. 
Also `BernoulliLogit` is added ([#1214](https://github.com/TuringLang/Turing.jl/pull/1214)) -- Bugfix for cases where dynamic models were breaking with HMC methods ([#1217](https://github.com/TuringLang/Turing.jl/pull/1217)) -- Updates to allow AdvancedHMC 0.2.23 ([#1218](https://github.com/TuringLang/Turing.jl/pull/1218)) -- Add more informative error messages for SMC ([#900](https://github.com/TuringLang/Turing.jl/pull/900)) + + - Removed some extraneous imports and dependencies ([#1182](https://github.com/TuringLang/Turing.jl/pull/1182)) + - Minor backend changes to `sample` and `psample`, which now use functions defined upstream in AbstractMCMC.jl ([#1187](https://github.com/TuringLang/Turing.jl/pull/1187)) + - Fix for an AD-related crash ([#1202](https://github.com/TuringLang/Turing.jl/pull/1202)) + - StatsBase compat update to 0.33 ([#1185](https://github.com/TuringLang/Turing.jl/pull/1185)) + - Bugfix for ReverseDiff caching and memoization ([#1208](https://github.com/TuringLang/Turing.jl/pull/1208)) + - BREAKING: `VecBinomialLogit` is now removed. Also `BernoulliLogit` is added ([#1214](https://github.com/TuringLang/Turing.jl/pull/1214)) + - Bugfix for cases where dynamic models were breaking with HMC methods ([#1217](https://github.com/TuringLang/Turing.jl/pull/1217)) + - Updates to allow AdvancedHMC 0.2.23 ([#1218](https://github.com/TuringLang/Turing.jl/pull/1218)) + - Add more informative error messages for SMC ([#900](https://github.com/TuringLang/Turing.jl/pull/900)) # Release 0.10.1 -- Fix bug where arrays with mixed integers, floats, and missing values were not being passed to the `MCMCChains.Chains` constructor properly [#1180](https://github.com/TuringLang/Turing.jl/pull/1180). + + - Fix bug where arrays with mixed integers, floats, and missing values were not being passed to the `MCMCChains.Chains` constructor properly [#1180](https://github.com/TuringLang/Turing.jl/pull/1180). # Release 0.10.0 -- Update elliptical slice sampling to use [EllipticalSliceSampling.jl](https://github.com/TuringLang/EllipticalSliceSampling.jl) on the backend. [#1145](https://github.com/TuringLang/Turing.jl/pull/1145). Nothing should change from a front-end perspective -- you can still call `sample(model, ESS(), 1000)`. -- Added default progress loggers in [#1149](https://github.com/TuringLang/Turing.jl/pull/1149). -- The symbols used to define the AD backend have changed to be the lowercase form of the package name used for AD. `forward_diff` is now `forwarddiff`, `reverse_diff` is now `tracker`, and `zygote` and `reversediff` are newly supported (see below). `forward_diff` and `reverse_diff` are deprecated and are slated to be removed. -- Turing now has experimental support for Zygote.jl ([#783](https://github.com/TuringLang/Turing.jl/pull/783)) and ReverseDiff.jl ([#1170](https://github.com/TuringLang/Turing.jl/pull/1170)) AD backends. Both backends are experimental, so please report any bugs you find. Zygote does not allow mutation within your model, so please be aware of this issue. You can enable Zygote with `Turing.setadbackend(:zygote)` and you can enable ReverseDiff with `Turing.setadbackend(:reversediff)`, though to use either you must import the package with `using Zygote` or `using ReverseDiff`. `for` loops are not recommended for ReverseDiff or Zygote -- see [performance tips](https://turinglang.org/dev/docs/using-turing/performancetips#special-care-for-codetrackercode-and-codezygotecode) for more information. 
-- Fix MH indexing bug [#1135](https://github.com/TuringLang/Turing.jl/pull/1135). -- Fix MH array sampling [#1167](https://github.com/TuringLang/Turing.jl/pull/1167). -- Fix bug in VI where the bijectors where being inverted incorrectly [#1168](https://github.com/TuringLang/Turing.jl/pull/1168). -- The Gibbs sampler handles state better by passing `Transition` structs to the local samplers ([#1169](https://github.com/TuringLang/Turing.jl/pull/1169) and [#1166](https://github.com/TuringLang/Turing.jl/pull/1166)). + + - Update elliptical slice sampling to use [EllipticalSliceSampling.jl](https://github.com/TuringLang/EllipticalSliceSampling.jl) on the backend. [#1145](https://github.com/TuringLang/Turing.jl/pull/1145). Nothing should change from a front-end perspective -- you can still call `sample(model, ESS(), 1000)`. + - Added default progress loggers in [#1149](https://github.com/TuringLang/Turing.jl/pull/1149). + - The symbols used to define the AD backend have changed to be the lowercase form of the package name used for AD. `forward_diff` is now `forwarddiff`, `reverse_diff` is now `tracker`, and `zygote` and `reversediff` are newly supported (see below). `forward_diff` and `reverse_diff` are deprecated and are slated to be removed. + - Turing now has experimental support for Zygote.jl ([#783](https://github.com/TuringLang/Turing.jl/pull/783)) and ReverseDiff.jl ([#1170](https://github.com/TuringLang/Turing.jl/pull/1170)) AD backends. Both backends are experimental, so please report any bugs you find. Zygote does not allow mutation within your model, so please be aware of this issue. You can enable Zygote with `Turing.setadbackend(:zygote)` and you can enable ReverseDiff with `Turing.setadbackend(:reversediff)`, though to use either you must import the package with `using Zygote` or `using ReverseDiff`. `for` loops are not recommended for ReverseDiff or Zygote -- see [performance tips](https://turinglang.org/dev/docs/using-turing/performancetips#special-care-for-codetrackercode-and-codezygotecode) for more information. + - Fix MH indexing bug [#1135](https://github.com/TuringLang/Turing.jl/pull/1135). + - Fix MH array sampling [#1167](https://github.com/TuringLang/Turing.jl/pull/1167). + - Fix bug in VI where the bijectors where being inverted incorrectly [#1168](https://github.com/TuringLang/Turing.jl/pull/1168). + - The Gibbs sampler handles state better by passing `Transition` structs to the local samplers ([#1169](https://github.com/TuringLang/Turing.jl/pull/1169) and [#1166](https://github.com/TuringLang/Turing.jl/pull/1166)). # Release 0.4.0-alpha -- Fix compatibility with Julia 0.6 [#341, #330, #293] -- Support of Stan interface [#343, #326] -- Fix Binomial distribution for gradients. [#311] -- Stochastic gradient Hamiltonian Monte Carlo [#201]; Stochastic gradient Langevin dynamics [#27] -- More particle MCMC family samplers: PIMH & PMMH [#364, #369] -- Disable adaptive resampling for CSMC [#357] -- Fix resampler for SMC [#338] -- Interactive particle MCMC [#334] -- Add type alias CSMC for PG [#333] -- Fix progress meter [#317] + + - Fix compatibility with Julia 0.6 [#341, #330, #293] + - Support of Stan interface [#343, #326] + - Fix Binomial distribution for gradients. 
[#311] + - Stochastic gradient Hamiltonian Monte Carlo [#201]; Stochastic gradient Langevin dynamics [#27] + - More particle MCMC family samplers: PIMH & PMMH [#364, #369] + - Disable adaptive resampling for CSMC [#357] + - Fix resampler for SMC [#338] + - Interactive particle MCMC [#334] + - Add type alias CSMC for PG [#333] + - Fix progress meter [#317] # Release 0.3 -- NUTS implementation #188 -- HMC: Transforms of ϵ for each variable #67 (replace with introducing mass matrix) -- Finish: Sampler (internal) interface design #107 -- Substantially improve performance of HMC and Gibbs #7 - - Vectorising likelihood computations #117 #255 - - Remove obsolete `randoc`, `randc`? #156 -- Support truncated distribution. #87 -- Refactoring code: Unify VarInfo, Trace, TaskLocalStorage #96 -- Refactoring code: Better gradient interface #97 + + - NUTS implementation #188 + - HMC: Transforms of ϵ for each variable #67 (replace with introducing mass matrix) + - Finish: Sampler (internal) interface design #107 + - Substantially improve performance of HMC and Gibbs #7 + - Vectorising likelihood computations #117 #255 + - Remove obsolete `randoc`, `randc`? #156 + - Support truncated distribution. #87 + - Refactoring code: Unify VarInfo, Trace, TaskLocalStorage #96 + - Refactoring code: Better gradient interface #97 # Release 0.2 -- Gibbs sampler ([#73]) -- HMC for constrained variables ([#66]; no support for varying dimensions) -- Added support for `Mamba.Chain` ([#90]): describe, plot etc. -- New interface design ([#55]), ([#104]) -- Bugfixes and general improvements (e.g. `VarInfo` [#96]) + + - Gibbs sampler ([#73]) + - HMC for constrained variables ([#66]; no support for varying dimensions) + - Added support for `Mamba.Chain` ([#90]): describe, plot etc. + - New interface design ([#55]), ([#104]) + - Bugfixes and general improvements (e.g. `VarInfo` [#96]) # Release 0.1.0 -- Initial support for Hamiltonian Monte Carlo (no support for discrete/constrained variables) -- Require Julia 0.5 -- Bugfixes and general improvements + + - Initial support for Hamiltonian Monte Carlo (no support for discrete/constrained variables) + - Require Julia 0.5 + - Bugfixes and general improvements # Release 0.0.1-0.0.4 -The initial releases of Turing. -- Particle MCMC, SMC, IS -- Implemented [copying for Julia Task](https://github.com/JuliaLang/julia/pull/15078) -- Implemented copy-on-write data structure `TArray` for Tasks + +The initial releases of Turing. 
+ + - Particle MCMC, SMC, IS + - Implemented [copying for Julia Task](https://github.com/JuliaLang/julia/pull/15078) + - Implemented copy-on-write data structure `TArray` for Tasks diff --git a/README.md b/README.md index fa95b3378..4ea12d52e 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,7 @@ [![Build Status](https://github.com/TuringLang/Turing.jl/workflows/Turing-CI/badge.svg)](https://github.com/TuringLang/Turing.jl/actions?query=workflow%3ATuring-CI+branch%3Amaster) [![Coverage Status](https://coveralls.io/repos/github/TuringLang/Turing.jl/badge.svg?branch=master)](https://coveralls.io/github/TuringLang/Turing.jl?branch=master) [![codecov](https://codecov.io/gh/TuringLang/Turing.jl/branch/master/graph/badge.svg?token=OiUBsnDQqf)](https://codecov.io/gh/TuringLang/Turing.jl) -[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor's%20Guide-blueviolet)](https://github.com/SciML/ColPrac) - +[![ColPrac: Contributor's Guide on Collaborative Practices for Community Packages](https://img.shields.io/badge/ColPrac-Contributor%27s%20Guide-blueviolet)](https://github.com/SciML/ColPrac) ## Getting Started @@ -12,7 +11,6 @@ Turing's home page, with links to everything you'll need to use Turing, is avail https://turinglang.org/docs/ - ## What's changed recently? See [releases](https://github.com/TuringLang/Turing.jl/releases). @@ -25,6 +23,5 @@ You can see the complete list on Github: https://github.com/TuringLang/Turing.jl Turing is an open source project so if you feel you have some relevant skills and are interested in contributing, please get in touch. See the [Contributing](https://turinglang.org/dev/docs/contributing/guide) page for details on the process. You can contribute by opening issues on Github, implementing things yourself, and making a pull request. We would also appreciate example models written using Turing. ## Issues and Discussions -Issues related to bugs and feature requests are welcome on the [issues page](https://github.com/TuringLang/Turing.jl/issues), while discussions and questions about statistical applications and theory should place on the [Discussions page](https://github.com/TuringLang/Turing.jl/discussions) or [our channel](https://julialang.slack.com/messages/turing/) (`#turing`) in the Julia Slack chat. If you do not have an invitation to Julia's Slack, you can get one by going [here](https://julialang.org/slack/). - +Issues related to bugs and feature requests are welcome on the [issues page](https://github.com/TuringLang/Turing.jl/issues), while discussions and questions about statistical applications and theory should place on the [Discussions page](https://github.com/TuringLang/Turing.jl/discussions) or [our channel](https://julialang.slack.com/messages/turing/) (`#turing`) in the Julia Slack chat. If you do not have an invitation to Julia's Slack, you can get one by going [here](https://julialang.org/slack/). 
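The `.JuliaFormatter.toml` and `.github/workflows/Format.yml` files added at the top of this diff configure repository-wide formatting with the `blue` style and enforce it in CI via reviewdog suggestions. A minimal sketch of the equivalent local run is shown below; it is not part of the diff itself and simply mirrors the workflow's Julia step, reusing the JuliaFormatter UUID pinned there.

```julia
# Sketch: format the repository locally, mirroring the CI step in Format.yml.
using Pkg
Pkg.add(; name="JuliaFormatter", uuid="98e50ef6-434e-11e9-1051-2b60c6c9e899")

using JuliaFormatter
# format(".") picks up .JuliaFormatter.toml (style = "blue" plus the ignore list).
format("."; verbose=true)
```

Running this before pushing should leave nothing for the CI formatter to suggest.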
diff --git a/benchmarks/benchmarks_suite.jl b/benchmarks/benchmarks_suite.jl index ef00117ee..200380121 100644 --- a/benchmarks/benchmarks_suite.jl +++ b/benchmarks/benchmarks_suite.jl @@ -16,18 +16,17 @@ BenchmarkSuite["constrained"] = BenchmarkGroup(["constrained"]) data = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1] - @model function constrained_test(obs) - p ~ Beta(2,2) - for i = 1:length(obs) + p ~ Beta(2, 2) + for i in 1:length(obs) obs[i] ~ Bernoulli(p) end - p + return p end - -BenchmarkSuite["constrained"]["constrained"] = @benchmarkable sample($(constrained_test(data)), $(HMC(0.01, 2)), 2000) - +BenchmarkSuite["constrained"]["constrained"] = @benchmarkable sample( + $(constrained_test(data)), $(HMC(0.01, 2)), 2000 +) ## gdemo @@ -41,9 +40,9 @@ BenchmarkSuite["gdemo"] = BenchmarkGroup(["gdemo"]) return s², m end -BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample($(gdemo(1.5, 2.0)), $(HMC(0.01, 2)), 2000) - - +BenchmarkSuite["gdemo"]["hmc"] = @benchmarkable sample( + $(gdemo(1.5, 2.0)), $(HMC(0.01, 2)), 2000 +) ## MvNormal @@ -52,8 +51,8 @@ BenchmarkSuite["mnormal"] = BenchmarkGroup(["mnormal"]) # Define the target distribution and its gradient @model function target(dim) - Θ = Vector{Real}(undef, dim) - θ ~ MvNormal(zeros(dim), I) + Θ = Vector{Real}(undef, dim) + return θ ~ MvNormal(zeros(dim), I) end # Sampling parameter settings @@ -61,24 +60,29 @@ dim = 10 n_samples = 100_000 n_adapts = 2_000 -BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample($(target(dim)), $(HMC(0.1, 5)), $n_samples) +BenchmarkSuite["mnormal"]["hmc"] = @benchmarkable sample( + $(target(dim)), $(HMC(0.1, 5)), $n_samples +) ## MvNormal: ForwardDiff vs ReverseDiff @model function mdemo(d, N) Θ = Vector(undef, N) - for n=1:N - Θ[n] ~ d - end + for n in 1:N + Θ[n] ~ d + end end dim2 = 250 -A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2))); -d = MvNormal(zeros(dim2), A) +A = rand(Wishart(dim2, Matrix{Float64}(I, dim2, dim2))); +d = MvNormal(zeros(dim2), A) # ForwardDiff -BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000) - +BenchmarkSuite["mnormal"]["forwarddiff"] = @benchmarkable sample( + $(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))), 5000 +) # ReverseDiff -BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample($(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000) +BenchmarkSuite["mnormal"]["reversediff"] = @benchmarkable sample( + $(mdemo(d, 1)), $(HMC(0.1, 5; adtype=AutoReverseDiff(false))), 5000 +) diff --git a/benchmarks/models/hlr.jl b/benchmarks/models/hlr.jl index 36836263f..b40c3b2c2 100644 --- a/benchmarks/models/hlr.jl +++ b/benchmarks/models/hlr.jl @@ -10,15 +10,14 @@ end x, y = readlrdata() @model function hlr_nuts(x, y, θ) + N, D = size(x) - N,D = size(x) - - σ² ~ Exponential(θ) + σ² ~ Exponential(θ) α ~ Normal(0, sqrt(σ²)) β ~ MvNormal(zeros(D), σ² * I) - for n = 1:N - y[n] ~ BinomialLogit(1, dot(x[n,:], β) + α) + for n in 1:N + y[n] ~ BinomialLogit(1, dot(x[n, :], β) + α) end end @@ -26,4 +25,6 @@ end n_samples = 10_000 # Sampling -BenchmarkSuite["nuts"]["hrl"] = @benchmarkable sample(hlr_nuts(x, y, 1/0.1), NUTS(0.65), n_samples) +BenchmarkSuite["nuts"]["hrl"] = @benchmarkable sample( + hlr_nuts(x, y, 1 / 0.1), NUTS(0.65), n_samples +) diff --git a/benchmarks/models/lr.jl b/benchmarks/models/lr.jl index c98963e79..3714a93d0 100644 --- a/benchmarks/models/lr.jl +++ b/benchmarks/models/lr.jl @@ -10,14 +10,13 @@ end X, Y = readlrdata() @model function 
lr_nuts(x, y, σ) - - N,D = size(x) + N, D = size(x) α ~ Normal(0, σ) β ~ MvNormal(zeros(D), σ^2 * I) - for n = 1:N - y[n] ~ BinomialLogit(1, dot(x[n,:], β) + α) + for n in 1:N + y[n] ~ BinomialLogit(1, dot(x[n, :], β) + α) end end @@ -26,5 +25,6 @@ n_samples = 1_000 n_adapts = 1_000 # Sampling -BenchmarkSuite["nuts"]["lr"] = @benchmarkable sample(lr_nuts(X, Y, 100), - NUTS(0.65), n_samples) +BenchmarkSuite["nuts"]["lr"] = @benchmarkable sample( + lr_nuts(X, Y, 100), NUTS(0.65), n_samples +) diff --git a/benchmarks/models/lr_helper.jl b/benchmarks/models/lr_helper.jl index 20f6bf3f1..83de134c8 100644 --- a/benchmarks/models/lr_helper.jl +++ b/benchmarks/models/lr_helper.jl @@ -1,10 +1,9 @@ using DelimitedFiles function readlrdata() - fname = joinpath(dirname(@__FILE__), "lr_nuts.data") z = readdlm(fname) - x = z[:,1:end-1] - y = z[:,end] .- 1 + x = z[:, 1:(end - 1)] + y = z[:, end] .- 1 return x, y end diff --git a/benchmarks/models/sv_nuts.jl b/benchmarks/models/sv_nuts.jl index c54dade69..697625dce 100644 --- a/benchmarks/models/sv_nuts.jl +++ b/benchmarks/models/sv_nuts.jl @@ -6,26 +6,25 @@ if !haskey(BenchmarkSuite, "nuts") end fname = joinpath(dirname(@__FILE__), "sv_nuts.data") -y, header = readdlm(fname, ',', header=true) +y, header = readdlm(fname, ','; header=true) # Stochastic volatility (SV) @model function sv_nuts(y, dy, ::Type{T}=Vector{Float64}) where {T} - N = size(y,1) + N = size(y, 1) - τ ~ Exponential(1/100) - ν ~ Exponential(1/100) + τ ~ Exponential(1 / 100) + ν ~ Exponential(1 / 100) s = T(undef, N) - s[1] ~ Exponential(1/100) + s[1] ~ Exponential(1 / 100) for n in 2:N - s[n] ~ Normal(log(s[n-1]), τ) + s[n] ~ Normal(log(s[n - 1]), τ) s[n] = exp(s[n]) - dy = log(y[n] / y[n-1]) / s[n] - dy ~ TDist(ν) + dy = log(y[n] / y[n - 1]) / s[n] + dy ~ TDist(ν) end end - # Sampling parameter settings n_samples = 10_000 diff --git a/docs/README.md b/docs/README.md index 2bc2ad683..7a1ad9119 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,5 @@ -Turing's documentation in this directory is in markdown format. +Turing's documentation in this directory is in markdown format. If you want to build the doc locally, please refer to the [README](https://github.com/TuringLang/turinglang.github.io) file in [turinglang.github.io](https://github.com/TuringLang/turinglang.github.io). -Please also visit [this repo](https://github.com/TuringLang/TuringTutorials/tree/master/tutorials) for the docs. +Please also visit [this repo](https://github.com/TuringLang/TuringTutorials/tree/master/tutorials) for the docs. 
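The reformatted benchmark models above pass the AD backend to each sampler through the `adtype` keyword (e.g. `HMC(0.1, 5; adtype=AutoForwardDiff(; chunksize=0))` or `AutoReverseDiff(false)`), which is the per-sampler interface described in the Release 0.30.0 notes earlier in this diff. A minimal sketch of the same pattern outside the benchmark harness follows; the model and data are illustrative only (they echo the `constrained_test` benchmark), not part of the diff.

```julia
# Sketch: per-sampler AD selection via `adtype`, as used in the benchmarks above.
# Model and data are illustrative only.
using Turing

@model function coinflip(y)
    p ~ Beta(2, 2)
    for i in eachindex(y)
        y[i] ~ Bernoulli(p)
    end
end

data = [0, 1, 0, 1, 1, 1, 1, 1, 1, 1]

# ForwardDiff-based gradients; AutoReverseDiff(false) would instead select
# ReverseDiff without a compiled tape, matching the benchmark entries.
chain = sample(coinflip(data), HMC(0.01, 2; adtype=AutoForwardDiff()), 2_000)
```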
diff --git a/docs/src/library/advancedhmc.md b/docs/src/library/advancedhmc.md index 5742e4cff..84f712f4d 100644 --- a/docs/src/library/advancedhmc.md +++ b/docs/src/library/advancedhmc.md @@ -22,4 +22,4 @@ Order = [:function] ```@autodocs Modules = [AdvancedHMC] Order = [:type] -``` \ No newline at end of file +``` diff --git a/docs/src/library/api.md b/docs/src/library/api.md index 53a946ceb..c598820b7 100644 --- a/docs/src/library/api.md +++ b/docs/src/library/api.md @@ -7,6 +7,7 @@ toc: true ```@meta CurrentModule = Turing ``` + ## Index ```@index diff --git a/docs/src/library/bijectors.md b/docs/src/library/bijectors.md index 9fb4eaecf..471da45fe 100644 --- a/docs/src/library/bijectors.md +++ b/docs/src/library/bijectors.md @@ -22,4 +22,4 @@ Order = [:function] ```@autodocs Modules = [Bijectors] Order = [:type] -``` \ No newline at end of file +``` diff --git a/ext/TuringDynamicHMCExt.jl b/ext/TuringDynamicHMCExt.jl index 27bca72e1..c82f237c0 100644 --- a/ext/TuringDynamicHMCExt.jl +++ b/ext/TuringDynamicHMCExt.jl @@ -3,9 +3,8 @@ module TuringDynamicHMCExt ### DynamicHMC backend - https://github.com/tpapp/DynamicHMC.jl ### - if isdefined(Base, :get_extension) - import DynamicHMC + using DynamicHMC: DynamicHMC using Turing using Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL using Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS @@ -13,7 +12,7 @@ else import ..DynamicHMC using ..Turing using ..Turing: AbstractMCMC, Random, LogDensityProblems, DynamicPPL - using ..Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS + using ..Turing.Inference: ADTypes, LogDensityProblemsAD, TYPEDFIELDS end """ @@ -25,22 +24,22 @@ To use it, make sure you have DynamicHMC package (version >= 2) loaded: ```julia using DynamicHMC ``` -""" +""" struct DynamicNUTS{AD,space,T<:DynamicHMC.NUTS} <: Turing.Inference.Hamiltonian sampler::T adtype::AD end function DynamicNUTS( - spl::DynamicHMC.NUTS = DynamicHMC.NUTS(), - space::Tuple = (); - adtype::ADTypes.AbstractADType = Turing.DEFAULT_ADTYPE + spl::DynamicHMC.NUTS=DynamicHMC.NUTS(), + space::Tuple=(); + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) return DynamicNUTS{typeof(adtype),space,typeof(spl)}(spl, adtype) end Turing.externalsampler(spl::DynamicHMC.NUTS) = DynamicNUTS(spl) -DynamicPPL.getspace(::DynamicNUTS{<:Any, space}) where {space} = space +DynamicPPL.getspace(::DynamicNUTS{<:Any,space}) where {space} = space """ DynamicNUTSState @@ -59,14 +58,16 @@ struct DynamicNUTSState{L,V<:DynamicPPL.AbstractVarInfo,C,M,S} stepsize::S end -DynamicPPL.initialsampler(::DynamicPPL.Sampler{<:DynamicNUTS}) = DynamicPPL.SampleFromUniform() +function DynamicPPL.initialsampler(::DynamicPPL.Sampler{<:DynamicNUTS}) + return DynamicPPL.SampleFromUniform() +end function DynamicPPL.initialstep( rng::Random.AbstractRNG, model::DynamicPPL.Model, spl::DynamicPPL.Sampler{<:DynamicNUTS}, vi::DynamicPPL.AbstractVarInfo; - kwargs... + kwargs..., ) # Ensure that initial sample is in unconstrained space. if !DynamicPPL.islinked(vi, spl) @@ -75,15 +76,13 @@ function DynamicPPL.initialstep( end # Define log-density function. - ℓ = LogDensityProblemsAD.ADgradient(Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext())) + ℓ = LogDensityProblemsAD.ADgradient( + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ) # Perform initial step. 
results = DynamicHMC.mcmc_keep_warmup( - rng, - ℓ, - 0; - initialization = (q = vi[spl],), - reporter = DynamicHMC.NoProgressReport(), + rng, ℓ, 0; initialization=(q=vi[spl],), reporter=DynamicHMC.NoProgressReport() ) steps = DynamicHMC.mcmc_steps(results.sampling_logdensity, results.final_warmup_state) Q, _ = DynamicHMC.mcmc_next_step(steps, results.final_warmup_state.Q) @@ -104,18 +103,12 @@ function AbstractMCMC.step( model::DynamicPPL.Model, spl::DynamicPPL.Sampler{<:DynamicNUTS}, state::DynamicNUTSState; - kwargs... + kwargs..., ) # Compute next sample. vi = state.vi ℓ = state.logdensity - steps = DynamicHMC.mcmc_steps( - rng, - spl.alg.sampler, - state.metric, - ℓ, - state.stepsize, - ) + steps = DynamicHMC.mcmc_steps(rng, spl.alg.sampler, state.metric, ℓ, state.stepsize) Q, _ = DynamicHMC.mcmc_next_step(steps, state.cache) # Update the variables. diff --git a/ext/TuringOptimExt.jl b/ext/TuringOptimExt.jl index d95bdde33..3d31af6f2 100644 --- a/ext/TuringOptimExt.jl +++ b/ext/TuringOptimExt.jl @@ -1,20 +1,12 @@ module TuringOptimExt if isdefined(Base, :get_extension) - import Turing - import Turing: - DynamicPPL, - NamedArrays, - Accessors, - Optimisation - import Optim + using Turing: Turing + import Turing: DynamicPPL, NamedArrays, Accessors, Optimisation + using Optim: Optim else import ..Turing - import ..Turing: - DynamicPPL, - NamedArrays, - Accessors, - Optimisation + import ..Turing: DynamicPPL, NamedArrays, Accessors, Optimisation import ..Optim end @@ -43,8 +35,10 @@ mle = optimize(model, MLE(), NelderMead()) ``` """ function Optim.optimize( - model::DynamicPPL.Model, ::Optimisation.MLE, options::Optim.Options=Optim.Options(); - kwargs... + model::DynamicPPL.Model, + ::Optimisation.MLE, + options::Optim.Options=Optim.Options(); + kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -57,7 +51,7 @@ function Optim.optimize( ::Optimisation.MLE, init_vals::AbstractArray, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) optimizer = Optim.LBFGS() return _mle_optimize(model, init_vals, optimizer, options; kwargs...) @@ -67,7 +61,7 @@ function Optim.optimize( ::Optimisation.MLE, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -80,7 +74,7 @@ function Optim.optimize( init_vals::AbstractArray, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) return _mle_optimize(model, init_vals, optimizer, options; kwargs...) end @@ -111,8 +105,10 @@ map_est = optimize(model, MAP(), NelderMead()) ``` """ function Optim.optimize( - model::DynamicPPL.Model, ::Optimisation.MAP, options::Optim.Options=Optim.Options(); - kwargs... + model::DynamicPPL.Model, + ::Optimisation.MAP, + options::Optim.Options=Optim.Options(); + kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -125,7 +121,7 @@ function Optim.optimize( ::Optimisation.MAP, init_vals::AbstractArray, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) optimizer = Optim.LBFGS() return _map_optimize(model, init_vals, optimizer, options; kwargs...) @@ -135,7 +131,7 @@ function Optim.optimize( ::Optimisation.MAP, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... 
+ kwargs..., ) ctx = Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) f = Optimisation.OptimLogDensity(model, ctx) @@ -148,7 +144,7 @@ function Optim.optimize( init_vals::AbstractArray, optimizer::Optim.AbstractOptimizer, options::Optim.Options=Optim.Options(); - kwargs... + kwargs..., ) return _map_optimize(model, init_vals, optimizer, options; kwargs...) end @@ -170,7 +166,7 @@ function _optimize( optimizer::Optim.AbstractOptimizer=Optim.LBFGS(), options::Optim.Options=Optim.Options(), args...; - kwargs... + kwargs..., ) # Convert the initial values, since it is assumed that users provide them # in the constrained space. diff --git a/src/Turing.jl b/src/Turing.jl index 4aa2fef2c..e3ba2f81d 100644 --- a/src/Turing.jl +++ b/src/Turing.jl @@ -7,19 +7,19 @@ using Libtask @reexport using Distributions, MCMCChains, Libtask, AbstractMCMC, Bijectors using Compat: pkgversion -import AdvancedVI +using AdvancedVI: AdvancedVI using DynamicPPL: DynamicPPL, LogDensityFunction import DynamicPPL: getspace, NoDist, NamedDist -import LogDensityProblems -import NamedArrays -import Accessors -import StatsAPI -import StatsBase +using LogDensityProblems: LogDensityProblems +using NamedArrays: NamedArrays +using Accessors: Accessors +using StatsAPI: StatsAPI +using StatsBase: StatsBase using Accessors: Accessors -import Printf -import Random +using Printf: Printf +using Random: Random using ADTypes: ADTypes @@ -62,87 +62,82 @@ include("deprecated.jl") # to be removed in the next minor version release # Exports # ########### # `using` statements for stuff to re-export -using DynamicPPL: pointwise_loglikelihoods, generated_quantities, logprior, logjoint, condition, decondition, fix, unfix, conditioned +using DynamicPPL: + pointwise_loglikelihoods, + generated_quantities, + logprior, + logjoint, + condition, + decondition, + fix, + unfix, + conditioned using StatsBase: predict using Bijectors: ordered using OrderedCollections: OrderedDict # Turing essentials - modelling macros and inference algorithms -export @model, # modelling - @varname, - @submodel, - DynamicPPL, - - Prior, # Sampling from the prior - - MH, # classic sampling - Emcee, - ESS, - Gibbs, - GibbsConditional, - - HMC, # Hamiltonian-like sampling - SGLD, - SGHMC, - HMCDA, - NUTS, - DynamicNUTS, - ANUTS, - - PolynomialStepsize, - - IS, # particle-based sampling - SMC, - CSMC, - PG, - - vi, # variational inference - ADVI, - - sample, # inference - @logprob_str, - @prob_str, - externalsampler, - - AutoForwardDiff, # ADTypes - AutoReverseDiff, - AutoZygote, - AutoTracker, - - setprogress!, # debugging - - Flat, - FlatPos, - BinomialLogit, - BernoulliLogit, # Part of Distributions >= 0.25.77 - OrderedLogistic, - LogPoisson, - filldist, - arraydist, - - NamedDist, # Exports from DynamicPPL - predict, - pointwise_loglikelihoods, - elementwise_loglikelihoods, - generated_quantities, - logprior, - logjoint, - LogDensityFunction, - - condition, - decondition, - fix, - unfix, - conditioned, - OrderedDict, - - ordered, # Exports from Bijectors - - maximum_a_posteriori, - maximum_likelihood, - # The MAP and MLE exports are only needed for the Optim.jl interface. 
- MAP, - MLE +export @model, # modelling + @varname, + @submodel, + DynamicPPL, + Prior, # Sampling from the prior + MH, # classic sampling + Emcee, + ESS, + Gibbs, + GibbsConditional, + HMC, # Hamiltonian-like sampling + SGLD, + SGHMC, + HMCDA, + NUTS, + DynamicNUTS, + ANUTS, + PolynomialStepsize, + IS, # particle-based sampling + SMC, + CSMC, + PG, + vi, # variational inference + ADVI, + sample, # inference + @logprob_str, + @prob_str, + externalsampler, + AutoForwardDiff, # ADTypes + AutoReverseDiff, + AutoZygote, + AutoTracker, + setprogress!, # debugging + Flat, + FlatPos, + BinomialLogit, + BernoulliLogit, # Part of Distributions >= 0.25.77 + OrderedLogistic, + LogPoisson, + filldist, + arraydist, + NamedDist, # Exports from DynamicPPL + predict, + pointwise_loglikelihoods, + elementwise_loglikelihoods, + generated_quantities, + logprior, + logjoint, + LogDensityFunction, + condition, + decondition, + fix, + unfix, + conditioned, + OrderedDict, + ordered, # Exports from Bijectors + maximum_a_posteriori, + maximum_likelihood, + # The MAP and MLE exports are only needed for the Optim.jl interface. + MAP, + MLE # AutoTapir is only supported by ADTypes v1.0 and above. @static if VERSION >= v"1.10" && pkgversion(ADTypes) >= v"1" @@ -155,9 +150,13 @@ end function __init__() @static if !isdefined(Base, :get_extension) - @require Optim="429524aa-4258-5aef-a3af-852621145aeb" include("../ext/TuringOptimExt.jl") - @require DynamicHMC="bbc10e6e-7c05-544b-b16e-64fede858acb" include("../ext/TuringDynamicHMCExt.jl") - end + @require Optim = "429524aa-4258-5aef-a3af-852621145aeb" include( + "../ext/TuringOptimExt.jl" + ) + @require DynamicHMC = "bbc10e6e-7c05-544b-b16e-64fede858acb" include( + "../ext/TuringDynamicHMCExt.jl" + ) + end end end diff --git a/src/deprecated.jl b/src/deprecated.jl index 76f5854cf..34305d442 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,23 +1,39 @@ export setadbackend, setchunksize, setadsafe -function setadbackend(::Union{Symbol, Val}) - Base.depwarn("`ADBACKEND` and `setbackend` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", :setbackend; force=true) - nothing +function setadbackend(::Union{Symbol,Val}) + Base.depwarn( + "`ADBACKEND` and `setbackend` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", + :setbackend; + force=true, + ) + return nothing end function setchunksize(::Int) - Base.depwarn("`CHUNKSIZE` and `setchunksize` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", :setchunksize; force=true) - nothing + Base.depwarn( + "`CHUNKSIZE` and `setchunksize` are deprecated. Please specify the chunk size directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoForwardDiff())`.\n This function has no effects.", + :setchunksize; + force=true, + ) + return nothing end -function setrdcache(::Union{Bool, Val}) - Base.depwarn("`RDCACHE` and `setrdcache` are deprecated. Please specify if you wish to use compiled tape for ReverseDiff directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoReverseDiff(false))`.\n This function has no effects.", :setrdcache; force=true) - nothing +function setrdcache(::Union{Bool,Val}) + Base.depwarn( + "`RDCACHE` and `setrdcache` are deprecated. 
Please specify if you wish to use compiled tape for ReverseDiff directly in the sampler constructor, e.g., `HMC(0.1, 5; adtype=AutoReverseDiff(false))`.\n This function has no effects.", + :setrdcache; + force=true, + ) + return nothing end function setadsafe(::Bool) - Base.depwarn("`ADSAFE` and `setadsafe` are outdated and no longer in use.", :setadsafe; force=true) - nothing + Base.depwarn( + "`ADSAFE` and `setadsafe` are outdated and no longer in use.", + :setadsafe; + force=true, + ) + return nothing end Base.@deprecate_binding Core Essential false diff --git a/src/essential/Essential.jl b/src/essential/Essential.jl index 8cef7a681..12413bef2 100644 --- a/src/essential/Essential.jl +++ b/src/essential/Essential.jl @@ -13,35 +13,35 @@ using StatsFuns: logsumexp, softmax @reexport using DynamicPPL using ADTypes: ADTypes, AutoForwardDiff, AutoTracker, AutoReverseDiff, AutoZygote -import AdvancedPS +using AdvancedPS: AdvancedPS include("container.jl") -export @model, - @varname, - generate_observe, - translate_tilde!, - get_vars, - get_data, - get_default_values, - ParticleContainer, - Particle, - Trace, - fork, - forkr, - current_trace, - getweights, - getweight, - effectiveSampleSize, - sweep!, - ResampleWithESSThreshold, - AutoForwardDiff, - AutoTracker, - AutoZygote, - AutoReverseDiff, - value, - @logprob_str, - @prob_str +export @model, + @varname, + generate_observe, + translate_tilde!, + get_vars, + get_data, + get_default_values, + ParticleContainer, + Particle, + Trace, + fork, + forkr, + current_trace, + getweights, + getweight, + effectiveSampleSize, + sweep!, + ResampleWithESSThreshold, + AutoForwardDiff, + AutoTracker, + AutoZygote, + AutoReverseDiff, + value, + @logprob_str, + @prob_str # AutoTapir is only supported by ADTypes v1.0 and above. @static if VERSION >= v"1.10" && pkgversion(ADTypes) >= v"1" diff --git a/src/essential/container.jl b/src/essential/container.jl index 483270c9f..bd4e21f6b 100644 --- a/src/essential/container.jl +++ b/src/essential/container.jl @@ -1,4 +1,5 @@ -struct TracedModel{S<:AbstractSampler,V<:AbstractVarInfo,M<:Model,E<:Tuple} <: AdvancedPS.AbstractGenericModel +struct TracedModel{S<:AbstractSampler,V<:AbstractVarInfo,M<:Model,E<:Tuple} <: + AdvancedPS.AbstractGenericModel model::M sampler::S varinfo::V @@ -14,26 +15,24 @@ function TracedModel( context = SamplingContext(rng, sampler, DefaultContext()) args, kwargs = DynamicPPL.make_evaluate_args_and_kwargs(model, varinfo, context) if kwargs !== nothing && !isempty(kwargs) - error("Sampling with `$(sampler.alg)` does not support models with keyword arguments. See issue #2007 for more details.") + error( + "Sampling with `$(sampler.alg)` does not support models with keyword arguments. See issue #2007 for more details.", + ) end return TracedModel{AbstractSampler,AbstractVarInfo,Model,Tuple}( - model, - sampler, - varinfo, - (model.f, args...) + model, sampler, varinfo, (model.f, args...) ) end function AdvancedPS.advance!( - trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}, - isref::Bool=false + trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}, isref::Bool=false ) # Make sure we load/reset the rng in the new replaying mechanism DynamicPPL.increment_num_produce!(trace.model.f.varinfo) isref ? 
AdvancedPS.load_state!(trace.rng) : AdvancedPS.save_state!(trace.rng) score = consume(trace.model.ctask) if score === nothing - return + return nothing else return score + DynamicPPL.getlogp(trace.model.f.varinfo) end @@ -54,7 +53,9 @@ function AdvancedPS.reset_logprob!(trace::TracedModel) return trace end -function AdvancedPS.update_rng!(trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}}) +function AdvancedPS.update_rng!( + trace::AdvancedPS.Trace{<:AdvancedPS.LibtaskModel{<:TracedModel}} +) # Extract the `args`. args = trace.model.ctask.args # From `args`, extract the `SamplingContext`, which contains the RNG. diff --git a/src/mcmc/emcee.jl b/src/mcmc/emcee.jl index 49b9aab57..4ebe49ea5 100644 --- a/src/mcmc/emcee.jl +++ b/src/mcmc/emcee.jl @@ -2,7 +2,7 @@ ### Sampler states ### -struct Emcee{space, E<:AMH.Ensemble} <: InferenceAlgorithm +struct Emcee{space,E<:AMH.Ensemble} <: InferenceAlgorithm ensemble::E end @@ -12,7 +12,7 @@ function Emcee(n_walkers::Int, stretch_length=2.0) # ensemble sampling. prop = AMH.StretchProposal(nothing, stretch_length) ensemble = AMH.Ensemble(n_walkers, prop) - return Emcee{(), typeof(ensemble)}(ensemble) + return Emcee{(),typeof(ensemble)}(ensemble) end struct EmceeState{V<:AbstractVarInfo,S} @@ -24,9 +24,9 @@ function AbstractMCMC.step( rng::Random.AbstractRNG, model::Model, spl::Sampler{<:Emcee}; - resume_from = nothing, - initial_params = nothing, - kwargs... + resume_from=nothing, + initial_params=nothing, + kwargs..., ) if resume_from !== nothing state = loadstate(resume_from) @@ -39,9 +39,8 @@ function AbstractMCMC.step( # Update the parameters if provided. if initial_params !== nothing - length(initial_params) == n || throw( - ArgumentError("initial parameters have to be specified for each walker") - ) + length(initial_params) == n || + throw(ArgumentError("initial parameters have to be specified for each walker")) vis = map(vis, initial_params) do vi, init vi = DynamicPPL.initialize_parameters!!(vi, init, spl, model) @@ -59,18 +58,14 @@ function AbstractMCMC.step( map(vis) do vi vi = DynamicPPL.link!!(vi, spl, model) AMH.Transition(vi[spl], getlogp(vi), false) - end + end, ) return transition, state end function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:Emcee}, - state::EmceeState; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:Emcee}, state::EmceeState; kwargs... ) # Generate a log joint function. vi = state.vi @@ -98,11 +93,11 @@ function AbstractMCMC.bundle_samples( spl::Sampler{<:Emcee}, state::EmceeState, chain_type::Type{MCMCChains.Chains}; - save_state = false, - sort_chain = false, - discard_initial = 0, - thinning = 1, - kwargs... + save_state=false, + sort_chain=false, + discard_initial=0, + thinning=1, + kwargs..., ) # Convert transitions to array format. # Also retrieve the variable names. @@ -134,9 +129,9 @@ function AbstractMCMC.bundle_samples( le = getlogevidence(samples, state, spl) # Set up the info tuple. - info = (varname_to_symbol = OrderedDict(zip(varnames, varnames_symbol)),) + info = (varname_to_symbol=OrderedDict(zip(varnames, varnames_symbol)),) if save_state - info = merge(info, (model = model, sampler = spl, samplerstate = state)) + info = merge(info, (model=model, sampler=spl, samplerstate=state)) end # Concretize the array before giving it to MCMCChains. 
@@ -146,7 +141,7 @@ function AbstractMCMC.bundle_samples( chain = MCMCChains.Chains( parray, nms, - (internals = extra_params,); + (internals=extra_params,); evidence=le, info=info, start=discard_initial + 1, diff --git a/src/mcmc/ess.jl b/src/mcmc/ess.jl index a12d683f5..2910a7efd 100644 --- a/src/mcmc/ess.jl +++ b/src/mcmc/ess.jl @@ -27,11 +27,7 @@ ESS(space::Symbol) = ESS{(space,)}() # always accept in the first step function DynamicPPL.initialstep( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:ESS}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:ESS}, vi::AbstractVarInfo; kwargs... ) # Sanity check vns = _getvns(vi, spl) @@ -47,11 +43,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:ESS}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:ESS}, vi::AbstractVarInfo; kwargs... ) # obtain previous sample f = vi[spl] @@ -64,7 +56,8 @@ function AbstractMCMC.step( sample, state = AbstractMCMC.step( rng, EllipticalSliceSampling.ESSModel( - ESSPrior(model, spl, vi), Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()), + ESSPrior(model, spl, vi), + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()), ), EllipticalSliceSampling.ESS(), oldstate, @@ -83,10 +76,10 @@ struct ESSPrior{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo,T} sampler::S varinfo::V μ::T - - function ESSPrior{M,S,V}(model::M, sampler::S, varinfo::V) where { - M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo - } + + function ESSPrior{M,S,V}( + model::M, sampler::S, varinfo::V + ) where {M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} vns = _getvns(varinfo, sampler) μ = mapreduce(vcat, vns[1]) do vn dist = getdist(varinfo, vn) @@ -99,9 +92,7 @@ struct ESSPrior{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo,T} end function ESSPrior(model::Model, sampler::Sampler{<:ESS}, varinfo::AbstractVarInfo) - return ESSPrior{typeof(model),typeof(sampler),typeof(varinfo)}( - model, sampler, varinfo, - ) + return ESSPrior{typeof(model),typeof(sampler),typeof(varinfo)}(model, sampler, varinfo) end # Ensure that the prior is a Gaussian distribution (checked in the constructor) @@ -124,7 +115,9 @@ end Distributions.mean(p::ESSPrior) = p.μ # Evaluate log-likelihood of proposals -const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = Turing.LogDensityFunction{V,M,<:DynamicPPL.SamplingContext{<:S}} +const ESSLogLikelihood{M<:Model,S<:Sampler{<:ESS},V<:AbstractVarInfo} = Turing.LogDensityFunction{ + V,M,<:DynamicPPL.SamplingContext{<:S} +} function (ℓ::ESSLogLikelihood)(f::AbstractVector) sampler = DynamicPPL.getsampler(ℓ) @@ -133,7 +126,9 @@ function (ℓ::ESSLogLikelihood)(f::AbstractVector) return getlogp(varinfo) end -function DynamicPPL.tilde_assume(rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, vn, vi) +function DynamicPPL.tilde_assume( + rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, vn, vi +) return if inspace(vn, sampler) DynamicPPL.tilde_assume(rng, LikelihoodContext(), SampleFromPrior(), right, vn, vi) else @@ -141,19 +136,33 @@ function DynamicPPL.tilde_assume(rng::Random.AbstractRNG, ctx::DefaultContext, s end end -function DynamicPPL.tilde_observe(ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi) +function DynamicPPL.tilde_observe( + ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi +) return DynamicPPL.tilde_observe(ctx, SampleFromPrior(), right, left, vi) 
end -function DynamicPPL.dot_tilde_assume(rng::Random.AbstractRNG, ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vns, vi) +function DynamicPPL.dot_tilde_assume( + rng::Random.AbstractRNG, + ctx::DefaultContext, + sampler::Sampler{<:ESS}, + right, + left, + vns, + vi, +) # TODO: Or should we do `all(Base.Fix2(inspace, sampler), vns)`? return if inspace(first(vns), sampler) - DynamicPPL.dot_tilde_assume(rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi) + DynamicPPL.dot_tilde_assume( + rng, LikelihoodContext(), SampleFromPrior(), right, left, vns, vi + ) else DynamicPPL.dot_tilde_assume(rng, ctx, SampleFromPrior(), right, left, vns, vi) end end -function DynamicPPL.dot_tilde_observe(ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi) +function DynamicPPL.dot_tilde_observe( + ctx::DefaultContext, sampler::Sampler{<:ESS}, right, left, vi +) return DynamicPPL.dot_tilde_observe(ctx, SampleFromPrior(), right, left, vi) end diff --git a/src/mcmc/gibbs.jl b/src/mcmc/gibbs.jl index 2a7572ffa..815566437 100644 --- a/src/mcmc/gibbs.jl +++ b/src/mcmc/gibbs.jl @@ -2,7 +2,6 @@ ### Gibbs samplers / compositional samplers. ### - """ isgibbscomponent(alg) @@ -16,7 +15,7 @@ isgibbscomponent(::Hamiltonian) = true isgibbscomponent(::MH) = true isgibbscomponent(::PG) = true -const TGIBBS = Union{InferenceAlgorithm, GibbsConditional} +const TGIBBS = Union{InferenceAlgorithm,GibbsConditional} """ Gibbs(algs...) @@ -47,12 +46,15 @@ Tips: methods like Particle Gibbs. You can increase the effectiveness of particle sampling by including more particles in the particle sampler. """ -struct Gibbs{space, N, A<:NTuple{N, TGIBBS}, B<:NTuple{N, Int}} <: InferenceAlgorithm +struct Gibbs{space,N,A<:NTuple{N,TGIBBS},B<:NTuple{N,Int}} <: InferenceAlgorithm algs::A # component sampling algorithms iterations::B - function Gibbs{space, N, A, B}(algs::A, iterations::B) where {space, N, A<:NTuple{N, TGIBBS}, B<:NTuple{N, Int}} - all(isgibbscomponent, algs) || error("all algorithms have to support Gibbs sampling") - return new{space, N, A, B}(algs, iterations) + function Gibbs{space,N,A,B}( + algs::A, iterations::B + ) where {space,N,A<:NTuple{N,TGIBBS},B<:NTuple{N,Int}} + all(isgibbscomponent, algs) || + error("all algorithms have to support Gibbs sampling") + return new{space,N,A,B}(algs, iterations) end end @@ -61,19 +63,18 @@ function Gibbs(alg1::TGIBBS, algrest::Vararg{TGIBBS,N}) where {N} iterations = ntuple(Returns(1), Val(N + 1)) # obtain space for sampling algorithms space = Tuple(union(getspace.(algs)...)) - return Gibbs{space, N + 1, typeof(algs), typeof(iterations)}(algs, iterations) + return Gibbs{space,N + 1,typeof(algs),typeof(iterations)}(algs, iterations) end function Gibbs( - arg1::Tuple{<:TGIBBS,Int}, - argrest::Vararg{<:Tuple{<:TGIBBS,Int}, N}, + arg1::Tuple{<:TGIBBS,Int}, argrest::Vararg{<:Tuple{<:TGIBBS,Int},N} ) where {N} allargs = (arg1, argrest...) algs = map(first, allargs) iterations = map(last, allargs) # obtain space for sampling algorithms space = Tuple(union(getspace.(algs)...)) - return Gibbs{space, N + 1, typeof(algs), typeof(iterations)}(algs, iterations) + return Gibbs{space,N + 1,typeof(algs),typeof(iterations)}(algs, iterations) end """ @@ -110,14 +111,13 @@ Return an updated state, taking into account the variables sampled by other Gibb - `varinfo`: the variables, including the ones sampled by other Gibbs components. 
""" gibbs_state(model, sampler, state::AbstractVarInfo, varinfo::AbstractVarInfo) = varinfo -gibbs_state(model, sampler, state::PGState, varinfo::AbstractVarInfo) = PGState(varinfo, state.rng) +function gibbs_state(model, sampler, state::PGState, varinfo::AbstractVarInfo) + return PGState(varinfo, state.rng) +end # Update state in Gibbs sampling function gibbs_state( - model::Model, - spl::Sampler{<:Hamiltonian}, - state::HMCState, - varinfo::AbstractVarInfo, + model::Model, spl::Sampler{<:Hamiltonian}, state::HMCState, varinfo::AbstractVarInfo ) # Update hamiltonian θ_old = varinfo[spl] @@ -159,11 +159,7 @@ gibbs_rerun(prev_alg, ::PG) = false # Initialize the Gibbs sampler. function DynamicPPL.initialstep( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:Gibbs}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:Gibbs}, vi::AbstractVarInfo; kwargs... ) # TODO: Technically this only works for `VarInfo` or `ThreadSafeVarInfo{<:VarInfo}`. # Should we enforce this? @@ -176,7 +172,7 @@ function DynamicPPL.initialstep( if i == 1 prev_alg = algs[end] else - prev_alg = algs[i-1] + prev_alg = algs[i - 1] end rerun = gibbs_rerun(prev_alg, alg) selector = DynamicPPL.Selector(Symbol(typeof(alg)), rerun) @@ -202,7 +198,11 @@ function DynamicPPL.initialstep( states = map(samplers) do local_spl # Recompute `vi.logp` if needed. if local_spl.selector.rerun - vi = last(DynamicPPL.evaluate!!(model, vi, DynamicPPL.SamplingContext(rng, local_spl))) + vi = last( + DynamicPPL.evaluate!!( + model, vi, DynamicPPL.SamplingContext(rng, local_spl) + ), + ) end # Compute initial state. @@ -223,11 +223,7 @@ end # Subsequent steps function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:Gibbs}, - state::GibbsState; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:Gibbs}, state::GibbsState; kwargs... ) # Iterate through each of the samplers. vi = state.vi diff --git a/src/mcmc/gibbs_conditional.jl b/src/mcmc/gibbs_conditional.jl index 5dd1b1787..fda79315b 100644 --- a/src/mcmc/gibbs_conditional.jl +++ b/src/mcmc/gibbs_conditional.jl @@ -49,11 +49,11 @@ m = inverse_gdemo(x) sample(m, Gibbs(GibbsConditional(:λ, cond_λ), GibbsConditional(:m, cond_m)), 10) ``` """ -struct GibbsConditional{S, C} +struct GibbsConditional{S,C} conditional::C function GibbsConditional(sym::Symbol, conditional::C) where {C} - return new{sym, C}(conditional) + return new{sym,C}(conditional) end end @@ -64,7 +64,7 @@ function DynamicPPL.initialstep( model::Model, spl::Sampler{<:GibbsConditional}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) return nothing, vi end @@ -74,7 +74,7 @@ function AbstractMCMC.step( model::Model, spl::Sampler{<:GibbsConditional}, vi::AbstractVarInfo; - kwargs... 
+ kwargs..., ) condvals = DynamicPPL.values_as(DynamicPPL.invlink(vi, model), NamedTuple) conddist = spl.alg.conditional(condvals) diff --git a/src/mcmc/hmc.jl b/src/mcmc/hmc.jl index c3c90eab3..f17bca2e7 100644 --- a/src/mcmc/hmc.jl +++ b/src/mcmc/hmc.jl @@ -61,13 +61,19 @@ sample(gdemo([1.5, 2]), HMC(0.1, 10), 1000) sample(gdemo([1.5, 2]), HMC(0.01, 10), 1000) ``` """ -struct HMC{AD, space, metricT <: AHMC.AbstractMetric} <: StaticHamiltonian +struct HMC{AD,space,metricT<:AHMC.AbstractMetric} <: StaticHamiltonian ϵ::Float64 # leapfrog step size n_leapfrog::Int # leapfrog step number adtype::AD end -function HMC(ϵ::Float64, n_leapfrog::Int, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE) where {metricT<:AHMC.AbstractMetric} +function HMC( + ϵ::Float64, + n_leapfrog::Int, + ::Type{metricT}, + space::Tuple; + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, +) where {metricT<:AHMC.AbstractMetric} return HMC{typeof(adtype),space,metricT}(ϵ, n_leapfrog, adtype) end function HMC( @@ -77,7 +83,7 @@ function HMC( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMC(ϵ, n_leapfrog, metricT, space; adtype = adtype) + return HMC(ϵ, n_leapfrog, metricT, space; adtype=adtype) end DynamicPPL.initialsampler(::Sampler{<:Hamiltonian}) = SampleFromUniform() @@ -95,7 +101,7 @@ function AbstractMCMC.sample( nadapts=sampler.alg.n_adapts, discard_adapt=true, discard_initial=-1, - kwargs... + kwargs..., ) if resume_from === nothing # If `nadapts` is `-1`, then the user called a convenience @@ -114,15 +120,30 @@ function AbstractMCMC.sample( _discard_initial = discard_initial end - return AbstractMCMC.mcmcsample(rng, model, sampler, N; - chain_type=chain_type, progress=progress, - nadapts=_nadapts, discard_initial=_discard_initial, - kwargs...) + return AbstractMCMC.mcmcsample( + rng, + model, + sampler, + N; + chain_type=chain_type, + progress=progress, + nadapts=_nadapts, + discard_initial=_discard_initial, + kwargs..., + ) else return AbstractMCMC.mcmcsample( - rng, model, sampler, N; - chain_type=chain_type, initial_state=initial_state, progress=progress, - nadapts=0, discard_adapt=false, discard_initial=0, kwargs... + rng, + model, + sampler, + N; + chain_type=chain_type, + initial_state=initial_state, + progress=progress, + nadapts=0, + discard_adapt=false, + discard_initial=0, + kwargs..., ) end end @@ -134,7 +155,7 @@ function DynamicPPL.initialstep( vi_original::AbstractVarInfo; initial_params=nothing, nadapts=0, - kwargs... + kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. vi = DynamicPPL.link(vi_original, spl, model) @@ -152,8 +173,8 @@ function DynamicPPL.initialstep( # Use the leaf-context from the `model` in case the user has # contextualized the model with something like `PriorContext` # to sample from the prior. 
- DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) - ) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), + ), ) logπ = Base.Fix1(LogDensityProblems.logdensity, ℓ) ∂logπ∂θ(x) = LogDensityProblems.logdensity_and_gradient(ℓ, x) @@ -203,9 +224,9 @@ function DynamicPPL.initialstep( # Adaptation adaptor = AHMCAdaptor(spl.alg, hamiltonian.metric; ϵ=ϵ) if spl.alg isa AdaptiveHamiltonian - hamiltonian, kernel, _ = - AHMC.adapt!(hamiltonian, kernel, adaptor, - 1, nadapts, t.z.θ, t.stat.acceptance_rate) + hamiltonian, kernel, _ = AHMC.adapt!( + hamiltonian, kernel, adaptor, 1, nadapts, t.z.θ, t.stat.acceptance_rate + ) end # Update `vi` based on acceptance @@ -229,7 +250,7 @@ function AbstractMCMC.step( spl::Sampler{<:Hamiltonian}, state::HMCState; nadapts=0, - kwargs... + kwargs..., ) # Get step size @debug "current ϵ" getstepsize(spl, state) @@ -242,9 +263,15 @@ function AbstractMCMC.step( # Adaptation i = state.i + 1 if spl.alg isa AdaptiveHamiltonian - hamiltonian, kernel, _ = - AHMC.adapt!(hamiltonian, state.kernel, state.adaptor, - i, nadapts, t.z.θ, t.stat.acceptance_rate) + hamiltonian, kernel, _ = AHMC.adapt!( + hamiltonian, + state.kernel, + state.adaptor, + i, + nadapts, + t.z.θ, + t.stat.acceptance_rate, + ) else kernel = state.kernel end @@ -269,8 +296,8 @@ function get_hamiltonian(model, spl, vi, state, n) Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(spl, DynamicPPL.leafcontext(model.context)) - ) + DynamicPPL.SamplingContext(spl, DynamicPPL.leafcontext(model.context)), + ), ) ℓπ = Base.Fix1(LogDensityProblems.logdensity, ℓ) ∂ℓπ∂θ = Base.Fix1(LogDensityProblems.logdensity_and_gradient, ℓ) @@ -308,15 +335,23 @@ Hoffman, Matthew D., and Andrew Gelman. "The No-U-turn sampler: adaptively setting path lengths in Hamiltonian Monte Carlo." Journal of Machine Learning Research 15, no. 1 (2014): 1593-1623. """ -struct HMCDA{AD, space, metricT <: AHMC.AbstractMetric} <: AdaptiveHamiltonian - n_adapts :: Int # number of samples with adaption for ϵ - δ :: Float64 # target accept rate - λ :: Float64 # target leapfrog length - ϵ :: Float64 # (initial) step size +struct HMCDA{AD,space,metricT<:AHMC.AbstractMetric} <: AdaptiveHamiltonian + n_adapts::Int # number of samples with adaption for ϵ + δ::Float64 # target accept rate + λ::Float64 # target leapfrog length + ϵ::Float64 # (initial) step size adtype::AD end -function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ϵ::Float64, ::Type{metricT}, space::Tuple; adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE) where {metricT<:AHMC.AbstractMetric} +function HMCDA( + n_adapts::Int, + δ::Float64, + λ::Float64, + ϵ::Float64, + ::Type{metricT}, + space::Tuple; + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, +) where {metricT<:AHMC.AbstractMetric} return HMCDA{typeof(adtype),space,metricT}(n_adapts, δ, λ, ϵ, adtype) end @@ -327,16 +362,10 @@ function HMCDA( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype = adtype) + return HMCDA(-1, δ, λ, init_ϵ, metricT, (); adtype=adtype) end -function HMCDA( - n_adapts::Int, - δ::Float64, - λ::Float64, - ::Tuple{}; - kwargs... -) +function HMCDA(n_adapts::Int, δ::Float64, λ::Float64, ::Tuple{}; kwargs...) return HMCDA(n_adapts, δ, λ; kwargs...) 
end @@ -349,10 +378,9 @@ function HMCDA( metricT=AHMC.UnitEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype = adtype) + return HMCDA(n_adapts, δ, λ, init_ϵ, metricT, space; adtype=adtype) end - """ NUTS(n_adapts::Int, δ::Float64; max_depth::Int=10, Δ_max::Float64=1000.0, init_ϵ::Float64=0.0; adtype::ADTypes.AbstractADType=AutoForwardDiff() @@ -398,13 +426,8 @@ function NUTS( return NUTS{typeof(adtype),space,metricT}(n_adapts, δ, max_depth, Δ_max, ϵ, adtype) end -function NUTS( - n_adapts::Int, - δ::Float64, - ::Tuple{}; - kwargs... -) - NUTS(n_adapts, δ; kwargs...) +function NUTS(n_adapts::Int, δ::Float64, ::Tuple{}; kwargs...) + return NUTS(n_adapts, δ; kwargs...) end function NUTS( @@ -417,7 +440,7 @@ function NUTS( metricT=AHMC.DiagEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype) + return NUTS(n_adapts, δ, max_depth, Δ_max, init_ϵ, metricT, space; adtype=adtype) end function NUTS( @@ -428,15 +451,15 @@ function NUTS( metricT=AHMC.DiagEuclideanMetric, adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) - NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype) + return NUTS(-1, δ, max_depth, Δ_max, init_ϵ, metricT, (); adtype=adtype) end function NUTS(; kwargs...) - NUTS(-1, 0.65; kwargs...) + return NUTS(-1, 0.65; kwargs...) end for alg in (:HMC, :HMCDA, :NUTS) - @eval getmetricT(::$alg{<:Any, <:Any, metricT}) where {metricT} = metricT + @eval getmetricT(::$alg{<:Any,<:Any,metricT}) where {metricT} = metricT end ##### @@ -452,23 +475,28 @@ function gen_metric(dim::Int, spl::Sampler{<:AdaptiveHamiltonian}, state) end function make_ahmc_kernel(alg::HMC, ϵ) - return AHMC.HMCKernel(AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedNSteps(alg.n_leapfrog))) + return AHMC.HMCKernel( + AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedNSteps(alg.n_leapfrog)) + ) end function make_ahmc_kernel(alg::HMCDA, ϵ) - return AHMC.HMCKernel(AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedIntegrationTime(alg.λ))) + return AHMC.HMCKernel( + AHMC.Trajectory{AHMC.EndPointTS}(AHMC.Leapfrog(ϵ), AHMC.FixedIntegrationTime(alg.λ)) + ) +end +function make_ahmc_kernel(alg::NUTS, ϵ) + return AHMC.HMCKernel( + AHMC.Trajectory{AHMC.MultinomialTS}( + AHMC.Leapfrog(ϵ), AHMC.GeneralisedNoUTurn(alg.max_depth, alg.Δ_max) + ), + ) end -make_ahmc_kernel(alg::NUTS, ϵ) = - AHMC.HMCKernel(AHMC.Trajectory{AHMC.MultinomialTS}(AHMC.Leapfrog(ϵ), AHMC.GeneralisedNoUTurn(alg.max_depth, alg.Δ_max))) #### #### Compiler interface, i.e. tilde operators. 
#### function DynamicPPL.assume( - rng, - spl::Sampler{<:Hamiltonian}, - dist::Distribution, - vn::VarName, - vi, + rng, spl::Sampler{<:Hamiltonian}, dist::Distribution, vn::VarName, vi ) DynamicPPL.updategid!(vi, vn, spl) return DynamicPPL.assume(dist, vn, vi) @@ -488,7 +516,7 @@ end function DynamicPPL.dot_assume( rng, spl::Sampler{<:Hamiltonian}, - dists::Union{Distribution, AbstractArray{<:Distribution}}, + dists::Union{Distribution,AbstractArray{<:Distribution}}, vns::AbstractArray{<:VarName}, var::AbstractArray, vi, @@ -497,18 +525,13 @@ function DynamicPPL.dot_assume( return DynamicPPL.dot_assume(dists, var, vns, vi) end -function DynamicPPL.observe( - spl::Sampler{<:Hamiltonian}, - d::Distribution, - value, - vi, -) +function DynamicPPL.observe(spl::Sampler{<:Hamiltonian}, d::Distribution, value, vi) return DynamicPPL.observe(d, value, vi) end function DynamicPPL.dot_observe( spl::Sampler{<:Hamiltonian}, - ds::Union{Distribution, AbstractArray{<:Distribution}}, + ds::Union{Distribution,AbstractArray{<:Distribution}}, value::AbstractArray, vi, ) @@ -537,7 +560,9 @@ function AHMCAdaptor(alg::AdaptiveHamiltonian, metric::AHMC.AbstractMetric; ϵ=a return adaptor end -AHMCAdaptor(::Hamiltonian, ::AHMC.AbstractMetric; kwargs...) = AHMC.Adaptation.NoAdaptation() +function AHMCAdaptor(::Hamiltonian, ::AHMC.AbstractMetric; kwargs...) + return AHMC.Adaptation.NoAdaptation() +end ########################## # HMC State Constructors # @@ -548,7 +573,7 @@ function HMCState( model::Model, spl::Sampler{<:Hamiltonian}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # Link everything if needed. waslinked = islinked(vi, spl) @@ -561,10 +586,9 @@ function HMCState( logπ = Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), ) - # Get the metric type. metricT = getmetricT(spl.alg) diff --git a/src/mcmc/is.jl b/src/mcmc/is.jl index ac05e9aaf..0fe6e1053 100644 --- a/src/mcmc/is.jl +++ b/src/mcmc/is.jl @@ -31,21 +31,13 @@ IS() = IS{()}() DynamicPPL.initialsampler(sampler::Sampler{<:IS}) = sampler function DynamicPPL.initialstep( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:IS}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:IS}, vi::AbstractVarInfo; kwargs... ) return Transition(model, vi), nothing end function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::Model, - spl::Sampler{<:IS}, - ::Nothing; - kwargs... + rng::Random.AbstractRNG, model::Model, spl::Sampler{<:IS}, ::Nothing; kwargs... ) vi = VarInfo(rng, model, spl) return Transition(model, vi), nothing diff --git a/src/mcmc/mh.jl b/src/mcmc/mh.jl index 6cc9f9a21..9b7f3ff09 100644 --- a/src/mcmc/mh.jl +++ b/src/mcmc/mh.jl @@ -2,7 +2,7 @@ ### Sampler states ### -struct MH{space, P} <: InferenceAlgorithm +struct MH{space,P} <: InferenceAlgorithm proposals::P end @@ -173,7 +173,7 @@ function MH(space...) prop = proposal(s) # Return early, we got a covariance matrix. - return MH{(), typeof(prop)}(prop) + return MH{(),typeof(prop)}(prop) else # Try to convert it to a proposal anyways, # throw an error if not acceptable. @@ -185,7 +185,7 @@ function MH(space...) proposals = NamedTuple{tuple(prop_syms...)}(tuple(props...)) syms = vcat(syms, prop_syms) - return MH{tuple(syms...), typeof(proposals)}(proposals) + return MH{tuple(syms...),typeof(proposals)}(proposals) end # Some of the proposals require working in unconstrained space. 
@@ -225,11 +225,11 @@ function set_namedtuple!(vi::DynamicPPL.VarInfoOrThreadSafeVarInfo, nt::NamedTup # assign the unpacked values if length(vals) == 1 vi[vns[1]] = [vals[1];] - # otherwise just assign the values + # otherwise just assign the values else vi[vns[1]] = [vals;] end - # if there are multiple variables + # if there are multiple variables elseif vals isa AbstractArray nvals = length(vals) # if values are provided as an array with a single element @@ -238,7 +238,7 @@ function set_namedtuple!(vi::DynamicPPL.VarInfoOrThreadSafeVarInfo, nt::NamedTup for (vn, val) in zip(vns, vals[1]) vi[vn] = [val;] end - # otherwise number of variables and number of values have to be equal + # otherwise number of variables and number of values have to be equal elseif nvals == nvns # iterate over variables and values for (vn, val) in zip(vns, vals) @@ -260,7 +260,9 @@ A log density function for the MH sampler. This variant uses the `set_namedtuple!` function to update the `VarInfo`. """ -const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = Turing.LogDensityFunction{V,M,<:DynamicPPL.SamplingContext{<:S}} +const MHLogDensityFunction{M<:Model,S<:Sampler{<:MH},V<:AbstractVarInfo} = Turing.LogDensityFunction{ + V,M,<:DynamicPPL.SamplingContext{<:S} +} function LogDensityProblems.logdensity(f::MHLogDensityFunction, x::NamedTuple) # TODO: Make this work with immutable `f.varinfo` too. @@ -284,16 +286,10 @@ unvectorize(dists::AbstractVector) = length(dists) == 1 ? first(dists) : dists # possibly unpack and reshape samples according to the prior distribution reconstruct(dist::Distribution, val::AbstractVector) = DynamicPPL.reconstruct(dist, val) -function reconstruct( - dist::AbstractVector{<:UnivariateDistribution}, - val::AbstractVector -) +function reconstruct(dist::AbstractVector{<:UnivariateDistribution}, val::AbstractVector) return val end -function reconstruct( - dist::AbstractVector{<:MultivariateDistribution}, - val::AbstractVector -) +function reconstruct(dist::AbstractVector{<:MultivariateDistribution}, val::AbstractVector) offset = 0 return map(dist) do d n = length(d) @@ -319,23 +315,22 @@ function dist_val_tuple(spl::Sampler{<:MH}, vi::DynamicPPL.VarInfoOrThreadSafeVa return dt, vt end -@generated function _val_tuple( - vi::VarInfo, - vns::NamedTuple{names} -) where {names} +@generated function _val_tuple(vi::VarInfo, vns::NamedTuple{names}) where {names} isempty(names) === 0 && return :(NamedTuple()) expr = Expr(:tuple) expr.args = Any[ - :($name = reconstruct(unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)), - DynamicPPL.getval(vi, vns.$name))) - for name in names] + :( + $name = reconstruct( + unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)), + DynamicPPL.getval(vi, vns.$name), + ) + ) for name in names + ] return expr end @generated function _dist_tuple( - props::NamedTuple{propnames}, - vi::VarInfo, - vns::NamedTuple{names} + props::NamedTuple{propnames}, vi::VarInfo, vns::NamedTuple{names} ) where {names,propnames} isempty(names) === 0 && return :(NamedTuple()) expr = Expr(:tuple) @@ -345,14 +340,19 @@ end :($name = props.$name) else # Otherwise, use the default proposal. 
- :($name = AMH.StaticProposal(unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)))) - end for name in names] + :( + $name = AMH.StaticProposal( + unvectorize(DynamicPPL.getdist.(Ref(vi), vns.$name)) + ) + ) + end for name in names + ] return expr end # Utility functions to link should_link(varinfo, sampler, proposal) = false -function should_link(varinfo, sampler, proposal::NamedTuple{(), Tuple{}}) +function should_link(varinfo, sampler, proposal::NamedTuple{(),Tuple{}}) # If it's an empty `NamedTuple`, we're using the priors as proposals # in which case we shouldn't link. return false @@ -362,24 +362,22 @@ function should_link(varinfo, sampler, proposal::AdvancedMH.RandomWalkProposal) end # FIXME: This won't be hit unless `vals` are all the exactly same concrete type of `AdvancedMH.RandomWalkProposal`! function should_link( - varinfo, - sampler, - proposal::NamedTuple{names, vals} -) where {names, vals<:NTuple{<:Any, <:AdvancedMH.RandomWalkProposal}} + varinfo, sampler, proposal::NamedTuple{names,vals} +) where {names,vals<:NTuple{<:Any,<:AdvancedMH.RandomWalkProposal}} return true end function maybe_link!!(varinfo, sampler, proposal, model) - return should_link(varinfo, sampler, proposal) ? link!!(varinfo, sampler, model) : varinfo + return if should_link(varinfo, sampler, proposal) + link!!(varinfo, sampler, model) + else + varinfo + end end # Make a proposal if we don't have a covariance proposal matrix (the default). function propose!!( - rng::AbstractRNG, - vi::AbstractVarInfo, - model::Model, - spl::Sampler{<:MH}, - proposal + rng::AbstractRNG, vi::AbstractVarInfo, model::Model, spl::Sampler{<:MH}, proposal ) # Retrieve distribution and value NamedTuples. dt, vt = dist_val_tuple(spl, vi) @@ -395,9 +393,9 @@ function propose!!( Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) - ) - ) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), + ), + ), ) trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) @@ -413,7 +411,7 @@ function propose!!( vi::AbstractVarInfo, model::Model, spl::Sampler{<:MH}, - proposal::AdvancedMH.RandomWalkProposal + proposal::AdvancedMH.RandomWalkProposal, ) # If this is the case, we can just draw directly from the proposal # matrix. @@ -430,9 +428,9 @@ function propose!!( Turing.LogDensityFunction( vi, model, - DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)) - ) - ) + DynamicPPL.SamplingContext(rng, spl, DynamicPPL.leafcontext(model.context)), + ), + ), ) trans, _ = AbstractMCMC.step(rng, densitymodel, mh_sampler, prev_trans) @@ -444,7 +442,7 @@ function DynamicPPL.initialstep( model::AbstractModel, spl::Sampler{<:MH}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # If we're doing random walk with a covariance matrix, # just link everything before sampling. @@ -454,11 +452,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - rng::AbstractRNG, - model::Model, - spl::Sampler{<:MH}, - vi::AbstractVarInfo; - kwargs... + rng::AbstractRNG, model::Model, spl::Sampler{<:MH}, vi::AbstractVarInfo; kwargs... ) # Cases: # 1. A covariance proposal matrix @@ -471,13 +465,7 @@ end #### #### Compiler interface, i.e. tilde operators. 
#### -function DynamicPPL.assume( - rng, - spl::Sampler{<:MH}, - dist::Distribution, - vn::VarName, - vi, -) +function DynamicPPL.assume(rng, spl::Sampler{<:MH}, dist::Distribution, vn::VarName, vi) DynamicPPL.updategid!(vi, vn, spl) r = vi[vn] return r, logpdf_with_trans(dist, r, istrans(vi, vn)), vi @@ -502,7 +490,7 @@ end function DynamicPPL.dot_assume( rng, spl::Sampler{<:MH}, - dists::Union{Distribution, AbstractArray{<:Distribution}}, + dists::Union{Distribution,AbstractArray{<:Distribution}}, vn::VarName, var::AbstractArray, vi, @@ -515,18 +503,13 @@ function DynamicPPL.dot_assume( return var, sum(logpdf_with_trans.(dists, r, istrans(vi, vns[1]))), vi end -function DynamicPPL.observe( - spl::Sampler{<:MH}, - d::Distribution, - value, - vi, -) +function DynamicPPL.observe(spl::Sampler{<:MH}, d::Distribution, value, vi) return DynamicPPL.observe(SampleFromPrior(), d, value, vi) end function DynamicPPL.dot_observe( spl::Sampler{<:MH}, - ds::Union{Distribution, AbstractArray{<:Distribution}}, + ds::Union{Distribution,AbstractArray{<:Distribution}}, value::AbstractArray, vi, ) diff --git a/src/mcmc/particle_mcmc.jl b/src/mcmc/particle_mcmc.jl index 7d0714c41..d5a5f0847 100644 --- a/src/mcmc/particle_mcmc.jl +++ b/src/mcmc/particle_mcmc.jl @@ -15,7 +15,7 @@ Sequential Monte Carlo sampler. $(TYPEDFIELDS) """ -struct SMC{space, R} <: ParticleInference +struct SMC{space,R} <: ParticleInference resampler::R end @@ -29,15 +29,17 @@ Create a sequential Monte Carlo sampler of type [`SMC`](@ref) for the variables If the algorithm for the resampling step is not specified explicitly, systematic resampling is performed if the estimated effective sample size per particle drops below 0.5. """ -function SMC(resampler = AdvancedPS.ResampleWithESSThreshold(), space::Tuple = ()) - return SMC{space, typeof(resampler)}(resampler) +function SMC(resampler=AdvancedPS.ResampleWithESSThreshold(), space::Tuple=()) + return SMC{space,typeof(resampler)}(resampler) end # Convenient constructors with ESS threshold -function SMC(resampler, threshold::Real, space::Tuple = ()) +function SMC(resampler, threshold::Real, space::Tuple=()) return SMC(AdvancedPS.ResampleWithESSThreshold(resampler, threshold), space) end -SMC(threshold::Real, space::Tuple = ()) = SMC(AdvancedPS.resample_systematic, threshold, space) +function SMC(threshold::Real, space::Tuple=()) + return SMC(AdvancedPS.resample_systematic, threshold, space) +end # If only the space is defined SMC(space::Symbol...) = SMC(space) @@ -62,7 +64,7 @@ function SMCTransition(model::DynamicPPL.Model, vi::AbstractVarInfo, weight) return SMCTransition(theta, lp, weight) end -metadata(t::SMCTransition) = (lp = t.lp, weight = t.weight) +metadata(t::SMCTransition) = (lp=t.lp, weight=t.weight) DynamicPPL.getlogp(t::SMCTransition) = t.lp @@ -86,18 +88,30 @@ function AbstractMCMC.sample( resume_from=nothing, initial_state=DynamicPPL.loadstate(resume_from), progress=PROGRESS[], - kwargs... + kwargs..., ) if resume_from === nothing - return AbstractMCMC.mcmcsample(rng, model, sampler, N; - chain_type=chain_type, - progress=progress, - nparticles=N, - kwargs...) + return AbstractMCMC.mcmcsample( + rng, + model, + sampler, + N; + chain_type=chain_type, + progress=progress, + nparticles=N, + kwargs..., + ) else return AbstractMCMC.mcmcsample( - rng, model, sampler, N; chain_type, initial_state, progress=progress, - nparticles=N, kwargs... 
+ rng, + model, + sampler, + N; + chain_type, + initial_state, + progress=progress, + nparticles=N, + kwargs..., ) end end @@ -108,7 +122,7 @@ function DynamicPPL.initialstep( spl::Sampler{<:SMC}, vi::AbstractVarInfo; nparticles::Int, - kwargs... + kwargs..., ) # Reset the VarInfo. reset_num_produce!(vi) @@ -120,7 +134,7 @@ function DynamicPPL.initialstep( particles = AdvancedPS.ParticleContainer( [AdvancedPS.Trace(model, spl, vi, AdvancedPS.TracedRNG()) for _ in 1:nparticles], AdvancedPS.TracedRNG(), - rng + rng, ) # Perform particle sweep. @@ -138,11 +152,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - ::AbstractRNG, - model::AbstractModel, - spl::Sampler{<:SMC}, - state::SMCState; - kwargs... + ::AbstractRNG, model::AbstractModel, spl::Sampler{<:SMC}, state::SMCState; kwargs... ) # Extract the index of the current particle. index = state.particleindex @@ -191,18 +201,16 @@ If the algorithm for the resampling step is not specified explicitly, systematic is performed if the estimated effective sample size per particle drops below 0.5. """ function PG( - nparticles::Int, - resampler = AdvancedPS.ResampleWithESSThreshold(), - space::Tuple = (), + nparticles::Int, resampler=AdvancedPS.ResampleWithESSThreshold(), space::Tuple=() ) - return PG{space, typeof(resampler)}(nparticles, resampler) + return PG{space,typeof(resampler)}(nparticles, resampler) end # Convenient constructors with ESS threshold -function PG(nparticles::Int, resampler, threshold::Real, space::Tuple = ()) +function PG(nparticles::Int, resampler, threshold::Real, space::Tuple=()) return PG(nparticles, AdvancedPS.ResampleWithESSThreshold(resampler, threshold), space) end -function PG(nparticles::Int, threshold::Real, space::Tuple = ()) +function PG(nparticles::Int, threshold::Real, space::Tuple=()) return PG(nparticles, AdvancedPS.resample_systematic, threshold, space) end @@ -238,7 +246,7 @@ function PGTransition(model::DynamicPPL.Model, vi::AbstractVarInfo, logevidence) return PGTransition(theta, lp, logevidence) end -metadata(t::PGTransition) = (lp = t.lp, logevidence = t.logevidence) +metadata(t::PGTransition) = (lp=t.lp, logevidence=t.logevidence) DynamicPPL.getlogp(t::PGTransition) = t.lp @@ -251,7 +259,7 @@ function DynamicPPL.initialstep( model::AbstractModel, spl::Sampler{<:PG}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # Reset the VarInfo before new sweep reset_num_produce!(vi) @@ -263,7 +271,7 @@ function DynamicPPL.initialstep( particles = AdvancedPS.ParticleContainer( [AdvancedPS.Trace(model, spl, vi, AdvancedPS.TracedRNG()) for _ in 1:num_particles], AdvancedPS.TracedRNG(), - rng + rng, ) # Perform a particle sweep. @@ -282,11 +290,7 @@ function DynamicPPL.initialstep( end function AbstractMCMC.step( - rng::AbstractRNG, - model::AbstractModel, - spl::Sampler{<:PG}, - state::PGState; - kwargs... + rng::AbstractRNG, model::AbstractModel, spl::Sampler{<:PG}, state::PGState; kwargs... ) # Reset the VarInfo before new sweep. 
vi = state.vi @@ -325,10 +329,14 @@ function AbstractMCMC.step( return transition, PGState(_vi, newreference.rng) end -DynamicPPL.use_threadsafe_eval(::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, ::AbstractVarInfo) = false +function DynamicPPL.use_threadsafe_eval( + ::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, ::AbstractVarInfo +) + return false +end function trace_local_varinfo_maybe(varinfo) - try + try trace = AdvancedPS.current_trace() return trace.model.f.varinfo catch e @@ -360,7 +368,7 @@ function DynamicPPL.assume( spl::Sampler{<:Union{PG,SMC}}, dist::Distribution, vn::VarName, - _vi::AbstractVarInfo + _vi::AbstractVarInfo, ) vi = trace_local_varinfo_maybe(_vi) trng = trace_local_rng_maybe(rng) @@ -399,18 +407,14 @@ function DynamicPPL.observe(spl::Sampler{<:Union{PG,SMC}}, dist::Distribution, v end function DynamicPPL.acclogp!!( - context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, - varinfo::AbstractVarInfo, - logp + context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, varinfo::AbstractVarInfo, logp ) varinfo_trace = trace_local_varinfo_maybe(varinfo) - DynamicPPL.acclogp!!(DynamicPPL.childcontext(context), varinfo_trace, logp) + return DynamicPPL.acclogp!!(DynamicPPL.childcontext(context), varinfo_trace, logp) end function DynamicPPL.acclogp_observe!!( - context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, - varinfo::AbstractVarInfo, - logp + context::SamplingContext{<:Sampler{<:Union{PG,SMC}}}, varinfo::AbstractVarInfo, logp ) Libtask.produce(logp) return trace_local_varinfo_maybe(varinfo) @@ -421,7 +425,7 @@ function AdvancedPS.Trace( model::Model, sampler::Sampler{<:Union{SMC,PG}}, varinfo::AbstractVarInfo, - rng::AdvancedPS.TracedRNG + rng::AdvancedPS.TracedRNG, ) newvarinfo = deepcopy(varinfo) DynamicPPL.reset_num_produce!(newvarinfo) diff --git a/src/mcmc/sghmc.jl b/src/mcmc/sghmc.jl index 61cffa7b3..84a9c18f3 100644 --- a/src/mcmc/sghmc.jl +++ b/src/mcmc/sghmc.jl @@ -44,10 +44,12 @@ function SGHMC( adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) _learning_rate, _momentum_decay = promote(learning_rate, momentum_decay) - return SGHMC{typeof(adtype),space,typeof(_learning_rate)}(_learning_rate, _momentum_decay, adtype) + return SGHMC{typeof(adtype),space,typeof(_learning_rate)}( + _learning_rate, _momentum_decay, adtype + ) end -struct SGHMCState{L,V<:AbstractVarInfo, T<:AbstractVector{<:Real}} +struct SGHMCState{L,V<:AbstractVarInfo,T<:AbstractVector{<:Real}} logdensity::L vi::V velocity::T @@ -68,7 +70,9 @@ function DynamicPPL.initialstep( # Compute initial sample and state. sample = Transition(model, vi) - ℓ = LogDensityProblemsAD.ADgradient(Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext())) + ℓ = LogDensityProblemsAD.ADgradient( + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ) state = SGHMCState(ℓ, vi, zero(vi[spl])) return sample, state @@ -79,7 +83,7 @@ function AbstractMCMC.step( model::Model, spl::Sampler{<:SGHMC}, state::SGHMCState; - kwargs... + kwargs..., ) # Compute gradient of log density. ℓ = state.logdensity @@ -134,7 +138,7 @@ struct PolynomialStepsize{T<:Real} "Decay rate of step size in (0.5, 1]." γ::T - function PolynomialStepsize{T}(a::T, b::T, γ::T) where T + function PolynomialStepsize{T}(a::T, b::T, γ::T) where {T} 0.5 < γ ≤ 1 || error("the decay rate `γ` has to be in (0.5, 1]") return new{T}(a, b, γ) end @@ -153,7 +157,7 @@ a (b + t)^{-γ}. 
function PolynomialStepsize(a::T, b::T, γ::T) where {T<:Real} return PolynomialStepsize{T}(a, b, γ) end -function PolynomialStepsize(a::Real, b::Real = 0, γ::Real = 0.55) +function PolynomialStepsize(a::Real, b::Real=0, γ::Real=0.55) return PolynomialStepsize(promote(a, b, γ)...) end @@ -183,8 +187,8 @@ See also: [`PolynomialStepsize`](@ref) """ function SGLD( space::Symbol...; - stepsize = PolynomialStepsize(0.01), - adtype::ADTypes.AbstractADType = Turing.DEFAULT_ADTYPE, + stepsize=PolynomialStepsize(0.01), + adtype::ADTypes.AbstractADType=Turing.DEFAULT_ADTYPE, ) return SGLD{typeof(adtype),space,typeof(stepsize)}(stepsize, adtype) end @@ -204,7 +208,7 @@ function SGLDTransition(model::DynamicPPL.Model, vi::AbstractVarInfo, stepsize) return SGLDTransition(theta, lp, stepsize) end -metadata(t::SGLDTransition) = (lp = t.lp, SGLD_stepsize = t.stepsize) +metadata(t::SGLDTransition) = (lp=t.lp, SGLD_stepsize=t.stepsize) DynamicPPL.getlogp(t::SGLDTransition) = t.lp @@ -219,7 +223,7 @@ function DynamicPPL.initialstep( model::Model, spl::Sampler{<:SGLD}, vi::AbstractVarInfo; - kwargs... + kwargs..., ) # Transform the samples to unconstrained space and compute the joint log probability. if !DynamicPPL.islinked(vi, spl) @@ -229,18 +233,16 @@ function DynamicPPL.initialstep( # Create first sample and state. sample = SGLDTransition(model, vi, zero(spl.alg.stepsize(0))) - ℓ = LogDensityProblemsAD.ADgradient(Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext())) + ℓ = LogDensityProblemsAD.ADgradient( + Turing.LogDensityFunction(vi, model, spl, DynamicPPL.DefaultContext()) + ) state = SGLDState(ℓ, vi, 1) return sample, state end function AbstractMCMC.step( - rng::Random.AbstractRNG, - model::Model, - spl::Sampler{<:SGLD}, - state::SGLDState; - kwargs... + rng::Random.AbstractRNG, model::Model, spl::Sampler{<:SGLD}, state::SGLDState; kwargs... ) # Perform gradient step. ℓ = state.logdensity diff --git a/src/optimisation/Optimisation.jl b/src/optimisation/Optimisation.jl index 7a700f241..682e664a6 100644 --- a/src/optimisation/Optimisation.jl +++ b/src/optimisation/Optimisation.jl @@ -42,7 +42,13 @@ struct OptimizationContext{C<:DynamicPPL.AbstractContext} <: DynamicPPL.Abstract context::C function OptimizationContext{C}(context::C) where {C<:DynamicPPL.AbstractContext} - if !(context isa Union{DynamicPPL.DefaultContext,DynamicPPL.LikelihoodContext,DynamicPPL.PriorContext}) + if !( + context isa Union{ + DynamicPPL.DefaultContext, + DynamicPPL.LikelihoodContext, + DynamicPPL.PriorContext, + } + ) msg = """ `OptimizationContext` supports only leaf contexts of type `DynamicPPL.DefaultContext`, `DynamicPPL.LikelihoodContext`, @@ -93,19 +99,26 @@ function DynamicPPL.dot_tilde_assume(ctx::OptimizationContext, right, left, vns, return r, lp, vi end -DynamicPPL.tilde_observe(ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args...) = - DynamicPPL.tilde_observe(ctx.context, args...) +function DynamicPPL.tilde_observe( + ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args... +) + return DynamicPPL.tilde_observe(ctx.context, args...) +end -DynamicPPL.dot_tilde_observe(ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args...) = - DynamicPPL.dot_tilde_observe(ctx.context, args...) +function DynamicPPL.dot_tilde_observe( + ctx::OptimizationContext{<:DynamicPPL.PriorContext}, args... +) + return DynamicPPL.dot_tilde_observe(ctx.context, args...) 
+end """ OptimLogDensity{M<:DynamicPPL.Model,C<:Context,V<:DynamicPPL.VarInfo} A struct that stores the negative log density function of a `DynamicPPL` model. """ -const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = - Turing.LogDensityFunction{V,M,C} +const OptimLogDensity{M<:DynamicPPL.Model,C<:OptimizationContext,V<:DynamicPPL.VarInfo} = Turing.LogDensityFunction{ + V,M,C +} """ OptimLogDensity(model::DynamicPPL.Model, context::OptimizationContext) @@ -176,11 +189,8 @@ end A wrapper struct to store various results from a MAP or MLE estimation. """ -struct ModeResult{ - V<:NamedArrays.NamedArray, - O<:Any, - M<:OptimLogDensity -} <: StatsBase.StatisticalModel +struct ModeResult{V<:NamedArrays.NamedArray,O<:Any,M<:OptimLogDensity} <: + StatsBase.StatisticalModel "A vector with the resulting point estimates." values::V "The stored optimiser results." @@ -195,11 +205,11 @@ function Base.show(io::IO, ::MIME"text/plain", m::ModeResult) print(io, "ModeResult with maximized lp of ") Printf.@printf(io, "%.2f", m.lp) println(io) - show(io, m.values) + return show(io, m.values) end function Base.show(io::IO, m::ModeResult) - show(io, m.values.array) + return show(io, m.values.array) end # Various StatsBase methods for ModeResult @@ -312,9 +322,9 @@ struct ModeEstimationConstraints{ end has_box_constraints(c::ModeEstimationConstraints) = c.ub !== nothing || c.lb !== nothing -has_generic_constraints(c::ModeEstimationConstraints) = ( - c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing -) +function has_generic_constraints(c::ModeEstimationConstraints) + return (c.cons !== nothing || c.lcons !== nothing || c.ucons !== nothing) +end has_constraints(c) = has_box_constraints(c) || has_generic_constraints(c) """ @@ -328,17 +338,19 @@ uniformly from the box constraints. If generic constraints are set, an error is """ function generate_initial_params(model::DynamicPPL.Model, initial_params, constraints) if initial_params === nothing && has_generic_constraints(constraints) - throw(ArgumentError( - "You must provide an initial value when using generic constraints." - )) + throw( + ArgumentError( + "You must provide an initial value when using generic constraints." + ), + ) end return if initial_params !== nothing copy(initial_params) elseif has_box_constraints(constraints) [ - rand(Distributions.Uniform(lower, upper)) - for (lower, upper) in zip(constraints.lb, constraints.ub) + rand(Distributions.Uniform(lower, upper)) for + (lower, upper) in zip(constraints.lb, constraints.ub) ] else rand(Vector, model) @@ -367,11 +379,12 @@ function Optimization.OptimizationProblem(log_density::OptimLogDensity, adtype, Optimization.OptimizationProblem(f, initial_params) else Optimization.OptimizationProblem( - f, initial_params; + f, + initial_params; lcons=constraints.lcons, ucons=constraints.ucons, lb=constraints.lb, - ub=constraints.ub + ub=constraints.ub, ) end return prob @@ -418,7 +431,7 @@ function estimate_mode( ucons=nothing, lb=nothing, ub=nothing, - kwargs... + kwargs..., ) constraints = ModeEstimationConstraints(lb, ub, cons, lcons, ucons) initial_params = generate_initial_params(model, initial_params, constraints) diff --git a/src/stdlib/RandomMeasures.jl b/src/stdlib/RandomMeasures.jl index 9858279e6..add452786 100644 --- a/src/stdlib/RandomMeasures.jl +++ b/src/stdlib/RandomMeasures.jl @@ -19,7 +19,8 @@ abstract type AbstractRandomProbabilityMeasure end The *Size-Biased Sampling Process* for random probability measures `rpm` with a surplus mass of `surplus`. 
""" -struct SizeBiasedSamplingProcess{T<:AbstractRandomProbabilityMeasure,V<:AbstractFloat} <: ContinuousUnivariateDistribution +struct SizeBiasedSamplingProcess{T<:AbstractRandomProbabilityMeasure,V<:AbstractFloat} <: + ContinuousUnivariateDistribution rpm::T surplus::V end @@ -34,7 +35,8 @@ maximum(d::SizeBiasedSamplingProcess) = d.surplus The *Stick-Breaking Process* for random probability measures `rpm`. """ -struct StickBreakingProcess{T<:AbstractRandomProbabilityMeasure} <: ContinuousUnivariateDistribution +struct StickBreakingProcess{T<:AbstractRandomProbabilityMeasure} <: + ContinuousUnivariateDistribution rpm::T end @@ -48,12 +50,13 @@ maximum(d::StickBreakingProcess) = 1.0 The *Chinese Restaurant Process* for random probability measures `rpm` with counts `m`. """ -struct ChineseRestaurantProcess{T<:AbstractRandomProbabilityMeasure,V<:AbstractVector{Int}} <: DiscreteUnivariateDistribution +struct ChineseRestaurantProcess{ + T<:AbstractRandomProbabilityMeasure,V<:AbstractVector{Int} +} <: DiscreteUnivariateDistribution rpm::T m::V end - """ _logpdf_table(d::AbstractRandomProbabilityMeasure, m::AbstractVector{Int}) @@ -81,7 +84,7 @@ function rand(rng::AbstractRNG, d::ChineseRestaurantProcess) end minimum(d::ChineseRestaurantProcess) = 1 -maximum(d::ChineseRestaurantProcess) = any(iszero, d.m) ? length(d.m) : length(d.m)+1 +maximum(d::ChineseRestaurantProcess) = any(iszero, d.m) ? length(d.m) : length(d.m) + 1 ## ################# ## ## Random partitions ## @@ -131,7 +134,7 @@ function _logpdf_table(d::DirichletProcess{T}, m::AbstractVector{Int}) where {T< # construct the table first_zero = findfirst(iszero, m) - K = first_zero === nothing ? length(m)+1 : length(m) + K = first_zero === nothing ? length(m) + 1 : length(m) table = fill(T(-Inf), K) # exit if m is empty or contains only zeros @@ -141,7 +144,7 @@ function _logpdf_table(d::DirichletProcess{T}, m::AbstractVector{Int}) where {T< end # compute logpdf for each occupied table - @inbounds for i in 1:(K-1) + @inbounds for i in 1:(K - 1) table[i] = T(log(m[i])) end @@ -187,7 +190,7 @@ end function distribution(d::StickBreakingProcess{<:PitmanYorProcess}) d_rpm = d.rpm d_rpm_d = d.rpm.d - return Beta(one(d_rpm_d)-d_rpm_d, d_rpm.θ + d_rpm.t*d_rpm_d) + return Beta(one(d_rpm_d) - d_rpm_d, d_rpm.θ + d_rpm.t * d_rpm_d) end @doc raw""" @@ -216,14 +219,15 @@ function stickbreak(v) K = length(v) + 1 cumprod_one_minus_v = cumprod(1 .- v) - eta = [if k == 1 - v[1] - elseif k == K - cumprod_one_minus_v[K - 1] - else - v[k] * cumprod_one_minus_v[k - 1] - end - for k in 1:K] + eta = [ + if k == 1 + v[1] + elseif k == K + cumprod_one_minus_v[K - 1] + else + v[k] * cumprod_one_minus_v[k - 1] + end for k in 1:K + ] return eta end @@ -231,7 +235,7 @@ end function distribution(d::SizeBiasedSamplingProcess{<:PitmanYorProcess}) d_rpm = d.rpm d_rpm_d = d.rpm.d - dist = Beta(one(d_rpm_d)-d_rpm_d, d_rpm.θ + d_rpm.t*d_rpm_d) + dist = Beta(one(d_rpm_d) - d_rpm_d, d_rpm.θ + d_rpm.t * d_rpm_d) return LocationScale(zero(d_rpm_d), d.surplus, dist) end @@ -241,7 +245,7 @@ function _logpdf_table(d::PitmanYorProcess{T}, m::AbstractVector{Int}) where {T< # construct table first_zero = findfirst(iszero, m) - K = first_zero === nothing ? length(m)+1 : length(m) + K = first_zero === nothing ? 
length(m) + 1 : length(m) table = fill(T(-Inf), K) # exit if m is empty or contains only zeros @@ -251,8 +255,8 @@ function _logpdf_table(d::PitmanYorProcess{T}, m::AbstractVector{Int}) where {T< end # compute logpdf for each occupied table - @inbounds for i in 1:(K-1) - !iszero(m[i]) && ( table[i] = T(log(m[i] - d.d)) ) + @inbounds for i in 1:(K - 1) + !iszero(m[i]) && (table[i] = T(log(m[i] - d.d))) end # logpdf for new table @@ -269,5 +273,4 @@ end export DirichletProcess, PitmanYorProcess export SizeBiasedSamplingProcess, StickBreakingProcess, ChineseRestaurantProcess - end # end module diff --git a/src/stdlib/distributions.jl b/src/stdlib/distributions.jl index f579876e8..c2b92c29d 100644 --- a/src/stdlib/distributions.jl +++ b/src/stdlib/distributions.jl @@ -79,9 +79,9 @@ struct BinomialLogit{T<:Real,S<:Real} <: DiscreteUnivariateDistribution logitp::T logconstant::S - function BinomialLogit{T}(n::Int, logitp::T) where T + function BinomialLogit{T}(n::Int, logitp::T) where {T} n >= 0 || error("parameter `n` has to be non-negative") - logconstant = - (log1p(n) + n * StatsFuns.log1pexp(logitp)) + logconstant = -(log1p(n) + n * StatsFuns.log1pexp(logitp)) return new{T,typeof(logconstant)}(n, logitp, logconstant) end end @@ -134,13 +134,13 @@ P(X = k) = \\begin{cases} ``` where `K = length(c) + 1`. """ -struct OrderedLogistic{T1, T2<:AbstractVector} <: DiscreteUnivariateDistribution +struct OrderedLogistic{T1,T2<:AbstractVector} <: DiscreteUnivariateDistribution η::T1 cutpoints::T2 function OrderedLogistic{T1,T2}(η::T1, cutpoints::T2) where {T1,T2} issorted(cutpoints) || error("cutpoints are not sorted") - return new{typeof(η), typeof(cutpoints)}(η, cutpoints) + return new{typeof(η),typeof(cutpoints)}(η, cutpoints) end end @@ -193,10 +193,10 @@ function unsafe_logpdf_ordered_logistic(η, cutpoints, K, k::Int) logp = if k == 1 -StatsFuns.log1pexp(η - cutpoints[k]) elseif k < K - tmp = StatsFuns.log1pexp(cutpoints[k-1] - η) + tmp = StatsFuns.log1pexp(cutpoints[k - 1] - η) -tmp + StatsFuns.log1mexp(tmp - StatsFuns.log1pexp(cutpoints[k] - η)) else - -StatsFuns.log1pexp(cutpoints[k-1] - η) + -StatsFuns.log1pexp(cutpoints[k - 1] - η) end end return logp @@ -221,7 +221,7 @@ struct LogPoisson{T<:Real,S} <: DiscreteUnivariateDistribution logλ::T λ::S - function LogPoisson{T}(logλ::T) where T + function LogPoisson{T}(logλ::T) where {T} λ = exp(logλ) return new{T,typeof(λ)}(logλ, λ) end diff --git a/test/essential/ad.jl b/test/essential/ad.jl index b67c8bd56..6583ed911 100644 --- a/test/essential/ad.jl +++ b/test/essential/ad.jl @@ -5,8 +5,8 @@ using Distributions: logpdf using DynamicPPL: getlogp, getval using ForwardDiff using LinearAlgebra -import LogDensityProblems -import LogDensityProblemsAD +using LogDensityProblems: LogDensityProblems +using LogDensityProblemsAD: LogDensityProblemsAD using ReverseDiff using Test: @test, @testset using Turing @@ -42,7 +42,9 @@ function test_model_ad(model, f, syms::Vector{Symbol}) for chunksize in (0, 1, 10), standardtag in (true, false, 0, 3) ℓ = LogDensityProblemsAD.ADgradient( Turing.AutoForwardDiff(; chunksize=chunksize, tag=standardtag), - Turing.LogDensityFunction(vi, model, SampleFromPrior(), DynamicPPL.DefaultContext()), + Turing.LogDensityFunction( + vi, model, SampleFromPrior(), DynamicPPL.DefaultContext() + ), ) l, ∇E = LogDensityProblems.logdensity_and_gradient(ℓ, z) @@ -72,7 +74,7 @@ end lik_dist = Normal(m, sqrt(s)) lp = logpdf(dist_s, s) + logpdf(Normal(0, sqrt(s)), m) lp += logpdf(lik_dist, 1.5) + logpdf(lik_dist, 2.0) - lp + return lp end # 
Call ForwardDiff's AD @@ -81,14 +83,20 @@ end _x = [_m, _s] grad_FWAD = sort(g(_x)) - ℓ = Turing.LogDensityFunction(vi, ad_test_f, SampleFromPrior(), DynamicPPL.DefaultContext()) + ℓ = Turing.LogDensityFunction( + vi, ad_test_f, SampleFromPrior(), DynamicPPL.DefaultContext() + ) x = map(x -> Float64(x), vi[SampleFromPrior()]) trackerℓ = LogDensityProblemsAD.ADgradient(Turing.AutoTracker(), ℓ) if isdefined(Base, :get_extension) - @test trackerℓ isa Base.get_extension(LogDensityProblemsAD, :LogDensityProblemsADTrackerExt).TrackerGradientLogDensity + @test trackerℓ isa + Base.get_extension( + LogDensityProblemsAD, :LogDensityProblemsADTrackerExt + ).TrackerGradientLogDensity else - @test trackerℓ isa LogDensityProblemsAD.LogDensityProblemsADTrackerExt.TrackerGradientLogDensity + @test trackerℓ isa + LogDensityProblemsAD.LogDensityProblemsADTrackerExt.TrackerGradientLogDensity end @test trackerℓ.ℓ === ℓ ∇E1 = LogDensityProblems.logdensity_and_gradient(trackerℓ, x)[2] @@ -96,9 +104,13 @@ end zygoteℓ = LogDensityProblemsAD.ADgradient(Turing.AutoZygote(), ℓ) if isdefined(Base, :get_extension) - @test zygoteℓ isa Base.get_extension(LogDensityProblemsAD, :LogDensityProblemsADZygoteExt).ZygoteGradientLogDensity + @test zygoteℓ isa + Base.get_extension( + LogDensityProblemsAD, :LogDensityProblemsADZygoteExt + ).ZygoteGradientLogDensity else - @test zygoteℓ isa LogDensityProblemsAD.LogDensityProblemsADZygoteExt.ZygoteGradientLogDensity + @test zygoteℓ isa + LogDensityProblemsAD.LogDensityProblemsADZygoteExt.ZygoteGradientLogDensity end @test zygoteℓ.ℓ === ℓ ∇E2 = LogDensityProblems.logdensity_and_gradient(zygoteℓ, x)[2] @@ -112,7 +124,9 @@ end s = x[2] m = x[1] lik_dist = Normal(m, sqrt(s)) - lp = Turing.logpdf_with_trans(dist_s, s, false) + Turing.logpdf_with_trans(Normal(0, sqrt(s)), m, false) + lp = + Turing.logpdf_with_trans(dist_s, s, false) + + Turing.logpdf_with_trans(Normal(0, sqrt(s)), m, false) lp += logpdf(lik_dist, 1.5) + logpdf(lik_dist, 2.0) return lp end @@ -122,7 +136,7 @@ end # Test Wishart AD. @model function wishart_ad() v ~ Wishart(7, [1 0.5; 0.5 1]) - v + return v end # Hand-written logp @@ -137,7 +151,7 @@ end end @testset "Simplex Tracker, Zygote and ReverseDiff (with and without caching) AD" begin @model function dir() - theta ~ Dirichlet(1 ./ fill(4, 4)) + return theta ~ Dirichlet(1 ./ fill(4, 4)) end sample(dir(), HMC(0.01, 1; adtype=AutoZygote()), 1000) sample(dir(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000) @@ -145,14 +159,14 @@ end end @testset "PDMatDistribution AD" begin @model function wishart() - theta ~ Wishart(4, Matrix{Float64}(I, 4, 4)) + return theta ~ Wishart(4, Matrix{Float64}(I, 4, 4)) end sample(wishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000) sample(wishart(), HMC(0.01, 1; adtype=AutoZygote()), 1000) @model function invwishart() - theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4)) + return theta ~ InverseWishart(4, Matrix{Float64}(I, 4, 4)) end sample(invwishart(), HMC(0.01, 1; adtype=AutoReverseDiff(false)), 1000) @@ -163,7 +177,7 @@ end params = TV(undef, 2) @. 
params ~ Normal(0, 1) - x ~ MvNormal(params, I) + return x ~ MvNormal(params, I) end function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext) @@ -180,7 +194,11 @@ end if unlinked varinfo_init = DynamicPPL.invlink!!(varinfo_init, spl, model) end - varinfo = last(DynamicPPL.evaluate!!(model, varinfo, DynamicPPL.SamplingContext(spl, ctx))) + varinfo = last( + DynamicPPL.evaluate!!( + model, varinfo, DynamicPPL.SamplingContext(spl, ctx) + ), + ) if unlinked varinfo_init = DynamicPPL.link!!(varinfo_init, spl, model) end @@ -195,7 +213,7 @@ end model = tst(data) likelihood = make_logjoint(model, DynamicPPL.LikelihoodContext()) - target(x) = likelihood(x, unlinked=true) + target(x) = likelihood(x; unlinked=true) H_f = ForwardDiff.hessian(target, zeros(2)) H_r = ReverseDiff.hessian(target, zeros(2)) @@ -204,10 +222,9 @@ end end @testset "memoization: issue #1393" begin - @model function demo(data) sigma ~ Uniform(0.0, 20.0) - data ~ Normal(0, sigma) + return data ~ Normal(0, sigma) end N = 1000 @@ -217,7 +234,6 @@ end chn = sample(demo(data), NUTS(0.65; adtype=AutoReverseDiff(true)), 1000) @test mean(Array(chn[:sigma])) ≈ std(data) atol = 0.5 end - end @testset "ReverseDiff compiled without linking" begin @@ -225,10 +241,14 @@ end θ = DynamicPPL.getparams(f) f_rd = LogDensityProblemsAD.ADgradient(Turing.AutoReverseDiff(; compile=false), f) - f_rd_compiled = LogDensityProblemsAD.ADgradient(Turing.AutoReverseDiff(; compile=true), f) + f_rd_compiled = LogDensityProblemsAD.ADgradient( + Turing.AutoReverseDiff(; compile=true), f + ) ℓ, ℓ_grad = LogDensityProblems.logdensity_and_gradient(f_rd, θ) - ℓ_compiled, ℓ_grad_compiled = LogDensityProblems.logdensity_and_gradient(f_rd_compiled, θ) + ℓ_compiled, ℓ_grad_compiled = LogDensityProblems.logdensity_and_gradient( + f_rd_compiled, θ + ) @test ℓ == ℓ_compiled @test ℓ_grad == ℓ_grad_compiled diff --git a/test/essential/container.jl b/test/essential/container.jl index 90e56f46e..d1e1b21bd 100644 --- a/test/essential/container.jl +++ b/test/essential/container.jl @@ -1,6 +1,6 @@ module ContainerTests -import AdvancedPS +using AdvancedPS: AdvancedPS using Distributions: Bernoulli, Beta, Gamma, Normal using DynamicPPL: @model, Sampler using Test: @test, @testset @@ -14,46 +14,46 @@ using Turing 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end @testset "constructor" begin - vi = DynamicPPL.VarInfo() - sampler = Sampler(PG(10)) - model = test() - trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) - - # Make sure we link the traces - @test haskey(trace.model.ctask.task.storage, :__trace) - - res = AdvancedPS.advance!(trace, false) - @test DynamicPPL.get_num_produce(trace.model.f.varinfo) == 1 - @test res ≈ -log(2) - - # Catch broken copy, espetially for RNG / VarInfo - newtrace = AdvancedPS.fork(trace) - res2 = AdvancedPS.advance!(trace) - @test DynamicPPL.get_num_produce(trace.model.f.varinfo) == 2 - @test DynamicPPL.get_num_produce(newtrace.model.f.varinfo) == 1 + vi = DynamicPPL.VarInfo() + sampler = Sampler(PG(10)) + model = test() + trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) + + # Make sure we link the traces + @test haskey(trace.model.ctask.task.storage, :__trace) + + res = AdvancedPS.advance!(trace, false) + @test DynamicPPL.get_num_produce(trace.model.f.varinfo) == 1 + @test res ≈ -log(2) + + # Catch broken copy, espetially for RNG / VarInfo + newtrace = AdvancedPS.fork(trace) + res2 = AdvancedPS.advance!(trace) + @test 
DynamicPPL.get_num_produce(trace.model.f.varinfo) == 2 + @test DynamicPPL.get_num_produce(newtrace.model.f.varinfo) == 1 end @testset "fork" begin - @model function normal() - a ~ Normal(0, 1) - 3 ~ Normal(a, 2) - b ~ Normal(a, 1) - 1.5 ~ Normal(b, 2) - a, b - end - vi = DynamicPPL.VarInfo() - sampler = Sampler(PG(10)) - model = normal() - - trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) - - newtrace = AdvancedPS.forkr(trace) - # Catch broken replay mechanism - @test AdvancedPS.advance!(trace) ≈ AdvancedPS.advance!(newtrace) + @model function normal() + a ~ Normal(0, 1) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) + 1.5 ~ Normal(b, 2) + return a, b + end + vi = DynamicPPL.VarInfo() + sampler = Sampler(PG(10)) + model = normal() + + trace = AdvancedPS.Trace(model, sampler, vi, AdvancedPS.TracedRNG()) + + newtrace = AdvancedPS.forkr(trace) + # Catch broken replay mechanism + @test AdvancedPS.advance!(trace) ≈ AdvancedPS.advance!(newtrace) end end diff --git a/test/ext/OptimInterface.jl b/test/ext/OptimInterface.jl index 7bb72f3cc..817d7a520 100644 --- a/test/ext/OptimInterface.jl +++ b/test/ext/OptimInterface.jl @@ -3,9 +3,9 @@ module OptimInterfaceTests using ..Models: gdemo_default using Distributions.FillArrays: Zeros using LinearAlgebra: I -import Optim -import Random -import StatsBase +using Optim: Optim +using Random: Random +using StatsBase: StatsBase using StatsBase: coef, coefnames, coeftable, informationmatrix, stderror, vcov using Test: @test, @testset using Turing @@ -55,7 +55,7 @@ using Turing infomat = [2/(2 * true_values[1]^2) 0.0; 0.0 2/true_values[1]] @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) - vcovmat = [2*true_values[1]^2 / 2 0.0; 0.0 true_values[1]/2] + vcovmat = [2 * true_values[1]^2/2 0.0; 0.0 true_values[1]/2] @test all(isapprox.(vcovmat - vcov(mle_est), 0.0, atol=0.01)) ctable = coeftable(mle_est) @@ -73,33 +73,33 @@ using Turing @testset "Linear regression test" begin @model function regtest(x, y) beta ~ MvNormal(Zeros(2), I) - mu = x*beta - y ~ MvNormal(mu, I) + mu = x * beta + return y ~ MvNormal(mu, I) end - + Random.seed!(987) true_beta = [1.0, -2.2] x = rand(40, 2) - y = x*true_beta - + y = x * true_beta + model = regtest(x, y) mle = Optim.optimize(model, MLE()) - + vcmat = inv(x'x) vcmat_mle = vcov(mle).array - + @test isapprox(mle.values.array, true_beta) @test isapprox(vcmat, vcmat_mle) end @testset "Dot tilde test" begin @model function dot_gdemo(x) - s ~ InverseGamma(2,3) + s ~ InverseGamma(2, 3) m ~ Normal(0, sqrt(s)) - - (.~)(x, Normal(m, sqrt(s))) + + return (.~)(x, Normal(m, sqrt(s))) end - + model_dot = dot_gdemo([1.5, 2.0]) mle1 = Optim.optimize(gdemo_default, MLE()) @@ -123,13 +123,12 @@ using Turing for vn in DynamicPPL.TestUtils.varnames(model) for vn_leaf in DynamicPPL.TestUtils.varname_leaves(vn, get(result_true, vn)) - @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05 + @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05 end end end end - # Some of the models have one variance parameter per observation, and so # the MLE should have the variances set to 0. Since we're working in # transformed space, this corresponds to `-Inf`, which is of course not achievable. @@ -153,8 +152,8 @@ using Turing result_true = DynamicPPL.TestUtils.likelihood_optima(model) # `NelderMead` seems to struggle with convergence here, so we exclude it. 
- @testset "$(nameof(typeof(optimizer)))" for optimizer in [Optim.LBFGS(),] - options = Optim.Options(g_tol=1e-3, f_tol=1e-3) + @testset "$(nameof(typeof(optimizer)))" for optimizer in [Optim.LBFGS()] + options = Optim.Options(; g_tol=1e-3, f_tol=1e-3) result = Optim.optimize(model, MLE(), optimizer, options) vals = result.values @@ -163,7 +162,7 @@ using Turing if model.f in allowed_incorrect_mle @test isfinite(get(result_true, vn_leaf)) else - @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol=0.05 + @test get(result_true, vn_leaf) ≈ vals[Symbol(vn_leaf)] atol = 0.05 end end end @@ -175,18 +174,18 @@ using Turing @model demo_dirichlet() = x ~ Dirichlet(2 * ones(3)) model = demo_dirichlet() result = Optim.optimize(model, MAP()) - @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol=0.2 + @test result.values ≈ mode(Dirichlet(2 * ones(3))) atol = 0.2 end @testset "with :=" begin @model function demo_track() x ~ Normal() - y := 100 + x + return y := 100 + x end model = demo_track() result = Optim.optimize(model, MAP()) - @test result.values[:x] ≈ 0 atol=1e-1 - @test result.values[:y] ≈ 100 atol=1e-1 + @test result.values[:x] ≈ 0 atol = 1e-1 + @test result.values[:y] ≈ 100 atol = 1e-1 end end diff --git a/test/ext/dynamichmc.jl b/test/ext/dynamichmc.jl index d525512ca..aa52093bc 100644 --- a/test/ext/dynamichmc.jl +++ b/test/ext/dynamichmc.jl @@ -4,10 +4,10 @@ using ..Models: gdemo_default using ..NumericalTests: check_gdemo using Test: @test, @testset using Distributions: sample -import DynamicHMC -import DynamicPPL +using DynamicHMC: DynamicHMC +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler -import Random +using Random: Random using Turing @testset "TuringDynamicHMCExt" begin diff --git a/test/mcmc/abstractmcmc.jl b/test/mcmc/abstractmcmc.jl index 57554c0f2..10d202da3 100644 --- a/test/mcmc/abstractmcmc.jl +++ b/test/mcmc/abstractmcmc.jl @@ -1,15 +1,15 @@ module AbstractMCMCTests -import AdvancedMH +using AdvancedMH: AdvancedMH using Distributions: sample using Distributions.FillArrays: Zeros -import DynamicPPL -import ForwardDiff +using DynamicPPL: DynamicPPL +using ForwardDiff: ForwardDiff using LinearAlgebra: I -import LogDensityProblems -import LogDensityProblemsAD -import Random -import ReverseDiff +using LogDensityProblems: LogDensityProblems +using LogDensityProblemsAD: LogDensityProblemsAD +using Random: Random +using ReverseDiff: ReverseDiff using StableRNGs: StableRNG using Test: @test, @test_throws, @testset using Turing @@ -21,7 +21,9 @@ function initialize_nuts(model::Turing.Model) f = LogDensityProblemsAD.ADgradient(DynamicPPL.LogDensityFunction(model)) # Link the varinfo. 
- f = Turing.Inference.setvarinfo(f, DynamicPPL.link!!(Turing.Inference.getvarinfo(f), model)) + f = Turing.Inference.setvarinfo( + f, DynamicPPL.link!!(Turing.Inference.getvarinfo(f), model) + ) # Choose parameter dimensionality and initial parameter value D = LogDensityProblems.dimension(f) @@ -39,16 +41,18 @@ function initialize_nuts(model::Turing.Model) # - multinomial sampling scheme, # - generalised No-U-Turn criteria, and # - windowed adaption for step-size and diagonal mass matrix - proposal = AdvancedHMC.HMCKernel(AdvancedHMC.Trajectory{AdvancedHMC.MultinomialTS}(integrator, AdvancedHMC.GeneralisedNoUTurn())) + proposal = AdvancedHMC.HMCKernel( + AdvancedHMC.Trajectory{AdvancedHMC.MultinomialTS}( + integrator, AdvancedHMC.GeneralisedNoUTurn() + ), + ) adaptor = AdvancedHMC.StanHMCAdaptor( - AdvancedHMC.MassMatrixAdaptor(metric), - AdvancedHMC.StepSizeAdaptor(0.65, integrator) + AdvancedHMC.MassMatrixAdaptor(metric), AdvancedHMC.StepSizeAdaptor(0.65, integrator) ) return AdvancedHMC.HMCSampler(proposal, metric, adaptor) end - function initialize_mh_rw(model) f = DynamicPPL.LogDensityFunction(model) d = LogDensityProblems.dimension(f) @@ -57,17 +61,22 @@ end # TODO: Should this go somewhere else? # Convert a model into a `Distribution` to allow usage as a proposal in AdvancedMH.jl. -struct ModelDistribution{M<:DynamicPPL.Model,V<:DynamicPPL.VarInfo} <: ContinuousMultivariateDistribution +struct ModelDistribution{M<:DynamicPPL.Model,V<:DynamicPPL.VarInfo} <: + ContinuousMultivariateDistribution model::M varinfo::V end -ModelDistribution(model::DynamicPPL.Model) = ModelDistribution(model, DynamicPPL.VarInfo(model)) +function ModelDistribution(model::DynamicPPL.Model) + return ModelDistribution(model, DynamicPPL.VarInfo(model)) +end Base.length(d::ModelDistribution) = length(d.varinfo[:]) function Distributions._logpdf(d::ModelDistribution, x::AbstractVector) return logprior(d.model, DynamicPPL.unflatten(d.varinfo, x)) end -function Distributions._rand!(rng::Random.AbstractRNG, d::ModelDistribution, x::AbstractVector{<:Real}) +function Distributions._rand!( + rng::Random.AbstractRNG, d::ModelDistribution, x::AbstractVector{<:Real} +) model = d.model varinfo = deepcopy(d.varinfo) for vn in keys(varinfo) @@ -79,10 +88,14 @@ function Distributions._rand!(rng::Random.AbstractRNG, d::ModelDistribution, x:: end function initialize_mh_with_prior_proposal(model) - return AdvancedMH.MetropolisHastings(AdvancedMH.StaticProposal(ModelDistribution(model))) + return AdvancedMH.MetropolisHastings( + AdvancedMH.StaticProposal(ModelDistribution(model)) + ) end -function test_initial_params(model, sampler, initial_params=DynamicPPL.VarInfo(model)[:]; kwargs...) +function test_initial_params( + model, sampler, initial_params=DynamicPPL.VarInfo(model)[:]; kwargs... +) # Execute the transition with two different RNGs and check that the resulting # parameter values are the same. rng1 = Random.MersenneTwister(42) @@ -104,8 +117,10 @@ end @testset "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS # Need some functionality to initialize the sampler. # TODO: Remove this once the constructors in the respective packages become "lazy". - sampler = initialize_nuts(model); - sampler_ext = DynamicPPL.Sampler(externalsampler(sampler; adtype, unconstrained=true), model) + sampler = initialize_nuts(model) + sampler_ext = DynamicPPL.Sampler( + externalsampler(sampler; adtype, unconstrained=true), model + ) # FIXME: Once https://github.com/TuringLang/AdvancedHMC.jl/pull/366 goes through, uncomment. 
# @testset "initial_params" begin # test_initial_params(model, sampler_ext; n_adapts=0) @@ -119,9 +134,13 @@ end ) @testset "inference" begin - if adtype isa AutoReverseDiff && model.f === DynamicPPL.TestUtils.demo_assume_index_observe && VERSION < v"1.8" + if adtype isa AutoReverseDiff && + model.f === DynamicPPL.TestUtils.demo_assume_index_observe && + VERSION < v"1.8" # Ref: https://github.com/TuringLang/DynamicPPL.jl/issues/612 - @test_throws UndefRefError sample(model, sampler_ext, 5_000; sample_kwargs...) + @test_throws UndefRefError sample( + model, sampler_ext, 5_000; sample_kwargs... + ) else DynamicPPL.TestUtils.test_sampler( [model], @@ -140,12 +159,20 @@ end rng = Random.default_rng() model = DynamicPPL.TestUtils.DEMO_MODELS[1] sampler = initialize_nuts(model) - sampler_ext = externalsampler(sampler; unconstrained=true, adtype=AutoForwardDiff()) + sampler_ext = externalsampler( + sampler; unconstrained=true, adtype=AutoForwardDiff() + ) # Initial step. - state = last(AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext); n_adapts=0)) + state = last( + AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext); n_adapts=0) + ) @test state.logdensity isa LogDensityProblemsAD.ADGradientWrapper # Subsequent step. - state = last(AbstractMCMC.step(rng, model, DynamicPPL.Sampler(sampler_ext), state; n_adapts=0)) + state = last( + AbstractMCMC.step( + rng, model, DynamicPPL.Sampler(sampler_ext), state; n_adapts=0 + ), + ) @test state.logdensity isa LogDensityProblemsAD.ADGradientWrapper end end @@ -155,8 +182,10 @@ end @testset "$(model.f)" for model in DynamicPPL.TestUtils.DEMO_MODELS # Need some functionality to initialize the sampler. # TODO: Remove this once the constructors in the respective packages become "lazy". - sampler = initialize_mh_rw(model); - sampler_ext = DynamicPPL.Sampler(externalsampler(sampler; unconstrained=true), model) + sampler = initialize_mh_rw(model) + sampler_ext = DynamicPPL.Sampler( + externalsampler(sampler; unconstrained=true), model + ) @testset "initial_params" begin test_initial_params(model, sampler_ext) end @@ -168,7 +197,7 @@ end discard_initial=1_000, thinning=10, rtol=0.2, - sampler_name="AdvancedMH" + sampler_name="AdvancedMH", ) end end diff --git a/test/mcmc/emcee.jl b/test/mcmc/emcee.jl index 04901775e..08ea16d2a 100644 --- a/test/mcmc/emcee.jl +++ b/test/mcmc/emcee.jl @@ -3,9 +3,9 @@ module EmceeTests using ..Models: gdemo_default using ..NumericalTests: check_gdemo using Distributions: sample -import DynamicPPL +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler -import Random +using Random: Random using Test: @test, @test_throws, @testset using Turing @@ -15,7 +15,7 @@ using Turing n_samples = 1000 n_walkers = 250 - + spl = Emcee(n_walkers, 2.0) @test DynamicPPL.alg_str(Sampler(spl, gdemo_default)) == "Emcee" diff --git a/test/mcmc/ess.jl b/test/mcmc/ess.jl index 86029e43f..0a1c23a9e 100644 --- a/test/mcmc/ess.jl +++ b/test/mcmc/ess.jl @@ -3,23 +3,23 @@ module ESSTests using ..Models: MoGtest, MoGtest_default, gdemo, gdemo_default using ..NumericalTests: check_MoGtest_default, check_numerical using Distributions: Normal, sample -import DynamicPPL +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler -import Random +using Random: Random using Test: @test, @testset using Turing @testset "ESS" begin @model function demo(x) m ~ Normal() - x ~ Normal(m, 0.5) + return x ~ Normal(m, 0.5) end demo_default = demo(1.0) @model function demodot(x) m = Vector{Float64}(undef, 2) @. 
m ~ Normal() - x ~ Normal(m[2], 0.5) + return x ~ Normal(m[2], 0.5) end demodot_default = demodot(1.0) @@ -45,26 +45,22 @@ using Turing @testset "ESS inference" begin Random.seed!(1) chain = sample(demo_default, ESS(), 5_000) - check_numerical(chain, [:m], [0.8], atol = 0.1) + check_numerical(chain, [:m], [0.8]; atol=0.1) Random.seed!(1) chain = sample(demodot_default, ESS(), 5_000) - check_numerical(chain, ["m[1]", "m[2]"], [0.0, 0.8], atol = 0.1) + check_numerical(chain, ["m[1]", "m[2]"], [0.0, 0.8]; atol=0.1) Random.seed!(100) - alg = Gibbs( - CSMC(15, :s), - ESS(:m)) + alg = Gibbs(CSMC(15, :s), ESS(:m)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49 / 24, 7 / 6], atol = 0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1) # MoGtest Random.seed!(125) - alg = Gibbs( - CSMC(15, :z1, :z2, :z3, :z4), - ESS(:mu1), ESS(:mu2)) + alg = Gibbs(CSMC(15, :z1, :z2, :z3, :z4), ESS(:mu1), ESS(:mu2)) chain = sample(MoGtest_default, alg, 6000) - check_MoGtest_default(chain, atol = 0.1) + check_MoGtest_default(chain; atol=0.1) # Different "equivalent" models. # NOTE: Because `ESS` only supports "single" variables with @@ -72,13 +68,15 @@ using Turing # on the non-Gaussian variables in `DEMO_MODELS`. models_conditioned = map(DynamicPPL.TestUtils.DEMO_MODELS) do model # Condition on the non-Gaussian random variables. - model | (s = DynamicPPL.TestUtils.posterior_mean(model).s,) + model | (s=DynamicPPL.TestUtils.posterior_mean(model).s,) end DynamicPPL.TestUtils.test_sampler( - models_conditioned, DynamicPPL.Sampler(ESS()), 10_000; + models_conditioned, + DynamicPPL.Sampler(ESS()), + 10_000; # Filter out the varnames we've conditioned on. - varnames_filter=vn -> DynamicPPL.getsym(vn) != :s + varnames_filter=vn -> DynamicPPL.getsym(vn) != :s, ) end end diff --git a/test/mcmc/gibbs.jl b/test/mcmc/gibbs.jl index 3e096dde1..4d2053c14 100644 --- a/test/mcmc/gibbs.jl +++ b/test/mcmc/gibbs.jl @@ -4,15 +4,17 @@ using ..Models: MoGtest_default, gdemo, gdemo_default using ..NumericalTests: check_MoGtest_default, check_gdemo, check_numerical using Distributions: InverseGamma, Normal using Distributions: sample -import ForwardDiff -import Random -import ReverseDiff +using ForwardDiff: ForwardDiff +using Random: Random +using ReverseDiff: ReverseDiff using Test: @test, @testset using Turing using Turing: Inference using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess -@testset "Testing gibbs.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false)) +@testset "Testing gibbs.jl with $adbackend" for adbackend in ( + AutoForwardDiff(; chunksize=0), AutoReverseDiff(false) +) @testset "gibbs constructor" begin N = 500 s1 = Gibbs(HMC(0.1, 5, :s, :m; adtype=adbackend)) @@ -48,26 +50,28 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess Random.seed!(100) alg = Gibbs(CSMC(15, :s), HMC(0.2, 4, :m; adtype=adbackend)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.15) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.15) Random.seed!(100) alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1) alg = Gibbs(CSMC(15, :s), ESS(:m)) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 
6]; atol=0.1) alg = CSMC(15) chain = sample(gdemo(1.5, 2.0), alg, 10_000) - check_numerical(chain, [:s, :m], [49/24, 7/6], atol=0.1) + check_numerical(chain, [:s, :m], [49 / 24, 7 / 6]; atol=0.1) Random.seed!(200) - gibbs = Gibbs(PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2; adtype=adbackend)) + gibbs = Gibbs( + PG(15, :z1, :z2, :z3, :z4), HMC(0.15, 3, :mu1, :mu2; adtype=adbackend) + ) chain = sample(MoGtest_default, gibbs, 10_000) - check_MoGtest_default(chain, atol=0.15) + check_MoGtest_default(chain; atol=0.15) Random.seed!(200) for alg in [ @@ -95,42 +99,41 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess ::Turing.Sampler{<:Gibbs}, state, ::Type{MCMCChains.Chains}; - kwargs... + kwargs..., ) - samples isa Vector{<:Inference.Transition} || - error("incorrect transitions") - return + samples isa Vector{<:Inference.Transition} || error("incorrect transitions") + return nothing end function callback(rng, model, sampler, sample, state, i; kwargs...) sample isa Inference.Transition || error("incorrect sample") - return + return nothing end alg = Gibbs(MH(:s), HMC(0.2, 4, :m; adtype=adbackend)) - sample(model, alg, 100; callback = callback) + sample(model, alg, 100; callback=callback) end @testset "dynamic model" begin @model function imm(y, alpha, ::Type{M}=Vector{Float64}) where {M} N = length(y) rpm = DirichletProcess(alpha) - + z = zeros(Int, N) cluster_counts = zeros(Int, N) fill!(cluster_counts, 0) - + for i in 1:N z[i] ~ ChineseRestaurantProcess(rpm, cluster_counts) cluster_counts[z[i]] += 1 end - + Kmax = findlast(!iszero, cluster_counts) m = M(undef, Kmax) - for k = 1:Kmax + for k in 1:Kmax m[k] ~ Normal(1.0, 1.0) end end - model = imm(randn(100), 1.0); + model = imm(randn(100), 1.0) # https://github.com/TuringLang/Turing.jl/issues/1725 # sample(model, Gibbs(MH(:z), HMC(0.01, 4, :m)), 100); sample(model, Gibbs(PG(10, :z), HMC(0.01, 4, :m; adtype=adbackend)), 100) diff --git a/test/mcmc/gibbs_conditional.jl b/test/mcmc/gibbs_conditional.jl index 2abec68a4..3ba2fdbed 100644 --- a/test/mcmc/gibbs_conditional.jl +++ b/test/mcmc/gibbs_conditional.jl @@ -2,20 +2,23 @@ module GibbsConditionalTests using ..Models: gdemo, gdemo_default using ..NumericalTests: check_gdemo, check_numerical -import Clustering +using Clustering: Clustering using Distributions: Categorical, InverseGamma, Normal, sample -import ForwardDiff +using ForwardDiff: ForwardDiff using LinearAlgebra: Diagonal, I -import Random -import ReverseDiff +using Random: Random +using ReverseDiff: ReverseDiff using StableRNGs: StableRNG using StatsBase: counts -import StatsFuns +using StatsFuns: StatsFuns using Test: @test, @testset using Turing -@testset "Testing gibbs conditionals.jl with $adbackend" for adbackend in (AutoForwardDiff(; chunksize=0), AutoReverseDiff(false)) - Random.seed!(1000); rng = StableRNG(123) +@testset "Testing gibbs conditionals.jl with $adbackend" for adbackend in ( + AutoForwardDiff(; chunksize=0), AutoReverseDiff(false) +) + Random.seed!(1000) + rng = StableRNG(123) @testset "gdemo" begin # We consider the model @@ -36,7 +39,7 @@ using Turing # ```math # m | s, x ~ Normal(m_n, sqrt(s / (N + 1))) # ``` - cond_m = let N=N, m_n=m_n + cond_m = let N = N, m_n = m_n c -> Normal(m_n, sqrt(c.s / (N + 1))) end @@ -45,29 +48,31 @@ using Turing # s | m, x ~ InverseGamma(2 + (N + 1) / 2, 3 + (m^2 + ∑ (xᵢ - m)^2) / 2) = # InverseGamma(2 + (N + 1) / 2, 3 + m^2 / 2 + N / 2 * (x_var + (x_mean - m)^2)) # ``` - cond_s = let N=N, x_mean=x_mean, x_var=x_var - c -> InverseGamma(2 + (N + 1) / 
2, 3 + c.m^2 / 2 + N / 2 * (x_var + (x_mean - c.m)^2)) + cond_s = let N = N, x_mean = x_mean, x_var = x_var + c -> InverseGamma( + 2 + (N + 1) / 2, 3 + c.m^2 / 2 + N / 2 * (x_var + (x_mean - c.m)^2) + ) end # Three Gibbs samplers: # one for each variable fixed to the posterior mean - s_posterior_mean = 49/24 + s_posterior_mean = 49 / 24 sampler1 = Gibbs( GibbsConditional(:m, cond_m), GibbsConditional(:s, _ -> Normal(s_posterior_mean, 0)), ) chain = sample(rng, gdemo_default, sampler1, 10_000) - cond_m_mean = mean(cond_m((s = s_posterior_mean,))) + cond_m_mean = mean(cond_m((s=s_posterior_mean,))) check_numerical(chain, [:m, :s], [cond_m_mean, s_posterior_mean]) @test all(==(s_posterior_mean), chain[:s][2:end]) - m_posterior_mean = 7/6 + m_posterior_mean = 7 / 6 sampler2 = Gibbs( GibbsConditional(:m, _ -> Normal(m_posterior_mean, 0)), GibbsConditional(:s, cond_s), ) chain = sample(rng, gdemo_default, sampler2, 10_000) - cond_s_mean = mean(cond_s((m = m_posterior_mean,))) + cond_s_mean = mean(cond_s((m=m_posterior_mean,))) check_numerical(chain, [:m, :s], [m_posterior_mean, cond_s_mean]) @test all(==(m_posterior_mean), chain[:m][2:end]) @@ -78,7 +83,8 @@ using Turing end @testset "GMM" begin - Random.seed!(1000); rng = StableRNG(123) + Random.seed!(1000) + rng = StableRNG(123) # We consider the model # ```math # μₖ ~ Normal(m, σ_μ), k = 1, …, K, @@ -87,7 +93,7 @@ using Turing # ``` # with ``K = 2`` clusters, ``N = 20`` observations, and the following parameters: K = 2 # number of clusters - π = fill(1/K, K) # uniform cluster weights + π = fill(1 / K, K) # uniform cluster weights m = 0.5 # prior mean of μₖ σ²_μ = 4.0 # prior variance of μₖ σ²_x = 0.01 # observation variance @@ -108,7 +114,7 @@ using Turing # Conditional distribution ``z | μ, x`` # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - cond_z = let x=x_data, log_π=log.(π), σ_x=sqrt(σ²_x) + cond_z = let x = x_data, log_π = log.(π), σ_x = sqrt(σ²_x) c -> begin dists = map(x) do xi logp = log_π .+ logpdf.(Normal.(c.μ, σ_x), xi) @@ -120,7 +126,7 @@ using Turing # Conditional distribution ``μ | z, x`` # see http://www.cs.columbia.edu/~blei/fogm/2015F/notes/mixtures-and-gibbs.pdf - cond_μ = let K=K, x_data=x_data, inv_σ²_μ=inv(σ²_μ), inv_σ²_x=inv(σ²_x) + cond_μ = let K = K, x_data = x_data, inv_σ²_μ = inv(σ²_μ), inv_σ²_x = inv(σ²_x) c -> begin # Convert cluster assignments to one-hot encodings z_onehot = c.z .== (1:K)' @@ -136,10 +142,10 @@ using Turing end end - estimate(chain, var) = dropdims(mean(Array(group(chain, var)), dims=1), dims=1) + estimate(chain, var) = dropdims(mean(Array(group(chain, var)); dims=1); dims=1) function estimatez(chain, var, range) z = Int.(Array(group(chain, var))) - return map(i -> findmax(counts(z[:,i], range))[2], 1:size(z,2)) + return map(i -> findmax(counts(z[:, i], range))[2], 1:size(z, 2)) end lμ_data, uμ_data = extrema(μ_data) diff --git a/test/mcmc/is.jl b/test/mcmc/is.jl index b2e26bba7..bd3186cd9 100644 --- a/test/mcmc/is.jl +++ b/test/mcmc/is.jl @@ -2,7 +2,7 @@ module ISTests using Distributions: Normal, sample using DynamicPPL: logpdf -import Random +using Random: Random using StatsFuns: logsumexp using Test: @test, @testset using Turing @@ -18,22 +18,22 @@ using Turing end logevidence = logsumexp(logps) - log(n) - return (as = as, bs = bs, logps = logps, logevidence = logevidence) + return (as=as, bs=bs, logps=logps, logevidence=logevidence) end function reference() - x = rand(Normal(4,5)) - y = rand(Normal(x,1)) - loglik = logpdf(Normal(x,2), 3) + 
logpdf(Normal(y,2), 1.5) + x = rand(Normal(4, 5)) + y = rand(Normal(x, 1)) + loglik = logpdf(Normal(x, 2), 3) + logpdf(Normal(y, 2), 1.5) return x, y, loglik end @model function normal() - a ~ Normal(4,5) - 3 ~ Normal(a,2) - b ~ Normal(a,1) - 1.5 ~ Normal(b,2) - a, b + a ~ Normal(4, 5) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) + 1.5 ~ Normal(b, 2) + return a, b end alg = IS() @@ -65,13 +65,13 @@ using Turing 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end chains = sample(test(), IS(), 10000) @test all(isone, chains[:x]) - @test chains.logevidence ≈ - 2 * log(2) + @test chains.logevidence ≈ -2 * log(2) end end diff --git a/test/mcmc/mh.jl b/test/mcmc/mh.jl index 3b15d8069..0e3cc91f6 100644 --- a/test/mcmc/mh.jl +++ b/test/mcmc/mh.jl @@ -1,12 +1,12 @@ module MHTests -import AdvancedMH -using Distributions: Bernoulli, Dirichlet, Exponential, InverseGamma, LogNormal, MvNormal, - Normal, sample -import DynamicPPL +using AdvancedMH: AdvancedMH +using Distributions: + Bernoulli, Dirichlet, Exponential, InverseGamma, LogNormal, MvNormal, Normal, sample +using DynamicPPL: DynamicPPL using DynamicPPL: Sampler using LinearAlgebra: I -import Random +using Random: Random using StableRNGs: StableRNG using Test: @test, @testset using Turing @@ -21,9 +21,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "mh constructor" begin Random.seed!(10) N = 500 - s1 = MH( - (:s, InverseGamma(2,3)), - (:m, GKernel(3.0))) + s1 = MH((:s, InverseGamma(2, 3)), (:m, GKernel(3.0))) s2 = MH(:s, :m) s3 = MH() for s in (s1, s2, s3) @@ -49,30 +47,27 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) Random.seed!(125) alg = MH() chain = sample(gdemo_default, alg, 10_000) - check_gdemo(chain, atol = 0.1) + check_gdemo(chain; atol=0.1) Random.seed!(125) # MH with Gaussian proposal - alg = MH( - (:s, InverseGamma(2,3)), - (:m, GKernel(1.0))) + alg = MH((:s, InverseGamma(2, 3)), (:m, GKernel(1.0))) chain = sample(gdemo_default, alg, 10_000) - check_gdemo(chain, atol = 0.1) + check_gdemo(chain; atol=0.1) Random.seed!(125) # MH within Gibbs alg = Gibbs(MH(:m), MH(:s)) chain = sample(gdemo_default, alg, 10_000) - check_gdemo(chain, atol = 0.1) + check_gdemo(chain; atol=0.1) Random.seed!(125) # MoGtest gibbs = Gibbs( - CSMC(15, :z1, :z2, :z3, :z4), - MH((:mu1,GKernel(1)), (:mu2,GKernel(1))) + CSMC(15, :z1, :z2, :z3, :z4), MH((:mu1, GKernel(1)), (:mu2, GKernel(1))) ) chain = sample(MoGtest_default, gibbs, 500) - check_MoGtest_default(chain, atol = 0.15) + check_MoGtest_default(chain; atol=0.15) end # Test MH shape passing. 
@@ -91,7 +86,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) -1.5 ~ Normal(m[1], m[2]) 1.5 ~ Normal(m[1], s) - 2.0 ~ Normal(m[1], s) + return 2.0 ~ Normal(m[1], s) end model = M(zeros(2), I, 1) @@ -100,7 +95,8 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) dt, vt = Inference.dist_val_tuple(sampler, Turing.VarInfo(model)) @test dt[:z] isa AdvancedMH.StaticProposal{false,<:MvNormal} - @test dt[:m] isa AdvancedMH.StaticProposal{false,Vector{ContinuousUnivariateDistribution}} + @test dt[:m] isa + AdvancedMH.StaticProposal{false,Vector{ContinuousUnivariateDistribution}} @test dt[:m].proposal[1] isa Normal && dt[:m].proposal[2] isa InverseGamma @test dt[:s] isa AdvancedMH.StaticProposal{false,<:InverseGamma} @@ -115,7 +111,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) @testset "proposal matrix" begin Random.seed!(100) - + mat = [1.0 -0.05; -0.05 1.0] prop1 = mat # Matrix only constructor @@ -142,42 +138,37 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) # generate data x = rand(Normal(5, 10), 20) y = rand(LogNormal(-3, 2), 20) - + # Turing model @model function twomeans(x, y) # Set Priors μ ~ MvNormal(zeros(2), 9 * I) σ ~ filldist(Exponential(1), 2) - + # Distributions of supplied data x .~ Normal(μ[1], σ[1]) - y .~ LogNormal(μ[2], σ[2]) - + return y .~ LogNormal(μ[2], σ[2]) end mod = twomeans(x, y) - + # generate covariance matrix for RWMH # with small-valued VC matrix to check if we only see very small steps - vc_μ = convert(Array, 1e-4*I(2)) - vc_σ = convert(Array, 1e-4*I(2)) + vc_μ = convert(Array, 1e-4 * I(2)) + vc_σ = convert(Array, 1e-4 * I(2)) - alg = Gibbs( - MH((:μ, vc_μ)), - MH((:σ, vc_σ)), - ) + alg = Gibbs(MH((:μ, vc_μ)), MH((:σ, vc_σ))) chn = sample( mod, alg, - 3_000 # draws + 3_000, # draws ) - - + chn2 = sample(mod, MH(), 3_000) # Test that the small variance version is actually smaller. - v1 = var(diff(Array(chn["μ[1]"]), dims=1)) - v2 = var(diff(Array(chn2["μ[1]"]), dims=1)) + v1 = var(diff(Array(chn["μ[1]"]); dims=1)) + v2 = var(diff(Array(chn2["μ[1]"]); dims=1)) # FIXME: Do this properly. It sometimes fails. # @test v1 < v2 @@ -241,7 +232,7 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) vi = deepcopy(vi_base) alg = MH( :m => AdvancedMH.StaticProposal(Normal()), - :s => AdvancedMH.RandomWalkProposal(Normal()) + :s => AdvancedMH.RandomWalkProposal(Normal()), ) spl = DynamicPPL.Sampler(alg) vi = Turing.Inference.maybe_link!!(vi, spl, alg.proposals, gdemo_default) @@ -253,21 +244,27 @@ GKernel(var) = (x) -> Normal(x, sqrt.(var)) # find a non-trivial `atol` where the tests will pass for all seeds. 
Hence we fix it :/ rng = StableRNG(10) alg = MH() - gdemo_default_prior = DynamicPPL.contextualize(gdemo_default, DynamicPPL.PriorContext()) + gdemo_default_prior = DynamicPPL.contextualize( + gdemo_default, DynamicPPL.PriorContext() + ) burnin = 10_000 n = 10_000 - chain = sample(rng, gdemo_default_prior, alg, n; discard_initial = burnin, thinning=10) - check_numerical(chain, [:s, :m], [mean(InverseGamma(2, 3)), 0], atol=0.3) + chain = sample( + rng, gdemo_default_prior, alg, n; discard_initial=burnin, thinning=10 + ) + check_numerical(chain, [:s, :m], [mean(InverseGamma(2, 3)), 0]; atol=0.3) end @testset "`filldist` proposal (issue #2180)" begin @model demo_filldist_issue2180() = x ~ MvNormal(zeros(3), I) chain = sample( - demo_filldist_issue2180(), - MH(AdvancedMH.RandomWalkProposal(filldist(Normal(), 3))), - 10_000 + demo_filldist_issue2180(), + MH(AdvancedMH.RandomWalkProposal(filldist(Normal(), 3))), + 10_000, + ) + check_numerical( + chain, [Symbol("x[1]"), Symbol("x[2]"), Symbol("x[3]")], [0, 0, 0]; atol=0.2 ) - check_numerical(chain, [Symbol("x[1]"), Symbol("x[2]"), Symbol("x[3]")], [0, 0, 0], atol=0.2) end end diff --git a/test/mcmc/particle_mcmc.jl b/test/mcmc/particle_mcmc.jl index 01510f2a8..2e3744fef 100644 --- a/test/mcmc/particle_mcmc.jl +++ b/test/mcmc/particle_mcmc.jl @@ -5,7 +5,7 @@ using ..Models: gdemo_default using AdvancedPS: ResampleWithESSThreshold, resample_systematic, resample_multinomial using Distributions: Bernoulli, Beta, Gamma, Normal, sample using DynamicPPL: getspace -import Random +using Random: Random using Test: @test, @test_throws, @testset using Turing @@ -58,24 +58,24 @@ using Turing @testset "models" begin @model function normal() - a ~ Normal(4,5) - 3 ~ Normal(a,2) - b ~ Normal(a,1) - 1.5 ~ Normal(b,2) - a, b + a ~ Normal(4, 5) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) + 1.5 ~ Normal(b, 2) + return a, b end - tested = sample(normal(), SMC(), 100); + tested = sample(normal(), SMC(), 100) # failing test @model function fail_smc() - a ~ Normal(4,5) - 3 ~ Normal(a,2) - b ~ Normal(a,1) + a ~ Normal(4, 5) + 3 ~ Normal(a, 2) + b ~ Normal(a, 1) if a >= 4.0 - 1.5 ~ Normal(b,2) + 1.5 ~ Normal(b, 2) end - a, b + return a, b end @test_throws ErrorException sample(fail_smc(), SMC(), 100) @@ -91,7 +91,7 @@ using Turing 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end chains_smc = sample(test(), SMC(), 100) @@ -169,7 +169,7 @@ end 1 ~ Bernoulli(x / 2) c ~ Beta() 0 ~ Bernoulli(x / 2) - x + return x end chains_pg = sample(test(), PG(10), 100) @@ -187,7 +187,7 @@ end # https://github.com/TuringLang/Turing.jl/issues/2007 @testset "keyword arguments not supported" begin - @model kwarg_demo(; x = 2) = return x + @model kwarg_demo(; x=2) = return x @test_throws ErrorException sample(kwarg_demo(), PG(1), 10) end end diff --git a/test/mcmc/utilities.jl b/test/mcmc/utilities.jl index 2b5e04b2d..06e92fbf2 100644 --- a/test/mcmc/utilities.jl +++ b/test/mcmc/utilities.jl @@ -3,7 +3,7 @@ module MCMCUtilitiesTests using ..Models: gdemo_default using Distributions: Normal, sample, truncated using LinearAlgebra: I, vec -import Random +using Random: Random using Random: MersenneTwister using Test: @test, @testset using Turing @@ -11,34 +11,36 @@ using Turing @testset "predict" begin Random.seed!(100) - @model function linear_reg(x, y, σ = 0.1) + @model function linear_reg(x, y, σ=0.1) β ~ Normal(0, 1) - for i ∈ eachindex(y) + for i in eachindex(y) y[i] ~ Normal(β * x[i], σ) end end - @model function linear_reg_vec(x, y, σ = 0.1) + @model function 
linear_reg_vec(x, y, σ=0.1) β ~ Normal(0, 1) - y ~ MvNormal(β .* x, σ^2 * I) + return y ~ MvNormal(β .* x, σ^2 * I) end f(x) = 2 * x + 0.1 * randn() Δ = 0.1 - xs_train = 0:Δ:10; ys_train = f.(xs_train); - xs_test = [10 + Δ, 10 + 2 * Δ]; ys_test = f.(xs_test); + xs_train = 0:Δ:10 + ys_train = f.(xs_train) + xs_test = [10 + Δ, 10 + 2 * Δ] + ys_test = f.(xs_test) # Infer - m_lin_reg = linear_reg(xs_train, ys_train); - chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), 200); + m_lin_reg = linear_reg(xs_train, ys_train) + chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), 200) # Predict on two last indices - m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))); + m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))) predictions = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) - ys_pred = vec(mean(Array(group(predictions, :y)); dims = 1)) + ys_pred = vec(mean(Array(group(predictions, :y)); dims=1)) @test sum(abs2, ys_test - ys_pred) ≤ 0.1 @@ -52,91 +54,88 @@ using Turing @test all(Array(predictions1) .== Array(predictions2)) # Predict on two last indices for vectorized - m_lin_reg_test = linear_reg_vec(xs_test, missing); + m_lin_reg_test = linear_reg_vec(xs_test, missing) predictions_vec = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) - ys_pred_vec = vec(mean(Array(group(predictions_vec, :y)); dims = 1)) + ys_pred_vec = vec(mean(Array(group(predictions_vec, :y)); dims=1)) @test sum(abs2, ys_test - ys_pred_vec) ≤ 0.1 # Multiple chains - chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), MCMCThreads(), 200, 2); - m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))); + chain_lin_reg = sample(m_lin_reg, NUTS(100, 0.65), MCMCThreads(), 200, 2) + m_lin_reg_test = linear_reg(xs_test, fill(missing, length(ys_test))) predictions = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) @test size(chain_lin_reg, 3) == size(predictions, 3) for chain_idx in MCMCChains.chains(chain_lin_reg) - ys_pred = vec(mean(Array(group(predictions[:, :, chain_idx], :y)); dims = 1)) + ys_pred = vec(mean(Array(group(predictions[:, :, chain_idx], :y)); dims=1)) @test sum(abs2, ys_test - ys_pred) ≤ 0.1 end # Predict on two last indices for vectorized - m_lin_reg_test = linear_reg_vec(xs_test, missing); + m_lin_reg_test = linear_reg_vec(xs_test, missing) predictions_vec = Turing.Inference.predict(m_lin_reg_test, chain_lin_reg) for chain_idx in MCMCChains.chains(chain_lin_reg) - ys_pred_vec = vec(mean( - Array(group(predictions_vec[:, :, chain_idx], :y)); - dims = 1 - )) + ys_pred_vec = vec(mean(Array(group(predictions_vec[:, :, chain_idx], :y)); dims=1)) @test sum(abs2, ys_test - ys_pred_vec) ≤ 0.1 end # https://github.com/TuringLang/Turing.jl/issues/1352 @model function simple_linear1(x, y) - intercept ~ Normal(0,1) + intercept ~ Normal(0, 1) coef ~ MvNormal(zeros(2), I) - coef = reshape(coef, 1, size(x,1)) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end @model function simple_linear2(x, y) - intercept ~ Normal(0,1) - coef ~ filldist(Normal(0,1), 2) - coef = reshape(coef, 1, size(x,1)) + intercept ~ Normal(0, 1) + coef ~ filldist(Normal(0, 1), 2) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + 
error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end @model function simple_linear3(x, y) - intercept ~ Normal(0,1) + intercept ~ Normal(0, 1) coef = Vector(undef, 2) for i in axes(coef, 1) - coef[i] ~ Normal(0,1) + coef[i] ~ Normal(0, 1) end - coef = reshape(coef, 1, size(x,1)) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end @model function simple_linear4(x, y) - intercept ~ Normal(0,1) - coef1 ~ Normal(0,1) - coef2 ~ Normal(0,1) + intercept ~ Normal(0, 1) + coef1 ~ Normal(0, 1) + coef2 ~ Normal(0, 1) coef = [coef1, coef2] - coef = reshape(coef, 1, size(x,1)) + coef = reshape(coef, 1, size(x, 1)) - mu = intercept .+ coef * x |> vec - error ~ truncated(Normal(0,1), 0, Inf) - y ~ MvNormal(mu, error^2 * I) - end; + mu = vec(intercept .+ coef * x) + error ~ truncated(Normal(0, 1), 0, Inf) + return y ~ MvNormal(mu, error^2 * I) + end # Some data - x = randn(2, 100); - y = [1 + 2 * a + 3 * b for (a,b) in eachcol(x)]; + x = randn(2, 100) + y = [1 + 2 * a + 3 * b for (a, b) in eachcol(x)] for model in [simple_linear1, simple_linear2, simple_linear3, simple_linear4] - m = model(x, y); - chain = sample(m, NUTS(), 100); - chain_predict = predict(model(x, missing), chain); - mean_prediction = [chain_predict["y[$i]"].data |> mean for i = 1:length(y)] + m = model(x, y) + chain = sample(m, NUTS(), 100) + chain_predict = predict(model(x, missing), chain) + mean_prediction = [mean(chain_predict["y[$i]"].data) for i in 1:length(y)] @test mean(abs2, mean_prediction - y) ≤ 1e-3 end end diff --git a/test/optimisation/Optimisation.jl b/test/optimisation/Optimisation.jl index 6c4289f78..5e6144e57 100644 --- a/test/optimisation/Optimisation.jl +++ b/test/optimisation/Optimisation.jl @@ -3,15 +3,15 @@ module OptimisationTests using ..Models: gdemo, gdemo_default using Distributions using Distributions.FillArrays: Zeros -import DynamicPPL +using DynamicPPL: DynamicPPL using LinearAlgebra: I -import Random +using Random: Random using Optimization using Optimization: Optimization using OptimizationBBO: OptimizationBBO using OptimizationNLopt: OptimizationNLopt using OptimizationOptimJL: OptimizationOptimJL -import StatsBase +using StatsBase: StatsBase using StatsBase: coef, coefnames, coeftable, informationmatrix, stderror, vcov using Test: @test, @testset, @test_throws using Turing @@ -32,11 +32,8 @@ using Turing end DynamicPPL.NodeTrait(::OverrideContext) = DynamicPPL.IsParent() DynamicPPL.childcontext(parent::OverrideContext) = parent.context - DynamicPPL.setchildcontext(parent::OverrideContext, child) = OverrideContext( - child, - parent.logprior_weight, - parent.loglikelihood_weight - ) + DynamicPPL.setchildcontext(parent::OverrideContext, child) = + OverrideContext(child, parent.logprior_weight, parent.loglikelihood_weight) # Only implement what we need for the models above. 
function DynamicPPL.tilde_assume(context::OverrideContext, right, vn, vi) @@ -50,12 +47,12 @@ using Turing @model function model1(x) μ ~ Uniform(0, 2) - x ~ LogNormal(μ, 1) + return x ~ LogNormal(μ, 1) end @model function model2() μ ~ Uniform(0, 2) - x ~ LogNormal(μ, 1) + return x ~ LogNormal(μ, 1) end x = 1.0 @@ -66,7 +63,7 @@ using Turing m2 = model2() | (x=x,) ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) == - Turing.Optimisation.OptimLogDensity(m2, ctx)(w) + Turing.Optimisation.OptimLogDensity(m2, ctx)(w) end @testset "With prefixes" begin @@ -79,21 +76,20 @@ using Turing m2 = prefix_μ(model2() | (var"inner.x"=x,)) ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.LikelihoodContext()) @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) == - Turing.Optimisation.OptimLogDensity(m2, ctx)(w) + Turing.Optimisation.OptimLogDensity(m2, ctx)(w) end @testset "Weighted" begin function override(model) return DynamicPPL.contextualize( - model, - OverrideContext(model.context, 100, 1) + model, OverrideContext(model.context, 100, 1) ) end m1 = override(model1(x)) m2 = override(model2() | (x=x,)) ctx = Turing.Optimisation.OptimizationContext(DynamicPPL.DefaultContext()) @test Turing.Optimisation.OptimLogDensity(m1, ctx)(w) == - Turing.Optimisation.OptimLogDensity(m2, ctx)(w) + Turing.Optimisation.OptimLogDensity(m2, ctx)(w) end @testset "Default, Likelihood, Prior Contexts" begin @@ -105,16 +101,17 @@ using Turing @test Turing.Optimisation.OptimLogDensity(m1, defctx)(a) == Turing.Optimisation.OptimLogDensity(m1, llhctx)(a) + - Turing.Optimisation.OptimLogDensity(m1, prictx)(a) + Turing.Optimisation.OptimLogDensity(m1, prictx)(a) # test that PriorContext is calculating the right thing - @test Turing.Optimisation.OptimLogDensity(m1, prictx)([0.3]) ≈ -Distributions.logpdf(Uniform(0, 2), 0.3) - @test Turing.Optimisation.OptimLogDensity(m1, prictx)([-0.3]) ≈ -Distributions.logpdf(Uniform(0, 2), -0.3) + @test Turing.Optimisation.OptimLogDensity(m1, prictx)([0.3]) ≈ + -Distributions.logpdf(Uniform(0, 2), 0.3) + @test Turing.Optimisation.OptimLogDensity(m1, prictx)([-0.3]) ≈ + -Distributions.logpdf(Uniform(0, 2), -0.3) end end @testset "gdemo" begin - """ check_success(result, true_value, true_logp, check_retcode=true) @@ -137,15 +134,11 @@ using Turing true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) check_success(result) = check_optimisation_result(result, true_value, true_logp) - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MLE() - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MLE()) m2 = maximum_likelihood( gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value ) - m3 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.Newton() - ) + m3 = maximum_likelihood(gdemo_default, OptimizationOptimJL.Newton()) # TODO(mhauru) How can we check that the adtype is actually AutoReverseDiff? 
m4 = maximum_likelihood( gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() @@ -153,9 +146,7 @@ using Turing m5 = maximum_likelihood( gdemo_default, OptimizationOptimJL.NelderMead(); initial_params=true_value ) - m6 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.NelderMead() - ) + m6 = maximum_likelihood(gdemo_default, OptimizationOptimJL.NelderMead()) check_success(m1) check_success(m2) @@ -182,15 +173,11 @@ using Turing true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) check_success(result) = check_optimisation_result(result, true_value, true_logp) - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MAP() - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MAP()) m2 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.LBFGS(); initial_params=true_value ) - m3 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.Newton() - ) + m3 = maximum_a_posteriori(gdemo_default, OptimizationOptimJL.Newton()) m4 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.BFGS(); adtype=AutoReverseDiff() ) @@ -222,34 +209,41 @@ using Turing Random.seed!(222) true_value = [0.0625, 1.75] true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) lb = [0.0, 0.0] ub = [2.0, 2.0] - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MLE(); lb=lb, ub=ub - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MLE(); lb=lb, ub=ub) m2 = maximum_likelihood( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); - initial_params=true_value, lb=lb, ub=ub) + initial_params=true_value, + lb=lb, + ub=ub, + ) m3 = maximum_likelihood( gdemo_default, OptimizationBBO.BBO_separable_nes(); - maxiters=100_000, abstol=1e-5, lb=lb, ub=ub + maxiters=100_000, + abstol=1e-5, + lb=lb, + ub=ub, ) m4 = maximum_likelihood( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); - adtype=AutoReverseDiff(), lb=lb, ub=ub + adtype=AutoReverseDiff(), + lb=lb, + ub=ub, ) m5 = maximum_likelihood( gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=true_value, lb=lb, ub=ub + initial_params=true_value, + lb=lb, + ub=ub, ) m6 = maximum_likelihood(gdemo_default; lb=lb, ub=ub) @@ -275,35 +269,41 @@ using Turing Random.seed!(222) true_value = [49 / 54, 7 / 6] true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) lb = [0.0, 0.0] ub = [2.0, 2.0] - m1 = Turing.Optimisation.estimate_mode( - gdemo_default, MAP(); lb=lb, ub=ub - ) + m1 = Turing.Optimisation.estimate_mode(gdemo_default, MAP(); lb=lb, ub=ub) m2 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.LBFGS()); - initial_params=true_value, lb=lb, ub=ub + initial_params=true_value, + lb=lb, + ub=ub, ) m3 = maximum_a_posteriori( gdemo_default, OptimizationBBO.BBO_separable_nes(); - maxiters=100_000, abstol=1e-5, lb=lb, ub=ub + maxiters=100_000, + abstol=1e-5, + lb=lb, + ub=ub, ) m4 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.Fminbox(OptimizationOptimJL.BFGS()); - adtype=AutoReverseDiff(), lb=lb, 
ub=ub + adtype=AutoReverseDiff(), + lb=lb, + ub=ub, ) m5 = maximum_a_posteriori( gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=true_value, lb=lb, ub=ub + initial_params=true_value, + lb=lb, + ub=ub, ) m6 = maximum_a_posteriori(gdemo_default; lb=lb, ub=ub) @@ -330,9 +330,8 @@ using Turing Random.seed!(222) true_value = [0.0625, 1.75] true_logp = loglikelihood(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) # Set two constraints: The first parameter must be non-negative, and the L2 norm # of the parameters must be between 0.5 and 2. @@ -347,16 +346,20 @@ using Turing ) m2 = maximum_likelihood(gdemo_default; initial_params=true_value, cons_args...) m3 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + cons_args..., ) m4 = maximum_likelihood( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, adtype=AutoReverseDiff(), cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + adtype=AutoReverseDiff(), + cons_args..., ) m5 = maximum_likelihood( - gdemo_default; - initial_params=initial_params, cons_args... + gdemo_default; initial_params=initial_params, cons_args... ) check_success(m1) @@ -380,9 +383,8 @@ using Turing Random.seed!(222) true_value = [49 / 54, 7 / 6] true_logp = logjoint(gdemo_default, (s=true_value[1], m=true_value[2])) - check_success(result, check_retcode=true) = check_optimisation_result( - result, true_value, true_logp, check_retcode - ) + check_success(result, check_retcode=true) = + check_optimisation_result(result, true_value, true_logp, check_retcode) # Set two constraints: The first parameter must be non-negative, and the L2 norm # of the parameters must be between 0.5 and 2. @@ -399,16 +401,20 @@ using Turing gdemo_default; initial_params=true_value, cons_args... ) m3 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + cons_args..., ) m4 = maximum_a_posteriori( - gdemo_default, OptimizationOptimJL.IPNewton(); - initial_params=initial_params, adtype=AutoReverseDiff(), cons_args... + gdemo_default, + OptimizationOptimJL.IPNewton(); + initial_params=initial_params, + adtype=AutoReverseDiff(), + cons_args..., ) m5 = maximum_a_posteriori( - gdemo_default; - initial_params=initial_params, cons_args... + gdemo_default; initial_params=initial_params, cons_args... 
) check_success(m1) @@ -440,10 +446,10 @@ using Turing diffs = coef(mle_est).array - [0.0625031; 1.75001] @test all(isapprox.(diffs, 0.0, atol=0.1)) - infomat = [2/(2*true_values[1]^2) 0.0; 0.0 2/true_values[1]] + infomat = [2/(2 * true_values[1]^2) 0.0; 0.0 2/true_values[1]] @test all(isapprox.(infomat - informationmatrix(mle_est), 0.0, atol=0.01)) - vcovmat = [2*true_values[1]^2/2 0.0; 0.0 true_values[1]/2] + vcovmat = [2 * true_values[1]^2/2 0.0; 0.0 true_values[1]/2] @test all(isapprox.(vcovmat - vcov(mle_est), 0.0, atol=0.01)) ctable = coeftable(mle_est) @@ -462,7 +468,7 @@ using Turing @model function regtest(x, y) beta ~ MvNormal(Zeros(2), I) mu = x * beta - y ~ MvNormal(mu, I) + return y ~ MvNormal(mu, I) end Random.seed!(987) @@ -485,7 +491,7 @@ using Turing s ~ InverseGamma(2, 3) m ~ Normal(0, sqrt(s)) - (.~)(x, Normal(m, sqrt(s))) + return (.~)(x, Normal(m, sqrt(s))) end model_dot = dot_gdemo([1.5, 2.0]) @@ -578,7 +584,7 @@ using Turing @testset "with :=" begin @model function demo_track() x ~ Normal() - y := 100 + x + return y := 100 + x end model = demo_track() result = maximum_a_posteriori(model) diff --git a/test/skipped/advi_demo.jl b/test/skipped/advi_demo.jl index d10e140d0..b6a71c4dc 100644 --- a/test/skipped/advi_demo.jl +++ b/test/skipped/advi_demo.jl @@ -20,7 +20,7 @@ using ConjugatePriors s ~ InverseGamma(2, 3) m ~ Normal(0.0, sqrt(s)) # `Normal(μ, σ)` has mean μ and variance σ², i.e. parametrize with std. not variance - for i = 1:length(x) + for i in 1:length(x) x[i] ~ Normal(m, sqrt(s)) end end @@ -28,28 +28,28 @@ end const seeds = [125, 245, 3] const ad_modes = [:forwarddiff, :reversediff, :tracker] -for seed ∈ seeds +for seed in seeds @info seed - - for ad_mode ∈ ad_modes + + for ad_mode in ad_modes @info ad_mode setadbackend(ad_mode) - + # set random seed Random.seed!(seed) # generate data - x = randn(1, 2000); + x = randn(1, 2000) # construct model m = model(x) - + # ADVI opt = Variational.TruncatedADAGrad() # optimizer advi = ADVI(10, 100) # <: VariationalInference q = Variational.meanfield(m) # => <: VariationalPosterior - + elbo = Variational.ELBO() # <: VariationalObjective μ, σs = params(q) @@ -58,16 +58,16 @@ for seed ∈ seeds history = [elbo(advi, q, m, 1000)] # construct animation - anim = @animate for j = 1:25 + anim = @animate for j in 1:25 # global q - Variational.optimize!(elbo, advi, q, m, θ; optimizer = opt) - μ, ω = θ[1:length(q)], θ[length(q) + 1:end] - + Variational.optimize!(elbo, advi, q, m, θ; optimizer=opt) + μ, ω = θ[1:length(q)], θ[(length(q) + 1):end] + q = Variational.update(q, μ, f.(ω)) samples = rand(q, 2000) # quick check - println([mean(samples, dims=2), [var(x), mean(x)]]) + println([mean(samples; dims=2), [var(x), mean(x)]]) # plotting code assumes (samples, dim) shape so we just transpose samples = transpose(samples) @@ -103,54 +103,59 @@ for seed ∈ seeds # numerically more stable but doesn't seem to have effect; issue is probably internal to # `pdf` which needs to compute ≈ Γ(1000) - p_μ_pdf = z -> exp(logpdf(p_μ, (z - μₙ) * exp(- 0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)))) + p_μ_pdf = + z -> exp( + logpdf( + p_μ, + (z - μₙ) * exp(-0.5 * log(βₙ) + 0.5 * log(αₙ) + 0.5 * log(κₙ)), + ), + ) # p_μ_pdf1 = z -> pdf(p_μ, (z - μₙ) / √(βₙ / (αₙ * κₙ))) # posterior plots - p1 = plot(); - density!(samples[:, 1], label = "s (ADVI)", color = :blue, linestyle = :dash); - histogram!(samples[:, 1], label = "", normed = true, alpha = 0.3, color = :blue); + p1 = plot() + density!(samples[:, 1]; label="s (ADVI)", color=:blue, linestyle=:dash) 
+ histogram!(samples[:, 1]; label="", normed=true, alpha=0.3, color=:blue) # normalize using Riemann approx. because of (almost certainly) numerical issues Δ = 0.001 r = 0.75:0.001:1.50 norm_const = sum(p_σ²_pdf.(r) .* Δ) - plot!(r, p_σ²_pdf, label = "s (posterior)", color = :red); - vline!([var(x)], label = "s (data)", linewidth = 1.5, color = :black, alpha = 0.7); - xlims!(0.5, 1.5); - title!("$(j * advi.max_iters) steps"); - - p2 = plot(); - density!(samples[:, 2], label = "m (ADVI)", color = :blue, linestyle = :dash); - histogram!(samples[:, 2], label = "", normed = true, alpha = 0.3, color = :blue); + plot!(r, p_σ²_pdf; label="s (posterior)", color=:red) + vline!([var(x)]; label="s (data)", linewidth=1.5, color=:black, alpha=0.7) + xlims!(0.5, 1.5) + title!("$(j * advi.max_iters) steps") + p2 = plot() + density!(samples[:, 2]; label="m (ADVI)", color=:blue, linestyle=:dash) + histogram!(samples[:, 2]; label="", normed=true, alpha=0.3, color=:blue) # normalize using Riemann approx. because of (almost certainly) numerical issues Δ = 0.0001 - r = -0.1 + mean(x):Δ:0.1 + mean(x) + r = (-0.1 + mean(x)):Δ:(0.1 + mean(x)) norm_const = sum(p_μ_pdf.(r) .* Δ) - plot!(r, z -> p_μ_pdf(z) / norm_const, label = "m (posterior)", color = :red); - vline!([mean(x)], label = "m (data)", linewidth = 1.5, color = :black, alpha = 0.7); + plot!(r, z -> p_μ_pdf(z) / norm_const; label="m (posterior)", color=:red) + vline!([mean(x)]; label="m (data)", linewidth=1.5, color=:black, alpha=0.7) - xlims!(-0.25, 0.25); + xlims!(-0.25, 0.25) # visualize evolution of objective wrt. optimization iterations obj = elbo(advi, q, m, 1000) @info "ELBO" obj push!(history, obj) - p3 = plot(); - plot!(1:advi.max_iters:length(history) * advi.max_iters, history, label = "") + p3 = plot() + plot!(1:(advi.max_iters):(length(history) * advi.max_iters), history; label="") title!("ELBO = $obj") # plot the latest 25 objective evaluations to visualize trend - p4 = plot(); - plot!(history[max(1, end - 10):end], label = "") + p4 = plot() + plot!(history[max(1, end - 10):end]; label="") + + p = plot(p1, p2, p3, p4; layout=(4, 1)) - p = plot(p1, p2, p3, p4; layout = (4, 1)) - @info "[$j] Done" p end - gif(anim, "advi_w_elbo_fps15_$(seed)_$(ad_mode).gif", fps = 15) + gif(anim, "advi_w_elbo_fps15_$(seed)_$(ad_mode).gif"; fps=15) end end diff --git a/test/skipped/bayes_lr.jl b/test/skipped/bayes_lr.jl index b426ce0cb..6b9b4d31e 100644 --- a/test/skipped/bayes_lr.jl +++ b/test/skipped/bayes_lr.jl @@ -9,13 +9,13 @@ using Turing s = 1 β ~ Normal(0, 1) - for n = 1:N + for n in 1:N ys[n] ~ Normal(xs[n] * β, sqrt(s)) end end N = 100 -xs = collect(range(-10, stop = 10, length = N)) +xs = collect(range(-10; stop=10, length=N)) s = 1 β = rand(Normal(0, 1)) ys = xs * β + rand(Normal(0, sqrt(s)), N) @@ -29,18 +29,18 @@ println("mean of β: ", mean(chn[1000:end, :β])) θ_dim = 1 function lj_func(θ) - N = length(xs) - _lj = zero(Real) + N = length(xs) + _lj = zero(Real) - s = 1 + s = 1 - β = θ[1] - _lj += logpdf(Normal(0, 1), β) - for n = 1:N - _lj += logpdf(Normal(xs[n] * β, sqrt(s)), ys[n]) - end + β = θ[1] + _lj += logpdf(Normal(0, 1), β) + for n in 1:N + _lj += logpdf(Normal(xs[n] * β, sqrt(s)), ys[n]) + end - return _lj + return _lj end neg_lj_func(θ) = -lj_func(θ) @@ -48,18 +48,16 @@ const f_tape = GradientTape(neg_lj_func, randn(θ_dim)) const compiled_f_tape = compile(f_tape) function grad_func(θ) + inputs = θ + results = similar(θ) + all_results = DiffResults.GradientResult(results) - inputs = θ - results = similar(θ) - all_results = 
DiffResults.GradientResult(results) - - ReverseDiff.gradient!(all_results, compiled_f_tape, inputs) - - neg_lj = all_results.value - grad, = all_results.derivs + ReverseDiff.gradient!(all_results, compiled_f_tape, inputs) - return -neg_lj, grad + neg_lj = all_results.value + grad, = all_results.derivs + return -neg_lj, grad end std = ones(θ_dim) @@ -70,12 +68,12 @@ chn = [] accept_num = 1 total_num = 2000 -for iter = 1:total_num - global θ, chn, lj, lj_func, grad_func, std, accept_num - push!(chn, θ) - θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 3, 0.005, std) - accept_num += is_accept -# if (iter % 50 == 0) println(θ) end +for iter in 1:total_num + global θ, chn, lj, lj_func, grad_func, std, accept_num + push!(chn, θ) + θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 3, 0.005, std) + accept_num += is_accept + # if (iter % 50 == 0) println(θ) end end @show mean(chn[1000:end]), lj diff --git a/test/skipped/dual_averaging.jl b/test/skipped/dual_averaging.jl index 01c1cb401..897abe8d9 100644 --- a/test/skipped/dual_averaging.jl +++ b/test/skipped/dual_averaging.jl @@ -1,6 +1,4 @@ -function _adapt_ϵ(logϵ, Hbar, logϵbar, da_stat, m, M_adapt, δ, μ; - γ=0.05, t0=10, κ=0.75) - +function _adapt_ϵ(logϵ, Hbar, logϵbar, da_stat, m, M_adapt, δ, μ; γ=0.05, t0=10, κ=0.75) if m <= M_adapt Hbar = (1.0 - 1.0 / (m + t0)) * Hbar + (1 / (m + t0)) * (δ - da_stat) logϵ = μ - sqrt(m) / γ * Hbar diff --git a/test/skipped/explicit_ret.jl b/test/skipped/explicit_ret.jl index f0b53d093..c1340464f 100644 --- a/test/skipped/explicit_ret.jl +++ b/test/skipped/explicit_ret.jl @@ -2,17 +2,18 @@ using Turing using Test @model function test_ex_rt() - x ~ Normal(10, 1) - y ~ Normal(x / 2, 1) - z = y + 1 - x = x - 1 - x, y, z + x ~ Normal(10, 1) + y ~ Normal(x / 2, 1) + z = y + 1 + x = x - 1 + return x, y, z end mf = test_ex_rt() -for alg = [HMC(0.2, 3), PG(20, 2000), SMC(), IS(10000), Gibbs(PG(20, 1, :x), HMC(0.2, 3, :y))] - chn = sample(mf, alg) - @test mean(chn[:x]) ≈ 10.0 atol=0.2 - @test mean(chn[:y]) ≈ 5.0 atol=0.2 +for alg in + [HMC(0.2, 3), PG(20, 2000), SMC(), IS(10000), Gibbs(PG(20, 1, :x), HMC(0.2, 3, :y))] + chn = sample(mf, alg) + @test mean(chn[:x]) ≈ 10.0 atol = 0.2 + @test mean(chn[:y]) ≈ 5.0 atol = 0.2 end diff --git a/test/skipped/gdemo.jl b/test/skipped/gdemo.jl index 610e5b6ee..84242067e 100644 --- a/test/skipped/gdemo.jl +++ b/test/skipped/gdemo.jl @@ -6,11 +6,11 @@ using Turing @model function gdemo() s ~ InverseGamma(2, 3) - m ~ Normal(0,sqrt(s)) + m ~ Normal(0, sqrt(s)) 1.5 ~ Normal(m, sqrt(s)) 2.0 ~ Normal(m, sqrt(s)) return s, m - end +end # Plain Julia @@ -19,18 +19,18 @@ using Turing: invlink, logpdf θ_dim = 2 function lj_func(θ) - _lj = zero(Real) + _lj = zero(Real) - d_s = InverseGamma(2, 3) - s = invlink(d_s, θ[1]) - _lj += logpdf(d_s, s, true) - m = θ[2] - _lj += logpdf(Normal(0, sqrt(s)), m) + d_s = InverseGamma(2, 3) + s = invlink(d_s, θ[1]) + _lj += logpdf(d_s, s, true) + m = θ[2] + _lj += logpdf(Normal(0, sqrt(s)), m) - _lj += logpdf(Normal(m, sqrt(s)), 1.5) - _lj += logpdf(Normal(m, sqrt(s)), 2.0) + _lj += logpdf(Normal(m, sqrt(s)), 1.5) + _lj += logpdf(Normal(m, sqrt(s)), 2.0) - return _lj + return _lj end neg_lj_func(θ) = -lj_func(θ) @@ -38,18 +38,16 @@ const f_tape = GradientTape(neg_lj_func, randn(θ_dim)) const compiled_f_tape = compile(f_tape) function grad_func(θ) + inputs = θ + results = similar(θ) + all_results = DiffResults.GradientResult(results) - inputs = θ - results = similar(θ) - all_results = DiffResults.GradientResult(results) - - 
gradient!(all_results, compiled_f_tape, inputs) - - neg_lj = all_results.value - grad, = all_results.derivs + gradient!(all_results, compiled_f_tape, inputs) - return -neg_lj, grad + neg_lj = all_results.value + grad, = all_results.derivs + return -neg_lj, grad end # Unit test for gradient diff --git a/test/skipped/gdemo_hmc.jl b/test/skipped/gdemo_hmc.jl index 577a10230..855b06f89 100644 --- a/test/skipped/gdemo_hmc.jl +++ b/test/skipped/gdemo_hmc.jl @@ -17,18 +17,18 @@ std = ones(θ_dim) θ = randn(θ_dim) lj = lj_func(θ) -chn = Dict(:θ=>Vector{Vector{Float64}}(), :logϵ=>Vector{Float64}()) +chn = Dict(:θ => Vector{Vector{Float64}}(), :logϵ => Vector{Float64}()) accept_num = 0 function dummy_print(args...) - nothing + return nothing end totla_num = 5000 -for iter = 1:totla_num - push!(chn[:θ], θ) - θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 5, 0.05, std) - accept_num += is_accept +for iter in 1:totla_num + push!(chn[:θ], θ) + θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 5, 0.05, std) + accept_num += is_accept end @show lj diff --git a/test/skipped/gdemo_nuts.jl b/test/skipped/gdemo_nuts.jl index 34c4c6f57..ecc4d5c7f 100644 --- a/test/skipped/gdemo_nuts.jl +++ b/test/skipped/gdemo_nuts.jl @@ -24,49 +24,47 @@ Hbar = 0 δ = 0.75 -for test_id = 1:2 - - test_name = "$test_id. NUTS " * (test_id == 1 ? "without DA" : "with DA") - - @testset "$test_name" begin - - std = ones(θ_dim) - θ = randn(θ_dim) - lj = lj_func(θ) - - chn = Dict(:θ=>Vector{Vector{Float64}}(), :logϵ=>Vector{Float64}()) - - function dummy_print(args...) - nothing - end - - println("Start to run NUTS") - - totla_num = 10000 - for iter = 1:totla_num - global logϵ, lj_func, grad_func, M_adapt, δ, μ - θ, da_stat = _nuts_step(θ, exp(logϵ), lj_func, grad_func, std) - if test_id == 1 - logϵ, Hbar, logϵbar = _adapt_ϵ(logϵ, Hbar, logϵbar, da_stat, iter, M_adapt, δ, μ) - end - - push!(chn[:θ], θ) - push!(chn[:logϵ], logϵ) - # if (iter % 50 == 0) println(θ) end +for test_id in 1:2 + test_name = "$test_id. NUTS " * (test_id == 1 ? "without DA" : "with DA") + + @testset "$test_name" begin + std = ones(θ_dim) + θ = randn(θ_dim) + lj = lj_func(θ) + + chn = Dict(:θ => Vector{Vector{Float64}}(), :logϵ => Vector{Float64}()) + + function dummy_print(args...) 
+ return nothing + end + + println("Start to run NUTS") + + totla_num = 10000 + for iter in 1:totla_num + global logϵ, lj_func, grad_func, M_adapt, δ, μ + θ, da_stat = _nuts_step(θ, exp(logϵ), lj_func, grad_func, std) + if test_id == 1 + logϵ, Hbar, logϵbar = _adapt_ϵ( + logϵ, Hbar, logϵbar, da_stat, iter, M_adapt, δ, μ + ) + end + + push!(chn[:θ], θ) + push!(chn[:logϵ], logϵ) + # if (iter % 50 == 0) println(θ) end + end + + samples_s = exp.(map(x -> x[1], chn[:θ])) + samples_m = map(x -> x[2], chn[:θ]) + @show mean(samples_s[1000:end]) + @test mean(samples_s[1000:end]) ≈ 49 / 24 atol = 0.2 + @show mean(samples_m[1000:end]) + @test mean(samples_m[1000:end]) ≈ 7 / 6 atol = 0.2 + + @show std(samples_s[1000:end]) + @show std(samples_m[1000:end]) + + @show mean(exp.(chn[:logϵ])) end - - samples_s = exp.(map(x -> x[1], chn[:θ])) - samples_m = map(x -> x[2], chn[:θ]) - @show mean(samples_s[1000:end]) - @test mean(samples_s[1000:end]) ≈ 49/24 atol=0.2 - @show mean(samples_m[1000:end]) - @test mean(samples_m[1000:end]) ≈ 7/6 atol=0.2 - - @show std(samples_s[1000:end]) - @show std(samples_m[1000:end]) - - @show mean(exp.(chn[:logϵ])) - - end - end diff --git a/test/skipped/hmcda_geweke.jl b/test/skipped/hmcda_geweke.jl index b0e43ecb5..8e2f20986 100644 --- a/test/skipped/hmcda_geweke.jl +++ b/test/skipped/hmcda_geweke.jl @@ -5,29 +5,41 @@ using Gadfly import Gadfly.ElementOrFunction # First add a method to the basic Gadfly.plot function for QQPair types (generated by Distributions.qqbuild()) -Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) = Gadfly.plot(x=qq.qx, y=qq.qy, Geom.point, Theme(highlight_width=0px), elements...) +function Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) + return Gadfly.plot(; + x=qq.qx, y=qq.qy, Geom.point, Theme(; highlight_width=0px), elements... + ) +end # Now some shorthand functions qqplot(x, y, elements::ElementOrFunction...) = Gadfly.plot(qqbuild(x, y), elements...) -qqnorm(x, elements::ElementOrFunction...) = qqplot(Normal(), x, Guide.xlabel("Theoretical Normal quantiles"), Guide.ylabel("Observed quantiles"), elements...) +function qqnorm(x, elements::ElementOrFunction...) 
+ return qqplot( + Normal(), + x, + Guide.xlabel("Theoretical Normal quantiles"), + Guide.ylabel("Observed quantiles"), + elements..., + ) +end NSamples = 5000 @model function gdemo_fw() - # s ~ InverseGamma(2,3) - s = 1 - m ~ Normal(0, sqrt(s)) - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # s ~ InverseGamma(2,3) + s = 1 + m ~ Normal(0, sqrt(s)) + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end @model function gdemo_bk(x) - # Backward Step 1: theta ~ theta | x - # s ~ InverseGamma(2,3) - s = 1 - m ~ Normal(0,sqrt(s)) - x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) - # Backward Step 2: x ~ x | theta - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 1: theta ~ theta | x + # s ~ InverseGamma(2,3) + s = 1 + m ~ Normal(0, sqrt(s)) + x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 2: x ~ x | theta + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end fw = PG(50, NSamples) @@ -44,24 +56,30 @@ s_bk = Array{Turing.Chain}(undef, N) simple_logger = Base.CoreLogging.SimpleLogger(stderr, Base.CoreLogging.Debug) Base.CoreLogging.with_logger(simple_logger) do - global x, bk, s_bk - i = 1 - while i <= N - s_bk[i] = sample(gdemo_bk(x), bk) - x = s_bk[i][end, :y] - i += 1 - end + global x, bk, s_bk + i = 1 + while i <= N + s_bk[i] = sample(gdemo_bk(x), bk) + x = s_bk[i][end, :y] + i += 1 + end end s2 = reduce(vcat, s_bk); # describe(s2) - using UnicodePlots qqm = qqbuild(s[:m], s2[:m]) -show(scatterplot(qqm.qx, qqm.qy, title = "QQ plot for m", canvas = DotCanvas)) -show(scatterplot(qqm.qx[51:end-50], qqm.qy[51:end-50], title = "QQ plot for m (removing first and last 50 quantiles):", canvas = DotCanvas)) +show(scatterplot(qqm.qx, qqm.qy; title="QQ plot for m", canvas=DotCanvas)) +show( + scatterplot( + qqm.qx[51:(end - 50)], + qqm.qy[51:(end - 50)]; + title="QQ plot for m (removing first and last 50 quantiles):", + canvas=DotCanvas, + ), +) qqm = qqbuild(s[:m], s2[:m]) X = qqm.qx @@ -71,10 +89,10 @@ slope = (1 / (transpose(X) * X)[1] * transpose(X) * y)[1] print(" slopeₛ = $slope ≈ 1 (ϵ = 0.1)") ans1 = abs(slope - 1.0) <= 0.1 if ans1 - printstyled(" ✓\n", color=:green) + printstyled(" ✓\n"; color=:green) else - printstyled(" X\n", color=:red) - printstyled(" slope = $slope, diff = $(slope - 1.0)\n", color=:red) + printstyled(" X\n"; color=:red) + printstyled(" slope = $slope, diff = $(slope - 1.0)\n"; color=:red) end # qqs = qqbuild(s[:s], s2[:s]) diff --git a/test/skipped/nuts_geweke.jl b/test/skipped/nuts_geweke.jl index 070b7f552..d312be884 100644 --- a/test/skipped/nuts_geweke.jl +++ b/test/skipped/nuts_geweke.jl @@ -5,29 +5,41 @@ using Gadfly import Gadfly.ElementOrFunction # First add a method to the basic Gadfly.plot function for QQPair types (generated by Distributions.qqbuild()) -Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) = Gadfly.plot(x=qq.qx, y=qq.qy, Geom.point, Theme(highlight_width=0px), elements...) +function Gadfly.plot(qq::QQPair, elements::ElementOrFunction...) + return Gadfly.plot(; + x=qq.qx, y=qq.qy, Geom.point, Theme(; highlight_width=0px), elements... + ) +end # Now some shorthand functions qqplot(x, y, elements::ElementOrFunction...) = Gadfly.plot(qqbuild(x, y), elements...) -qqnorm(x, elements::ElementOrFunction...) = qqplot(Normal(), x, Guide.xlabel("Theoretical Normal quantiles"), Guide.ylabel("Observed quantiles"), elements...) +function qqnorm(x, elements::ElementOrFunction...) 
+ return qqplot( + Normal(), + x, + Guide.xlabel("Theoretical Normal quantiles"), + Guide.ylabel("Observed quantiles"), + elements..., + ) +end NSamples = 5000 @model function gdemo_fw() - s ~ InverseGamma(2,3) - # s = 1 - m ~ Normal(0,sqrt(s)) - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + s ~ InverseGamma(2, 3) + # s = 1 + m ~ Normal(0, sqrt(s)) + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end @model function gdemo_bk(x) - # Backward Step 1: theta ~ theta | x - s ~ InverseGamma(2,3) - # s = 1 - m ~ Normal(0,sqrt(s)) - x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) - # Backward Step 2: x ~ x | theta - y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 1: theta ~ theta | x + s ~ InverseGamma(2, 3) + # s = 1 + m ~ Normal(0, sqrt(s)) + x ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) + # Backward Step 2: x ~ x | theta + return y ~ MvNormal([m; m; m], [sqrt(s) 0 0; 0 sqrt(s) 0; 0 0 sqrt(s)]) end fw = IS(NSamples) @@ -45,18 +57,17 @@ s_bk = Array{MCMCChains.Chains}(undef, N) simple_logger = Base.CoreLogging.SimpleLogger(stderr, Base.CoreLogging.Debug) with_logger(simple_logger) do - i = 1 - while i <= N - s_bk[i] = sample(gdemo_bk(x), bk) - x = s_bk[i][end, :y] - i += 1 - end + i = 1 + while i <= N + s_bk[i] = sample(gdemo_bk(x), bk) + x = s_bk[i][end, :y] + i += 1 + end end s2 = chainscat(s_bk...) # describe(s2) - # qqplot(s[:m], s2[:m]) # qqplot(s[:s], s2[:s]) @@ -65,14 +76,33 @@ qqm = qqbuild(s[:m], s2[:m]) using UnicodePlots qqm = qqbuild(s[:m], s2[:m]) -show(scatterplot(qqm.qx, qqm.qy, title = "QQ plot for m", canvas = DotCanvas)) -show(scatterplot(qqm.qx[51:end-50], qqm.qy[51:end-50], title = "QQ plot for m (removing first and last 50 quantiles):", canvas = DotCanvas)) -show(scatterplot(qqm.qx, qqm.qy, title = "QQ plot for m")) -show(scatterplot(qqm.qx[51:end-50], qqm.qy[51:end-50], title = "QQ plot for m (removing first and last 50 quantiles):")) +show(scatterplot(qqm.qx, qqm.qy; title="QQ plot for m", canvas=DotCanvas)) +show( + scatterplot( + qqm.qx[51:(end - 50)], + qqm.qy[51:(end - 50)]; + title="QQ plot for m (removing first and last 50 quantiles):", + canvas=DotCanvas, + ), +) +show(scatterplot(qqm.qx, qqm.qy; title="QQ plot for m")) +show( + scatterplot( + qqm.qx[51:(end - 50)], + qqm.qy[51:(end - 50)]; + title="QQ plot for m (removing first and last 50 quantiles):", + ), +) qqs = qqbuild(s[:s].value, s2[:s].value) -show(scatterplot(qqs.qx, qqs.qy, title = "QQ plot for s")) -show(scatterplot(qqs.qx[51:end-50], qqs.qy[51:end-50], title = "QQ plot for s (removing first and last 50 quantiles):")) +show(scatterplot(qqs.qx, qqs.qy; title="QQ plot for s")) +show( + scatterplot( + qqs.qx[51:(end - 50)], + qqs.qy[51:(end - 50)]; + title="QQ plot for s (removing first and last 50 quantiles):", + ), +) X = qqm.qx y = qqm.qy @@ -81,10 +111,10 @@ slope = (1 / (transpose(X) * X)[1] * transpose(X) * y)[1] print(" slopeₛ = $slope ≈ 1 (ϵ = 0.1)") ans1 = abs(slope - 1.0) <= 0.1 if ans1 - printstyled(" ✓\n", color=:green) + printstyled(" ✓\n"; color=:green) else - printstyled(" X\n", color=:red) - printstyled(" slope = $slope, diff = $(slope - 1.0)\n", color=:red) + printstyled(" X\n"; color=:red) + printstyled(" slope = $slope, diff = $(slope - 1.0)\n"; color=:red) end # qqs = qqbuild(s[:s], s2[:s]) diff --git a/test/skipped/opt_param_of_dist.jl b/test/skipped/opt_param_of_dist.jl index e750208d8..3c8e4d2dc 100644 --- a/test/skipped/opt_param_of_dist.jl +++ 
b/test/skipped/opt_param_of_dist.jl @@ -2,11 +2,11 @@ using Turing using Test @model testassume begin - x ~ Bernoulli(1; :static = true) - y ~ Bernoulli(x / 2; :param = true) - 1 ~ Normal(0, 1; :static = true) - 2 ~ Normal(0, 1; :param = true) - y, x + x ~ Bernoulli(1; :static=true) + y ~ Bernoulli(x / 2; :param=true) + 1 ~ Normal(0, 1; :static=true) + 2 ~ Normal(0, 1; :param=true) + y, x end s = SMC() @@ -17,4 +17,4 @@ res = sample(testassume, s) @test all(isone, res[:x]) # check that the mean of y is between 0.4 and 0.6 -@test mean(res[:y]) ≈ 0.5 atol=0.1 +@test mean(res[:y]) ≈ 0.5 atol = 0.1 diff --git a/test/skipped/simple_gauss.jl b/test/skipped/simple_gauss.jl index 935ced27b..14f36dce1 100644 --- a/test/skipped/simple_gauss.jl +++ b/test/skipped/simple_gauss.jl @@ -6,9 +6,9 @@ using Turing @model function simple_gauss() s = 1 - m ~ Normal(0,sqrt(s)) + m ~ Normal(0, sqrt(s)) 2.0 ~ Normal(m, sqrt(s)) - 2.5 ~ Normal(m, sqrt(s)) + return 2.5 ~ Normal(m, sqrt(s)) end # Plain Julia @@ -17,17 +17,17 @@ using ReverseDiff: GradientTape, GradientConfig, gradient, gradient!, compile θ_dim = 1 function lj_func(θ) - _lj = zero(Real) + _lj = zero(Real) - s = 1 + s = 1 - m = θ[1] - _lj += logpdf(Normal(0, sqrt(s)), m) + m = θ[1] + _lj += logpdf(Normal(0, sqrt(s)), m) - _lj += logpdf(Normal(m, sqrt(s)), 2.0) - _lj += logpdf(Normal(m, sqrt(s)), 2.5) + _lj += logpdf(Normal(m, sqrt(s)), 2.0) + _lj += logpdf(Normal(m, sqrt(s)), 2.5) - return _lj + return _lj end neg_lj_func(θ) = -lj_func(θ) @@ -35,18 +35,16 @@ const f_tape = GradientTape(neg_lj_func, randn(θ_dim)) const compiled_f_tape = compile(f_tape) function grad_func(θ) + inputs = θ + results = similar(θ) + all_results = DiffResults.GradientResult(results) - inputs = θ - results = similar(θ) - all_results = DiffResults.GradientResult(results) - - gradient!(all_results, compiled_f_tape, inputs) - - neg_lj = all_results.value - grad, = all_results.derivs + gradient!(all_results, compiled_f_tape, inputs) - return -neg_lj, grad + neg_lj = all_results.value + grad, = all_results.derivs + return -neg_lj, grad end # Unit test for gradient diff --git a/test/skipped/simple_gauss_hmc.jl b/test/skipped/simple_gauss_hmc.jl index 8b2e41d7f..ba1f9dd23 100644 --- a/test/skipped/simple_gauss_hmc.jl +++ b/test/skipped/simple_gauss_hmc.jl @@ -16,17 +16,14 @@ std = ones(θ_dim) θ = randn(θ_dim) lj = lj_func(θ) -chn = Dict(:θ=>Vector{Vector{Float64}}(), :logϵ=>Vector{Float64}()) +chn = Dict(:θ => Vector{Vector{Float64}}(), :logϵ => Vector{Float64}()) accept_num = 1 - totla_num = 10000 -for iter = 1:totla_num - - push!(chn[:θ], θ) - θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 10, 0.05, std) - accept_num += is_accept - +for iter in 1:totla_num + push!(chn[:θ], θ) + θ, lj, is_accept, τ_valid, α = _hmc_step(θ, lj, lj_func, grad_func, 10, 0.05, std) + accept_num += is_accept end @show lj diff --git a/test/skipped/sv.jl b/test/skipped/sv.jl index 29b301614..b5fe22e71 100644 --- a/test/skipped/sv.jl +++ b/test/skipped/sv.jl @@ -2,37 +2,27 @@ using ReverseDiff: GradientTape, GradientConfig, gradient, gradient!, compile using Turing: _hmc_step using Turing using HDF5, JLD -sv_data = load(TPATH*"/example-models/nips-2017/sv-data.jld.data")["data"] +sv_data = load(TPATH * "/example-models/nips-2017/sv-data.jld.data")["data"] @model function sv_model(T, y) ϕ ~ Uniform(-1, 1) - σ ~ truncated(Cauchy(0,5), 0, Inf) + σ ~ truncated(Cauchy(0, 5), 0, Inf) μ ~ Cauchy(0, 10) h = Vector{Real}(T) h[1] ~ Normal(μ, σ / sqrt(1 - ϕ^2)) y[1] ~ Normal(0, exp.(h[1] / 2)) - 
for t = 2:T - h[t] ~ Normal(μ + ϕ * (h[t-1] - μ) , σ) - y[t] ~ Normal(0, exp.(h[t] / 2)) + for t in 2:T + h[t] ~ Normal(μ + ϕ * (h[t - 1] - μ), σ) + y[t] ~ Normal(0, exp.(h[t] / 2)) end - end +end - -mf = sv_model(data=sv_data[1]) +mf = sv_model(; data=sv_data[1]) chain_nuts = sample(mf, HMC(0.05, 10), 2000) println("mean of m: $(mean(chn[1000:end, :μ]))") - - - - - - - - - # θ_dim = 1 # function lj_func(θ) # _lj = zero(Real) @@ -74,7 +64,6 @@ println("mean of m: $(mean(chn[1000:end, :μ]))") # chn = [] # accept_num = 1 - # totla_num = 5000 # for iter = 1:totla_num # push!(chn, θ) diff --git a/test/skipped/unit_test_helper.jl b/test/skipped/unit_test_helper.jl index 1517459c5..174234480 100644 --- a/test/skipped/unit_test_helper.jl +++ b/test/skipped/unit_test_helper.jl @@ -10,9 +10,11 @@ function test_grad(turing_model, grad_f; trans=Dict()) @testset "Gradient using random inputs" begin ℓ = LogDensityProblemsAD.ADgradient( Turing.AutoTracker(), - Turing.LogDensityFunction(vi, model_f, SampleFromPrior(), DynamicPPL.DefaultContext()), + Turing.LogDensityFunction( + vi, model_f, SampleFromPrior(), DynamicPPL.DefaultContext() + ), ) - for _ = 1:10000 + for _ in 1:10000 theta = rand(d) @test LogDensityProblems.logdensity_and_gradient(ℓ, theta) == grad_f(theta)[2] end diff --git a/test/skipped/vec_assume_mat.jl b/test/skipped/vec_assume_mat.jl index 28ae99d47..b9efc94d0 100644 --- a/test/skipped/vec_assume_mat.jl +++ b/test/skipped/vec_assume_mat.jl @@ -4,17 +4,17 @@ N = 5 alg = HMC(0.2, 4) @model function vdemo(::Type{T}=Float64) where {T} - v = Vector{Matrix{T}}(undef, N) - @. v ~ Wishart(7, [1 0.5; 0.5 1]) + v = Vector{Matrix{T}}(undef, N) + @. v ~ Wishart(7, [1 0.5; 0.5 1]) end t_vec = @elapsed res_vec = sample(vdemo(), alg, 1000) @model function vdemo() - v = Vector{Matrix{Real}}(undef, N) - for i = 1:N - v[i] ~ Wishart(7, [1 0.5; 0.5 1]) - end + v = Vector{Matrix{Real}}(undef, N) + for i in 1:N + v[i] ~ Wishart(7, [1 0.5; 0.5 1]) + end end t_loop = @elapsed res = sample(vdemo(), alg, 1000) diff --git a/test/skipped/vec_assume_mv.jl b/test/skipped/vec_assume_mv.jl index bafa1f096..ba373b124 100644 --- a/test/skipped/vec_assume_mv.jl +++ b/test/skipped/vec_assume_mv.jl @@ -7,24 +7,26 @@ alg = HMC(0.2, 4) # Test for vectorize UnivariateDistribution @model function vdemo() - phi = Vector{Vector{Real}}(undef, N) - @> phi ~ Dirichlet(beta) + phi = Vector{Vector{Real}}(undef, N) + @> phi ~ Dirichlet(beta) end ch_vec, t_vec, m_vec, gctime, memallocs = @timed res_vec = sample(vdemo(), alg) @model function vdemo() - phi = Matrix(undef, 2, N) - @. phi ~ Dirichlet(beta) + phi = Matrix(undef, 2, N) + @. 
phi ~ Dirichlet(beta) end -ch_vec_mat, t_vec_mat, m_vec_mat, gctime, memallocs = @timed res_vec_mat = sample(vdemo(), alg) +ch_vec_mat, t_vec_mat, m_vec_mat, gctime, memallocs = @timed res_vec_mat = sample( + vdemo(), alg +) @model function vdemo() - phi = Vector{Vector{Real}}(undef, N) - for i = 1:N - phi[i] ~ Dirichlet(beta) - end + phi = Vector{Vector{Real}}(undef, N) + for i in 1:N + phi[i] ~ Dirichlet(beta) + end end ch_loop, t_loop, m_loop, gctime, memallocs = @timed res = sample(vdemo(), alg) diff --git a/test/stdlib/RandomMeasures.jl b/test/stdlib/RandomMeasures.jl index e2be8fb2d..cf4ef90ad 100644 --- a/test/stdlib/RandomMeasures.jl +++ b/test/stdlib/RandomMeasures.jl @@ -1,7 +1,7 @@ module RandomMeasuresTests using Distributions: Normal, sample -import Random +using Random: Random using Test: @test, @testset using Turing using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess @@ -31,37 +31,37 @@ using Turing.RandomMeasures: ChineseRestaurantProcess, DirichletProcess # Number of clusters. K = maximum(z) nk = Vector{Int}(map(k -> sum(z .== k), 1:K)) - + # Draw the latent assignment. z[i] ~ ChineseRestaurantProcess(rpm, nk) - + # Create a new cluster? if z[i] > K push!(μ, 0.0) - + # Draw location of new cluster. μ[z[i]] ~ H end - + # Draw observation. x[i] ~ Normal(μ[z[i]], 1.0) end end - + # Generate some test data. - Random.seed!(1); - data = vcat(randn(10), randn(10) .- 5, randn(10) .+ 10); - data .-= mean(data); - data /= std(data); - + Random.seed!(1) + data = vcat(randn(10), randn(10) .- 5, randn(10) .+ 10) + data .-= mean(data) + data /= std(data) + # MCMC sampling - Random.seed!(2); - iterations = 500; - model_fun = infiniteGMM(data); - chain = sample(model_fun, SMC(), iterations); + Random.seed!(2) + iterations = 500 + model_fun = infiniteGMM(data) + chain = sample(model_fun, SMC(), iterations) @test chain isa MCMCChains.Chains - @test eltype(chain.value) === Union{Float64, Missing} + @test eltype(chain.value) === Union{Float64,Missing} end # partitions = [ # [[1, 2, 3, 4]], diff --git a/test/stdlib/distributions.jl b/test/stdlib/distributions.jl index f1ce701da..0d42fb76b 100644 --- a/test/stdlib/distributions.jl +++ b/test/stdlib/distributions.jl @@ -3,7 +3,7 @@ module DistributionsTests using ..NumericalTests: check_dist_numerical using Distributions using LinearAlgebra: I -import Random +using Random: Random using StableRNGs: StableRNG using StatsFuns: logistic using Test: @testset, @test @@ -33,11 +33,10 @@ using Turing end @testset "distributions functions" begin - λ = .01:.01:5 - LLp = @. logpdf(Poisson(λ),1) - LLlp = @. logpdf(LogPoisson(log(λ)),1) - @test LLp ≈ LLlp atol = .0001 - + λ = 0.01:0.01:5 + LLp = @. logpdf(Poisson(λ), 1) + LLlp = @. logpdf(LogPoisson(log(λ)), 1) + @test LLp ≈ LLlp atol = 0.0001 end @testset "single distribution correctness" begin @@ -106,31 +105,32 @@ using Turing # 3. 
MatrixDistribution dist_matrix = [ - Wishart(7, [1.0 0.5; 0.5 1.0]), - InverseWishart(7, [1.0 0.5; 0.5 1.0]), + Wishart(7, [1.0 0.5; 0.5 1.0]), InverseWishart(7, [1.0 0.5; 0.5 1.0]) ] @testset "Correctness test for single distributions" begin - for (dist_set, dist_list) ∈ [ - ("UnivariateDistribution", dist_uni), + for (dist_set, dist_list) in [ + ("UnivariateDistribution", dist_uni), ("MultivariateDistribution", dist_multi), - ("MatrixDistribution", dist_matrix) + ("MatrixDistribution", dist_matrix), ] @testset "$(string(dist_set))" begin for dist in dist_list - @testset "$(string(typeof(dist)))" begin - @info "Distribution(params)" dist + @testset "$(string(typeof(dist)))" begin + @info "Distribution(params)" dist - @model m() = x ~ dist + @model m() = x ~ dist - chn = sample(rng, m(), HMC(0.05, 20), n_samples) + chn = sample(rng, m(), HMC(0.05, 20), n_samples) - # Numerical tests. - check_dist_numerical(dist, - chn, - mean_tol=mean_tol, - var_atol=var_atol, - var_tol=var_tol) + # Numerical tests. + check_dist_numerical( + dist, + chn; + mean_tol=mean_tol, + var_atol=var_atol, + var_tol=var_tol, + ) end end end diff --git a/test/test_utils/SelectiveTests.jl b/test/test_utils/SelectiveTests.jl index d4a20bc91..3026cd16d 100644 --- a/test/test_utils/SelectiveTests.jl +++ b/test/test_utils/SelectiveTests.jl @@ -24,7 +24,7 @@ function parse_args(args) excluded_paths = Vector{String}() for (i, arg) in enumerate(args) if arg == "--skip" - append!(excluded_paths, args[i+1:end]) + append!(excluded_paths, args[(i + 1):end]) break else push!(included_paths, arg) diff --git a/test/test_utils/models.jl b/test/test_utils/models.jl index 94790a041..344002c05 100644 --- a/test/test_utils/models.jl +++ b/test/test_utils/models.jl @@ -1,6 +1,11 @@ module Models -export MoGtest, MoGtest_default, MoGtest_default_z_vector, MoGtest_z_vector, gdemo, gdemo_d, +export MoGtest, + MoGtest_default, + MoGtest_default_z_vector, + MoGtest_z_vector, + gdemo, + gdemo_d, gdemo_default using Distributions @@ -8,19 +13,19 @@ using Turing: @model # The old-gdemo model. @model function gdemo(x, y) - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - x ~ Normal(m, sqrt(s)) - y ~ Normal(m, sqrt(s)) - return s, m + s ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s)) + x ~ Normal(m, sqrt(s)) + y ~ Normal(m, sqrt(s)) + return s, m end @model function gdemo_d() - s ~ InverseGamma(2, 3) - m ~ Normal(0, sqrt(s)) - 1.5 ~ Normal(m, sqrt(s)) - 2.0 ~ Normal(m, sqrt(s)) - return s, m + s ~ InverseGamma(2, 3) + m ~ Normal(0, sqrt(s)) + 1.5 ~ Normal(m, sqrt(s)) + 2.0 ~ Normal(m, sqrt(s)) + return s, m end gdemo_default = gdemo_d() @@ -52,7 +57,7 @@ gdemo_default = gdemo_d() else D[4] ~ Normal(mu2, 1) end - z1, z2, z3, z4, mu1, mu2 + return z1, z2, z3, z4, mu1, mu2 end MoGtest_default = MoGtest([1.0 1.0 4.0 4.0]) @@ -86,7 +91,7 @@ MoGtest_default = MoGtest([1.0 1.0 4.0 4.0]) else D[4] ~ Normal(mu2, 1) end - z[1], z[2], z[3], z[4], mu1, mu2 + return z[1], z[2], z[3], z[4], mu1, mu2 end MoGtest_default_z_vector = MoGtest_z_vector([1.0 1.0 4.0 4.0])