diff --git a/src/_quarto.yml b/src/_quarto.yml
index a202fded4..f6aa243df 100644
--- a/src/_quarto.yml
+++ b/src/_quarto.yml
@@ -188,6 +188,7 @@ website:
       - reference-manual/pathfinder.qmd
       - reference-manual/variational.qmd
       - reference-manual/laplace.qmd
+      - reference-manual/laplace_embedded.qmd
       - reference-manual/diagnostics.qmd
     - section: "Usage"
       contents:
@@ -237,6 +238,7 @@ website:
     - section: "Additional Distributions"
       contents:
         - functions-reference/hidden_markov_models.qmd
+        - functions-reference/embedded_laplace.qmd
     - section: "Appendix"
       contents:
         - functions-reference/mathematical_functions.qmd
diff --git a/src/bibtex/all.bib b/src/bibtex/all.bib
index 9f8fabd3e..d59eca688 100644
--- a/src/bibtex/all.bib
+++ b/src/bibtex/all.bib
@@ -274,7 +274,7 @@ @article{turing:1936
 volume={58}, number={345-363}, pages={5}, year={1936} }
-
+
 @article{kucukelbir:2015, title={Automatic variational inference in Stan},
 author={Kucukelbir, Alp and Ranganath, Rajesh and Gelman, Andrew and Blei,
 David}, journal={Advances in
@@ -1273,7 +1273,7 @@ @article{Timonen+etal:2023:ODE-PSIS
 title={An importance sampling approach for reliable and efficient inference
 in {Bayesian} ordinary differential equation models}, author={Timonen, Juho
 and Siccha, Nikolas and Bales, Ben and L{\"a}hdesm{\"a}ki, Harri and Vehtari,
-Aki}, journal={Stat}, year={2023}, volume = 12, number = 1, pages = {e614} 
+Aki}, journal={Stat}, year={2023}, volume = 12, number = 1, pages = {e614}
 }
 
 @article{Vehtari+etal:2024:PSIS,
@@ -1320,5 +1320,71 @@ @misc{seyboldt:2024
 @article{lancaster:1965, ISSN = {00029890, 19300972}, URL =
 {http://www.jstor.org/stable/2312989}, author = {H. O. Lancaster}, journal =
 {The American Mathematical Monthly}, number = {1}, pages = {4--12}, publisher =
-{Mathematical Association of America}, title = {The {H}elmert Matrices}, urldate
-= {2025-05-07}, volume = {72}, year = {1965} }
+{Mathematical Association of America}, title = {The {H}elmert Matrices},
+urldate = {2025-05-07}, volume = {72}, year = {1965} }
+
+@article{Margossian:2020,
+  author = {Margossian, Charles C and Vehtari, Aki and Simpson, Daniel and Agrawal, Raj},
+  title = {Hamiltonian Monte Carlo using an adjoint-differentiated Laplace approximation: Bayesian inference for latent Gaussian models and beyond},
+  journal = {Advances in Neural Information Processing Systems},
+  volume = {33},
+  year = {2020}}
+
+@article{Kuss:2005,
+  author = {Kuss, Malte and Rasmussen, Carl E},
+  title = {Assessing Approximate Inference for Binary {Gaussian} Process Classification},
+  journal = {Journal of Machine Learning Research},
+  volume = {6},
+  pages = {1679--1704},
+  year = {2005}}
+
+@article{Vanhatalo:2010,
+  author = {Jarno Vanhatalo and Ville Pietil\"{a}inen and Aki Vehtari},
+  title = {Approximate inference for disease mapping with sparse {Gaussian} processes},
+  journal = {Statistics in Medicine},
+  year = {2010},
+  volume = {29},
+  number = {15},
+  pages = {1580--1607}
+}
+
+@article{Cseke:2011,
+  author = {Cseke, Botond and Heskes, Tom},
+  title = {Approximate marginals in latent {Gaussian} models},
+  journal = {Journal of Machine Learning Research},
+  volume = {12},
+  number = {2},
+  pages = {417--454},
+  year = {2011}}
+
+@article{Vehtari:2016,
+  author = {Aki Vehtari and Tommi Mononen and Ville Tolvanen and Tuomas Sivula and Ole Winther},
+  title = {Bayesian Leave-One-Out Cross-Validation Approximations for {Gaussian} Latent Variable Models},
+  journal = {Journal of Machine Learning Research},
+  year = {2016},
+  volume = {17},
+  number = {103},
+  pages = {1--38},
+  url = {http://jmlr.org/papers/v17/14-540.html}
+}
+
+@article{Margossian:2023,
+  author = {Margossian, Charles C},
+  title = {General adjoint-differentiated Laplace approximation},
+  journal = {arXiv preprint arXiv:2306.14976},
+  year = {2023}}
+
+@article{Rue:2009,
+  title={Approximate Bayesian inference for latent Gaussian models by using integrated nested Laplace approximations},
+  author={Rue, H{\aa}vard and Martino, Sara and Chopin, Nicolas},
+  journal={Journal of the Royal Statistical Society: Series B (Statistical Methodology)},
+  volume={71},
+  number={2},
+  pages={319--392},
+  year={2009},
+  publisher={Wiley Online Library},
+  doi={10.1111/j.1467-9868.2008.00700.x}
+}
diff --git a/src/functions-reference/_quarto.yml b/src/functions-reference/_quarto.yml
index bc6e83acb..9fd6f9a9a 100644
--- a/src/functions-reference/_quarto.yml
+++ b/src/functions-reference/_quarto.yml
@@ -72,6 +72,7 @@ book:
     - part: "Additional Distributions"
       chapters:
         - hidden_markov_models.qmd
+        - embedded_laplace.qmd
     - part: "Appendix"
       chapters:
         - mathematical_functions.qmd
diff --git a/src/functions-reference/embedded_laplace.qmd b/src/functions-reference/embedded_laplace.qmd
new file mode 100644
index 000000000..bf9e7f301
--- /dev/null
+++ b/src/functions-reference/embedded_laplace.qmd
@@ -0,0 +1,694 @@
+---
+pagetitle: Embedded Laplace Approximation
+---
+
+# Embedded Laplace Approximation
+
+The embedded Laplace approximation can be used to approximate certain
+marginal and conditional distributions that arise in latent Gaussian models.
+A latent Gaussian model has the following hierarchical structure:
+$$
+  \phi \sim p(\phi), \\
+  \theta \sim \text{MultiNormal}(0, K(\phi)), \\
+  y \sim p(y \mid \theta, \phi).
+$$
+In this formulation, $y$ represents the
+observed data, and $p(y \mid \theta, \phi)$ is the likelihood function that
+specifies how observations are generated conditional on the latent variables
+$\theta$ and hyperparameters $\phi$.
+$\phi$ denotes the set of hyperparameters governing the model and
+$p(\phi)$ is the prior distribution placed over these hyperparameters.
+$K(\phi)$ denotes the prior covariance matrix for the latent Gaussian variables
+$\theta$ and is parameterized by $\phi$.
+The prior $p(\theta)$ is restricted to be multivariate normal.
+To sample from the joint posterior $p(\phi, \theta \mid y)$, we can either
+use a standard method, such as Markov chain Monte Carlo, or we can follow
+a two-step procedure:
+
+1. sample from the *marginal posterior* $p(\phi \mid y)$,
+2. sample from the *conditional posterior* $p(\theta \mid y, \phi)$.
+
+In this procedure, neither the marginal posterior nor the conditional posterior
+is typically available in closed form, so both must be approximated.
+The marginal posterior can be written as $p(\phi \mid y) \propto p(y \mid \phi) p(\phi)$,
+where $p(y \mid \phi) = \int p(y \mid \phi, \theta) p(\theta) \text{d}\theta$
+is called the marginal likelihood. The Laplace method approximates
+$p(y \mid \phi, \theta) p(\theta)$ with a normal distribution centered at
+$$
+  \theta^* = \underset{\theta}{\text{argmax}} \ \log p(\theta \mid y, \phi),
+$$
+where $\theta^*$ is obtained using a numerical optimizer.
+The resulting Gaussian integral can be evaluated analytically to obtain an
+approximation to the log marginal likelihood
+$\log \hat p(y \mid \phi) \approx \log p(y \mid \phi)$.
+Specifically:
+$$
+  \hat p(y \mid \phi) = \frac{p(\theta^* \mid \phi) p(y \mid \theta^*, \phi)}{\hat p (\theta^* \mid \phi, y)}.
+$$
+
+Combining this marginal likelihood with the prior in the `model`
+block, we can then sample from the marginal posterior $p(\phi \mid y)$
+using one of Stan's algorithms. The marginal posterior is lower
+dimensional and likely to have a simpler geometry, leading to more
+efficient inference. On the other hand, each evaluation of the marginal
+likelihood is more costly, and the overall change in efficiency is
+problem-dependent.
+
+To obtain posterior draws for $\theta$, we sample from the normal
+approximation to $p(\theta \mid y, \phi)$ in `generated quantities`.
+The process of iteratively sampling from $p(\phi \mid y)$ (say, with MCMC) and
+then $p(\theta \mid y, \phi)$ produces samples from the joint posterior
+$p(\theta, \phi \mid y)$.
+
+The Laplace approximation is especially useful if $p(\theta)$ is
+multivariate normal and $p(y \mid \phi, \theta)$ is
+log-concave. Stan's embedded Laplace approximation is restricted to the case
+where the prior $p(\theta)$ is multivariate normal.
+Furthermore, the likelihood $p(y \mid \phi, \theta)$ must be computed using
+only operations which support higher-order derivatives
+(see section [specifying the likelihood function](#laplace-likelihood_spec)).
+
+## Approximating the log marginal likelihood $\log p(y \mid \phi)$
+
+In the `model` block, we increment `target` with `laplace_marginal`, a function
+that approximates the log marginal likelihood $\log p(y \mid \phi)$.
+The signature of the function is:
+
+\index{{\tt \bfseries laplace\_marginal }!{\tt (function likelihood\_function, tuple(...) likelihood\_arguments, vector theta\_init, function covariance\_function, tuple(...) covariance\_arguments): real}|hyperpage}
+
+`real` **`laplace_marginal`**`(function likelihood_function, tuple(...) likelihood_arguments, vector theta_init, function covariance_function, tuple(...) covariance_arguments)`
+
+It returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$.
+{{< since 2.37 >}}
+
+This function takes the following arguments:
+
+1. `likelihood_function` - the user-specified log likelihood, whose first argument is the vector of latent Gaussian variables `theta`
+2. `likelihood_arguments` - a tuple whose members are passed as the remaining arguments to the likelihood function
+3. `theta_init` - an initial guess for the optimization problem that underlies the Laplace approximation
+4. `covariance_function` - the prior covariance function
+5. `covariance_arguments` - a tuple whose members are passed as the arguments to the covariance function
+
+The size of `theta_init` must be consistent with the size of the $\theta$ argument
+passed to `likelihood_function`.
+
+Below we go over each argument in more detail.
+
+## Specifying the log likelihood function {#laplace-likelihood_spec}
+
+The first step in using the embedded Laplace approximation is to write down a
+function in the `functions` block which returns the log joint likelihood
+$\log p(y \mid \theta, \phi)$.
+
+There are a few constraints on this function:
+
+1. The function return type must be `real`.
+
+2. The first argument must be the latent Gaussian variable $\theta$ and must
+have type `vector`.
+
+3. The operations in the function must support higher-order automatic
+differentiation (AD). Most functions in Stan support higher-order AD.
+The exceptions are functions with specialized reverse-mode implementations:
+the higher-order functions (algebraic solvers, differential equation
+solvers, and integrators), the hidden Markov model (HMM) marginalization
+function, and the embedded Laplace approximation itself.
+
+The base signature of the function is
+
+```stan
+real likelihood_function(vector theta, ...)
+```
+
+The `...` represents a set of optional variadic arguments. There are no type
+restrictions on the variadic arguments `...`, and each argument can be passed
+as data or parameter.
+
+The tuple after `likelihood_function` contains the arguments that get passed
+to `likelihood_function`, *excluding $\theta$*. For instance, if a user-defined
+likelihood takes a real and a matrix in addition to $\theta$, the likelihood
+function's signature would first have a vector argument, followed by a real
+and a matrix argument.
+
+```stan
+real likelihood_fun(vector theta, real a, matrix X)
+```
+
+The call to `laplace_marginal` would then start with this likelihood and a
+tuple holding the other likelihood arguments.
+
+```stan
+real val = laplace_marginal(likelihood_fun, (a, X), ...);
+```
+
+As always, users should pass arguments as parameters only when necessary, in
+order to speed up differentiation.
+In general, we recommend marking data-only arguments with the keyword `data`,
+for example,
+
+```stan
+real likelihood_function(vector theta, data vector x, ...)
+```
+
+## Specifying the covariance function
+
+The argument `covariance_function` returns the prior covariance matrix
+$K$. The signature for this function is the same as for a standard Stan
+function. Its return type must be a matrix of size $n \times n$, where $n$ is
+the size of $\theta$.
+
+```stan
+matrix covariance_function(...)
+```
+
+The `...` represents a set of optional
+variadic arguments. There are no type restrictions on the variadic arguments
+`...`, and each argument can be passed as data or parameter. The variable
+$\phi$ is implicitly defined as the collection of all non-data arguments passed
+to `likelihood_function` (excluding $\theta$) and to `covariance_function`.
+
+The tuple after `covariance_function` contains the arguments that get passed
+to `covariance_function`. For instance, if a user-defined covariance function
+takes a real and a matrix
+```stan
+matrix cov_fun(real b, matrix Z)
+```
+the call to the Laplace marginal would include the covariance function and
+a tuple holding the covariance function arguments.
+
+```stan
+real val = laplace_marginal(likelihood_fun, (a, X), theta_init, cov_fun, (b, Z));
+```
+
+## Control parameters
+
+It is also possible to specify control parameters, which can help improve the
+optimization that underlies the Laplace approximation, using `laplace_marginal_tol`
+with the following signature:
+
+\index{{\tt \bfseries laplace\_marginal\_tol }!{\tt (function likelihood\_function, tuple(...), vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol`**`(function likelihood_function, tuple(...), vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+and allows the user to tune the control parameters of the approximation.
+
+* `tol`: the tolerance $\epsilon$ of the optimizer. Specifically, the optimizer
+stops when $||\nabla \log p(\theta \mid y, \phi)|| \le \epsilon$. By default,
+the value is $\epsilon = 10^{-6}$.
+
+* `max_steps`: the maximum number of steps taken by the optimizer before
+it gives up (in which case the Metropolis proposal gets rejected). The default
+is 100 steps.
+
+* `hessian_block_size`: the size of the blocks, assuming the Hessian
+$\partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2$ is block-diagonal.
+The structure of the Hessian is determined by the dependence structure of $y$
+on $\theta$. By default, the Hessian is treated as diagonal
+(`hessian_block_size=1`). If the Hessian is not block diagonal, then set
+`hessian_block_size=n`, where `n` is the size of $\theta$.
+
+* `solver`: choice of Newton solver. The optimizer used to compute the
+Laplace approximation does one of three matrix decompositions to compute a
+Newton step. The problem determines which decomposition is numerically stable.
+By default (`solver=1`), the solver performs a Cholesky decomposition of the
+negative Hessian, $- \partial^2 \log p(y \mid \theta, \phi) / \partial \theta^2$.
+If `solver=2`, the solver performs a Cholesky decomposition of the covariance
+matrix $K(\phi)$.
+If the Cholesky decomposition can be computed for neither the negative
+Hessian nor the covariance matrix, use `solver=3`, which uses a more expensive
+but less specialized approach.
+
+* `max_steps_linesearch`: maximum number of steps in the linesearch. The
+linesearch method tries to ensure that the Newton step leads to an improvement
+in the objective function. If the Newton step does not improve the objective
+function, the step is repeatedly halved until the objective function improves
+or the maximum number of linesearch steps is reached. By default,
+`max_steps_linesearch=0`, meaning no linesearch is performed.
+
+{{< since 2.37 >}}
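+
+For example, a call to `laplace_marginal_tol` which reuses the hypothetical
+`likelihood_fun` and `cov_fun` from above, and simply sets each control
+parameter to its default value, might look as follows (a sketch; all five
+control arguments must be supplied, in this order):
+
+```stan
+real val = laplace_marginal_tol(likelihood_fun, (a, X), theta_init,
+                                cov_fun, (b, Z),
+                                1e-6,  // tol
+                                100,   // max_steps
+                                1,     // hessian_block_size
+                                1,     // solver
+                                0);    // max_steps_linesearch
+```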
+
+## Sample from the approximate conditional $\hat{p}(\theta \mid y, \phi)$
+
+In `generated quantities`, it is possible to sample from the Laplace
+approximation of $p(\theta \mid \phi, y)$ using `laplace_latent_rng`.
+The signature for `laplace_latent_rng` closely follows
+the signature for `laplace_marginal`:
+
+\index{{\tt \bfseries laplace\_latent\_rng }!{\tt (function likelihood\_function, tuple(...), vector theta\_init, function covariance\_function, tuple(...)): vector}|hyperpage}
+
+`vector` **`laplace_latent_rng`**`(function likelihood_function, tuple(...), vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Draws approximate samples from the conditional posterior $p(\theta \mid y, \phi)$.
+{{< since 2.37 >}}
+
+Once again, it is possible to specify control parameters:
+
+\index{{\tt \bfseries laplace\_latent\_tol\_rng }!{\tt (function likelihood\_function, tuple(...), vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): vector}|hyperpage}
+
+`vector` **`laplace_latent_tol_rng`**`(function likelihood_function, tuple(...), vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+Draws approximate samples from the conditional posterior $p(\theta \mid y, \phi)$
+and allows the user to tune the control parameters of the approximation.
+{{< since 2.37 >}}
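+
+For example, reusing the hypothetical `likelihood_fun` and `cov_fun` from
+above, approximate draws of the latent vector (here of size `n`, the length
+of `theta_init`) can be generated with:
+
+```stan
+generated quantities {
+  vector[n] theta = laplace_latent_rng(likelihood_fun, (a, X), theta_init,
+                                       cov_fun, (b, Z));
+}
+```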
+
+## Built-in Laplace marginal likelihood functions
+
+Stan provides convenient wrappers for the embedded Laplace approximation
+when applied to latent Gaussian models with certain likelihoods.
+With these wrappers, the likelihood is pre-specified and does not need to be
+written by the user.
+The selection of supported likelihoods is currently
+narrow and expected to grow. The wrappers exist for the user's
+convenience but are not more computationally efficient than specifying the log
+likelihood in the `functions` block.
+
+### Poisson with log link
+
+Given count data, with each observed count $y_i$ associated with a group
+$g(i)$ and a corresponding latent variable $\theta_{g(i)}$, and a Poisson
+model, the likelihood is
+$$
+p(y \mid \theta, \phi) = \prod_i\text{Poisson} (y_i \mid \exp(\theta_{g(i)})).
+$$
+The arguments required to compute this likelihood are:
+
+* `y`: an array of counts.
+* `y_index`: an array whose $i^\text{th}$ element indicates the group to
+which the $i^\text{th}$ observation belongs.
+
+\index{{\tt \bfseries laplace\_marginal\_poisson\_log }!sampling statement|hyperpage}
+
+`y ~ ` **`laplace_marginal_poisson_log`**`(y_index, theta_init, covariance_function, (...))`
\newline + +Increment target log probability density with `laplace_marginal_poisson_log_lupmf(y | y_index, theta_init, covariance_function, (...))`. +{{< since 2.37 >}} + + +\index{{\tt \bfseries laplace\_marginal\_tol\_poisson\_log }!sampling statement|hyperpage} + +`y ~ ` **`laplace_marginal_tol_poisson_log`**`(y_index, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`
\newline
+
+Increment target log probability density with `laplace_marginal_tol_poisson_log_lupmf(y | y_index, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`.
+{{< since 2.37 >}}
+
+The signatures for the embedded Laplace approximation functions with a Poisson
+likelihood are
+
+\index{{\tt \bfseries laplace\_marginal\_poisson\_log\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage}
+`real` **`laplace_marginal_poisson_log_lpmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_poisson\_log\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+`real` **`laplace_marginal_tol_poisson_log_lpmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link, and allows the user to tune the control
+parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_poisson\_log\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage}
+`real` **`laplace_marginal_poisson_log_lupmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_poisson\_log\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_poisson_log_lupmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link, and allows the user to tune the control
+parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_latent\_poisson\_log\_rng }!{\tt (array[] int y, array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...)): vector}|hyperpage}
+`vector` **`laplace_latent_poisson_log_rng`**`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...))`
\newline +Returns a draw from the Laplace approximation to the conditional posterior +$p(\theta \mid y, \phi)$ in the special case where the likelihood +$p(y \mid \theta)$ is a Poisson distribution with a log link. +{{< since 2.37 >}} + + +\index{{\tt \bfseries laplace\_latent\_tol\_poisson\_log\_rng }!{\tt (array[] int y, array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): vector}|hyperpage} + +`vector` **`laplace_latent_tol_poisson_log_rng`**`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns a draw from the Laplace approximation to the conditional posterior
+$p(\theta \mid y, \phi)$ in the special case where the likelihood
+$p(y \mid \theta)$ is a Poisson distribution with a log link,
+and allows the user to tune the control parameters of the approximation.
+{{< since 2.37 >}}
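+
+As an example, a model block using the built-in Poisson wrapper might look
+as follows (a sketch, reusing the hypothetical covariance function `cov_fun`
+and its arguments from earlier, with `y` and `y_index` declared as data):
+
+```stan
+model {
+  y ~ laplace_marginal_poisson_log(y_index, theta_init, cov_fun, (b, Z));
+}
+```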
+
+A similar built-in likelihood lets users specify an offset vector $x$,
+with $x_i \ge 0$, to the rate parameter of the Poisson. This is equivalent to
+specifying a prior mean $\log(x_i)$ for $\theta_i$. The likelihood is then
+$$
+p(y \mid \theta, \phi) = \prod_i\text{Poisson} (y_i \mid \exp(\theta_{g(i)}) x_i).
+$$
+
+\index{{\tt \bfseries laplace\_marginal\_poisson\_2\_log }!sampling statement|hyperpage}
+
+`y ~ ` **`laplace_marginal_poisson_2_log`**`(y_index, x, theta_init, covariance_function, (...))`
\newline
+
+Increment target log probability density with `laplace_marginal_poisson_2_log_lupmf(y | y_index, x, theta_init, covariance_function, (...))`.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_poisson\_2\_log }!sampling statement|hyperpage}
+
+`y ~ ` **`laplace_marginal_tol_poisson_2_log`**`(y_index, x, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`
\newline +Increment target log probability density with `laplace_marginal_tol_poisson_2_log_lupmf(y | y_index, x, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`. +{{< since 2.37 >}} + +The signatures for this function are: + + +\index{{\tt \bfseries laplace\_marginal\_poisson\_2\_log\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector x, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage} +`real` **`laplace_marginal_poisson_2_log_lpmf`**`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...))`
\newline
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link and an offset.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_poisson\_2\_log\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector x, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_poisson_2_log_lpmf`**`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link and an offset,
+and allows the user to tune the control parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_poisson\_2\_log\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector x, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage}
+`real` **`laplace_marginal_poisson_2_log_lupmf`**`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link and an offset.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_poisson\_2\_log\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector x, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_poisson_2_log_lupmf`**`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Poisson
+distribution with a log link and an offset,
+and allows the user to tune the control parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_latent\_poisson\_2\_log\_rng }!{\tt (array[] int y, array[] int y\_index, vector x, vector theta\_init, function covariance\_function, tuple(...)): vector}|hyperpage}
+
+`vector` **`laplace_latent_poisson_2_log_rng`**`(array[] int y, array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...))`
\newline + +Returns a draw from the Laplace approximation to the conditional posterior +$p(\theta \mid y, \phi)$ in the special case where the likelihood +$p(y \mid \theta)$ is a Poisson distribution with a log link and an offset. +{{< since 2.37 >}} + + +\index{{\tt \bfseries laplace\_latent\_tol\_poisson\_2\_log\_rng }!{\tt (array[] int y, array[] int y\_index, vector x, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): vector}|hyperpage} + +`vector` **`laplace_latent_tol_poisson_2_log_rng`**`(array[] int y, array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns a draw from the Laplace approximation to the conditional posterior
+$p(\theta \mid y, \phi)$ in the special case where the likelihood
+$p(y \mid \theta)$ is a Poisson distribution with a log link and an offset,
+and allows the user to tune the control parameters of the approximation.
+{{< since 2.37 >}}
+
+### Negative binomial with log link
+
+The negative binomial distribution generalizes the Poisson distribution by
+introducing the dispersion parameter $\eta$. The corresponding likelihood is then
+$$
+p(y \mid \theta, \phi) = \prod_i\text{NegBinomial2} (y_i \mid \exp(\theta_{g(i)}), \eta).
+$$
+Here we use the alternative parameterization implemented in Stan, meaning that
+$$
+\mathbb E(y_i) = \exp (\theta_{g(i)}), \\
+\text{Var}(y_i) = \mathbb E(y_i) + \frac{(\mathbb E(y_i))^2}{\eta}.
+$$
+The arguments for the likelihood function are:
+
+* `y`: the observed counts
+* `y_index`: an array whose $i^\text{th}$ element indicates the group to
+which the $i^\text{th}$ observation belongs.
+* `eta`: the dispersion parameter.
+
+\index{{\tt \bfseries laplace\_marginal\_neg\_binomial\_2\_log }!sampling statement|hyperpage}
+
+`y ~ ` **`laplace_marginal_neg_binomial_2_log`**`(y_index, eta, theta_init, covariance_function, (...))`
\newline + +Increment target log probability density with `laplace_marginal_neg_binomial_2_log_lupmf(y | y_index, eta, theta_init, covariance_function, (...))`. +{{< since 2.37 >}} + + +\index{{\tt \bfseries laplace\_marginal\_tol\_neg\_binomial\_2\_log }!sampling statement|hyperpage} + +`y ~ ` **`laplace_marginal_tol_neg_binomial_2_log`**`(y_index, eta, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`
\newline
+
+Increment target log probability density with `laplace_marginal_tol_neg_binomial_2_log_lupmf(y | y_index, eta, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`.
+{{< since 2.37 >}}
+
+The function signatures for the embedded Laplace approximation with a negative
+binomial likelihood are
+
+\index{{\tt \bfseries laplace\_marginal\_neg\_binomial\_2\_log\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, real eta, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage}
+
+`real` **`laplace_marginal_neg_binomial_2_log_lpmf`**`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi, \eta)$
+in the special case where the likelihood $p(y \mid \theta, \eta)$ is a negative
+binomial distribution with a log link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_neg\_binomial\_2\_log\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, real eta, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_neg_binomial_2_log_lpmf`**`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi, \eta)$
+in the special case where the likelihood $p(y \mid \theta, \eta)$ is a negative
+binomial distribution with a log link, and allows the user to tune the control
+parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_neg\_binomial\_2\_log\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, real eta, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage}
+
+`real` **`laplace_marginal_neg_binomial_2_log_lupmf`**`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi, \eta)$
+in the special case where the likelihood $p(y \mid \theta, \eta)$ is a negative
+binomial distribution with a log link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_neg\_binomial\_2\_log\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, real eta, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_neg_binomial_2_log_lupmf`**`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi, \eta)$
+in the special case where the likelihood $p(y \mid \theta, \eta)$ is a negative
+binomial distribution with a log link, and allows the user to tune the control
+parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_latent\_neg\_binomial\_2\_log\_rng }!{\tt (array[] int y, array[] int y\_index, real eta, vector theta\_init, function covariance\_function, tuple(...)): vector}|hyperpage}
+
+`vector` **`laplace_latent_neg_binomial_2_log_rng`**`(array[] int y, array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns a draw from the Laplace approximation to the conditional posterior
+$p(\theta \mid y, \phi, \eta)$ in the special case where the likelihood
+$p(y \mid \theta, \eta)$ is a negative binomial distribution with a log link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_latent\_tol\_neg\_binomial\_2\_log\_rng }!{\tt (array[] int y, array[] int y\_index, real eta, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): vector}|hyperpage}
+
+`vector` **`laplace_latent_tol_neg_binomial_2_log_rng`**`(array[] int y, array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns a draw from the Laplace approximation to the conditional posterior
+$p(\theta \mid y, \phi, \eta)$ in the special case where the likelihood
+$p(y \mid \theta, \eta)$ is a negative binomial distribution with a log link,
+and allows the user to tune the control parameters of the approximation.
+{{< since 2.37 >}}
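+
+For example, a model block using the built-in negative binomial wrapper
+might look as follows (a sketch, reusing the hypothetical covariance function
+`cov_fun` and its arguments from earlier; the prior on `eta` is an arbitrary
+illustrative choice):
+
+```stan
+model {
+  eta ~ gamma(2, 0.1);  // prior on the dispersion parameter (illustrative)
+  y ~ laplace_marginal_neg_binomial_2_log(y_index, eta, theta_init,
+                                          cov_fun, (b, Z));
+}
+```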
+
+### Bernoulli with logit link
+
+Given binary outcomes $y_i \in \{0, 1\}$ and a Bernoulli model, the likelihood is
+$$
+p(y \mid \theta, \phi) = \prod_i\text{Bernoulli} (y_i \mid \text{logit}^{-1}(\theta_{g(i)})).
+$$
+The arguments of the likelihood function are:
+
+* `y`: the binary observations
+* `y_index`: an array whose $i^\text{th}$ element indicates the group to
+which the $i^\text{th}$ observation belongs.
+
+\index{{\tt \bfseries laplace\_marginal\_bernoulli\_logit }!sampling statement|hyperpage}
+
+`y ~ ` **`laplace_marginal_bernoulli_logit`**`(y_index, theta_init, covariance_function, (...))`
\newline
+
+Increment target log probability density with `laplace_marginal_bernoulli_logit_lupmf(y | y_index, theta_init, covariance_function, (...))`.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_bernoulli\_logit }!sampling statement|hyperpage}
+
+`y ~ ` **`laplace_marginal_tol_bernoulli_logit`**`(y_index, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`
\newline + +Increment target log probability density with `laplace_marginal_tol_bernoulli_logit_lupmf(y | y_index, theta_init, covariance_function, (...), tol, max_steps, hessian_block_size, solver, max_steps_linesearch)`. +{{< since 2.37 >}} + + +The function signatures for the embedded Laplace approximation with a Bernoulli likelihood are + + +\index{{\tt \bfseries laplace\_marginal\_bernoulli\_logit\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage} + +`real` **`laplace_marginal_bernoulli_logit_lpmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Bernoulli
+distribution with a logit link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_bernoulli\_logit\_lpmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_bernoulli_logit_lpmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Bernoulli
+distribution with a logit link, and allows the user to tune the control
+parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_bernoulli\_logit\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...)): real}|hyperpage}
+
+`real` **`laplace_marginal_bernoulli_logit_lupmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...))`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Bernoulli
+distribution with a logit link.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_marginal\_tol\_bernoulli\_logit\_lupmf }!{\tt (array[] int y \textbar\ array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): real}|hyperpage}
+
+`real` **`laplace_marginal_tol_bernoulli_logit_lupmf`**`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns an approximation to the log marginal likelihood $\log p(y \mid \phi)$
+in the special case where the likelihood $p(y \mid \theta)$ is a Bernoulli
+distribution with a logit link, and allows the user to tune the control
+parameters of the approximation.
+{{< since 2.37 >}}
+
+\index{{\tt \bfseries laplace\_latent\_bernoulli\_logit\_rng }!{\tt (array[] int y, array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...)): vector}|hyperpage}
+
+`vector` **`laplace_latent_bernoulli_logit_rng`**`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...))`
\newline + +Returns a draw from the Laplace approximation to the conditional posterior +$p(\theta \mid y, \phi)$ in the special case where the likelihood +$p(y \mid \theta)$ is a Bernoulli distribution with a logit link. +{{< since 2.37 >}} + + +\index{{\tt \bfseries laplace\_latent\_tol\_bernoulli\_logit\_rng }!{\tt (array[] int y, array[] int y\_index, vector theta\_init, function covariance\_function, tuple(...), real tol, int max\_steps, int hessian\_block\_size, int solver, int max\_steps\_linesearch): vector}|hyperpage} + +`vector` **`laplace_latent_tol_bernoulli_logit_rng`**`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch)`
\newline
+
+Returns a draw from the Laplace approximation to the conditional posterior
+$p(\theta \mid y, \phi)$ in the special case where the likelihood
+$p(y \mid \theta)$ is a Bernoulli distribution with a logit link,
+and lets the user tune the control parameters of the approximation.
+{{< since 2.37 >}}
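+
+For example, a model block using the built-in Bernoulli wrapper might look
+as follows (a sketch, reusing the hypothetical covariance function `cov_fun`
+and its arguments from earlier):
+
+```stan
+model {
+  y ~ laplace_marginal_bernoulli_logit(y_index, theta_init, cov_fun, (b, Z));
+}
+```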
diff --git a/src/functions-reference/functions_index.qmd b/src/functions-reference/functions_index.qmd
index 8bf2b8ad3..77ac2441b 100644
--- a/src/functions-reference/functions_index.qmd
+++ b/src/functions-reference/functions_index.qmd
@@ -1621,6 +1621,181 @@ pagetitle: Alphabetical Index
 - [`(T x) : R`](real-valued_basic_functions.qmd#index-entry-ab5ca07ba7ba53020939ca3ba7c6e64ccf05cf19) (real-valued_basic_functions.html)
+**laplace_latent_bernoulli_logit_rng**: + + -
[`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...)) : vector`](embedded_laplace.qmd#index-entry-1fc637cfa219f5661eaf691aed6979aa11715e42) (embedded_laplace.html)
+ + +**laplace_latent_neg_binomial_2_log_rng**: + + -
[`(array[] int y, array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...)) : vector`](embedded_laplace.qmd#index-entry-cdf01a782863e45a083e48b4bd247071d9a4de50) (embedded_laplace.html)
+ + +**laplace_latent_poisson_2_log_rng**: + + -
[`(array[] int y, array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...)) : vector`](embedded_laplace.qmd#index-entry-51913d568f11fc64a9f166ff680e92b7943b85bc) (embedded_laplace.html)
+ + +**laplace_latent_poisson_log_rng**: + + -
[`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...)) : vector`](embedded_laplace.qmd#index-entry-de9bf8cc1a51693f3e9a1a49dca129ae45c868b9) (embedded_laplace.html)
+ + +**laplace_latent_rng**: + + -
[`(function likelihood_function, tuple(...), vector theta_init, function covariance_function, tuple(...)) : vector`](embedded_laplace.qmd#index-entry-e736fd996b0dd5ec34cc3a31e68ae93361bcfe4c) (embedded_laplace.html)
+ + +**laplace_latent_tol_bernoulli_logit_rng**: + + -
[`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : vector`](embedded_laplace.qmd#index-entry-929b023be5b575bb12c7568d48c4292d68484e4b) (embedded_laplace.html)
+ + +**laplace_latent_tol_neg_binomial_2_log_rng**: + + -
[`(array[] int y, array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : vector`](embedded_laplace.qmd#index-entry-5549a3af52023a92a370b327107898233c546f09) (embedded_laplace.html)
+ + +**laplace_latent_tol_poisson_2_log_rng**: + + -
[`(array[] int y, array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : vector`](embedded_laplace.qmd#index-entry-b65fcddeadb2a0edba8bda048961a188a3c02e67) (embedded_laplace.html)
+ + +**laplace_latent_tol_poisson_log_rng**: + + -
[`(array[] int y, array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : vector`](embedded_laplace.qmd#index-entry-2660b79c1a17a7def5edb58129a847511818959d) (embedded_laplace.html)
+ + +**laplace_marginal**: + + -
[`(function likelihood_function, tuple(...) likelihood_arguments, vector theta_init, function covariance_function, tuple(...) covariance_arguments) : real`](embedded_laplace.qmd#index-entry-95e1c296ee63b312ffa7ef98140792e7f3fadeac) (embedded_laplace.html)
+ + +**laplace_marginal_bernoulli_logit**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-1d93ab799518d0aac88e63c01f9655f36c7cbeb6) (embedded_laplace.html)
+ + +**laplace_marginal_bernoulli_logit_lpmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-f842c1af3fe63a0220e54721301ff2e9ffa6cd52) (embedded_laplace.html)
+ + +**laplace_marginal_bernoulli_logit_lupmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-31c6ccea0ce342412ce6c8b0d31e29eafaa91f5c) (embedded_laplace.html)
+ + +**laplace_marginal_neg_binomial_2_log**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-7f807083cfb7347ece100da5dfe719186a678d02) (embedded_laplace.html)
+ + +**laplace_marginal_neg_binomial_2_log_lpmf**: + + -
[`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-221578be4523a20b22e6a523ba6457be9b21f792) (embedded_laplace.html)
+ + +**laplace_marginal_neg_binomial_2_log_lupmf**: + + -
[`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-720cc42c0aa5a63ef214bcb27374e2ceabd37455) (embedded_laplace.html)
+ + +**laplace_marginal_poisson_2_log**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-47b6396bce572a75c537fe2902d7be0f80b7af4e) (embedded_laplace.html)
+ + +**laplace_marginal_poisson_2_log_lpmf**: + + -
[`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-6d26f0abe1ac0de87b82f12b3ed92195ef8cd7f8) (embedded_laplace.html)
+ + +**laplace_marginal_poisson_2_log_lupmf**: + + -
[`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-01db5858ff57fce301d7f58e3dcb5cac55998091) (embedded_laplace.html)
+ + +**laplace_marginal_poisson_log**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-b2c6946b65561039eb8bf3999fb59d38a6336e10) (embedded_laplace.html)
+ + +**laplace_marginal_poisson_log_lpmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-9b336691c420ff95c5a2c48f78e69c8e605224f6) (embedded_laplace.html)
+ + +**laplace_marginal_poisson_log_lupmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...)) : real`](embedded_laplace.qmd#index-entry-e7c7252606cc5d8b1f77617eef2af52d21642250) (embedded_laplace.html)
+ + +**laplace_marginal_tol**: + + -
[`(function likelihood_function, tuple(...), vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-88d77a8692d68016d68f400d2a2541259bcf24a2) (embedded_laplace.html)
+ + +**laplace_marginal_tol_bernoulli_logit**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-92d43f7c3643c7d85966bbfc0b2a71546facb37c) (embedded_laplace.html)
+ + +**laplace_marginal_tol_bernoulli_logit_lpmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-98481a032cb5ad2b100b7db23109d3f4f70e0af9) (embedded_laplace.html)
+ + +**laplace_marginal_tol_bernoulli_logit_lupmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-4b2493e6cf1e09674de57a4f42b3334b42d02136) (embedded_laplace.html)
+ + +**laplace_marginal_tol_neg_binomial_2_log**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-40016f7d5d96b9d999ca42f446720cf3f3cd5769) (embedded_laplace.html)
+ + +**laplace_marginal_tol_neg_binomial_2_log_lpmf**: + + -
[`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-c31e9b3864a282b96f30819d5a39b4995a9eb911) (embedded_laplace.html)
+ + +**laplace_marginal_tol_neg_binomial_2_log_lupmf**: + + -
[`(array[] int y | array[] int y_index, real eta, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-17764e1afa12272c00795a785590169d0d70fda4) (embedded_laplace.html)
+ + +**laplace_marginal_tol_poisson_2_log**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-257d3af0df49c240293dc3a2d4f8cac109dd54d1) (embedded_laplace.html)
+ + +**laplace_marginal_tol_poisson_2_log_lpmf**: + + -
[`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-603a63e43b3518251e5f402a738efe4906015c36) (embedded_laplace.html)
+ + +**laplace_marginal_tol_poisson_2_log_lupmf**: + + -
[`(array[] int y | array[] int y_index, vector x, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-e3eca950639aed9276dd8c0e880d3982d7a6c642) (embedded_laplace.html)
+ + +**laplace_marginal_tol_poisson_log**: + + -
[distribution statement](embedded_laplace.qmd#index-entry-4a7ecca812a308ea648bb31996559cba79738d83) (embedded_laplace.html)
+ + +**laplace_marginal_tol_poisson_log_lpmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-b9f92ff7606b590d41fd9c33e480439337aae673) (embedded_laplace.html)
+ + +**laplace_marginal_tol_poisson_log_lupmf**: + + -
[`(array[] int y | array[] int y_index, vector theta_init, function covariance_function, tuple(...), real tol, int max_steps, int hessian_block_size, int solver, int max_steps_linesearch) : real`](embedded_laplace.qmd#index-entry-42b965299d48dc219eaed0a64fec07d8d070d47b) (embedded_laplace.html)
+ + **lbeta**: -
[`(real alpha, real beta) : real`](real-valued_basic_functions.qmd#index-entry-def48992e0f8724381904fba78466b0f4a0d14bd) (real-valued_basic_functions.html)
diff --git a/src/reference-manual/_quarto.yml b/src/reference-manual/_quarto.yml
index 889680338..1ef1b59fc 100644
--- a/src/reference-manual/_quarto.yml
+++ b/src/reference-manual/_quarto.yml
@@ -59,6 +59,7 @@ book:
       - pathfinder.qmd
       - variational.qmd
       - laplace.qmd
+      - laplace_embedded.qmd
       - diagnostics.qmd
     - part: "Usage"
diff --git a/src/reference-manual/laplace_embedded.qmd b/src/reference-manual/laplace_embedded.qmd
new file mode 100644
index 000000000..7a9dda324
--- /dev/null
+++ b/src/reference-manual/laplace_embedded.qmd
@@ -0,0 +1,174 @@
+---
+pagetitle: Embedded Laplace Approximation
+---
+
+# Embedded Laplace Approximation
+
+Stan provides functions to perform an embedded Laplace
+approximation for latent Gaussian models. With a slight abuse of language,
+this is sometimes known as an integrated or nested Laplace approximation.
+Details of Stan's implementation can be found in references
+[@Margossian:2020] and [@Margossian:2023].
+
+A standard approach to fitting a latent Gaussian model is to perform inference
+jointly over the latent Gaussian variables and the hyperparameters.
+Instead, the embedded Laplace approximation can be used to perform *approximate*
+marginalization of the latent Gaussian variables; we can then
+use any inference method for the remaining hyperparameters, for example
+Hamiltonian Monte Carlo sampling.
+
+Formally, consider a latent Gaussian model,
+\begin{eqnarray*}
+ \phi & \sim & p(\phi) \\
+ \theta & \sim & \text{Multi-Normal}(0, K(\phi)) \\
+ y & \sim & p(y \mid \theta, \phi).
+\end{eqnarray*}
+The motivation for marginalization is to bypass the challenging geometry of the joint
+posterior $p(\phi, \theta \mid y)$. This geometry (e.g., funnels) often frustrates
+inference algorithms, including Hamiltonian Monte Carlo sampling and approximate
+methods such as variational inference. On the other hand, the marginal posterior
+$p(\phi \mid y)$ is often well-behaved and in many cases low-dimensional.
+Furthermore, the conditional posterior $p(\theta \mid \phi, y)$ can be well
+approximated by a normal distribution if the likelihood $p(y \mid \theta, \phi)$
+is log-concave.
+
+
+## Approximation of the conditional posterior and marginal likelihood
+
+The Laplace approximation is the normal distribution that matches the mode
+and curvature of the conditional posterior $p(\theta \mid y, \phi)$.
+The mode,
+$$
+  \theta^* = \underset{\theta}{\text{argmax}} \ p(\theta \mid y, \phi),
+$$
+is estimated by a Newton solver. Since the approximation is normal,
+the curvature is matched by setting the covariance to the inverse of the
+negative Hessian of the log conditional posterior, evaluated at the mode,
+$$
+  \Sigma^* = \left( - \left . \frac{\partial^2}{\partial \theta^2}
+  \log p (\theta \mid \phi, y) \right |_{\theta =\theta^*} \right)^{-1}.
+$$
+The resulting Laplace approximation is then
+$$
+\hat p_\mathcal{L} (\theta \mid y, \phi) = \text{Multi-Normal}(\theta^*, \Sigma^*)
+\approx p(\theta \mid y, \phi).
+$$
+This approximation implies another approximation, for the marginal likelihood,
+$$
+  \hat p_\mathcal{L}(y \mid \phi) := \frac{p(\theta^* \mid \phi) \
+  p(y \mid \theta^*, \phi) }{ \hat p_\mathcal{L} (\theta^* \mid \phi, y) }
+  \approx p(y \mid \phi).
+$$
+Hence, a strategy to approximate the posterior of the latent Gaussian model
+is to first estimate the marginal posterior
+$\hat p_\mathcal{L}(\phi \mid y) \propto p(\phi) \hat p_\mathcal{L} (y \mid \phi)$
+using any algorithm supported by Stan.
+Approximate posterior draws for the latent Gaussian variables are then
+obtained by first drawing $\phi \sim \hat p_\mathcal{L}(\phi \mid y)$ and
+then $\theta \sim \hat p_\mathcal{L}(\theta \mid \phi, y)$.
+
+
+## Trade-offs of the approximation
+
+The embedded Laplace approximation presents several trade-offs relative to
+standard inference over the joint posterior $p(\theta, \phi \mid y)$. The main
+advantage of the embedded Laplace approximation is that it side-steps the
+intricate geometry of hierarchical models. The marginal posterior
+$p(\phi \mid y)$ can then be handled by Hamiltonian Monte Carlo sampling
+without extensive tuning or reparameterization, and the chains mix faster,
+meaning we can run shorter chains to achieve a desired precision. In some cases,
+approximate methods, e.g. variational inference, which
+work poorly on the joint $p(\theta, \phi \mid y)$ work well on the marginal
+posterior $p(\phi \mid y)$.
+
+On the other hand, the embedded Laplace approximation presents certain
+disadvantages. First, we need to perform a Laplace approximation each time
+the log marginal likelihood is evaluated, meaning each iteration
+can be expensive. Secondly, the approximation can introduce non-negligible
+error, especially with non-conventional likelihoods (note that the prior
+is always multivariate normal). How these trade-offs are resolved depends on
+the application; see [@Margossian:2020] for some examples.
+
+
+## Details of the approximation
+
+### Tuning the Newton solver
+
+A critical component of the embedded Laplace approximation is the Newton solver
+used to estimate the mode $\theta^*$ of $p(\theta \mid \phi, y)$. The objective
+function being maximized is
+$$
+\Psi(\theta) = \log p(\theta \mid \phi) + \log p(y \mid \theta, \phi),
+$$
+and convergence is declared if the change in the objective between two
+iterations is sufficiently small,
+$$
+| \Psi (\theta^{(i + 1)}) - \Psi (\theta^{(i)}) | \le \Delta,
+$$
+for some *tolerance* $\Delta$. The solver also stops after reaching a
+pre-specified *maximum number of steps*: in that case, Stan throws an exception
+and rejects the current proposal. This is not a problem, as
+long as these exceptions are rare and confined to early phases of the warmup.
+
+The Newton iteration can be augmented with a linesearch step to ensure that
+the objective function $\Psi$ increases at each iteration. Specifically,
+suppose that
+$$
+\Psi (\theta^{(i + 1)}) < \Psi (\theta^{(i)}).
+$$
+This can indicate that the Newton step is too large and that we overshot the
+region where the objective function increases. In that case, we can reduce the
+step length by a factor of 2, using
+$$
+  \theta^{(i + 1)} \leftarrow \frac{\theta^{(i + 1)} + \theta^{(i)}}{2}.
+$$
+We repeat this halving of steps until
+$\Psi (\theta^{(i + 1)}) \ge \Psi (\theta^{(i)})$, or until a maximum number
+of linesearch steps is reached. By default, this maximum is set to 0, which
+means the Newton solver performs no linesearch. For certain problems, adding
+a linesearch can make the optimization more stable.
+
+
+The embedded Laplace approximation uses a custom Newton solver, specialized
+to find the mode of $p(\theta \mid \phi, y)$.
+A key step for efficient optimization is to ensure all matrix inversions are
+numerically stable. This can be done using the Woodbury-Sherman-Morrison
+formula and requires one of three matrix decompositions:
+
+1. Cholesky decomposition of the Hessian of the negative log likelihood,
+$W = - \partial^2_\theta \log p(y \mid \theta, \phi)$.
+
+The first solver (1) should be used if $W$, the Hessian of the
+negative log likelihood, is positive-definite. Otherwise the user
+should rely on (2). In the rarer cases where it is not numerically
+safe to invert the covariance matrix $K$, users can fall back on the
+third solver as a last resort.
+
+
+### Sparse Hessian of the log likelihood
+
+A key step in speeding up the computation is to take advantage of the
+sparsity of the Hessian of the log likelihood,
+$$
+  H = \frac{\partial^2}{\partial \theta^2} \log p(y \mid \theta, \phi).
+$$
+For example, if the observations $(y_1, \cdots, y_N)$ are
+conditionally independent and each depends on only one component of
+$\theta$, such that
+$$
+  \log p(y \mid \theta, \phi) = \sum_{i = 1}^N \log p(y_i \mid \theta_i, \phi),
+$$
+then the Hessian is diagonal. This leads to faster computation of the
+Hessian and, subsequently, to sparse matrix operations. This case is
+common in Gaussian process models and certain hierarchical models.
+
+Stan's suite of functions for the embedded Laplace approximation is
+not equipped to handle arbitrary sparsity structures; instead, the
+functions operate on block-diagonal Hessians, and the user can specify
+the size $B$ of these blocks. The user is responsible for working out
+what $B$ is. If the Hessian is dense, we simply set $B = N$.
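+
+As an illustration, here is a hypothetical likelihood in which each
+observation depends on exactly one component of $\theta$, so that the
+Hessian is diagonal and the block size is $B = 1$ (the value passed to
+the `hessian_block_size` control argument); the function name and
+types are placeholders:
+
+```stan
+functions {
+  // Each y[i] depends only on theta[i], so the Hessian of the log
+  // likelihood with respect to theta is diagonal and B = 1.
+  real ll_function(vector theta, array[] int y) {
+    return poisson_log_lpmf(y | theta);
+  }
+}
+```
+
+By contrast, if each pair of observations $(y_{2j - 1}, y_{2j})$
+depended on the pair of latent variables $(\theta_{2j - 1}, \theta_{2j})$,
+the Hessian would be block-diagonal with blocks of size $B = 2$.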
+
+NOTE: currently, there is no support for sparse prior covariance
+matrices. We expect this to be supported in future versions of Stan.
diff --git a/src/stan-users-guide/gaussian-processes.qmd b/src/stan-users-guide/gaussian-processes.qmd
index e7acf9a84..6ad62f387 100644
--- a/src/stan-users-guide/gaussian-processes.qmd
+++ b/src/stan-users-guide/gaussian-processes.qmd
@@ -28,7 +28,7 @@ functions drawn from the process.
 
 Gaussian processes can be encoded in Stan by implementing their mean and
 covariance functions or by using the specialized covariance functions
-outlined below, and plugging the result into the Gaussian model. 
+outlined below, and plugging the result into the Gaussian model.
 This form of model is straightforward and may be used for simulation,
 model fitting, or posterior predictive inference. A more efficient
 Stan implementation for the GP with a normally distributed outcome marginalizes
@@ -486,8 +486,173 @@ model {
 }
 ```
 
+#### Poisson GP using an embedded Laplace approximation {-}
+
+For computational reasons, we may want to integrate out the Gaussian
+process $f$, as was done in the normal output model. Unfortunately,
+exact marginalization over $f$ is not possible when the outcome model
+is not normal. Instead, we may perform *approximate* marginalization
+with an *embedded Laplace approximation* [@Rue:2009; @Margossian:2020].
+To do so, we first use the function `laplace_marginal` to approximate
+the marginal likelihood $p(y \mid \rho, \alpha, a)$ and sample the
+hyperparameters with Hamiltonian Monte Carlo sampling. Then, we
+recover the integrated-out $f$ in the `generated quantities` block
+using `laplace_latent_rng`.
+
+The embedded Laplace approximation computes a Gaussian approximation
+of the conditional posterior,
+$$
+  \hat p_\mathcal{L}(f \mid \rho, \alpha, a, y) \approx p(f \mid \rho, \alpha, a, y),
+$$
+where $\hat p_\mathcal{L}$ is a Gaussian that matches the mode and
+curvature of $p(f \mid \rho, \alpha, a, y)$. We then obtain an
+approximation of the marginal likelihood as follows:
+$$
+  \hat p_\mathcal{L}(y \mid \rho, \alpha, a)
+  = \frac{p(f^* \mid \alpha, \rho) p(y \mid f^*, a)}{
+    \hat p_\mathcal{L}(f^* \mid \rho, \alpha, a, y)},
+$$
+where $f^*$ is the mode of $p(f \mid \rho, \alpha, a, y)$, obtained
+via numerical optimization.
+
+To use Stan's embedded Laplace approximation, we must define the prior
+covariance function and the log likelihood function in the `functions`
+block.
+```stan
+functions {
+  // log likelihood function
+  real ll_function(vector f, real a, array[] int y) {
+    return poisson_log_lpmf(y | a + f);
+  }
+
+  // covariance function
+  matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+    return add_diag(K, delta);
+  }
+}
+```
+
+Furthermore, we must specify an initial value $f_\text{init}$ for the
+numerical optimizer that underlies the Laplace approximation. In our
+experience, setting all values to 0 is a good default.
 
-#### Logistic Gaussian process regression {-}
+```stan
+transformed data {
+  vector[N] f_init = rep_vector(0, N);
+}
+```
+
+We then increment `target` in the `model` block with the approximation
+to $\log p(y \mid \rho, \alpha, a)$.
+```stan
+model {
+  rho ~ inv_gamma(5, 5);
+  alpha ~ std_normal();
+  a ~ std_normal();
+
+  target += laplace_marginal(ll_function, (a, y), f_init,
+                             cov_function, (rho, alpha, x, N, delta));
+}
+```
+Notice that we do not need to construct $f$ explicitly, since it is
+marginalized out. Instead, we recover the GP function in
+`generated quantities`:
+```stan
+generated quantities {
+  vector[N] f = laplace_latent_rng(ll_function, (a, y), f_init,
+                                   cov_function, (rho, alpha, x, N, delta));
+}
+```
+
+Users can set the control parameters of the embedded Laplace
+approximation via `laplace_marginal_tol` and `laplace_latent_tol_rng`.
+When using these functions, the user must set *all* of the control
+parameters.
+```stan
+transformed data {
+  // ...
+
+  real tol = 1e-6;             // optimizer's tolerance for Laplace approx.
+  int max_num_steps = 1000;    // maximum number of steps for optimizer.
+  int hessian_block_size = 1;  // when the Hessian of the log likelihood is
+                               // block diagonal, size of the blocks (here 1).
+  int solver = 1;              // which Newton optimizer to use; default is 1,
+                               // use 2 and 3 only for special cases.
+  int max_steps_linesearch = 0;  // if >= 1, optimizer does a linesearch with
+                                 // the specified number of steps.
+}
+
+// ...
+
+model {
+  // ...
+
+  target += laplace_marginal_tol(ll_function, (a, y), f_init,
+                                 cov_function, (rho, alpha, x, N, delta),
+                                 tol, max_num_steps, hessian_block_size,
+                                 solver, max_steps_linesearch);
+}
+
+generated quantities {
+  vector[N] f = laplace_latent_tol_rng(ll_function, (a, y), f_init,
+                                       cov_function, (rho, alpha, x, N, delta),
+                                       tol, max_num_steps, hessian_block_size,
+                                       solver, max_steps_linesearch);
+}
+```
+For details about the control parameters, see @Margossian:2023.
+
+
+Stan also provides support for a limited menu of built-in likelihoods,
+including the Poisson distribution with a log link and prior mean $a$.
+When using such a built-in likelihood, the user does not need to
+specify a likelihood in the `functions` block. However, the user must
+strictly follow the signature of the likelihood: in this case, $a$
+must be a vector of length $N$ (to allow for different offsets for
+each observation $y_i$), and we must indicate which element of $f$
+each component of $y$ matches using the variable $y_\text{index}$.
+In our example, there is a simple pairing $(y_i, f_i)$; however, we
+could imagine a scenario where multiple observations
+$(y_{j1}, y_{j2}, ...)$ are observed for a single $f_j$ (a sketch of
+that variant follows the example below).
+
+```stan
+transformed data {
+  // ...
+  array[N] int y_index;
+  for (i in 1:N) y_index[i] = i;  // y[i] is paired with f[i]
+}
+
+// ...
+
+transformed parameters {
+  vector[N] a_vec = rep_vector(a, N);
+}
+
+model {
+  // ...
+  target += laplace_marginal_poisson_2_log_lpmf(y | y_index, a_vec, f_init,
+                                                cov_function, (rho, alpha, x, N, delta));
+}
+
+generated quantities {
+  vector[N] f = laplace_latent_poisson_2_log_rng(y, y_index, a_vec, f_init,
+                                                 cov_function, (rho, alpha, x, N, delta));
+}
+```
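+
+To make the role of `y_index` concrete, here is a hypothetical variant
+in which each latent component $f_j$ has two replicate observations,
+so that `y_index` repeats each index twice (a sketch assuming, as
+elsewhere in the Stan language, that indices are 1-based):
+
+```stan
+transformed data {
+  // Hypothetical: 2 * N observations, two per latent component, so
+  // y_index = {1, 1, 2, 2, ...} pairs each y with an element of f.
+  array[2 * N] int y_index;
+  for (j in 1:N) {
+    y_index[2 * j - 1] = j;
+    y_index[2 * j] = j;
+  }
+}
+```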
+
+As before, we could specify the control parameters for the embedded
+Laplace approximation using `laplace_marginal_tol_poisson_2_log_lpmf`
+and `laplace_latent_tol_poisson_2_log_rng`.
+
+Marginalization with a Laplace approximation can lead to faster
+inference; however, it also introduces an approximation error. In
+practice, this error is negligible when using a Poisson likelihood,
+and the approximation works well for log-concave likelihoods
+[@Kuss:2005; @Vanhatalo:2010; @Cseke:2011; @Vehtari:2016].
+Still, users should exercise caution, especially when trying
+unconventional likelihoods.
+
+#### Logistic GP regression {-}
 
 For binary classification problems, the observed outputs
 $z_n \in \{ 0,1 \}$ are binary. These outputs are modeled using a Gaussian
@@ -514,10 +679,47 @@ data {
 // ...
 model {
   // ...
-  y ~ bernoulli_logit(a + f);
+  z ~ bernoulli_logit(a + f);
 }
 ```
 
+#### Logistic GP regression with an embedded Laplace approximation {-}
+
+As with the Poisson GP, we cannot marginalize out the GP function
+exactly; however, we can resort to an embedded Laplace approximation.
+
+```stan
+functions {
+  // log likelihood function
+  real ll_function(vector f, real a, array[] int z) {
+    return bernoulli_logit_lpmf(z | a + f);
+  }
+
+  // covariance function
+  matrix cov_function(real rho, real alpha, array[] real x, int N, real delta) {
+    matrix[N, N] K = gp_exp_quad_cov(x, alpha, rho);
+    return add_diag(K, delta);
+  }
+}
+
+// ...
+
+model {
+  target += laplace_marginal(ll_function, (a, z), f_init,
+                             cov_function, (rho, alpha, x, N, delta));
+}
+
+generated quantities {
+  vector[N] f = laplace_latent_rng(ll_function, (a, z), f_init,
+                                   cov_function, (rho, alpha, x, N, delta));
+}
+```
+
+While marginalization with a Laplace approximation can lead to faster
+inference, it also introduces an approximation error. In practice,
+this error may not be negligible with a Bernoulli likelihood; for more
+discussion see, e.g., [@Vehtari:2016; @Margossian:2020].
+
 
 ### Automatic relevance determination {-}
@@ -799,7 +1001,7 @@ input vector `x`.
 All that is left is to define the univariate normal distribution
 statement for `y`.
 
 The generated quantities block defines the quantity `y2`. We generate
-`y2` by randomly generating `N2` values from univariate normals with 
+`y2` by randomly generating `N2` values from univariate normals with
 each mean corresponding to the appropriate element in `f`.