diff --git a/DESCRIPTION b/DESCRIPTION index 55066cb..d9acf9d 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: SBC Title: Simulation Based Calibration for rstan/cmdstanr models -Version: 0.1.1.9000 +Version: 0.2.0.9000 Authors@R: c(person(given = "Shinyoung", family = "Kim", diff --git a/NAMESPACE b/NAMESPACE index 9371fe2..e3fb7cf 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ export(SBC_print_example_model) export(SBC_results) export(SBC_statistics_from_single_fit) export(bind_datasets) +export(bind_derived_quantities) export(bind_generated_quantities) export(bind_results) export(calculate_prior_sd) @@ -98,11 +99,13 @@ export(calculate_sds_draws_matrix) export(check_all_SBC_diagnostics) export(cjs_dist) export(compute_SBC) +export(compute_dquants) export(compute_gen_quants) export(compute_results) export(data_for_ecdf_plots) export(default_chunk_size) export(default_cores_per_fit) +export(derived_quantities) export(draws_rvars_to_standata) export(draws_rvars_to_standata_single) export(empirical_coverage) @@ -123,6 +126,7 @@ export(recompute_statistics) export(set2set) export(validate_SBC_datasets) export(validate_SBC_results) +export(validate_derived_quantities) export(validate_generated_quantities) export(wasserstein) import(ggplot2) diff --git a/R/derived-quantities.R b/R/derived-quantities.R new file mode 100644 index 0000000..4a366d7 --- /dev/null +++ b/R/derived-quantities.R @@ -0,0 +1,162 @@ +#' Create a definition of derived quantities evaluated in R. +#' +#' When the expression contains non-library functions/objects, and parallel processing +#' is enabled, those must be +#' named in the `.globals` parameter (hopefully we'll be able to detect those +#' automatically in the future). Note that [recompute_SBC_statistics()] currently +#' does not use parallel processing, so `.globals` don't need to be set. +#' +#' @param ... named expressions representing the quantitites +#' @param .globals A list of names of objects that are defined +#' in the global environment and need to present for the gen. quants. to evaluate. +#' It is added to the `globals` argument to [future::future()], to make those +#' objects available on all workers. +#' @examples +#'# Derived quantity computing the total log likelihood of a normal distribution +#'# with known sd = 1 +#'normal_lpdf <- function(y, mu, sigma) { +#' sum(dnorm(y, mean = mu, sd = sigma, log = TRUE)) +#'} +#' +#'# Note the use of .globals to make the normal_lpdf function available +#'# within the expression +#'log_lik_dq <- derived_quantities(log_lik = normal_lpdf(y, mu, 1), +#' .globals = "normal_lpdf" ) +#' +#' @export +derived_quantities <- function(..., .globals = list()) { + structure(rlang::enquos(..., .named = TRUE), + class = "SBC_derived_quantities", + globals = .globals + ) +} + +#' @title Validate a definition of derived quantities evaluated in R. +#' @export +validate_derived_quantities <- function(x) { + # Backwards compatibility + if(inherits(x, "SBC_generated_quantities")) { + class(x) <- "SBC_derived_quantities" + } + stopifnot(inherits(x, "SBC_derived_quantities")) + invisible(x) +} + +#' @title Combine two lists of derived quantities +#' @export +bind_derived_quantities <- function(dq1, dq2) { + validate_derived_quantities(dq1) + validate_derived_quantities(dq2) + structure(c(dq1, dq2), + class = "SBC_derived_quantities", + globals = bind_globals(attr(dq1, "globals"), attr(dq2, "globals"))) +} + +#'@title Compute derived quantities based on given data and posterior draws. +#'@param gen_quants Deprecated, use `dquants` +#'@export +compute_dquants <- function(draws, generated, dquants, gen_quants = NULL) { + if(!is.null(gen_quants)) { + warning("gen_quants argument is deprecated, use dquants") + if(rlang::is_missing(dquants)) { + dquants <- gen_quants + } + } + dquants <- validate_derived_quantities(dquants) + draws_rv <- posterior::as_draws_rvars(draws) + + draws_env <- list2env(draws_rv) + if(!is.null(generated)) { + if(!is.list(generated)) { + stop("compute_dquants assumes that generated is a list, but this is not the case") + } + generated_env <- list2env(generated, parent = draws_env) + + data_mask <- rlang::new_data_mask(bottom = generated_env, top = draws_env) + } else { + data_mask <- rlang::new_data_mask(bottom = draws_env) + } + + eval_func <- function(dq) { + # Wrap the expression in `rdo` which will mostly do what we need + # all the tricks are just to have the correct environment when we need it + wrapped_dq <- rlang::new_quosure(rlang::expr(posterior::rdo(!!rlang::get_expr(dq))), rlang::get_env(dq)) + rlang::eval_tidy(wrapped_dq, data = data_mask) + } + rvars <- lapply(dquants, FUN = eval_func) + do.call(posterior::draws_rvars, rvars) +} + + + +#' @title Create a definition of derived quantities evaluated in R. +#' @description Delegates directly to `derived_quantities()`. +#' +#' @name generated_quantities-deprecated +#' @seealso \code{\link{SBC-deprecated}} +#' @keywords internal +NULL + +#' @rdname SBC-deprecated +#' @section \code{generated_quantities}: +#' Instead of \code{generated_quantities}, use \code{\link{derived_quantities}}. +#' +#' @export +generated_quantities <- function(...) { + warning("generated_quantities() is deprecated, use derived_quantities instead.") + derived_quantities(...) +} + +#' @title Validate a definition of derived quantities evaluated in R. +#' @description Delegates directly to `validate_derived_quantities()`. +#' +#' @name generated_quantities-deprecated +#' @seealso \code{\link{SBC-deprecated}} +#' @keywords internal +NULL + +#' @rdname SBC-deprecated +#' @section \code{validate_generated_quantities}: +#' Instead of \code{validate_generated_quantities}, use \code{\link{validate_derived_quantities}}. +#' +#' @export +validate_generated_quantities <- function(...) { + warning("generated_quantities() is deprecated, use validate_derived_quantities instead.") + validate_derived_quantities(...) +} + +#' @title Combine two lists of derived quantities +#' @description Delegates directly to `bind_derived_quantities()`. +#' +#' @name bind_generated_quantities-deprecated +#' @seealso \code{\link{SBC-deprecated}} +#' @keywords internal +NULL + +#' @rdname SBC-deprecated +#' @section \code{bind_generated_quantities}: +#' Instead of \code{bind_generated_quantities}, use \code{\link{bind_derived_quantities}}. +#' +#' @export +bind_generated_quantities <- function(...) { + warning("bind_generated_quantities() is deprecated, use bind_derived_quantities instead.") + bind_derived_quantities(...) +} + +#'@title Compute derived quantities based on given data and posterior draws. +#' @description Delegates directly to `compute_dquants()`. +#' +#' @name compute_gen_quants-deprecated +#' @seealso \code{\link{SBC-deprecated}} +#' @keywords internal +NULL + +#' @rdname SBC-deprecated +#' @section \code{compute_gen_quants}: +#' Instead of \code{compute_gen_quants}, use \code{\link{compute_dquants}}. +#' +#' @export +compute_gen_quants <- function(...) { + warning("compute_gen_quants() is deprecated, use compute_dquants() instead.") + compute_dquants(...) +} diff --git a/R/results.R b/R/results.R index c3d5c05..d5a4223 100644 --- a/R/results.R +++ b/R/results.R @@ -254,7 +254,8 @@ length.SBC_results <- function(x) { errors = x$errors[indices]) } -#' Bind globals used in gen quants or backend +#' Combine two sets globals for use in derived quantities or backend +#' @seealso [compute_SBC()], [derived_quantities()] bind_globals <- function(globals1, globals2) { if(length(globals1) > 0 && length(globals2) > 0) { if(is.list(globals1) != is.list(globals2)) { @@ -365,6 +366,8 @@ compute_results <- function(...) { #' @param cache_location The filesystem location of cache. For `cache_mode = "results"` #' this should be a name of a single file. If the file name does not end with #' `.rds`, this extension is appended. +#' @param dquants Derived quantities to include in SBC. Use [derived_quantities()] to construct them. +#' @param gen_quants Deprecated, use dquants instead #' @param globals A list of names of objects that are defined #' in the global environment and need to present for the backend to work ( #' if they are not already available in package). @@ -378,15 +381,23 @@ compute_SBC <- function(datasets, backend, thin_ranks = SBC_backend_default_thin_ranks(backend), ensure_num_ranks_divisor = 2, chunk_size = default_chunk_size(length(datasets)), - gen_quants = NULL, + dquants = NULL, cache_mode = "none", cache_location = NULL, - globals = list()) { + globals = list(), + gen_quants = NULL) { stopifnot(length(datasets) > 0) - datasets <- validate_SBC_datasets(datasets) if(!is.null(gen_quants)) { - gen_quants <- validate_generated_quantities(gen_quants) + warning("gen_quants argument is deprecated, use dquants") + if(is.null(dquants)) { + dquants <- gen_quants + } + } + + datasets <- validate_SBC_datasets(datasets) + if(!is.null(dquants)) { + dquants <- validate_derived_quantities(dquants) } ## Handle caching @@ -410,9 +421,14 @@ compute_SBC <- function(datasets, backend, if(file.exists(cache_location)) { results_from_cache <- readRDS(cache_location) + # Ensure backwards compatibility of cache + if(!("dquants" %in% names(results_from_cache)) && ("gen_quants" %in% names(results_from_cache))) { + # This type of assignment necessary to preserve NULL values + results_from_cache["dquants"] <- list(results_from_cache$gen_quants) + } if(!is.list(results_from_cache) || !all( - c("result", "backend_hash", "data_hash", "thin_ranks", "gen_quants","keep_fits") + c("result", "backend_hash", "data_hash", "thin_ranks", "dquants","keep_fits") %in% names(results_from_cache))) { warning("Cache file exists but is in invalid format. Will recompute.") } else if(results_from_cache$backend_hash != backend_hash) { @@ -427,21 +443,31 @@ compute_SBC <- function(datasets, backend, result <- tryCatch(validate_SBC_results(results_from_cache$result), error = function(e) { NULL }) + error_dquants <- "error dquants" + if(!is.null(results_from_cache$dquants)) { + results_from_cache$dquants <- + tryCatch(validate_derived_quantities(results_from_cache$dquants), + error = function(e) { error_dquants }) + + } if(is.null(result)) { warning("Cache file contains invalid SBC_results object. Will recompute.") } else if(results_from_cache$thin_ranks != thin_ranks || - !identical(results_from_cache$gen_quants, gen_quants) || + !identical(results_from_cache$dquants, dquants) || results_from_cache$ensure_num_ranks_divisor != ensure_num_ranks_divisor) { + if(identical(results_from_cache$dquants, error_dquants)) { + warning("dquants loaded from cache are invalid") + } if(!results_from_cache$keep_fits) { - message("Cache file exists, but was computed with different thin_ranks/gen_quants/ensure_num_ranks_divisor and keep_fits == FALSE. Will recompute.") + message("Cache file exists, but was computed with different thin_ranks/dquants/ensure_num_ranks_divisor and keep_fits == FALSE. Will recompute.") } else { message(paste0("Results loaded from cache file '", cache_basename, - "' but it was computed with different thin_ranks/gen_quants/ensure_num_ranks_divisor.\n", + "' but it was computed with different thin_ranks/dquants/ensure_num_ranks_divisor.\n", "Calling recompute_SBC_statistics.")) return(recompute_SBC_statistics(old_results = result, datasets = datasets, thin_ranks = thin_ranks, ensure_num_ranks_divisor = ensure_num_ranks_divisor, - gen_quants = gen_quants, + dquants = dquants, backend = backend)) } } else { @@ -470,11 +496,11 @@ compute_SBC <- function(datasets, backend, generated = datasets$generated[[i]] ) } - if(is.null(gen_quants)) { + if(is.null(dquants)) { future.globals <- globals } else { - gq_globals <- attr(gen_quants, "globals") - future.globals <- bind_globals(globals, gq_globals) + dq_globals <- attr(dquants, "globals") + future.globals <- bind_globals(globals, dq_globals) } results_raw <- future.apply::future_lapply( @@ -482,7 +508,7 @@ compute_SBC <- function(datasets, backend, backend = backend, cores = cores_per_fit, keep_fit = keep_fits, thin_ranks = thin_ranks, ensure_num_ranks_divisor = ensure_num_ranks_divisor, - gen_quants = gen_quants, + dquants = dquants, future.seed = TRUE, future.globals = future.globals, future.chunk.size = chunk_size) @@ -591,7 +617,7 @@ compute_SBC <- function(datasets, backend, results_for_cache <- list(result = res, backend_hash = backend_hash, data_hash = data_hash, thin_ranks = thin_ranks, ensure_num_ranks_divisor = ensure_num_ranks_divisor, - gen_quants = gen_quants, keep_fits = keep_fits) + dquants = dquants, keep_fits = keep_fits) tryCatch(saveRDS(results_for_cache, file = cache_location), error = function(e) { warning("Error when saving cache file: ", e) }) } @@ -679,7 +705,7 @@ reemit_captured <- function(captured) { compute_SBC_single <- function(vars_and_generated, backend, cores, keep_fit, thin_ranks, ensure_num_ranks_divisor, - gen_quants) { + dquants) { variables <- vars_and_generated$variables generated <- vars_and_generated$generated @@ -706,7 +732,7 @@ compute_SBC_single <- function(vars_and_generated, backend, cores, res$stats <- SBC::SBC_statistics_from_single_fit( res$fit, variables = variables, thin_ranks = thin_ranks, ensure_num_ranks_divisor = ensure_num_ranks_divisor, - generated = generated, gen_quants = gen_quants, + generated = generated, dquants = dquants, backend = backend) res$backend_diagnostics <- SBC::SBC_fit_to_diagnostics( @@ -752,18 +778,26 @@ compute_SBC_single <- function(vars_and_generated, backend, cores, SBC_statistics_from_single_fit <- function(fit, variables, generated, thin_ranks, ensure_num_ranks_divisor, - gen_quants, - backend) { + dquants, + backend, + gen_quants = NULL) { + + if(!is.null(gen_quants)) { + warning("gen_quants argument is deprecated, use dquants") + if(rlang::is_missing(dquants)) { + dquants <- gen_quants + } + } fit_matrix <- SBC_fit_to_draws_matrix(fit) - if(!is.null(gen_quants)){ - gen_quants <- validate_generated_quantities(gen_quants) - gq_fit <- compute_gen_quants(fit_matrix, generated, gen_quants) - fit_matrix <- posterior::bind_draws(fit_matrix, gq_fit, along = "variable") + if(!is.null(dquants)){ + dquants <- validate_derived_quantities(dquants) + dq_fit <- compute_dquants(fit_matrix, generated, dquants) + fit_matrix <- posterior::bind_draws(fit_matrix, dq_fit, along = "variable") - gq_variable <- compute_gen_quants(variables, generated, gen_quants) - variables <- posterior::bind_draws(variables, gq_variable, along = "variable") + dq_variable <- compute_dquants(variables, generated, dquants) + variables <- posterior::bind_draws(variables, dq_variable, along = "variable") } shared_vars <- intersect(posterior::variables(variables), @@ -857,68 +891,6 @@ check_stats <- function(stats, datasets, thin_ranks, } } -#' Create a definition of generated quantities evaluated in R. -#' -#' When the expression contains non-library functions/objects, and parallel processing -#' is enabled, those must be -#' named in the `.globals` parameter (hopefully we'll be able to detect those -#' automatically in the future). Note that [recompute_SBC_statistics()] currently -#' does not use parallel processing, so `.globals` don't need to be set. -#' -#' @param ... named expressions representing the quantitites -#' @param .globals A list of names of objects that are defined -#' in the global environment and need to present for the gen. quants. to evaluate. -#' It is added to the `globals` argument to [future::future()], to make those -#' objects available on all workers. -#' @export -generated_quantities <- function(..., .globals = list()) { - structure(rlang::enquos(..., .named = TRUE), - class = "SBC_generated_quantities", - globals = .globals - ) -} - -#' @export -validate_generated_quantities <- function(x) { - stopifnot(inherits(x, "SBC_generated_quantities")) - invisible(x) -} - -#' @export -bind_generated_quantities <- function(gq1, gq2) { - validate_generated_quantities(gq1) - validate_generated_quantities(gq2) - structure(c(gq1, gq2), - class = "SBC_generated_quantities", - globals = bind_globals(attr(gq1, "globals"), attr(gq2, "globals"))) -} - -#'@export -compute_gen_quants <- function(draws, generated, gen_quants) { - gen_quants <- validate_generated_quantities(gen_quants) - draws_rv <- posterior::as_draws_rvars(draws) - - draws_env <- list2env(draws_rv) - if(!is.null(generated)) { - if(!is.list(generated)) { - stop("compute_gen_quants assumes that generated is a list, but this is not the case") - } - generated_env <- list2env(generated, parent = draws_env) - - data_mask <- rlang::new_data_mask(bottom = generated_env, top = draws_env) - } else { - data_mask <- rlang::new_data_mask(bottom = draws_env) - } - - eval_func <- function(gq) { - # Wrap the expression in `rdo` which will mostly do what we need - # all the tricks are just to have the correct environment when we need it - wrapped_gq <- rlang::new_quosure(rlang::expr(posterior::rdo(!!rlang::get_expr(gq))), rlang::get_env(gq)) - rlang::eval_tidy(wrapped_gq, data = data_mask) - } - rvars <- lapply(gen_quants, FUN = eval_func) - do.call(posterior::draws_rvars, rvars) -} #' @title Recompute SBC statistics without refitting models. #' @description Delegates directly to `recompute_SBC_statistics()`. @@ -941,7 +913,7 @@ recompute_statistics <- function(...) { #' Recompute SBC statistics without refitting models. #' #' Useful for example to recompute SBC ranks with a different choice of `thin_ranks` -#' or added generated quantities. +#' or added derived quantities. #' @return An S3 object of class `SBC_results` with updated `$stats` and `$default_diagnostics` fields. #' @param backend backend used to fit the results. Used to pull various defaults #' and other setting influencing the computation of statistics. @@ -950,10 +922,18 @@ recompute_statistics <- function(...) { recompute_SBC_statistics <- function(old_results, datasets, backend, thin_ranks = SBC_backend_default_thin_ranks(backend), ensure_num_ranks_divisor = 2, - gen_quants = NULL) { + dquants = NULL, gen_quants = NULL) { validate_SBC_results(old_results) validate_SBC_datasets(datasets) + if(!is.null(gen_quants)) { + warning("gen_quants argument is deprecated, use dquants") + if(is.null(dquants)) { + dquants <- gen_quants + } + } + + if(length(old_results) != length(datasets)) { stop("The number of fits in old_results does not match the number of simulations") } @@ -976,7 +956,7 @@ recompute_SBC_statistics <- function(old_results, datasets, backend, generated = datasets$generated[[i]], thin_ranks = thin_ranks, ensure_num_ranks_divisor = ensure_num_ranks_divisor, - gen_quants = gen_quants, + dquants = dquants, backend = backend) new_stats_list[[i]]$sim_id <- i new_stats_list[[i]] <- dplyr::select(new_stats_list[[i]], sim_id, tidyselect::everything()) diff --git a/README.md b/README.md index 1da10e8..0a11ecd 100755 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ plot_ecdf_diff(results_sd) The diagnostic plots show no problems in this case. As with any other software test, we can observe clear failures, but absence of failures does not imply correctness. We can however make the SBC check more thorough by using a lot of -simulations and including suitable generated quantities to guard against +simulations and including suitable derived quantities to guard against [known limitations of vanilla SBC](https://hyunjimoon.github.io/SBC/articles/limits_of_SBC.html). ## Paralellization @@ -99,7 +99,8 @@ With a little additional work, you can integrate SBC with any exact or approxima ## References * Theoretical support - * [Validating Bayesian Inference Algorithms with Simulation-Based Calibration](https://arxiv.org/pdf/1804.06788.pdf) Talts, Betancourt, Simpson, Vehtari, Gelman, 2018 + * [Simulation-Based Calibration Checking for Bayesian Computation: The Choice of Test Quantities Shapes Sensitivity](https://arxiv.org/abs/2211.02383v1) Modrák, Moon, Kim, Bürkner, Huurre, Faltejsková, Gelman, Vehtari, 2022 + * [Validating Bayesian Inference Algorithms with Simulation-Based Calibration](http://www.stat.columbia.edu/~gelman/research/unpublished/sbc.pdf) Talts, Betancourt, Simpson, Vehtari, Gelman, 2018 * [Graphical Test for Discrete Uniformity and its Applications in Goodness of Fit Evaluation and Multiple Sample Comparison](https://arxiv.org/abs/2103.10522) Säilynoja, Bürkner, Vehtari, 2021 * [Bayesian Workflow](https://arxiv.org/abs/2011.01808), Gelman et al., 2020 * [Toward a principled Bayesian workflow in cognitive science](https://psycnet.apa.org/record/2020-43606-001) Schad, Betancourt, Vasishth, 2021 diff --git a/_pkgdown.yml b/_pkgdown.yml index 79ac2b3..3e32d1a 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -90,7 +90,9 @@ reference: - contents: - compute_SBC - contains("SBC_results") - - generated_quantities + - contains("derived_quantities") + - contains("dquants") + - bind_globals - SBC_statistics_from_single_fit - recompute_SBC_statistics - bind_results diff --git a/docs/404.html b/docs/404.html index 149411c..747d6cc 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@
@@ -145,7 +145,7 @@vignettes/SBC.Rmd
SBC.Rmd
This opens two principal use-cases of SBC:
## Results loaded from cache file 'results'
## - 1 (1%) fits had at least one Rhat > 1.01. Largest Rhat was 1.015.
+## - 94 (94%) fits had some steps rejected. Maximum number of rejections was 2.
## Not all diagnostics are OK.
## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
@@ -439,7 +441,7 @@ We can now inspect the results to see if there were any errors and check individual stats:
-+results$stats
## # A tibble: 100 × 15 ## sim_id variable simulated_value rank z_score mean median sd mad q5 @@ -464,13 +466,13 @@
Plots are uniformly distributed. We can check the rank histogram and ECDF plots (see
vignette("rank_visualizations")
for description of the plots): -+-plot_rank_hist(results)
@@ -127,7 +127,7 @@+-plot_ecdf(results)
diff --git a/docs/articles/bad_parametrization.html b/docs/articles/bad_parametrization.html index 2d37169..bed371c 100644 --- a/docs/articles/bad_parametrization.html +++ b/docs/articles/bad_parametrization.html @@ -33,7 +33,7 @@+plot_ecdf_diff(results)
Since our simulator and model do match and Stan works well, we see @@ -550,7 +552,7 @@
Acknowledgements -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
Discovering bad parametrization with SBC
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/bad_parametrization.Rmd
@@ -303,7 +303,7 @@bad_parametrization.Rmd
2022-07-04
cache_mode = "results", cache_location = file.path(cache_dir, "model2"))-## Results loaded from cache file 'model2'
+## - 10 (100%) fits had some steps rejected. Maximum number of rejections was 9.
## - 10 (100%) fits had some steps rejected. Maximum number of rejections was 6.
@@ -335,7 +335,7 @@## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
2022-07-04
diff --git a/docs/articles/brms.html b/docs/articles/brms.html index 0a667b3..bc3a71f 100644 --- a/docs/articles/brms.html +++ b/docs/articles/brms.html @@ -33,7 +33,7 @@
vignettes/brms.Rmd
brms.Rmd
For increased sensitivity, we also add the log likelihood of the data
-given parameters as a generated quantity that we’ll also monitor (see
-the limits_of_SBC
+given parameters as a derived quantity that we’ll also monitor (see the
+limits_of_SBC
vignette for discussion on why this is useful).
-log_lik_gq_func <- generated_quantities(
- log_lik = sum(dnorm(y, b_Intercept + x * b_x + r_group[group], sigma, log = TRUE)))
log_lik_dq_func <- derived_quantities(
+ log_lik = sum(dnorm(y, b_Intercept + x * b_x + r_group[group], sigma, log = TRUE))
+ # Testing CRPS, probably not worth it
+ #, CRPS = mean(scoringRules::crps_norm(y, b_Intercept + x * b_x + r_group[group], sigma))
+ )
set.seed(12239755)
datasets_func <- generate_datasets(n_sims_generator, 100)
results_func <- compute_SBC(datasets_func, backend_func,
- gen_quants = log_lik_gq_func,
+ dquants = log_lik_dq_func,
cache_mode = "results",
cache_location = file.path(cache_dir, "func"))
## Results loaded from cache file 'func'
@@ -386,7 +389,7 @@
results_func2 <- compute_SBC(datasets_func, backend_func2,
- gen_quants = log_lik_gq_func,
+ dquants = log_lik_dq_func,
cache_mode = "results",
cache_location = file.path(cache_dir, "func2"))
## Results loaded from cache file 'func2'
@@ -428,7 +431,7 @@ Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
diff --git a/docs/articles/computational_algorithm1.html b/docs/articles/computational_algorithm1.html index 6ac4209..8361cd5 100644 --- a/docs/articles/computational_algorithm1.html +++ b/docs/articles/computational_algorithm1.html @@ -33,7 +33,7 @@ @@ -127,7 +127,7 @@vignettes/computational_algorithm1.Rmd
computational_algorithm1.Rmd
So the 90% central credible interval for mu_signal
likely contains less than 82% of true values.
For a crude result, the default ADVI setup we just tested is not @@ -630,13 +630,13 @@
This pattern where the default meanfield approximation is overconfident and the fullrank approximation is underconfident is in fact quite frequently seen, which motivated some experiments with a low @@ -692,11 +692,11 @@
This variant has somewhat lower overall mismatch, but tends to be @@ -752,27 +752,22 @@
## Results loaded from cache file 'hmm_sample'
-## - 1 (2%) fits had at least one Rhat > 1.01. Largest Rhat was 1.019.
-## - 1 (2%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing
-## the rank statistics. The lowest tail ESS was 154.
-## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics)
-## or number of posterior draws (by refitting) might help.
-## - 1 (2%) fits had divergent transitions. Maximum number of divergences was 51.
+## - 2 (4%) fits had divergent transitions. Maximum number of divergences was 70.
## Not all diagnostics are OK.
## You can learn more by inspecting $default_diagnostics, $backend_diagnostics
## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
We get a small number of problematic fits, which we will ignore for now. We check that there are no obvious calibration problems:
-+-plot_ecdf_diff(res_hmm_sample)
+plot_rank_hist(res_hmm_sample)
For the machine we built the vignette on, here are the distributions of times (for ADVI and optimizing) and time of longest chain (for HMC):
-@@ -144,7 +144,7 @@+diff --git a/docs/reference/SBC_example_backend.html b/docs/reference/SBC_example_backend.html index 7146d39..9d26d4e 100644 --- a/docs/reference/SBC_example_backend.html +++ b/docs/reference/SBC_example_backend.html @@ -17,7 +17,7 @@hmm_time <- rbind( data.frame(alg = "Optimizing", @@ -807,7 +802,7 @@
Summary @@ -820,7 +815,7 @@
Example III - Hidden difference between the two states) and move to the log scale. So instead of
mu_background
andmu_signal
we have anordered
vectorlog_mu
: -+data { int N; // Number of observations @@ -875,7 +870,7 @@
Example III - Hidden that it implies a slightly different prior on the active (higher mean) state. Here is how we can generate data with this mildly different prior (we need rejection sampling to fulfill the ordering constraint): -
@@ -142,7 +142,7 @@+generator_HMM_ordered <- function(N) { # Rejection sampling for ordered mu with the correct priors @@ -922,10 +917,10 @@
Example III - Hidden }
So let us build a default variational backend and fit it to just 20 simulations.
-++-model_HMM_ordered <- cmdstan_model("stan/hmm_poisson_ordered.stan") backend_HMM_ordered <- SBC_backend_cmdstan_variational(model_HMM_ordered, n_retries_init = 3)
@@ -128,7 +128,7 @@+diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png index e935c69..33b069b 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png and b/docs/articles/discrete_vars_files/figure-html/ranks_jags-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png index af41a33..ee38b00 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png and b/docs/articles/discrete_vars_files/figure-html/ranks_jags-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png index 9bf4f06..f1ebb9e 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png and b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png index 2257acb..2749716 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png and b/docs/articles/discrete_vars_files/figure-html/ranks_jags_marginalized-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png b/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png index 6bd273c..bc8653f 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png and b/docs/articles/discrete_vars_files/figure-html/results1_plots-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png b/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png index 24a8368..47a5397 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png and b/docs/articles/discrete_vars_files/figure-html/results1_plots-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png index 962f448..9657c01 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png and b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png index 30f1812..30c788e 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png and b/docs/articles/discrete_vars_files/figure-html/results_2_all_plots-2.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png b/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png index 4515ea2..7015412 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png and b/docs/articles/discrete_vars_files/figure-html/results_2_plots-1.png differ diff --git a/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png b/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png index 38556e0..064d2e7 100644 Binary files a/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png and b/docs/articles/discrete_vars_files/figure-html/results_2_plots-2.png differ diff --git a/docs/articles/implementing_backends.html b/docs/articles/implementing_backends.html index 3a705d3..c38216b 100644 --- a/docs/articles/implementing_backends.html +++ b/docs/articles/implementing_backends.html @@ -33,7 +33,7 @@set.seed(12333654) ds_hmm_ordered <- generate_datasets( SBC_generator_function(generator_HMM_ordered, N = 100), @@ -937,10 +932,10 @@
Example III - Hidden
## Results loaded from cache file 'hmm_ordered'
Immediately we see that the
-log_mu[1]
variable is heavily miscalibrated.+-plot_ecdf_diff(res_hmm_ordered)
+plot_rank_hist(res_hmm_ordered)
What changed? To understand that we need to remember how Stan represents @@ -956,7 +951,7 @@
Example III - Hidden complex correlation structure between the unconstrained parameters that the ADVI algorithm is unable to handle well.
Even trying the fullrank variant does not help:
-+backend_HMM_ordered_fullrank <- SBC_backend_cmdstan_variational(model_HMM_ordered, algorithm = "fullrank", n_retries_init = 3) @@ -966,14 +961,14 @@
Example III - Hidden cache_mode = "results", cache_location = file.path(cache_dir, "hmm_ordered_fullrank"))
## Results loaded from cache file 'hmm_ordered_fullrank'
The results are still strongly miscalibrated.
-+-plot_ecdf_diff(res_hmm_ordered_fullrank)
+plot_rank_hist(res_hmm_ordered_fullrank)
To have a complete overview we may also try the optimizing fit:
--+@@ -128,7 +128,7 @@model_HMM_ordered_rstan <- stan_model("stan/hmm_poisson_ordered.stan") res_hmm_ordered_optimizing <- compute_SBC( @@ -984,10 +979,10 @@
Example III - Hidden
in this case, optimizing has better calibration for
-log_mu
, but worse calibration forrho
than ADVI.diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png index 346da2d..997bd08 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png index 08aee5e..3228cce 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png index 29acea7..6b5ce3d 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png index 9aae60f..aba7c4b 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png and b/docs/articles/computational_algorithm1_files/figure-html/ecdf_rank_poisson_100-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png index 08947b6..c58c851 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png index 3355d06..52f1c9b 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png index 379bf36..900efb4 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_2_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png index 94d364f..7806418 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png index 8d81c1b..9e3b138 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png index 12c12bc..4d844c1 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png index abaaa89..4ce28c0 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png index 89148ee..93fa917 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png index 9e57a59..cd575bb 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_fullrank_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png index 4ed7527..88f74c0 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_coverage-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png index 2aabd1f..394b883 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png index c8e8bd7..8890836 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_lowtol_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png index b9efcf9..68b27f6 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png index 2f324f9..18cf158 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png index 5601438..2a60bfc 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png index b3f9c3e..f7aad01 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_ordered_fullrank_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png index e5179ef..adad8f0 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-1.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png index d812c0b..f36fee4 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_sample_ecdf_ranks-2.png differ diff --git a/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png b/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png index 8f1b7ba..050d5ea 100644 Binary files a/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png and b/docs/articles/computational_algorithm1_files/figure-html/hmm_time-1.png differ diff --git a/docs/articles/discrete_vars.html b/docs/articles/discrete_vars.html index d242305..abf75de 100644 --- a/docs/articles/discrete_vars.html +++ b/docs/articles/discrete_vars.html @@ -33,7 +33,7 @@+-plot_ecdf_diff(res_hmm_ordered_optimizing)
@@ -1048,7 +1043,7 @@+plot_rank_hist(res_hmm_ordered_optimizing)
References -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
SBC with discrete parameters in Stan and
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/discrete_vars.Rmd
@@ -256,24 +256,24 @@discrete_vars.Rmd
Stan version and debugging
set.seed(85394672) datasets_1 <- generate_datasets(generator_1, 30)
Additionally, we’ll add a generated quantity expressing the total +
Additionally, we’ll add a derived quantity expressing the total log-likelihood of data given the fitted parameters. The expression -within the
generated_quantities()
call is evaluated for -both prior and posterior draws and included as another variable in SBC -checks. It turns out this type of generated quantities can increase the +within thederived_quantities()
call is evaluated for both +prior and posterior draws and included as another variable in SBC +checks. It turns out this type of derived quantities can increase the sensitivity of the SBC against some issues in the model. Seevignette("limits_of_SBC")
for a more detailed discussion of this.+-log_lik_gq <- generated_quantities(log_lik = sum(dpois(y, ifelse(1:T < s, e, l), log = TRUE)))
log_lik_dq <- derived_quantities(log_lik = sum(dpois(y, ifelse(1:T < s, e, l), log = TRUE)))
So finally, lets actually compute SBC:
+ dquants = log_lik_dq)results_1 <- compute_SBC(datasets_1, backend_1, cache_mode = "results", cache_location = file.path(cache_dir, "model1"), - gen_quants = log_lik_gq)
-## Results loaded from cache file 'model1'
+## - 5 (17%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
## - 4 (13%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
## - 20 (67%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing ## the rank statistics. The lowest tail ESS was NA. ## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) @@ -297,12 +297,12 @@
Stan version and debugging
## # A tibble: 30 × 15 ## sim_id variable simulated_value rank z_score mean median sd mad ## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -## 1 1 s 3 185 0.0182 2.97 3 1.61 2.97 +## 1 1 s 3 186 0.0200 2.97 3 1.59 2.97 ## 2 2 s 1 24 -1.90 2.02 2 0.537 0 ## 3 3 s 4 126 -1.37 4.67 5 0.489 0 ## 4 4 s 1 10 -2.85 2.86 3 0.651 0 ## 5 5 s 5 397 2.76 2.86 3 0.775 0 -## 6 6 s 2 271 0.0449 1.94 1 1.42 0 +## 6 6 s 2 290 0.118 1.84 1 1.35 0 ## 7 7 s 3 0 -Inf 4 4 0 0 ## 8 8 s 2 129 -0.594 2.87 3 1.46 1.48 ## 9 9 s 2 0 -6.84 2.99 3 0.144 0 @@ -315,7 +315,7 @@
Stan version and debuggingLooking at the
ecdf_diff
plot we see that this seems to compromise heavily the inference fors
, but the other parameters do not show such bad behaviour. Note that the -log_lik
generated quantity shows even starker failure than +log_lik
derived quantity shows even starker failure thans
, so it indeed poses a stricter check in this scenario.@@ -376,7 +376,7 @@Stan version and debuggingset.seed(5846502) datasets_2 <- generate_datasets(generator_2, 30) results_2 <- compute_SBC(datasets_2, backend_1, - gen_quants = log_lik_gq, + dquants = log_lik_dq, cache_mode = "results", cache_location = file.path(cache_dir, "model2"))
@@ -401,16 +401,16 @@## Results loaded from cache file 'model2'
Stan version and debuggingset.seed(54321488) datasets_2_more <- generate_datasets(generator_2, 100) results_2_more <- compute_SBC(datasets_2_more, backend_1, - gen_quants = log_lik_gq, + dquants = log_lik_dq, cache_mode = "results", cache_location = file.path(cache_dir, "model3"))
## Results loaded from cache file 'model3'
-## - 15 (15%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
## - 73 (73%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing +
-## - 72 (72%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing ## the rank statistics. The lowest tail ESS was NA. ## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) ## or number of posterior draws (by refitting) might help.
+## - 7 (7%) fits had divergent transitions. Maximum number of divergences was 20.
## - 9 (9%) fits had divergent transitions. Maximum number of divergences was 25.
@@ -424,7 +424,7 @@## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
Stan version and debugging
Now - as far as this amount of SBC steps can see, the model is good and we get good behaviour for both the continuous and the discrete -parameters and the
+parameters and thelog_lik
generated quantity. Hooray!log_lik
derived quantity. Hooray!JAGS version @@ -469,12 +469,12 @@
JAGS version
datasets_2_all <- bind_datasets(datasets_2, datasets_2_more) results_jags <- compute_SBC(datasets_2_all, backend_jags, - gen_quants = log_lik_gq, + dquants = log_lik_dq, cache_mode = "results", cache_location = file.path(cache_dir_jags, "rjags"))
-## Results loaded from cache file 'rjags'
-## - 21 (16%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
## - 95 (73%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing +
+## - 20 (15%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
@@ -550,12 +550,12 @@## - 93 (72%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing ## the rank statistics. The lowest tail ESS was NA. ## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) ## or number of posterior draws (by refitting) might help.
JAGS version
results_jags_marginalized <- compute_SBC(datasets_2_all, backend_jags_marginalized, - gen_quants = log_lik_gq, + dquants = log_lik_dq, cache_mode = "results", cache_location = file.path(cache_dir_jags, "rjags_marginalized"))
-## Results loaded from cache file 'rjags_marginalized'
-## - 24 (18%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
## - 89 (68%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing +
+## - 26 (20%) fits had at least one Rhat > 1.01. Largest Rhat was NA.
@@ -590,7 +590,7 @@## - 93 (72%) fits had tail ESS undefined or less than half of the maximum rank, potentially skewing ## the rank statistics. The lowest tail ESS was NA. ## If the fits look good otherwise, increasing `thin_ranks` (via recompute_SBC_statistics) ## or number of posterior draws (by refitting) might help.
JAGS version -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
Implementing a new backend (algorithm) +
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/implementing_backends.Rmd
@@ -275,13 +275,14 @@implementing_backends.Rmd
Minimal backend support thin_ranks = 1, cache_mode = "results", cache_location = file.path(cache_dir,"poisson"))
## Results loaded from cache file 'poisson'
We have set
thin_ranks = 1
as no thinning is needed (the draws are i.i.d. by construction).The rank and ecdf plots show no big problems
-diff --git a/docs/reference/SBC_datasets.html b/docs/reference/SBC_datasets.html index 6a7fc37..f02566e 100644 --- a/docs/reference/SBC_datasets.html +++ b/docs/reference/SBC_datasets.html @@ -18,7 +18,7 @@+-plot_rank_hist(res_poisson)
+plot_ecdf_diff(res_poisson)
This is not unexpected - we’ve used a large number of observations @@ -290,7 +291,7 @@
Minimal backend support
We can see that both variables are recovered almost exactly in almost all fits:
-@@ -307,7 +308,7 @@+plot_sim_estimated(res_poisson)
Additional backend improvementsthin_ranks = 1 argument to
compute_SBC
and will not assess convergence/autocorrelation via the R-hat and ESS diagnostics. -@@ -136,7 +136,7 @@+@@ -318,7 +319,7 @@SBC_backend_iid_draws.SBC_backend_glm <- function(backend) { TRUE }
Additional backend improvements
To see some of the problems that
-glm
can encounter, we’ll run a quite pathological logistic regression:diff --git a/docs/reference/SBC_backend_rstan_sample.html b/docs/reference/SBC_backend_rstan_sample.html index 580104c..c277008 100644 --- a/docs/reference/SBC_backend_rstan_sample.html +++ b/docs/reference/SBC_backend_rstan_sample.html @@ -17,7 +17,7 @@+problematic_data <- data.frame(y = rep(0:1, each = 100), x = 1:200) glm(y ~ x, data = problematic_data, family = "binomial")
@@ -348,7 +349,7 @@## Warning: glm.fit: algorithm did not converge
Additional backend improvementsglm class: -
+SBC_fit_to_diagnostics.glm <- function(fit, fit_output, fit_messages, fit_warnings) { res <- data.frame( probs_0_1 = any(grepl("fitted probabilities numerically 0 or 1 occurred", fit_warnings)), @@ -360,7 +361,7 @@
Additional backend improvements}
Having a custom class let’s us implement a
-summary
implementation for our diagnostics:@@ -142,7 +142,7 @@+diff --git a/docs/reference/SBC_backend_rstan_optimizing.html b/docs/reference/SBC_backend_rstan_optimizing.html index aa89a37..c3d854d 100644 --- a/docs/reference/SBC_backend_rstan_optimizing.html +++ b/docs/reference/SBC_backend_rstan_optimizing.html @@ -17,7 +17,7 @@summary.SBC_glm_diagnostics <- function(x) { summ <- list( n_fits = nrow(x), @@ -378,7 +379,7 @@
object.Additional backend improvementsSBC_results
We’ll use our
-summary
implementation forSBC_glm_diagnostics
to create the messages:+@@ -163,7 +163,7 @@get_diagnostic_messages.SBC_glm_diagnostics <- function(x) { get_diagnostic_messages(summary(x)) } @@ -479,7 +480,7 @@
normal priors on the intercept and predictors. Note that we do some rejection sampling here to avoid using simulations where the generated response is the same or almost the same for all rows. -
diff --git a/docs/reference/SBC_backend_rjags.html b/docs/reference/SBC_backend_rjags.html index 02c2549..859a272 100644 --- a/docs/reference/SBC_backend_rjags.html +++ b/docs/reference/SBC_backend_rjags.html @@ -17,7 +17,7 @@+generator_single_logistic <- function(formula, template_data, intercept_prior_loc = 0, @@ -532,7 +533,7 @@
Well-informed model
+-formula_indo_simple <- outcome ~ rx set.seed(6524243) datasets_indo_simple <- generate_datasets(SBC_generator_function( @@ -542,24 +543,25 @@
Well-informed model= 500) backend_indo_simple <- SBC_backend_glm(formula = formula_indo_simple, family = "binomial")
@@ -140,7 +140,7 @@++res_indo_simple <- compute_SBC(datasets_indo_simple, backend_indo_simple, cache_mode = "results", cache_location = file.path(cache_dir,"indo_simple"))
## Results loaded from cache file 'indo_simple'
The rank plots look good:
-diff --git a/docs/reference/SBC_backend_mock.html b/docs/reference/SBC_backend_mock.html index 4b7be02..8717d7e 100644 --- a/docs/reference/SBC_backend_mock.html +++ b/docs/reference/SBC_backend_mock.html @@ -19,7 +19,7 @@+-plot_rank_hist(res_indo_simple)
+plot_ecdf_diff(res_indo_simple)
The coverages are very tight
-@@ -147,7 +147,7 @@+plot_coverage(res_indo_simple)
we can make this precise by inspecting the same results numerically:
-diff --git a/docs/reference/SBC_backend_iid_draws.html b/docs/reference/SBC_backend_iid_draws.html index 56085b4..991f809 100644 --- a/docs/reference/SBC_backend_iid_draws.html +++ b/docs/reference/SBC_backend_iid_draws.html @@ -24,7 +24,7 @@+@@ -575,13 +577,13 @@stats_effect <- res_indo_simple$stats[res_indo_simple$stats$variable == "rx1_indomethacin",] main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95)) main_eff_coverage
Well-informed model
+plot_sim_estimated(res_indo_simple)
There is a simulation where the posterior uncertainty is very large. This corresponds to observed data where the outcome is the same for all rows where the treatment was used:
-+@@ -591,7 +593,7 @@biggest_sd_sim <- res_indo_simple$stats$sim_id[ which.max(res_indo_simple$stats$sd)] table(datasets_indo_simple$generated[[biggest_sd_sim]][c("outcome", "rx")])
Well-informed model## 1 281 295
Filtering the extreme simulations out, we see that most commonly, we get a decently precise estimate.
-@@ -126,7 +126,7 @@+@@ -608,7 +610,7 @@stats_filtered <- res_indo_simple$stats[res_indo_simple$stats$sd < 200, ] plot_sim_estimated(stats_filtered, alpha = 0.3)
Badly-informed model\(N(0,1)\) prior on the age coefficient have a sensible scale. To make matters worse, we further subsample the data to contain only 100 rows. -
diff --git a/docs/reference/SBC_backend_hash_for_cache.html b/docs/reference/SBC_backend_hash_for_cache.html index f3783ce..7bc16af 100644 --- a/docs/reference/SBC_backend_hash_for_cache.html +++ b/docs/reference/SBC_backend_hash_for_cache.html @@ -18,7 +18,7 @@+set.seed(21645222) indo_rct_complex <- droplevels( @@ -628,10 +630,11 @@
Badly-informed modelbackend_indo_complex <- SBC_backend_glm(formula = formula_indo_complex, family = "binomial")
Now we are ready to run SBC:
-@@ -129,7 +129,7 @@++res_indo_complex <- compute_SBC(datasets_indo_complex, backend_indo_complex, cache_mode = "results", cache_location = file.path(cache_dir,"indo_complex"))
## Results loaded from cache file 'indo_complex'
## - 19 (4%) of fits had 0/1 probabilities.
## - 2 (0%) of fits did not converge.
## Not all diagnostics are OK. @@ -641,10 +644,10 @@
Badly-informed model
+-plot_rank_hist(res_indo_complex)
diff --git a/docs/reference/SBC_backend_default_thin_ranks.html b/docs/reference/SBC_backend_default_thin_ranks.html index 99cd9e7..64c7fcf 100644 --- a/docs/reference/SBC_backend_default_thin_ranks.html +++ b/docs/reference/SBC_backend_default_thin_ranks.html @@ -18,7 +18,7 @@+plot_ecdf_diff(res_indo_complex)
What happens is that many of the simulations result in extremely wide @@ -654,7 +657,7 @@
Badly-informed model
+@@ -672,7 +675,7 @@stats_effect <- res_indo_complex$stats[res_indo_complex$stats$variable == "rx1_indomethacin",] main_eff_coverage <- empirical_coverage(stats_effect, width = c(0.5,0.9, 0.95)) main_eff_coverage
Badly-informed model, narrow priors< posterior problematic. We can obviously make things worse by also introducing a strong prior, concentrating away from zero which we’ll do here: -
++-set.seed(1685554) datasets_indo_complex_narrow <- generate_datasets(SBC_generator_function( generator_single_logistic, @@ -683,10 +686,11 @@
Badly-informed model, narrow priors< predictor_prior_loc = c(-2, 2), predictor_prior_width = 0.5), n_sims = 500)
@@ -127,7 +127,7 @@++res_indo_complex_narrow <- compute_SBC(datasets_indo_complex_narrow, backend_indo_complex, cache_mode = "results", cache_location = file.path(cache_dir,"indo_complex_narrow"))
## Results loaded from cache file 'indo_complex_narrow'
## - 169 (34%) of fits had 0/1 probabilities.
## - 2 (0%) of fits did not converge.
## Not all diagnostics are OK. @@ -694,10 +698,10 @@
Badly-informed model, narrow priors< ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
This is enough to make basically all the variables poorly calibrated:
-diff --git a/docs/articles/indexing.html b/docs/articles/indexing.html index 5af0197..a9f9bb8 100644 --- a/docs/articles/indexing.html +++ b/docs/articles/indexing.html @@ -33,7 +33,7 @@+-plot_rank_hist(res_indo_complex_narrow)
@@ -706,7 +710,7 @@+plot_ecdf_diff(res_indo_complex_narrow)
Well-informed model, narrow priors
To make the analysis complete, we’ll also return to the simple, well-informed model, but use narrow priors.
-+-set.seed(3289542) datasets_indo_simple_narrow <- generate_datasets(SBC_generator_function( generator_single_logistic, @@ -717,17 +721,18 @@
Well-informed model, narrow priors predictor_prior_loc = c(-2, 2), predictor_prior_width = 0.5), n_sims = 500)
@@ -147,7 +147,7 @@++res_indo_simple_narrow <- compute_SBC(datasets_indo_simple_narrow, backend_indo_simple, cache_mode = "results", cache_location = file.path(cache_dir,"indo_simple_narrow"))
## Results loaded from cache file 'indo_simple_narrow'
Turns out that in this case, the likelihood is sometimes not enough to completely overwhelm the prior and the main treatment effect is poorly calibrated:
-diff --git a/docs/articles/index.html b/docs/articles/index.html index 7eb0680..e32219f 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@+-plot_rank_hist(res_indo_simple_narrow)
@@ -763,7 +768,7 @@+plot_ecdf_diff(res_indo_simple_narrow)
Conclusions -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
Additional use cases and advanced topics
Discovering indexing errors with SBC
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/indexing.Rmd
@@ -301,43 +301,46 @@indexing.Rmd
2022-07-04
results_regression_1 <- compute_SBC(datasets_regression, backend_regression_1, cache_mode = "results", cache_location = file.path(cache_dir, "regression1"))
## Results loaded from cache file 'regression1'
## - 7 (70%) fits had some steps rejected. Maximum number of rejections was 4.
-## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
++results_regression_2 <- compute_SBC(datasets_regression, backend_regression_2, cache_mode = "results", cache_location = file.path(cache_dir, "regression2"))
## Results loaded from cache file 'regression2'
## - 1 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.011.
## - 6 (60%) fits had some steps rejected. Maximum number of rejections was 4.
-## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
@@ -143,7 +143,7 @@+-results_regression_3 <- compute_SBC(datasets_regression, backend_regression_3, cache_mode = "results", cache_location = file.path(cache_dir, "regression3"))
## - 3 (30%) fits had some steps rejected. Maximum number of rejections was 3. -## Not all diagnostics are OK. +
+## Results loaded from cache file 'regression3'
+## - 3 (30%) fits had some steps rejected. Maximum number of rejections was 3.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
Here we also use the caching feature to avoid recomputing the fits when recompiling this vignette. In practice, caching is not necessary but is often useful.
-diff --git a/docs/reference/SBC_backend_cmdstan_variational.html b/docs/reference/SBC_backend_cmdstan_variational.html index fae605b..89db017 100644 --- a/docs/reference/SBC_backend_cmdstan_variational.html +++ b/docs/reference/SBC_backend_cmdstan_variational.html @@ -17,7 +17,7 @@+-plot_ecdf_diff(results_regression_1)
+plot_rank_hist(results_regression_1)
As far as a quick SBC can see the first code is OK. You could verify further with more iterations but we tested the model for you and it is OK (although the implementation is not the best one).
-@@ -136,7 +136,7 @@+-plot_ecdf_diff(results_regression_2)
diff --git a/docs/reference/SBC_backend_cmdstan_sample.html b/docs/reference/SBC_backend_cmdstan_sample.html index 74e20cd..f35554b 100644 --- a/docs/reference/SBC_backend_cmdstan_sample.html +++ b/docs/reference/SBC_backend_cmdstan_sample.html @@ -17,7 +17,7 @@+plot_rank_hist(results_regression_2)
But the second model is actually not looking good. In fact there is @@ -347,10 +350,10 @@
2022-07-04
to thesigma
variable (reusing the samex
element leads to more similar predictions for each row, sosigma
needs to be inflated to accommodate this) -+-plot_ecdf_diff(results_regression_3)
@@ -127,7 +127,7 @@+plot_rank_hist(results_regression_3)
And the third model looks OK once again - and in fact we are pretty @@ -372,7 +375,7 @@
2022-07-04
diff --git a/docs/articles/limits_of_SBC.html b/docs/articles/limits_of_SBC.html index 109514d..2ed4558 100644 --- a/docs/articles/limits_of_SBC.html +++ b/docs/articles/limits_of_SBC.html @@ -33,7 +33,7 @@Limits of SBC
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/limits_of_SBC.Rmd
@@ -136,12 +136,16 @@limits_of_SBC.Rmd
2022-07-04
-Here, we’ll walk through some problems that are hard/impossible to -diagnose with SBC. As usual the focus is on problems with models, -assuming our inference algorithm is correct. But for each of those -problems, one can imagine a corresponding failure in an algorithm — -although some of those failures are quite unlikely for actual +
Here, we’ll walk through some problems that are hard to diagnose with +SBC in its default settings. As usual the focus is on problems with +models, assuming our inference algorithm is correct. But for each of +those problems, one can imagine a corresponding failure in an algorithm +— although some of those failures are quite unlikely for actual algorithms.
+A more extensive theoretical discussion of those limits can be found +in the Simulation-Based +Calibration Checking for Bayesian Computation: The Choice of Test +Quantities Shapes Sensitivity preprint, additional examples at https://martinmodrak.github.io/sbc_test_quantities_paper/
library(SBC) library(ggplot2) @@ -261,23 +265,22 @@
SBC and minor changes to model= "results", cache_location = file.path(cache_dir, "minor_10"))
-## Results loaded from cache file 'minor_10'
-## - 1 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.024.
+## - 2 (20%) fits had some steps rejected. Maximum number of rejections was 1.
## - 3 (30%) fits had some steps rejected. Maximum number of rejections was 2.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
Not really…
-@@ -129,7 +129,7 @@+-plot_rank_hist(results_minor_10)
diff --git a/docs/reference/SBC_backend_brms_from_generator.html b/docs/reference/SBC_backend_brms_from_generator.html index 10cecbf..e06b124 100644 --- a/docs/reference/SBC_backend_brms_from_generator.html +++ b/docs/reference/SBC_backend_brms_from_generator.html @@ -20,7 +20,7 @@+plot_ecdf_diff(results_minor_10)
Will we have better luck with 100 simulations? (Note that we can use
-bind_results
to combine multiple results, letting us start small, but not throw away the computation spent for the initial simulations)+results_minor_100 <- bind_results( results_minor_10, compute_SBC(datasets_minor[11:100], backend_minor, @@ -285,21 +288,21 @@
SBC and minor changes to model= file.path(cache_dir, "minor_90")) )
-## Results loaded from cache file 'minor_90'
-## - 6 (7%) fits had at least one Rhat > 1.01. Largest Rhat was 1.02.
+## - 16 (18%) fits had some steps rejected. Maximum number of rejections was 1.
+## - 7 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.02.
## - 13 (14%) fits had some steps rejected. Maximum number of rejections was 1.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
Here we see something suspicios with the
-sigma
variable, but it is not very convincing.@@ -139,7 +139,7 @@+-plot_rank_hist(results_minor_100)
@@ -102,7 +102,7 @@+plot_ecdf_diff(results_minor_100)
So let’s do additional 100 SBC steps
-diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png new file mode 100644 index 0000000..17a3580 Binary files /dev/null and b/docs/reference/Rplot001.png differ diff --git a/docs/reference/SBC-deprecated.html b/docs/reference/SBC-deprecated.html index a6ac4be..a5dac6f 100644 --- a/docs/reference/SBC-deprecated.html +++ b/docs/reference/SBC-deprecated.html @@ -20,7 +20,7 @@+results_minor_200 <- bind_results( results_minor_100, compute_SBC(datasets_minor[101:200], backend_minor, @@ -307,17 +310,17 @@
SBC and minor changes to model= file.path(cache_dir, "minor_next_100")) )
-## Results loaded from cache file 'minor_next_100'
-## - 6 (6%) fits had at least one Rhat > 1.01. Largest Rhat was 1.02.
+## - 14 (14%) fits had some steps rejected. Maximum number of rejections was 1.
+## - 9 (9%) fits had at least one Rhat > 1.01. Largest Rhat was 1.016.
## - 12 (12%) fits had some steps rejected. Maximum number of rejections was 2.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
OK, so this looks at least a bit conclusive, but still, the violation of uniformity is not very big.
-+-plot_rank_hist(results_minor_200)
+plot_ecdf_diff(results_minor_200)
If we used more data points per simulation (here we simulated just @@ -329,35 +332,35 @@
SBC and minor changes to model
+plot_sim_estimated(results_minor_200, alpha = 0.5)
Another way to investigate this is the coverage plot, showing the attained coverage of various central credible intervals.
-+plot_coverage(results_minor_200)
Or we can even directly inspect some intervals of interest:
-+coverage <- empirical_coverage(results_minor_200$stats, width = c(0.5,0.9,0.95)) coverage
-## # A tibble: 6 × 6 ## variable width width_represented ci_low estimate ci_high ## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> -## 1 mu 0.5 0.5 0.407 0.475 0.544 -## 2 mu 0.9 0.9 0.862 0.91 0.942 -## 3 mu 0.95 0.95 0.930 0.965 0.983 -## 4 sigma 0.5 0.5 0.368 0.435 0.504 -## 5 sigma 0.9 0.9 0.750 0.81 0.858 -## 6 sigma 0.95 0.95 0.839 0.89 0.926
+## 1 mu 0.5 0.5 0.397 0.465 0.534 +## 2 mu 0.9 0.9 0.856 0.905 0.938 +## 3 mu 0.95 0.95 0.923 0.96 0.979 +## 4 sigma 0.5 0.5 0.373 0.44 0.509 +## 5 sigma 0.9 0.9 0.728 0.79 0.841 +## 6 sigma 0.95 0.95 0.828 0.88 0.918+sigma_90_coverage_string <- paste0(round(100 * as.numeric( coverage[coverage$variable == "sigma" & coverage$width == 0.9, c("ci_low","ci_high")])), "%", collapse = " - ")
where we see that for example for the 90% central credible interval -of
+ofsigma
we would expect the actual coverage to be 75% - -86%.sigma
we would expect the actual coverage to be 73% - +84%.+Prior mismatch @@ -373,7 +376,7 @@
Missing likelihood
+single_sim_missing <- function(N) { mu <- rnorm(n = 1, mean = 0, sd = 1) y <- rnorm(n = N, mean = mu, sd = 1) @@ -389,7 +392,7 @@
Missing likelihooddatasets_missing <- generate_datasets(generator_missing, n_sims = 200)
And here is a model that just completely ignores the data, but has the right prior:
-@@ -134,7 +134,7 @@+data { int<lower=0> N; @@ -403,7 +406,7 @@
Missing likelihood
+iter_warmup <- 300 iter_sampling <- 1000 @@ -419,20 +422,20 @@
Missing likelihoodmodel_missing, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 1) }
Now we’ll compute the results for 200 simulations:
-diff --git a/docs/index.html b/docs/index.html index e1a6259..296afd2 100644 --- a/docs/index.html +++ b/docs/index.html @@ -36,7 +36,7 @@+results_missing <- compute_SBC(datasets_missing, backend_missing, cache_mode = "results", cache_location = file.path(cache_dir, "missing"))
-## Results loaded from cache file 'missing'
+## - 16 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.027.
## - 15 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.03.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
And here are our rank plots:
-+-plot_rank_hist(results_missing)
++plot_ecdf_diff(results_missing)
It’s just nothing out of the ordinary.
@@ -443,7 +446,7 @@Missing likelihood
+@@ -456,37 +459,37 @@prior_sd <- c("mu" = 1) #prior_sd <- calculate_prior_sd(generate_datasets(generator_missing, 1000)) plot_contraction(results_missing, prior_sd)
Missing likelihood
+plot_sim_estimated(results_missing, alpha = 0.5)
There is however even more powerful method - and that is to include -the likelihood in the SBC. This is most easily done by adding a -“generated quantity” to the SBC results - this is a function that is -evaluated within the context of the variables AND data. And it can be -added without recomputing the fits!
-++the likelihood in the SBC. This is most easily done by adding a “derived +quantity” to the SBC results - this is a function that is evaluated +within the context of the variables AND data. And it can be added +without recomputing the fits! +-normal_lpdf <- function(y, mu, sigma) { sum(dnorm(y, mean = mu, sd = sigma, log = TRUE)) } -log_lik_gq <- generated_quantities(log_lik = normal_lpdf(y, mu, 1), +log_lik_dq <- derived_quantities(log_lik = normal_lpdf(y, mu, 1), .globals = "normal_lpdf" ) -results_missing_gq <- recompute_SBC_statistics( +results_missing_dq <- recompute_SBC_statistics( results_missing, datasets_missing, - backend = backend_missing, gen_quants = log_lik_gq)
+ backend = backend_missing, dquants = log_lik_dq)## - 19 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.027.
## - 17 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.03.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
The rank plots for the
+log_lik
quantity immediately shows a severe problem:++plot_ecdf_diff(results_missing_dq)
- --plot_ecdf_diff(results_missing_gq)
- +-plot_rank_hist(results_missing_gq)
plot_rank_hist(results_missing_dq)
Partially missing likelihood @@ -495,7 +498,7 @@
Partially missing likelihood
+data { int<lower=0> N; @@ -516,7 +519,7 @@
Partially missing likelihood
+if(use_cmdstanr) { model_missing_2 <- cmdstan_model("stan/partially_missing_likelihood.stan") @@ -529,33 +532,31 @@
Partially missing likelihoodmodel_missing_2, iter = iter_sampling + iter_warmup, warmup = iter_warmup, chains = 1) }
Let us use this model for the same set of simulations.
--results_missing_2 <- compute_SBC(datasets_missing, backend_missing_2, gen_quants = log_lik_gq, +
-+results_missing_2 <- compute_SBC(datasets_missing, backend_missing_2, dquants = log_lik_dq, cache_mode = "results", cache_location = file.path(cache_dir, "missing_2"))
-## Results loaded from cache file 'missing_2' but it was computed with different thin_ranks/gen_quants/ensure_num_ranks_divisor. -## Calling recompute_SBC_statistics.
+## - 20 (10%) fits had at least one Rhat > 1.01. Largest Rhat was 1.025.
+## Results loaded from cache file 'missing_2'
## - 17 (8%) fits had at least one Rhat > 1.01. Largest Rhat was 1.035.
## Not all diagnostics are OK. ## You can learn more by inspecting $default_diagnostics, $backend_diagnostics ## and/or investigating $outputs/$messages/$warnings for detailed output from the backend.
The contraction plot would not show anything suspicious - we get decent contraction
-@@ -127,7 +127,7 @@+plot_contraction(results_missing_2, prior_sd, variables = "mu")
Similarly, our posterior estimates now cluster around the true values.
-@@ -127,7 +127,7 @@+plot_sim_estimated(results_missing_2, variables = "mu", alpha = 0.5)
Now contraction is pretty high, and
-mu
is behaving well, -but ourlog_lik
generated quantity shows a clear -problemdiff --git a/docs/articles/rejection_sampling.html b/docs/articles/rejection_sampling.html index 91836d0..3202771 100644 --- a/docs/articles/rejection_sampling.html +++ b/docs/articles/rejection_sampling.html @@ -33,7 +33,7 @@+but ourlog_lik
derived quantity shows a clear problem +-plot_ecdf_diff(results_missing_2)
@@ -127,7 +127,7 @@+plot_rank_hist(results_missing_2)
We could definitely find even smaller deviations than omitting half @@ -586,7 +587,7 @@
Incorrect CorrelationsWe however generate posterior samples from a set of independent normal distributions that happen to have the correct mean and standard deviation, just the correlation is missing. -
diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_corr_dq-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_corr_dq-1.png new file mode 100644 index 0000000..40ee6af Binary files /dev/null and b/docs/articles/limits_of_SBC_files/figure-html/results_corr_dq-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_corr_dq-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_corr_dq-2.png new file mode 100644 index 0000000..9f90e45 Binary files /dev/null and b/docs/articles/limits_of_SBC_files/figure-html/results_corr_dq-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png index 9bcbbc6..7d410ef 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png index 852b434..c790a61 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_100_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png index bfaf61d..68d3448 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png index 1f944e6..6ad4f41 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_10_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png index 47d78a4..0ff28a8 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_coverage-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png index 38f84a8..fe8fa28 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png index 7f45465..82be123 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png index 1a4bfeb..f2ba6b3 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_minor_200_sim_estimated-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png index 4618598..69e7ff6 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_contraction-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png index f97b06c..b31153d 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png index 7a64439..4df3ee6 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png index 5a2617d..94987c0 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_2_sim_estimated-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png index a0c8ab4..afb1fd3 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_contraction-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_dq_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_dq_plots-1.png new file mode 100644 index 0000000..63c9886 Binary files /dev/null and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_dq_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_dq_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_dq_plots-2.png new file mode 100644 index 0000000..2eb9447 Binary files /dev/null and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_dq_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png index 26e465d..4fdb70e 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-1.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png index c19cf45..b4581ed 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_plots-2.png differ diff --git a/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png b/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png index b93cd1f..9955b58 100644 Binary files a/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png and b/docs/articles/limits_of_SBC_files/figure-html/results_missing_sim_estimated-1.png differ diff --git a/docs/articles/overview_wide.png b/docs/articles/overview_wide.png index ec8847c..c95fbb0 100644 Binary files a/docs/articles/overview_wide.png and b/docs/articles/overview_wide.png differ diff --git a/docs/articles/rank_visualizations.html b/docs/articles/rank_visualizations.html index ee21aa4..ef090f1 100644 --- a/docs/articles/rank_visualizations.html +++ b/docs/articles/rank_visualizations.html @@ -33,7 +33,7 @@+@@ -693,7 +694,7 @@set.seed(546852) mvn_sigma <- matrix(c(1, 0.8,0.8,1), nrow = 2) @@ -641,38 +642,38 @@
Incorrect Correlations
## Results loaded from cache file 'corr'
Although the posterior is incorrect, the default univariate checks don’t show any problem even with 1000 simulations.
-+-plot_rank_hist(res_corr)
++plot_ecdf_diff(res_corr)
We can however add derived quantities that depend on both elements of mu. We’ll try their sum, difference, product and the multivarite normal log likelihood
-+-gq_corr <- generated_quantities(sum = mu[1] + mu[2], +
-+dq_corr <- derived_quantities(sum = mu[1] + mu[2], diff = mu[1] - mu[2], prod = mu[1] * mu[2], mvn_log_lik = sum(mvtnorm::dmvnorm(y, mean = mu, sigma = mvn_sigma, log = TRUE))) -res_corr_gq <- compute_SBC(datasets_correlated, backend_uncorr, keep_fits = FALSE, +res_corr_dq <- compute_SBC(datasets_correlated, backend_uncorr, keep_fits = FALSE, globals = analytic_backend_uncorr_globals, - gen_quants = gq_corr, + dquants = dq_corr, cache_mode = "results", - cache_location = file.path(cache_dir, "corr_gq"))
+ cache_location = file.path(cache_dir, "corr_dq"))## Results loaded from cache file 'corr_gq'
## Results loaded from cache file 'corr_dq'
We see that all of the derived quantities show problems, but with different strength of signal. We’ll especially note that the log likelihood is once again a very good choice, while sum is probably the worst of those tested.
+++plot_rank_hist(res_corr_dq)
- --plot_rank_hist(res_corr_gq)
- +-plot_ecdf_diff(res_corr_gq)
plot_ecdf_diff(res_corr_dq)
Incorrect Correlations -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
SBC rank visualizations
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/rank_visualizations.Rmd
@@ -419,7 +419,7 @@rank_visualizations.Rmd
Side by side comparison -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
Rejection sampling in simulations
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/rejection_sampling.Rmd
@@ -151,61 +151,38 @@rejection_sampling.Rmd
2022-07-04
certain condition we impose (e.g. that no observed count is larger than \(10^8\)). But does rejection sampling when generating simulations affect the validity of SBC? -Thanks to forum user Niko Huurre who derived the necessary math at Stan -Discourse discussion of the topic we know exactly when it is OK. -Briefly: for algorithms that only need to know the posterior density up -to a constant (which includes Stan and many others), it is OK as long as -the rejection criterion only uses observed data and not the unobserved -variables.
+It turns out that it does not as long as the rejection criterion only +uses observed data and not the unobserved variables.
We’ll first walk through the math and then show examples of both OK and problematic rejection sampling.
The math
-Let \(f\left(y\right)\) be the -probability that the simulated data \(y\) is rejected (usually a 0-1 function if -you have a clear idea what a “bad” dataset looks like, but could be -probabilistic if you’re relying on finicky diagnostics). The important -numbers are the probability of rejection for variable \(\theta\)
+Let \(\mathtt{accept}(y)\) be the +probability the the simulated data \(y\) is accepted. Note that \(\mathtt{accept}\) uses only data as input +and would usually be a 0-1 function if you have a clear idea what a +“bad” dataset looks like, but could be probabilistic if you’re relying +on finicky diagnostics.
+We define a variable \(a \sim +\text{Bernoulli}(\mathtt{accept}(y))\). Given the parameter space +\(\Theta\) and a specific \(\theta \in \Theta\), this implies a joint +distribution \(\pi(\theta, y, a)\) that +factorizes as \(\pi(\theta, y, a) = +\pi(a|y)\pi(y | \theta)\pi(\theta)\). We can then look at the +posterior conditional on accepting a dataset to see the claimed +invariance:
\[ -L\left(\theta\right)=\int -f\left(y\right)\pi\left(y|\theta\right)\mathrm{d}y +\begin{equation} +\pi(\theta | y, a = 1) = \frac{\pi(a = 1 | y) \pi(y | +\theta)\pi(\theta)}{\int_\Theta \mathrm{d}\tilde\theta \: \pi(a = 1 | y) +\pi(y | \tilde\theta)\pi(\tilde\theta)} = +\frac{\pi(y | \theta)\pi(\theta)}{\int_\Theta \mathrm{d}\tilde\theta \: +\pi(y | \tilde\theta)\pi(\tilde\theta)} = \pi(\theta | y) +\end{equation} \]
-and the total rate of rejections from the prior
-\[ -R=\iint -f\left(y\right)\pi\left(y|\theta\right)\pi\left(\theta\right)\mathrm{d}y\mathrm{d}\theta=\int -L\left(\theta\right)\pi\left(\theta\right)\mathrm{d}\theta -\]
-Rejecting the simulation when it generates “bad” data effectively -distorts the prior
-\[ -\pi\left(\theta\right)\to\frac{L\left(\theta\right)}{R}\pi\left(\theta\right) -\]
-and of course rejections change the generating distribution
-\[ -\pi\left(y|\theta\right)\to\frac{f\left(y\right)}{L\left(\theta\right)}\pi\left(y|\theta\right) -\]
-but crucially these changes cancel out when computing the posterior. -Before rejections we have:
-\[ -\pi(\theta | y) \propto \pi(y | \theta) \pi(\theta) -\]
-After rejections we have
-\[ -\pi(\theta | y) \propto \frac{L(\theta)}{R} \pi(y | \theta) -\frac{f(y)}{L(\theta)} \pi(\theta) = \frac{f(y)}{R} \pi(y | \theta) -\pi(\theta) -\]
-And since \(\frac{f(y)}{R}\) is a -constant for any given simulation (and hence the fit), the overall -posterior for Stan (and most other MCMC algorithms) is the same, because -Stan only needs the posterior density up to a constant. So whether we -take rejection into account or not, the model will match the generating -process. However, if \(f\) also -depended on \(\theta\), it would no -longer contribute a constant and we’ll get a mismatch between the -generator and model.
+So whether we take rejection into account or not, the model will +match the generating process. However, if \(\mathtt{accept}\) also depended on \(\theta\), it would no longer contribute a +constant and we’ll get a mismatch between the generator and model.
diff --git a/docs/articles/small_model_workflow.html b/docs/articles/small_model_workflow.html index e018bc8..943ea25 100644 --- a/docs/articles/small_model_workflow.html +++ b/docs/articles/small_model_workflow.html @@ -33,7 +33,7 @@Practical examples @@ -408,7 +385,7 @@
Take home message -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
Small model implementation workflow
Martin Modrák
-2022-07-04
+2022-12-15
Source:vignettes/small_model_workflow.Rmd
@@ -528,16 +528,16 @@small_model_workflow.Rmd
Fixing ordering
results_fixed_ordered$backend_diagnostics
+## 8 8 0.455 0 0 0 0 +## 9 9 0.376 0 0 0 0 +## 10 10 0.374 0 0 0 0## sim_id max_chain_time n_failed_chains n_divergent n_max_treedepth n_rejects -## 1 1 0.505 0 7 0 0 -## 2 2 0.715 0 145 0 0 -## 3 3 0.539 0 0 0 0 -## 4 4 0.461 0 0 0 0 -## 5 5 2.524 0 0 0 0 -## 6 6 0.723 0 0 0 0 +## 1 1 0.484 0 7 0 0 +## 2 2 0.777 0 145 0 0 +## 3 3 0.556 0 0 0 0 +## 4 4 0.501 0 0 0 0 +## 5 5 2.629 0 0 0 0 +## 6 6 0.665 0 0 0 0 ## 7 7 0.498 0 0 0 0 -## 8 8 0.440 0 0 0 0 -## 9 9 0.361 0 0 0 0 -## 10 10 0.354 0 0 0 0
One of the fits has quite a lot of divergent transitions. Let’s look at the pairs plot for the model:
@@ -150,7 +150,7 @@@@ -612,7 +612,7 @@+## - Maximum time per chain was 2.629 sec.Fixing degenerate components?## - No fits had divergent transitions. ## - No fits had iterations that saturated max treedepth. ## - No fits had steps rejected. -## - Maximum time per chain was 2.524 sec.
This gives us no obvious problems.
@@ -695,7 +695,7 @@plot_rank_hist(results_fixed_ordered_subset)
Fixing degenerate components?## - No fits had divergent transitions. ## - No fits had iterations that saturated max treedepth. ## - No fits had steps rejected. -## - Maximum time per chain was 4.253 sec. +## - Maximum time per chain was 4.316 sec.
And we can use
bind_results
to combine the new results with the previous fits to not waste our computational effort.diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png index 78d5434..79a9ecc 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_coverage-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png index d796b85..73c101e 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png index a0c9f8a..673473e 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_200_plots-2.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png index 6d456cc..b34b715 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-1.png differ diff --git a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png index e8866b2..81935c8 100644 Binary files a/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png and b/docs/articles/small_model_workflow_files/figure-html/results_beta_precision_fixed_prior_plots-2.png differ diff --git a/docs/authors.html b/docs/authors.html index 20e849d..cd6fde9 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@@@ -1482,7 +1482,7 @@Take home message -
Site built with pkgdown 2.0.5.
+Site built with pkgdown 2.0.6.
Citation
Installation
-devtools::install_github("hyunjimoon/SBC")
devtools::install_github("hyunjimoon/SBC")
-Quick tour @@ -163,7 +163,7 @@
Quick tourplot_rank_hist(results_sd) plot_ecdf_diff(results_sd)
The diagnostic plots show no problems in this case. As with any other software test, we can observe clear failures, but absence of failures does not imply correctness. We can however make the SBC check more thorough by using a lot of simulations and including suitable generated quantities to guard against known limitations of vanilla SBC.
+The diagnostic plots show no problems in this case. As with any other software test, we can observe clear failures, but absence of failures does not imply correctness. We can however make the SBC check more thorough by using a lot of simulations and including suitable derived quantities to guard against known limitations of vanilla SBC.
@@ -200,7 +200,7 @@Paralellization @@ -192,7 +192,9 @@
ReferencesValidating Bayesian Inference Algorithms with Simulation-Based Calibration Talts, Betancourt, Simpson, Vehtari, Gelman, 2018 +Simulation-Based Calibration Checking for Bayesian Computation: The Choice of Test Quantities Shapes Sensitivity Modrák, Moon, Kim, Bürkner, Huurre, Faltejsková, Gelman, Vehtari, 2022 +
- +Validating Bayesian Inference Algorithms with Simulation-Based Calibration Talts, Betancourt, Simpson, Vehtari, Gelman, 2018
- Graphical Test for Discrete Uniformity and its Applications in Goodness of Fit Evaluation and Multiple Sample Comparison Säilynoja, Bürkner, Vehtari, 2021
- @@ -278,7 +280,7 @@
Developers
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 5feb8fb..ece5d2c 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,5 +1,5 @@ -pandoc: 2.17.1.1 -pkgdown: 2.0.5 +pandoc: 2.19.2 +pkgdown: 2.0.6 pkgdown_sha: ~ articles: bad_parametrization: bad_parametrization.html @@ -13,7 +13,7 @@ articles: rejection_sampling: rejection_sampling.html SBC: SBC.html small_model_workflow: small_model_workflow.html -last_built: 2022-07-04T11:54Z +last_built: 2022-12-15T13:30Z urls: reference: https://hyunjimoon.github.io/SBC/reference article: https://hyunjimoon.github.io/SBC/articles diff --git a/docs/reference/ECDF-plots.html b/docs/reference/ECDF-plots.html index d13b62e..b8e7563 100644 --- a/docs/reference/ECDF-plots.html +++ b/docs/reference/ECDF-plots.html @@ -19,7 +19,7 @@See also
@@ -114,11 +114,43 @@Deprecated functions in package SBC.
- Source:R/SBC-deprecated.R
,R/results.R
+ Source:R/SBC-deprecated.R
,R/derived-quantities.R
,R/results.R
SBC-deprecated.Rd
Deprecated functions in package SBC.
-diff --git a/docs/reference/SBC_backend_brms.html b/docs/reference/SBC_backend_brms.html index b4721fe..b39569a 100644 --- a/docs/reference/SBC_backend_brms.html +++ b/docs/reference/SBC_backend_brms.html @@ -17,7 +17,7 @@+compute_results(...) +
generated_quantities(...) + +validate_generated_quantities(...) + +bind_generated_quantities(...) + +compute_gen_quants(...) + +compute_results(...) recompute_statistics(...)
+++ + +
generated_quantities
Instead of
+generated_quantities
, usederived_quantities
.+++ + +
validate_generated_quantities
Instead of
+validate_generated_quantities
, usevalidate_derived_quantities
.+++ + +
bind_generated_quantities
Instead of
+bind_generated_quantities
, usebind_derived_quantities
.++ + +
compute_gen_quants
Instead of
+compute_gen_quants
, usecompute_dquants
.@@ -144,7 +176,7 @@
compute_results
recompute_statistics
Arguments
Build a brms backend, reusing the compiled model from a previously created <
Arguments
Arguments
S3 generic to get backend-specific default thinning for rank computation.
Get hash used to identify cached results.
Arguments
Arguments
Arguments
Arguments
Arguments
Arguments
Arguments
draws_matrix
object.<
diff --git a/docs/reference/SBC_generator_brms.html b/docs/reference/SBC_generator_brms.html
index d43ae74..8ff6802 100644
--- a/docs/reference/SBC_generator_brms.html
+++ b/docs/reference/SBC_generator_brms.html
@@ -18,7 +18,7 @@
@@ -138,7 +138,7 @@ Running:
gen <- SBC_generator_custom(f, <<some other args>>)
+ Running:
+gen <- SBC_generator_custom(f, <<some other args>>)
datasets <- generate_datasets(gen, n_sims = my_n_sims)
-is equivalent to just running
datasets <- f(<<some other args>>, n_sims = my_n_sims)
+is equivalent to just running
+datasets <- f(<<some other args>>, n_sims = my_n_sims)
So whenever you control the code calling generate_datasets
,
it usually makes more sense to just create an SBC_datasets
@@ -151,7 +153,7 @@
Details
Derived quantities to include in SBC. Use derived_quantities()
to construct them.
the model + sampling algorithm. The built-in backends can be constructed
using SBC_backend_cmdstan_sample()
, SBC_backend_cmdstan_variational()
,
@@ -143,6 +148,10 @@
SBC_fit()
,
SBC_fit_to_draws_matrix()
methods.Deprecated, use dquants instead
R/derived-quantities.R
+ bind_derived_quantities.Rd
Combine two lists of derived quantities
+bind_derived_quantities(dq1, dq2)
R/derived-quantities.R
+ bind_generated_quantities-deprecated.Rd
Delegates directly to bind_derived_quantities()
.
R/results.R
+ bind_globals.Rd
Combine two sets globals for use in derived quantities or backend
+bind_globals(globals1, globals2)
future.chunk.size
argument for future.apply::future_lapply()
for more details.
+Derived quantities to include in SBC. Use derived_quantities()
to construct them.
Type of caching of results, currently the only supported modes are
"none"
(do not cache) and "results"
where the whole results object is stored
@@ -193,6 +198,10 @@
globals
argument to future::future()
, to make those
objects available on all workers.Deprecated, use dquants instead
R/derived-quantities.R
+ compute_dquants.Rd
Compute derived quantities based on given data and posterior draws.
+compute_dquants(draws, generated, dquants, gen_quants = NULL)
Deprecated, use dquants
R/derived-quantities.R
+ compute_gen_quants-deprecated.Rd
Delegates directly to compute_dquants()
.
R/derived-quantities.R
+ derived_quantities.Rd
When the expression contains non-library functions/objects, and parallel processing
+is enabled, those must be
+named in the .globals
parameter (hopefully we'll be able to detect those
+automatically in the future). Note that recompute_SBC_statistics()
currently
+does not use parallel processing, so .globals
don't need to be set.
derived_quantities(..., .globals = list())
named expressions representing the quantitites
A list of names of objects that are defined
+in the global environment and need to present for the gen. quants. to evaluate.
+It is added to the globals
argument to future::future()
, to make those
+objects available on all workers.
# Derived quantity computing the total log likelihood of a normal distribution
+# with known sd = 1
+normal_lpdf <- function(y, mu, sigma) {
+ sum(dnorm(y, mean = mu, sd = sigma, log = TRUE))
+}
+
+# Note the use of .globals to make the normal_lpdf function available
+# within the expression
+log_lik_dq <- derived_quantities(log_lik = normal_lpdf(y, mu, 1),
+ .globals = "normal_lpdf" )
+
+
R/derived-quantities.R
+ generated_quantities-deprecated.Rd
Delegates directly to derived_quantities()
.
Delegates directly to validate_derived_quantities()
.
Create a definition of generated quantities evaluated in R.
Combine two lists of derived quantities
Create a definition of derived quantities evaluated in R.
Validate a definition of derived quantities evaluated in R.
Compute derived quantities based on given data and posterior draws.
Combine two sets globals for use in derived quantities or backend
Guess the number of bins for plot_rank_hist()
.
Guess the number of bins for plot_rank_hist()
.
DEPRECATED. Use variables
instead.
maximum number of points where to evaluate the coverage.
+If set to NULL
, coverage is evaluated across the whole range of ranks.
+Setting to some smaller number may reduce memory footprint and increase speed.
Recompute SBC statistics without refitting models.
+Useful for example to recompute SBC ranks with a different choice of thin_ranks
+or added derived quantities.
Potentially drop some posterior samples to ensure that this number divides the total number of SBC ranks (see Details).
Derived quantities to include in SBC. Use derived_quantities()
to construct them.
Deprecated, use dquants instead
An S3 object of class SBC_results
with updated $stats
and $default_diagnostics
fields.
Useful for example to recompute SBC ranks with a different choice of thin_ranks
-or added generated quantities.
SBC_datasets
object.R/derived-quantities.R
+ validate_derived_quantities.Rd
Validate a definition of derived quantities evaluated in R.
+validate_derived_quantities(x)