report.compare.loo: minor edits #427

Merged (4 commits, May 7, 2024)
128 changes: 77 additions & 51 deletions R/report.compare.loo.R
@@ -3,8 +3,8 @@
#' Automatically report the results of Bayesian model comparison using the `loo` package.
#'
#' @param x An object of class [brms::loo_compare].
#' @param index type if index to report - expected log pointwise predictive
#' density (ELPD) or information criteria (IC).
#' @param include_IC Whether to include the information criteria (IC).
#' @param include_ENP Whether to include the effective number of parameters (ENP).
#' @param ... Additional arguments (not used for now).
#'
#' @examplesIf require("brms", quietly = TRUE)
@@ -13,24 +13,33 @@
#'
#' m1 <- brms::brm(mpg ~ qsec, data = mtcars)
#' m2 <- brms::brm(mpg ~ qsec + drat, data = mtcars)
#' m3 <- brms::brm(mpg ~ qsec + drat + wt, data = mtcars)
#'
#' x <- brms::loo_compare(brms::add_criterion(m1, "loo"),
#' x <- brms::loo_compare(
#' brms::add_criterion(m1, "loo"),
#' brms::add_criterion(m2, "loo"),
#' model_names = c("m1", "m2")
#' brms::add_criterion(m3, "loo"),
#' model_names = c("m1", "m2", "m3")
#' )
#' report(x)
#' report(x, include_IC = FALSE)
#' report(x, include_ENP = TRUE)
#' }
#'
#' @details
#' The rule of thumb is that the models are "very similar" if |elpd_diff| (the
#' absolute value of elpd_diff) is less than 4 (Sivula, Magnusson and Vehtari, 2020).
#' If superior to 4, then one can use the SE to obtain a standardized difference
#' (Z-diff) and interpret it as such, assuming that the difference is normally
#' distributed.
#' distributed. The corresponding p-value is then calculated as `2 * pnorm(-abs(Z-diff))`.
#' However, note that if the raw ELPD difference is small (less than 4), it doesn't
#' make much sense to rely on its standardized value: it is not very useful to
#' conclude that a model is much better than another if both models make very
#' similar predictions.
#'
#' @return Objects of class [report_text()].
#' @export
report.compare.loo <- function(x, index = c("ELPD", "IC"), ...) {
report.compare.loo <- function(x, include_IC = TRUE, include_ENP = FALSE, ...) {
# nolint start
# https://stats.stackexchange.com/questions/608881/how-to-interpret-elpd-diff-of-bayesian-loo-estimate-in-bayesian-logistic-regress
# nolint end
@@ -40,72 +49,89 @@ report.compare.loo <- function(x, index = c("ELPD", "IC"), ...) {
# The difference in expected log predictive density (elpd) between each model
# and the best model as well as the standard error of this difference (assuming
# the difference is approximately normal).
index <- match.arg(index)
x <- as.data.frame(x)

# The values in the first row are 0s because the models are ordered from best to worst according to their elpd.
x <- as.data.frame(x)
modnames <- rownames(x)

elpd_diff <- x[["elpd_diff"]]
se_elpd_diff <- x[["se_diff"]]
ic_diff <- -2 * elpd_diff

z_elpd_diff <- elpd_diff / x[["se_diff"]]
z_elpd_diff <- elpd_diff / se_elpd_diff
p_elpd_diff <- 2 * stats::pnorm(-abs(z_elpd_diff))
z_ic_diff <- -z_elpd_diff

if ("looic" %in% colnames(x)) {
type <- "LOO"
ENP <- x[["p_loo"]]
elpd <- x[["elpd_loo"]]
enp <- x[["p_loo"]]
index_label <- "ELPD-LOO"
ic <- x[["looic"]]
index_ic <- "LOOIC"
} else {
type <- "WAIC"
ENP <- x[["p_waic"]]
elpd <- x[["elpd_waic"]]
enp <- x[["p_waic"]]
index_label <- "ELPD-WAIC"
ic <- x[["waic"]]
index_ic <- "WAIC"
}

if (index == "ELPD") {
index_label <- sprintf("Expected Log Predictive Density (ELPD-%s)", type)
} else if (type == "LOO") {
index_label <- "Leave-One-Out CV Information Criterion (LOOIC)"
} else {
index_label <- "Widely Applicable Information Criterion (WAIC)"
}
# TODO: The above indices-computation and name-matching should be implemented
# in a parameters.compare.loo() function which would be run here.

out_text <- sprintf(
paste(
"The difference in predictive accuracy, as index by %s, suggests that '%s' ",
"is the best model (effective number of parameters (ENP) = %.2f), followed by"
# Starting text -----
text1 <- sprintf(
paste0(
"The difference in predictive accuracy, as indexed by Expected Log ",
"Predictive Density (%s), suggests that '%s' is the best model ("
),
index_label, modnames[1], ENP[1]
index_label, modnames[1]
)

if (index == "ELPD") {
other_texts <- sprintf(
"'%s' (diff = %.2f, ENP = %.2f, z-diff = %.2f)",
modnames[-1],
elpd_diff[-1],
ENP[-1],
z_elpd_diff[-1]
)
if (all(c(include_IC, include_ENP))) {
if (include_IC) {
text1 <- sprintf(paste0(text1, "%s = %.2f"), index_ic, ic[1])
}
if (include_ENP) {
if (include_IC) {
text1 <- sprintf(paste0(text1, ", ENP = %.2f)"), enp[1])
} else {
text1 <- sprintf(paste0(text1, "ENP = %.2f)"), enp[1])
}
} else {
text1 <- paste0(text1, ")")
}
} else {
other_texts <- sprintf(
"'%s' (diff = %.2f, ENP = %.2f, z-diff = %.2f)",
modnames[-1],
ic_diff[-1],
ENP[-1],
z_ic_diff[-1]
)
text1 <- sprintf(paste0(text1, "ELPD = %.2f)"), elpd[1])
}

sep <- "."
nothermods <- length(other_texts)
if (nothermods > 1L) {
if (nothermods == 2L) {
sep <- c(" and ", sep)
# Other models ---
text_models <- sprintf(
"'%s' (diff-ELPD = %.2f +- %.2f, %s",
modnames[-1],
elpd_diff[-1],
se_elpd_diff[-1],
insight::format_p(p_elpd_diff[-1])
)

if (all(c(include_IC, include_ENP))) {
if (include_IC) {
text_models <- sprintf(paste0(text_models, ", %s = %.2f"), index_ic, ic[-1])
}
if (include_ENP) {
if (include_IC) {
text_models <- sprintf(paste0(text_models, ", ENP = %.2f)"), enp[-1])
} else {
text_models <- sprintf(paste0(text_models, "ENP = %.2f)"), enp[-1])
}
} else {
sep <- c(rep(", ", length = nothermods - 2), ", and ", sep)
text_models <- sprintf(paste0(text_models, ")"))
}
} else {
text_models <- paste0(text_models, ")")
}

other_texts <- paste0(other_texts, sep, collapse = "")

out_text <- paste(out_text, other_texts, collapse = "")
class(text) <- c("report_text", class(text))
out_text
text1 <- paste0(text1, ", followed by ", datawizard::text_concatenate(text_models))
class(text1) <- c("report_text", class(text1))
text1
}
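
For reference, the rule of thumb described in the @details block above can be reproduced in a few lines of base R. This is a minimal sketch with made-up numbers (it is not output from a fitted model; `elpd_diff` and `se_diff` here are stand-ins for the columns returned by `loo_compare()`):

# Hypothetical values standing in for one row of a loo_compare() result
elpd_diff <- -6.3 # difference in ELPD relative to the best model
se_diff <- 2.1    # standard error of that difference

# |elpd_diff| < 4 -> models are "very similar"; otherwise standardize it
abs(elpd_diff) < 4 # FALSE here, so the standardized difference is meaningful

z_diff <- elpd_diff / se_diff               # standardized difference (Z-diff)
p_value <- 2 * stats::pnorm(-abs(z_diff))   # two-sided p-value, assuming normality

round(c(z = z_diff, p = p_value), 3) # z = -3, p ~ 0.003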
2 changes: 1 addition & 1 deletion README.Rmd
@@ -79,7 +79,7 @@ The package documentation can be found [**here**](https://easystats.github.io/re

## Report all the things

<a href=https://easystats.github.io/report/><img src="man/figures/allthethings.jpg" height="60"></a>
<a href=https://easystats.github.io/report/><img src="man/figures/allthethings.jpg" height="60" alt="All the things meme by Allie Brosh" ></a>

### General Workflow

1 change: 1 addition & 0 deletions inst/WORDLIST
@@ -6,6 +6,7 @@ CMD
CSL
Dom
ELPD
ENP
ESS
Gabry
Hotfix
22 changes: 16 additions & 6 deletions man/report.compare.loo.Rd

Some generated files (man/report.compare.loo.Rd) are not rendered by default.
