metrumresearchgroup · barrettk · Jan 7, 2025 · Jan 7, 2025
diff --git a/R/bootstrap-model.R b/R/bootstrap-model.R
@@ -528,7 +528,9 @@ summarize_bootstrap_run <- function(
     )
 
     boot_sum_log <- summary_log(
-      boot_dir, .bbi_args = list(
+      boot_dir,
+      calc_aic_bic = FALSE, calc_dofv = FALSE,
+      .bbi_args = list(
         no_grd_file = TRUE, no_ext_file = TRUE, no_shk_file = TRUE
       )
     ) %>% dplyr::select(-"error_msg") # only join based on model run

diff --git a/R/summary-log.R b/R/summary-log.R
@@ -36,7 +36,7 @@
 #' @importFrom glue glue
 #' @importFrom tidyr unnest_wider
 #' @export
-summary_log <- function(.base_dir, .recurse = FALSE, .include = NULL , ...) {
+summary_log <- function(.base_dir, .recurse = FALSE, .include = NULL, ...) {
   checkmate::assert_string(.base_dir)
 
   mod_list <- find_models(.base_dir, .recurse, .include)
@@ -72,9 +72,23 @@ add_summary <- function(
 #' @importFrom tibble tibble
 #' @importFrom tidyselect all_of
 #' @param .mods List of model objects that will be passed to [model_summaries()].
+#' @param calc_aic_bic Logical(T/F). If `TRUE`, calculate the Akaike Information
+#' Criterion (AIC) and Bayesian Information Criterion (BIC)
+#' @param calc_dofv Logical(T/F). If `TRUE`, calculate the difference in
+#'  objective function value between each run and the run it's based on.
 #' @param ... Arguments passed through to [model_summaries()]
+#'
+#' @details
+#' `calc_aic_bic` and `calc_dofv` should be set to `FALSE` for certain model
+#' types, such as bootstrap runs.
+#'
 #' @keywords internal
-summary_log_impl <- function(.mods, ...) {
+summary_log_impl <- function(
+    .mods,
+    calc_aic_bic = TRUE,
+    calc_dofv = TRUE,
+    ...
+) {
 
   if(length(.mods) == 0) {
     return(tibble())
@@ -127,6 +141,19 @@ summary_log_impl <- function(.mods, ...) {
 
   res_df <- res_df %>% unnest_wider("d") %>% unnest_wider("h")
 
+  # Calculate AIC and BIC
+  if(isTRUE(calc_aic_bic)) res_df <- res_df %>% add_aic_bic()
+
+  # Calculate difference in ofv (requires based_on and model_type for linking)
+  if(isTRUE(calc_dofv)){
+    link_cols <- c(YAML_BASED_ON, "model_type")
+    run_df <- map_df(.mods, run_log_entry) %>%
+      dplyr::select(all_of(c(ABS_MOD_PATH, link_cols)))
+    res_df <- res_df %>% dplyr::left_join(run_df, by = ABS_MOD_PATH) %>%
+      dplyr::relocate(all_of(YAML_BASED_ON), .after = "run") %>%
+      add_dofv() %>% dplyr::select(-all_of(link_cols))
+  }
+
   res_df <- create_summary_log_object(res_df)
 
   return(res_df)
@@ -266,3 +293,63 @@ do_if_bbi_sum <- function(fn, mode) {
     return(res)
   }
 }
+
+#' Calculate Akaike Information Criterion (AIC) and Bayesian Information
+#' Criterion (BIC), and append to summary log
+#' @param .log_df The output of [summary_log()]) or `run_log() %>% add_summary()`
+#' @keywords internal
+add_aic_bic <- function(.log_df){
+  .log_df %>% dplyr::mutate(
+    aic = 2*.data$param_count + .data$ofv,
+    bic = .data$param_count*log(.data$number_of_obs) + .data$ofv
+  )
+}
+
+#' Calculate the difference in objective function value (ofv) between the
+#' current and based_on model.
+#' @inheritParams add_aic_bic
+#' @keywords internal
+add_dofv <- function(.log_df) {
+  # Determine linking of models to handle models in sub-directories
+  #  - We can't just rely on the based_on and run columns for this calculation
+  #    since based_on models in sub-directories will be displayed as '../2',
+  #    which wouldn't show up in the run column. make_tree_data does extra
+  #    computation we don't need, but handles the linking of models well and
+  #    takes in a run log data frame.
+  # TODO: pull the relevant linking code out into a separate function to avoid
+  # unneeded computation
+  link_cols <- c("from", "to")
+  tree_data <-
+    make_tree_data(.log_df, include_info = NULL, add_summary = FALSE) %>%
+    dplyr::select(all_of(c(link_cols, ABS_MOD_PATH, "addl_based_on"))) %>%
+    dplyr::mutate(from = ifelse(.data$from == "Start", NA_character_, .data$from))
+
+  log_df <- dplyr::left_join(.log_df, tree_data, by = ABS_MOD_PATH)
+
+  # Calculate difference in ofv using linking
+  log_df <- log_df %>%
+    dplyr::mutate(based_on_join = .data$from) %>%
+    dplyr::left_join(
+      dplyr::select(log_df, "based_on_join" = "to", "based_on_ofv" = "ofv"),
+      by = c("based_on_join")
+    ) %>%
+    dplyr::mutate(dofv = .data$ofv - .data$based_on_ofv)
+
+  # Denotes which runs were compared for clarity in the event multiple
+  # models were included as based_on
+  if(!all(is.na(log_df$addl_based_on))){
+    log_df <- log_df %>% dplyr::mutate(
+      dofv_run_annot = ifelse(
+        !is.na(.data$from), paste(.data$to, "-", .data$from), NA
+      )
+    )
+  }
+
+  # Prune and organize columns
+  log_df <- log_df %>%
+    dplyr::select(
+      -all_of(c("based_on_ofv", "based_on_join", link_cols, "addl_based_on"))
+    ) %>%
+    dplyr::relocate(any_of(c("dofv", "dofv_run_annot")), .after = "ofv")
+  return(log_df)
+}
diff --git a/man/add_aic_bic.Rd b/man/add_aic_bic.Rd
diff --git a/man/add_dofv.Rd b/man/add_dofv.Rd
diff --git a/man/summary_log_impl.Rd b/man/summary_log_impl.Rd
diff --git a/tests/testthat/setup-workflow-ref.R b/tests/testthat/setup-workflow-ref.R
@@ -105,10 +105,10 @@ RUN_LOG_ROWS <- 3L
 RUN_LOG_COLS <- 10L
 CONFIG_COLS <- 9L
 SUM_LOG_COLS <- if (test_bbi_version(read_bbi_path(), .min_version = "3.0.3")) {
-  23L
+  26L
 } else {
   # eigenvalue_issue isn't present yet.
-  22L
+  25L
 }
 
 ref_json <- jsonlite::fromJSON(system.file("test-refs", "ref_values.json", package = "bbr"))

diff --git a/tests/testthat/test-summary-log.R b/tests/testthat/test-summary-log.R
@@ -55,6 +55,7 @@ test_that("add_summary() works correctly [BBR-SMLG-004]", {
   clean_test_enviroment(create_all_models)
   copy_all_output_dirs()
   sum_df <- run_log(MODEL_DIR, .recurse = TRUE) %>% add_summary()
+  # Subtract 2 columns that overlap: ABS_MOD_PATH and "run"
   test_sum_df(sum_df, c(MOD1_PATH, NEW_MOD2, NEW_MOD3, LEVEL2_MOD), RUN_LOG_COLS+SUM_LOG_COLS-2)
   expect_identical(sum_df$model_type, rep("nonmem", RUN_LOG_ROWS+1))
   expect_identical(sum_df$yaml_md5, ALL_MODS_YAML_MD5)
@@ -71,7 +72,7 @@ test_that("add_summary() has correct columns [BBR-SMLG-005]", {
   expect_identical(names(add_df), c(names(log_df), names(sum_df)[3:length(names(sum_df))]))
 
   # check one col to make sure it matches
-  col_to_check <- names(sum_df)[3]
+  col_to_check <- "bbi_summary"
   expect_identical(sum_df[[col_to_check]], add_df[[col_to_check]])
 })