From 3807f0a75f42eb7b0e7f953e6c38bb3f3f71e141 Mon Sep 17 00:00:00 2001 From: "Kyle M. Lang" Date: Wed, 17 Jul 2024 17:45:07 +0200 Subject: [PATCH 1/9] adjusting the variable parsing for plot_trace() to make it accept all putative input types --- DESCRIPTION | 5 ++-- NAMESPACE | 2 ++ R/plot_trace.R | 51 +++++++++++++++++++++++++++++--- man/plot_trace.Rd | 27 +++++++++++++++++ tests/testthat/test-plot_trace.R | 5 ++++ 5 files changed, 84 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 39077d11..ed5736b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,8 @@ Authors@R: c( person("Thom", "Volker", role = "ctb", comment = c(ORCID = "0000-0002-2408-7820")), person("Gerko", "Vink", role = "ctb", comment = c(ORCID = "0000-0001-9767-1924")), person("Pepijn", "Vink", role = "ctb", comment = c(ORCID = "0000-0001-6960-9904")), - person("Jamie", "Wallis", role = "ctb", comment = c(ORCID = "0000-0003-2765-3813")) + person("Jamie", "Wallis", role = "ctb", comment = c(ORCID = "0000-0003-2765-3813")), + person("Kyle", "Lang", role = "ctb", comment = c(ORCID = "0000-0001-5340-7849")) ) Description: Enhance a 'mice' imputation workflow with visualizations for incomplete and/or imputed data. The plotting functions produce @@ -46,4 +47,4 @@ Config/testthat/edition: 3 Copyright: 'ggmice' authors Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 5a976a40..d3b3a8ae 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,3 +13,5 @@ export(stripplot) export(xyplot) importFrom(magrittr,"%>%") importFrom(rlang,.data) +importFrom(rlang,enexpr) +importFrom(utils,tail) diff --git a/R/plot_trace.R b/R/plot_trace.R index ec8f2580..2a24321c 100644 --- a/R/plot_trace.R +++ b/R/plot_trace.R @@ -3,11 +3,40 @@ #' @param data An object of class [mice::mids]. #' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". #' +#' @details +#' The `vrb` argument is "quoted" via [rlang::enexpr()] and evaluated according +#' to [Tidy Evaluation principles](https://adv-r.hadley.nz/metaprogramming.html). +#' In practice, this technical nuance only affects users when passing an object +#' from the environment (e.g., a vector of variable names) to the `vrb` argument. +#' In such cases, the object must be "unquoted" via the `!!` prefix operator. +#' #' @return An object of class [ggplot2::ggplot]. #' #' @examples #' imp <- mice::mice(mice::nhanes, print = FALSE) +#' +#' ## Plot all imputed variables #' plot_trace(imp) +#' +#' ## Variables can be specified via character vectors comprising their names +#' plot_trace(imp, "bmi") +#' plot_trace(imp, c("bmi", "hyp")) +#' +#' ## Variable names can be unquoted +#' plot_trace(imp, bmi) +#' plot_trace(imp, c(bmi, hyp)) +#' +#' ## When passing the variable names as an object from the environment, the +#' ## object's name must be unqoted via `!!`. +#' vars <- c("bmi", "hyp") +#' plot_trace(imp, vars) |> try() # Error +#' plot_trace(imp, !!vars) # Runs because the 'vrb' argument is unquoted +#' +#' for(v in vars) +#' plot_trace(imp, !!v) |> print() +#' +#' @importFrom utils tail +#' @importFrom rlang enexpr #' @export plot_trace <- function(data, vrb = "all") { verify_data(data, imp = TRUE) @@ -20,14 +49,27 @@ plot_trace <- function(data, vrb = "all") { sm <- sqrt(data$chainVar) # select variable to plot from list of imputed variables - vrb <- substitute(vrb) + vrb <- enexpr(vrb) + if(is.call(vrb)) + vrb <- as.character(vrb) |> tail(-1) + else if(is.symbol(vrb)) + vrb <- as.character(vrb) + varlist <- names(data$imp)[apply(!(is.nan(mn) | is.na(mn)), 1, all)] - if (as.character(vrb)[1] == "all") { + if (length(vrb) == 1 && as.character(vrb) == "all") { vrb <- varlist - } else { - vrb <- names(dplyr::select(data$data, {{vrb}})) + } else if (any(vrb %nin% colnames(data$data))) { + cli::cli_abort( + c( + "x" = "The following variables are not present in 'data':", + " " = paste(setdiff(vrb, colnames(data$data)), collapse = ", "), + "i" = "Did you forget to use `!!` to unqote the object name you passed to the `vrb` argument?", + "i" = "Or maybe you just made a typo?" + ) + ) } + if (any(vrb %nin% varlist)) { cli::cli_inform( c( @@ -89,3 +131,4 @@ plot_trace <- function(data, vrb = "all") { strip.switch.pad.wrap = ggplot2::unit(0, "cm") ) } + diff --git a/man/plot_trace.Rd b/man/plot_trace.Rd index acb4cf34..f0eb9a96 100644 --- a/man/plot_trace.Rd +++ b/man/plot_trace.Rd @@ -17,7 +17,34 @@ An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. \description{ Plot the trace lines of the imputation algorithm } +\details{ +The \code{vrb} argument is "quoted" via \code{\link[rlang:defusing-advanced]{rlang::enexpr()}} and evaluated according +to \href{https://adv-r.hadley.nz/metaprogramming.html}{Tidy Evaluation principles}. +In practice, this technical nuance only affects users when passing an object +from the environment (e.g., a vector of variable names) to the \code{vrb} argument. +In such cases, the object must be "unquoted" via the \verb{!!} prefix operator. +} \examples{ imp <- mice::mice(mice::nhanes, print = FALSE) + +## Plot all imputed variables plot_trace(imp) + +## Variables can be specified via character vectors comprising their names +plot_trace(imp, "bmi") +plot_trace(imp, c("bmi", "hyp")) + +## Variable names can be unquoted +plot_trace(imp, bmi) +plot_trace(imp, c(bmi, hyp)) + +## When passing the variable names as an object from the environment, the +## object's name must be unqoted via `!!`. +vars <- c("bmi", "hyp") +plot_trace(imp, vars) |> try() # Error +plot_trace(imp, !!vars) # Runs because the 'vrb' argument is unquoted + +for(v in vars) + plot_trace(imp, !!v) |> print() + } diff --git a/tests/testthat/test-plot_trace.R b/tests/testthat/test-plot_trace.R index ff18805d..f047008e 100644 --- a/tests/testthat/test-plot_trace.R +++ b/tests/testthat/test-plot_trace.R @@ -1,6 +1,7 @@ # create test objects dat <- mice::nhanes imp <- mice::mice(dat, printFlag = FALSE) +v <- c("bmi", "hyp") # tests test_that("plot_trace creates ggplot object", { @@ -11,6 +12,8 @@ test_that("plot_trace creates ggplot object", { expect_s3_class(plot_trace(imp, vrb = bmi), "ggplot") expect_s3_class(plot_trace(imp, vrb = c("bmi", "hyp")), "ggplot") expect_s3_class(plot_trace(imp, vrb = c(bmi, hyp)), "ggplot") + expect_s3_class(plot_trace(imp, vrb = !!v), "ggplot") + expect_s3_class(plot_trace(imp, vrb = !!v[1]), "ggplot") }) test_that("plot_trace returns error with incorrect argument(s)", { @@ -18,4 +21,6 @@ test_that("plot_trace returns error with incorrect argument(s)", { expect_error(plot_trace(imp, vrb = "test")) expect_error(plot_trace(imp, vrb = "age")) expect_message(plot_trace(imp, vrb = c("age", "bmi"))) + expect_error(plot_trace(imp, vrb = v)) + expect_error(plot_trace(imp, vrb = v[1])) }) From 5b66d1958bf589eec8415fb8a49e6b236d60ed55 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Fri, 19 Jul 2024 17:01:57 +0200 Subject: [PATCH 2/9] code style edits (see review) --- NAMESPACE | 2 -- R/plot_trace.R | 24 +++++++++--------------- tests/testthat/test-plot_trace.R | 2 +- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index d3b3a8ae..5a976a40 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,5 +13,3 @@ export(stripplot) export(xyplot) importFrom(magrittr,"%>%") importFrom(rlang,.data) -importFrom(rlang,enexpr) -importFrom(utils,tail) diff --git a/R/plot_trace.R b/R/plot_trace.R index 2a24321c..402eacaa 100644 --- a/R/plot_trace.R +++ b/R/plot_trace.R @@ -35,8 +35,6 @@ #' for(v in vars) #' plot_trace(imp, !!v) |> print() #' -#' @importFrom utils tail -#' @importFrom rlang enexpr #' @export plot_trace <- function(data, vrb = "all") { verify_data(data, imp = TRUE) @@ -49,17 +47,18 @@ plot_trace <- function(data, vrb = "all") { sm <- sqrt(data$chainVar) # select variable to plot from list of imputed variables - vrb <- enexpr(vrb) - if(is.call(vrb)) - vrb <- as.character(vrb) |> tail(-1) - else if(is.symbol(vrb)) + vrb <- rlang::enexpr(vrb) + if (is.call(vrb)) + vrb <- as.character(vrb) |> utils::tail(-1) + if (is.symbol(vrb)) vrb <- as.character(vrb) varlist <- names(data$imp)[apply(!(is.nan(mn) | is.na(mn)), 1, all)] if (length(vrb) == 1 && as.character(vrb) == "all") { vrb <- varlist - } else if (any(vrb %nin% colnames(data$data))) { + } + if (any(vrb %nin% colnames(data$data))) { cli::cli_abort( c( "x" = "The following variables are not present in 'data':", @@ -81,8 +80,7 @@ plot_trace <- function(data, vrb = "all") { if (any(vrb %in% varlist)) { vrb <- vrb[which(vrb %in% varlist)] } else { - cli::cli_abort(c("x" = "None of the variables are imputed.", - "No plots can be produced.")) + cli::cli_abort(c("x" = "None of the variables are imputed.", "No plots can be produced.")) } } @@ -95,8 +93,7 @@ plot_trace <- function(data, vrb = "all") { vrb = rep(vrb, each = m * it, times = 2), val = c(matrix(aperm(mn[vrb, , , drop = FALSE], c( 2, 3, 1 - )), nrow = m * it * p), - matrix(aperm(sm[vrb, , , drop = FALSE], c( + )), nrow = m * it * p), matrix(aperm(sm[vrb, , , drop = FALSE], c( 2, 3, 1 )), nrow = m * it * p)) )) @@ -121,9 +118,7 @@ plot_trace <- function(data, vrb = "all") { list(do.call(paste, c(labels, list(sep = "\n")))) } ) + - ggplot2::labs(x = "Iteration", - y = "Imputation parameter", - color = "Imputation number") + + ggplot2::labs(x = "Iteration", y = "Imputation parameter", color = "Imputation number") + theme_mice() + ggplot2::theme( strip.background = ggplot2::element_blank(), @@ -131,4 +126,3 @@ plot_trace <- function(data, vrb = "all") { strip.switch.pad.wrap = ggplot2::unit(0, "cm") ) } - diff --git a/tests/testthat/test-plot_trace.R b/tests/testthat/test-plot_trace.R index f047008e..e294ebcb 100644 --- a/tests/testthat/test-plot_trace.R +++ b/tests/testthat/test-plot_trace.R @@ -1,7 +1,7 @@ # create test objects dat <- mice::nhanes imp <- mice::mice(dat, printFlag = FALSE) -v <- c("bmi", "hyp") +v <- c("bmi", "hyp") # tests test_that("plot_trace creates ggplot object", { From e34a1b577a9b94974bd838048e04c4f9a395ff18 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Wed, 24 Jul 2024 16:16:30 +0200 Subject: [PATCH 3/9] documentation and error message text --- R/plot_trace.R | 45 ++++++++++++++++++++++++--------------------- man/plot_trace.Rd | 27 +++++++++++++-------------- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/R/plot_trace.R b/R/plot_trace.R index 402eacaa..d6a5a1b2 100644 --- a/R/plot_trace.R +++ b/R/plot_trace.R @@ -1,39 +1,39 @@ #' Plot the trace lines of the imputation algorithm #' #' @param data An object of class [mice::mids]. -#' @param vrb String, vector, or unquoted expression with variable name(s), default is "all". +#' @param vrb String, vector, or unquoted expression with variable name(s), +#' default is "all". #' #' @details #' The `vrb` argument is "quoted" via [rlang::enexpr()] and evaluated according -#' to [Tidy Evaluation principles](https://adv-r.hadley.nz/metaprogramming.html). +#' to [tidy evaluation principles](https://adv-r.hadley.nz/metaprogramming.html). #' In practice, this technical nuance only affects users when passing an object #' from the environment (e.g., a vector of variable names) to the `vrb` argument. #' In such cases, the object must be "unquoted" via the `!!` prefix operator. #' -#' @return An object of class [ggplot2::ggplot]. +#' @returns An object of class [ggplot2::ggplot]. #' #' @examples +#' # create [mice::mids] object with [mice::mice()] #' imp <- mice::mice(mice::nhanes, print = FALSE) #' -#' ## Plot all imputed variables +#' # plot trace lines for all imputed columns #' plot_trace(imp) #' -#' ## Variables can be specified via character vectors comprising their names +#' # plot trace lines for specific columns by supplying a string or character vector #' plot_trace(imp, "bmi") #' plot_trace(imp, c("bmi", "hyp")) -#' -#' ## Variable names can be unquoted + +#' # plot trace lines for specific columns by supplying unquoted variable names #' plot_trace(imp, bmi) #' plot_trace(imp, c(bmi, hyp)) #' -#' ## When passing the variable names as an object from the environment, the -#' ## object's name must be unqoted via `!!`. -#' vars <- c("bmi", "hyp") -#' plot_trace(imp, vars) |> try() # Error -#' plot_trace(imp, !!vars) # Runs because the 'vrb' argument is unquoted -#' -#' for(v in vars) -#' plot_trace(imp, !!v) |> print() +#' # plot trace lines for specific columns by passing an object with variable names +#' # from the environment, unquoted with `!!` +#' my_variables <- c("bmi", "hyp") +#' plot_trace(imp, !!my_variables) +#' # object with variable names must be unquoted with `!!` +#' try(plot_trace(imp, my_variables)) #' #' @export plot_trace <- function(data, vrb = "all") { @@ -58,17 +58,20 @@ plot_trace <- function(data, vrb = "all") { if (length(vrb) == 1 && as.character(vrb) == "all") { vrb <- varlist } - if (any(vrb %nin% colnames(data$data))) { + if (all(vrb %nin% colnames(data$data))) { cli::cli_abort( c( - "x" = "The following variables are not present in 'data':", - " " = paste(setdiff(vrb, colnames(data$data)), collapse = ", "), - "i" = "Did you forget to use `!!` to unqote the object name you passed to the `vrb` argument?", - "i" = "Or maybe you just made a typo?" + "x" = "Variable name(s) not found in {.code data}.", + "i" = "If you supply an object with variable names from the environment, use `!!` to unqote:", + " " = paste0("{.code vrb = !!", vrb, "}") ) ) } - + if (any(vrb %nin% colnames(data$data))) { + cli::cli_abort(c("x" = "The following variables are not present in {.code data}:", " " = paste(setdiff( + vrb, colnames(data$data) + ), collapse = ", "))) + } if (any(vrb %nin% varlist)) { cli::cli_inform( c( diff --git a/man/plot_trace.Rd b/man/plot_trace.Rd index f0eb9a96..43848fbc 100644 --- a/man/plot_trace.Rd +++ b/man/plot_trace.Rd @@ -9,7 +9,8 @@ plot_trace(data, vrb = "all") \arguments{ \item{data}{An object of class \link[mice:mids-class]{mice::mids}.} -\item{vrb}{String, vector, or unquoted expression with variable name(s), default is "all".} +\item{vrb}{String, vector, or unquoted expression with variable name(s), +default is "all".} } \value{ An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. @@ -19,32 +20,30 @@ Plot the trace lines of the imputation algorithm } \details{ The \code{vrb} argument is "quoted" via \code{\link[rlang:defusing-advanced]{rlang::enexpr()}} and evaluated according -to \href{https://adv-r.hadley.nz/metaprogramming.html}{Tidy Evaluation principles}. +to \href{https://adv-r.hadley.nz/metaprogramming.html}{tidy evaluation principles}. In practice, this technical nuance only affects users when passing an object from the environment (e.g., a vector of variable names) to the \code{vrb} argument. In such cases, the object must be "unquoted" via the \verb{!!} prefix operator. } \examples{ +# create [mice::mids] object with [mice::mice()] imp <- mice::mice(mice::nhanes, print = FALSE) -## Plot all imputed variables +# plot trace lines for all imputed columns plot_trace(imp) -## Variables can be specified via character vectors comprising their names +# plot trace lines for specific columns by supplying a string or character vector plot_trace(imp, "bmi") plot_trace(imp, c("bmi", "hyp")) - -## Variable names can be unquoted +# plot trace lines for specific columns by supplying unquoted variable names plot_trace(imp, bmi) plot_trace(imp, c(bmi, hyp)) -## When passing the variable names as an object from the environment, the -## object's name must be unqoted via `!!`. -vars <- c("bmi", "hyp") -plot_trace(imp, vars) |> try() # Error -plot_trace(imp, !!vars) # Runs because the 'vrb' argument is unquoted - -for(v in vars) - plot_trace(imp, !!v) |> print() +# plot trace lines for specific columns by passing an object with variable names +# from the environment, unquoted with `!!` +my_variables <- c("bmi", "hyp") +plot_trace(imp, !!my_variables) +# object with variable names must be unquoted with `!!` +try(plot_trace(imp, my_variables)) } From 82522e42f2505c2a3be502e0a41a9a80eaeb15cb Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Thu, 25 Jul 2024 17:21:04 +0200 Subject: [PATCH 4/9] remove unnecessary(?) use of `tail()` --- R/plot_trace.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/plot_trace.R b/R/plot_trace.R index d6a5a1b2..bdab8d09 100644 --- a/R/plot_trace.R +++ b/R/plot_trace.R @@ -49,7 +49,7 @@ plot_trace <- function(data, vrb = "all") { # select variable to plot from list of imputed variables vrb <- rlang::enexpr(vrb) if (is.call(vrb)) - vrb <- as.character(vrb) |> utils::tail(-1) + vrb <- as.character(vrb)[-1] if (is.symbol(vrb)) vrb <- as.character(vrb) From dd4b9d21745064fc82eef91c1707ac2a6c35b7e7 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Fri, 26 Jul 2024 14:43:55 +0200 Subject: [PATCH 5/9] put variable processing in utils function for re-use --- R/plot_trace.R | 67 +++++++++++++++++--------------------------------- R/utils.R | 34 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 45 deletions(-) diff --git a/R/plot_trace.R b/R/plot_trace.R index bdab8d09..21661b9f 100644 --- a/R/plot_trace.R +++ b/R/plot_trace.R @@ -48,58 +48,35 @@ plot_trace <- function(data, vrb = "all") { # select variable to plot from list of imputed variables vrb <- rlang::enexpr(vrb) - if (is.call(vrb)) - vrb <- as.character(vrb)[-1] - if (is.symbol(vrb)) - vrb <- as.character(vrb) - - varlist <- - names(data$imp)[apply(!(is.nan(mn) | is.na(mn)), 1, all)] - if (length(vrb) == 1 && as.character(vrb) == "all") { - vrb <- varlist - } - if (all(vrb %nin% colnames(data$data))) { - cli::cli_abort( - c( - "x" = "Variable name(s) not found in {.code data}.", - "i" = "If you supply an object with variable names from the environment, use `!!` to unqote:", - " " = paste0("{.code vrb = !!", vrb, "}") - ) - ) - } - if (any(vrb %nin% colnames(data$data))) { - cli::cli_abort(c("x" = "The following variables are not present in {.code data}:", " " = paste(setdiff( - vrb, colnames(data$data) - ), collapse = ", "))) - } - if (any(vrb %nin% varlist)) { + vrbs_in_data <- names(data$imp) + vrb_matched <- match_vrb(vrb, vrbs_in_data) + available_vrbs <- vrbs_in_data[apply(!(is.nan(mn) | is.na(sm)), 1, all)] + if (any(vrb_matched %nin% available_vrbs)) { cli::cli_inform( c( "Trace plot could not be produced for variable(s):", - " " = paste(vrb[which(vrb %nin% varlist)], collapse = ", "), - "x" = "No convergence diagnostics found." + " " = paste(vrb_matched[which(vrb_matched %nin% available_vrbs)], collapse = ", "), + "i" = "No convergence diagnostics found." ) ) - if (any(vrb %in% varlist)) { - vrb <- vrb[which(vrb %in% varlist)] - } else { - cli::cli_abort(c("x" = "None of the variables are imputed.", "No plots can be produced.")) - } } - - p <- length(vrb) + vrb_matched <- vrb_matched[which(vrb_matched %in% available_vrbs)] + p <- length(vrb_matched) m <- data$m it <- data$iteration - long <- cbind(expand.grid(.it = seq_len(it), .m = seq_len(m)), - data.frame( - .ms = rep(c("mean", "sd"), each = m * it * p), - vrb = rep(vrb, each = m * it, times = 2), - val = c(matrix(aperm(mn[vrb, , , drop = FALSE], c( - 2, 3, 1 - )), nrow = m * it * p), matrix(aperm(sm[vrb, , , drop = FALSE], c( - 2, 3, 1 - )), nrow = m * it * p)) - )) + long <- cbind( + expand.grid(.it = seq_len(it), .m = seq_len(m)), + data.frame( + .ms = rep(c("mean", "sd"), each = m * it * p), + vrb_matched = rep(vrb_matched, each = m * it, times = 2), + val = c( + matrix(aperm(mn[vrb_matched, , , drop = FALSE], c( + 2, 3, 1)), nrow = m * it * p), + matrix(aperm(sm[vrb_matched, , , drop = FALSE], c( + 2, 3, 1)), nrow = m * it * p) + ) + ) + ) # plot the convergence diagnostics ggplot2::ggplot(long, @@ -111,7 +88,7 @@ plot_trace <- function(data, vrb = "all") { ggplot2::geom_line(linewidth = 0.6) + ggplot2::geom_hline(yintercept = -Inf) + ggplot2::facet_wrap( - .ms ~ vrb, + .ms ~ vrb_matched, dir = "v", ncol = 2, scales = "free_y", diff --git a/R/utils.R b/R/utils.R index 2cb42595..89b62d89 100644 --- a/R/utils.R +++ b/R/utils.R @@ -100,5 +100,39 @@ verify_data <- function(data, } } +#' Utils function to match `vrb` argument to variable names in `data` +#' +#' @param vrb The input supplied to the 'vrb' argument. +#' @param vrbs_in_data A character vector of available variable names in `data`. +#' +#' @return String or character vector with matched variable name(s). +#' +#' @keywords internal +#' @noRd +match_vrb <- function(vrb, vrbs_in_data) { + if (is.call(vrb)) + vrb <- as.character(vrb)[-1] + if (is.symbol(vrb)) + vrb <- as.character(vrb) + if (length(vrb) == 1 && as.character(vrb) == "all") { + vrb <- vrbs_in_data + } + if (all(vrb %nin% vrbs_in_data)) { + cli::cli_abort( + c( + "x" = "The variable name(s) supplied to {.var vrb} could not be found in {.var data}.", + "i" = "If you supply an object with variable names from the environment, use `!!` to unqote:", + " " = paste0("{.code vrb = !!", vrb, "}") + ) + ) + } + if (any(vrb %nin% vrbs_in_data)) { + cli::cli_warn(c("x" = "The following variables are not present in {.var data}:", " " = paste( + setdiff(vrb, vrbs_in_data), collapse = ", " + ))) + } + return(vrb) +} + # suppress undefined global functions or variables note utils::globalVariables(c(".id", ".imp", ".where", ".id", "where", "name", "value")) From 734d3d016f7a735965eaca75619a540cc84585d8 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Fri, 26 Jul 2024 15:43:44 +0200 Subject: [PATCH 6/9] match documentation other `plot_*` functions --- R/plot_trace.R | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/R/plot_trace.R b/R/plot_trace.R index 21661b9f..ecc6c57f 100644 --- a/R/plot_trace.R +++ b/R/plot_trace.R @@ -21,16 +21,16 @@ #' plot_trace(imp) #' #' # plot trace lines for specific columns by supplying a string or character vector -#' plot_trace(imp, "bmi") -#' plot_trace(imp, c("bmi", "hyp")) +#' plot_trace(imp, "chl") +#' plot_trace(imp, c("chl", "hyp")) #' # plot trace lines for specific columns by supplying unquoted variable names -#' plot_trace(imp, bmi) -#' plot_trace(imp, c(bmi, hyp)) +#' plot_trace(imp, chl) +#' plot_trace(imp, c(chl, hyp)) #' #' # plot trace lines for specific columns by passing an object with variable names #' # from the environment, unquoted with `!!` -#' my_variables <- c("bmi", "hyp") +#' my_variables <- c("chl", "hyp") #' plot_trace(imp, !!my_variables) #' # object with variable names must be unquoted with `!!` #' try(plot_trace(imp, my_variables)) @@ -41,15 +41,12 @@ plot_trace <- function(data, vrb = "all") { if (is.null(data$chainMean) && is.null(data$chainVar)) { cli::cli_abort("No convergence diagnostics found", call. = FALSE) } - - # extract chain means and chain standard deviations - mn <- data$chainMean - sm <- sqrt(data$chainVar) - - # select variable to plot from list of imputed variables vrb <- rlang::enexpr(vrb) vrbs_in_data <- names(data$imp) vrb_matched <- match_vrb(vrb, vrbs_in_data) + # extract chain means and chain standard deviations + mn <- data$chainMean + sm <- sqrt(data$chainVar) available_vrbs <- vrbs_in_data[apply(!(is.nan(mn) | is.na(sm)), 1, all)] if (any(vrb_matched %nin% available_vrbs)) { cli::cli_inform( @@ -61,6 +58,7 @@ plot_trace <- function(data, vrb = "all") { ) } vrb_matched <- vrb_matched[which(vrb_matched %in% available_vrbs)] + # compute diagnostics p <- length(vrb_matched) m <- data$m it <- data$iteration @@ -77,8 +75,7 @@ plot_trace <- function(data, vrb = "all") { ) ) ) - - # plot the convergence diagnostics + # create plot ggplot2::ggplot(long, ggplot2::aes( x = .data$.it, From c163aff8ee803140becff0d0bea8260bcc9d0c76 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Fri, 26 Jul 2024 15:45:16 +0200 Subject: [PATCH 7/9] implement utils function from #157 in all other `plot_*` functions, and add examples --- R/plot_corr.R | 49 +++++++++++++++++++++++++++------------------ R/plot_flux.R | 30 ++++++++++++++++++--------- R/plot_pattern.R | 32 +++++++++++++++++++---------- R/plot_pred.R | 36 ++++++++++++++++++++++----------- man/plot_corr.Rd | 17 +++++++++++++++- man/plot_flux.Rd | 15 ++++++++++++++ man/plot_pattern.Rd | 15 ++++++++++++++ man/plot_pred.Rd | 17 ++++++++++++++++ man/plot_trace.Rd | 10 ++++----- 9 files changed, 164 insertions(+), 57 deletions(-) diff --git a/R/plot_corr.R b/R/plot_corr.R index 29f7d957..290eeb0f 100644 --- a/R/plot_corr.R +++ b/R/plot_corr.R @@ -11,7 +11,22 @@ #' @return An object of class [ggplot2::ggplot]. #' #' @examples -#' plot_corr(mice::nhanes, label = TRUE) +#' # plot correlations for all columns +#' plot_corr(mice::nhanes) +#' +#' # plot correlations for specific columns by supplying a character vector +#' plot_corr(mice::nhanes, c("chl", "hyp")) +#' +#' # plot correlations for specific columns by supplying unquoted variable names +#' plot_corr(mice::nhanes, c(chl, hyp)) +#' +#' # plot correlations for specific columns by passing an object with variable names +#' # from the environment, unquoted with `!!` +#' my_variables <- c("chl", "hyp") +#' plot_corr(mice::nhanes, !!my_variables) +#' # object with variable names must be unquoted with `!!` +#' try(plot_corr(mice::nhanes, my_variables)) +#' #' @export plot_corr <- function(data, @@ -25,38 +40,33 @@ plot_corr <- data <- as.data.frame(data) } verify_data(data = data, df = TRUE) - vrb <- substitute(vrb) - if (vrb != "all" && length(vrb) < 2) { + vrb <- rlang::enexpr(vrb) + vrb_matched <- match_vrb(vrb, names(data)) + if (length(vrb_matched) < 2) { cli::cli_abort("The number of variables should be two or more to compute correlations.") } - if (vrb[1] == "all") { - vrb <- names(data) - } else { - data <- dplyr::select(data, {{vrb}}) - vrb <- names(data) - } # check if any column is constant - constants <- apply(data, MARGIN = 2, function(x) { + constants <- apply(data[, vrb_matched], MARGIN = 2, function(x) { all(is.na(x)) || max(x, na.rm = TRUE) == min(x, na.rm = TRUE) }) if (any(constants)) { - vrb <- names(data[, !constants]) + vrb_matched <- vrb_matched[!constants] cli::cli_inform( c( "No correlations computed for variable(s):", " " = paste(names(constants[which(constants)]), collapse = ", "), - "x" = "Correlation undefined for constants." + "i" = "Correlations are undefined for constants." ) ) } - - p <- length(vrb) + # compute correlations + p <- length(vrb_matched) corrs <- data.frame( - vrb = rep(vrb, each = p), - prd = vrb, + vrb = rep(vrb_matched, each = p), + prd = vrb_matched, corr = matrix( round(stats::cov2cor( - stats::cov(data.matrix(data[, vrb]), use = "pairwise.complete.obs") + stats::cov(data.matrix(data[, vrb_matched]), use = "pairwise.complete.obs") ), 2), nrow = p * p, byrow = TRUE @@ -65,6 +75,7 @@ plot_corr <- if (!diagonal) { corrs[corrs$vrb == corrs$prd, "corr"] <- NA } + # create plot gg <- ggplot2::ggplot(corrs, ggplot2::aes( @@ -74,8 +85,8 @@ plot_corr <- fill = .data$corr )) + ggplot2::geom_tile(color = "black", alpha = 0.6) + - ggplot2::scale_x_discrete(limits = vrb, position = "top") + - ggplot2::scale_y_discrete(limits = rev(vrb)) + + ggplot2::scale_x_discrete(limits = vrb_matched, position = "top") + + ggplot2::scale_y_discrete(limits = rev(vrb_matched)) + ggplot2::scale_fill_gradient2( low = ggplot2::alpha("deepskyblue", 0.6), mid = "lightyellow", diff --git a/R/plot_flux.R b/R/plot_flux.R index 8f110f41..a3cb5b0b 100644 --- a/R/plot_flux.R +++ b/R/plot_flux.R @@ -8,7 +8,22 @@ #' @return An object of class [ggplot2::ggplot]. #' #' @examples +#' # plot flux for all columns #' plot_flux(mice::nhanes) +#' +#' # plot flux for specific columns by supplying a character vector +#' plot_flux(mice::nhanes, c("chl", "hyp")) +#' +#' # plot flux for specific columns by supplying unquoted variable names +#' plot_flux(mice::nhanes, c(chl, hyp)) +#' +#' # plot flux for specific columns by passing an object with variable names +#' # from the environment, unquoted with `!!` +#' my_variables <- c("chl", "hyp") +#' plot_flux(mice::nhanes, !!my_variables) +#' # object with variable names must be unquoted with `!!` +#' try(plot_flux(mice::nhanes, my_variables)) +#' #' @export plot_flux <- function(data, @@ -16,17 +31,14 @@ plot_flux <- label = TRUE, caption = TRUE) { verify_data(data, df = TRUE) - vrb <- substitute(vrb) - if (vrb != "all" && length(vrb) < 2) { + vrb <- rlang::enexpr(vrb) + vrb_matched <- match_vrb(vrb, names(data)) + if (length(vrb_matched) < 2) { cli::cli_abort("The number of variables should be two or more to compute flux.") } - if (vrb[1] == "all") { - vrb <- names(data) - } else { - vrb <- names(dplyr::select(data, {{vrb}})) - } - # plot in and outflux - flx <- mice::flux(data[, vrb])[, c("influx", "outflux")] + # compute flux + flx <- mice::flux(data[, vrb_matched])[, c("influx", "outflux")] + # create plot gg <- data.frame( vrb = rownames(flx), diff --git a/R/plot_pattern.R b/R/plot_pattern.R index 06342e6c..ed8d9192 100644 --- a/R/plot_pattern.R +++ b/R/plot_pattern.R @@ -11,7 +11,22 @@ #' @return An object of class [ggplot2::ggplot]. #' #' @examples +#' # plot missing data pattern for all columns #' plot_pattern(mice::nhanes) +#' +#' # plot missing data pattern for specific columns by supplying a character vector +#' plot_pattern(mice::nhanes, c("chl", "hyp")) +#' +#' # plot missing data pattern for specific columns by supplying unquoted variable names +#' plot_pattern(mice::nhanes, c(chl, hyp)) +#' +#' # plot missing data pattern for specific columns by passing an object with variable names +#' # from the environment, unquoted with `!!` +#' my_variables <- c("chl", "hyp") +#' plot_pattern(mice::nhanes, !!my_variables) +#' # object with variable names must be unquoted with `!!` +#' try(plot_pattern(mice::nhanes, my_variables)) +#' #' @export plot_pattern <- function(data, @@ -21,21 +36,16 @@ plot_pattern <- cluster = NULL, npat = NULL, caption = TRUE) { - # input processing if (is.matrix(data) && ncol(data) > 1) { data <- as.data.frame(data) } verify_data(data, df = TRUE) - vrb <- substitute(vrb) - if (vrb != "all" && length(vrb) < 2) { + vrb <- rlang::enexpr(vrb) + vrb_matched <- match_vrb(vrb, names(data)) + if (length(vrb_matched) < 2) { cli::cli_abort("The number of variables should be two or more to compute missing data patterns.") } - if (vrb[1] == "all") { - vrb <- names(data) - } else { - vrb <- names(dplyr::select(as.data.frame(data), {{vrb}})) - } - if (".x" %in% vrb || ".y" %in% vrb) { + if (".x" %in% vrb_matched || ".y" %in% vrb_matched) { cli::cli_abort( c( "The variable names '.x' and '.y' are used internally to produce the missing data pattern plot.", @@ -44,7 +54,7 @@ plot_pattern <- ) } if (!is.null(cluster)) { - if (cluster %nin% names(data[, vrb])) { + if (cluster %nin% names(data[, vrb_matched])) { cli::cli_abort( c("Cluster variable not recognized.", "i" = "Please provide the variable name as a character string.") @@ -61,7 +71,7 @@ plot_pattern <- } # get missing data pattern - pat <- mice::md.pattern(data[, vrb], plot = FALSE) + pat <- mice::md.pattern(data[, vrb_matched], plot = FALSE) rows_pat_full <- (nrow(pat) - 1) # full number of missing data patterns diff --git a/R/plot_pred.R b/R/plot_pred.R index f1484a87..0fb8b7c9 100644 --- a/R/plot_pred.R +++ b/R/plot_pred.R @@ -10,8 +10,25 @@ #' @return An object of class `ggplot2::ggplot`. #' #' @examples +#' # generate a predictor matrix #' pred <- mice::quickpred(mice::nhanes) +#' +#' # plot predictor matrix for all columns #' plot_pred(pred) +#' +#' # plot predictor matrix for specific columns by supplying a character vector +#' plot_pred(pred, c("chl", "hyp")) +#' +#' # plot predictor matrix for specific columns by supplying unquoted variable names +#' plot_pred(pred, c(chl, hyp)) +#' +#' # plot predictor matrix for specific columns by passing an object with variable names +#' # from the environment, unquoted with `!!` +#' my_variables <- c("chl", "hyp") +#' plot_pred(pred, !!my_variables) +#' # object with variable names must be unquoted with `!!` +#' try(plot_pred(pred, my_variables)) +#' #' @export plot_pred <- function(data, @@ -21,7 +38,9 @@ plot_pred <- square = TRUE, rotate = FALSE) { verify_data(data, pred = TRUE) - p <- nrow(data) + vrb <- rlang::enexpr(vrb) + vrb_matched <- match_vrb(vrb, row.names(data)) + p <- length(vrb_matched) if (!is.null(method) && is.character(method)) { if (length(method) == 1) { method <- rep(method, p) @@ -37,17 +56,10 @@ plot_pred <- if (!is.character(method) || length(method) != p) { cli::cli_abort("Method should be NULL or a character string or vector (of length 1 or `ncol(data)`).") } - vrb <- substitute(vrb) - if (vrb[1] == "all") { - vrb <- names(data) - } else { - vrb <- names(dplyr::select(as.data.frame(data), {{vrb}})) - } - vrbs <- row.names(data) long <- data.frame( vrb = 1:p, - prd = rep(vrbs, each = p), - ind = matrix(data, nrow = p * p, byrow = TRUE) + prd = rep(vrb_matched, each = p), + ind = matrix(data[vrb_matched, vrb_matched], nrow = p * p, byrow = TRUE) ) %>% dplyr::mutate(clr = factor( .data$ind, levels = c(-3, -2, 0, 1, 2), @@ -70,10 +82,10 @@ plot_pred <- fill = .data$clr )) + ggplot2::geom_tile(color = "black", alpha = 0.6) + - ggplot2::scale_x_discrete(limits = vrbs, position = "top") + + ggplot2::scale_x_discrete(limits = vrb_matched, position = "top") + ggplot2::scale_y_reverse( breaks = 1:p, - labels = vrbs, + labels = vrb_matched, sec.axis = ggplot2::dup_axis(labels = method, name = ylabel) ) + ggplot2::scale_fill_manual( diff --git a/man/plot_corr.Rd b/man/plot_corr.Rd index fa57995d..a12af753 100644 --- a/man/plot_corr.Rd +++ b/man/plot_corr.Rd @@ -36,5 +36,20 @@ An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. Plot correlations between (incomplete) variables } \examples{ -plot_corr(mice::nhanes, label = TRUE) +# plot correlations for all columns +plot_corr(mice::nhanes) + +# plot correlations for specific columns by supplying a character vector +plot_corr(mice::nhanes, c("chl", "hyp")) + +# plot correlations for specific columns by supplying unquoted variable names +plot_corr(mice::nhanes, c(chl, hyp)) + +# plot correlations for specific columns by passing an object with variable names +# from the environment, unquoted with `!!` +my_variables <- c("chl", "hyp") +plot_corr(mice::nhanes, !!my_variables) +# object with variable names must be unquoted with `!!` +try(plot_corr(mice::nhanes, my_variables)) + } diff --git a/man/plot_flux.Rd b/man/plot_flux.Rd index 7bea22c5..6a274b23 100644 --- a/man/plot_flux.Rd +++ b/man/plot_flux.Rd @@ -22,5 +22,20 @@ An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. Plot the influx and outflux of a multivariate missing data pattern } \examples{ +# plot flux for all columns plot_flux(mice::nhanes) + +# plot flux for specific columns by supplying a character vector +plot_flux(mice::nhanes, c("chl", "hyp")) + +# plot flux for specific columns by supplying unquoted variable names +plot_flux(mice::nhanes, c(chl, hyp)) + +# plot flux for specific columns by passing an object with variable names +# from the environment, unquoted with `!!` +my_variables <- c("chl", "hyp") +plot_flux(mice::nhanes, !!my_variables) +# object with variable names must be unquoted with `!!` +try(plot_flux(mice::nhanes, my_variables)) + } diff --git a/man/plot_pattern.Rd b/man/plot_pattern.Rd index 3d27a04a..f964c99d 100644 --- a/man/plot_pattern.Rd +++ b/man/plot_pattern.Rd @@ -36,5 +36,20 @@ An object of class \link[ggplot2:ggplot]{ggplot2::ggplot}. Plot the missing data pattern of an incomplete dataset } \examples{ +# plot missing data pattern for all columns plot_pattern(mice::nhanes) + +# plot missing data pattern for specific columns by supplying a character vector +plot_pattern(mice::nhanes, c("chl", "hyp")) + +# plot missing data pattern for specific columns by supplying unquoted variable names +plot_pattern(mice::nhanes, c(chl, hyp)) + +# plot missing data pattern for specific columns by passing an object with variable names +# from the environment, unquoted with `!!` +my_variables <- c("chl", "hyp") +plot_pattern(mice::nhanes, !!my_variables) +# object with variable names must be unquoted with `!!` +try(plot_pattern(mice::nhanes, my_variables)) + } diff --git a/man/plot_pred.Rd b/man/plot_pred.Rd index c71970a6..2e3ee6f3 100644 --- a/man/plot_pred.Rd +++ b/man/plot_pred.Rd @@ -33,6 +33,23 @@ An object of class \code{ggplot2::ggplot}. Plot the predictor matrix of an imputation model } \examples{ +# generate a predictor matrix pred <- mice::quickpred(mice::nhanes) + +# plot predictor matrix for all columns plot_pred(pred) + +# plot predictor matrix for specific columns by supplying a character vector +plot_pred(pred, c("chl", "hyp")) + +# plot predictor matrix for specific columns by supplying unquoted variable names +plot_pred(pred, c(chl, hyp)) + +# plot predictor matrix for specific columns by passing an object with variable names +# from the environment, unquoted with `!!` +my_variables <- c("chl", "hyp") +plot_pred(pred, !!my_variables) +# object with variable names must be unquoted with `!!` +try(plot_pred(pred, my_variables)) + } diff --git a/man/plot_trace.Rd b/man/plot_trace.Rd index 43848fbc..cc939820 100644 --- a/man/plot_trace.Rd +++ b/man/plot_trace.Rd @@ -33,15 +33,15 @@ imp <- mice::mice(mice::nhanes, print = FALSE) plot_trace(imp) # plot trace lines for specific columns by supplying a string or character vector -plot_trace(imp, "bmi") -plot_trace(imp, c("bmi", "hyp")) +plot_trace(imp, "chl") +plot_trace(imp, c("chl", "hyp")) # plot trace lines for specific columns by supplying unquoted variable names -plot_trace(imp, bmi) -plot_trace(imp, c(bmi, hyp)) +plot_trace(imp, chl) +plot_trace(imp, c(chl, hyp)) # plot trace lines for specific columns by passing an object with variable names # from the environment, unquoted with `!!` -my_variables <- c("bmi", "hyp") +my_variables <- c("chl", "hyp") plot_trace(imp, !!my_variables) # object with variable names must be unquoted with `!!` try(plot_trace(imp, my_variables)) From 51a5569e86bca5298d05a89a4aa64c3eab6065e0 Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Fri, 26 Jul 2024 16:10:30 +0200 Subject: [PATCH 8/9] lint whitespace --- R/ggmice.R | 14 +++++--------- R/plot_corr.R | 6 ++---- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/R/ggmice.R b/R/ggmice.R index 3ab1862e..d2e47336 100644 --- a/R/ggmice.R +++ b/R/ggmice.R @@ -60,8 +60,7 @@ ggmice <- function(data = NULL, } if (length(vrbs) > length(unique(vrbs))) { cli::cli_abort( - c("The data must have unique column names.", - "x" = "Duplication found in {vrbs[duplicated(vrbs)]}") + c("The data must have unique column names.", "x" = "Duplication found in {vrbs[duplicated(vrbs)]}") ) } # extract mapping variables @@ -107,8 +106,8 @@ ggmice <- function(data = NULL, .imp = 0, .id = rownames(data$data), data$data - )[!miss_xy,], - data.frame(.where = "imputed", mice::complete(data, action = "long"))[where_xy,] + )[!miss_xy, ], + data.frame(.where = "imputed", mice::complete(data, action = "long"))[where_xy, ] ), .where = factor( .where, @@ -154,7 +153,6 @@ ggmice <- function(data = NULL, return(gg) } - #' Utils function to extract mapping variables #' #' @param data Incomplete dataset or mids object. @@ -197,11 +195,9 @@ match_mapping <- function(data, vrbs, mapping_in) { inherits(try(dplyr::mutate(mapping_data, !!rlang::parse_quo(mapping_text, env = rlang::current_env())), silent = TRUE) - , - "try-error")) { + , "try-error")) { cli::cli_abort( - c("Must provide a valid mapping variable.", - "x" = "Mapping variable '{mapping_text}' not found in the data or imputations.") + c("Must provide a valid mapping variable.", "x" = "Mapping variable '{mapping_text}' not found in the data or imputations.") ) } else { cli::cli_warn( diff --git a/R/plot_corr.R b/R/plot_corr.R index 290eeb0f..2d290e04 100644 --- a/R/plot_corr.R +++ b/R/plot_corr.R @@ -102,13 +102,11 @@ plot_corr <- y = "Variable to impute", fill = "Correlation* ", - caption = "*pairwise complete observations" + caption = "*pairwise complete observations" ) } else { gg <- gg + - ggplot2::labs(x = "Imputation model predictor", - y = "Variable to impute", - fill = "Correlation") + ggplot2::labs(x = "Imputation model predictor", y = "Variable to impute", fill = "Correlation") } if (label) { gg <- From 7fe9028c3403afba4e269303ea6ee4a8571a3eaf Mon Sep 17 00:00:00 2001 From: hanneoberman Date: Fri, 26 Jul 2024 16:16:14 +0200 Subject: [PATCH 9/9] add #157 to NEWS --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index ef10f9d5..1e0b2721 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,8 @@ ## Bug fixes -* Correct labeling of 'exclusion-restriction' variables in `plot_pred()` (#128) +* Correct labeling of 'exclusion-restriction' variables in `plot_pred()` (#128) +* Parsing of `vrb` argument in all `plot_*()` functions: variable name(s) from object in global environment now recognized using `!!` notation (#157) ## Minor changes