From ee4214a09b99a05e2b4c8a62b5a3dd962c64e228 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 5 Dec 2024 21:51:47 -0500 Subject: [PATCH 01/25] Add sorting function --- NAMESPACE | 2 + R/sort_tbl_hierarchical.R | 196 +++++++++++++++++++++++++++++++++++ man/sort_tbl_hierarchical.Rd | 55 ++++++++++ 3 files changed, 253 insertions(+) create mode 100644 R/sort_tbl_hierarchical.R create mode 100644 man/sort_tbl_hierarchical.Rd diff --git a/NAMESPACE b/NAMESPACE index 21100d773..e0301b684 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -77,6 +77,7 @@ S3method(tbl_regression,multinom) S3method(tbl_regression,stanreg) S3method(tbl_regression,survreg) S3method(tbl_regression,workflow) +S3method(tbl_sort,tbl_hierarchical) S3method(tbl_split,gtsummary) S3method(tbl_survfit,data.frame) S3method(tbl_survfit,list) @@ -209,6 +210,7 @@ export(tbl_hierarchical_count) export(tbl_likert) export(tbl_merge) export(tbl_regression) +export(tbl_sort) export(tbl_split) export(tbl_stack) export(tbl_strata) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R new file mode 100644 index 000000000..3c7e576dc --- /dev/null +++ b/R/sort_tbl_hierarchical.R @@ -0,0 +1,196 @@ +#' Sort Hierarchical Tables +#' +#' @description `r lifecycle::badge('experimental')`\cr +#' +#' This function is used to sort hierarchical tables. Options for sorting criteria are: +#' +#' 1. Alphanumeric - rows are ordered alphanumerically by label text (default). +#' 2. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +#' ordered accordingly. +#' +#' @param x (`tbl_hierarchical`)\cr +#' A hierarchical gtsummary table of class `'tbl_hierarchical'`. +#' @param sort (`string`)\cr +#' Specifies sorting to perform. Values must be one of `c("alphanumeric", "frequency")`. Default is `"frequency"`. +#' @param ascending (`logical`)\cr +#' Whether to sort rows in ascending or descending order. Default is descending (`ascending = FALSE`). +#' @param .stat (`string`)\cr +#' Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all +#' hierarchy levels. +#' +#' @name sort_tbl_hierarchical +#' +#' @examples +#' ADAE_subset <- cards::ADAE |> +#' dplyr::filter( +#' AESOC %in% unique(cards::ADAE$AESOC)[1:5], +#' AETERM %in% unique(cards::ADAE$AETERM)[1:5] +#' ) +#' +#' tbl <- tbl_hierarchical( +#' data = ADAE_subset, +#' variables = c(SEX, RACE, AETERM), +#' by = TRTA, +#' denominator = cards::ADSL |> mutate(TRTA = ARM), +#' id = USUBJID, +#' include = AETERM, +#' overall_row = TRUE +#' ) +#' +#' tbl_sort(tbl, .stat = "N") +NULL + +#' @rdname sort_tbl_hierarchical +#' @export +tbl_sort <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + + UseMethod("tbl_sort") +} + +#' @rdname sort_tbl_hierarchical +#' @export +tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE, .stat = "n") { + set_cli_abort_call() + + # process and check inputs --------------------------------------------------- + check_logical(ascending) + + if (!sort %in% c("frequency", "alphanumeric")) { + cli::cli_abort( + "The {.arg sort} argument must be either {.val frequency} or {.val alphanumeric}.", + call = get_cli_abort_call() + ) + } + + if (sort == "alphanumeric") { + sort_cols <- c( + x$table_body |> select(cards::all_ard_groups("names")) |> names(), + "label" + ) + + x$table_body <- x$table_body |> + arrange(across(sort_cols, ~ if (!ascending) desc(.x) else .x)) + } else { + x <- .append_sort_counts(x, .stat) + u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() + g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + + # assign counts for each hierarchy level + for (g in names(g_cols)) { + x$table_body <- x$table_body |> + dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE) + + x$table_body <- x$table_body |> + left_join( + summarise(x$table_body, !!paste0("count_", g) := first(count_total)), + by = x$table_body |> dplyr::group_vars() + ) + } + x$table_body <- x$table_body |> + dplyr::rowwise() |> + mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |> + dplyr::ungroup() + + # sort by counts ------------------------------------------------------------------------------ + sort_cols <- c(rbind( + x$table_body |> select(cards::all_ard_groups("names")) |> names(), + x$table_body |> select(starts_with("count_group")) |> names(), + x$table_body |> select(cards::all_ard_groups("levels")) |> names() + ), "inner_var", "count_total", "label") + + x$table_body <- x$table_body |> + arrange(across(sort_cols, ~ if (is.numeric(.x) && !ascending) desc(.x) else .x)) + } + + x +} + +.append_sort_counts <- function(x, .stat) { + if (!.stat %in% x$cards$tbl_hierarchical$stat_name) { + cli::cli_abort( + "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid + statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.", + call = get_cli_abort_call() + ) + } + + cards <- x$cards$tbl_hierarchical + by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA + g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + + # fill in variable_level column of cards + cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |> + dplyr::filter(variable == "..ard_hierarchical_overall..") |> + dplyr::pull("label") |> + as.list() + + # extract counts ------------------------------------------------------------------------------ + cards <- cards |> + dplyr::filter(stat_name == .stat, variable %in% x$table_body$variable) |> + dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |> + dplyr::summarise(count_total = sum(unlist(stat))) |> + dplyr::ungroup() |> + dplyr::rename(label = variable_level) |> + tidyr::unnest(cols = everything()) + + # match names to x$table_body + if (length(by_cols) > 1) { + names(cards)[grep("group", names(cards))] <- x$table_body |> + select(cards::all_ard_groups()) |> + names() + } + cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.." + + # align cards layout with x$table_body -------------------------------------------------------- + cards <- cards |> + dplyr::rowwise() |> + dplyr::mutate(across( + cards::all_ard_groups(), + ~ if (is.na(.x) && !grepl("_level", cur_column()) && variable == g_cols[cur_column()]) { + variable + } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", cur_column())]) { + label + } else { + .x + } + )) + + # calculate total group sums for any variables not in include --------------------------------- + if (!all(g_cols %in% cards$variable)) { + gp_vars <- g_cols[g_cols %in% setdiff(g_cols, cards$variable)] + cli::cli_warn( + "Not all hierarchy variables present in the table were included in the {.arg include} argument. + These variables ({gp_vars}) do not have event rate data available so the total sum of the event + rates for this hierarchy section will be used instead. To use event rates to sort all sections of the table, + set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}.", + call = get_cli_abort_call() + ) + + gp_cols <- names(gp_vars) + for (i in seq_along(gp_cols)) { + cards <- cards |> + dplyr::bind_rows( + cards |> + filter(variable != "..ard_hierarchical_overall..") |> + group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> + summarize(count_total = sum(count_total)) |> + mutate( + variable = .data[[gp_cols[i]]], + label = .data[[paste0(gp_cols[i], "_level")]] + ) + ) + } + } + + # add counts to x$table_body ------------------------------------------------------------------ + x$table_body <- x$table_body |> + dplyr::left_join( + cards, + by = c(cards |> select(-"count_total") |> names()) + ) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) + + x +} diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd new file mode 100644 index 000000000..0619f94be --- /dev/null +++ b/man/sort_tbl_hierarchical.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sort_tbl_hierarchical.R +\name{sort_tbl_hierarchical} +\alias{sort_tbl_hierarchical} +\alias{tbl_sort} +\alias{tbl_sort.tbl_hierarchical} +\title{Sort Hierarchical Tables} +\usage{ +tbl_sort(x, ...) + +\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", ascending = FALSE, .stat = "n") +} +\arguments{ +\item{x}{(\code{tbl_hierarchical})\cr +A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} + +\item{sort}{(\code{string})\cr +Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "frequency")}. Default is \code{"frequency"}.} + +\item{ascending}{(\code{logical})\cr +Whether to sort rows in ascending or descending order. Default is descending (\code{ascending = FALSE}).} + +\item{.stat}{(\code{string})\cr +Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all +hierarchy levels.} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr + +This function is used to sort hierarchical tables. Options for sorting criteria are: +\enumerate{ +\item Alphanumeric - rows are ordered alphanumerically by label text (default). +\item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +ordered accordingly. +} +} +\examples{ +ADAE_subset <- cards::ADAE |> + dplyr::filter( + AESOC \%in\% unique(cards::ADAE$AESOC)[1:5], + AETERM \%in\% unique(cards::ADAE$AETERM)[1:5] + ) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + include = AETERM, + overall_row = TRUE +) + +tbl_sort(tbl, .stat = "N") +} From a9e19ec5aa58b4b55c4f57a29fde4b3f51204173 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Thu, 5 Dec 2024 22:07:53 -0500 Subject: [PATCH 02/25] Clean up documentation --- R/sort_tbl_hierarchical.R | 15 +++++++-------- man/sort_tbl_hierarchical.Rd | 9 ++++----- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index 3c7e576dc..c8a8e5ed2 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -12,8 +12,8 @@ #' A hierarchical gtsummary table of class `'tbl_hierarchical'`. #' @param sort (`string`)\cr #' Specifies sorting to perform. Values must be one of `c("alphanumeric", "frequency")`. Default is `"frequency"`. -#' @param ascending (`logical`)\cr -#' Whether to sort rows in ascending or descending order. Default is descending (`ascending = FALSE`). +#' @param desc (scalar `logical`)\cr +#' Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`). #' @param .stat (`string`)\cr #' Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all #' hierarchy levels. @@ -33,11 +33,10 @@ #' by = TRTA, #' denominator = cards::ADSL |> mutate(TRTA = ARM), #' id = USUBJID, -#' include = AETERM, #' overall_row = TRUE #' ) #' -#' tbl_sort(tbl, .stat = "N") +#' tbl_sort(tbl) NULL #' @rdname sort_tbl_hierarchical @@ -51,11 +50,11 @@ tbl_sort <- function(x, ...) { #' @rdname sort_tbl_hierarchical #' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE, .stat = "n") { +tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat = "n") { set_cli_abort_call() # process and check inputs --------------------------------------------------- - check_logical(ascending) + check_scalar_logical(desc) if (!sort %in% c("frequency", "alphanumeric")) { cli::cli_abort( @@ -71,7 +70,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE, ) x$table_body <- x$table_body |> - arrange(across(sort_cols, ~ if (!ascending) desc(.x) else .x)) + arrange(across(sort_cols, ~ if (desc) desc(.x) else .x)) } else { x <- .append_sort_counts(x, .stat) u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() @@ -101,7 +100,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE, ), "inner_var", "count_total", "label") x$table_body <- x$table_body |> - arrange(across(sort_cols, ~ if (is.numeric(.x) && !ascending) desc(.x) else .x)) + arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) desc(.x) else .x)) } x diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index 0619f94be..dd66b6f96 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -8,7 +8,7 @@ \usage{ tbl_sort(x, ...) -\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", ascending = FALSE, .stat = "n") +\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = FALSE, .stat = "n") } \arguments{ \item{x}{(\code{tbl_hierarchical})\cr @@ -17,8 +17,8 @@ A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} \item{sort}{(\code{string})\cr Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "frequency")}. Default is \code{"frequency"}.} -\item{ascending}{(\code{logical})\cr -Whether to sort rows in ascending or descending order. Default is descending (\code{ascending = FALSE}).} +\item{desc}{(scalar \code{logical})\cr +Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).} \item{.stat}{(\code{string})\cr Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all @@ -47,9 +47,8 @@ tbl <- tbl_hierarchical( by = TRTA, denominator = cards::ADSL |> mutate(TRTA = ARM), id = USUBJID, - include = AETERM, overall_row = TRUE ) -tbl_sort(tbl, .stat = "N") +tbl_sort(tbl) } From 1585a63d3fe8cd48c0709f67310a410173cd053c Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 6 Dec 2024 00:39:21 -0500 Subject: [PATCH 03/25] Add filtering function --- NAMESPACE | 2 + R/filter_tbl_hierarchical.R | 114 +++++++++++++++++++++++++++++++++ man/filter_tbl_hierarchical.Rd | 57 +++++++++++++++++ 3 files changed, 173 insertions(+) create mode 100644 R/filter_tbl_hierarchical.R create mode 100644 man/filter_tbl_hierarchical.Rd diff --git a/NAMESPACE b/NAMESPACE index e0301b684..d08955540 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,6 +62,7 @@ S3method(plot,tbl_regression) S3method(plot,tbl_uvregression) S3method(print,gtsummary) S3method(print,tbl_split) +S3method(tbl_filter,tbl_hierarchical) S3method(tbl_regression,brmsfit) S3method(tbl_regression,crr) S3method(tbl_regression,default) @@ -205,6 +206,7 @@ export(tbl_butcher) export(tbl_continuous) export(tbl_cross) export(tbl_custom_summary) +export(tbl_filter) export(tbl_hierarchical) export(tbl_hierarchical_count) export(tbl_likert) diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R new file mode 100644 index 000000000..9c9bca108 --- /dev/null +++ b/R/filter_tbl_hierarchical.R @@ -0,0 +1,114 @@ +#' Filter Hierarchical Tables +#' +#' @description `r lifecycle::badge('experimental')`\cr +#' +#' This function is used to filter hierarchical tables by total row frequencies. +#' +#' @param x (`tbl_hierarchical`)\cr +#' A hierarchical gtsummary table of class `'tbl_hierarchical'`. +#' @param t (scalar `numeric`)\cr +#' Threshold used to determine which values will be retained. +#' @param gt (scalar `logical`)\cr +#' Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`). +#' @param eq (scalar `logical`)\cr +#' Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or +#' inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`. +#' @param .stat (`string`)\cr +#' Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all +#' hierarchy levels. +#' @name filter_tbl_hierarchical +#' +#' @examples +#' ADAE_subset <- cards::ADAE |> +#' dplyr::filter( +#' AESOC %in% unique(cards::ADAE$AESOC)[1:5], +#' AETERM %in% unique(cards::ADAE$AETERM)[1:5] +#' ) +#' +#' tbl <- tbl_hierarchical( +#' data = ADAE_subset, +#' variables = c(SEX, RACE, AETERM), +#' by = TRTA, +#' denominator = cards::ADSL |> mutate(TRTA = ARM), +#' id = USUBJID, +#' overall_row = TRUE +#' ) +#' +#' # Example 1 - Row Sums > 10 ------------------ +#' tbl_filter(tbl, t = 10) +#' +#' # Example 2 - Row Sums <= 5 ------------------ +#' tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +NULL + +#' @rdname filter_tbl_hierarchical +#' @export +tbl_filter <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + + UseMethod("tbl_filter") +} + +#' @export +#' @rdname filter_tbl_hierarchical +tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n") { + set_cli_abort_call() + + # process and check inputs --------------------------------------------------- + check_numeric(t) + check_scalar_logical(gt) + check_scalar_logical(eq) + + x <- .append_hierarchy_row_sums(x, .stat) + g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + if (!gt) x$table_body$count_total[x$table_body$variable %in% g_cols] <- t - 1 + + filt_expr <- paste( + "count_total", + dplyr::case_when( + gt && eq ~ ">=", + !gt && eq ~ "<=", + !gt ~ "<", + TRUE ~ ">" + ), + t + ) + + x$table_body <- x$table_body |> + dplyr::filter(!!parse_expr(filt_expr)) + + # remove summary rows with no sub-rows still present ------------------------- + if (!gt) { + for (i in rev(seq_along(g_cols))) { + empty_gps <- x$table_body |> + dplyr::group_by(across(c(names(g_cols[1:i]), paste0(names(g_cols[1:i]), "_level")))) |> + dplyr::summarize(empty_gp := dplyr::n() == 1) |> + na.omit() + + if (!all(!empty_gps$empty_gp)) { + cli::cli_inform( + "For readability, any summary row that supercedes a row that meets the filtering criteria will be kept + regardless of whether it meets the filtering criteria itself.", + .frequency = "once", + .frequency_id = "hierarchy_filter_lt" + ) + + x$table_body <- x$table_body |> + dplyr::left_join( + empty_gps, + by = empty_gps |> select(cards::all_ard_groups()) |> names() + ) |> + dplyr::filter(!empty_gp | is.na(empty_gp)) |> + dplyr::select(-"empty_gp") + } else { + break + } + } + } + + x$table_body <- x$table_body |> + dplyr::select(-"count_total") + + x +} diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd new file mode 100644 index 000000000..9696b1091 --- /dev/null +++ b/man/filter_tbl_hierarchical.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filter_tbl_hierarchical.R +\name{filter_tbl_hierarchical} +\alias{filter_tbl_hierarchical} +\alias{tbl_filter} +\alias{tbl_filter.tbl_hierarchical} +\title{Filter Hierarchical Tables} +\usage{ +tbl_filter(x, ...) + +\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n") +} +\arguments{ +\item{x}{(\code{tbl_hierarchical})\cr +A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} + +\item{t}{(scalar \code{numeric})\cr +Threshold used to determine which values will be retained.} + +\item{gt}{(scalar \code{logical})\cr +Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).} + +\item{eq}{(scalar \code{logical})\cr +Whether to include the value of \code{t} in the filtered range, i.e. whether to use exclusive comparators (\code{>}, \code{<}) or +inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.} + +\item{.stat}{(\code{string})\cr +Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all +hierarchy levels.} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr + +This function is used to filter hierarchical tables by total row frequencies. +} +\examples{ +ADAE_subset <- cards::ADAE |> + dplyr::filter( + AESOC \%in\% unique(cards::ADAE$AESOC)[1:5], + AETERM \%in\% unique(cards::ADAE$AETERM)[1:5] + ) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +# Example 1 - Row Sums > 10 ------------------ +tbl_filter(tbl, t = 10) + +# Example 2 - Row Sums <= 5 ------------------ +tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +} From 6c28907358250f0003a9e3bb9a7d1edff72f83e7 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 6 Dec 2024 00:40:17 -0500 Subject: [PATCH 04/25] Clean up --- R/sort_tbl_hierarchical.R | 71 +++++++++++++++++++++++------------- man/sort_tbl_hierarchical.Rd | 6 ++- 2 files changed, 51 insertions(+), 26 deletions(-) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index c8a8e5ed2..ca14a8047 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -36,7 +36,11 @@ #' overall_row = TRUE #' ) #' +#' # Example 1 - Decreasing Frequency Sort ------ #' tbl_sort(tbl) +#' +#' # Example 2 - Reverse Alphanumeric Sort ------ +#' tbl_sort(tbl, sort = "alphanumeric") NULL #' @rdname sort_tbl_hierarchical @@ -50,7 +54,7 @@ tbl_sort <- function(x, ...) { #' @rdname sort_tbl_hierarchical #' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat = "n") { +tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n") { set_cli_abort_call() # process and check inputs --------------------------------------------------- @@ -63,33 +67,50 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat ) } + u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() + if (sort == "alphanumeric") { - sort_cols <- c( - x$table_body |> select(cards::all_ard_groups("names")) |> names(), - "label" - ) + sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label") + rep_str <- if (desc) "zzzz" else " " + + # overall row always appears first + if (desc && "..ard_hierarchical_overall.." %in% x$table_body$variable) { + ovrl_row <- x$table_body[1, ] + x$table_body <- x$table_body[-1,] + } x$table_body <- x$table_body |> - arrange(across(sort_cols, ~ if (desc) desc(.x) else .x)) + dplyr::rowwise() |> + dplyr::mutate(inner_var = if (.data$variable %in% u_cols) rep_str else .data$variable) |> + dplyr::ungroup() |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., rep_str))) |> + dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., paste0("^", rep_str, "$"), NA))) |> + select(-"inner_var") + + if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) } else { - x <- .append_sort_counts(x, .stat) - u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() + x <- .append_hierarchy_row_sums(x, .stat) g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + # sort summary rows first + x$table_body <- x$table_body |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) + # assign counts for each hierarchy level for (g in names(g_cols)) { x$table_body <- x$table_body |> dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE) x$table_body <- x$table_body |> - left_join( - summarise(x$table_body, !!paste0("count_", g) := first(count_total)), + dplyr::left_join( + dplyr::summarize(x$table_body, !!paste0("count_", g) := dplyr::first(count_total)), by = x$table_body |> dplyr::group_vars() ) } x$table_body <- x$table_body |> dplyr::rowwise() |> - mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |> + dplyr::mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |> dplyr::ungroup() # sort by counts ------------------------------------------------------------------------------ @@ -100,13 +121,15 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat ), "inner_var", "count_total", "label") x$table_body <- x$table_body |> - arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) desc(.x) else .x)) + dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., "^ $", NA))) |> + dplyr::select(-starts_with("count_"), -"inner_var") } x } -.append_sort_counts <- function(x, .stat) { +.append_hierarchy_row_sums <- function(x, .stat) { if (!.stat %in% x$cards$tbl_hierarchical$stat_name) { cli::cli_abort( "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid @@ -147,9 +170,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat dplyr::rowwise() |> dplyr::mutate(across( cards::all_ard_groups(), - ~ if (is.na(.x) && !grepl("_level", cur_column()) && variable == g_cols[cur_column()]) { + ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == g_cols[dplyr::cur_column()]) { variable - } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", cur_column())]) { + } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", dplyr::cur_column())]) { label } else { .x @@ -159,12 +182,11 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat # calculate total group sums for any variables not in include --------------------------------- if (!all(g_cols %in% cards$variable)) { gp_vars <- g_cols[g_cols %in% setdiff(g_cols, cards$variable)] - cli::cli_warn( + cli::cli_inform( "Not all hierarchy variables present in the table were included in the {.arg include} argument. These variables ({gp_vars}) do not have event rate data available so the total sum of the event - rates for this hierarchy section will be used instead. To use event rates to sort all sections of the table, - set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}.", - call = get_cli_abort_call() + rates for this hierarchy section will be used instead. To use event rates for all sections of the table, + set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." ) gp_cols <- names(gp_vars) @@ -172,10 +194,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat cards <- cards |> dplyr::bind_rows( cards |> - filter(variable != "..ard_hierarchical_overall..") |> - group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> - summarize(count_total = sum(count_total)) |> - mutate( + dplyr::filter(variable != "..ard_hierarchical_overall..") |> + dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> + dplyr::summarize(count_total = sum(count_total)) |> + dplyr::mutate( variable = .data[[gp_cols[i]]], label = .data[[paste0(gp_cols[i], "_level")]] ) @@ -188,8 +210,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat dplyr::left_join( cards, by = c(cards |> select(-"count_total") |> names()) - ) |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) + ) x } diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index dd66b6f96..538cd30fa 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -8,7 +8,7 @@ \usage{ tbl_sort(x, ...) -\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = FALSE, .stat = "n") +\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n") } \arguments{ \item{x}{(\code{tbl_hierarchical})\cr @@ -50,5 +50,9 @@ tbl <- tbl_hierarchical( overall_row = TRUE ) +# Example 1 - Decreasing Frequency Sort ------ tbl_sort(tbl) + +# Example 2 - Reverse Alphanumeric Sort ------ +tbl_sort(tbl, sort = "alphanumeric") } From 42724994fdda531be9892c0be0f3108f62a09702 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 6 Dec 2024 00:43:00 -0500 Subject: [PATCH 05/25] Document --- R/filter_tbl_hierarchical.R | 4 ++-- R/sort_tbl_hierarchical.R | 5 +++-- man/filter_tbl_hierarchical.Rd | 3 +-- man/sort_tbl_hierarchical.Rd | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R index 9c9bca108..2c3eab696 100644 --- a/R/filter_tbl_hierarchical.R +++ b/R/filter_tbl_hierarchical.R @@ -14,8 +14,7 @@ #' Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or #' inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`. #' @param .stat (`string`)\cr -#' Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all -#' hierarchy levels. +#' Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. #' @name filter_tbl_hierarchical #' #' @examples @@ -59,6 +58,7 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n" check_numeric(t) check_scalar_logical(gt) check_scalar_logical(eq) + check_string(.stat) x <- .append_hierarchy_row_sums(x, .stat) g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index ca14a8047..5c0bc53e1 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -15,8 +15,8 @@ #' @param desc (scalar `logical`)\cr #' Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`). #' @param .stat (`string`)\cr -#' Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all -#' hierarchy levels. +#' Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for +#' all hierarchy levels. #' #' @name sort_tbl_hierarchical #' @@ -59,6 +59,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat # process and check inputs --------------------------------------------------- check_scalar_logical(desc) + check_string(.stat) if (!sort %in% c("frequency", "alphanumeric")) { cli::cli_abort( diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd index 9696b1091..40b72a1e6 100644 --- a/man/filter_tbl_hierarchical.Rd +++ b/man/filter_tbl_hierarchical.Rd @@ -25,8 +25,7 @@ Whether to include the value of \code{t} in the filtered range, i.e. whether to inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.} \item{.stat}{(\code{string})\cr -Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all -hierarchy levels.} +Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index 538cd30fa..731dd6d62 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -21,8 +21,8 @@ Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "fre Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).} \item{.stat}{(\code{string})\cr -Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all -hierarchy levels.} +Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for +all hierarchy levels.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr From f02c7c5ca6d9a63b3d478af6e0f063422cbe6936 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 6 Dec 2024 21:23:36 -0500 Subject: [PATCH 06/25] Improve code --- R/filter_tbl_hierarchical.R | 61 ++++++++++--------- R/sort_tbl_hierarchical.R | 107 ++++++++++++++++++--------------- man/filter_tbl_hierarchical.Rd | 15 ++--- man/sort_tbl_hierarchical.Rd | 21 ++++--- 4 files changed, 109 insertions(+), 95 deletions(-) diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R index 2c3eab696..7a1ad5b69 100644 --- a/R/filter_tbl_hierarchical.R +++ b/R/filter_tbl_hierarchical.R @@ -2,12 +2,12 @@ #' #' @description `r lifecycle::badge('experimental')`\cr #' -#' This function is used to filter hierarchical tables by total row frequencies. +#' This function is used to filter hierarchical tables by frequency row sum. #' #' @param x (`tbl_hierarchical`)\cr #' A hierarchical gtsummary table of class `'tbl_hierarchical'`. #' @param t (scalar `numeric`)\cr -#' Threshold used to determine which values will be retained. +#' Threshold used to determine which rows will be retained. #' @param gt (scalar `logical`)\cr #' Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`). #' @param eq (scalar `logical`)\cr @@ -15,14 +15,14 @@ #' inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`. #' @param .stat (`string`)\cr #' Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. +#' Default is `"n"`. +#' #' @name filter_tbl_hierarchical +#' @seealso [tbl_sort()] #' #' @examples #' ADAE_subset <- cards::ADAE |> -#' dplyr::filter( -#' AESOC %in% unique(cards::ADAE$AESOC)[1:5], -#' AETERM %in% unique(cards::ADAE$AETERM)[1:5] -#' ) +#' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) #' #' tbl <- tbl_hierarchical( #' data = ADAE_subset, @@ -54,18 +54,23 @@ tbl_filter <- function(x, ...) { tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n") { set_cli_abort_call() - # process and check inputs --------------------------------------------------- + # process and check inputs ---------------------------------------------------------------------- check_numeric(t) check_scalar_logical(gt) check_scalar_logical(eq) check_string(.stat) + outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + + # get row sums ---------------------------------------------------------------------------------- x <- .append_hierarchy_row_sums(x, .stat) - g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) - if (!gt) x$table_body$count_total[x$table_body$variable %in% g_cols] <- t - 1 + # keep all summary rows (removed later if no sub-rows are kept) + if (!gt) x$table_body$sum_row[x$table_body$variable %in% outer_cols] <- t - 1 + + # create and apply filtering expression --------------------------------------------------------- filt_expr <- paste( - "count_total", + "sum_row", dplyr::case_when( gt && eq ~ ">=", !gt && eq ~ "<=", @@ -74,41 +79,39 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n" ), t ) - x$table_body <- x$table_body |> dplyr::filter(!!parse_expr(filt_expr)) - # remove summary rows with no sub-rows still present ------------------------- + # remove any summary rows with no sub-rows still present ---------------------------------------- if (!gt) { - for (i in rev(seq_along(g_cols))) { - empty_gps <- x$table_body |> - dplyr::group_by(across(c(names(g_cols[1:i]), paste0(names(g_cols[1:i]), "_level")))) |> - dplyr::summarize(empty_gp := dplyr::n() == 1) |> + for (i in rev(seq_along(outer_cols))) { + gp_empty <- x$table_body |> + dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |> + dplyr::summarize(is_empty := dplyr::n() == 1) |> na.omit() - if (!all(!empty_gps$empty_gp)) { - cli::cli_inform( - "For readability, any summary row that supercedes a row that meets the filtering criteria will be kept - regardless of whether it meets the filtering criteria itself.", - .frequency = "once", - .frequency_id = "hierarchy_filter_lt" - ) - + if (!all(!gp_empty$is_empty)) { x$table_body <- x$table_body |> dplyr::left_join( - empty_gps, - by = empty_gps |> select(cards::all_ard_groups()) |> names() + gp_empty, + by = gp_empty |> select(cards::all_ard_groups()) |> names() ) |> - dplyr::filter(!empty_gp | is.na(empty_gp)) |> - dplyr::select(-"empty_gp") + dplyr::filter(!is_empty | is.na(is_empty)) |> + dplyr::select(-"is_empty") } else { break } } + if (nrow(x$table_body) > 0) { + cli::cli_inform( + "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept + regardless of whether they meet the filtering criteria themselves." + ) + } } x$table_body <- x$table_body |> - dplyr::select(-"count_total") + dplyr::select(-"sum_row") x } diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index 5c0bc53e1..1038701b9 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -4,28 +4,27 @@ #' #' This function is used to sort hierarchical tables. Options for sorting criteria are: #' -#' 1. Alphanumeric - rows are ordered alphanumerically by label text (default). -#' 2. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are -#' ordered accordingly. +#' 1. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +#' ordered accordingly (default). +#' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables +#' in ascending alphanumeric order (i.e. A to Z). #' #' @param x (`tbl_hierarchical`)\cr #' A hierarchical gtsummary table of class `'tbl_hierarchical'`. #' @param sort (`string`)\cr -#' Specifies sorting to perform. Values must be one of `c("alphanumeric", "frequency")`. Default is `"frequency"`. +#' Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`. #' @param desc (scalar `logical`)\cr #' Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`). #' @param .stat (`string`)\cr #' Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for -#' all hierarchy levels. +#' all hierarchy levels. Default is `"n"`. #' #' @name sort_tbl_hierarchical +#' @seealso [tbl_filter()] #' #' @examples #' ADAE_subset <- cards::ADAE |> -#' dplyr::filter( -#' AESOC %in% unique(cards::ADAE$AESOC)[1:5], -#' AETERM %in% unique(cards::ADAE$AETERM)[1:5] -#' ) +#' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) #' #' tbl <- tbl_hierarchical( #' data = ADAE_subset, @@ -36,10 +35,10 @@ #' overall_row = TRUE #' ) #' -#' # Example 1 - Decreasing Frequency Sort ------ +#' # Example 1 - Decreasing Frequency Sort ------------------ #' tbl_sort(tbl) #' -#' # Example 2 - Reverse Alphanumeric Sort ------ +#' # Example 2 - Ascending Alphanumeric Sort (Z to A) ------- #' tbl_sort(tbl, sort = "alphanumeric") NULL @@ -57,7 +56,7 @@ tbl_sort <- function(x, ...) { tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n") { set_cli_abort_call() - # process and check inputs --------------------------------------------------- + # process and check inputs ---------------------------------------------------------------------- check_scalar_logical(desc) check_string(.stat) @@ -68,21 +67,30 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat ) } - u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() + overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable + + outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + inner_col <- setdiff( + x$table_body$variable, + x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() + ) if (sort == "alphanumeric") { - sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label") + # summary rows remain at the top of each sub-section rep_str <- if (desc) "zzzz" else " " # overall row always appears first if (desc && "..ard_hierarchical_overall.." %in% x$table_body$variable) { ovrl_row <- x$table_body[1, ] - x$table_body <- x$table_body[-1,] + x$table_body <- x$table_body[-1, ] } + # sort by label ------------------------------------------------------------------------------- + sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label") + x$table_body <- x$table_body |> dplyr::rowwise() |> - dplyr::mutate(inner_var = if (.data$variable %in% u_cols) rep_str else .data$variable) |> + dplyr::mutate(inner_var = if (!.data$variable == inner_col) rep_str else .data$variable) |> dplyr::ungroup() |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., rep_str))) |> dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |> @@ -91,47 +99,48 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) } else { + # get row sums -------------------------------------------------------------------------------- x <- .append_hierarchy_row_sums(x, .stat) - g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) - - # sort summary rows first - x$table_body <- x$table_body |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) - - # assign counts for each hierarchy level - for (g in names(g_cols)) { - x$table_body <- x$table_body |> - dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE) + # append outer hierarchy level sums in each row to sort at all levels ------------------------- + for (g in names(outer_cols)) { + x$table_body <- x$table_body |> dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE) x$table_body <- x$table_body |> dplyr::left_join( - dplyr::summarize(x$table_body, !!paste0("count_", g) := dplyr::first(count_total)), + x$table_body |> + dplyr::summarize(!!paste0("sum_", g) := dplyr::first(sum_row)), by = x$table_body |> dplyr::group_vars() ) } + + # summary rows remain at the top of each sub-section x$table_body <- x$table_body |> + dplyr::ungroup() |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) |> dplyr::rowwise() |> - dplyr::mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |> + dplyr::mutate(inner_var = if (!.data$variable == inner_col) " " else .data$variable) |> dplyr::ungroup() - # sort by counts ------------------------------------------------------------------------------ + # sort by row sum ----------------------------------------------------------------------------- sort_cols <- c(rbind( x$table_body |> select(cards::all_ard_groups("names")) |> names(), - x$table_body |> select(starts_with("count_group")) |> names(), + x$table_body |> select(starts_with("sum_group")) |> names(), x$table_body |> select(cards::all_ard_groups("levels")) |> names() - ), "inner_var", "count_total", "label") + ), "inner_var", "sum_row", "label") x$table_body <- x$table_body |> dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., "^ $", NA))) |> - dplyr::select(-starts_with("count_"), -"inner_var") + select(-starts_with("sum_"), -"inner_var") } x } .append_hierarchy_row_sums <- function(x, .stat) { - if (!.stat %in% x$cards$tbl_hierarchical$stat_name) { + cards <- x$cards$tbl_hierarchical + + if (!.stat %in% cards$stat_name) { cli::cli_abort( "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.", @@ -139,26 +148,25 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat ) } - cards <- x$cards$tbl_hierarchical by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA - g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) - # fill in variable_level column of cards + # update logical variable_level entries from overall row to character cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |> dplyr::filter(variable == "..ard_hierarchical_overall..") |> dplyr::pull("label") |> as.list() - # extract counts ------------------------------------------------------------------------------ + # extract row sums ------------------------------------------------------------------------------ cards <- cards |> dplyr::filter(stat_name == .stat, variable %in% x$table_body$variable) |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |> - dplyr::summarise(count_total = sum(unlist(stat))) |> + dplyr::summarise(sum_row = sum(unlist(stat))) |> dplyr::ungroup() |> dplyr::rename(label = variable_level) |> tidyr::unnest(cols = everything()) - # match names to x$table_body + # match cards names to x$table_body ------------------------------------------------------------- if (length(by_cols) > 1) { names(cards)[grep("group", names(cards))] <- x$table_body |> select(cards::all_ard_groups()) |> @@ -166,23 +174,25 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat } cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.." - # align cards layout with x$table_body -------------------------------------------------------- + # fill in NAs to align cards layout with x$table_body ------------------------------------------- cards <- cards |> dplyr::rowwise() |> dplyr::mutate(across( cards::all_ard_groups(), - ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == g_cols[dplyr::cur_column()]) { + ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == outer_cols[dplyr::cur_column()]) { variable - } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", dplyr::cur_column())]) { + } else if (is.na(.x) && variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) { label } else { .x } )) - # calculate total group sums for any variables not in include --------------------------------- - if (!all(g_cols %in% cards$variable)) { - gp_vars <- g_cols[g_cols %in% setdiff(g_cols, cards$variable)] + # for any variables not in include, calculate group sums ---------------------------------------- + if (!all(outer_cols %in% cards$variable)) { + gp_vars <- outer_cols[outer_cols %in% setdiff(outer_cols, cards$variable)] + gp_cols <- names(gp_vars) + cli::cli_inform( "Not all hierarchy variables present in the table were included in the {.arg include} argument. These variables ({gp_vars}) do not have event rate data available so the total sum of the event @@ -190,14 +200,13 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." ) - gp_cols <- names(gp_vars) for (i in seq_along(gp_cols)) { cards <- cards |> dplyr::bind_rows( cards |> dplyr::filter(variable != "..ard_hierarchical_overall..") |> dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> - dplyr::summarize(count_total = sum(count_total)) |> + dplyr::summarize(sum_row = sum(sum_row)) |> dplyr::mutate( variable = .data[[gp_cols[i]]], label = .data[[paste0(gp_cols[i], "_level")]] @@ -206,11 +215,11 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat } } - # add counts to x$table_body ------------------------------------------------------------------ + # append row sums to x$table_body --------------------------------------------------------------- x$table_body <- x$table_body |> dplyr::left_join( cards, - by = c(cards |> select(-"count_total") |> names()) + by = c(cards |> select(-"sum_row") |> names()) ) x diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd index 40b72a1e6..e7f3b3f1d 100644 --- a/man/filter_tbl_hierarchical.Rd +++ b/man/filter_tbl_hierarchical.Rd @@ -15,7 +15,7 @@ tbl_filter(x, ...) A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} \item{t}{(scalar \code{numeric})\cr -Threshold used to determine which values will be retained.} +Threshold used to determine which rows will be retained.} \item{gt}{(scalar \code{logical})\cr Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).} @@ -25,19 +25,17 @@ Whether to include the value of \code{t} in the filtered range, i.e. whether to inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.} \item{.stat}{(\code{string})\cr -Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.} +Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. +Default is \code{"n"}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr -This function is used to filter hierarchical tables by total row frequencies. +This function is used to filter hierarchical tables by frequency row sum. } \examples{ ADAE_subset <- cards::ADAE |> - dplyr::filter( - AESOC \%in\% unique(cards::ADAE$AESOC)[1:5], - AETERM \%in\% unique(cards::ADAE$AETERM)[1:5] - ) + dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]) tbl <- tbl_hierarchical( data = ADAE_subset, @@ -54,3 +52,6 @@ tbl_filter(tbl, t = 10) # Example 2 - Row Sums <= 5 ------------------ tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) } +\seealso{ +\code{\link[=tbl_sort]{tbl_sort()}} +} diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index 731dd6d62..3b004ef5e 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -15,31 +15,29 @@ tbl_sort(x, ...) A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} \item{sort}{(\code{string})\cr -Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "frequency")}. Default is \code{"frequency"}.} +Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.} \item{desc}{(scalar \code{logical})\cr Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).} \item{.stat}{(\code{string})\cr Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for -all hierarchy levels.} +all hierarchy levels. Default is \code{"n"}.} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr This function is used to sort hierarchical tables. Options for sorting criteria are: \enumerate{ -\item Alphanumeric - rows are ordered alphanumerically by label text (default). \item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are -ordered accordingly. +ordered accordingly (default). +\item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables +in ascending alphanumeric order (i.e. A to Z). } } \examples{ ADAE_subset <- cards::ADAE |> - dplyr::filter( - AESOC \%in\% unique(cards::ADAE$AESOC)[1:5], - AETERM \%in\% unique(cards::ADAE$AETERM)[1:5] - ) + dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]) tbl <- tbl_hierarchical( data = ADAE_subset, @@ -50,9 +48,12 @@ tbl <- tbl_hierarchical( overall_row = TRUE ) -# Example 1 - Decreasing Frequency Sort ------ +# Example 1 - Decreasing Frequency Sort ------------------ tbl_sort(tbl) -# Example 2 - Reverse Alphanumeric Sort ------ +# Example 2 - Ascending Alphanumeric Sort (Z to A) ------- tbl_sort(tbl, sort = "alphanumeric") } +\seealso{ +\code{\link[=tbl_filter]{tbl_filter()}} +} From 30c574bb897048b50c6f5e8d106f415dbb3665d7 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 6 Dec 2024 22:48:34 -0500 Subject: [PATCH 07/25] Fix checks --- R/filter_tbl_hierarchical.R | 20 ++++++++----- R/sort_tbl_hierarchical.R | 51 ++++++++++++++++++++-------------- R/tbl_hierarchical.R | 3 ++ inst/WORDLIST | 1 + man/filter_tbl_hierarchical.Rd | 15 +++++++--- man/sort_tbl_hierarchical.Rd | 13 +++++++-- pkgdown/_pkgdown.yml | 2 ++ 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R index 7a1ad5b69..8821fd75e 100644 --- a/R/filter_tbl_hierarchical.R +++ b/R/filter_tbl_hierarchical.R @@ -2,10 +2,10 @@ #' #' @description `r lifecycle::badge('experimental')`\cr #' -#' This function is used to filter hierarchical tables by frequency row sum. +#' This function is used to filter hierarchical table rows by frequency row sum. #' -#' @param x (`tbl_hierarchical`)\cr -#' A hierarchical gtsummary table of class `'tbl_hierarchical'`. +#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr +#' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. #' @param t (scalar `numeric`)\cr #' Threshold used to determine which rows will be retained. #' @param gt (scalar `logical`)\cr @@ -16,11 +16,14 @@ #' @param .stat (`string`)\cr #' Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. #' Default is `"n"`. +#' @inheritParams rlang::args_dots_empty +#' +#' @return A `gtsummary` of the same class as `x`. #' #' @name filter_tbl_hierarchical #' @seealso [tbl_sort()] #' -#' @examples +#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) #' ADAE_subset <- cards::ADAE |> #' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) #' @@ -51,7 +54,7 @@ tbl_filter <- function(x, ...) { #' @export #' @rdname filter_tbl_hierarchical -tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n") { +tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) { set_cli_abort_call() # process and check inputs ---------------------------------------------------------------------- @@ -60,7 +63,10 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n" check_scalar_logical(eq) check_string(.stat) - outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + outer_cols <- sapply( + x$table_body |> select(cards::all_ard_groups("names")), + function(x) dplyr::last(unique(stats::na.omit(x))) + ) # get row sums ---------------------------------------------------------------------------------- x <- .append_hierarchy_row_sums(x, .stat) @@ -88,7 +94,7 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n" gp_empty <- x$table_body |> dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |> dplyr::summarize(is_empty := dplyr::n() == 1) |> - na.omit() + stats::na.omit() if (!all(!gp_empty$is_empty)) { x$table_body <- x$table_body |> diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index 1038701b9..3b221da0c 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -9,8 +9,8 @@ #' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables #' in ascending alphanumeric order (i.e. A to Z). #' -#' @param x (`tbl_hierarchical`)\cr -#' A hierarchical gtsummary table of class `'tbl_hierarchical'`. +#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr +#' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. #' @param sort (`string`)\cr #' Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`. #' @param desc (scalar `logical`)\cr @@ -18,11 +18,14 @@ #' @param .stat (`string`)\cr #' Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for #' all hierarchy levels. Default is `"n"`. +#' @inheritParams rlang::args_dots_empty +#' +#' @return A `gtsummary` of the same class as `x`. #' #' @name sort_tbl_hierarchical #' @seealso [tbl_filter()] #' -#' @examples +#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) #' ADAE_subset <- cards::ADAE |> #' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) #' @@ -53,7 +56,7 @@ tbl_sort <- function(x, ...) { #' @rdname sort_tbl_hierarchical #' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n") { +tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n", ...) { set_cli_abort_call() # process and check inputs ---------------------------------------------------------------------- @@ -69,7 +72,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable - outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + outer_cols <- sapply( + x$table_body |> select(cards::all_ard_groups("names")), + function(x) dplyr::last(unique(stats::na.omit(x))) + ) inner_col <- setdiff( x$table_body$variable, x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() @@ -92,9 +98,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat dplyr::rowwise() |> dplyr::mutate(inner_var = if (!.data$variable == inner_col) rep_str else .data$variable) |> dplyr::ungroup() |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., rep_str))) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., paste0("^", rep_str, "$"), NA))) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |> select(-"inner_var") if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) @@ -108,7 +114,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat x$table_body <- x$table_body |> dplyr::left_join( x$table_body |> - dplyr::summarize(!!paste0("sum_", g) := dplyr::first(sum_row)), + dplyr::summarize(!!paste0("sum_", g) := dplyr::first(.data$sum_row)), by = x$table_body |> dplyr::group_vars() ) } @@ -116,7 +122,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat # summary rows remain at the top of each sub-section x$table_body <- x$table_body |> dplyr::ungroup() |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |> dplyr::rowwise() |> dplyr::mutate(inner_var = if (!.data$variable == inner_col) " " else .data$variable) |> dplyr::ungroup() @@ -130,7 +136,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat x$table_body <- x$table_body |> dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., "^ $", NA))) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |> select(-starts_with("sum_"), -"inner_var") } @@ -149,21 +155,24 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat } by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA - outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1)) + outer_cols <- sapply( + x$table_body |> select(cards::all_ard_groups("names")), + function(x) dplyr::last(unique(stats::na.omit(x))) + ) # update logical variable_level entries from overall row to character cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |> - dplyr::filter(variable == "..ard_hierarchical_overall..") |> + dplyr::filter(.data$variable == "..ard_hierarchical_overall..") |> dplyr::pull("label") |> as.list() # extract row sums ------------------------------------------------------------------------------ cards <- cards |> - dplyr::filter(stat_name == .stat, variable %in% x$table_body$variable) |> + dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |> - dplyr::summarise(sum_row = sum(unlist(stat))) |> + dplyr::summarise(sum_row = sum(unlist(.data$stat))) |> dplyr::ungroup() |> - dplyr::rename(label = variable_level) |> + dplyr::rename(label = "variable_level") |> tidyr::unnest(cols = everything()) # match cards names to x$table_body ------------------------------------------------------------- @@ -179,10 +188,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat dplyr::rowwise() |> dplyr::mutate(across( cards::all_ard_groups(), - ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == outer_cols[dplyr::cur_column()]) { - variable - } else if (is.na(.x) && variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) { - label + ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && .data$variable == outer_cols[dplyr::cur_column()]) { + .data$variable + } else if (is.na(.x) && .data$variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) { + .data$label } else { .x } @@ -204,9 +213,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat cards <- cards |> dplyr::bind_rows( cards |> - dplyr::filter(variable != "..ard_hierarchical_overall..") |> + dplyr::filter(.data$variable != "..ard_hierarchical_overall..") |> dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> - dplyr::summarize(sum_row = sum(sum_row)) |> + dplyr::summarize(sum_row = sum(.data$sum_row)) |> dplyr::mutate( variable = .data[[gp_cols[i]]], label = .data[[paste0(gp_cols[i], "_level")]] diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index 789244787..908242541 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -216,6 +216,9 @@ internal_tbl_hierarchical <- function(data, if ("..ard_hierarchical_overall.." %in% variables) { cli::cli_abort("The {.arg variables} argument cannot include a column named {.val ..ard_hierarchical_overall..}.") } + if (!all(variables == unique(variables))) { + cli::cli_abort("The {.arg variables} argument cannot contain repeated variables.") + } # evaluate tidyselect cards::process_selectors(data[variables], include = {{ include }}) diff --git a/inst/WORDLIST b/inst/WORDLIST index 0655b37bf..b25ab3581 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -46,6 +46,7 @@ cardx cli codebase coef +comparators conf customizability customizable diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd index e7f3b3f1d..a84a127f9 100644 --- a/man/filter_tbl_hierarchical.Rd +++ b/man/filter_tbl_hierarchical.Rd @@ -8,11 +8,13 @@ \usage{ tbl_filter(x, ...) -\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n") +\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) } \arguments{ -\item{x}{(\code{tbl_hierarchical})\cr -A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} +\item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr +A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} + +\item{...}{These dots are for future extensions and must be empty.} \item{t}{(scalar \code{numeric})\cr Threshold used to determine which rows will be retained.} @@ -28,12 +30,16 @@ inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FA Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. Default is \code{"n"}.} } +\value{ +A \code{gtsummary} of the same class as \code{x}. +} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr -This function is used to filter hierarchical tables by frequency row sum. +This function is used to filter hierarchical table rows by frequency row sum. } \examples{ +\dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} ADAE_subset <- cards::ADAE |> dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]) @@ -51,6 +57,7 @@ tbl_filter(tbl, t = 10) # Example 2 - Row Sums <= 5 ------------------ tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=tbl_sort]{tbl_sort()}} diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index 3b004ef5e..2d6c21728 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -8,11 +8,13 @@ \usage{ tbl_sort(x, ...) -\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n") +\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n", ...) } \arguments{ -\item{x}{(\code{tbl_hierarchical})\cr -A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.} +\item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr +A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} + +\item{...}{These dots are for future extensions and must be empty.} \item{sort}{(\code{string})\cr Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.} @@ -24,6 +26,9 @@ Whether to sort rows in ascending or descending order. Default is descending (\c Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for all hierarchy levels. Default is \code{"n"}.} } +\value{ +A \code{gtsummary} of the same class as \code{x}. +} \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr @@ -36,6 +41,7 @@ in ascending alphanumeric order (i.e. A to Z). } } \examples{ +\dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} ADAE_subset <- cards::ADAE |> dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]) @@ -53,6 +59,7 @@ tbl_sort(tbl) # Example 2 - Ascending Alphanumeric Sort (Z to A) ------- tbl_sort(tbl, sort = "alphanumeric") +\dontshow{\}) # examplesIf} } \seealso{ \code{\link[=tbl_filter]{tbl_filter()}} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index b195af0ad..767995ab5 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -86,6 +86,8 @@ reference: - subtitle: Hierarchical Summary Tables - contents: - tbl_hierarchical + - tbl_sort.tbl_hierarchical + - tbl_filter.tbl_hierarchical - add_overall.tbl_hierarchical - subtitle: Likert Summary Tables - contents: From 7103b5960d82588166e18e3b972d515db243b733 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 6 Dec 2024 23:56:08 -0500 Subject: [PATCH 08/25] Add tests for sorting --- R/sort_tbl_hierarchical.R | 28 +-- man/sort_tbl_hierarchical.Rd | 11 +- .../testthat/_snaps/sort_tbl_hierarchical.md | 56 +++++ tests/testthat/test-sort_tbl_hierarchical.R | 206 ++++++++++++++++++ 4 files changed, 282 insertions(+), 19 deletions(-) create mode 100644 tests/testthat/_snaps/sort_tbl_hierarchical.md create mode 100644 tests/testthat/test-sort_tbl_hierarchical.R diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index 3b221da0c..623ac1b7f 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -14,7 +14,8 @@ #' @param sort (`string`)\cr #' Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`. #' @param desc (scalar `logical`)\cr -#' Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`). +#' Whether to sort rows in ascending or descending order. Default is descending (`TRUE`) when `sort = "frequency"` +#' and ascending (`FALSE`) when `sort = "alphanumeric"`. #' @param .stat (`string`)\cr #' Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for #' all hierarchy levels. Default is `"n"`. @@ -38,11 +39,11 @@ #' overall_row = TRUE #' ) #' -#' # Example 1 - Decreasing Frequency Sort ------------------ +#' # Example 1 - Descending Frequency Sort ------------------ #' tbl_sort(tbl) #' -#' # Example 2 - Ascending Alphanumeric Sort (Z to A) ------- -#' tbl_sort(tbl, sort = "alphanumeric") +#' # Example 2 - Descending Alphanumeric Sort (Z to A) ------ +#' tbl_sort(tbl, sort = "alphanumeric", desc = TRUE) NULL #' @rdname sort_tbl_hierarchical @@ -56,7 +57,7 @@ tbl_sort <- function(x, ...) { #' @rdname sort_tbl_hierarchical #' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n", ...) { +tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) { set_cli_abort_call() # process and check inputs ---------------------------------------------------------------------- @@ -71,7 +72,6 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat } overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable - outer_cols <- sapply( x$table_body |> select(cards::all_ard_groups("names")), function(x) dplyr::last(unique(stats::na.omit(x))) @@ -86,7 +86,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat rep_str <- if (desc) "zzzz" else " " # overall row always appears first - if (desc && "..ard_hierarchical_overall.." %in% x$table_body$variable) { + if (desc && overall) { ovrl_row <- x$table_body[1, ] x$table_body <- x$table_body[-1, ] } @@ -96,21 +96,21 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat x$table_body <- x$table_body |> dplyr::rowwise() |> - dplyr::mutate(inner_var = if (!.data$variable == inner_col) rep_str else .data$variable) |> + dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |> dplyr::ungroup() |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> - dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |> + dplyr::arrange(across(all_of(sort_cols), ~ if (desc) dplyr::desc(.x) else .x)) |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |> select(-"inner_var") - if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) + if (desc && overall) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) } else { # get row sums -------------------------------------------------------------------------------- x <- .append_hierarchy_row_sums(x, .stat) # append outer hierarchy level sums in each row to sort at all levels ------------------------- for (g in names(outer_cols)) { - x$table_body <- x$table_body |> dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE) + x$table_body <- x$table_body |> dplyr::group_by(across(all_of(c(g, paste0(g, "_level")))), .add = TRUE) x$table_body <- x$table_body |> dplyr::left_join( x$table_body |> @@ -124,7 +124,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat dplyr::ungroup() |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |> dplyr::rowwise() |> - dplyr::mutate(inner_var = if (!.data$variable == inner_col) " " else .data$variable) |> + dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) " " else .data$variable) |> dplyr::ungroup() # sort by row sum ----------------------------------------------------------------------------- @@ -135,7 +135,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat ), "inner_var", "sum_row", "label") x$table_body <- x$table_body |> - dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> + dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |> select(-starts_with("sum_"), -"inner_var") } @@ -169,7 +169,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat # extract row sums ------------------------------------------------------------------------------ cards <- cards |> dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |> - dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |> + dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> dplyr::summarise(sum_row = sum(unlist(.data$stat))) |> dplyr::ungroup() |> dplyr::rename(label = "variable_level") |> diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index 2d6c21728..45c165614 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -8,7 +8,7 @@ \usage{ tbl_sort(x, ...) -\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n", ...) +\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) } \arguments{ \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr @@ -20,7 +20,8 @@ A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_ Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.} \item{desc}{(scalar \code{logical})\cr -Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).} +Whether to sort rows in ascending or descending order. Default is descending (\code{TRUE}) when \code{sort = "frequency"} +and ascending (\code{FALSE}) when \code{sort = "alphanumeric"}.} \item{.stat}{(\code{string})\cr Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for @@ -54,11 +55,11 @@ tbl <- tbl_hierarchical( overall_row = TRUE ) -# Example 1 - Decreasing Frequency Sort ------------------ +# Example 1 - Descending Frequency Sort ------------------ tbl_sort(tbl) -# Example 2 - Ascending Alphanumeric Sort (Z to A) ------- -tbl_sort(tbl, sort = "alphanumeric") +# Example 2 - Descending Alphanumeric Sort (Z to A) ------ +tbl_sort(tbl, sort = "alphanumeric", desc = TRUE) \dontshow{\}) # examplesIf} } \seealso{ diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/sort_tbl_hierarchical.md new file mode 100644 index 000000000..99a9f5f65 --- /dev/null +++ b/tests/testthat/_snaps/sort_tbl_hierarchical.md @@ -0,0 +1,56 @@ +# tbl_sort.tbl_hierarchical() works + + Code + as.data.frame(tbl) + Output + **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 + 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) + 2 F 13 (25%) 18 (45%) 23 (46%) + 3 WHITE 10 (21%) 14 (41%) 20 (45%) + 4 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) + 5 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) + 6 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) + 7 DIARRHOEA 2 (4.2%) 0 (0%) 3 (6.8%) + 8 BLACK OR AFRICAN AMERICAN 3 (60%) 4 (67%) 3 (50%) + 9 APPLICATION SITE PRURITUS 2 (40%) 2 (33%) 2 (33%) + 10 ERYTHEMA 0 (0%) 1 (17%) 1 (17%) + 11 ATRIOVENTRICULAR BLOCK SECOND DEGREE 0 (0%) 1 (17%) 0 (0%) + 12 DIARRHOEA 1 (20%) 0 (0%) 0 (0%) + 13 M 13 (39%) 24 (55%) 17 (50%) + 14 WHITE 12 (40%) 22 (55%) 17 (50%) + 15 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) + 16 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) + 17 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) + 18 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) + 19 ATRIOVENTRICULAR BLOCK SECOND DEGREE 2 (6.7%) 2 (5.0%) 0 (0%) + 20 BLACK OR AFRICAN AMERICAN 1 (33%) 1 (33%) 0 (NA%) + 21 APPLICATION SITE PRURITUS 1 (33%) 0 (0%) 0 (NA%) + 22 DIARRHOEA 0 (0%) 1 (33%) 0 (NA%) + 23 ERYTHEMA 0 (0%) 1 (33%) 0 (NA%) + 24 AMERICAN INDIAN OR ALASKA NATIVE 0 (NA%) 1 (100%) 0 (NA%) + 25 ERYTHEMA 0 (NA%) 1 (100%) 0 (NA%) + +# tbl_sort.tbl_hierarchical() error messaging works + + Code + tbl_sort(data.frame()) + Condition + Error in `check_class()`: + ! The `x` argument must be class , not a data frame. + +--- + + Code + tbl_sort(tbl, sort = "no_sorting") + Condition + Error in `tbl_sort()`: + ! The `sort` argument must be either "frequency" or "alphanumeric". + +--- + + Code + tbl_sort(tbl, .stat = "mean") + Condition + Error in `tbl_sort()`: + ! The `.stat` argument is "mean" but this statistic is not present in `x`. For all valid statistic options see the "stat_name" column of `x$cards$tbl_hierarchical`. + diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R new file mode 100644 index 000000000..bc69937aa --- /dev/null +++ b/tests/testthat/test-sort_tbl_hierarchical.R @@ -0,0 +1,206 @@ +skip_on_cran() + +ADAE_subset <- cards::ADAE |> + dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +test_that("tbl_sort.tbl_hierarchical() works", { + withr::local_options(width = 200) + # no errors + expect_silent(tbl <- tbl_sort(tbl)) + expect_snapshot(tbl |> as.data.frame()) + + # .stat argument works + expect_silent(tbl <- tbl_sort(tbl, .stat = "p")) +}) + +test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", { + # descending frequency (default) + expect_silent(tbl <- tbl_sort(tbl)) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "SEX") |> + dplyr::pull(label), + c("F", "M") + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "RACE") |> + dplyr::pull(label), + c("WHITE", "BLACK OR AFRICAN AMERICAN", "WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE") + + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "APPLICATION SITE PRURITUS", "ERYTHEMA", "APPLICATION SITE ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", + "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "APPLICATION SITE PRURITUS", + "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", + "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "ERYTHEMA" + ) + ) + + # ascending frequency + expect_silent(tbl <- tbl_sort(tbl, desc = FALSE)) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "SEX") |> + dplyr::pull(label), + c("F", "M") # F and M have equal numbers + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "RACE") |> + dplyr::pull(label), + c("BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", "BLACK OR AFRICAN AMERICAN", "WHITE") + ) + expect_equal( + tbl$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "APPLICATION SITE PRURITUS", "DIARRHOEA", + "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", "APPLICATION SITE PRURITUS", + "DIARRHOEA", "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", + "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS" + ) + ) +}) + +test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", { + # descending (Z to A) + expect_silent(result <- tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)) + expect_equal( + result$table_body |> + dplyr::filter(variable == "SEX") |> + dplyr::pull(label), + c("M", "F") + ) + expect_equal( + result$table_body |> + dplyr::filter(variable == "RACE") |> + dplyr::pull(label), + c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "WHITE", "BLACK OR AFRICAN AMERICAN") + ) + expect_equal( + result$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS", + "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA","APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", + "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", + "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS" + ) + ) + + # ascending (A to Z) + expect_silent(result <- tbl_sort(result, sort = "alphanumeric")) + + # results match with tbl_hierarchical which sorts A to Z by default + expect_equal( + result |> as.data.frame(), + tbl |> as.data.frame() + ) +}) + +test_that("tbl_sort.tbl_hierarchical() works when there is no overall row in x", { + tbl_no_overall <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = FALSE + ) + + # sort = 'frequency' + expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall)) + expect_equal( + tbl_no_overall$table_body, + tbl_sort(tbl)$table_body[-1, ] + ) + + # sort = 'alphanumeric' + expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall, sort = "alphanumeric")) + expect_equal( + tbl_no_overall$table_body, + tbl$table_body[-1, ] + ) +}) + +test_that("tbl_sort.tbl_hierarchical() works with only one variable in x", { + tbl_single <- tbl_hierarchical( + data = ADAE_subset, + variables = AETERM, + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE + ) + + # sort = 'frequency' + expect_silent(tbl_single <- tbl_sort(tbl_single)) + expect_equal( + tbl_single$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + c( + "APPLICATION SITE PRURITUS", "ERYTHEMA", "APPLICATION SITE ERYTHEMA", "DIARRHOEA", + "ATRIOVENTRICULAR BLOCK SECOND DEGREE" + ) + ) + + # sort = 'alphanumeric' + expect_silent(tbl_single <- tbl_sort(tbl_single, sort = "alphanumeric")) + expect_equal( + tbl_single$table_body |> + dplyr::filter(variable == "AETERM") |> + dplyr::pull(label), + sort(unique(ADAE_subset$AETERM)) + ) +}) + +test_that("tbl_sort.tbl_hierarchical() works when some variables not included in x", { + tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + include = c(SEX, AETERM), + overall_row = TRUE + ) + + expect_message(tbl_sort(tbl)) +}) + +test_that("tbl_sort.tbl_hierarchical() error messaging works", { + # invalid x input + expect_snapshot( + tbl_sort(data.frame()), + error = TRUE + ) + + # invalid sort input + expect_snapshot( + tbl_sort(tbl, sort = "no_sorting"), + error = TRUE + ) + + # invalid .stat input + expect_snapshot( + tbl_sort(tbl, .stat = "mean"), + error = TRUE + ) +}) From d82c06f67247e35094aeed7cb2d7da125b5181b7 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Mon, 9 Dec 2024 15:49:11 -0500 Subject: [PATCH 09/25] Add filtering tests --- R/filter_tbl_hierarchical.R | 4 +- .../_snaps/filter_tbl_hierarchical.md | 59 ++++++++ tests/testthat/test-filter_tbl_hierarchical.R | 138 ++++++++++++++++++ tests/testthat/test-sort_tbl_hierarchical.R | 6 +- 4 files changed, 203 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/_snaps/filter_tbl_hierarchical.md create mode 100644 tests/testthat/test-filter_tbl_hierarchical.R diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R index 8821fd75e..56b968041 100644 --- a/R/filter_tbl_hierarchical.R +++ b/R/filter_tbl_hierarchical.R @@ -111,7 +111,9 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n" if (nrow(x$table_body) > 0) { cli::cli_inform( "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept - regardless of whether they meet the filtering criteria themselves." + regardless of whether they meet the filtering criteria themselves.", + .frequency = "once", + .frequency_id = "sum_rows_lt" ) } } diff --git a/tests/testthat/_snaps/filter_tbl_hierarchical.md b/tests/testthat/_snaps/filter_tbl_hierarchical.md new file mode 100644 index 000000000..237023eac --- /dev/null +++ b/tests/testthat/_snaps/filter_tbl_hierarchical.md @@ -0,0 +1,59 @@ +# tbl_filter.tbl_hierarchical() works + + Code + as.data.frame(tbl) + Output + **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 + 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) + 2 F 13 (25%) 18 (45%) 23 (46%) + 3 WHITE 10 (21%) 14 (41%) 20 (45%) + 4 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) + 5 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) + 6 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) + 7 M 13 (39%) 24 (55%) 17 (50%) + 8 WHITE 12 (40%) 22 (55%) 17 (50%) + 9 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) + 10 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) + 11 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) + 12 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) + +# tbl_filter.tbl_hierarchical() error messaging works + + Code + tbl_filter(data.frame(), t = 10) + Condition + Error in `check_class()`: + ! The `x` argument must be class , not a data frame. + +--- + + Code + tbl_filter(tbl, t = "10") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + +--- + + Code + tbl_filter(tbl, t = "10", gt = "yes") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + +--- + + Code + tbl_filter(tbl, t = "10", eq = "no") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + +--- + + Code + tbl_filter(tbl, t = "10", .stat = "pct") + Condition + Error in `tbl_filter()`: + ! The `t` argument must be numeric. + diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-filter_tbl_hierarchical.R new file mode 100644 index 000000000..0542df4bb --- /dev/null +++ b/tests/testthat/test-filter_tbl_hierarchical.R @@ -0,0 +1,138 @@ +skip_on_cran() + +ADAE_subset <- cards::ADAE |> + dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) + +tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE +) + +test_that("tbl_filter.tbl_hierarchical() works", { + withr::local_options(width = 200) + + # no errors + expect_silent(tbl <- tbl_filter(tbl, t = 10)) + expect_snapshot(tbl |> as.data.frame()) + + # .stat argument works + expect_silent(tbl <- tbl_filter(tbl, t = 10, .stat = "p")) +}) + +test_that("tbl_filter.tbl_hierarchical(gt) works", { + # gt = TRUE + expect_silent(tbl_gt <- tbl_filter(tbl, t = 10)) + + # gt = FALSE + expect_silent(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE)) + + expect_equal( + dplyr::inner_join( + tbl_gt$table_body, + tbl_lt$table_body, + by = names(tbl_gt$table_body) + ) |> + dplyr::filter(variable == "AETERM") |> + nrow(), + 0 + ) + + expect_equal( + sum( + tbl_gt$table_body |> + dplyr::filter(variable == "AETERM") |> + nrow(), + tbl_lt$table_body |> + dplyr::filter(variable == "AETERM") |> + nrow() + ), + tbl$table_body |> + dplyr::filter(variable == "AETERM") |> + nrow() + ) +}) + +test_that("tbl_filter.tbl_hierarchical(eq) works", { + # gt = TRUE, eq = FALSE + expect_silent(tbl_gt <- tbl_filter(tbl, t = 12)) + + # gt = TRUE, eq = TRUE + expect_silent(tbl_geq <- tbl_filter(tbl, t = 12, eq = TRUE)) + expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body)) + + # gt = FALSE, eq = FALSE + expect_silent(tbl_lt <- tbl_filter(tbl, t = 12, gt = FALSE)) + + # gt = TRUE, eq = TRUE + expect_silent(tbl_leq <- tbl_filter(tbl, t = 12, gt = FALSE, eq = TRUE)) + expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body)) +}) + +test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", { + expect_silent(tbl <- tbl_filter(tbl, t = 200)) + expect_equal(nrow(tbl$table_body), 0) +}) + +test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", { + tbl_single <- tbl_hierarchical( + data = ADAE_subset, + variables = AETERM, + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + overall_row = TRUE + ) + + expect_silent(tbl_single <- tbl_filter(tbl_single, t = 20)) + expect_equal(nrow(tbl_single$table_body), 4) +}) + +test_that("tbl_filter.tbl_hierarchical() works when some variables not included in x", { + tbl <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID, + include = c(SEX, AETERM), + overall_row = TRUE + ) + + expect_message(tbl_filter(tbl, t = 10)) +}) + +test_that("tbl_filter.tbl_hierarchical() error messaging works", { + # invalid x input + expect_snapshot( + tbl_filter(data.frame(), t = 10), + error = TRUE + ) + + # invalid t input + expect_snapshot( + tbl_filter(tbl, t = "10"), + error = TRUE + ) + + # invalid gt input + expect_snapshot( + tbl_filter(tbl, t = "10", gt = "yes"), + error = TRUE + ) + + # invalid eq input + expect_snapshot( + tbl_filter(tbl, t = "10", eq = "no"), + error = TRUE + ) + + # invalid .stat input + expect_snapshot( + tbl_filter(tbl, t = "10", .stat = "pct"), + error = TRUE + ) +}) diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R index bc69937aa..7c61ea284 100644 --- a/tests/testthat/test-sort_tbl_hierarchical.R +++ b/tests/testthat/test-sort_tbl_hierarchical.R @@ -14,6 +14,7 @@ tbl <- tbl_hierarchical( test_that("tbl_sort.tbl_hierarchical() works", { withr::local_options(width = 200) + # no errors expect_silent(tbl <- tbl_sort(tbl)) expect_snapshot(tbl |> as.data.frame()) @@ -36,7 +37,6 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", { dplyr::filter(variable == "RACE") |> dplyr::pull(label), c("WHITE", "BLACK OR AFRICAN AMERICAN", "WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE") - ) expect_equal( tbl$table_body |> @@ -98,7 +98,7 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", { dplyr::pull(label), c( "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS", - "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA","APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", + "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS" ) @@ -178,7 +178,7 @@ test_that("tbl_sort.tbl_hierarchical() works when some variables not included in by = TRTA, denominator = cards::ADSL |> mutate(TRTA = ARM), id = USUBJID, - include = c(SEX, AETERM), + include = c(SEX, AETERM), overall_row = TRUE ) From cd484f6faaab2c6e9171fc98359f1bbdaa580f9a Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Mon, 9 Dec 2024 16:22:51 -0500 Subject: [PATCH 10/25] Fix test --- tests/testthat/test-filter_tbl_hierarchical.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-filter_tbl_hierarchical.R index 0542df4bb..13e0906cc 100644 --- a/tests/testthat/test-filter_tbl_hierarchical.R +++ b/tests/testthat/test-filter_tbl_hierarchical.R @@ -28,7 +28,7 @@ test_that("tbl_filter.tbl_hierarchical(gt) works", { expect_silent(tbl_gt <- tbl_filter(tbl, t = 10)) # gt = FALSE - expect_silent(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE)) + expect_message(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE)) expect_equal( dplyr::inner_join( From 0fa9fe87b6dc8da7f54b42aeb68dee7a69c552bb Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 28 Jan 2025 19:04:07 -0500 Subject: [PATCH 11/25] Update sort parameters --- R/sort_tbl_hierarchical.R | 48 +++++++---------- .../testthat/_snaps/sort_tbl_hierarchical.md | 2 +- tests/testthat/test-sort_tbl_hierarchical.R | 52 +------------------ 3 files changed, 21 insertions(+), 81 deletions(-) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index 623ac1b7f..7494da717 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -4,20 +4,17 @@ #' #' This function is used to sort hierarchical tables. Options for sorting criteria are: #' -#' 1. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are -#' ordered accordingly (default). +#' 1. Descending - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +#' sorted in descending order by sum (default). #' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables #' in ascending alphanumeric order (i.e. A to Z). #' #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr #' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. #' @param sort (`string`)\cr -#' Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`. -#' @param desc (scalar `logical`)\cr -#' Whether to sort rows in ascending or descending order. Default is descending (`TRUE`) when `sort = "frequency"` -#' and ascending (`FALSE`) when `sort = "alphanumeric"`. +#' Specifies sorting to perform. Values must be one of `c("alphanumeric", "descending")`. Default is `"descending"`. #' @param .stat (`string`)\cr -#' Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for +#' Statistic to use to calculate row sums when `sort = "descending"`. This statistic must be present in the table for #' all hierarchy levels. Default is `"n"`. #' @inheritParams rlang::args_dots_empty #' @@ -40,10 +37,12 @@ #' ) #' #' # Example 1 - Descending Frequency Sort ------------------ -#' tbl_sort(tbl) +#' tbl <- tbl_sort(tbl) +#' tbl #' -#' # Example 2 - Descending Alphanumeric Sort (Z to A) ------ -#' tbl_sort(tbl, sort = "alphanumeric", desc = TRUE) +#' # Example 2 - Alphanumeric Sort -------------------------- +#' tbl <- tbl_sort(tbl, sort = "alphanumeric") +#' tbl NULL #' @rdname sort_tbl_hierarchical @@ -57,16 +56,15 @@ tbl_sort <- function(x, ...) { #' @rdname sort_tbl_hierarchical #' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) { +tbl_sort.tbl_hierarchical <- function(x, sort = "descending", .stat = "n", ...) { set_cli_abort_call() # process and check inputs ---------------------------------------------------------------------- - check_scalar_logical(desc) check_string(.stat) - if (!sort %in% c("frequency", "alphanumeric")) { + if (!sort %in% c("descending", "alphanumeric")) { cli::cli_abort( - "The {.arg sort} argument must be either {.val frequency} or {.val alphanumeric}.", + "The {.arg sort} argument must be either {.val descending} or {.val alphanumeric}.", call = get_cli_abort_call() ) } @@ -81,16 +79,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() ) - if (sort == "alphanumeric") { - # summary rows remain at the top of each sub-section - rep_str <- if (desc) "zzzz" else " " - - # overall row always appears first - if (desc && overall) { - ovrl_row <- x$table_body[1, ] - x$table_body <- x$table_body[-1, ] - } + # keep summary rows at the top of each sub-section + rep_str <- " " + if (sort == "alphanumeric") { # sort by label ------------------------------------------------------------------------------- sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label") @@ -99,11 +91,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |> dplyr::ungroup() |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> - dplyr::arrange(across(all_of(sort_cols), ~ if (desc) dplyr::desc(.x) else .x)) |> + dplyr::arrange(across(all_of(sort_cols), ~ .x)) |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |> select(-"inner_var") - - if (desc && overall) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body) } else { # get row sums -------------------------------------------------------------------------------- x <- .append_hierarchy_row_sums(x, .stat) @@ -122,9 +112,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr # summary rows remain at the top of each sub-section x$table_body <- x$table_body |> dplyr::ungroup() |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |> + dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> dplyr::rowwise() |> - dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) " " else .data$variable) |> + dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |> dplyr::ungroup() # sort by row sum ----------------------------------------------------------------------------- @@ -135,7 +125,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr ), "inner_var", "sum_row", "label") x$table_body <- x$table_body |> - dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |> + dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x)) dplyr::desc(.x) else .x)) |> dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |> select(-starts_with("sum_"), -"inner_var") } diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/sort_tbl_hierarchical.md index 99a9f5f65..07ff271ff 100644 --- a/tests/testthat/_snaps/sort_tbl_hierarchical.md +++ b/tests/testthat/_snaps/sort_tbl_hierarchical.md @@ -44,7 +44,7 @@ tbl_sort(tbl, sort = "no_sorting") Condition Error in `tbl_sort()`: - ! The `sort` argument must be either "frequency" or "alphanumeric". + ! The `sort` argument must be either "descending" or "alphanumeric". --- diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R index 7c61ea284..80e7068ab 100644 --- a/tests/testthat/test-sort_tbl_hierarchical.R +++ b/tests/testthat/test-sort_tbl_hierarchical.R @@ -49,60 +49,10 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", { "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "ERYTHEMA" ) ) - - # ascending frequency - expect_silent(tbl <- tbl_sort(tbl, desc = FALSE)) - expect_equal( - tbl$table_body |> - dplyr::filter(variable == "SEX") |> - dplyr::pull(label), - c("F", "M") # F and M have equal numbers - ) - expect_equal( - tbl$table_body |> - dplyr::filter(variable == "RACE") |> - dplyr::pull(label), - c("BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", "BLACK OR AFRICAN AMERICAN", "WHITE") - ) - expect_equal( - tbl$table_body |> - dplyr::filter(variable == "AETERM") |> - dplyr::pull(label), - c( - "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "APPLICATION SITE PRURITUS", "DIARRHOEA", - "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", "APPLICATION SITE PRURITUS", - "DIARRHOEA", "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", - "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS" - ) - ) }) test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", { - # descending (Z to A) - expect_silent(result <- tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)) - expect_equal( - result$table_body |> - dplyr::filter(variable == "SEX") |> - dplyr::pull(label), - c("M", "F") - ) - expect_equal( - result$table_body |> - dplyr::filter(variable == "RACE") |> - dplyr::pull(label), - c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "WHITE", "BLACK OR AFRICAN AMERICAN") - ) - expect_equal( - result$table_body |> - dplyr::filter(variable == "AETERM") |> - dplyr::pull(label), - c( - "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS", - "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", - "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", - "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS" - ) - ) + expect_silent(result <- tbl_sort(tbl)) # ascending (A to Z) expect_silent(result <- tbl_sort(result, sort = "alphanumeric")) From 86b9898e6bf84c0f687767945cf111f1090a05e3 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 12 Feb 2025 20:42:47 -0500 Subject: [PATCH 12/25] testing --- R/sort_tbl_hierarchical.R | 189 +++++------------------------------ R/tbl_hierarchical.R | 12 ++- man/sort_tbl_hierarchical.Rd | 34 ++++--- 3 files changed, 56 insertions(+), 179 deletions(-) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R index 7494da717..1d0449ca1 100644 --- a/R/sort_tbl_hierarchical.R +++ b/R/sort_tbl_hierarchical.R @@ -4,18 +4,14 @@ #' #' This function is used to sort hierarchical tables. Options for sorting criteria are: #' -#' 1. Descending - within each section of the hierarchy table, frequency sums are calculated for each row and rows are +#' 1. Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are #' sorted in descending order by sum (default). #' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables #' in ascending alphanumeric order (i.e. A to Z). #' #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr -#' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. -#' @param sort (`string`)\cr -#' Specifies sorting to perform. Values must be one of `c("alphanumeric", "descending")`. Default is `"descending"`. -#' @param .stat (`string`)\cr -#' Statistic to use to calculate row sums when `sort = "descending"`. This statistic must be present in the table for -#' all hierarchy levels. Default is `"n"`. +#' a hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. +#' @inheritParams cards::ard_sort #' @inheritParams rlang::args_dots_empty #' #' @return A `gtsummary` of the same class as `x`. @@ -37,7 +33,7 @@ #' ) #' #' # Example 1 - Descending Frequency Sort ------------------ -#' tbl <- tbl_sort(tbl) +#' tbl <- tbl_sort(tbl, sort = "descending") #' tbl #' #' # Example 2 - Alphanumeric Sort -------------------------- @@ -56,170 +52,37 @@ tbl_sort <- function(x, ...) { #' @rdname sort_tbl_hierarchical #' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "descending", .stat = "n", ...) { +tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { set_cli_abort_call() - # process and check inputs ---------------------------------------------------------------------- - check_string(.stat) + ard_args <- attributes(x$cards$tbl_hierarchical)$args + by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) - if (!sort %in% c("descending", "alphanumeric")) { - cli::cli_abort( - "The {.arg sort} argument must be either {.val descending} or {.val alphanumeric}.", - call = get_cli_abort_call() - ) - } - - overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable - outer_cols <- sapply( - x$table_body |> select(cards::all_ard_groups("names")), - function(x) dplyr::last(unique(stats::na.omit(x))) - ) - inner_col <- setdiff( - x$table_body$variable, - x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique() - ) - - # keep summary rows at the top of each sub-section - rep_str <- " " - - if (sort == "alphanumeric") { - # sort by label ------------------------------------------------------------------------------- - sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label") - - x$table_body <- x$table_body |> - dplyr::rowwise() |> - dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |> - dplyr::ungroup() |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> - dplyr::arrange(across(all_of(sort_cols), ~ .x)) |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |> - select(-"inner_var") - } else { - # get row sums -------------------------------------------------------------------------------- - x <- .append_hierarchy_row_sums(x, .stat) - - # append outer hierarchy level sums in each row to sort at all levels ------------------------- - for (g in names(outer_cols)) { - x$table_body <- x$table_body |> dplyr::group_by(across(all_of(c(g, paste0(g, "_level")))), .add = TRUE) - x$table_body <- x$table_body |> - dplyr::left_join( - x$table_body |> - dplyr::summarize(!!paste0("sum_", g) := dplyr::first(.data$sum_row)), - by = x$table_body |> dplyr::group_vars() - ) - } - - # summary rows remain at the top of each sub-section - x$table_body <- x$table_body |> - dplyr::ungroup() |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |> - dplyr::rowwise() |> - dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |> - dplyr::ungroup() - - # sort by row sum ----------------------------------------------------------------------------- - sort_cols <- c(rbind( - x$table_body |> select(cards::all_ard_groups("names")) |> names(), - x$table_body |> select(starts_with("sum_group")) |> names(), - x$table_body |> select(cards::all_ard_groups("levels")) |> names() - ), "inner_var", "sum_row", "label") - - x$table_body <- x$table_body |> - dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x)) dplyr::desc(.x) else .x)) |> - dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |> - select(-starts_with("sum_"), -"inner_var") - } - - x -} - -.append_hierarchy_row_sums <- function(x, .stat) { - cards <- x$cards$tbl_hierarchical - - if (!.stat %in% cards$stat_name) { - cli::cli_abort( - "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid - statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.", - call = get_cli_abort_call() - ) - } - - by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA - outer_cols <- sapply( - x$table_body |> select(cards::all_ard_groups("names")), - function(x) dplyr::last(unique(stats::na.omit(x))) - ) - - # update logical variable_level entries from overall row to character - cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |> - dplyr::filter(.data$variable == "..ard_hierarchical_overall..") |> - dplyr::pull("label") |> - as.list() - - # extract row sums ------------------------------------------------------------------------------ - cards <- cards |> - dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |> + # remove rows from ARD that do not correspond to a table row, append indices + x_sort_match <- x$cards$tbl_hierarchical |> + dplyr::filter(!is.na(group1)) |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::summarise(sum_row = sum(unlist(.data$stat))) |> + dplyr::mutate(idx_sort = dplyr::cur_group_id()) |> dplyr::ungroup() |> - dplyr::rename(label = "variable_level") |> - tidyr::unnest(cols = everything()) - - # match cards names to x$table_body ------------------------------------------------------------- - if (length(by_cols) > 1) { - names(cards)[grep("group", names(cards))] <- x$table_body |> - select(cards::all_ard_groups()) |> - names() - } - cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.." - - # fill in NAs to align cards layout with x$table_body ------------------------------------------- - cards <- cards |> - dplyr::rowwise() |> - dplyr::mutate(across( - cards::all_ard_groups(), - ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && .data$variable == outer_cols[dplyr::cur_column()]) { - .data$variable - } else if (is.na(.x) && .data$variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) { - .data$label - } else { - .x - } - )) + cards::as_card() + attr(x_sort_match, "args") <- ard_args - # for any variables not in include, calculate group sums ---------------------------------------- - if (!all(outer_cols %in% cards$variable)) { - gp_vars <- outer_cols[outer_cols %in% setdiff(outer_cols, cards$variable)] - gp_cols <- names(gp_vars) + # pull indices each corresponding to one row of x$table_body + pre_idx <- x_sort_match |> + dplyr::pull("idx_sort") |> + unique() - cli::cli_inform( - "Not all hierarchy variables present in the table were included in the {.arg include} argument. - These variables ({gp_vars}) do not have event rate data available so the total sum of the event - rates for this hierarchy section will be used instead. To use event rates for all sections of the table, - set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." - ) + # pull updated index order after sorting + post_idx <- x_sort_match |> + cards::ard_sort(sort) |> + dplyr::pull("idx_sort") |> + unique() - for (i in seq_along(gp_cols)) { - cards <- cards |> - dplyr::bind_rows( - cards |> - dplyr::filter(.data$variable != "..ard_hierarchical_overall..") |> - dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |> - dplyr::summarize(sum_row = sum(.data$sum_row)) |> - dplyr::mutate( - variable = .data[[gp_cols[i]]], - label = .data[[paste0(gp_cols[i], "_level")]] - ) - ) - } - } + # update x$cards + x$cards$tbl_hierarchical <- x$cards$tbl_hierarchical |> cards::ard_sort(sort) - # append row sums to x$table_body --------------------------------------------------------------- - x$table_body <- x$table_body |> - dplyr::left_join( - cards, - by = c(cards |> select(-"sum_row") |> names()) - ) + # update x$table_body according to updated (relative) row positions + x$table_body <- x$table_body[sapply(post_idx, function(x) which(pre_idx == x)), ] x } diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index 908242541..03dfe9e29 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -326,6 +326,9 @@ internal_tbl_hierarchical <- function(data, # add the gtsummary column names to ARD data frame --------------------------- cards <- .add_gts_column_to_cards_hierarchical(cards, variables, by) + # sort ARD alphanumerically (the default table layout) + cards <- cards |> cards::ard_sort("alphanumeric") + # call bridge function here brdg_hierarchical( cards = cards, @@ -416,6 +419,8 @@ internal_tbl_hierarchical <- function(data, } .add_gts_column_to_cards_hierarchical <- function(cards, variables, by) { + args <- attributes(cards)$args + # adding the name of the column the stats will populate if (is_empty(by)) { cards$gts_column <- @@ -435,7 +440,12 @@ internal_tbl_hierarchical <- function(data, dplyr::mutate(gts_column = paste0("stat_", dplyr::cur_group_id())) } - cards |> + cards <- cards |> dplyr::ungroup() |> cards::as_card() + + # re-add dropped args attribute + attr(cards, "args") <- args + + cards } diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd index 45c165614..0e61cb7d4 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/sort_tbl_hierarchical.Rd @@ -8,24 +8,26 @@ \usage{ tbl_sort(x, ...) -\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) +\method{tbl_sort}{tbl_hierarchical}(x, sort = "descending", ...) } \arguments{ \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr -A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} +a hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} \item{...}{These dots are for future extensions and must be empty.} \item{sort}{(\code{string})\cr -Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.} - -\item{desc}{(scalar \code{logical})\cr -Whether to sort rows in ascending or descending order. Default is descending (\code{TRUE}) when \code{sort = "frequency"} -and ascending (\code{FALSE}) when \code{sort = "alphanumeric"}.} +type of sorting to perform. Value must be one of: +\itemize{ +\item \code{"alphanumeric"} - within each hierarchical section of the ARD, rows are ordered alphanumerically (i.e. A to Z) +by \code{variable_label} text. +\item \code{"descending"} - within each hierarchical section of the ARD, count sums are calculated for each row and rows are +sorted in descending order by sum. If \code{sort = "descending"}, the \code{n} statistic is used to calculate row sums if +included in \code{statistic} for all variables, otherwise \code{p} is used. If neither \code{n} nor \code{p} are present in \code{x} for +all variables, an error will occur. +} -\item{.stat}{(\code{string})\cr -Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for -all hierarchy levels. Default is \code{"n"}.} +Defaults to \code{"descending"}.} } \value{ A \code{gtsummary} of the same class as \code{x}. @@ -35,8 +37,8 @@ A \code{gtsummary} of the same class as \code{x}. This function is used to sort hierarchical tables. Options for sorting criteria are: \enumerate{ -\item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are -ordered accordingly (default). +\item Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are +sorted in descending order by sum (default). \item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables in ascending alphanumeric order (i.e. A to Z). } @@ -56,10 +58,12 @@ tbl <- tbl_hierarchical( ) # Example 1 - Descending Frequency Sort ------------------ -tbl_sort(tbl) +tbl <- tbl_sort(tbl, sort = "descending") +tbl -# Example 2 - Descending Alphanumeric Sort (Z to A) ------ -tbl_sort(tbl, sort = "alphanumeric", desc = TRUE) +# Example 2 - Alphanumeric Sort -------------------------- +tbl <- tbl_sort(tbl, sort = "alphanumeric") +tbl \dontshow{\}) # examplesIf} } \seealso{ From cd204108278842a5131bcc323b4443e2204b919e Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 12 Feb 2025 22:05:03 -0500 Subject: [PATCH 13/25] Finish sort function, update tests --- R/sort_tbl_hierarchical.R | 88 ---------- R/tbl_sort.R | 162 ++++++++++++++++++ man/{sort_tbl_hierarchical.Rd => tbl_sort.Rd} | 13 +- .../{sort_tbl_hierarchical.md => tbl_sort.md} | 24 --- ...ort_tbl_hierarchical.R => test-tbl_sort.R} | 20 --- 5 files changed, 168 insertions(+), 139 deletions(-) delete mode 100644 R/sort_tbl_hierarchical.R create mode 100644 R/tbl_sort.R rename man/{sort_tbl_hierarchical.Rd => tbl_sort.Rd} (83%) rename tests/testthat/_snaps/{sort_tbl_hierarchical.md => tbl_sort.md} (87%) rename tests/testthat/{test-sort_tbl_hierarchical.R => test-tbl_sort.R} (91%) diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R deleted file mode 100644 index 1d0449ca1..000000000 --- a/R/sort_tbl_hierarchical.R +++ /dev/null @@ -1,88 +0,0 @@ -#' Sort Hierarchical Tables -#' -#' @description `r lifecycle::badge('experimental')`\cr -#' -#' This function is used to sort hierarchical tables. Options for sorting criteria are: -#' -#' 1. Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are -#' sorted in descending order by sum (default). -#' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables -#' in ascending alphanumeric order (i.e. A to Z). -#' -#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr -#' a hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. -#' @inheritParams cards::ard_sort -#' @inheritParams rlang::args_dots_empty -#' -#' @return A `gtsummary` of the same class as `x`. -#' -#' @name sort_tbl_hierarchical -#' @seealso [tbl_filter()] -#' -#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) -#' ADAE_subset <- cards::ADAE |> -#' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) -#' -#' tbl <- tbl_hierarchical( -#' data = ADAE_subset, -#' variables = c(SEX, RACE, AETERM), -#' by = TRTA, -#' denominator = cards::ADSL |> mutate(TRTA = ARM), -#' id = USUBJID, -#' overall_row = TRUE -#' ) -#' -#' # Example 1 - Descending Frequency Sort ------------------ -#' tbl <- tbl_sort(tbl, sort = "descending") -#' tbl -#' -#' # Example 2 - Alphanumeric Sort -------------------------- -#' tbl <- tbl_sort(tbl, sort = "alphanumeric") -#' tbl -NULL - -#' @rdname sort_tbl_hierarchical -#' @export -tbl_sort <- function(x, ...) { - check_not_missing(x) - check_class(x, "gtsummary") - - UseMethod("tbl_sort") -} - -#' @rdname sort_tbl_hierarchical -#' @export -tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { - set_cli_abort_call() - - ard_args <- attributes(x$cards$tbl_hierarchical)$args - by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) - - # remove rows from ARD that do not correspond to a table row, append indices - x_sort_match <- x$cards$tbl_hierarchical |> - dplyr::filter(!is.na(group1)) |> - dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::mutate(idx_sort = dplyr::cur_group_id()) |> - dplyr::ungroup() |> - cards::as_card() - attr(x_sort_match, "args") <- ard_args - - # pull indices each corresponding to one row of x$table_body - pre_idx <- x_sort_match |> - dplyr::pull("idx_sort") |> - unique() - - # pull updated index order after sorting - post_idx <- x_sort_match |> - cards::ard_sort(sort) |> - dplyr::pull("idx_sort") |> - unique() - - # update x$cards - x$cards$tbl_hierarchical <- x$cards$tbl_hierarchical |> cards::ard_sort(sort) - - # update x$table_body according to updated (relative) row positions - x$table_body <- x$table_body[sapply(post_idx, function(x) which(pre_idx == x)), ] - - x -} diff --git a/R/tbl_sort.R b/R/tbl_sort.R new file mode 100644 index 000000000..ecafd15a6 --- /dev/null +++ b/R/tbl_sort.R @@ -0,0 +1,162 @@ +#' Sort Hierarchical Tables +#' +#' @description `r lifecycle::badge('experimental')`\cr +#' +#' This function is used to sort hierarchical tables. Options for sorting criteria are: +#' +#' 1. Descending - within each section of the hierarchy table, event rate sums are calculated for each row and rows are +#' sorted in descending order by sum (default). +#' 2. Alphanumeric - rows are ordered alphanumerically (i.e. A to Z) by label text. By default, [tbl_hierarchical()] +#' sorts tables in alphanumeric order. +#' +#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr +#' a hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. +#' @inheritParams cards::ard_sort +#' @inheritParams rlang::args_dots_empty +#' +#' @return A `gtsummary` of the same class as `x`. +#' +#' @name tbl_sort +#' @seealso [tbl_filter()] +#' +#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) +#' ADAE_subset <- cards::ADAE |> +#' dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5]) +#' +#' tbl <- tbl_hierarchical( +#' data = ADAE_subset, +#' variables = c(SEX, RACE, AETERM), +#' by = TRTA, +#' denominator = cards::ADSL |> mutate(TRTA = ARM), +#' id = USUBJID, +#' overall_row = TRUE +#' ) +#' +#' # Example 1 - Descending Frequency Sort ------------------ +#' tbl <- tbl_sort(tbl) +#' tbl +#' +#' # Example 2 - Alphanumeric Sort -------------------------- +#' tbl <- tbl_sort(tbl, sort = "alphanumeric") +#' tbl +NULL + +#' @rdname tbl_sort +#' @export +tbl_sort <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + + UseMethod("tbl_sort") +} + +#' @rdname tbl_sort +#' @export +tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { + set_cli_abort_call() + + ard_args <- attributes(x$cards$tbl_hierarchical)$args + by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) + x_ard <- x$cards$tbl_hierarchical + + # add dummy rows for variables not in include so their label rows are sorted correctly + not_incl <- setdiff(ard_args$variables, ard_args$include) + if (length(not_incl) > 0) { + cli::cli_inform( + "Not all hierarchy variables present in the table were included in the {.arg include} argument. + These variables ({not_incl}) do not have event rate data available so the total sum of the event rates + for this hierarchy section will be used instead. To use true event rates for all sections of the table, + set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." + ) + + x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical))) + for (v in not_incl) { + i <- length(ard_args$by) + which(ard_args$variables == v) + x_sum_rows <- x_ard |> + dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |> + dplyr::group_map(function(.df, .g) { + # get pseudo-summary row stat value for descending sort + if (sort == "descending") { + stat_nm <- setdiff(.df$stat_name, "N")[1] + sum <- .df |> + dplyr::filter(stat_name == !!stat_nm) |> + dplyr::summarize(s = sum(unlist(stat))) |> + dplyr::pull(s) + } + g_cur <- .g[[ncol(.g) - 1]] + if (!is.na(g_cur) && g_cur == v) { + # dummy summary row to add in + .df[1, ] |> mutate( + variable = g_cur, + variable_level = .g[[ncol(.g)]], + stat_name = if (sort == "descending") stat_nm else "no_stat", + stat = if (sort == "descending") list(sum) else list(0), + idx_o = min(.df$idx_o) + 1, + tmp = TRUE + ) + } else { + NULL + } + }, .keep = TRUE) + sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o) + # adjust prior row indices to add in dummy summary rows + x_ard <- x_ard |> + dplyr::bind_rows(x_sum_rows) |> + dplyr::rowwise() |> + mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |> + dplyr::ungroup() + } + x_ard <- x_ard |> + dplyr::arrange(idx_o, tmp) |> + select(-"idx_o") + } + + # add indices to ARD + x_ard <- x_ard |> + dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> + dplyr::mutate(idx_sort = dplyr::cur_group_id()) |> + dplyr::ungroup() + + # re-add dropped args attribute + x_ard <- x_ard |> cards::as_card() + attr(x_ard, "args") <- ard_args + + # get `by` variable count rows (do not correspond to a table row) + rm_idx <- x_ard |> + dplyr::filter(is.na(group1)) |> + dplyr::pull("idx_sort") |> + unique() + + # pull index order (each corresponding to one row of x$table_body) + pre_sort_idx <- x_ard |> + dplyr::pull("idx_sort") |> + unique() |> + setdiff(rm_idx) |> + as.character() + + # apply sorting + x_ard_sort <- x_ard |> cards::ard_sort(sort) + + # pull updated index order after sorting + post_sort_idx <- x_ard_sort |> + dplyr::pull("idx_sort") |> + unique() |> + setdiff(rm_idx) |> + as.character() + + # get updated (relative) row positions + idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx] + + # update x$cards + if ("tmp" %in% names(x_ard_sort)) { + x_ard_sort <- x_ard_sort |> + dplyr::filter(is.na(tmp)) |> + select(-"tmp") + } + x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort") + + # update x$table_body + x$table_body <- x$table_body[idx, ] + + x +} diff --git a/man/sort_tbl_hierarchical.Rd b/man/tbl_sort.Rd similarity index 83% rename from man/sort_tbl_hierarchical.Rd rename to man/tbl_sort.Rd index 0e61cb7d4..12d7fe3b1 100644 --- a/man/sort_tbl_hierarchical.Rd +++ b/man/tbl_sort.Rd @@ -1,7 +1,6 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sort_tbl_hierarchical.R -\name{sort_tbl_hierarchical} -\alias{sort_tbl_hierarchical} +% Please edit documentation in R/tbl_sort.R +\name{tbl_sort} \alias{tbl_sort} \alias{tbl_sort.tbl_hierarchical} \title{Sort Hierarchical Tables} @@ -37,10 +36,10 @@ A \code{gtsummary} of the same class as \code{x}. This function is used to sort hierarchical tables. Options for sorting criteria are: \enumerate{ -\item Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are +\item Descending - within each section of the hierarchy table, event rate sums are calculated for each row and rows are sorted in descending order by sum (default). -\item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables -in ascending alphanumeric order (i.e. A to Z). +\item Alphanumeric - rows are ordered alphanumerically (i.e. A to Z) by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} +sorts tables in alphanumeric order. } } \examples{ @@ -58,7 +57,7 @@ tbl <- tbl_hierarchical( ) # Example 1 - Descending Frequency Sort ------------------ -tbl <- tbl_sort(tbl, sort = "descending") +tbl <- tbl_sort(tbl) tbl # Example 2 - Alphanumeric Sort -------------------------- diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/tbl_sort.md similarity index 87% rename from tests/testthat/_snaps/sort_tbl_hierarchical.md rename to tests/testthat/_snaps/tbl_sort.md index 07ff271ff..fc2d4d810 100644 --- a/tests/testthat/_snaps/sort_tbl_hierarchical.md +++ b/tests/testthat/_snaps/tbl_sort.md @@ -30,27 +30,3 @@ 24 AMERICAN INDIAN OR ALASKA NATIVE 0 (NA%) 1 (100%) 0 (NA%) 25 ERYTHEMA 0 (NA%) 1 (100%) 0 (NA%) -# tbl_sort.tbl_hierarchical() error messaging works - - Code - tbl_sort(data.frame()) - Condition - Error in `check_class()`: - ! The `x` argument must be class , not a data frame. - ---- - - Code - tbl_sort(tbl, sort = "no_sorting") - Condition - Error in `tbl_sort()`: - ! The `sort` argument must be either "descending" or "alphanumeric". - ---- - - Code - tbl_sort(tbl, .stat = "mean") - Condition - Error in `tbl_sort()`: - ! The `.stat` argument is "mean" but this statistic is not present in `x`. For all valid statistic options see the "stat_name" column of `x$cards$tbl_hierarchical`. - diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-tbl_sort.R similarity index 91% rename from tests/testthat/test-sort_tbl_hierarchical.R rename to tests/testthat/test-tbl_sort.R index 80e7068ab..0621a07f5 100644 --- a/tests/testthat/test-sort_tbl_hierarchical.R +++ b/tests/testthat/test-tbl_sort.R @@ -134,23 +134,3 @@ test_that("tbl_sort.tbl_hierarchical() works when some variables not included in expect_message(tbl_sort(tbl)) }) - -test_that("tbl_sort.tbl_hierarchical() error messaging works", { - # invalid x input - expect_snapshot( - tbl_sort(data.frame()), - error = TRUE - ) - - # invalid sort input - expect_snapshot( - tbl_sort(tbl, sort = "no_sorting"), - error = TRUE - ) - - # invalid .stat input - expect_snapshot( - tbl_sort(tbl, .stat = "mean"), - error = TRUE - ) -}) From 1b46407d5caf268ed192f65a015eddd3beaecff9 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 12 Feb 2025 22:06:52 -0500 Subject: [PATCH 14/25] I think this is not supposed to be missing? --- R/tbl_hierarchical.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index 03dfe9e29..5290fab69 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -368,6 +368,7 @@ internal_tbl_hierarchical <- function(data, denominator = denominator, include = all_of(dplyr::nth(variables, -2)), statistic = statistic, + over_variables = overall_row, total_n = (is_empty(by) && length(include) == 1) ) From d7ccb1838e6816765a4e8bd797e7bd21951b8761 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 12 Feb 2025 22:32:47 -0500 Subject: [PATCH 15/25] Move sorting --- R/tbl_hierarchical.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index 5290fab69..b3b6c9d03 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -326,9 +326,6 @@ internal_tbl_hierarchical <- function(data, # add the gtsummary column names to ARD data frame --------------------------- cards <- .add_gts_column_to_cards_hierarchical(cards, variables, by) - # sort ARD alphanumerically (the default table layout) - cards <- cards |> cards::ard_sort("alphanumeric") - # call bridge function here brdg_hierarchical( cards = cards, @@ -343,7 +340,7 @@ internal_tbl_hierarchical <- function(data, ) |> append( list( - cards = list(cards) |> stats::setNames(calling_fun), + cards = list(cards |> cards::ard_sort("alphanumeric")) |> stats::setNames(calling_fun), inputs = tbl_hierarchical_inputs ) ) |> From db75c331890271df1991778905a39bf9fe0aeaa5 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 12 Feb 2025 22:33:38 -0500 Subject: [PATCH 16/25] testing --- R/filter_tbl_hierarchical.R | 108 +++++++++++++++------------------ man/filter_tbl_hierarchical.Rd | 6 +- 2 files changed, 52 insertions(+), 62 deletions(-) diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R index 56b968041..3d221b4cc 100644 --- a/R/filter_tbl_hierarchical.R +++ b/R/filter_tbl_hierarchical.R @@ -37,10 +37,10 @@ #' ) #' #' # Example 1 - Row Sums > 10 ------------------ -#' tbl_filter(tbl, t = 10) +#' tbl_filter(tbl, sum(n) > 10) #' #' # Example 2 - Row Sums <= 5 ------------------ -#' tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +#' tbl_filter(tbl, sum(n) <= 5) NULL #' @rdname filter_tbl_hierarchical @@ -54,72 +54,62 @@ tbl_filter <- function(x, ...) { #' @export #' @rdname filter_tbl_hierarchical -tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) { +tbl_filter.tbl_hierarchical <- function(x, filter, ...) { set_cli_abort_call() - # process and check inputs ---------------------------------------------------------------------- - check_numeric(t) - check_scalar_logical(gt) - check_scalar_logical(eq) - check_string(.stat) + ard_args <- attributes(x$cards$tbl_hierarchical)$args + by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) - outer_cols <- sapply( - x$table_body |> select(cards::all_ard_groups("names")), - function(x) dplyr::last(unique(stats::na.omit(x))) - ) + browser() + # add indices to ARD + x_ard <- x$cards$tbl_hierarchical |> + dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> + dplyr::mutate(idx_sort = dplyr::cur_group_id()) |> + dplyr::ungroup() |> + cards::as_card() - # get row sums ---------------------------------------------------------------------------------- - x <- .append_hierarchy_row_sums(x, .stat) + # re-add dropped args attribute + attr(x_ard, "args") <- ard_args - # keep all summary rows (removed later if no sub-rows are kept) - if (!gt) x$table_body$sum_row[x$table_body$variable %in% outer_cols] <- t - 1 + # get `by` variable count rows (do not correspond to a table row) + rm_idx <- x_ard |> + dplyr::filter(is.na(group1)) |> + dplyr::pull("idx_sort") |> + unique() - # create and apply filtering expression --------------------------------------------------------- - filt_expr <- paste( - "sum_row", - dplyr::case_when( - gt && eq ~ ">=", - !gt && eq ~ "<=", - !gt ~ "<", - TRUE ~ ">" - ), - t - ) - x$table_body <- x$table_body |> - dplyr::filter(!!parse_expr(filt_expr)) + # pull index order (each corresponding to one row of x$table_body) + pre_sort_idx <- x_ard |> + dplyr::pull("idx_sort") |> + unique() |> + setdiff(rm_idx) |> + as.character() - # remove any summary rows with no sub-rows still present ---------------------------------------- - if (!gt) { - for (i in rev(seq_along(outer_cols))) { - gp_empty <- x$table_body |> - dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |> - dplyr::summarize(is_empty := dplyr::n() == 1) |> - stats::na.omit() + # apply sorting + x_ard_sort <- x_ard |> cards::ard_filter({{ filter }}) - if (!all(!gp_empty$is_empty)) { - x$table_body <- x$table_body |> - dplyr::left_join( - gp_empty, - by = gp_empty |> select(cards::all_ard_groups()) |> names() - ) |> - dplyr::filter(!is_empty | is.na(is_empty)) |> - dplyr::select(-"is_empty") - } else { - break - } - } - if (nrow(x$table_body) > 0) { - cli::cli_inform( - "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept - regardless of whether they meet the filtering criteria themselves.", - .frequency = "once", - .frequency_id = "sum_rows_lt" - ) - } - } + # pull updated index order after sorting + post_sort_idx <- x_ard_sort |> + dplyr::pull("idx_sort") |> + unique() |> + setdiff(rm_idx) |> + as.character() - x$table_body <- x$table_body |> - dplyr::select(-"sum_row") + # get updated (relative) row positions + idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx] + + # update x$cards + x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort") + + # update x$table_body + x$table_body <- x$table_body[idx, ] x + # if (nrow(x$table_body) > 0) { + # cli::cli_inform( + # "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept + # regardless of whether they meet the filtering criteria themselves.", + # .frequency = "once", + # .frequency_id = "sum_rows_lt" + # ) + # } } diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd index a84a127f9..621d5a532 100644 --- a/man/filter_tbl_hierarchical.Rd +++ b/man/filter_tbl_hierarchical.Rd @@ -8,7 +8,7 @@ \usage{ tbl_filter(x, ...) -\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) +\method{tbl_filter}{tbl_hierarchical}(x, filter, ...) } \arguments{ \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr @@ -53,10 +53,10 @@ tbl <- tbl_hierarchical( ) # Example 1 - Row Sums > 10 ------------------ -tbl_filter(tbl, t = 10) +tbl_filter(tbl, sum(n) > 10) # Example 2 - Row Sums <= 5 ------------------ -tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE) +tbl_filter(tbl, sum(n) <= 5) \dontshow{\}) # examplesIf} } \seealso{ From 34aca1027f22f848c58e5ea15398e148400aadb6 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 12 Feb 2025 22:38:23 -0500 Subject: [PATCH 17/25] Rename --- R/{filter_tbl_hierarchical.R => tbl_filter.R} | 6 +- ...lter_tbl_hierarchical.Rd => tbl_filter.Rd} | 5 +- .../_snaps/filter_tbl_hierarchical.md | 59 ------------------- tests/testthat/_snaps/tbl_filter.md | 40 +++++++++++++ ...r_tbl_hierarchical.R => test-tbl_filter.R} | 0 5 files changed, 45 insertions(+), 65 deletions(-) rename R/{filter_tbl_hierarchical.R => tbl_filter.R} (97%) rename man/{filter_tbl_hierarchical.Rd => tbl_filter.Rd} (94%) delete mode 100644 tests/testthat/_snaps/filter_tbl_hierarchical.md create mode 100644 tests/testthat/_snaps/tbl_filter.md rename tests/testthat/{test-filter_tbl_hierarchical.R => test-tbl_filter.R} (100%) diff --git a/R/filter_tbl_hierarchical.R b/R/tbl_filter.R similarity index 97% rename from R/filter_tbl_hierarchical.R rename to R/tbl_filter.R index 3d221b4cc..03692c762 100644 --- a/R/filter_tbl_hierarchical.R +++ b/R/tbl_filter.R @@ -20,7 +20,7 @@ #' #' @return A `gtsummary` of the same class as `x`. #' -#' @name filter_tbl_hierarchical +#' @name tbl_filter #' @seealso [tbl_sort()] #' #' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true")) @@ -43,7 +43,7 @@ #' tbl_filter(tbl, sum(n) <= 5) NULL -#' @rdname filter_tbl_hierarchical +#' @rdname tbl_filter #' @export tbl_filter <- function(x, ...) { check_not_missing(x) @@ -53,7 +53,7 @@ tbl_filter <- function(x, ...) { } #' @export -#' @rdname filter_tbl_hierarchical +#' @rdname tbl_filter tbl_filter.tbl_hierarchical <- function(x, filter, ...) { set_cli_abort_call() diff --git a/man/filter_tbl_hierarchical.Rd b/man/tbl_filter.Rd similarity index 94% rename from man/filter_tbl_hierarchical.Rd rename to man/tbl_filter.Rd index 621d5a532..66dfdeb22 100644 --- a/man/filter_tbl_hierarchical.Rd +++ b/man/tbl_filter.Rd @@ -1,7 +1,6 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/filter_tbl_hierarchical.R -\name{filter_tbl_hierarchical} -\alias{filter_tbl_hierarchical} +% Please edit documentation in R/tbl_filter.R +\name{tbl_filter} \alias{tbl_filter} \alias{tbl_filter.tbl_hierarchical} \title{Filter Hierarchical Tables} diff --git a/tests/testthat/_snaps/filter_tbl_hierarchical.md b/tests/testthat/_snaps/filter_tbl_hierarchical.md deleted file mode 100644 index 237023eac..000000000 --- a/tests/testthat/_snaps/filter_tbl_hierarchical.md +++ /dev/null @@ -1,59 +0,0 @@ -# tbl_filter.tbl_hierarchical() works - - Code - as.data.frame(tbl) - Output - **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 - 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) - 2 F 13 (25%) 18 (45%) 23 (46%) - 3 WHITE 10 (21%) 14 (41%) 20 (45%) - 4 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) - 5 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) - 6 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) - 7 M 13 (39%) 24 (55%) 17 (50%) - 8 WHITE 12 (40%) 22 (55%) 17 (50%) - 9 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) - 10 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) - 11 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) - 12 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) - -# tbl_filter.tbl_hierarchical() error messaging works - - Code - tbl_filter(data.frame(), t = 10) - Condition - Error in `check_class()`: - ! The `x` argument must be class , not a data frame. - ---- - - Code - tbl_filter(tbl, t = "10") - Condition - Error in `tbl_filter()`: - ! The `t` argument must be numeric. - ---- - - Code - tbl_filter(tbl, t = "10", gt = "yes") - Condition - Error in `tbl_filter()`: - ! The `t` argument must be numeric. - ---- - - Code - tbl_filter(tbl, t = "10", eq = "no") - Condition - Error in `tbl_filter()`: - ! The `t` argument must be numeric. - ---- - - Code - tbl_filter(tbl, t = "10", .stat = "pct") - Condition - Error in `tbl_filter()`: - ! The `t` argument must be numeric. - diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md new file mode 100644 index 000000000..b9f41f6dc --- /dev/null +++ b/tests/testthat/_snaps/tbl_filter.md @@ -0,0 +1,40 @@ +# tbl_filter.tbl_hierarchical() error messaging works + + Code + tbl_filter(data.frame(), t = 10) + Condition + Error in `check_class()`: + ! The `x` argument must be class , not a data frame. + +--- + + Code + tbl_filter(tbl, t = "10") + Condition + Error in `tbl_filter()`: + ! `filter` must be an expression. + +--- + + Code + tbl_filter(tbl, t = "10", gt = "yes") + Condition + Error in `tbl_filter()`: + ! `filter` must be an expression. + +--- + + Code + tbl_filter(tbl, t = "10", eq = "no") + Condition + Error in `tbl_filter()`: + ! `filter` must be an expression. + +--- + + Code + tbl_filter(tbl, t = "10", .stat = "pct") + Condition + Error in `tbl_filter()`: + ! `filter` must be an expression. + diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-tbl_filter.R similarity index 100% rename from tests/testthat/test-filter_tbl_hierarchical.R rename to tests/testthat/test-tbl_filter.R From dfa61bd0d80e6c33e9456dca66caf4ed80a3d28f Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 14 Feb 2025 16:56:00 -0500 Subject: [PATCH 18/25] test --- R/tbl_filter.R | 102 +++++++++++++++++++++++-------- R/tbl_hierarchical.R | 2 +- man/tbl_filter.Rd | 34 ++++++----- tests/testthat/test-tbl_filter.R | 6 +- 4 files changed, 98 insertions(+), 46 deletions(-) diff --git a/R/tbl_filter.R b/R/tbl_filter.R index 03692c762..24b25872d 100644 --- a/R/tbl_filter.R +++ b/R/tbl_filter.R @@ -2,22 +2,29 @@ #' #' @description `r lifecycle::badge('experimental')`\cr #' -#' This function is used to filter hierarchical table rows by frequency row sum. +#' This function is used to filter hierarchical table rows. #' #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr #' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. -#' @param t (scalar `numeric`)\cr -#' Threshold used to determine which rows will be retained. -#' @param gt (scalar `logical`)\cr -#' Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`). -#' @param eq (scalar `logical`)\cr -#' Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or -#' inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`. -#' @param .stat (`string`)\cr -#' Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. -#' Default is `"n"`. +#' @inheritParams cards::ard_sort #' @inheritParams rlang::args_dots_empty #' +#' @details +#' The `filter` argument can be used to filter out rows of a table which do not meet the criteria provided as an +#' expression. Rows can be filtered on the values of any of the possible statistics (`n`, `p`, and `N`) provided they +#' are included at least once in the table, as well as the values of any `by` variables. Filtering is only applied to +#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at +#' least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria +#' themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e. +#' across all `by` variable values) by using aggregate functions such as `sum()` and `mean()`. +#' +#' Some examples of possible filters: +#' - `filter = n > 5` +#' - `filter = n == 2 & p < 0.05` +#' - `filter = sum(n) > 4` +#' - `filter = mean(n) > 4 | n > 3` +#' - `filter = any(n > 2 & TRTA == "Xanomeline High Dose")` +#' #' @return A `gtsummary` of the same class as `x`. #' #' @name tbl_filter @@ -59,46 +66,89 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) { ard_args <- attributes(x$cards$tbl_hierarchical)$args by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) + x_ard <- x$cards$tbl_hierarchical + + # add dummy rows for variables not in include so their label rows are ordered correctly + not_incl <- setdiff(ard_args$variables, ard_args$include) + if (length(not_incl) > 0) { + x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical))) + for (v in not_incl) { + i <- length(ard_args$by) + which(ard_args$variables == v) + x_sum_rows <- x_ard |> + dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |> + dplyr::group_map(function(.df, .g) { + g_cur <- .g[[ncol(.g) - 1]] + if (!is.na(g_cur) && g_cur == v) { + # dummy summary row to add in + .df[1, ] |> mutate( + variable = g_cur, + variable_level = .g[[ncol(.g)]], + stat_name = "no_stat", + stat = list(0), + idx_o = min(.df$idx_o) + 1, + tmp = TRUE + ) + } else { + NULL + } + }, .keep = TRUE) + sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o) + # adjust prior row indices to add in dummy summary rows + x_ard <- x_ard |> + dplyr::bind_rows(x_sum_rows) |> + dplyr::rowwise() |> + mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |> + dplyr::ungroup() + } + x_ard <- x_ard |> + dplyr::arrange(idx_o, tmp) |> + select(-"idx_o") + } - browser() # add indices to ARD - x_ard <- x$cards$tbl_hierarchical |> + x_ard <- x_ard |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::mutate(idx_sort = dplyr::cur_group_id()) |> - dplyr::ungroup() |> - cards::as_card() + dplyr::mutate(idx_filter = dplyr::cur_group_id()) |> + dplyr::ungroup() # re-add dropped args attribute + x_ard <- x_ard |> cards::as_card() attr(x_ard, "args") <- ard_args # get `by` variable count rows (do not correspond to a table row) rm_idx <- x_ard |> dplyr::filter(is.na(group1)) |> - dplyr::pull("idx_sort") |> + dplyr::pull("idx_filter") |> unique() # pull index order (each corresponding to one row of x$table_body) - pre_sort_idx <- x_ard |> - dplyr::pull("idx_sort") |> + pre_filter_idx <- x_ard |> + dplyr::pull("idx_filter") |> unique() |> setdiff(rm_idx) |> as.character() - # apply sorting - x_ard_sort <- x_ard |> cards::ard_filter({{ filter }}) + browser() + # apply filtering + x_ard_filter <- x_ard |> cards::ard_filter({{ filter }}) - # pull updated index order after sorting - post_sort_idx <- x_ard_sort |> - dplyr::pull("idx_sort") |> + # pull updated index order after filtering + post_filter_idx <- x_ard_filter |> + dplyr::pull("idx_filter") |> unique() |> setdiff(rm_idx) |> as.character() # get updated (relative) row positions - idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx] + idx <- (seq_len(length(pre_filter_idx)) |> stats::setNames(pre_filter_idx))[post_filter_idx] # update x$cards - x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort") + if ("tmp" %in% names(x_ard_filter)) { + x_ard_filter <- x_ard_filter |> + dplyr::filter(is.na(tmp)) |> + select(-"tmp") + } + x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_filter") # update x$table_body x$table_body <- x$table_body[idx, ] diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index b3b6c9d03..6ebcad68a 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -340,7 +340,7 @@ internal_tbl_hierarchical <- function(data, ) |> append( list( - cards = list(cards |> cards::ard_sort("alphanumeric")) |> stats::setNames(calling_fun), + cards = list(cards) |> stats::setNames(calling_fun), inputs = tbl_hierarchical_inputs ) ) |> diff --git a/man/tbl_filter.Rd b/man/tbl_filter.Rd index 66dfdeb22..640ec7264 100644 --- a/man/tbl_filter.Rd +++ b/man/tbl_filter.Rd @@ -14,20 +14,6 @@ tbl_filter(x, ...) A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} \item{...}{These dots are for future extensions and must be empty.} - -\item{t}{(scalar \code{numeric})\cr -Threshold used to determine which rows will be retained.} - -\item{gt}{(scalar \code{logical})\cr -Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).} - -\item{eq}{(scalar \code{logical})\cr -Whether to include the value of \code{t} in the filtered range, i.e. whether to use exclusive comparators (\code{>}, \code{<}) or -inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.} - -\item{.stat}{(\code{string})\cr -Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels. -Default is \code{"n"}.} } \value{ A \code{gtsummary} of the same class as \code{x}. @@ -35,7 +21,25 @@ A \code{gtsummary} of the same class as \code{x}. \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr -This function is used to filter hierarchical table rows by frequency row sum. +This function is used to filter hierarchical table rows. +} +\details{ +The \code{filter} argument can be used to filter out rows of a table which do not meet the criteria provided as an +expression. Rows can be filtered on the values of any of the possible statistics (\code{n}, \code{p}, and \code{N}) provided they +are included at least once in the table, as well as the values of any \code{by} variables. Filtering is only applied to +rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at +least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria +themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e. +across all \code{by} variable values) by using aggregate functions such as \code{sum()} and \code{mean()}. + +Some examples of possible filters: +\itemize{ +\item \code{filter = n > 5} +\item \code{filter = n == 2 & p < 0.05} +\item \code{filter = sum(n) > 4} +\item \code{filter = mean(n) > 4 | n > 3} +\item \code{filter = any(n > 2 & TRTA == "Xanomeline High Dose")} +} } \examples{ \dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/tests/testthat/test-tbl_filter.R b/tests/testthat/test-tbl_filter.R index 13e0906cc..c06ef0c9b 100644 --- a/tests/testthat/test-tbl_filter.R +++ b/tests/testthat/test-tbl_filter.R @@ -16,11 +16,9 @@ test_that("tbl_filter.tbl_hierarchical() works", { withr::local_options(width = 200) # no errors - expect_silent(tbl <- tbl_filter(tbl, t = 10)) + expect_silent(tbl <- tbl_filter(tbl, n > 10)) expect_snapshot(tbl |> as.data.frame()) - - # .stat argument works - expect_silent(tbl <- tbl_filter(tbl, t = 10, .stat = "p")) + expect_silent(tbl <- tbl_filter(tbl, p > 10)) }) test_that("tbl_filter.tbl_hierarchical(gt) works", { From a9d9f1942adf179a8028d3effc9cae2ff4229653 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 14 Feb 2025 17:19:26 -0500 Subject: [PATCH 19/25] Sort using x$table_body --- R/tbl_sort.R | 79 ++++++++++++++++++++-------------- tests/testthat/test-tbl_sort.R | 15 +++++-- 2 files changed, 57 insertions(+), 37 deletions(-) diff --git a/R/tbl_sort.R b/R/tbl_sort.R index ecafd15a6..04f5bc0d7 100644 --- a/R/tbl_sort.R +++ b/R/tbl_sort.R @@ -69,7 +69,6 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." ) - x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical))) for (v in not_incl) { i <- length(ard_args$by) + which(ard_args$variables == v) x_sum_rows <- x_ard |> @@ -91,72 +90,86 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { variable_level = .g[[ncol(.g)]], stat_name = if (sort == "descending") stat_nm else "no_stat", stat = if (sort == "descending") list(sum) else list(0), - idx_o = min(.df$idx_o) + 1, tmp = TRUE ) } else { NULL } }, .keep = TRUE) - sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o) - # adjust prior row indices to add in dummy summary rows - x_ard <- x_ard |> - dplyr::bind_rows(x_sum_rows) |> - dplyr::rowwise() |> - mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |> - dplyr::ungroup() + + x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows) } - x_ard <- x_ard |> - dplyr::arrange(idx_o, tmp) |> - select(-"idx_o") } # add indices to ARD x_ard <- x_ard |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::mutate(idx_sort = dplyr::cur_group_id()) |> - dplyr::ungroup() + dplyr::mutate(idx_unsort = dplyr::cur_group_id()) + + gps <- x_ard |> + dplyr::group_keys() |> + dplyr::mutate(idx_unsort = dplyr::row_number()) |> + cards::as_card() |> + cards::rename_ard_groups_shift(shift = -1) |> + dplyr::filter(!variable %in% ard_args$by) |> + dplyr::rename(label = variable_level) + + overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."] + if (length(overall_lbl) > 0) { + gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl + if (length(ard_args$variables) > 1) { + gps$group1[gps$variable == "..ard_hierarchical_overall.."] <- "..ard_hierarchical_overall.." + } + } + + # match structure of ARD grouping columns to x$table_body grouping columns + gps <- gps |> tidyr::unnest(everything()) + outer_cols <- if (length(ard_args$variables) > 1) { + ard_args$variables |> + utils::head(-1) |> + stats::setNames(paste0("group", seq_len(length(ard_args$variables) - 1))) + } else { + NULL + } + for (g in names(outer_cols)) { + which_g <- gps$variable == outer_cols[g] + gps[g][which_g, ] <- gps$variable[which_g] + gps[paste0(g, "_level")][which_g, ] <- gps$label[which_g] + } + x$table_body <- x$table_body |> dplyr::left_join(gps, by = names(gps) |> utils::head(-1)) # re-add dropped args attribute - x_ard <- x_ard |> cards::as_card() + x_ard <- x_ard |> + dplyr::ungroup() |> + cards::as_card() attr(x_ard, "args") <- ard_args # get `by` variable count rows (do not correspond to a table row) rm_idx <- x_ard |> dplyr::filter(is.na(group1)) |> - dplyr::pull("idx_sort") |> + dplyr::pull("idx_unsort") |> unique() - # pull index order (each corresponding to one row of x$table_body) - pre_sort_idx <- x_ard |> - dplyr::pull("idx_sort") |> - unique() |> - setdiff(rm_idx) |> - as.character() - # apply sorting x_ard_sort <- x_ard |> cards::ard_sort(sort) # pull updated index order after sorting - post_sort_idx <- x_ard_sort |> - dplyr::pull("idx_sort") |> + idx_sort <- x_ard_sort |> + dplyr::pull("idx_unsort") |> unique() |> - setdiff(rm_idx) |> - as.character() - - # get updated (relative) row positions - idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx] + setdiff(rm_idx) - # update x$cards if ("tmp" %in% names(x_ard_sort)) { x_ard_sort <- x_ard_sort |> dplyr::filter(is.na(tmp)) |> select(-"tmp") } - x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort") + + # update x$cards + x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_unsort") # update x$table_body - x$table_body <- x$table_body[idx, ] + x$table_body <- x$table_body[match(idx_sort, x$table_body$idx_unsort), ] |> select(-"idx_unsort") x } diff --git a/tests/testthat/test-tbl_sort.R b/tests/testthat/test-tbl_sort.R index 0621a07f5..1923d7015 100644 --- a/tests/testthat/test-tbl_sort.R +++ b/tests/testthat/test-tbl_sort.R @@ -57,10 +57,17 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", { # ascending (A to Z) expect_silent(result <- tbl_sort(result, sort = "alphanumeric")) - # results match with tbl_hierarchical which sorts A to Z by default + # results are ordered correctly expect_equal( - result |> as.data.frame(), - tbl |> as.data.frame() + as.data.frame(result)[[1]], + c( + "Number of patients with event", "F", "BLACK OR AFRICAN AMERICAN", "APPLICATION SITE PRURITUS", + "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "WHITE", "APPLICATION SITE ERYTHEMA", + "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "M", "AMERICAN INDIAN OR ALASKA NATIVE", "ERYTHEMA", + "BLACK OR AFRICAN AMERICAN", "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "WHITE", + "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", + "ERYTHEMA" + ) ) }) @@ -85,7 +92,7 @@ test_that("tbl_sort.tbl_hierarchical() works when there is no overall row in x", expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall, sort = "alphanumeric")) expect_equal( tbl_no_overall$table_body, - tbl$table_body[-1, ] + tbl_sort(tbl, sort = "alphanumeric")$table_body[-1, ] ) }) From 07646b7121e578ef21e165e193b026e7900d1d87 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Fri, 14 Feb 2025 17:27:06 -0500 Subject: [PATCH 20/25] temp --- R/tbl_filter.R | 92 ++++++++++++++++++----------- tests/testthat/_snaps/tbl_filter.md | 28 +-------- tests/testthat/test-tbl_filter.R | 46 +++++---------- 3 files changed, 72 insertions(+), 94 deletions(-) diff --git a/R/tbl_filter.R b/R/tbl_filter.R index 24b25872d..781ed1ec8 100644 --- a/R/tbl_filter.R +++ b/R/tbl_filter.R @@ -6,7 +6,7 @@ #' #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr #' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. -#' @inheritParams cards::ard_sort +#' @inheritParams cards::ard_filter #' @inheritParams rlang::args_dots_empty #' #' @details @@ -68,10 +68,16 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) { by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) x_ard <- x$cards$tbl_hierarchical - # add dummy rows for variables not in include so their label rows are ordered correctly + # add dummy rows for variables not in include so their label rows are filtered correctly not_incl <- setdiff(ard_args$variables, ard_args$include) if (length(not_incl) > 0) { - x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical))) + cli::cli_inform( + "Not all hierarchy variables present in the table were included in the {.arg include} argument. + These variables ({not_incl}) do not have event rate data available so the total sum of the event rates + for this hierarchy section will be used instead. To use true event rates for all sections of the table, + set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." + ) + for (v in not_incl) { i <- length(ard_args$by) + which(ard_args$variables == v) x_sum_rows <- x_ard |> @@ -85,75 +91,89 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) { variable_level = .g[[ncol(.g)]], stat_name = "no_stat", stat = list(0), - idx_o = min(.df$idx_o) + 1, tmp = TRUE ) } else { NULL } }, .keep = TRUE) - sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o) - # adjust prior row indices to add in dummy summary rows - x_ard <- x_ard |> - dplyr::bind_rows(x_sum_rows) |> - dplyr::rowwise() |> - mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |> - dplyr::ungroup() + + x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows) } - x_ard <- x_ard |> - dplyr::arrange(idx_o, tmp) |> - select(-"idx_o") } # add indices to ARD x_ard <- x_ard |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::mutate(idx_filter = dplyr::cur_group_id()) |> - dplyr::ungroup() + dplyr::mutate(idx_nofilter = dplyr::cur_group_id()) + + gps <- x_ard |> + dplyr::group_keys() |> + dplyr::mutate(idx_nofilter = dplyr::row_number()) |> + cards::as_card() |> + cards::rename_ard_groups_shift(shift = -1) |> + dplyr::filter(!variable %in% ard_args$by) |> + dplyr::rename(label = variable_level) + + overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."] + if (length(overall_lbl) > 0) { + gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl + if (length(ard_args$variables) > 1) { + gps$group1[gps$variable == "..ard_hierarchical_overall.."] <- "..ard_hierarchical_overall.." + } + } + + # match structure of ARD grouping columns to x$table_body grouping columns + gps <- gps |> tidyr::unnest(everything()) + outer_cols <- if (length(ard_args$variables) > 1) { + ard_args$variables |> + utils::head(-1) |> + stats::setNames(paste0("group", seq_len(length(ard_args$variables) - 1))) + } else { + NULL + } + for (g in names(outer_cols)) { + which_g <- gps$variable == outer_cols[g] + gps[g][which_g, ] <- gps$variable[which_g] + gps[paste0(g, "_level")][which_g, ] <- gps$label[which_g] + } + x$table_body <- x$table_body |> dplyr::left_join(gps, by = names(gps) |> utils::head(-1)) # re-add dropped args attribute - x_ard <- x_ard |> cards::as_card() + x_ard <- x_ard |> + dplyr::ungroup() |> + cards::as_card() attr(x_ard, "args") <- ard_args # get `by` variable count rows (do not correspond to a table row) rm_idx <- x_ard |> dplyr::filter(is.na(group1)) |> - dplyr::pull("idx_filter") |> + dplyr::pull("idx_nofilter") |> unique() - # pull index order (each corresponding to one row of x$table_body) - pre_filter_idx <- x_ard |> - dplyr::pull("idx_filter") |> - unique() |> - setdiff(rm_idx) |> - as.character() - - browser() # apply filtering x_ard_filter <- x_ard |> cards::ard_filter({{ filter }}) # pull updated index order after filtering - post_filter_idx <- x_ard_filter |> - dplyr::pull("idx_filter") |> + idx_filter <- x_ard_filter |> + dplyr::pull("idx_nofilter") |> unique() |> - setdiff(rm_idx) |> - as.character() - - # get updated (relative) row positions - idx <- (seq_len(length(pre_filter_idx)) |> stats::setNames(pre_filter_idx))[post_filter_idx] + setdiff(rm_idx) - # update x$cards if ("tmp" %in% names(x_ard_filter)) { x_ard_filter <- x_ard_filter |> dplyr::filter(is.na(tmp)) |> select(-"tmp") } - x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_filter") + + # update x$cards + x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter") # update x$table_body - x$table_body <- x$table_body[idx, ] + x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] |> select(-"idx_nofilter") x + # if (nrow(x$table_body) > 0) { # cli::cli_inform( # "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md index b9f41f6dc..82b4ca8d0 100644 --- a/tests/testthat/_snaps/tbl_filter.md +++ b/tests/testthat/_snaps/tbl_filter.md @@ -1,7 +1,7 @@ # tbl_filter.tbl_hierarchical() error messaging works Code - tbl_filter(data.frame(), t = 10) + tbl_filter(data.frame(), sum(n) > 10) Condition Error in `check_class()`: ! The `x` argument must be class , not a data frame. @@ -9,31 +9,7 @@ --- Code - tbl_filter(tbl, t = "10") - Condition - Error in `tbl_filter()`: - ! `filter` must be an expression. - ---- - - Code - tbl_filter(tbl, t = "10", gt = "yes") - Condition - Error in `tbl_filter()`: - ! `filter` must be an expression. - ---- - - Code - tbl_filter(tbl, t = "10", eq = "no") - Condition - Error in `tbl_filter()`: - ! `filter` must be an expression. - ---- - - Code - tbl_filter(tbl, t = "10", .stat = "pct") + tbl_filter(tbl, 10) Condition Error in `tbl_filter()`: ! `filter` must be an expression. diff --git a/tests/testthat/test-tbl_filter.R b/tests/testthat/test-tbl_filter.R index c06ef0c9b..cad926ba8 100644 --- a/tests/testthat/test-tbl_filter.R +++ b/tests/testthat/test-tbl_filter.R @@ -16,17 +16,17 @@ test_that("tbl_filter.tbl_hierarchical() works", { withr::local_options(width = 200) # no errors - expect_silent(tbl <- tbl_filter(tbl, n > 10)) + expect_silent(tbl <- tbl_filter(tbl, sum(n) > 10)) expect_snapshot(tbl |> as.data.frame()) - expect_silent(tbl <- tbl_filter(tbl, p > 10)) + expect_silent(tbl <- tbl_filter(tbl, p > 0.05)) }) test_that("tbl_filter.tbl_hierarchical(gt) works", { # gt = TRUE - expect_silent(tbl_gt <- tbl_filter(tbl, t = 10)) + expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 10)) # gt = FALSE - expect_message(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE)) + expect_message(tbl_lt <- tbl_filter(tbl, sum(n) < 10)) expect_equal( dplyr::inner_join( @@ -56,22 +56,22 @@ test_that("tbl_filter.tbl_hierarchical(gt) works", { test_that("tbl_filter.tbl_hierarchical(eq) works", { # gt = TRUE, eq = FALSE - expect_silent(tbl_gt <- tbl_filter(tbl, t = 12)) + expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 12)) # gt = TRUE, eq = TRUE - expect_silent(tbl_geq <- tbl_filter(tbl, t = 12, eq = TRUE)) + expect_silent(tbl_geq <- tbl_filter(tbl, sum(n) >= 12)) expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body)) # gt = FALSE, eq = FALSE - expect_silent(tbl_lt <- tbl_filter(tbl, t = 12, gt = FALSE)) + expect_silent(tbl_lt <- tbl_filter(tbl, sum(n) < 12)) # gt = TRUE, eq = TRUE - expect_silent(tbl_leq <- tbl_filter(tbl, t = 12, gt = FALSE, eq = TRUE)) + expect_silent(tbl_leq <- tbl_filter(tbl, sum(n) <= 12)) expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body)) }) test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", { - expect_silent(tbl <- tbl_filter(tbl, t = 200)) + expect_silent(tbl <- tbl_filter(tbl, sum(n) > 200)) expect_equal(nrow(tbl$table_body), 0) }) @@ -85,7 +85,7 @@ test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", { overall_row = TRUE ) - expect_silent(tbl_single <- tbl_filter(tbl_single, t = 20)) + expect_silent(tbl_single <- tbl_filter(tbl_single, sum(n) > 20)) expect_equal(nrow(tbl_single$table_body), 4) }) @@ -100,37 +100,19 @@ test_that("tbl_filter.tbl_hierarchical() works when some variables not included overall_row = TRUE ) - expect_message(tbl_filter(tbl, t = 10)) + expect_message(tbl_filter(tbl, sum(n) > 10)) }) test_that("tbl_filter.tbl_hierarchical() error messaging works", { # invalid x input expect_snapshot( - tbl_filter(data.frame(), t = 10), + tbl_filter(data.frame(), sum(n) > 10), error = TRUE ) - # invalid t input + # invalid filter input expect_snapshot( - tbl_filter(tbl, t = "10"), - error = TRUE - ) - - # invalid gt input - expect_snapshot( - tbl_filter(tbl, t = "10", gt = "yes"), - error = TRUE - ) - - # invalid eq input - expect_snapshot( - tbl_filter(tbl, t = "10", eq = "no"), - error = TRUE - ) - - # invalid .stat input - expect_snapshot( - tbl_filter(tbl, t = "10", .stat = "pct"), + tbl_filter(tbl, 10), error = TRUE ) }) From 0168a6db562df5b6bf634b9c1a8e22f497425207 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 18 Feb 2025 19:15:13 -0500 Subject: [PATCH 21/25] Fix filtering function, add parameter --- R/tbl_filter.R | 85 +++++++++++----------------- R/tbl_sort.R | 87 ++++++++++++++++------------- man/tbl_filter.Rd | 21 ++++--- tests/testthat/_snaps/tbl_filter.md | 22 ++++++++ 4 files changed, 117 insertions(+), 98 deletions(-) diff --git a/R/tbl_filter.R b/R/tbl_filter.R index 781ed1ec8..5095fe30a 100644 --- a/R/tbl_filter.R +++ b/R/tbl_filter.R @@ -2,10 +2,13 @@ #' #' @description `r lifecycle::badge('experimental')`\cr #' -#' This function is used to filter hierarchical table rows. +#' This function is used to filter hierarchical table rows. Filters are not applied to summary or overall rows. #' #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr #' A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`. +#' @param keep_empty_summary (scalar `logical`)\cr +#' Logical argument indicating whether to retain summary rows corresponding to table hierarchy sections that have had +#' all rows filtered out. Default is `TRUE`. #' @inheritParams cards::ard_filter #' @inheritParams rlang::args_dots_empty #' @@ -13,15 +16,15 @@ #' The `filter` argument can be used to filter out rows of a table which do not meet the criteria provided as an #' expression. Rows can be filtered on the values of any of the possible statistics (`n`, `p`, and `N`) provided they #' are included at least once in the table, as well as the values of any `by` variables. Filtering is only applied to -#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at -#' least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria -#' themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e. -#' across all `by` variable values) by using aggregate functions such as `sum()` and `mean()`. +#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows are kept +#' regardless of whether they meet the filtering criteria themselves. In addition to filtering on individual statistic +#' values, filters can be applied across the row (i.e. across all `by` variable values) by using aggregate functions +#' such as `sum()` and `mean()`. #' #' Some examples of possible filters: #' - `filter = n > 5` #' - `filter = n == 2 & p < 0.05` -#' - `filter = sum(n) > 4` +#' - `filter = sum(n) >= 4` #' - `filter = mean(n) > 4 | n > 3` #' - `filter = any(n > 2 & TRTA == "Xanomeline High Dose")` #' @@ -61,7 +64,7 @@ tbl_filter <- function(x, ...) { #' @export #' @rdname tbl_filter -tbl_filter.tbl_hierarchical <- function(x, filter, ...) { +tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, ...) { set_cli_abort_call() ard_args <- attributes(x$cards$tbl_hierarchical)$args @@ -69,38 +72,7 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) { x_ard <- x$cards$tbl_hierarchical # add dummy rows for variables not in include so their label rows are filtered correctly - not_incl <- setdiff(ard_args$variables, ard_args$include) - if (length(not_incl) > 0) { - cli::cli_inform( - "Not all hierarchy variables present in the table were included in the {.arg include} argument. - These variables ({not_incl}) do not have event rate data available so the total sum of the event rates - for this hierarchy section will be used instead. To use true event rates for all sections of the table, - set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." - ) - - for (v in not_incl) { - i <- length(ard_args$by) + which(ard_args$variables == v) - x_sum_rows <- x_ard |> - dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |> - dplyr::group_map(function(.df, .g) { - g_cur <- .g[[ncol(.g) - 1]] - if (!is.na(g_cur) && g_cur == v) { - # dummy summary row to add in - .df[1, ] |> mutate( - variable = g_cur, - variable_level = .g[[ncol(.g)]], - stat_name = "no_stat", - stat = list(0), - tmp = TRUE - ) - } else { - NULL - } - }, .keep = TRUE) - - x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows) - } - } + x_ard <- x_ard |> .append_not_incl(ard_args) # add indices to ARD x_ard <- x_ard |> @@ -160,26 +132,37 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) { unique() |> setdiff(rm_idx) + # apply filtering while retaining original row order + idx_filter <- intersect(x$table_body$idx_nofilter, idx_filter) + x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] + if ("tmp" %in% names(x_ard_filter)) { x_ard_filter <- x_ard_filter |> dplyr::filter(is.na(tmp)) |> select(-"tmp") } - # update x$cards - x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter") + # remove summary rows from empty sections if requested + if (!keep_empty_summary) { + if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) { + x$table_body <- x$table_body |> dplyr::filter(!variable %in% outer_cols) + x_ard_filter <- x_ard_filter |> dplyr::filter(!variable %in% outer_cols) + } else { + for (v in rev(outer_cols)) { + empty_rows <- x$table_body |> + dplyr::filter(variable == dplyr::lead(variable) & variable == v) |> + dplyr::pull("idx_nofilter") + x$table_body <- x$table_body |> dplyr::filter(!idx_nofilter %in% empty_rows) + x_ard_filter <- x_ard_filter |> dplyr::filter(!idx_nofilter %in% empty_rows) + } + } + } # update x$table_body - x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] |> select(-"idx_nofilter") + x$table_body <- x$table_body |> select(-"idx_nofilter") - x + # update x$cards + x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter") - # if (nrow(x$table_body) > 0) { - # cli::cli_inform( - # "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept - # regardless of whether they meet the filtering criteria themselves.", - # .frequency = "once", - # .frequency_id = "sum_rows_lt" - # ) - # } + x } diff --git a/R/tbl_sort.R b/R/tbl_sort.R index 04f5bc0d7..0f2b6aa76 100644 --- a/R/tbl_sort.R +++ b/R/tbl_sort.R @@ -60,46 +60,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { x_ard <- x$cards$tbl_hierarchical # add dummy rows for variables not in include so their label rows are sorted correctly - not_incl <- setdiff(ard_args$variables, ard_args$include) - if (length(not_incl) > 0) { - cli::cli_inform( - "Not all hierarchy variables present in the table were included in the {.arg include} argument. - These variables ({not_incl}) do not have event rate data available so the total sum of the event rates - for this hierarchy section will be used instead. To use true event rates for all sections of the table, - set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." - ) - - for (v in not_incl) { - i <- length(ard_args$by) + which(ard_args$variables == v) - x_sum_rows <- x_ard |> - dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |> - dplyr::group_map(function(.df, .g) { - # get pseudo-summary row stat value for descending sort - if (sort == "descending") { - stat_nm <- setdiff(.df$stat_name, "N")[1] - sum <- .df |> - dplyr::filter(stat_name == !!stat_nm) |> - dplyr::summarize(s = sum(unlist(stat))) |> - dplyr::pull(s) - } - g_cur <- .g[[ncol(.g) - 1]] - if (!is.na(g_cur) && g_cur == v) { - # dummy summary row to add in - .df[1, ] |> mutate( - variable = g_cur, - variable_level = .g[[ncol(.g)]], - stat_name = if (sort == "descending") stat_nm else "no_stat", - stat = if (sort == "descending") list(sum) else list(0), - tmp = TRUE - ) - } else { - NULL - } - }, .keep = TRUE) - - x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows) - } - } + x_ard <- x_ard |> .append_not_incl(ard_args, sort) # add indices to ARD x_ard <- x_ard |> @@ -173,3 +134,49 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { x } + +.append_not_incl <- function(x, ard_args, sort = NULL) { + # add dummy rows for variables not in include so their label rows are sorted correctly + not_incl <- setdiff(ard_args$variables, ard_args$include) + if (length(not_incl) > 0) { + cli::cli_inform( + "Not all hierarchy variables present in the table were included in the {.arg include} argument. + These variables ({not_incl}) do not have event rate data available so the total sum of the event rates + for this hierarchy section will be used instead. To use true event rates for all sections of the table, + set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}." + ) + + for (v in not_incl) { + i <- length(ard_args$by) + which(ard_args$variables == v) + x_sum_rows <- x |> + dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |> + dplyr::group_map(function(.df, .g) { + # get pseudo-summary row stat value for descending sort + if (!is.null(sort) && sort == "descending") { + stat_nm <- setdiff(.df$stat_name, "N")[1] + sum <- .df |> + dplyr::filter(stat_name == !!stat_nm) |> + dplyr::summarize(s = sum(unlist(stat))) |> + dplyr::pull(s) + } + g_cur <- .g[[ncol(.g) - 1]] + if (!is.na(g_cur) && g_cur == v) { + # dummy summary row to add in + .df[1, ] |> mutate( + variable = g_cur, + variable_level = .g[[ncol(.g)]], + stat_name = if (!is.null(sort) && sort == "descending") stat_nm else "no_stat", + stat = if (!is.null(sort) && sort == "descending") list(sum) else list(0), + tmp = TRUE + ) + } else { + NULL + } + }, .keep = TRUE) + + x <- x |> dplyr::bind_rows(x_sum_rows) + } + } + + x +} diff --git a/man/tbl_filter.Rd b/man/tbl_filter.Rd index 640ec7264..0de42e998 100644 --- a/man/tbl_filter.Rd +++ b/man/tbl_filter.Rd @@ -7,13 +7,20 @@ \usage{ tbl_filter(x, ...) -\method{tbl_filter}{tbl_hierarchical}(x, filter, ...) +\method{tbl_filter}{tbl_hierarchical}(x, filter, keep_empty_summary = TRUE, ...) } \arguments{ \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.} \item{...}{These dots are for future extensions and must be empty.} + +\item{filter}{(\code{expression})\cr an expression that is used to filter rows of the hierarchical ARD. See the Details +section below for more information.} + +\item{keep_empty_summary}{(scalar \code{logical})\cr +Logical argument indicating whether to retain summary rows corresponding to table hierarchy sections that have had +all rows filtered out. Default is \code{TRUE}.} } \value{ A \code{gtsummary} of the same class as \code{x}. @@ -21,22 +28,22 @@ A \code{gtsummary} of the same class as \code{x}. \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr -This function is used to filter hierarchical table rows. +This function is used to filter hierarchical table rows. Filters are not applied to summary or overall rows. } \details{ The \code{filter} argument can be used to filter out rows of a table which do not meet the criteria provided as an expression. Rows can be filtered on the values of any of the possible statistics (\code{n}, \code{p}, and \code{N}) provided they are included at least once in the table, as well as the values of any \code{by} variables. Filtering is only applied to -rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at -least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria -themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e. -across all \code{by} variable values) by using aggregate functions such as \code{sum()} and \code{mean()}. +rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows are kept +regardless of whether they meet the filtering criteria themselves. In addition to filtering on individual statistic +values, filters can be applied across the row (i.e. across all \code{by} variable values) by using aggregate functions +such as \code{sum()} and \code{mean()}. Some examples of possible filters: \itemize{ \item \code{filter = n > 5} \item \code{filter = n == 2 & p < 0.05} -\item \code{filter = sum(n) > 4} +\item \code{filter = sum(n) >= 4} \item \code{filter = mean(n) > 4 | n > 3} \item \code{filter = any(n > 2 & TRTA == "Xanomeline High Dose")} } diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md index 82b4ca8d0..cdca373e9 100644 --- a/tests/testthat/_snaps/tbl_filter.md +++ b/tests/testthat/_snaps/tbl_filter.md @@ -1,3 +1,25 @@ +# tbl_filter.tbl_hierarchical() works + + Code + as.data.frame(tbl) + Output + **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 + 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) + 2 F 13 (25%) 18 (45%) 23 (46%) + 3 BLACK OR AFRICAN AMERICAN 3 (60%) 4 (67%) 3 (50%) + 4 WHITE 10 (21%) 14 (41%) 20 (45%) + 5 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) + 6 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) + 7 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) + 8 M 13 (39%) 24 (55%) 17 (50%) + 9 AMERICAN INDIAN OR ALASKA NATIVE 0 (NA%) 1 (100%) 0 (NA%) + 10 BLACK OR AFRICAN AMERICAN 1 (33%) 1 (33%) 0 (NA%) + 11 WHITE 12 (40%) 22 (55%) 17 (50%) + 12 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) + 13 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) + 14 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) + 15 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) + # tbl_filter.tbl_hierarchical() error messaging works Code From 73861f16e70a24e8863357f97afd9f67a64fa972 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 18 Feb 2025 19:59:39 -0500 Subject: [PATCH 22/25] Update tests --- tests/testthat/_snaps/tbl_filter.md | 19 +++++++ tests/testthat/test-tbl_filter.R | 77 ++++++++++++++++++----------- 2 files changed, 66 insertions(+), 30 deletions(-) diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md index cdca373e9..f1386cf8a 100644 --- a/tests/testthat/_snaps/tbl_filter.md +++ b/tests/testthat/_snaps/tbl_filter.md @@ -20,6 +20,25 @@ 14 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) 15 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) +# tbl_filter.tbl_hierarchical() works with various different filter conditions + + Code + as.data.frame(tbl_f) + Output + **Sex** \n    **Race** \n        **Reported Term for the Adverse Event** **Placebo** \nN = 86 **Xanomeline High Dose** \nN = 84 **Xanomeline Low Dose** \nN = 84 + 1 Number of patients with event 26 (30%) 42 (50%) 40 (48%) + 2 F 13 (25%) 18 (45%) 23 (46%) + 3 WHITE 10 (21%) 14 (41%) 20 (45%) + 4 APPLICATION SITE PRURITUS 2 (4.2%) 8 (24%) 10 (23%) + 5 ERYTHEMA 6 (13%) 6 (18%) 8 (18%) + 6 APPLICATION SITE ERYTHEMA 2 (4.2%) 5 (15%) 5 (11%) + 7 M 13 (39%) 24 (55%) 17 (50%) + 8 WHITE 12 (40%) 22 (55%) 17 (50%) + 9 APPLICATION SITE PRURITUS 1 (3.3%) 12 (30%) 10 (29%) + 10 DIARRHOEA 6 (20%) 3 (7.5%) 2 (5.9%) + 11 ERYTHEMA 3 (10%) 5 (13%) 6 (18%) + 12 APPLICATION SITE ERYTHEMA 1 (3.3%) 10 (25%) 7 (21%) + # tbl_filter.tbl_hierarchical() error messaging works Code diff --git a/tests/testthat/test-tbl_filter.R b/tests/testthat/test-tbl_filter.R index cad926ba8..2498d4e3a 100644 --- a/tests/testthat/test-tbl_filter.R +++ b/tests/testthat/test-tbl_filter.R @@ -17,28 +17,34 @@ test_that("tbl_filter.tbl_hierarchical() works", { # no errors expect_silent(tbl <- tbl_filter(tbl, sum(n) > 10)) + + # row order is retained expect_snapshot(tbl |> as.data.frame()) - expect_silent(tbl <- tbl_filter(tbl, p > 0.05)) }) -test_that("tbl_filter.tbl_hierarchical(gt) works", { - # gt = TRUE - expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 10)) +test_that("tbl_filter.tbl_hierarchical(keep_empty_summary) works", { + tbl2 <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AEBODSYS, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID + ) - # gt = FALSE - expect_message(tbl_lt <- tbl_filter(tbl, sum(n) < 10)) + # keep summary rows + expect_silent(tbl_f <- tbl_filter(tbl2, sum(n) > 10, keep_empty_summary = TRUE)) + expect_equal(nrow(tbl_f$table_body), 29) - expect_equal( - dplyr::inner_join( - tbl_gt$table_body, - tbl_lt$table_body, - by = names(tbl_gt$table_body) - ) |> - dplyr::filter(variable == "AETERM") |> - nrow(), - 0 - ) + # remove summary rows + expect_silent(tbl_f <- tbl_filter(tbl2, sum(n) > 10, keep_empty_summary = FALSE)) + expect_equal(nrow(tbl_f$table_body), 22) +}) + +test_that("tbl_filter.tbl_hierarchical() works with various different filter conditions", { + withr::local_options(width = 200) + expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 10)) + expect_silent(tbl_lt <- tbl_filter(tbl, sum(n) <= 10)) expect_equal( sum( tbl_gt$table_body |> @@ -52,27 +58,38 @@ test_that("tbl_filter.tbl_hierarchical(gt) works", { dplyr::filter(variable == "AETERM") |> nrow() ) -}) -test_that("tbl_filter.tbl_hierarchical(eq) works", { - # gt = TRUE, eq = FALSE - expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 12)) + expect_silent(tbl_f <- tbl_filter(tbl, n > 5)) + expect_equal(nrow(tbl_f$table_body), 14) - # gt = TRUE, eq = TRUE - expect_silent(tbl_geq <- tbl_filter(tbl, sum(n) >= 12)) - expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body)) + expect_silent(tbl_f <- tbl_filter(tbl, p > 0.05)) + expect_equal(nrow(tbl_f$table_body), 25) - # gt = FALSE, eq = FALSE - expect_silent(tbl_lt <- tbl_filter(tbl, sum(n) < 12)) + expect_silent(tbl_f <- tbl_filter(tbl, n == 2 & p < 0.05)) + expect_equal(nrow(tbl_f$table_body), 11) - # gt = TRUE, eq = TRUE - expect_silent(tbl_leq <- tbl_filter(tbl, sum(n) <= 12)) - expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body)) + expect_silent(tbl_f <- tbl_filter(tbl, mean(n) > 4 | n > 3)) + expect_equal(nrow(tbl_f$table_body), 15) + + expect_silent(tbl_f <- tbl_filter(tbl, any(n > 2 & TRTA == "Xanomeline High Dose"), keep_empty_summary = FALSE)) + expect_snapshot(tbl_f |> as.data.frame()) }) test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", { - expect_silent(tbl <- tbl_filter(tbl, sum(n) > 200)) - expect_equal(nrow(tbl$table_body), 0) + tbl2 <- tbl_hierarchical( + data = ADAE_subset, + variables = c(SEX, RACE, AETERM), + by = TRTA, + denominator = cards::ADSL |> mutate(TRTA = ARM), + id = USUBJID + ) + + expect_silent(tbl_f <- tbl_filter(tbl2, sum(n) > 200, keep_empty_summary = FALSE)) + expect_equal(nrow(tbl_f$table_body), 0) + + # overall row present + expect_silent(tbl_f <- tbl_filter(tbl, sum(n) > 200, keep_empty_summary = FALSE)) + expect_equal(nrow(tbl_f$table_body), 1) }) test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", { From a9737e41930c46bb9c3654a21150e61b36dbfffc Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 18 Feb 2025 20:47:20 -0500 Subject: [PATCH 23/25] Fix checks --- R/tbl_filter.R | 88 ++++++++++++++------------------------------------ R/tbl_sort.R | 84 ++++++++++++++++++++++++++--------------------- 2 files changed, 73 insertions(+), 99 deletions(-) diff --git a/R/tbl_filter.R b/R/tbl_filter.R index 5095fe30a..2b065dfd6 100644 --- a/R/tbl_filter.R +++ b/R/tbl_filter.R @@ -71,56 +71,15 @@ tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, .. by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) x_ard <- x$cards$tbl_hierarchical - # add dummy rows for variables not in include so their label rows are filtered correctly - x_ard <- x_ard |> .append_not_incl(ard_args) - - # add indices to ARD - x_ard <- x_ard |> - dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::mutate(idx_nofilter = dplyr::cur_group_id()) - - gps <- x_ard |> - dplyr::group_keys() |> - dplyr::mutate(idx_nofilter = dplyr::row_number()) |> - cards::as_card() |> - cards::rename_ard_groups_shift(shift = -1) |> - dplyr::filter(!variable %in% ard_args$by) |> - dplyr::rename(label = variable_level) - - overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."] - if (length(overall_lbl) > 0) { - gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl - if (length(ard_args$variables) > 1) { - gps$group1[gps$variable == "..ard_hierarchical_overall.."] <- "..ard_hierarchical_overall.." - } - } - - # match structure of ARD grouping columns to x$table_body grouping columns - gps <- gps |> tidyr::unnest(everything()) - outer_cols <- if (length(ard_args$variables) > 1) { - ard_args$variables |> - utils::head(-1) |> - stats::setNames(paste0("group", seq_len(length(ard_args$variables) - 1))) - } else { - NULL - } - for (g in names(outer_cols)) { - which_g <- gps$variable == outer_cols[g] - gps[g][which_g, ] <- gps$variable[which_g] - gps[paste0(g, "_level")][which_g, ] <- gps$label[which_g] - } - x$table_body <- x$table_body |> dplyr::left_join(gps, by = names(gps) |> utils::head(-1)) - - # re-add dropped args attribute - x_ard <- x_ard |> - dplyr::ungroup() |> - cards::as_card() - attr(x_ard, "args") <- ard_args + # add row indices to match structure of ard to x$table_body + reshape_x <- .reshape_ard_compare(x, x_ard, ard_args) + x <- reshape_x$x + x_ard <- reshape_x$x_ard # get `by` variable count rows (do not correspond to a table row) rm_idx <- x_ard |> - dplyr::filter(is.na(group1)) |> - dplyr::pull("idx_nofilter") |> + dplyr::filter(is.na(.data$group1)) |> + dplyr::pull("pre_idx") |> unique() # apply filtering @@ -128,41 +87,44 @@ tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, .. # pull updated index order after filtering idx_filter <- x_ard_filter |> - dplyr::pull("idx_nofilter") |> + dplyr::pull("pre_idx") |> unique() |> setdiff(rm_idx) # apply filtering while retaining original row order - idx_filter <- intersect(x$table_body$idx_nofilter, idx_filter) - x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] + idx_filter <- intersect(x$table_body$pre_idx, idx_filter) + x$table_body <- x$table_body[match(idx_filter, x$table_body$pre_idx), ] if ("tmp" %in% names(x_ard_filter)) { x_ard_filter <- x_ard_filter |> - dplyr::filter(is.na(tmp)) |> + dplyr::filter(is.na(.data$tmp)) |> select(-"tmp") } # remove summary rows from empty sections if requested if (!keep_empty_summary) { - if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) { - x$table_body <- x$table_body |> dplyr::filter(!variable %in% outer_cols) - x_ard_filter <- x_ard_filter |> dplyr::filter(!variable %in% outer_cols) - } else { - for (v in rev(outer_cols)) { - empty_rows <- x$table_body |> - dplyr::filter(variable == dplyr::lead(variable) & variable == v) |> - dplyr::pull("idx_nofilter") - x$table_body <- x$table_body |> dplyr::filter(!idx_nofilter %in% empty_rows) - x_ard_filter <- x_ard_filter |> dplyr::filter(!idx_nofilter %in% empty_rows) + if (length(ard_args$variables) > 1) { + outer_cols <- ard_args$variables |> utils::head(-1) + if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) { + x$table_body <- x$table_body |> dplyr::filter(!.data$variable %in% outer_cols) + x_ard_filter <- x_ard_filter |> dplyr::filter(!.data$variable %in% outer_cols) + } else { + for (v in rev(outer_cols)) { + empty_rows <- x$table_body |> + dplyr::filter(.data$variable == dplyr::lead(.data$variable) & .data$variable == v) |> + dplyr::pull("pre_idx") + x$table_body <- x$table_body |> dplyr::filter(!.data$pre_idx %in% empty_rows) + x_ard_filter <- x_ard_filter |> dplyr::filter(!.data$pre_idx %in% empty_rows) + } } } } # update x$table_body - x$table_body <- x$table_body |> select(-"idx_nofilter") + x$table_body <- x$table_body |> select(-"pre_idx") # update x$cards - x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter") + x$cards$tbl_hierarchical <- x_ard_filter |> select(-"pre_idx") x } diff --git a/R/tbl_sort.R b/R/tbl_sort.R index 0f2b6aa76..2a84e070a 100644 --- a/R/tbl_sort.R +++ b/R/tbl_sort.R @@ -56,25 +56,64 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { set_cli_abort_call() ard_args <- attributes(x$cards$tbl_hierarchical)$args - by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) x_ard <- x$cards$tbl_hierarchical + # add row indices match structure of ard to x$table_body + reshape_x <- .reshape_ard_compare(x, x_ard, ard_args, sort) + x <- reshape_x$x + x_ard <- reshape_x$x_ard + + # get `by` variable count rows (do not correspond to a table row) + rm_idx <- x_ard |> + dplyr::filter(is.na(.data$group1)) |> + dplyr::pull("pre_idx") |> + unique() + + # apply sorting + x_ard_sort <- x_ard |> cards::ard_sort(sort) + + # pull updated index order after sorting + idx_sort <- x_ard_sort |> + dplyr::pull("pre_idx") |> + unique() |> + setdiff(rm_idx) + + if ("tmp" %in% names(x_ard_sort)) { + x_ard_sort <- x_ard_sort |> + dplyr::filter(is.na(.data$tmp)) |> + select(-"tmp") + } + + # update x$cards + x$cards$tbl_hierarchical <- x_ard_sort |> select(-"pre_idx") + + # update x$table_body + x$table_body <- x$table_body[match(idx_sort, x$table_body$pre_idx), ] |> select(-"pre_idx") + + x +} + +.reshape_ard_compare <- function(x, x_ard, ard_args, sort = NULL) { + by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level")) + # add dummy rows for variables not in include so their label rows are sorted correctly x_ard <- x_ard |> .append_not_incl(ard_args, sort) # add indices to ARD x_ard <- x_ard |> dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |> - dplyr::mutate(idx_unsort = dplyr::cur_group_id()) + dplyr::mutate(pre_idx = dplyr::cur_group_id()) + # get grouping structure gps <- x_ard |> dplyr::group_keys() |> - dplyr::mutate(idx_unsort = dplyr::row_number()) |> + dplyr::mutate(pre_idx = dplyr::row_number()) |> cards::as_card() |> cards::rename_ard_groups_shift(shift = -1) |> - dplyr::filter(!variable %in% ard_args$by) |> - dplyr::rename(label = variable_level) + dplyr::filter(!.data$variable %in% ard_args$by) |> + dplyr::rename(label = "variable_level") + # match overall row if present overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."] if (length(overall_lbl) > 0) { gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl @@ -105,34 +144,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { cards::as_card() attr(x_ard, "args") <- ard_args - # get `by` variable count rows (do not correspond to a table row) - rm_idx <- x_ard |> - dplyr::filter(is.na(group1)) |> - dplyr::pull("idx_unsort") |> - unique() - - # apply sorting - x_ard_sort <- x_ard |> cards::ard_sort(sort) - - # pull updated index order after sorting - idx_sort <- x_ard_sort |> - dplyr::pull("idx_unsort") |> - unique() |> - setdiff(rm_idx) - - if ("tmp" %in% names(x_ard_sort)) { - x_ard_sort <- x_ard_sort |> - dplyr::filter(is.na(tmp)) |> - select(-"tmp") - } - - # update x$cards - x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_unsort") - - # update x$table_body - x$table_body <- x$table_body[match(idx_sort, x$table_body$idx_unsort), ] |> select(-"idx_unsort") - - x + list(x = x, x_ard = x_ard) } .append_not_incl <- function(x, ard_args, sort = NULL) { @@ -155,9 +167,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) { if (!is.null(sort) && sort == "descending") { stat_nm <- setdiff(.df$stat_name, "N")[1] sum <- .df |> - dplyr::filter(stat_name == !!stat_nm) |> - dplyr::summarize(s = sum(unlist(stat))) |> - dplyr::pull(s) + dplyr::filter(.data$stat_name == !!stat_nm) |> + dplyr::summarize(sum_stat = sum(unlist(.data$stat))) |> + dplyr::pull("sum_stat") } g_cur <- .g[[ncol(.g) - 1]] if (!is.na(g_cur) && g_cur == v) { From ae322a1b608a8ceeee5d1c20d0fdec96221e2785 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 18 Feb 2025 20:47:46 -0500 Subject: [PATCH 24/25] Add to author list --- DESCRIPTION | 2 ++ man/gtsummary-package.Rd | 1 + 2 files changed, 3 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 674e79f8f..587d9c48e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,6 +14,8 @@ Authors@R: c( comment = c(ORCID = "0000-0002-4683-1868")), person("Emily C.", "Zabor", role = "aut", comment = c(ORCID = "0000-0002-1402-4498")), + person("Emily", "de la Rua", , role = "aut", + comment = c(ORCID = "0009-0000-8738-5561")), person("Xing", "Bai", role = "ctb"), person("Esther", "Drill", role = "ctb", comment = c(ORCID = "0000-0002-3315-4538")), diff --git a/man/gtsummary-package.Rd b/man/gtsummary-package.Rd index c4d8eeb1e..44b7086a4 100644 --- a/man/gtsummary-package.Rd +++ b/man/gtsummary-package.Rd @@ -29,6 +29,7 @@ Authors: \item Jessica Lavery (\href{https://orcid.org/0000-0002-2746-5647}{ORCID}) \item Karissa Whiting (\href{https://orcid.org/0000-0002-4683-1868}{ORCID}) \item Emily C. Zabor (\href{https://orcid.org/0000-0002-1402-4498}{ORCID}) + \item Emily de la Rua (\href{https://orcid.org/0009-0000-8738-5561}{ORCID}) } Other contributors: From b8bac66303a9baf872d2953b0a0260368336609c Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Tue, 18 Feb 2025 21:00:50 -0500 Subject: [PATCH 25/25] Add test --- DESCRIPTION | 1 + R/tbl_hierarchical.R | 2 +- tests/testthat/_snaps/tbl_sort.md | 16 ++++++++++++++++ tests/testthat/test-tbl_sort.R | 14 ++++++++++++++ 4 files changed, 32 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 587d9c48e..68d2210f2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -88,6 +88,7 @@ Suggests: testthat (>= 3.2.0), withr (>= 2.5.0), workflows (>= 0.2.4) +Remotes: insightsengineering/cards@301_sort_filter_ard_stack_hierarchical VignetteBuilder: knitr Config/Needs/check: hms diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R index 790bcca86..270b0521e 100644 --- a/R/tbl_hierarchical.R +++ b/R/tbl_hierarchical.R @@ -214,7 +214,7 @@ internal_tbl_hierarchical <- function(data, if ("..ard_hierarchical_overall.." %in% variables) { cli::cli_abort("The {.arg variables} argument cannot include a column named {.val ..ard_hierarchical_overall..}.") } - if (!all(variables == unique(variables))) { + if (length(variables) != length(unique(variables))) { cli::cli_abort("The {.arg variables} argument cannot contain repeated variables.") } diff --git a/tests/testthat/_snaps/tbl_sort.md b/tests/testthat/_snaps/tbl_sort.md index fc2d4d810..58388091c 100644 --- a/tests/testthat/_snaps/tbl_sort.md +++ b/tests/testthat/_snaps/tbl_sort.md @@ -30,3 +30,19 @@ 24 AMERICAN INDIAN OR ALASKA NATIVE 0 (NA%) 1 (100%) 0 (NA%) 25 ERYTHEMA 0 (NA%) 1 (100%) 0 (NA%) +# tbl_sort.tbl_hierarchical() error messaging works + + Code + tbl_sort(data.frame()) + Condition + Error in `check_class()`: + ! The `x` argument must be class , not a data frame. + +--- + + Code + tbl_sort(tbl, 10) + Condition + Error in `tbl_sort()`: + ! The `sort` argument must be a string, not a number. + diff --git a/tests/testthat/test-tbl_sort.R b/tests/testthat/test-tbl_sort.R index 1923d7015..9c32dd0ec 100644 --- a/tests/testthat/test-tbl_sort.R +++ b/tests/testthat/test-tbl_sort.R @@ -141,3 +141,17 @@ test_that("tbl_sort.tbl_hierarchical() works when some variables not included in expect_message(tbl_sort(tbl)) }) + +test_that("tbl_sort.tbl_hierarchical() error messaging works", { + # invalid x input + expect_snapshot( + tbl_sort(data.frame()), + error = TRUE + ) + + # invalid sort input + expect_snapshot( + tbl_sort(tbl, 10), + error = TRUE + ) +})