From ee4214a09b99a05e2b4c8a62b5a3dd962c64e228 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 5 Dec 2024 21:51:47 -0500
Subject: [PATCH 01/25] Add sorting function

---
 NAMESPACE                    |   2 +
 R/sort_tbl_hierarchical.R    | 196 +++++++++++++++++++++++++++++++++++
 man/sort_tbl_hierarchical.Rd |  55 ++++++++++
 3 files changed, 253 insertions(+)
 create mode 100644 R/sort_tbl_hierarchical.R
 create mode 100644 man/sort_tbl_hierarchical.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 21100d773..e0301b684 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -77,6 +77,7 @@ S3method(tbl_regression,multinom)
 S3method(tbl_regression,stanreg)
 S3method(tbl_regression,survreg)
 S3method(tbl_regression,workflow)
+S3method(tbl_sort,tbl_hierarchical)
 S3method(tbl_split,gtsummary)
 S3method(tbl_survfit,data.frame)
 S3method(tbl_survfit,list)
@@ -209,6 +210,7 @@ export(tbl_hierarchical_count)
 export(tbl_likert)
 export(tbl_merge)
 export(tbl_regression)
+export(tbl_sort)
 export(tbl_split)
 export(tbl_stack)
 export(tbl_strata)
diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
new file mode 100644
index 000000000..3c7e576dc
--- /dev/null
+++ b/R/sort_tbl_hierarchical.R
@@ -0,0 +1,196 @@
+#' Sort Hierarchical Tables
+#'
+#' @description `r lifecycle::badge('experimental')`\cr
+#'
+#' This function is used to sort hierarchical tables. Options for sorting criteria are:
+#'
+#' 1. Alphanumeric - rows are ordered alphanumerically by label text (default).
+#' 2. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
+#'    ordered accordingly.
+#'
+#' @param x (`tbl_hierarchical`)\cr
+#'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
+#' @param sort (`string`)\cr
+#'   Specifies sorting to perform. Values must be one of `c("alphanumeric", "frequency")`. Default is `"frequency"`.
+#' @param ascending (`logical`)\cr
+#'   Whether to sort rows in ascending or descending order. Default is descending (`ascending = FALSE`).
+#' @param .stat (`string`)\cr
+#'   Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all
+#'   hierarchy levels.
+#'
+#' @name sort_tbl_hierarchical
+#'
+#' @examples
+#' ADAE_subset <- cards::ADAE |>
+#'   dplyr::filter(
+#'     AESOC %in% unique(cards::ADAE$AESOC)[1:5],
+#'     AETERM %in% unique(cards::ADAE$AETERM)[1:5]
+#'   )
+#'
+#' tbl <- tbl_hierarchical(
+#'   data = ADAE_subset,
+#'   variables = c(SEX, RACE, AETERM),
+#'   by = TRTA,
+#'   denominator = cards::ADSL |> mutate(TRTA = ARM),
+#'   id = USUBJID,
+#'   include = AETERM,
+#'   overall_row = TRUE
+#' )
+#'
+#' tbl_sort(tbl, .stat = "N")
+NULL
+
+#' @rdname sort_tbl_hierarchical
+#' @export
+tbl_sort <- function(x, ...) {
+  check_not_missing(x)
+  check_class(x, "gtsummary")
+
+  UseMethod("tbl_sort")
+}
+
+#' @rdname sort_tbl_hierarchical
+#' @export
+tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE, .stat = "n") {
+  set_cli_abort_call()
+
+  # process and check inputs ---------------------------------------------------
+  check_logical(ascending)
+
+  if (!sort %in% c("frequency", "alphanumeric")) {
+    cli::cli_abort(
+      "The {.arg sort} argument must be either {.val frequency} or {.val alphanumeric}.",
+      call = get_cli_abort_call()
+    )
+  }
+
+  if (sort == "alphanumeric") {
+    sort_cols <- c(
+      x$table_body |> select(cards::all_ard_groups("names")) |> names(),
+      "label"
+    )
+
+    x$table_body <- x$table_body |>
+      arrange(across(sort_cols, ~ if (!ascending) desc(.x) else .x))
+  } else {
+    x <- .append_sort_counts(x, .stat)
+    u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
+    g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+
+    # assign counts for each hierarchy level
+    for (g in names(g_cols)) {
+      x$table_body <- x$table_body |>
+        dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE)
+
+      x$table_body <- x$table_body |>
+        left_join(
+          summarise(x$table_body, !!paste0("count_", g) := first(count_total)),
+          by = x$table_body |> dplyr::group_vars()
+        )
+    }
+    x$table_body <- x$table_body |>
+      dplyr::rowwise() |>
+      mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |>
+      dplyr::ungroup()
+
+    # sort by counts ------------------------------------------------------------------------------
+    sort_cols <- c(rbind(
+      x$table_body |> select(cards::all_ard_groups("names")) |> names(),
+      x$table_body |> select(starts_with("count_group")) |> names(),
+      x$table_body |> select(cards::all_ard_groups("levels")) |> names()
+    ), "inner_var", "count_total", "label")
+
+    x$table_body <- x$table_body |>
+      arrange(across(sort_cols, ~ if (is.numeric(.x) && !ascending) desc(.x) else .x))
+  }
+
+  x
+}
+
+.append_sort_counts <- function(x, .stat) {
+  if (!.stat %in% x$cards$tbl_hierarchical$stat_name) {
+    cli::cli_abort(
+      "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid
+      statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.",
+      call = get_cli_abort_call()
+    )
+  }
+
+  cards <- x$cards$tbl_hierarchical
+  by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA
+  g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+
+  # fill in variable_level column of cards
+  cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |>
+    dplyr::filter(variable == "..ard_hierarchical_overall..") |>
+    dplyr::pull("label") |>
+    as.list()
+
+  # extract counts ------------------------------------------------------------------------------
+  cards <- cards |>
+    dplyr::filter(stat_name == .stat, variable %in% x$table_body$variable) |>
+    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |>
+    dplyr::summarise(count_total = sum(unlist(stat))) |>
+    dplyr::ungroup() |>
+    dplyr::rename(label = variable_level) |>
+    tidyr::unnest(cols = everything())
+
+  # match names to x$table_body
+  if (length(by_cols) > 1) {
+    names(cards)[grep("group", names(cards))] <- x$table_body |>
+      select(cards::all_ard_groups()) |>
+      names()
+  }
+  cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.."
+
+  # align cards layout with x$table_body --------------------------------------------------------
+  cards <- cards |>
+    dplyr::rowwise() |>
+    dplyr::mutate(across(
+      cards::all_ard_groups(),
+      ~ if (is.na(.x) && !grepl("_level", cur_column()) && variable == g_cols[cur_column()]) {
+        variable
+      } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", cur_column())]) {
+        label
+      } else {
+        .x
+      }
+    ))
+
+  # calculate total group sums for any variables not in include ---------------------------------
+  if (!all(g_cols %in% cards$variable)) {
+    gp_vars <- g_cols[g_cols %in% setdiff(g_cols, cards$variable)]
+    cli::cli_warn(
+      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
+      These variables ({gp_vars}) do not have event rate data available so the total sum of the event
+      rates for this hierarchy section will be used instead. To use event rates to sort all sections of the table,
+      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}.",
+      call = get_cli_abort_call()
+    )
+
+    gp_cols <- names(gp_vars)
+    for (i in seq_along(gp_cols)) {
+      cards <- cards |>
+        dplyr::bind_rows(
+          cards |>
+            filter(variable != "..ard_hierarchical_overall..") |>
+            group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |>
+            summarize(count_total = sum(count_total)) |>
+            mutate(
+              variable = .data[[gp_cols[i]]],
+              label = .data[[paste0(gp_cols[i], "_level")]]
+            )
+        )
+    }
+  }
+
+  # add counts to x$table_body ------------------------------------------------------------------
+  x$table_body <- x$table_body |>
+    dplyr::left_join(
+      cards,
+      by = c(cards |> select(-"count_total") |> names())
+    ) |>
+    dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " ")))
+
+  x
+}
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
new file mode 100644
index 000000000..0619f94be
--- /dev/null
+++ b/man/sort_tbl_hierarchical.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/sort_tbl_hierarchical.R
+\name{sort_tbl_hierarchical}
+\alias{sort_tbl_hierarchical}
+\alias{tbl_sort}
+\alias{tbl_sort.tbl_hierarchical}
+\title{Sort Hierarchical Tables}
+\usage{
+tbl_sort(x, ...)
+
+\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", ascending = FALSE, .stat = "n")
+}
+\arguments{
+\item{x}{(\code{tbl_hierarchical})\cr
+A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
+
+\item{sort}{(\code{string})\cr
+Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "frequency")}. Default is \code{"frequency"}.}
+
+\item{ascending}{(\code{logical})\cr
+Whether to sort rows in ascending or descending order. Default is descending (\code{ascending = FALSE}).}
+
+\item{.stat}{(\code{string})\cr
+Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all
+hierarchy levels.}
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
+
+This function is used to sort hierarchical tables. Options for sorting criteria are:
+\enumerate{
+\item Alphanumeric - rows are ordered alphanumerically by label text (default).
+\item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
+ordered accordingly.
+}
+}
+\examples{
+ADAE_subset <- cards::ADAE |>
+  dplyr::filter(
+    AESOC \%in\% unique(cards::ADAE$AESOC)[1:5],
+    AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]
+  )
+
+tbl <- tbl_hierarchical(
+  data = ADAE_subset,
+  variables = c(SEX, RACE, AETERM),
+  by = TRTA,
+  denominator = cards::ADSL |> mutate(TRTA = ARM),
+  id = USUBJID,
+  include = AETERM,
+  overall_row = TRUE
+)
+
+tbl_sort(tbl, .stat = "N")
+}

From a9e19ec5aa58b4b55c4f57a29fde4b3f51204173 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Thu, 5 Dec 2024 22:07:53 -0500
Subject: [PATCH 02/25] Clean up documentation

---
 R/sort_tbl_hierarchical.R    | 15 +++++++--------
 man/sort_tbl_hierarchical.Rd |  9 ++++-----
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index 3c7e576dc..c8a8e5ed2 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -12,8 +12,8 @@
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
 #' @param sort (`string`)\cr
 #'   Specifies sorting to perform. Values must be one of `c("alphanumeric", "frequency")`. Default is `"frequency"`.
-#' @param ascending (`logical`)\cr
-#'   Whether to sort rows in ascending or descending order. Default is descending (`ascending = FALSE`).
+#' @param desc (scalar `logical`)\cr
+#'   Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`).
 #' @param .stat (`string`)\cr
 #'   Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all
 #'   hierarchy levels.
@@ -33,11 +33,10 @@
 #'   by = TRTA,
 #'   denominator = cards::ADSL |> mutate(TRTA = ARM),
 #'   id = USUBJID,
-#'   include = AETERM,
 #'   overall_row = TRUE
 #' )
 #'
-#' tbl_sort(tbl, .stat = "N")
+#' tbl_sort(tbl)
 NULL
 
 #' @rdname sort_tbl_hierarchical
@@ -51,11 +50,11 @@ tbl_sort <- function(x, ...) {
 
 #' @rdname sort_tbl_hierarchical
 #' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE, .stat = "n") {
+tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat = "n") {
   set_cli_abort_call()
 
   # process and check inputs ---------------------------------------------------
-  check_logical(ascending)
+  check_scalar_logical(desc)
 
   if (!sort %in% c("frequency", "alphanumeric")) {
     cli::cli_abort(
@@ -71,7 +70,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE,
     )
 
     x$table_body <- x$table_body |>
-      arrange(across(sort_cols, ~ if (!ascending) desc(.x) else .x))
+      arrange(across(sort_cols, ~ if (desc) desc(.x) else .x))
   } else {
     x <- .append_sort_counts(x, .stat)
     u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
@@ -101,7 +100,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", ascending = FALSE,
     ), "inner_var", "count_total", "label")
 
     x$table_body <- x$table_body |>
-      arrange(across(sort_cols, ~ if (is.numeric(.x) && !ascending) desc(.x) else .x))
+      arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) desc(.x) else .x))
   }
 
   x
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index 0619f94be..dd66b6f96 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -8,7 +8,7 @@
 \usage{
 tbl_sort(x, ...)
 
-\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", ascending = FALSE, .stat = "n")
+\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = FALSE, .stat = "n")
 }
 \arguments{
 \item{x}{(\code{tbl_hierarchical})\cr
@@ -17,8 +17,8 @@ A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
 \item{sort}{(\code{string})\cr
 Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "frequency")}. Default is \code{"frequency"}.}
 
-\item{ascending}{(\code{logical})\cr
-Whether to sort rows in ascending or descending order. Default is descending (\code{ascending = FALSE}).}
+\item{desc}{(scalar \code{logical})\cr
+Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).}
 
 \item{.stat}{(\code{string})\cr
 Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all
@@ -47,9 +47,8 @@ tbl <- tbl_hierarchical(
   by = TRTA,
   denominator = cards::ADSL |> mutate(TRTA = ARM),
   id = USUBJID,
-  include = AETERM,
   overall_row = TRUE
 )
 
-tbl_sort(tbl, .stat = "N")
+tbl_sort(tbl)
 }

From 1585a63d3fe8cd48c0709f67310a410173cd053c Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 6 Dec 2024 00:39:21 -0500
Subject: [PATCH 03/25] Add filtering function

---
 NAMESPACE                      |   2 +
 R/filter_tbl_hierarchical.R    | 114 +++++++++++++++++++++++++++++++++
 man/filter_tbl_hierarchical.Rd |  57 +++++++++++++++++
 3 files changed, 173 insertions(+)
 create mode 100644 R/filter_tbl_hierarchical.R
 create mode 100644 man/filter_tbl_hierarchical.Rd

diff --git a/NAMESPACE b/NAMESPACE
index e0301b684..d08955540 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -62,6 +62,7 @@ S3method(plot,tbl_regression)
 S3method(plot,tbl_uvregression)
 S3method(print,gtsummary)
 S3method(print,tbl_split)
+S3method(tbl_filter,tbl_hierarchical)
 S3method(tbl_regression,brmsfit)
 S3method(tbl_regression,crr)
 S3method(tbl_regression,default)
@@ -205,6 +206,7 @@ export(tbl_butcher)
 export(tbl_continuous)
 export(tbl_cross)
 export(tbl_custom_summary)
+export(tbl_filter)
 export(tbl_hierarchical)
 export(tbl_hierarchical_count)
 export(tbl_likert)
diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R
new file mode 100644
index 000000000..9c9bca108
--- /dev/null
+++ b/R/filter_tbl_hierarchical.R
@@ -0,0 +1,114 @@
+#' Filter Hierarchical Tables
+#'
+#' @description `r lifecycle::badge('experimental')`\cr
+#'
+#' This function is used to filter hierarchical tables by total row frequencies.
+#'
+#' @param x (`tbl_hierarchical`)\cr
+#'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
+#' @param t (scalar `numeric`)\cr
+#'   Threshold used to determine which values will be retained.
+#' @param gt (scalar `logical`)\cr
+#'   Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`).
+#' @param eq (scalar `logical`)\cr
+#'   Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or
+#'   inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`.
+#' @param .stat (`string`)\cr
+#'   Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all
+#'   hierarchy levels.
+#' @name filter_tbl_hierarchical
+#'
+#' @examples
+#' ADAE_subset <- cards::ADAE |>
+#'   dplyr::filter(
+#'     AESOC %in% unique(cards::ADAE$AESOC)[1:5],
+#'     AETERM %in% unique(cards::ADAE$AETERM)[1:5]
+#'   )
+#'
+#' tbl <- tbl_hierarchical(
+#'   data = ADAE_subset,
+#'   variables = c(SEX, RACE, AETERM),
+#'   by = TRTA,
+#'   denominator = cards::ADSL |> mutate(TRTA = ARM),
+#'   id = USUBJID,
+#'   overall_row = TRUE
+#' )
+#'
+#' # Example 1 - Row Sums > 10 ------------------
+#' tbl_filter(tbl, t = 10)
+#'
+#' # Example 2 - Row Sums <= 5 ------------------
+#' tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE)
+NULL
+
+#' @rdname filter_tbl_hierarchical
+#' @export
+tbl_filter <- function(x, ...) {
+  check_not_missing(x)
+  check_class(x, "gtsummary")
+
+  UseMethod("tbl_filter")
+}
+
+#' @export
+#' @rdname filter_tbl_hierarchical
+tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n") {
+  set_cli_abort_call()
+
+  # process and check inputs ---------------------------------------------------
+  check_numeric(t)
+  check_scalar_logical(gt)
+  check_scalar_logical(eq)
+
+  x <- .append_hierarchy_row_sums(x, .stat)
+  g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+  if (!gt) x$table_body$count_total[x$table_body$variable %in% g_cols] <- t - 1
+
+  filt_expr <- paste(
+    "count_total",
+    dplyr::case_when(
+      gt && eq ~ ">=",
+      !gt && eq ~ "<=",
+      !gt ~ "<",
+      TRUE ~ ">"
+    ),
+    t
+  )
+
+  x$table_body <- x$table_body |>
+    dplyr::filter(!!parse_expr(filt_expr))
+
+  # remove summary rows with no sub-rows still present -------------------------
+  if (!gt) {
+    for (i in rev(seq_along(g_cols))) {
+      empty_gps <- x$table_body |>
+        dplyr::group_by(across(c(names(g_cols[1:i]), paste0(names(g_cols[1:i]), "_level")))) |>
+        dplyr::summarize(empty_gp := dplyr::n() == 1) |>
+        na.omit()
+
+      if (!all(!empty_gps$empty_gp)) {
+        cli::cli_inform(
+          "For readability, any summary row that supercedes a row that meets the filtering criteria will be kept
+          regardless of whether it meets the filtering criteria itself.",
+          .frequency = "once",
+          .frequency_id = "hierarchy_filter_lt"
+        )
+
+        x$table_body <- x$table_body |>
+          dplyr::left_join(
+            empty_gps,
+            by = empty_gps |> select(cards::all_ard_groups()) |> names()
+          ) |>
+          dplyr::filter(!empty_gp | is.na(empty_gp)) |>
+          dplyr::select(-"empty_gp")
+      } else {
+        break
+      }
+    }
+  }
+
+  x$table_body <- x$table_body |>
+    dplyr::select(-"count_total")
+
+  x
+}
diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd
new file mode 100644
index 000000000..9696b1091
--- /dev/null
+++ b/man/filter_tbl_hierarchical.Rd
@@ -0,0 +1,57 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/filter_tbl_hierarchical.R
+\name{filter_tbl_hierarchical}
+\alias{filter_tbl_hierarchical}
+\alias{tbl_filter}
+\alias{tbl_filter.tbl_hierarchical}
+\title{Filter Hierarchical Tables}
+\usage{
+tbl_filter(x, ...)
+
+\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n")
+}
+\arguments{
+\item{x}{(\code{tbl_hierarchical})\cr
+A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
+
+\item{t}{(scalar \code{numeric})\cr
+Threshold used to determine which values will be retained.}
+
+\item{gt}{(scalar \code{logical})\cr
+Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).}
+
+\item{eq}{(scalar \code{logical})\cr
+Whether to include the value of \code{t} in the filtered range, i.e. whether to use exclusive comparators (\code{>}, \code{<}) or
+inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.}
+
+\item{.stat}{(\code{string})\cr
+Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all
+hierarchy levels.}
+}
+\description{
+\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
+
+This function is used to filter hierarchical tables by total row frequencies.
+}
+\examples{
+ADAE_subset <- cards::ADAE |>
+  dplyr::filter(
+    AESOC \%in\% unique(cards::ADAE$AESOC)[1:5],
+    AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]
+  )
+
+tbl <- tbl_hierarchical(
+  data = ADAE_subset,
+  variables = c(SEX, RACE, AETERM),
+  by = TRTA,
+  denominator = cards::ADSL |> mutate(TRTA = ARM),
+  id = USUBJID,
+  overall_row = TRUE
+)
+
+# Example 1 - Row Sums > 10 ------------------
+tbl_filter(tbl, t = 10)
+
+# Example 2 - Row Sums <= 5 ------------------
+tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE)
+}

From 6c28907358250f0003a9e3bb9a7d1edff72f83e7 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 6 Dec 2024 00:40:17 -0500
Subject: [PATCH 04/25] Clean up

---
 R/sort_tbl_hierarchical.R    | 71 +++++++++++++++++++++++-------------
 man/sort_tbl_hierarchical.Rd |  6 ++-
 2 files changed, 51 insertions(+), 26 deletions(-)

diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index c8a8e5ed2..ca14a8047 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -36,7 +36,11 @@
 #'   overall_row = TRUE
 #' )
 #'
+#' # Example 1 - Decreasing Frequency Sort ------
 #' tbl_sort(tbl)
+#'
+#' # Example 2 - Reverse Alphanumeric Sort ------
+#' tbl_sort(tbl, sort = "alphanumeric")
 NULL
 
 #' @rdname sort_tbl_hierarchical
@@ -50,7 +54,7 @@ tbl_sort <- function(x, ...) {
 
 #' @rdname sort_tbl_hierarchical
 #' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat = "n") {
+tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n") {
   set_cli_abort_call()
 
   # process and check inputs ---------------------------------------------------
@@ -63,33 +67,50 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat
     )
   }
 
+  u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
+
   if (sort == "alphanumeric") {
-    sort_cols <- c(
-      x$table_body |> select(cards::all_ard_groups("names")) |> names(),
-      "label"
-    )
+    sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label")
+    rep_str <- if (desc) "zzzz" else " "
+
+    # overall row always appears first
+    if (desc && "..ard_hierarchical_overall.." %in% x$table_body$variable) {
+      ovrl_row <- x$table_body[1, ]
+      x$table_body <- x$table_body[-1,]
+    }
 
     x$table_body <- x$table_body |>
-      arrange(across(sort_cols, ~ if (desc) desc(.x) else .x))
+      dplyr::rowwise() |>
+      dplyr::mutate(inner_var = if (.data$variable %in% u_cols) rep_str else .data$variable) |>
+      dplyr::ungroup() |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., rep_str))) |>
+      dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., paste0("^", rep_str, "$"), NA))) |>
+      select(-"inner_var")
+
+    if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body)
   } else {
-    x <- .append_sort_counts(x, .stat)
-    u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
+    x <- .append_hierarchy_row_sums(x, .stat)
     g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
 
+    # sort summary rows first
+    x$table_body <- x$table_body |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " ")))
+
     # assign counts for each hierarchy level
     for (g in names(g_cols)) {
       x$table_body <- x$table_body |>
         dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE)
 
       x$table_body <- x$table_body |>
-        left_join(
-          summarise(x$table_body, !!paste0("count_", g) := first(count_total)),
+        dplyr::left_join(
+          dplyr::summarize(x$table_body, !!paste0("count_", g) := dplyr::first(count_total)),
           by = x$table_body |> dplyr::group_vars()
         )
     }
     x$table_body <- x$table_body |>
       dplyr::rowwise() |>
-      mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |>
+      dplyr::mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |>
       dplyr::ungroup()
 
     # sort by counts ------------------------------------------------------------------------------
@@ -100,13 +121,15 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat
     ), "inner_var", "count_total", "label")
 
     x$table_body <- x$table_body |>
-      arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) desc(.x) else .x))
+      dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., "^ $", NA))) |>
+      dplyr::select(-starts_with("count_"), -"inner_var")
   }
 
   x
 }
 
-.append_sort_counts <- function(x, .stat) {
+.append_hierarchy_row_sums <- function(x, .stat) {
   if (!.stat %in% x$cards$tbl_hierarchical$stat_name) {
     cli::cli_abort(
       "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid
@@ -147,9 +170,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat
     dplyr::rowwise() |>
     dplyr::mutate(across(
       cards::all_ard_groups(),
-      ~ if (is.na(.x) && !grepl("_level", cur_column()) && variable == g_cols[cur_column()]) {
+      ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == g_cols[dplyr::cur_column()]) {
         variable
-      } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", cur_column())]) {
+      } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", dplyr::cur_column())]) {
         label
       } else {
         .x
@@ -159,12 +182,11 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat
   # calculate total group sums for any variables not in include ---------------------------------
   if (!all(g_cols %in% cards$variable)) {
     gp_vars <- g_cols[g_cols %in% setdiff(g_cols, cards$variable)]
-    cli::cli_warn(
+    cli::cli_inform(
       "Not all hierarchy variables present in the table were included in the {.arg include} argument.
       These variables ({gp_vars}) do not have event rate data available so the total sum of the event
-      rates for this hierarchy section will be used instead. To use event rates to sort all sections of the table,
-      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}.",
-      call = get_cli_abort_call()
+      rates for this hierarchy section will be used instead. To use event rates for all sections of the table,
+      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
     )
 
     gp_cols <- names(gp_vars)
@@ -172,10 +194,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat
       cards <- cards |>
         dplyr::bind_rows(
           cards |>
-            filter(variable != "..ard_hierarchical_overall..") |>
-            group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |>
-            summarize(count_total = sum(count_total)) |>
-            mutate(
+            dplyr::filter(variable != "..ard_hierarchical_overall..") |>
+            dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |>
+            dplyr::summarize(count_total = sum(count_total)) |>
+            dplyr::mutate(
               variable = .data[[gp_cols[i]]],
               label = .data[[paste0(gp_cols[i], "_level")]]
             )
@@ -188,8 +210,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = FALSE, .stat
     dplyr::left_join(
       cards,
       by = c(cards |> select(-"count_total") |> names())
-    ) |>
-    dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " ")))
+    )
 
   x
 }
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index dd66b6f96..538cd30fa 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -8,7 +8,7 @@
 \usage{
 tbl_sort(x, ...)
 
-\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = FALSE, .stat = "n")
+\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n")
 }
 \arguments{
 \item{x}{(\code{tbl_hierarchical})\cr
@@ -50,5 +50,9 @@ tbl <- tbl_hierarchical(
   overall_row = TRUE
 )
 
+# Example 1 - Decreasing Frequency Sort ------
 tbl_sort(tbl)
+
+# Example 2 - Reverse Alphanumeric Sort ------
+tbl_sort(tbl, sort = "alphanumeric")
 }

From 42724994fdda531be9892c0be0f3108f62a09702 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 6 Dec 2024 00:43:00 -0500
Subject: [PATCH 05/25] Document

---
 R/filter_tbl_hierarchical.R    | 4 ++--
 R/sort_tbl_hierarchical.R      | 5 +++--
 man/filter_tbl_hierarchical.Rd | 3 +--
 man/sort_tbl_hierarchical.Rd   | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R
index 9c9bca108..2c3eab696 100644
--- a/R/filter_tbl_hierarchical.R
+++ b/R/filter_tbl_hierarchical.R
@@ -14,8 +14,7 @@
 #'   Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or
 #'   inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`.
 #' @param .stat (`string`)\cr
-#'   Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all
-#'   hierarchy levels.
+#'   Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
 #' @name filter_tbl_hierarchical
 #'
 #' @examples
@@ -59,6 +58,7 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n"
   check_numeric(t)
   check_scalar_logical(gt)
   check_scalar_logical(eq)
+  check_string(.stat)
 
   x <- .append_hierarchy_row_sums(x, .stat)
   g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index ca14a8047..5c0bc53e1 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -15,8 +15,8 @@
 #' @param desc (scalar `logical`)\cr
 #'   Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`).
 #' @param .stat (`string`)\cr
-#'   Statistic to use for sorting when `sort = "frequency"`. This statistic must be present in the table for all
-#'   hierarchy levels.
+#'   Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for
+#'   all hierarchy levels.
 #'
 #' @name sort_tbl_hierarchical
 #'
@@ -59,6 +59,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
 
   # process and check inputs ---------------------------------------------------
   check_scalar_logical(desc)
+  check_string(.stat)
 
   if (!sort %in% c("frequency", "alphanumeric")) {
     cli::cli_abort(
diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd
index 9696b1091..40b72a1e6 100644
--- a/man/filter_tbl_hierarchical.Rd
+++ b/man/filter_tbl_hierarchical.Rd
@@ -25,8 +25,7 @@ Whether to include the value of \code{t} in the filtered range, i.e. whether to
 inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.}
 
 \item{.stat}{(\code{string})\cr
-Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all
-hierarchy levels.}
+Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.}
 }
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index 538cd30fa..731dd6d62 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -21,8 +21,8 @@ Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "fre
 Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).}
 
 \item{.stat}{(\code{string})\cr
-Statistic to use for sorting when \code{sort = "frequency"}. This statistic must be present in the table for all
-hierarchy levels.}
+Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for
+all hierarchy levels.}
 }
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr

From f02c7c5ca6d9a63b3d478af6e0f063422cbe6936 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 6 Dec 2024 21:23:36 -0500
Subject: [PATCH 06/25] Improve code

---
 R/filter_tbl_hierarchical.R    |  61 ++++++++++---------
 R/sort_tbl_hierarchical.R      | 107 ++++++++++++++++++---------------
 man/filter_tbl_hierarchical.Rd |  15 ++---
 man/sort_tbl_hierarchical.Rd   |  21 ++++---
 4 files changed, 109 insertions(+), 95 deletions(-)

diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R
index 2c3eab696..7a1ad5b69 100644
--- a/R/filter_tbl_hierarchical.R
+++ b/R/filter_tbl_hierarchical.R
@@ -2,12 +2,12 @@
 #'
 #' @description `r lifecycle::badge('experimental')`\cr
 #'
-#' This function is used to filter hierarchical tables by total row frequencies.
+#' This function is used to filter hierarchical tables by frequency row sum.
 #'
 #' @param x (`tbl_hierarchical`)\cr
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
 #' @param t (scalar `numeric`)\cr
-#'   Threshold used to determine which values will be retained.
+#'   Threshold used to determine which rows will be retained.
 #' @param gt (scalar `logical`)\cr
 #'   Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`).
 #' @param eq (scalar `logical`)\cr
@@ -15,14 +15,14 @@
 #'   inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`.
 #' @param .stat (`string`)\cr
 #'   Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
+#'   Default is `"n"`.
+#'
 #' @name filter_tbl_hierarchical
+#' @seealso [tbl_sort()]
 #'
 #' @examples
 #' ADAE_subset <- cards::ADAE |>
-#'   dplyr::filter(
-#'     AESOC %in% unique(cards::ADAE$AESOC)[1:5],
-#'     AETERM %in% unique(cards::ADAE$AETERM)[1:5]
-#'   )
+#'   dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
 #'
 #' tbl <- tbl_hierarchical(
 #'   data = ADAE_subset,
@@ -54,18 +54,23 @@ tbl_filter <- function(x, ...) {
 tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n") {
   set_cli_abort_call()
 
-  # process and check inputs ---------------------------------------------------
+  # process and check inputs ----------------------------------------------------------------------
   check_numeric(t)
   check_scalar_logical(gt)
   check_scalar_logical(eq)
   check_string(.stat)
 
+  outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+
+  # get row sums ----------------------------------------------------------------------------------
   x <- .append_hierarchy_row_sums(x, .stat)
-  g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
-  if (!gt) x$table_body$count_total[x$table_body$variable %in% g_cols] <- t - 1
 
+  # keep all summary rows (removed later if no sub-rows are kept)
+  if (!gt) x$table_body$sum_row[x$table_body$variable %in% outer_cols] <- t - 1
+
+  # create and apply filtering expression ---------------------------------------------------------
   filt_expr <- paste(
-    "count_total",
+    "sum_row",
     dplyr::case_when(
       gt && eq ~ ">=",
       !gt && eq ~ "<=",
@@ -74,41 +79,39 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n"
     ),
     t
   )
-
   x$table_body <- x$table_body |>
     dplyr::filter(!!parse_expr(filt_expr))
 
-  # remove summary rows with no sub-rows still present -------------------------
+  # remove any summary rows with no sub-rows still present ----------------------------------------
   if (!gt) {
-    for (i in rev(seq_along(g_cols))) {
-      empty_gps <- x$table_body |>
-        dplyr::group_by(across(c(names(g_cols[1:i]), paste0(names(g_cols[1:i]), "_level")))) |>
-        dplyr::summarize(empty_gp := dplyr::n() == 1) |>
+    for (i in rev(seq_along(outer_cols))) {
+      gp_empty <- x$table_body |>
+        dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |>
+        dplyr::summarize(is_empty := dplyr::n() == 1) |>
         na.omit()
 
-      if (!all(!empty_gps$empty_gp)) {
-        cli::cli_inform(
-          "For readability, any summary row that supercedes a row that meets the filtering criteria will be kept
-          regardless of whether it meets the filtering criteria itself.",
-          .frequency = "once",
-          .frequency_id = "hierarchy_filter_lt"
-        )
-
+      if (!all(!gp_empty$is_empty)) {
         x$table_body <- x$table_body |>
           dplyr::left_join(
-            empty_gps,
-            by = empty_gps |> select(cards::all_ard_groups()) |> names()
+            gp_empty,
+            by = gp_empty |> select(cards::all_ard_groups()) |> names()
           ) |>
-          dplyr::filter(!empty_gp | is.na(empty_gp)) |>
-          dplyr::select(-"empty_gp")
+          dplyr::filter(!is_empty | is.na(is_empty)) |>
+          dplyr::select(-"is_empty")
       } else {
         break
       }
     }
+    if (nrow(x$table_body) > 0) {
+      cli::cli_inform(
+        "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
+        regardless of whether they meet the filtering criteria themselves."
+      )
+    }
   }
 
   x$table_body <- x$table_body |>
-    dplyr::select(-"count_total")
+    dplyr::select(-"sum_row")
 
   x
 }
diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index 5c0bc53e1..1038701b9 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -4,28 +4,27 @@
 #'
 #' This function is used to sort hierarchical tables. Options for sorting criteria are:
 #'
-#' 1. Alphanumeric - rows are ordered alphanumerically by label text (default).
-#' 2. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
-#'    ordered accordingly.
+#' 1. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
+#'    ordered accordingly (default).
+#' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables
+#'    in ascending alphanumeric order (i.e. A to Z).
 #'
 #' @param x (`tbl_hierarchical`)\cr
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
 #' @param sort (`string`)\cr
-#'   Specifies sorting to perform. Values must be one of `c("alphanumeric", "frequency")`. Default is `"frequency"`.
+#'   Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`.
 #' @param desc (scalar `logical`)\cr
 #'   Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`).
 #' @param .stat (`string`)\cr
 #'   Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for
-#'   all hierarchy levels.
+#'   all hierarchy levels. Default is `"n"`.
 #'
 #' @name sort_tbl_hierarchical
+#' @seealso [tbl_filter()]
 #'
 #' @examples
 #' ADAE_subset <- cards::ADAE |>
-#'   dplyr::filter(
-#'     AESOC %in% unique(cards::ADAE$AESOC)[1:5],
-#'     AETERM %in% unique(cards::ADAE$AETERM)[1:5]
-#'   )
+#'   dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
 #'
 #' tbl <- tbl_hierarchical(
 #'   data = ADAE_subset,
@@ -36,10 +35,10 @@
 #'   overall_row = TRUE
 #' )
 #'
-#' # Example 1 - Decreasing Frequency Sort ------
+#' # Example 1 - Decreasing Frequency Sort ------------------
 #' tbl_sort(tbl)
 #'
-#' # Example 2 - Reverse Alphanumeric Sort ------
+#' # Example 2 - Ascending Alphanumeric Sort (Z to A) -------
 #' tbl_sort(tbl, sort = "alphanumeric")
 NULL
 
@@ -57,7 +56,7 @@ tbl_sort <- function(x, ...) {
 tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n") {
   set_cli_abort_call()
 
-  # process and check inputs ---------------------------------------------------
+  # process and check inputs ----------------------------------------------------------------------
   check_scalar_logical(desc)
   check_string(.stat)
 
@@ -68,21 +67,30 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     )
   }
 
-  u_cols <- x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
+  overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable
+
+  outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+  inner_col <- setdiff(
+    x$table_body$variable,
+    x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
+  )
 
   if (sort == "alphanumeric") {
-    sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label")
+    # summary rows remain at the top of each sub-section
     rep_str <- if (desc) "zzzz" else " "
 
     # overall row always appears first
     if (desc && "..ard_hierarchical_overall.." %in% x$table_body$variable) {
       ovrl_row <- x$table_body[1, ]
-      x$table_body <- x$table_body[-1,]
+      x$table_body <- x$table_body[-1, ]
     }
 
+    # sort by label -------------------------------------------------------------------------------
+    sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label")
+
     x$table_body <- x$table_body |>
       dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (.data$variable %in% u_cols) rep_str else .data$variable) |>
+      dplyr::mutate(inner_var = if (!.data$variable == inner_col) rep_str else .data$variable) |>
       dplyr::ungroup() |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., rep_str))) |>
       dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |>
@@ -91,47 +99,48 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
 
     if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body)
   } else {
+    # get row sums --------------------------------------------------------------------------------
     x <- .append_hierarchy_row_sums(x, .stat)
-    g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
-
-    # sort summary rows first
-    x$table_body <- x$table_body |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " ")))
-
-    # assign counts for each hierarchy level
-    for (g in names(g_cols)) {
-      x$table_body <- x$table_body |>
-        dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE)
 
+    # append outer hierarchy level sums in each row to sort at all levels -------------------------
+    for (g in names(outer_cols)) {
+      x$table_body <- x$table_body |> dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE)
       x$table_body <- x$table_body |>
         dplyr::left_join(
-          dplyr::summarize(x$table_body, !!paste0("count_", g) := dplyr::first(count_total)),
+          x$table_body |>
+            dplyr::summarize(!!paste0("sum_", g) := dplyr::first(sum_row)),
           by = x$table_body |> dplyr::group_vars()
         )
     }
+
+    # summary rows remain at the top of each sub-section
     x$table_body <- x$table_body |>
+      dplyr::ungroup() |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) |>
       dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (.data$variable %in% u_cols) " " else .data$variable) |>
+      dplyr::mutate(inner_var = if (!.data$variable == inner_col) " " else .data$variable) |>
       dplyr::ungroup()
 
-    # sort by counts ------------------------------------------------------------------------------
+    # sort by row sum -----------------------------------------------------------------------------
     sort_cols <- c(rbind(
       x$table_body |> select(cards::all_ard_groups("names")) |> names(),
-      x$table_body |> select(starts_with("count_group")) |> names(),
+      x$table_body |> select(starts_with("sum_group")) |> names(),
       x$table_body |> select(cards::all_ard_groups("levels")) |> names()
-    ), "inner_var", "count_total", "label")
+    ), "inner_var", "sum_row", "label")
 
     x$table_body <- x$table_body |>
       dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., "^ $", NA))) |>
-      dplyr::select(-starts_with("count_"), -"inner_var")
+      select(-starts_with("sum_"), -"inner_var")
   }
 
   x
 }
 
 .append_hierarchy_row_sums <- function(x, .stat) {
-  if (!.stat %in% x$cards$tbl_hierarchical$stat_name) {
+  cards <- x$cards$tbl_hierarchical
+
+  if (!.stat %in% cards$stat_name) {
     cli::cli_abort(
       "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid
       statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.",
@@ -139,26 +148,25 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     )
   }
 
-  cards <- x$cards$tbl_hierarchical
   by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA
-  g_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+  outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
 
-  # fill in variable_level column of cards
+  # update logical variable_level entries from overall row to character
   cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |>
     dplyr::filter(variable == "..ard_hierarchical_overall..") |>
     dplyr::pull("label") |>
     as.list()
 
-  # extract counts ------------------------------------------------------------------------------
+  # extract row sums ------------------------------------------------------------------------------
   cards <- cards |>
     dplyr::filter(stat_name == .stat, variable %in% x$table_body$variable) |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |>
-    dplyr::summarise(count_total = sum(unlist(stat))) |>
+    dplyr::summarise(sum_row = sum(unlist(stat))) |>
     dplyr::ungroup() |>
     dplyr::rename(label = variable_level) |>
     tidyr::unnest(cols = everything())
 
-  # match names to x$table_body
+  # match cards names to x$table_body -------------------------------------------------------------
   if (length(by_cols) > 1) {
     names(cards)[grep("group", names(cards))] <- x$table_body |>
       select(cards::all_ard_groups()) |>
@@ -166,23 +174,25 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
   }
   cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.."
 
-  # align cards layout with x$table_body --------------------------------------------------------
+  # fill in NAs to align cards layout with x$table_body -------------------------------------------
   cards <- cards |>
     dplyr::rowwise() |>
     dplyr::mutate(across(
       cards::all_ard_groups(),
-      ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == g_cols[dplyr::cur_column()]) {
+      ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == outer_cols[dplyr::cur_column()]) {
         variable
-      } else if (is.na(.x) && variable %in% g_cols[gsub("_level", "", dplyr::cur_column())]) {
+      } else if (is.na(.x) && variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) {
         label
       } else {
         .x
       }
     ))
 
-  # calculate total group sums for any variables not in include ---------------------------------
-  if (!all(g_cols %in% cards$variable)) {
-    gp_vars <- g_cols[g_cols %in% setdiff(g_cols, cards$variable)]
+  # for any variables not in include, calculate group sums ----------------------------------------
+  if (!all(outer_cols %in% cards$variable)) {
+    gp_vars <- outer_cols[outer_cols %in% setdiff(outer_cols, cards$variable)]
+    gp_cols <- names(gp_vars)
+
     cli::cli_inform(
       "Not all hierarchy variables present in the table were included in the {.arg include} argument.
       These variables ({gp_vars}) do not have event rate data available so the total sum of the event
@@ -190,14 +200,13 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
       set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
     )
 
-    gp_cols <- names(gp_vars)
     for (i in seq_along(gp_cols)) {
       cards <- cards |>
         dplyr::bind_rows(
           cards |>
             dplyr::filter(variable != "..ard_hierarchical_overall..") |>
             dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |>
-            dplyr::summarize(count_total = sum(count_total)) |>
+            dplyr::summarize(sum_row = sum(sum_row)) |>
             dplyr::mutate(
               variable = .data[[gp_cols[i]]],
               label = .data[[paste0(gp_cols[i], "_level")]]
@@ -206,11 +215,11 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     }
   }
 
-  # add counts to x$table_body ------------------------------------------------------------------
+  # append row sums to x$table_body ---------------------------------------------------------------
   x$table_body <- x$table_body |>
     dplyr::left_join(
       cards,
-      by = c(cards |> select(-"count_total") |> names())
+      by = c(cards |> select(-"sum_row") |> names())
     )
 
   x
diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd
index 40b72a1e6..e7f3b3f1d 100644
--- a/man/filter_tbl_hierarchical.Rd
+++ b/man/filter_tbl_hierarchical.Rd
@@ -15,7 +15,7 @@ tbl_filter(x, ...)
 A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
 
 \item{t}{(scalar \code{numeric})\cr
-Threshold used to determine which values will be retained.}
+Threshold used to determine which rows will be retained.}
 
 \item{gt}{(scalar \code{logical})\cr
 Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).}
@@ -25,19 +25,17 @@ Whether to include the value of \code{t} in the filtered range, i.e. whether to
 inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.}
 
 \item{.stat}{(\code{string})\cr
-Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.}
+Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
+Default is \code{"n"}.}
 }
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
 
-This function is used to filter hierarchical tables by total row frequencies.
+This function is used to filter hierarchical tables by frequency row sum.
 }
 \examples{
 ADAE_subset <- cards::ADAE |>
-  dplyr::filter(
-    AESOC \%in\% unique(cards::ADAE$AESOC)[1:5],
-    AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]
-  )
+  dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5])
 
 tbl <- tbl_hierarchical(
   data = ADAE_subset,
@@ -54,3 +52,6 @@ tbl_filter(tbl, t = 10)
 # Example 2 - Row Sums <= 5 ------------------
 tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE)
 }
+\seealso{
+\code{\link[=tbl_sort]{tbl_sort()}}
+}
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index 731dd6d62..3b004ef5e 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -15,31 +15,29 @@ tbl_sort(x, ...)
 A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
 
 \item{sort}{(\code{string})\cr
-Specifies sorting to perform. Values must be one of \code{c("alphanumeric", "frequency")}. Default is \code{"frequency"}.}
+Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.}
 
 \item{desc}{(scalar \code{logical})\cr
 Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).}
 
 \item{.stat}{(\code{string})\cr
 Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for
-all hierarchy levels.}
+all hierarchy levels. Default is \code{"n"}.}
 }
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
 
 This function is used to sort hierarchical tables. Options for sorting criteria are:
 \enumerate{
-\item Alphanumeric - rows are ordered alphanumerically by label text (default).
 \item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
-ordered accordingly.
+ordered accordingly (default).
+\item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables
+in ascending alphanumeric order (i.e. A to Z).
 }
 }
 \examples{
 ADAE_subset <- cards::ADAE |>
-  dplyr::filter(
-    AESOC \%in\% unique(cards::ADAE$AESOC)[1:5],
-    AETERM \%in\% unique(cards::ADAE$AETERM)[1:5]
-  )
+  dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5])
 
 tbl <- tbl_hierarchical(
   data = ADAE_subset,
@@ -50,9 +48,12 @@ tbl <- tbl_hierarchical(
   overall_row = TRUE
 )
 
-# Example 1 - Decreasing Frequency Sort ------
+# Example 1 - Decreasing Frequency Sort ------------------
 tbl_sort(tbl)
 
-# Example 2 - Reverse Alphanumeric Sort ------
+# Example 2 - Ascending Alphanumeric Sort (Z to A) -------
 tbl_sort(tbl, sort = "alphanumeric")
 }
+\seealso{
+\code{\link[=tbl_filter]{tbl_filter()}}
+}

From 30c574bb897048b50c6f5e8d106f415dbb3665d7 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 6 Dec 2024 22:48:34 -0500
Subject: [PATCH 07/25] Fix checks

---
 R/filter_tbl_hierarchical.R    | 20 ++++++++-----
 R/sort_tbl_hierarchical.R      | 51 ++++++++++++++++++++--------------
 R/tbl_hierarchical.R           |  3 ++
 inst/WORDLIST                  |  1 +
 man/filter_tbl_hierarchical.Rd | 15 +++++++---
 man/sort_tbl_hierarchical.Rd   | 13 +++++++--
 pkgdown/_pkgdown.yml           |  2 ++
 7 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R
index 7a1ad5b69..8821fd75e 100644
--- a/R/filter_tbl_hierarchical.R
+++ b/R/filter_tbl_hierarchical.R
@@ -2,10 +2,10 @@
 #'
 #' @description `r lifecycle::badge('experimental')`\cr
 #'
-#' This function is used to filter hierarchical tables by frequency row sum.
+#' This function is used to filter hierarchical table rows by frequency row sum.
 #'
-#' @param x (`tbl_hierarchical`)\cr
-#'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
+#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
+#'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
 #' @param t (scalar `numeric`)\cr
 #'   Threshold used to determine which rows will be retained.
 #' @param gt (scalar `logical`)\cr
@@ -16,11 +16,14 @@
 #' @param .stat (`string`)\cr
 #'   Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
 #'   Default is `"n"`.
+#' @inheritParams rlang::args_dots_empty
+#'
+#' @return A `gtsummary` of the same class as `x`.
 #'
 #' @name filter_tbl_hierarchical
 #' @seealso [tbl_sort()]
 #'
-#' @examples
+#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))
 #' ADAE_subset <- cards::ADAE |>
 #'   dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
 #'
@@ -51,7 +54,7 @@ tbl_filter <- function(x, ...) {
 
 #' @export
 #' @rdname filter_tbl_hierarchical
-tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n") {
+tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) {
   set_cli_abort_call()
 
   # process and check inputs ----------------------------------------------------------------------
@@ -60,7 +63,10 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n"
   check_scalar_logical(eq)
   check_string(.stat)
 
-  outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+  outer_cols <- sapply(
+    x$table_body |> select(cards::all_ard_groups("names")),
+    function(x) dplyr::last(unique(stats::na.omit(x)))
+  )
 
   # get row sums ----------------------------------------------------------------------------------
   x <- .append_hierarchy_row_sums(x, .stat)
@@ -88,7 +94,7 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n"
       gp_empty <- x$table_body |>
         dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |>
         dplyr::summarize(is_empty := dplyr::n() == 1) |>
-        na.omit()
+        stats::na.omit()
 
       if (!all(!gp_empty$is_empty)) {
         x$table_body <- x$table_body |>
diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index 1038701b9..3b221da0c 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -9,8 +9,8 @@
 #' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables
 #'    in ascending alphanumeric order (i.e. A to Z).
 #'
-#' @param x (`tbl_hierarchical`)\cr
-#'   A hierarchical gtsummary table of class `'tbl_hierarchical'`.
+#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
+#'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
 #' @param sort (`string`)\cr
 #'   Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`.
 #' @param desc (scalar `logical`)\cr
@@ -18,11 +18,14 @@
 #' @param .stat (`string`)\cr
 #'   Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for
 #'   all hierarchy levels. Default is `"n"`.
+#' @inheritParams rlang::args_dots_empty
+#'
+#' @return A `gtsummary` of the same class as `x`.
 #'
 #' @name sort_tbl_hierarchical
 #' @seealso [tbl_filter()]
 #'
-#' @examples
+#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))
 #' ADAE_subset <- cards::ADAE |>
 #'   dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
 #'
@@ -53,7 +56,7 @@ tbl_sort <- function(x, ...) {
 
 #' @rdname sort_tbl_hierarchical
 #' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n") {
+tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n", ...) {
   set_cli_abort_call()
 
   # process and check inputs ----------------------------------------------------------------------
@@ -69,7 +72,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
 
   overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable
 
-  outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+  outer_cols <- sapply(
+    x$table_body |> select(cards::all_ard_groups("names")),
+    function(x) dplyr::last(unique(stats::na.omit(x)))
+  )
   inner_col <- setdiff(
     x$table_body$variable,
     x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
@@ -92,9 +98,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
       dplyr::rowwise() |>
       dplyr::mutate(inner_var = if (!.data$variable == inner_col) rep_str else .data$variable) |>
       dplyr::ungroup() |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., rep_str))) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |>
       dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., paste0("^", rep_str, "$"), NA))) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |>
       select(-"inner_var")
 
     if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body)
@@ -108,7 +114,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
       x$table_body <- x$table_body |>
         dplyr::left_join(
           x$table_body |>
-            dplyr::summarize(!!paste0("sum_", g) := dplyr::first(sum_row)),
+            dplyr::summarize(!!paste0("sum_", g) := dplyr::first(.data$sum_row)),
           by = x$table_body |> dplyr::group_vars()
         )
     }
@@ -116,7 +122,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     # summary rows remain at the top of each sub-section
     x$table_body <- x$table_body |>
       dplyr::ungroup() |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~tidyr::replace_na(., " "))) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |>
       dplyr::rowwise() |>
       dplyr::mutate(inner_var = if (!.data$variable == inner_col) " " else .data$variable) |>
       dplyr::ungroup()
@@ -130,7 +136,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
 
     x$table_body <- x$table_body |>
       dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~str_replace(., "^ $", NA))) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |>
       select(-starts_with("sum_"), -"inner_var")
   }
 
@@ -149,21 +155,24 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
   }
 
   by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA
-  outer_cols <- sapply(x$table_body |> select(cards::all_ard_groups("names")), function(x) tail(unique(na.omit(x)), 1))
+  outer_cols <- sapply(
+    x$table_body |> select(cards::all_ard_groups("names")),
+    function(x) dplyr::last(unique(stats::na.omit(x)))
+  )
 
   # update logical variable_level entries from overall row to character
   cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |>
-    dplyr::filter(variable == "..ard_hierarchical_overall..") |>
+    dplyr::filter(.data$variable == "..ard_hierarchical_overall..") |>
     dplyr::pull("label") |>
     as.list()
 
   # extract row sums ------------------------------------------------------------------------------
   cards <- cards |>
-    dplyr::filter(stat_name == .stat, variable %in% x$table_body$variable) |>
+    dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |>
-    dplyr::summarise(sum_row = sum(unlist(stat))) |>
+    dplyr::summarise(sum_row = sum(unlist(.data$stat))) |>
     dplyr::ungroup() |>
-    dplyr::rename(label = variable_level) |>
+    dplyr::rename(label = "variable_level") |>
     tidyr::unnest(cols = everything())
 
   # match cards names to x$table_body -------------------------------------------------------------
@@ -179,10 +188,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     dplyr::rowwise() |>
     dplyr::mutate(across(
       cards::all_ard_groups(),
-      ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && variable == outer_cols[dplyr::cur_column()]) {
-        variable
-      } else if (is.na(.x) && variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) {
-        label
+      ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && .data$variable == outer_cols[dplyr::cur_column()]) {
+        .data$variable
+      } else if (is.na(.x) && .data$variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) {
+        .data$label
       } else {
         .x
       }
@@ -204,9 +213,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
       cards <- cards |>
         dplyr::bind_rows(
           cards |>
-            dplyr::filter(variable != "..ard_hierarchical_overall..") |>
+            dplyr::filter(.data$variable != "..ard_hierarchical_overall..") |>
             dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |>
-            dplyr::summarize(sum_row = sum(sum_row)) |>
+            dplyr::summarize(sum_row = sum(.data$sum_row)) |>
             dplyr::mutate(
               variable = .data[[gp_cols[i]]],
               label = .data[[paste0(gp_cols[i], "_level")]]
diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R
index 789244787..908242541 100644
--- a/R/tbl_hierarchical.R
+++ b/R/tbl_hierarchical.R
@@ -216,6 +216,9 @@ internal_tbl_hierarchical <- function(data,
   if ("..ard_hierarchical_overall.." %in% variables) {
     cli::cli_abort("The {.arg variables} argument cannot include a column named {.val ..ard_hierarchical_overall..}.")
   }
+  if (!all(variables == unique(variables))) {
+    cli::cli_abort("The {.arg variables} argument cannot contain repeated variables.")
+  }
 
   # evaluate tidyselect
   cards::process_selectors(data[variables], include = {{ include }})
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 0655b37bf..b25ab3581 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -46,6 +46,7 @@ cardx
 cli
 codebase
 coef
+comparators
 conf
 customizability
 customizable
diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd
index e7f3b3f1d..a84a127f9 100644
--- a/man/filter_tbl_hierarchical.Rd
+++ b/man/filter_tbl_hierarchical.Rd
@@ -8,11 +8,13 @@
 \usage{
 tbl_filter(x, ...)
 
-\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n")
+\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...)
 }
 \arguments{
-\item{x}{(\code{tbl_hierarchical})\cr
-A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
+\item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr
+A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.}
+
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{t}{(scalar \code{numeric})\cr
 Threshold used to determine which rows will be retained.}
@@ -28,12 +30,16 @@ inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FA
 Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
 Default is \code{"n"}.}
 }
+\value{
+A \code{gtsummary} of the same class as \code{x}.
+}
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
 
-This function is used to filter hierarchical tables by frequency row sum.
+This function is used to filter hierarchical table rows by frequency row sum.
 }
 \examples{
+\dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 ADAE_subset <- cards::ADAE |>
   dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5])
 
@@ -51,6 +57,7 @@ tbl_filter(tbl, t = 10)
 
 # Example 2 - Row Sums <= 5 ------------------
 tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE)
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \code{\link[=tbl_sort]{tbl_sort()}}
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index 3b004ef5e..2d6c21728 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -8,11 +8,13 @@
 \usage{
 tbl_sort(x, ...)
 
-\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n")
+\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n", ...)
 }
 \arguments{
-\item{x}{(\code{tbl_hierarchical})\cr
-A hierarchical gtsummary table of class \code{'tbl_hierarchical'}.}
+\item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr
+A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.}
+
+\item{...}{These dots are for future extensions and must be empty.}
 
 \item{sort}{(\code{string})\cr
 Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.}
@@ -24,6 +26,9 @@ Whether to sort rows in ascending or descending order. Default is descending (\c
 Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for
 all hierarchy levels. Default is \code{"n"}.}
 }
+\value{
+A \code{gtsummary} of the same class as \code{x}.
+}
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
 
@@ -36,6 +41,7 @@ in ascending alphanumeric order (i.e. A to Z).
 }
 }
 \examples{
+\dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 ADAE_subset <- cards::ADAE |>
   dplyr::filter(AETERM \%in\% unique(cards::ADAE$AETERM)[1:5])
 
@@ -53,6 +59,7 @@ tbl_sort(tbl)
 
 # Example 2 - Ascending Alphanumeric Sort (Z to A) -------
 tbl_sort(tbl, sort = "alphanumeric")
+\dontshow{\}) # examplesIf}
 }
 \seealso{
 \code{\link[=tbl_filter]{tbl_filter()}}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index b195af0ad..767995ab5 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -86,6 +86,8 @@ reference:
   - subtitle: Hierarchical Summary Tables
   - contents:
       - tbl_hierarchical
+      - tbl_sort.tbl_hierarchical
+      - tbl_filter.tbl_hierarchical
       - add_overall.tbl_hierarchical
   - subtitle: Likert Summary Tables
   - contents:

From 7103b5960d82588166e18e3b972d515db243b733 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 6 Dec 2024 23:56:08 -0500
Subject: [PATCH 08/25] Add tests for sorting

---
 R/sort_tbl_hierarchical.R                     |  28 +--
 man/sort_tbl_hierarchical.Rd                  |  11 +-
 .../testthat/_snaps/sort_tbl_hierarchical.md  |  56 +++++
 tests/testthat/test-sort_tbl_hierarchical.R   | 206 ++++++++++++++++++
 4 files changed, 282 insertions(+), 19 deletions(-)
 create mode 100644 tests/testthat/_snaps/sort_tbl_hierarchical.md
 create mode 100644 tests/testthat/test-sort_tbl_hierarchical.R

diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index 3b221da0c..623ac1b7f 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -14,7 +14,8 @@
 #' @param sort (`string`)\cr
 #'   Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`.
 #' @param desc (scalar `logical`)\cr
-#'   Whether to sort rows in ascending or descending order. Default is descending (`desc = TRUE`).
+#'   Whether to sort rows in ascending or descending order. Default is descending (`TRUE`) when `sort = "frequency"`
+#'   and ascending (`FALSE`) when `sort = "alphanumeric"`.
 #' @param .stat (`string`)\cr
 #'   Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for
 #'   all hierarchy levels. Default is `"n"`.
@@ -38,11 +39,11 @@
 #'   overall_row = TRUE
 #' )
 #'
-#' # Example 1 - Decreasing Frequency Sort ------------------
+#' # Example 1 - Descending Frequency Sort ------------------
 #' tbl_sort(tbl)
 #'
-#' # Example 2 - Ascending Alphanumeric Sort (Z to A) -------
-#' tbl_sort(tbl, sort = "alphanumeric")
+#' # Example 2 - Descending Alphanumeric Sort (Z to A) ------
+#' tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)
 NULL
 
 #' @rdname sort_tbl_hierarchical
@@ -56,7 +57,7 @@ tbl_sort <- function(x, ...) {
 
 #' @rdname sort_tbl_hierarchical
 #' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat = "n", ...) {
+tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) {
   set_cli_abort_call()
 
   # process and check inputs ----------------------------------------------------------------------
@@ -71,7 +72,6 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
   }
 
   overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable
-
   outer_cols <- sapply(
     x$table_body |> select(cards::all_ard_groups("names")),
     function(x) dplyr::last(unique(stats::na.omit(x)))
@@ -86,7 +86,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     rep_str <- if (desc) "zzzz" else " "
 
     # overall row always appears first
-    if (desc && "..ard_hierarchical_overall.." %in% x$table_body$variable) {
+    if (desc && overall) {
       ovrl_row <- x$table_body[1, ]
       x$table_body <- x$table_body[-1, ]
     }
@@ -96,21 +96,21 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
 
     x$table_body <- x$table_body |>
       dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (!.data$variable == inner_col) rep_str else .data$variable) |>
+      dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |>
       dplyr::ungroup() |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |>
-      dplyr::arrange(across(sort_cols, ~ if (desc) dplyr::desc(.x) else .x)) |>
+      dplyr::arrange(across(all_of(sort_cols), ~ if (desc) dplyr::desc(.x) else .x)) |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |>
       select(-"inner_var")
 
-    if (desc) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body)
+    if (desc && overall) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body)
   } else {
     # get row sums --------------------------------------------------------------------------------
     x <- .append_hierarchy_row_sums(x, .stat)
 
     # append outer hierarchy level sums in each row to sort at all levels -------------------------
     for (g in names(outer_cols)) {
-      x$table_body <- x$table_body |> dplyr::group_by(across(c(g, paste0(g, "_level"))), .add = TRUE)
+      x$table_body <- x$table_body |> dplyr::group_by(across(all_of(c(g, paste0(g, "_level")))), .add = TRUE)
       x$table_body <- x$table_body |>
         dplyr::left_join(
           x$table_body |>
@@ -124,7 +124,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
       dplyr::ungroup() |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |>
       dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (!.data$variable == inner_col) " " else .data$variable) |>
+      dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) " " else .data$variable) |>
       dplyr::ungroup()
 
     # sort by row sum -----------------------------------------------------------------------------
@@ -135,7 +135,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
     ), "inner_var", "sum_row", "label")
 
     x$table_body <- x$table_body |>
-      dplyr::arrange(across(sort_cols, ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |>
+      dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |>
       select(-starts_with("sum_"), -"inner_var")
   }
@@ -169,7 +169,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = TRUE, .stat
   # extract row sums ------------------------------------------------------------------------------
   cards <- cards |>
     dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |>
-    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -by_cols))) |>
+    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
     dplyr::summarise(sum_row = sum(unlist(.data$stat))) |>
     dplyr::ungroup() |>
     dplyr::rename(label = "variable_level") |>
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index 2d6c21728..45c165614 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -8,7 +8,7 @@
 \usage{
 tbl_sort(x, ...)
 
-\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = TRUE, .stat = "n", ...)
+\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...)
 }
 \arguments{
 \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr
@@ -20,7 +20,8 @@ A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_
 Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.}
 
 \item{desc}{(scalar \code{logical})\cr
-Whether to sort rows in ascending or descending order. Default is descending (\code{desc = TRUE}).}
+Whether to sort rows in ascending or descending order. Default is descending (\code{TRUE}) when \code{sort = "frequency"}
+and ascending (\code{FALSE}) when \code{sort = "alphanumeric"}.}
 
 \item{.stat}{(\code{string})\cr
 Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for
@@ -54,11 +55,11 @@ tbl <- tbl_hierarchical(
   overall_row = TRUE
 )
 
-# Example 1 - Decreasing Frequency Sort ------------------
+# Example 1 - Descending Frequency Sort ------------------
 tbl_sort(tbl)
 
-# Example 2 - Ascending Alphanumeric Sort (Z to A) -------
-tbl_sort(tbl, sort = "alphanumeric")
+# Example 2 - Descending Alphanumeric Sort (Z to A) ------
+tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)
 \dontshow{\}) # examplesIf}
 }
 \seealso{
diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/sort_tbl_hierarchical.md
new file mode 100644
index 000000000..99a9f5f65
--- /dev/null
+++ b/tests/testthat/_snaps/sort_tbl_hierarchical.md
@@ -0,0 +1,56 @@
+# tbl_sort.tbl_hierarchical() works
+
+    Code
+      as.data.frame(tbl)
+    Output
+         **Sex**  \n    **Race**  \n        **Reported Term for the Adverse Event** **Placebo**  \nN = 86 **Xanomeline High Dose**  \nN = 84 **Xanomeline Low Dose**  \nN = 84
+      1                                               Number of patients with event              26 (30%)                           42 (50%)                          40 (48%)
+      2                                                                           F              13 (25%)                           18 (45%)                          23 (46%)
+      3                                                                       WHITE              10 (21%)                           14 (41%)                          20 (45%)
+      4                                                   APPLICATION SITE PRURITUS              2 (4.2%)                            8 (24%)                          10 (23%)
+      5                                                                    ERYTHEMA               6 (13%)                            6 (18%)                           8 (18%)
+      6                                                   APPLICATION SITE ERYTHEMA              2 (4.2%)                            5 (15%)                           5 (11%)
+      7                                                                   DIARRHOEA              2 (4.2%)                             0 (0%)                          3 (6.8%)
+      8                                                   BLACK OR AFRICAN AMERICAN               3 (60%)                            4 (67%)                           3 (50%)
+      9                                                   APPLICATION SITE PRURITUS               2 (40%)                            2 (33%)                           2 (33%)
+      10                                                                   ERYTHEMA                0 (0%)                            1 (17%)                           1 (17%)
+      11                                       ATRIOVENTRICULAR BLOCK SECOND DEGREE                0 (0%)                            1 (17%)                            0 (0%)
+      12                                                                  DIARRHOEA               1 (20%)                             0 (0%)                            0 (0%)
+      13                                                                          M              13 (39%)                           24 (55%)                          17 (50%)
+      14                                                                      WHITE              12 (40%)                           22 (55%)                          17 (50%)
+      15                                                  APPLICATION SITE PRURITUS              1 (3.3%)                           12 (30%)                          10 (29%)
+      16                                                  APPLICATION SITE ERYTHEMA              1 (3.3%)                           10 (25%)                           7 (21%)
+      17                                                                   ERYTHEMA               3 (10%)                            5 (13%)                           6 (18%)
+      18                                                                  DIARRHOEA               6 (20%)                           3 (7.5%)                          2 (5.9%)
+      19                                       ATRIOVENTRICULAR BLOCK SECOND DEGREE              2 (6.7%)                           2 (5.0%)                            0 (0%)
+      20                                                  BLACK OR AFRICAN AMERICAN               1 (33%)                            1 (33%)                           0 (NA%)
+      21                                                  APPLICATION SITE PRURITUS               1 (33%)                             0 (0%)                           0 (NA%)
+      22                                                                  DIARRHOEA                0 (0%)                            1 (33%)                           0 (NA%)
+      23                                                                   ERYTHEMA                0 (0%)                            1 (33%)                           0 (NA%)
+      24                                           AMERICAN INDIAN OR ALASKA NATIVE               0 (NA%)                           1 (100%)                           0 (NA%)
+      25                                                                   ERYTHEMA               0 (NA%)                           1 (100%)                           0 (NA%)
+
+# tbl_sort.tbl_hierarchical() error messaging works
+
+    Code
+      tbl_sort(data.frame())
+    Condition
+      Error in `check_class()`:
+      ! The `x` argument must be class <gtsummary>, not a data frame.
+
+---
+
+    Code
+      tbl_sort(tbl, sort = "no_sorting")
+    Condition
+      Error in `tbl_sort()`:
+      ! The `sort` argument must be either "frequency" or "alphanumeric".
+
+---
+
+    Code
+      tbl_sort(tbl, .stat = "mean")
+    Condition
+      Error in `tbl_sort()`:
+      ! The `.stat` argument is "mean" but this statistic is not present in `x`. For all valid statistic options see the "stat_name" column of `x$cards$tbl_hierarchical`.
+
diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R
new file mode 100644
index 000000000..bc69937aa
--- /dev/null
+++ b/tests/testthat/test-sort_tbl_hierarchical.R
@@ -0,0 +1,206 @@
+skip_on_cran()
+
+ADAE_subset <- cards::ADAE |>
+  dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
+
+tbl <- tbl_hierarchical(
+  data = ADAE_subset,
+  variables = c(SEX, RACE, AETERM),
+  by = TRTA,
+  denominator = cards::ADSL |> mutate(TRTA = ARM),
+  id = USUBJID,
+  overall_row = TRUE
+)
+
+test_that("tbl_sort.tbl_hierarchical() works", {
+  withr::local_options(width = 200)
+  # no errors
+  expect_silent(tbl <- tbl_sort(tbl))
+  expect_snapshot(tbl |> as.data.frame())
+
+  # .stat argument works
+  expect_silent(tbl <- tbl_sort(tbl, .stat = "p"))
+})
+
+test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", {
+  # descending frequency (default)
+  expect_silent(tbl <- tbl_sort(tbl))
+  expect_equal(
+    tbl$table_body |>
+      dplyr::filter(variable == "SEX") |>
+      dplyr::pull(label),
+    c("F", "M")
+  )
+  expect_equal(
+    tbl$table_body |>
+      dplyr::filter(variable == "RACE") |>
+      dplyr::pull(label),
+    c("WHITE", "BLACK OR AFRICAN AMERICAN", "WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE")
+
+  )
+  expect_equal(
+    tbl$table_body |>
+      dplyr::filter(variable == "AETERM") |>
+      dplyr::pull(label),
+    c(
+      "APPLICATION SITE PRURITUS", "ERYTHEMA", "APPLICATION SITE ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS",
+      "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "APPLICATION SITE PRURITUS",
+      "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE",
+      "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "ERYTHEMA"
+    )
+  )
+
+  # ascending frequency
+  expect_silent(tbl <- tbl_sort(tbl, desc = FALSE))
+  expect_equal(
+    tbl$table_body |>
+      dplyr::filter(variable == "SEX") |>
+      dplyr::pull(label),
+    c("F", "M") # F and M have equal numbers
+  )
+  expect_equal(
+    tbl$table_body |>
+      dplyr::filter(variable == "RACE") |>
+      dplyr::pull(label),
+    c("BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", "BLACK OR AFRICAN AMERICAN", "WHITE")
+  )
+  expect_equal(
+    tbl$table_body |>
+      dplyr::filter(variable == "AETERM") |>
+      dplyr::pull(label),
+    c(
+      "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "APPLICATION SITE PRURITUS", "DIARRHOEA",
+      "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", "APPLICATION SITE PRURITUS",
+      "DIARRHOEA", "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA",
+      "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS"
+    )
+  )
+})
+
+test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", {
+  # descending (Z to A)
+  expect_silent(result <- tbl_sort(tbl, sort = "alphanumeric", desc = TRUE))
+  expect_equal(
+    result$table_body |>
+      dplyr::filter(variable == "SEX") |>
+      dplyr::pull(label),
+    c("M", "F")
+  )
+  expect_equal(
+    result$table_body |>
+      dplyr::filter(variable == "RACE") |>
+      dplyr::pull(label),
+    c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "WHITE", "BLACK OR AFRICAN AMERICAN")
+  )
+  expect_equal(
+    result$table_body |>
+      dplyr::filter(variable == "AETERM") |>
+      dplyr::pull(label),
+    c(
+      "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS",
+      "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA","APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA",
+      "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA",
+      "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS"
+    )
+  )
+
+  # ascending (A to Z)
+  expect_silent(result <- tbl_sort(result, sort = "alphanumeric"))
+
+  # results match with tbl_hierarchical which sorts A to Z by default
+  expect_equal(
+    result |> as.data.frame(),
+    tbl |> as.data.frame()
+  )
+})
+
+test_that("tbl_sort.tbl_hierarchical() works when there is no overall row in x", {
+  tbl_no_overall <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = c(SEX, RACE, AETERM),
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID,
+    overall_row = FALSE
+  )
+
+  # sort = 'frequency'
+  expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall))
+  expect_equal(
+    tbl_no_overall$table_body,
+    tbl_sort(tbl)$table_body[-1, ]
+  )
+
+  # sort = 'alphanumeric'
+  expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall, sort = "alphanumeric"))
+  expect_equal(
+    tbl_no_overall$table_body,
+    tbl$table_body[-1, ]
+  )
+})
+
+test_that("tbl_sort.tbl_hierarchical() works with only one variable in x", {
+  tbl_single <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = AETERM,
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID,
+    overall_row = TRUE
+  )
+
+  # sort = 'frequency'
+  expect_silent(tbl_single <- tbl_sort(tbl_single))
+  expect_equal(
+    tbl_single$table_body |>
+      dplyr::filter(variable == "AETERM") |>
+      dplyr::pull(label),
+    c(
+      "APPLICATION SITE PRURITUS", "ERYTHEMA", "APPLICATION SITE ERYTHEMA", "DIARRHOEA",
+      "ATRIOVENTRICULAR BLOCK SECOND DEGREE"
+    )
+  )
+
+  # sort = 'alphanumeric'
+  expect_silent(tbl_single <- tbl_sort(tbl_single, sort = "alphanumeric"))
+  expect_equal(
+    tbl_single$table_body |>
+      dplyr::filter(variable == "AETERM") |>
+      dplyr::pull(label),
+    sort(unique(ADAE_subset$AETERM))
+  )
+})
+
+test_that("tbl_sort.tbl_hierarchical() works when some variables not included in x", {
+  tbl <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = c(SEX, RACE, AETERM),
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID,
+    include =  c(SEX, AETERM),
+    overall_row = TRUE
+  )
+
+  expect_message(tbl_sort(tbl))
+})
+
+test_that("tbl_sort.tbl_hierarchical() error messaging works", {
+  # invalid x input
+  expect_snapshot(
+    tbl_sort(data.frame()),
+    error = TRUE
+  )
+
+  # invalid sort input
+  expect_snapshot(
+    tbl_sort(tbl, sort = "no_sorting"),
+    error = TRUE
+  )
+
+  # invalid .stat input
+  expect_snapshot(
+    tbl_sort(tbl, .stat = "mean"),
+    error = TRUE
+  )
+})

From d82c06f67247e35094aeed7cb2d7da125b5181b7 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Mon, 9 Dec 2024 15:49:11 -0500
Subject: [PATCH 09/25] Add filtering tests

---
 R/filter_tbl_hierarchical.R                   |   4 +-
 .../_snaps/filter_tbl_hierarchical.md         |  59 ++++++++
 tests/testthat/test-filter_tbl_hierarchical.R | 138 ++++++++++++++++++
 tests/testthat/test-sort_tbl_hierarchical.R   |   6 +-
 4 files changed, 203 insertions(+), 4 deletions(-)
 create mode 100644 tests/testthat/_snaps/filter_tbl_hierarchical.md
 create mode 100644 tests/testthat/test-filter_tbl_hierarchical.R

diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R
index 8821fd75e..56b968041 100644
--- a/R/filter_tbl_hierarchical.R
+++ b/R/filter_tbl_hierarchical.R
@@ -111,7 +111,9 @@ tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n"
     if (nrow(x$table_body) > 0) {
       cli::cli_inform(
         "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
-        regardless of whether they meet the filtering criteria themselves."
+        regardless of whether they meet the filtering criteria themselves.",
+        .frequency = "once",
+        .frequency_id = "sum_rows_lt"
       )
     }
   }
diff --git a/tests/testthat/_snaps/filter_tbl_hierarchical.md b/tests/testthat/_snaps/filter_tbl_hierarchical.md
new file mode 100644
index 000000000..237023eac
--- /dev/null
+++ b/tests/testthat/_snaps/filter_tbl_hierarchical.md
@@ -0,0 +1,59 @@
+# tbl_filter.tbl_hierarchical() works
+
+    Code
+      as.data.frame(tbl)
+    Output
+         **Sex**  \n    **Race**  \n        **Reported Term for the Adverse Event** **Placebo**  \nN = 86 **Xanomeline High Dose**  \nN = 84 **Xanomeline Low Dose**  \nN = 84
+      1                                               Number of patients with event              26 (30%)                           42 (50%)                          40 (48%)
+      2                                                                           F              13 (25%)                           18 (45%)                          23 (46%)
+      3                                                                       WHITE              10 (21%)                           14 (41%)                          20 (45%)
+      4                                                   APPLICATION SITE ERYTHEMA              2 (4.2%)                            5 (15%)                           5 (11%)
+      5                                                   APPLICATION SITE PRURITUS              2 (4.2%)                            8 (24%)                          10 (23%)
+      6                                                                    ERYTHEMA               6 (13%)                            6 (18%)                           8 (18%)
+      7                                                                           M              13 (39%)                           24 (55%)                          17 (50%)
+      8                                                                       WHITE              12 (40%)                           22 (55%)                          17 (50%)
+      9                                                   APPLICATION SITE ERYTHEMA              1 (3.3%)                           10 (25%)                           7 (21%)
+      10                                                  APPLICATION SITE PRURITUS              1 (3.3%)                           12 (30%)                          10 (29%)
+      11                                                                  DIARRHOEA               6 (20%)                           3 (7.5%)                          2 (5.9%)
+      12                                                                   ERYTHEMA               3 (10%)                            5 (13%)                           6 (18%)
+
+# tbl_filter.tbl_hierarchical() error messaging works
+
+    Code
+      tbl_filter(data.frame(), t = 10)
+    Condition
+      Error in `check_class()`:
+      ! The `x` argument must be class <gtsummary>, not a data frame.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10")
+    Condition
+      Error in `tbl_filter()`:
+      ! The `t` argument must be numeric.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10", gt = "yes")
+    Condition
+      Error in `tbl_filter()`:
+      ! The `t` argument must be numeric.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10", eq = "no")
+    Condition
+      Error in `tbl_filter()`:
+      ! The `t` argument must be numeric.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10", .stat = "pct")
+    Condition
+      Error in `tbl_filter()`:
+      ! The `t` argument must be numeric.
+
diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-filter_tbl_hierarchical.R
new file mode 100644
index 000000000..0542df4bb
--- /dev/null
+++ b/tests/testthat/test-filter_tbl_hierarchical.R
@@ -0,0 +1,138 @@
+skip_on_cran()
+
+ADAE_subset <- cards::ADAE |>
+  dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
+
+tbl <- tbl_hierarchical(
+  data = ADAE_subset,
+  variables = c(SEX, RACE, AETERM),
+  by = TRTA,
+  denominator = cards::ADSL |> mutate(TRTA = ARM),
+  id = USUBJID,
+  overall_row = TRUE
+)
+
+test_that("tbl_filter.tbl_hierarchical() works", {
+  withr::local_options(width = 200)
+
+  # no errors
+  expect_silent(tbl <- tbl_filter(tbl, t = 10))
+  expect_snapshot(tbl |> as.data.frame())
+
+  # .stat argument works
+  expect_silent(tbl <- tbl_filter(tbl, t = 10, .stat = "p"))
+})
+
+test_that("tbl_filter.tbl_hierarchical(gt) works", {
+  # gt = TRUE
+  expect_silent(tbl_gt <- tbl_filter(tbl, t = 10))
+
+  # gt = FALSE
+  expect_silent(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE))
+
+  expect_equal(
+    dplyr::inner_join(
+      tbl_gt$table_body,
+      tbl_lt$table_body,
+      by = names(tbl_gt$table_body)
+    ) |>
+      dplyr::filter(variable == "AETERM") |>
+      nrow(),
+    0
+  )
+
+  expect_equal(
+    sum(
+      tbl_gt$table_body |>
+        dplyr::filter(variable == "AETERM") |>
+        nrow(),
+      tbl_lt$table_body |>
+        dplyr::filter(variable == "AETERM") |>
+        nrow()
+    ),
+    tbl$table_body |>
+      dplyr::filter(variable == "AETERM") |>
+      nrow()
+  )
+})
+
+test_that("tbl_filter.tbl_hierarchical(eq) works", {
+  # gt = TRUE, eq = FALSE
+  expect_silent(tbl_gt <- tbl_filter(tbl, t = 12))
+
+  # gt = TRUE, eq = TRUE
+  expect_silent(tbl_geq <- tbl_filter(tbl, t = 12, eq = TRUE))
+  expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body))
+
+  # gt = FALSE, eq = FALSE
+  expect_silent(tbl_lt <- tbl_filter(tbl, t = 12, gt = FALSE))
+
+  # gt = TRUE, eq = TRUE
+  expect_silent(tbl_leq <- tbl_filter(tbl, t = 12, gt = FALSE, eq = TRUE))
+  expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body))
+})
+
+test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", {
+  expect_silent(tbl <- tbl_filter(tbl, t = 200))
+  expect_equal(nrow(tbl$table_body), 0)
+})
+
+test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", {
+  tbl_single <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = AETERM,
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID,
+    overall_row = TRUE
+  )
+
+  expect_silent(tbl_single <- tbl_filter(tbl_single, t = 20))
+  expect_equal(nrow(tbl_single$table_body), 4)
+})
+
+test_that("tbl_filter.tbl_hierarchical() works when some variables not included in x", {
+  tbl <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = c(SEX, RACE, AETERM),
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID,
+    include = c(SEX, AETERM),
+    overall_row = TRUE
+  )
+
+  expect_message(tbl_filter(tbl, t = 10))
+})
+
+test_that("tbl_filter.tbl_hierarchical() error messaging works", {
+  # invalid x input
+  expect_snapshot(
+    tbl_filter(data.frame(), t = 10),
+    error = TRUE
+  )
+
+  # invalid t input
+  expect_snapshot(
+    tbl_filter(tbl, t = "10"),
+    error = TRUE
+  )
+
+  # invalid gt input
+  expect_snapshot(
+    tbl_filter(tbl, t = "10", gt = "yes"),
+    error = TRUE
+  )
+
+  # invalid eq input
+  expect_snapshot(
+    tbl_filter(tbl, t = "10", eq = "no"),
+    error = TRUE
+  )
+
+  # invalid .stat input
+  expect_snapshot(
+    tbl_filter(tbl, t = "10", .stat = "pct"),
+    error = TRUE
+  )
+})
diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R
index bc69937aa..7c61ea284 100644
--- a/tests/testthat/test-sort_tbl_hierarchical.R
+++ b/tests/testthat/test-sort_tbl_hierarchical.R
@@ -14,6 +14,7 @@ tbl <- tbl_hierarchical(
 
 test_that("tbl_sort.tbl_hierarchical() works", {
   withr::local_options(width = 200)
+
   # no errors
   expect_silent(tbl <- tbl_sort(tbl))
   expect_snapshot(tbl |> as.data.frame())
@@ -36,7 +37,6 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", {
       dplyr::filter(variable == "RACE") |>
       dplyr::pull(label),
     c("WHITE", "BLACK OR AFRICAN AMERICAN", "WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE")
-
   )
   expect_equal(
     tbl$table_body |>
@@ -98,7 +98,7 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", {
       dplyr::pull(label),
     c(
       "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS",
-      "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA","APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA",
+      "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA",
       "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA",
       "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS"
     )
@@ -178,7 +178,7 @@ test_that("tbl_sort.tbl_hierarchical() works when some variables not included in
     by = TRTA,
     denominator = cards::ADSL |> mutate(TRTA = ARM),
     id = USUBJID,
-    include =  c(SEX, AETERM),
+    include = c(SEX, AETERM),
     overall_row = TRUE
   )
 

From cd484f6faaab2c6e9171fc98359f1bbdaa580f9a Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Mon, 9 Dec 2024 16:22:51 -0500
Subject: [PATCH 10/25] Fix test

---
 tests/testthat/test-filter_tbl_hierarchical.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-filter_tbl_hierarchical.R
index 0542df4bb..13e0906cc 100644
--- a/tests/testthat/test-filter_tbl_hierarchical.R
+++ b/tests/testthat/test-filter_tbl_hierarchical.R
@@ -28,7 +28,7 @@ test_that("tbl_filter.tbl_hierarchical(gt) works", {
   expect_silent(tbl_gt <- tbl_filter(tbl, t = 10))
 
   # gt = FALSE
-  expect_silent(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE))
+  expect_message(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE))
 
   expect_equal(
     dplyr::inner_join(

From 0fa9fe87b6dc8da7f54b42aeb68dee7a69c552bb Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 28 Jan 2025 19:04:07 -0500
Subject: [PATCH 11/25] Update sort parameters

---
 R/sort_tbl_hierarchical.R                     | 48 +++++++----------
 .../testthat/_snaps/sort_tbl_hierarchical.md  |  2 +-
 tests/testthat/test-sort_tbl_hierarchical.R   | 52 +------------------
 3 files changed, 21 insertions(+), 81 deletions(-)

diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index 623ac1b7f..7494da717 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -4,20 +4,17 @@
 #'
 #' This function is used to sort hierarchical tables. Options for sorting criteria are:
 #'
-#' 1. Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
-#'    ordered accordingly (default).
+#' 1. Descending - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
+#'    sorted in descending order by sum (default).
 #' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables
 #'    in ascending alphanumeric order (i.e. A to Z).
 #'
 #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
 #' @param sort (`string`)\cr
-#'   Specifies sorting to perform. Values must be one of `c("frequency", "alphanumeric")`. Default is `"frequency"`.
-#' @param desc (scalar `logical`)\cr
-#'   Whether to sort rows in ascending or descending order. Default is descending (`TRUE`) when `sort = "frequency"`
-#'   and ascending (`FALSE`) when `sort = "alphanumeric"`.
+#'   Specifies sorting to perform. Values must be one of `c("alphanumeric", "descending")`. Default is `"descending"`.
 #' @param .stat (`string`)\cr
-#'   Statistic to use to calculate row sums when `sort = "frequency"`. This statistic must be present in the table for
+#'   Statistic to use to calculate row sums when `sort = "descending"`. This statistic must be present in the table for
 #'   all hierarchy levels. Default is `"n"`.
 #' @inheritParams rlang::args_dots_empty
 #'
@@ -40,10 +37,12 @@
 #' )
 #'
 #' # Example 1 - Descending Frequency Sort ------------------
-#' tbl_sort(tbl)
+#' tbl <- tbl_sort(tbl)
+#' tbl
 #'
-#' # Example 2 - Descending Alphanumeric Sort (Z to A) ------
-#' tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)
+#' # Example 2 - Alphanumeric Sort --------------------------
+#' tbl <- tbl_sort(tbl, sort = "alphanumeric")
+#' tbl
 NULL
 
 #' @rdname sort_tbl_hierarchical
@@ -57,16 +56,15 @@ tbl_sort <- function(x, ...) {
 
 #' @rdname sort_tbl_hierarchical
 #' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...) {
+tbl_sort.tbl_hierarchical <- function(x, sort = "descending", .stat = "n", ...) {
   set_cli_abort_call()
 
   # process and check inputs ----------------------------------------------------------------------
-  check_scalar_logical(desc)
   check_string(.stat)
 
-  if (!sort %in% c("frequency", "alphanumeric")) {
+  if (!sort %in% c("descending", "alphanumeric")) {
     cli::cli_abort(
-      "The {.arg sort} argument must be either {.val frequency} or {.val alphanumeric}.",
+      "The {.arg sort} argument must be either {.val descending} or {.val alphanumeric}.",
       call = get_cli_abort_call()
     )
   }
@@ -81,16 +79,10 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr
     x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
   )
 
-  if (sort == "alphanumeric") {
-    # summary rows remain at the top of each sub-section
-    rep_str <- if (desc) "zzzz" else " "
-
-    # overall row always appears first
-    if (desc && overall) {
-      ovrl_row <- x$table_body[1, ]
-      x$table_body <- x$table_body[-1, ]
-    }
+  # keep summary rows at the top of each sub-section
+  rep_str <- " "
 
+  if (sort == "alphanumeric") {
     # sort by label -------------------------------------------------------------------------------
     sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label")
 
@@ -99,11 +91,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr
       dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |>
       dplyr::ungroup() |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |>
-      dplyr::arrange(across(all_of(sort_cols), ~ if (desc) dplyr::desc(.x) else .x)) |>
+      dplyr::arrange(across(all_of(sort_cols), ~ .x)) |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |>
       select(-"inner_var")
-
-    if (desc && overall) x$table_body <- dplyr::bind_rows(ovrl_row, x$table_body)
   } else {
     # get row sums --------------------------------------------------------------------------------
     x <- .append_hierarchy_row_sums(x, .stat)
@@ -122,9 +112,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr
     # summary rows remain at the top of each sub-section
     x$table_body <- x$table_body |>
       dplyr::ungroup() |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., " "))) |>
+      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |>
       dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) " " else .data$variable) |>
+      dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |>
       dplyr::ungroup()
 
     # sort by row sum -----------------------------------------------------------------------------
@@ -135,7 +125,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "frequency", desc = (sort == "fr
     ), "inner_var", "sum_row", "label")
 
     x$table_body <- x$table_body |>
-      dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x) && desc) dplyr::desc(.x) else .x)) |>
+      dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x)) dplyr::desc(.x) else .x)) |>
       dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |>
       select(-starts_with("sum_"), -"inner_var")
   }
diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/sort_tbl_hierarchical.md
index 99a9f5f65..07ff271ff 100644
--- a/tests/testthat/_snaps/sort_tbl_hierarchical.md
+++ b/tests/testthat/_snaps/sort_tbl_hierarchical.md
@@ -44,7 +44,7 @@
       tbl_sort(tbl, sort = "no_sorting")
     Condition
       Error in `tbl_sort()`:
-      ! The `sort` argument must be either "frequency" or "alphanumeric".
+      ! The `sort` argument must be either "descending" or "alphanumeric".
 
 ---
 
diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-sort_tbl_hierarchical.R
index 7c61ea284..80e7068ab 100644
--- a/tests/testthat/test-sort_tbl_hierarchical.R
+++ b/tests/testthat/test-sort_tbl_hierarchical.R
@@ -49,60 +49,10 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'frequency') works", {
       "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "ERYTHEMA"
     )
   )
-
-  # ascending frequency
-  expect_silent(tbl <- tbl_sort(tbl, desc = FALSE))
-  expect_equal(
-    tbl$table_body |>
-      dplyr::filter(variable == "SEX") |>
-      dplyr::pull(label),
-    c("F", "M") # F and M have equal numbers
-  )
-  expect_equal(
-    tbl$table_body |>
-      dplyr::filter(variable == "RACE") |>
-      dplyr::pull(label),
-    c("BLACK OR AFRICAN AMERICAN", "WHITE", "AMERICAN INDIAN OR ALASKA NATIVE", "BLACK OR AFRICAN AMERICAN", "WHITE")
-  )
-  expect_equal(
-    tbl$table_body |>
-      dplyr::filter(variable == "AETERM") |>
-      dplyr::pull(label),
-    c(
-      "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "APPLICATION SITE PRURITUS", "DIARRHOEA",
-      "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA", "APPLICATION SITE PRURITUS",
-      "DIARRHOEA", "ERYTHEMA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA",
-      "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS"
-    )
-  )
 })
 
 test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", {
-  # descending (Z to A)
-  expect_silent(result <- tbl_sort(tbl, sort = "alphanumeric", desc = TRUE))
-  expect_equal(
-    result$table_body |>
-      dplyr::filter(variable == "SEX") |>
-      dplyr::pull(label),
-    c("M", "F")
-  )
-  expect_equal(
-    result$table_body |>
-      dplyr::filter(variable == "RACE") |>
-      dplyr::pull(label),
-    c("WHITE", "BLACK OR AFRICAN AMERICAN", "AMERICAN INDIAN OR ALASKA NATIVE", "WHITE", "BLACK OR AFRICAN AMERICAN")
-  )
-  expect_equal(
-    result$table_body |>
-      dplyr::filter(variable == "AETERM") |>
-      dplyr::pull(label),
-    c(
-      "ERYTHEMA", "DIARRHOEA", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS",
-      "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA", "APPLICATION SITE PRURITUS", "ERYTHEMA", "ERYTHEMA",
-      "DIARRHOEA", "APPLICATION SITE PRURITUS", "APPLICATION SITE ERYTHEMA", "ERYTHEMA", "DIARRHOEA",
-      "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "APPLICATION SITE PRURITUS"
-    )
-  )
+  expect_silent(result <- tbl_sort(tbl))
 
   # ascending (A to Z)
   expect_silent(result <- tbl_sort(result, sort = "alphanumeric"))

From 86b9898e6bf84c0f687767945cf111f1090a05e3 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 12 Feb 2025 20:42:47 -0500
Subject: [PATCH 12/25] testing

---
 R/sort_tbl_hierarchical.R    | 189 +++++------------------------------
 R/tbl_hierarchical.R         |  12 ++-
 man/sort_tbl_hierarchical.Rd |  34 ++++---
 3 files changed, 56 insertions(+), 179 deletions(-)

diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
index 7494da717..1d0449ca1 100644
--- a/R/sort_tbl_hierarchical.R
+++ b/R/sort_tbl_hierarchical.R
@@ -4,18 +4,14 @@
 #'
 #' This function is used to sort hierarchical tables. Options for sorting criteria are:
 #'
-#' 1. Descending - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
+#' 1. Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are
 #'    sorted in descending order by sum (default).
 #' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables
 #'    in ascending alphanumeric order (i.e. A to Z).
 #'
 #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
-#'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
-#' @param sort (`string`)\cr
-#'   Specifies sorting to perform. Values must be one of `c("alphanumeric", "descending")`. Default is `"descending"`.
-#' @param .stat (`string`)\cr
-#'   Statistic to use to calculate row sums when `sort = "descending"`. This statistic must be present in the table for
-#'   all hierarchy levels. Default is `"n"`.
+#'   a hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
+#' @inheritParams cards::ard_sort
 #' @inheritParams rlang::args_dots_empty
 #'
 #' @return A `gtsummary` of the same class as `x`.
@@ -37,7 +33,7 @@
 #' )
 #'
 #' # Example 1 - Descending Frequency Sort ------------------
-#' tbl <- tbl_sort(tbl)
+#' tbl <- tbl_sort(tbl, sort = "descending")
 #' tbl
 #'
 #' # Example 2 - Alphanumeric Sort --------------------------
@@ -56,170 +52,37 @@ tbl_sort <- function(x, ...) {
 
 #' @rdname sort_tbl_hierarchical
 #' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "descending", .stat = "n", ...) {
+tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
   set_cli_abort_call()
 
-  # process and check inputs ----------------------------------------------------------------------
-  check_string(.stat)
+  ard_args <- attributes(x$cards$tbl_hierarchical)$args
+  by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
 
-  if (!sort %in% c("descending", "alphanumeric")) {
-    cli::cli_abort(
-      "The {.arg sort} argument must be either {.val descending} or {.val alphanumeric}.",
-      call = get_cli_abort_call()
-    )
-  }
-
-  overall <- "..ard_hierarchical_overall.." %in% x$table_body$variable
-  outer_cols <- sapply(
-    x$table_body |> select(cards::all_ard_groups("names")),
-    function(x) dplyr::last(unique(stats::na.omit(x)))
-  )
-  inner_col <- setdiff(
-    x$table_body$variable,
-    x$table_body |> select(cards::all_ard_groups("names")) |> unlist() |> unique()
-  )
-
-  # keep summary rows at the top of each sub-section
-  rep_str <- " "
-
-  if (sort == "alphanumeric") {
-    # sort by label -------------------------------------------------------------------------------
-    sort_cols <- c(x$table_body |> select(cards::all_ard_groups("levels")) |> names(), "inner_var", "label")
-
-    x$table_body <- x$table_body |>
-      dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |>
-      dplyr::ungroup() |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |>
-      dplyr::arrange(across(all_of(sort_cols), ~ .x)) |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., paste0("^", rep_str, "$"), NA))) |>
-      select(-"inner_var")
-  } else {
-    # get row sums --------------------------------------------------------------------------------
-    x <- .append_hierarchy_row_sums(x, .stat)
-
-    # append outer hierarchy level sums in each row to sort at all levels -------------------------
-    for (g in names(outer_cols)) {
-      x$table_body <- x$table_body |> dplyr::group_by(across(all_of(c(g, paste0(g, "_level")))), .add = TRUE)
-      x$table_body <- x$table_body |>
-        dplyr::left_join(
-          x$table_body |>
-            dplyr::summarize(!!paste0("sum_", g) := dplyr::first(.data$sum_row)),
-          by = x$table_body |> dplyr::group_vars()
-        )
-    }
-
-    # summary rows remain at the top of each sub-section
-    x$table_body <- x$table_body |>
-      dplyr::ungroup() |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ tidyr::replace_na(., rep_str))) |>
-      dplyr::rowwise() |>
-      dplyr::mutate(inner_var = if (!.data$variable %in% inner_col) rep_str else .data$variable) |>
-      dplyr::ungroup()
-
-    # sort by row sum -----------------------------------------------------------------------------
-    sort_cols <- c(rbind(
-      x$table_body |> select(cards::all_ard_groups("names")) |> names(),
-      x$table_body |> select(starts_with("sum_group")) |> names(),
-      x$table_body |> select(cards::all_ard_groups("levels")) |> names()
-    ), "inner_var", "sum_row", "label")
-
-    x$table_body <- x$table_body |>
-      dplyr::arrange(across(all_of(sort_cols), ~ if (is.numeric(.x)) dplyr::desc(.x) else .x)) |>
-      dplyr::mutate(across(cards::all_ard_groups(), .fns = ~ str_replace(., "^ $", NA))) |>
-      select(-starts_with("sum_"), -"inner_var")
-  }
-
-  x
-}
-
-.append_hierarchy_row_sums <- function(x, .stat) {
-  cards <- x$cards$tbl_hierarchical
-
-  if (!.stat %in% cards$stat_name) {
-    cli::cli_abort(
-      "The {.arg .stat} argument is {.val {(.stat)}} but this statistic is not present in {.arg x}. For all valid
-      statistic options see the {.val stat_name} column of {.code x$cards$tbl_hierarchical}.",
-      call = get_cli_abort_call()
-    )
-  }
-
-  by_cols <- if (ncol(x$table_body |> select(starts_with("stat_"))) > 1) c("group1", "group1_level") else NA
-  outer_cols <- sapply(
-    x$table_body |> select(cards::all_ard_groups("names")),
-    function(x) dplyr::last(unique(stats::na.omit(x)))
-  )
-
-  # update logical variable_level entries from overall row to character
-  cards$variable_level[cards$variable == "..ard_hierarchical_overall.."] <- x$table_body |>
-    dplyr::filter(.data$variable == "..ard_hierarchical_overall..") |>
-    dplyr::pull("label") |>
-    as.list()
-
-  # extract row sums ------------------------------------------------------------------------------
-  cards <- cards |>
-    dplyr::filter(.data$stat_name == .stat, .data$variable %in% x$table_body$variable) |>
+  # remove rows from ARD that do not correspond to a table row, append indices
+  x_sort_match <- x$cards$tbl_hierarchical |>
+    dplyr::filter(!is.na(group1)) |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::summarise(sum_row = sum(unlist(.data$stat))) |>
+    dplyr::mutate(idx_sort = dplyr::cur_group_id()) |>
     dplyr::ungroup() |>
-    dplyr::rename(label = "variable_level") |>
-    tidyr::unnest(cols = everything())
-
-  # match cards names to x$table_body -------------------------------------------------------------
-  if (length(by_cols) > 1) {
-    names(cards)[grep("group", names(cards))] <- x$table_body |>
-      select(cards::all_ard_groups()) |>
-      names()
-  }
-  cards[cards$variable == "..ard_hierarchical_overall..", 1] <- "..ard_hierarchical_overall.."
-
-  # fill in NAs to align cards layout with x$table_body -------------------------------------------
-  cards <- cards |>
-    dplyr::rowwise() |>
-    dplyr::mutate(across(
-      cards::all_ard_groups(),
-      ~ if (is.na(.x) && !grepl("_level", dplyr::cur_column()) && .data$variable == outer_cols[dplyr::cur_column()]) {
-        .data$variable
-      } else if (is.na(.x) && .data$variable %in% outer_cols[gsub("_level", "", dplyr::cur_column())]) {
-        .data$label
-      } else {
-        .x
-      }
-    ))
+    cards::as_card()
+  attr(x_sort_match, "args") <- ard_args
 
-  # for any variables not in include, calculate group sums ----------------------------------------
-  if (!all(outer_cols %in% cards$variable)) {
-    gp_vars <- outer_cols[outer_cols %in% setdiff(outer_cols, cards$variable)]
-    gp_cols <- names(gp_vars)
+  # pull indices each corresponding to one row of x$table_body
+  pre_idx <- x_sort_match |>
+    dplyr::pull("idx_sort") |>
+    unique()
 
-    cli::cli_inform(
-      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
-      These variables ({gp_vars}) do not have event rate data available so the total sum of the event
-      rates for this hierarchy section will be used instead. To use event rates for all sections of the table,
-      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
-    )
+  # pull updated index order after sorting
+  post_idx <- x_sort_match |>
+    cards::ard_sort(sort) |>
+    dplyr::pull("idx_sort") |>
+    unique()
 
-    for (i in seq_along(gp_cols)) {
-      cards <- cards |>
-        dplyr::bind_rows(
-          cards |>
-            dplyr::filter(.data$variable != "..ard_hierarchical_overall..") |>
-            dplyr::group_by(across(c(gp_cols[1:i], paste0(gp_cols[1:i], "_level")))) |>
-            dplyr::summarize(sum_row = sum(.data$sum_row)) |>
-            dplyr::mutate(
-              variable = .data[[gp_cols[i]]],
-              label = .data[[paste0(gp_cols[i], "_level")]]
-            )
-        )
-    }
-  }
+  # update x$cards
+  x$cards$tbl_hierarchical <- x$cards$tbl_hierarchical |> cards::ard_sort(sort)
 
-  # append row sums to x$table_body ---------------------------------------------------------------
-  x$table_body <- x$table_body |>
-    dplyr::left_join(
-      cards,
-      by = c(cards |> select(-"sum_row") |> names())
-    )
+  # update x$table_body according to updated (relative) row positions
+  x$table_body <- x$table_body[sapply(post_idx, function(x) which(pre_idx == x)), ]
 
   x
 }
diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R
index 908242541..03dfe9e29 100644
--- a/R/tbl_hierarchical.R
+++ b/R/tbl_hierarchical.R
@@ -326,6 +326,9 @@ internal_tbl_hierarchical <- function(data,
   # add the gtsummary column names to ARD data frame ---------------------------
   cards <- .add_gts_column_to_cards_hierarchical(cards, variables, by)
 
+  # sort ARD alphanumerically (the default table layout)
+  cards <- cards |> cards::ard_sort("alphanumeric")
+
   # call bridge function here
   brdg_hierarchical(
     cards = cards,
@@ -416,6 +419,8 @@ internal_tbl_hierarchical <- function(data,
 }
 
 .add_gts_column_to_cards_hierarchical <- function(cards, variables, by) {
+  args <- attributes(cards)$args
+
   # adding the name of the column the stats will populate
   if (is_empty(by)) {
     cards$gts_column <-
@@ -435,7 +440,12 @@ internal_tbl_hierarchical <- function(data,
       dplyr::mutate(gts_column = paste0("stat_", dplyr::cur_group_id()))
   }
 
-  cards |>
+  cards <- cards |>
     dplyr::ungroup() |>
     cards::as_card()
+
+  # re-add dropped args attribute
+  attr(cards, "args") <- args
+
+  cards
 }
diff --git a/man/sort_tbl_hierarchical.Rd b/man/sort_tbl_hierarchical.Rd
index 45c165614..0e61cb7d4 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/sort_tbl_hierarchical.Rd
@@ -8,24 +8,26 @@
 \usage{
 tbl_sort(x, ...)
 
-\method{tbl_sort}{tbl_hierarchical}(x, sort = "frequency", desc = (sort == "frequency"), .stat = "n", ...)
+\method{tbl_sort}{tbl_hierarchical}(x, sort = "descending", ...)
 }
 \arguments{
 \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr
-A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.}
+a hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.}
 
 \item{...}{These dots are for future extensions and must be empty.}
 
 \item{sort}{(\code{string})\cr
-Specifies sorting to perform. Values must be one of \code{c("frequency", "alphanumeric")}. Default is \code{"frequency"}.}
-
-\item{desc}{(scalar \code{logical})\cr
-Whether to sort rows in ascending or descending order. Default is descending (\code{TRUE}) when \code{sort = "frequency"}
-and ascending (\code{FALSE}) when \code{sort = "alphanumeric"}.}
+type of sorting to perform. Value must be one of:
+\itemize{
+\item \code{"alphanumeric"} - within each hierarchical section of the ARD, rows are ordered alphanumerically (i.e. A to Z)
+by \code{variable_label} text.
+\item \code{"descending"} - within each hierarchical section of the ARD, count sums are calculated for each row and rows are
+sorted in descending order by sum. If \code{sort = "descending"}, the \code{n} statistic is used to calculate row sums if
+included in \code{statistic} for all variables, otherwise \code{p} is used. If neither \code{n} nor \code{p} are present in \code{x} for
+all variables, an error will occur.
+}
 
-\item{.stat}{(\code{string})\cr
-Statistic to use to calculate row sums when \code{sort = "frequency"}. This statistic must be present in the table for
-all hierarchy levels. Default is \code{"n"}.}
+Defaults to \code{"descending"}.}
 }
 \value{
 A \code{gtsummary} of the same class as \code{x}.
@@ -35,8 +37,8 @@ A \code{gtsummary} of the same class as \code{x}.
 
 This function is used to sort hierarchical tables. Options for sorting criteria are:
 \enumerate{
-\item Frequency - within each section of the hierarchy table, frequency sums are calculated for each row and rows are
-ordered accordingly (default).
+\item Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are
+sorted in descending order by sum (default).
 \item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables
 in ascending alphanumeric order (i.e. A to Z).
 }
@@ -56,10 +58,12 @@ tbl <- tbl_hierarchical(
 )
 
 # Example 1 - Descending Frequency Sort ------------------
-tbl_sort(tbl)
+tbl <- tbl_sort(tbl, sort = "descending")
+tbl
 
-# Example 2 - Descending Alphanumeric Sort (Z to A) ------
-tbl_sort(tbl, sort = "alphanumeric", desc = TRUE)
+# Example 2 - Alphanumeric Sort --------------------------
+tbl <- tbl_sort(tbl, sort = "alphanumeric")
+tbl
 \dontshow{\}) # examplesIf}
 }
 \seealso{

From cd204108278842a5131bcc323b4443e2204b919e Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 12 Feb 2025 22:05:03 -0500
Subject: [PATCH 13/25] Finish sort function, update tests

---
 R/sort_tbl_hierarchical.R                     |  88 ----------
 R/tbl_sort.R                                  | 162 ++++++++++++++++++
 man/{sort_tbl_hierarchical.Rd => tbl_sort.Rd} |  13 +-
 .../{sort_tbl_hierarchical.md => tbl_sort.md} |  24 ---
 ...ort_tbl_hierarchical.R => test-tbl_sort.R} |  20 ---
 5 files changed, 168 insertions(+), 139 deletions(-)
 delete mode 100644 R/sort_tbl_hierarchical.R
 create mode 100644 R/tbl_sort.R
 rename man/{sort_tbl_hierarchical.Rd => tbl_sort.Rd} (83%)
 rename tests/testthat/_snaps/{sort_tbl_hierarchical.md => tbl_sort.md} (87%)
 rename tests/testthat/{test-sort_tbl_hierarchical.R => test-tbl_sort.R} (91%)

diff --git a/R/sort_tbl_hierarchical.R b/R/sort_tbl_hierarchical.R
deleted file mode 100644
index 1d0449ca1..000000000
--- a/R/sort_tbl_hierarchical.R
+++ /dev/null
@@ -1,88 +0,0 @@
-#' Sort Hierarchical Tables
-#'
-#' @description `r lifecycle::badge('experimental')`\cr
-#'
-#' This function is used to sort hierarchical tables. Options for sorting criteria are:
-#'
-#' 1. Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are
-#'    sorted in descending order by sum (default).
-#' 2. Alphanumeric - rows are ordered alphanumerically by label text. By default, [tbl_hierarchical()] sorts tables
-#'    in ascending alphanumeric order (i.e. A to Z).
-#'
-#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
-#'   a hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
-#' @inheritParams cards::ard_sort
-#' @inheritParams rlang::args_dots_empty
-#'
-#' @return A `gtsummary` of the same class as `x`.
-#'
-#' @name sort_tbl_hierarchical
-#' @seealso [tbl_filter()]
-#'
-#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))
-#' ADAE_subset <- cards::ADAE |>
-#'   dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
-#'
-#' tbl <- tbl_hierarchical(
-#'   data = ADAE_subset,
-#'   variables = c(SEX, RACE, AETERM),
-#'   by = TRTA,
-#'   denominator = cards::ADSL |> mutate(TRTA = ARM),
-#'   id = USUBJID,
-#'   overall_row = TRUE
-#' )
-#'
-#' # Example 1 - Descending Frequency Sort ------------------
-#' tbl <- tbl_sort(tbl, sort = "descending")
-#' tbl
-#'
-#' # Example 2 - Alphanumeric Sort --------------------------
-#' tbl <- tbl_sort(tbl, sort = "alphanumeric")
-#' tbl
-NULL
-
-#' @rdname sort_tbl_hierarchical
-#' @export
-tbl_sort <- function(x, ...) {
-  check_not_missing(x)
-  check_class(x, "gtsummary")
-
-  UseMethod("tbl_sort")
-}
-
-#' @rdname sort_tbl_hierarchical
-#' @export
-tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
-  set_cli_abort_call()
-
-  ard_args <- attributes(x$cards$tbl_hierarchical)$args
-  by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
-
-  # remove rows from ARD that do not correspond to a table row, append indices
-  x_sort_match <- x$cards$tbl_hierarchical |>
-    dplyr::filter(!is.na(group1)) |>
-    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::mutate(idx_sort = dplyr::cur_group_id()) |>
-    dplyr::ungroup() |>
-    cards::as_card()
-  attr(x_sort_match, "args") <- ard_args
-
-  # pull indices each corresponding to one row of x$table_body
-  pre_idx <- x_sort_match |>
-    dplyr::pull("idx_sort") |>
-    unique()
-
-  # pull updated index order after sorting
-  post_idx <- x_sort_match |>
-    cards::ard_sort(sort) |>
-    dplyr::pull("idx_sort") |>
-    unique()
-
-  # update x$cards
-  x$cards$tbl_hierarchical <- x$cards$tbl_hierarchical |> cards::ard_sort(sort)
-
-  # update x$table_body according to updated (relative) row positions
-  x$table_body <- x$table_body[sapply(post_idx, function(x) which(pre_idx == x)), ]
-
-  x
-}
diff --git a/R/tbl_sort.R b/R/tbl_sort.R
new file mode 100644
index 000000000..ecafd15a6
--- /dev/null
+++ b/R/tbl_sort.R
@@ -0,0 +1,162 @@
+#' Sort Hierarchical Tables
+#'
+#' @description `r lifecycle::badge('experimental')`\cr
+#'
+#' This function is used to sort hierarchical tables. Options for sorting criteria are:
+#'
+#' 1. Descending - within each section of the hierarchy table, event rate sums are calculated for each row and rows are
+#'    sorted in descending order by sum (default).
+#' 2. Alphanumeric - rows are ordered alphanumerically (i.e. A to Z) by label text. By default, [tbl_hierarchical()]
+#'    sorts tables in alphanumeric order.
+#'
+#' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
+#'   a hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
+#' @inheritParams cards::ard_sort
+#' @inheritParams rlang::args_dots_empty
+#'
+#' @return A `gtsummary` of the same class as `x`.
+#'
+#' @name tbl_sort
+#' @seealso [tbl_filter()]
+#'
+#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))
+#' ADAE_subset <- cards::ADAE |>
+#'   dplyr::filter(AETERM %in% unique(cards::ADAE$AETERM)[1:5])
+#'
+#' tbl <- tbl_hierarchical(
+#'   data = ADAE_subset,
+#'   variables = c(SEX, RACE, AETERM),
+#'   by = TRTA,
+#'   denominator = cards::ADSL |> mutate(TRTA = ARM),
+#'   id = USUBJID,
+#'   overall_row = TRUE
+#' )
+#'
+#' # Example 1 - Descending Frequency Sort ------------------
+#' tbl <- tbl_sort(tbl)
+#' tbl
+#'
+#' # Example 2 - Alphanumeric Sort --------------------------
+#' tbl <- tbl_sort(tbl, sort = "alphanumeric")
+#' tbl
+NULL
+
+#' @rdname tbl_sort
+#' @export
+tbl_sort <- function(x, ...) {
+  check_not_missing(x)
+  check_class(x, "gtsummary")
+
+  UseMethod("tbl_sort")
+}
+
+#' @rdname tbl_sort
+#' @export
+tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
+  set_cli_abort_call()
+
+  ard_args <- attributes(x$cards$tbl_hierarchical)$args
+  by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
+  x_ard <- x$cards$tbl_hierarchical
+
+  # add dummy rows for variables not in include so their label rows are sorted correctly
+  not_incl <- setdiff(ard_args$variables, ard_args$include)
+  if (length(not_incl) > 0) {
+    cli::cli_inform(
+      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
+      These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
+      for this hierarchy section will be used instead. To use true event rates for all sections of the table,
+      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
+    )
+
+    x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical)))
+    for (v in not_incl) {
+      i <- length(ard_args$by) + which(ard_args$variables == v)
+      x_sum_rows <- x_ard |>
+        dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
+        dplyr::group_map(function(.df, .g) {
+          # get pseudo-summary row stat value for descending sort
+          if (sort == "descending") {
+            stat_nm <- setdiff(.df$stat_name, "N")[1]
+            sum <- .df |>
+              dplyr::filter(stat_name == !!stat_nm) |>
+              dplyr::summarize(s = sum(unlist(stat))) |>
+              dplyr::pull(s)
+          }
+          g_cur <- .g[[ncol(.g) - 1]]
+          if (!is.na(g_cur) && g_cur == v) {
+            # dummy summary row to add in
+            .df[1, ] |> mutate(
+              variable = g_cur,
+              variable_level = .g[[ncol(.g)]],
+              stat_name = if (sort == "descending") stat_nm else "no_stat",
+              stat = if (sort == "descending") list(sum) else list(0),
+              idx_o = min(.df$idx_o) + 1,
+              tmp = TRUE
+            )
+          } else {
+            NULL
+          }
+        }, .keep = TRUE)
+      sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o)
+      # adjust prior row indices to add in dummy summary rows
+      x_ard <- x_ard |>
+        dplyr::bind_rows(x_sum_rows) |>
+        dplyr::rowwise() |>
+        mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |>
+        dplyr::ungroup()
+    }
+    x_ard <- x_ard |>
+      dplyr::arrange(idx_o, tmp) |>
+      select(-"idx_o")
+  }
+
+  # add indices to ARD
+  x_ard <- x_ard |>
+    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
+    dplyr::mutate(idx_sort = dplyr::cur_group_id()) |>
+    dplyr::ungroup()
+
+  # re-add dropped args attribute
+  x_ard <- x_ard |> cards::as_card()
+  attr(x_ard, "args") <- ard_args
+
+  # get `by` variable count rows (do not correspond to a table row)
+  rm_idx <- x_ard |>
+    dplyr::filter(is.na(group1)) |>
+    dplyr::pull("idx_sort") |>
+    unique()
+
+  # pull index order (each corresponding to one row of x$table_body)
+  pre_sort_idx <- x_ard |>
+    dplyr::pull("idx_sort") |>
+    unique() |>
+    setdiff(rm_idx) |>
+    as.character()
+
+  # apply sorting
+  x_ard_sort <- x_ard |> cards::ard_sort(sort)
+
+  # pull updated index order after sorting
+  post_sort_idx <- x_ard_sort |>
+    dplyr::pull("idx_sort") |>
+    unique() |>
+    setdiff(rm_idx) |>
+    as.character()
+
+  # get updated (relative) row positions
+  idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx]
+
+  # update x$cards
+  if ("tmp" %in% names(x_ard_sort)) {
+    x_ard_sort <- x_ard_sort |>
+      dplyr::filter(is.na(tmp)) |>
+      select(-"tmp")
+  }
+  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort")
+
+  # update x$table_body
+  x$table_body <- x$table_body[idx, ]
+
+  x
+}
diff --git a/man/sort_tbl_hierarchical.Rd b/man/tbl_sort.Rd
similarity index 83%
rename from man/sort_tbl_hierarchical.Rd
rename to man/tbl_sort.Rd
index 0e61cb7d4..12d7fe3b1 100644
--- a/man/sort_tbl_hierarchical.Rd
+++ b/man/tbl_sort.Rd
@@ -1,7 +1,6 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/sort_tbl_hierarchical.R
-\name{sort_tbl_hierarchical}
-\alias{sort_tbl_hierarchical}
+% Please edit documentation in R/tbl_sort.R
+\name{tbl_sort}
 \alias{tbl_sort}
 \alias{tbl_sort.tbl_hierarchical}
 \title{Sort Hierarchical Tables}
@@ -37,10 +36,10 @@ A \code{gtsummary} of the same class as \code{x}.
 
 This function is used to sort hierarchical tables. Options for sorting criteria are:
 \enumerate{
-\item Descending - within each section of the hierarchy table, count sums are calculated for each row and rows are
+\item Descending - within each section of the hierarchy table, event rate sums are calculated for each row and rows are
 sorted in descending order by sum (default).
-\item Alphanumeric - rows are ordered alphanumerically by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}} sorts tables
-in ascending alphanumeric order (i.e. A to Z).
+\item Alphanumeric - rows are ordered alphanumerically (i.e. A to Z) by label text. By default, \code{\link[=tbl_hierarchical]{tbl_hierarchical()}}
+sorts tables in alphanumeric order.
 }
 }
 \examples{
@@ -58,7 +57,7 @@ tbl <- tbl_hierarchical(
 )
 
 # Example 1 - Descending Frequency Sort ------------------
-tbl <- tbl_sort(tbl, sort = "descending")
+tbl <- tbl_sort(tbl)
 tbl
 
 # Example 2 - Alphanumeric Sort --------------------------
diff --git a/tests/testthat/_snaps/sort_tbl_hierarchical.md b/tests/testthat/_snaps/tbl_sort.md
similarity index 87%
rename from tests/testthat/_snaps/sort_tbl_hierarchical.md
rename to tests/testthat/_snaps/tbl_sort.md
index 07ff271ff..fc2d4d810 100644
--- a/tests/testthat/_snaps/sort_tbl_hierarchical.md
+++ b/tests/testthat/_snaps/tbl_sort.md
@@ -30,27 +30,3 @@
       24                                           AMERICAN INDIAN OR ALASKA NATIVE               0 (NA%)                           1 (100%)                           0 (NA%)
       25                                                                   ERYTHEMA               0 (NA%)                           1 (100%)                           0 (NA%)
 
-# tbl_sort.tbl_hierarchical() error messaging works
-
-    Code
-      tbl_sort(data.frame())
-    Condition
-      Error in `check_class()`:
-      ! The `x` argument must be class <gtsummary>, not a data frame.
-
----
-
-    Code
-      tbl_sort(tbl, sort = "no_sorting")
-    Condition
-      Error in `tbl_sort()`:
-      ! The `sort` argument must be either "descending" or "alphanumeric".
-
----
-
-    Code
-      tbl_sort(tbl, .stat = "mean")
-    Condition
-      Error in `tbl_sort()`:
-      ! The `.stat` argument is "mean" but this statistic is not present in `x`. For all valid statistic options see the "stat_name" column of `x$cards$tbl_hierarchical`.
-
diff --git a/tests/testthat/test-sort_tbl_hierarchical.R b/tests/testthat/test-tbl_sort.R
similarity index 91%
rename from tests/testthat/test-sort_tbl_hierarchical.R
rename to tests/testthat/test-tbl_sort.R
index 80e7068ab..0621a07f5 100644
--- a/tests/testthat/test-sort_tbl_hierarchical.R
+++ b/tests/testthat/test-tbl_sort.R
@@ -134,23 +134,3 @@ test_that("tbl_sort.tbl_hierarchical() works when some variables not included in
 
   expect_message(tbl_sort(tbl))
 })
-
-test_that("tbl_sort.tbl_hierarchical() error messaging works", {
-  # invalid x input
-  expect_snapshot(
-    tbl_sort(data.frame()),
-    error = TRUE
-  )
-
-  # invalid sort input
-  expect_snapshot(
-    tbl_sort(tbl, sort = "no_sorting"),
-    error = TRUE
-  )
-
-  # invalid .stat input
-  expect_snapshot(
-    tbl_sort(tbl, .stat = "mean"),
-    error = TRUE
-  )
-})

From 1b46407d5caf268ed192f65a015eddd3beaecff9 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 12 Feb 2025 22:06:52 -0500
Subject: [PATCH 14/25] I think this is not supposed to be missing?

---
 R/tbl_hierarchical.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R
index 03dfe9e29..5290fab69 100644
--- a/R/tbl_hierarchical.R
+++ b/R/tbl_hierarchical.R
@@ -368,6 +368,7 @@ internal_tbl_hierarchical <- function(data,
         denominator = denominator,
         include = all_of(dplyr::nth(variables, -2)),
         statistic = statistic,
+        over_variables = overall_row,
         total_n = (is_empty(by) && length(include) == 1)
       )
 

From d7ccb1838e6816765a4e8bd797e7bd21951b8761 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 12 Feb 2025 22:32:47 -0500
Subject: [PATCH 15/25] Move sorting

---
 R/tbl_hierarchical.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R
index 5290fab69..b3b6c9d03 100644
--- a/R/tbl_hierarchical.R
+++ b/R/tbl_hierarchical.R
@@ -326,9 +326,6 @@ internal_tbl_hierarchical <- function(data,
   # add the gtsummary column names to ARD data frame ---------------------------
   cards <- .add_gts_column_to_cards_hierarchical(cards, variables, by)
 
-  # sort ARD alphanumerically (the default table layout)
-  cards <- cards |> cards::ard_sort("alphanumeric")
-
   # call bridge function here
   brdg_hierarchical(
     cards = cards,
@@ -343,7 +340,7 @@ internal_tbl_hierarchical <- function(data,
   ) |>
     append(
       list(
-        cards = list(cards) |> stats::setNames(calling_fun),
+        cards = list(cards |> cards::ard_sort("alphanumeric")) |> stats::setNames(calling_fun),
         inputs = tbl_hierarchical_inputs
       )
     ) |>

From db75c331890271df1991778905a39bf9fe0aeaa5 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 12 Feb 2025 22:33:38 -0500
Subject: [PATCH 16/25] testing

---
 R/filter_tbl_hierarchical.R    | 108 +++++++++++++++------------------
 man/filter_tbl_hierarchical.Rd |   6 +-
 2 files changed, 52 insertions(+), 62 deletions(-)

diff --git a/R/filter_tbl_hierarchical.R b/R/filter_tbl_hierarchical.R
index 56b968041..3d221b4cc 100644
--- a/R/filter_tbl_hierarchical.R
+++ b/R/filter_tbl_hierarchical.R
@@ -37,10 +37,10 @@
 #' )
 #'
 #' # Example 1 - Row Sums > 10 ------------------
-#' tbl_filter(tbl, t = 10)
+#' tbl_filter(tbl, sum(n) > 10)
 #'
 #' # Example 2 - Row Sums <= 5 ------------------
-#' tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE)
+#' tbl_filter(tbl, sum(n) <= 5)
 NULL
 
 #' @rdname filter_tbl_hierarchical
@@ -54,72 +54,62 @@ tbl_filter <- function(x, ...) {
 
 #' @export
 #' @rdname filter_tbl_hierarchical
-tbl_filter.tbl_hierarchical <- function(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...) {
+tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
   set_cli_abort_call()
 
-  # process and check inputs ----------------------------------------------------------------------
-  check_numeric(t)
-  check_scalar_logical(gt)
-  check_scalar_logical(eq)
-  check_string(.stat)
+  ard_args <- attributes(x$cards$tbl_hierarchical)$args
+  by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
 
-  outer_cols <- sapply(
-    x$table_body |> select(cards::all_ard_groups("names")),
-    function(x) dplyr::last(unique(stats::na.omit(x)))
-  )
+  browser()
+  # add indices to ARD
+  x_ard <- x$cards$tbl_hierarchical |>
+    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
+    dplyr::mutate(idx_sort = dplyr::cur_group_id()) |>
+    dplyr::ungroup() |>
+    cards::as_card()
 
-  # get row sums ----------------------------------------------------------------------------------
-  x <- .append_hierarchy_row_sums(x, .stat)
+  # re-add dropped args attribute
+  attr(x_ard, "args") <- ard_args
 
-  # keep all summary rows (removed later if no sub-rows are kept)
-  if (!gt) x$table_body$sum_row[x$table_body$variable %in% outer_cols] <- t - 1
+  # get `by` variable count rows (do not correspond to a table row)
+  rm_idx <- x_ard |>
+    dplyr::filter(is.na(group1)) |>
+    dplyr::pull("idx_sort") |>
+    unique()
 
-  # create and apply filtering expression ---------------------------------------------------------
-  filt_expr <- paste(
-    "sum_row",
-    dplyr::case_when(
-      gt && eq ~ ">=",
-      !gt && eq ~ "<=",
-      !gt ~ "<",
-      TRUE ~ ">"
-    ),
-    t
-  )
-  x$table_body <- x$table_body |>
-    dplyr::filter(!!parse_expr(filt_expr))
+  # pull index order (each corresponding to one row of x$table_body)
+  pre_sort_idx <- x_ard |>
+    dplyr::pull("idx_sort") |>
+    unique() |>
+    setdiff(rm_idx) |>
+    as.character()
 
-  # remove any summary rows with no sub-rows still present ----------------------------------------
-  if (!gt) {
-    for (i in rev(seq_along(outer_cols))) {
-      gp_empty <- x$table_body |>
-        dplyr::group_by(across(c(names(outer_cols[1:i]), paste0(names(outer_cols[1:i]), "_level")))) |>
-        dplyr::summarize(is_empty := dplyr::n() == 1) |>
-        stats::na.omit()
+  # apply sorting
+  x_ard_sort <- x_ard |> cards::ard_filter({{ filter }})
 
-      if (!all(!gp_empty$is_empty)) {
-        x$table_body <- x$table_body |>
-          dplyr::left_join(
-            gp_empty,
-            by = gp_empty |> select(cards::all_ard_groups()) |> names()
-          ) |>
-          dplyr::filter(!is_empty | is.na(is_empty)) |>
-          dplyr::select(-"is_empty")
-      } else {
-        break
-      }
-    }
-    if (nrow(x$table_body) > 0) {
-      cli::cli_inform(
-        "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
-        regardless of whether they meet the filtering criteria themselves.",
-        .frequency = "once",
-        .frequency_id = "sum_rows_lt"
-      )
-    }
-  }
+  # pull updated index order after sorting
+  post_sort_idx <- x_ard_sort |>
+    dplyr::pull("idx_sort") |>
+    unique() |>
+    setdiff(rm_idx) |>
+    as.character()
 
-  x$table_body <- x$table_body |>
-    dplyr::select(-"sum_row")
+  # get updated (relative) row positions
+  idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx]
+
+  # update x$cards
+  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort")
+
+  # update x$table_body
+  x$table_body <- x$table_body[idx, ]
 
   x
+  # if (nrow(x$table_body) > 0) {
+  #     cli::cli_inform(
+  #       "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
+  #       regardless of whether they meet the filtering criteria themselves.",
+  #       .frequency = "once",
+  #       .frequency_id = "sum_rows_lt"
+  #     )
+  #   }
 }
diff --git a/man/filter_tbl_hierarchical.Rd b/man/filter_tbl_hierarchical.Rd
index a84a127f9..621d5a532 100644
--- a/man/filter_tbl_hierarchical.Rd
+++ b/man/filter_tbl_hierarchical.Rd
@@ -8,7 +8,7 @@
 \usage{
 tbl_filter(x, ...)
 
-\method{tbl_filter}{tbl_hierarchical}(x, t, gt = TRUE, eq = FALSE, .stat = "n", ...)
+\method{tbl_filter}{tbl_hierarchical}(x, filter, ...)
 }
 \arguments{
 \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr
@@ -53,10 +53,10 @@ tbl <- tbl_hierarchical(
 )
 
 # Example 1 - Row Sums > 10 ------------------
-tbl_filter(tbl, t = 10)
+tbl_filter(tbl, sum(n) > 10)
 
 # Example 2 - Row Sums <= 5 ------------------
-tbl_filter(tbl, t = 10, gt = FALSE, eq = TRUE)
+tbl_filter(tbl, sum(n) <= 5)
 \dontshow{\}) # examplesIf}
 }
 \seealso{

From 34aca1027f22f848c58e5ea15398e148400aadb6 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Wed, 12 Feb 2025 22:38:23 -0500
Subject: [PATCH 17/25] Rename

---
 R/{filter_tbl_hierarchical.R => tbl_filter.R} |  6 +-
 ...lter_tbl_hierarchical.Rd => tbl_filter.Rd} |  5 +-
 .../_snaps/filter_tbl_hierarchical.md         | 59 -------------------
 tests/testthat/_snaps/tbl_filter.md           | 40 +++++++++++++
 ...r_tbl_hierarchical.R => test-tbl_filter.R} |  0
 5 files changed, 45 insertions(+), 65 deletions(-)
 rename R/{filter_tbl_hierarchical.R => tbl_filter.R} (97%)
 rename man/{filter_tbl_hierarchical.Rd => tbl_filter.Rd} (94%)
 delete mode 100644 tests/testthat/_snaps/filter_tbl_hierarchical.md
 create mode 100644 tests/testthat/_snaps/tbl_filter.md
 rename tests/testthat/{test-filter_tbl_hierarchical.R => test-tbl_filter.R} (100%)

diff --git a/R/filter_tbl_hierarchical.R b/R/tbl_filter.R
similarity index 97%
rename from R/filter_tbl_hierarchical.R
rename to R/tbl_filter.R
index 3d221b4cc..03692c762 100644
--- a/R/filter_tbl_hierarchical.R
+++ b/R/tbl_filter.R
@@ -20,7 +20,7 @@
 #'
 #' @return A `gtsummary` of the same class as `x`.
 #'
-#' @name filter_tbl_hierarchical
+#' @name tbl_filter
 #' @seealso [tbl_sort()]
 #'
 #' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))
@@ -43,7 +43,7 @@
 #' tbl_filter(tbl, sum(n) <= 5)
 NULL
 
-#' @rdname filter_tbl_hierarchical
+#' @rdname tbl_filter
 #' @export
 tbl_filter <- function(x, ...) {
   check_not_missing(x)
@@ -53,7 +53,7 @@ tbl_filter <- function(x, ...) {
 }
 
 #' @export
-#' @rdname filter_tbl_hierarchical
+#' @rdname tbl_filter
 tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
   set_cli_abort_call()
 
diff --git a/man/filter_tbl_hierarchical.Rd b/man/tbl_filter.Rd
similarity index 94%
rename from man/filter_tbl_hierarchical.Rd
rename to man/tbl_filter.Rd
index 621d5a532..66dfdeb22 100644
--- a/man/filter_tbl_hierarchical.Rd
+++ b/man/tbl_filter.Rd
@@ -1,7 +1,6 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/filter_tbl_hierarchical.R
-\name{filter_tbl_hierarchical}
-\alias{filter_tbl_hierarchical}
+% Please edit documentation in R/tbl_filter.R
+\name{tbl_filter}
 \alias{tbl_filter}
 \alias{tbl_filter.tbl_hierarchical}
 \title{Filter Hierarchical Tables}
diff --git a/tests/testthat/_snaps/filter_tbl_hierarchical.md b/tests/testthat/_snaps/filter_tbl_hierarchical.md
deleted file mode 100644
index 237023eac..000000000
--- a/tests/testthat/_snaps/filter_tbl_hierarchical.md
+++ /dev/null
@@ -1,59 +0,0 @@
-# tbl_filter.tbl_hierarchical() works
-
-    Code
-      as.data.frame(tbl)
-    Output
-         **Sex**  \n    **Race**  \n        **Reported Term for the Adverse Event** **Placebo**  \nN = 86 **Xanomeline High Dose**  \nN = 84 **Xanomeline Low Dose**  \nN = 84
-      1                                               Number of patients with event              26 (30%)                           42 (50%)                          40 (48%)
-      2                                                                           F              13 (25%)                           18 (45%)                          23 (46%)
-      3                                                                       WHITE              10 (21%)                           14 (41%)                          20 (45%)
-      4                                                   APPLICATION SITE ERYTHEMA              2 (4.2%)                            5 (15%)                           5 (11%)
-      5                                                   APPLICATION SITE PRURITUS              2 (4.2%)                            8 (24%)                          10 (23%)
-      6                                                                    ERYTHEMA               6 (13%)                            6 (18%)                           8 (18%)
-      7                                                                           M              13 (39%)                           24 (55%)                          17 (50%)
-      8                                                                       WHITE              12 (40%)                           22 (55%)                          17 (50%)
-      9                                                   APPLICATION SITE ERYTHEMA              1 (3.3%)                           10 (25%)                           7 (21%)
-      10                                                  APPLICATION SITE PRURITUS              1 (3.3%)                           12 (30%)                          10 (29%)
-      11                                                                  DIARRHOEA               6 (20%)                           3 (7.5%)                          2 (5.9%)
-      12                                                                   ERYTHEMA               3 (10%)                            5 (13%)                           6 (18%)
-
-# tbl_filter.tbl_hierarchical() error messaging works
-
-    Code
-      tbl_filter(data.frame(), t = 10)
-    Condition
-      Error in `check_class()`:
-      ! The `x` argument must be class <gtsummary>, not a data frame.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10")
-    Condition
-      Error in `tbl_filter()`:
-      ! The `t` argument must be numeric.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10", gt = "yes")
-    Condition
-      Error in `tbl_filter()`:
-      ! The `t` argument must be numeric.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10", eq = "no")
-    Condition
-      Error in `tbl_filter()`:
-      ! The `t` argument must be numeric.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10", .stat = "pct")
-    Condition
-      Error in `tbl_filter()`:
-      ! The `t` argument must be numeric.
-
diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md
new file mode 100644
index 000000000..b9f41f6dc
--- /dev/null
+++ b/tests/testthat/_snaps/tbl_filter.md
@@ -0,0 +1,40 @@
+# tbl_filter.tbl_hierarchical() error messaging works
+
+    Code
+      tbl_filter(data.frame(), t = 10)
+    Condition
+      Error in `check_class()`:
+      ! The `x` argument must be class <gtsummary>, not a data frame.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10")
+    Condition
+      Error in `tbl_filter()`:
+      ! `filter` must be an expression.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10", gt = "yes")
+    Condition
+      Error in `tbl_filter()`:
+      ! `filter` must be an expression.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10", eq = "no")
+    Condition
+      Error in `tbl_filter()`:
+      ! `filter` must be an expression.
+
+---
+
+    Code
+      tbl_filter(tbl, t = "10", .stat = "pct")
+    Condition
+      Error in `tbl_filter()`:
+      ! `filter` must be an expression.
+
diff --git a/tests/testthat/test-filter_tbl_hierarchical.R b/tests/testthat/test-tbl_filter.R
similarity index 100%
rename from tests/testthat/test-filter_tbl_hierarchical.R
rename to tests/testthat/test-tbl_filter.R

From dfa61bd0d80e6c33e9456dca66caf4ed80a3d28f Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 14 Feb 2025 16:56:00 -0500
Subject: [PATCH 18/25] test

---
 R/tbl_filter.R                   | 102 +++++++++++++++++++++++--------
 R/tbl_hierarchical.R             |   2 +-
 man/tbl_filter.Rd                |  34 ++++++-----
 tests/testthat/test-tbl_filter.R |   6 +-
 4 files changed, 98 insertions(+), 46 deletions(-)

diff --git a/R/tbl_filter.R b/R/tbl_filter.R
index 03692c762..24b25872d 100644
--- a/R/tbl_filter.R
+++ b/R/tbl_filter.R
@@ -2,22 +2,29 @@
 #'
 #' @description `r lifecycle::badge('experimental')`\cr
 #'
-#' This function is used to filter hierarchical table rows by frequency row sum.
+#' This function is used to filter hierarchical table rows.
 #'
 #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
-#' @param t (scalar `numeric`)\cr
-#'   Threshold used to determine which rows will be retained.
-#' @param gt (scalar `logical`)\cr
-#'   Whether to filter for row sums greater than `t` or less than `t`. Default is greater than (`gt = TRUE`).
-#' @param eq (scalar `logical`)\cr
-#'   Whether to include the value of `t` in the filtered range, i.e. whether to use exclusive comparators (`>`, `<`) or
-#'   inclusive comparators (`>=`, `<=`) when filtering. Default is `FALSE`.
-#' @param .stat (`string`)\cr
-#'   Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
-#'   Default is `"n"`.
+#' @inheritParams cards::ard_sort
 #' @inheritParams rlang::args_dots_empty
 #'
+#' @details
+#' The `filter` argument can be used to filter out rows of a table which do not meet the criteria provided as an
+#' expression. Rows can be filtered on the values of any of the possible statistics (`n`, `p`, and `N`) provided they
+#' are included at least once in the table, as well as the values of any `by` variables. Filtering is only applied to
+#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at
+#' least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria
+#' themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e.
+#' across all `by` variable values) by using aggregate functions such as `sum()` and `mean()`.
+#'
+#' Some examples of possible filters:
+#' - `filter = n > 5`
+#' - `filter = n == 2 & p < 0.05`
+#' - `filter = sum(n) > 4`
+#' - `filter = mean(n) > 4 | n > 3`
+#' - `filter = any(n > 2 & TRTA == "Xanomeline High Dose")`
+#'
 #' @return A `gtsummary` of the same class as `x`.
 #'
 #' @name tbl_filter
@@ -59,46 +66,89 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
 
   ard_args <- attributes(x$cards$tbl_hierarchical)$args
   by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
+  x_ard <- x$cards$tbl_hierarchical
+
+  # add dummy rows for variables not in include so their label rows are ordered correctly
+  not_incl <- setdiff(ard_args$variables, ard_args$include)
+  if (length(not_incl) > 0) {
+    x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical)))
+    for (v in not_incl) {
+      i <- length(ard_args$by) + which(ard_args$variables == v)
+      x_sum_rows <- x_ard |>
+        dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
+        dplyr::group_map(function(.df, .g) {
+          g_cur <- .g[[ncol(.g) - 1]]
+          if (!is.na(g_cur) && g_cur == v) {
+            # dummy summary row to add in
+            .df[1, ] |> mutate(
+              variable = g_cur,
+              variable_level = .g[[ncol(.g)]],
+              stat_name = "no_stat",
+              stat = list(0),
+              idx_o = min(.df$idx_o) + 1,
+              tmp = TRUE
+            )
+          } else {
+            NULL
+          }
+        }, .keep = TRUE)
+      sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o)
+      # adjust prior row indices to add in dummy summary rows
+      x_ard <- x_ard |>
+        dplyr::bind_rows(x_sum_rows) |>
+        dplyr::rowwise() |>
+        mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |>
+        dplyr::ungroup()
+    }
+    x_ard <- x_ard |>
+      dplyr::arrange(idx_o, tmp) |>
+      select(-"idx_o")
+  }
 
-  browser()
   # add indices to ARD
-  x_ard <- x$cards$tbl_hierarchical |>
+  x_ard <- x_ard |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::mutate(idx_sort = dplyr::cur_group_id()) |>
-    dplyr::ungroup() |>
-    cards::as_card()
+    dplyr::mutate(idx_filter = dplyr::cur_group_id()) |>
+    dplyr::ungroup()
 
   # re-add dropped args attribute
+  x_ard <- x_ard |> cards::as_card()
   attr(x_ard, "args") <- ard_args
 
   # get `by` variable count rows (do not correspond to a table row)
   rm_idx <- x_ard |>
     dplyr::filter(is.na(group1)) |>
-    dplyr::pull("idx_sort") |>
+    dplyr::pull("idx_filter") |>
     unique()
 
   # pull index order (each corresponding to one row of x$table_body)
-  pre_sort_idx <- x_ard |>
-    dplyr::pull("idx_sort") |>
+  pre_filter_idx <- x_ard |>
+    dplyr::pull("idx_filter") |>
     unique() |>
     setdiff(rm_idx) |>
     as.character()
 
-  # apply sorting
-  x_ard_sort <- x_ard |> cards::ard_filter({{ filter }})
+  browser()
+  # apply filtering
+  x_ard_filter <- x_ard |> cards::ard_filter({{ filter }})
 
-  # pull updated index order after sorting
-  post_sort_idx <- x_ard_sort |>
-    dplyr::pull("idx_sort") |>
+  # pull updated index order after filtering
+  post_filter_idx <- x_ard_filter |>
+    dplyr::pull("idx_filter") |>
     unique() |>
     setdiff(rm_idx) |>
     as.character()
 
   # get updated (relative) row positions
-  idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx]
+  idx <- (seq_len(length(pre_filter_idx)) |> stats::setNames(pre_filter_idx))[post_filter_idx]
 
   # update x$cards
-  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort")
+  if ("tmp" %in% names(x_ard_filter)) {
+    x_ard_filter <- x_ard_filter |>
+      dplyr::filter(is.na(tmp)) |>
+      select(-"tmp")
+  }
+  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_filter")
 
   # update x$table_body
   x$table_body <- x$table_body[idx, ]
diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R
index b3b6c9d03..6ebcad68a 100644
--- a/R/tbl_hierarchical.R
+++ b/R/tbl_hierarchical.R
@@ -340,7 +340,7 @@ internal_tbl_hierarchical <- function(data,
   ) |>
     append(
       list(
-        cards = list(cards |> cards::ard_sort("alphanumeric")) |> stats::setNames(calling_fun),
+        cards = list(cards) |> stats::setNames(calling_fun),
         inputs = tbl_hierarchical_inputs
       )
     ) |>
diff --git a/man/tbl_filter.Rd b/man/tbl_filter.Rd
index 66dfdeb22..640ec7264 100644
--- a/man/tbl_filter.Rd
+++ b/man/tbl_filter.Rd
@@ -14,20 +14,6 @@ tbl_filter(x, ...)
 A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.}
 
 \item{...}{These dots are for future extensions and must be empty.}
-
-\item{t}{(scalar \code{numeric})\cr
-Threshold used to determine which rows will be retained.}
-
-\item{gt}{(scalar \code{logical})\cr
-Whether to filter for row sums greater than \code{t} or less than \code{t}. Default is greater than (\code{gt = TRUE}).}
-
-\item{eq}{(scalar \code{logical})\cr
-Whether to include the value of \code{t} in the filtered range, i.e. whether to use exclusive comparators (\code{>}, \code{<}) or
-inclusive comparators (\code{>=}, \code{<=}) when filtering. Default is \code{FALSE}.}
-
-\item{.stat}{(\code{string})\cr
-Statistic to use to calculate row sums. This statistic must be present in the table for all hierarchy levels.
-Default is \code{"n"}.}
 }
 \value{
 A \code{gtsummary} of the same class as \code{x}.
@@ -35,7 +21,25 @@ A \code{gtsummary} of the same class as \code{x}.
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
 
-This function is used to filter hierarchical table rows by frequency row sum.
+This function is used to filter hierarchical table rows.
+}
+\details{
+The \code{filter} argument can be used to filter out rows of a table which do not meet the criteria provided as an
+expression. Rows can be filtered on the values of any of the possible statistics (\code{n}, \code{p}, and \code{N}) provided they
+are included at least once in the table, as well as the values of any \code{by} variables. Filtering is only applied to
+rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at
+least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria
+themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e.
+across all \code{by} variable values) by using aggregate functions such as \code{sum()} and \code{mean()}.
+
+Some examples of possible filters:
+\itemize{
+\item \code{filter = n > 5}
+\item \code{filter = n == 2 & p < 0.05}
+\item \code{filter = sum(n) > 4}
+\item \code{filter = mean(n) > 4 | n > 3}
+\item \code{filter = any(n > 2 & TRTA == "Xanomeline High Dose")}
+}
 }
 \examples{
 \dontshow{if ((identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
diff --git a/tests/testthat/test-tbl_filter.R b/tests/testthat/test-tbl_filter.R
index 13e0906cc..c06ef0c9b 100644
--- a/tests/testthat/test-tbl_filter.R
+++ b/tests/testthat/test-tbl_filter.R
@@ -16,11 +16,9 @@ test_that("tbl_filter.tbl_hierarchical() works", {
   withr::local_options(width = 200)
 
   # no errors
-  expect_silent(tbl <- tbl_filter(tbl, t = 10))
+  expect_silent(tbl <- tbl_filter(tbl, n > 10))
   expect_snapshot(tbl |> as.data.frame())
-
-  # .stat argument works
-  expect_silent(tbl <- tbl_filter(tbl, t = 10, .stat = "p"))
+  expect_silent(tbl <- tbl_filter(tbl, p > 10))
 })
 
 test_that("tbl_filter.tbl_hierarchical(gt) works", {

From a9d9f1942adf179a8028d3effc9cae2ff4229653 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 14 Feb 2025 17:19:26 -0500
Subject: [PATCH 19/25] Sort using x$table_body

---
 R/tbl_sort.R                   | 79 ++++++++++++++++++++--------------
 tests/testthat/test-tbl_sort.R | 15 +++++--
 2 files changed, 57 insertions(+), 37 deletions(-)

diff --git a/R/tbl_sort.R b/R/tbl_sort.R
index ecafd15a6..04f5bc0d7 100644
--- a/R/tbl_sort.R
+++ b/R/tbl_sort.R
@@ -69,7 +69,6 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
       set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
     )
 
-    x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical)))
     for (v in not_incl) {
       i <- length(ard_args$by) + which(ard_args$variables == v)
       x_sum_rows <- x_ard |>
@@ -91,72 +90,86 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
               variable_level = .g[[ncol(.g)]],
               stat_name = if (sort == "descending") stat_nm else "no_stat",
               stat = if (sort == "descending") list(sum) else list(0),
-              idx_o = min(.df$idx_o) + 1,
               tmp = TRUE
             )
           } else {
             NULL
           }
         }, .keep = TRUE)
-      sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o)
-      # adjust prior row indices to add in dummy summary rows
-      x_ard <- x_ard |>
-        dplyr::bind_rows(x_sum_rows) |>
-        dplyr::rowwise() |>
-        mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |>
-        dplyr::ungroup()
+
+      x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows)
     }
-    x_ard <- x_ard |>
-      dplyr::arrange(idx_o, tmp) |>
-      select(-"idx_o")
   }
 
   # add indices to ARD
   x_ard <- x_ard |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::mutate(idx_sort = dplyr::cur_group_id()) |>
-    dplyr::ungroup()
+    dplyr::mutate(idx_unsort = dplyr::cur_group_id())
+
+  gps <- x_ard |>
+    dplyr::group_keys() |>
+    dplyr::mutate(idx_unsort = dplyr::row_number()) |>
+    cards::as_card() |>
+    cards::rename_ard_groups_shift(shift = -1) |>
+    dplyr::filter(!variable %in% ard_args$by) |>
+    dplyr::rename(label = variable_level)
+
+  overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."]
+  if (length(overall_lbl) > 0) {
+    gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl
+    if (length(ard_args$variables) > 1) {
+      gps$group1[gps$variable == "..ard_hierarchical_overall.."] <- "..ard_hierarchical_overall.."
+    }
+  }
+
+  # match structure of ARD grouping columns to x$table_body grouping columns
+  gps <- gps |> tidyr::unnest(everything())
+  outer_cols <- if (length(ard_args$variables) > 1) {
+    ard_args$variables |>
+      utils::head(-1) |>
+      stats::setNames(paste0("group", seq_len(length(ard_args$variables) - 1)))
+  } else {
+    NULL
+  }
+  for (g in names(outer_cols)) {
+    which_g <- gps$variable == outer_cols[g]
+    gps[g][which_g, ] <- gps$variable[which_g]
+    gps[paste0(g, "_level")][which_g, ] <- gps$label[which_g]
+  }
+  x$table_body <- x$table_body |> dplyr::left_join(gps, by = names(gps) |> utils::head(-1))
 
   # re-add dropped args attribute
-  x_ard <- x_ard |> cards::as_card()
+  x_ard <- x_ard |>
+    dplyr::ungroup() |>
+    cards::as_card()
   attr(x_ard, "args") <- ard_args
 
   # get `by` variable count rows (do not correspond to a table row)
   rm_idx <- x_ard |>
     dplyr::filter(is.na(group1)) |>
-    dplyr::pull("idx_sort") |>
+    dplyr::pull("idx_unsort") |>
     unique()
 
-  # pull index order (each corresponding to one row of x$table_body)
-  pre_sort_idx <- x_ard |>
-    dplyr::pull("idx_sort") |>
-    unique() |>
-    setdiff(rm_idx) |>
-    as.character()
-
   # apply sorting
   x_ard_sort <- x_ard |> cards::ard_sort(sort)
 
   # pull updated index order after sorting
-  post_sort_idx <- x_ard_sort |>
-    dplyr::pull("idx_sort") |>
+  idx_sort <- x_ard_sort |>
+    dplyr::pull("idx_unsort") |>
     unique() |>
-    setdiff(rm_idx) |>
-    as.character()
-
-  # get updated (relative) row positions
-  idx <- (seq_len(length(pre_sort_idx)) |> stats::setNames(pre_sort_idx))[post_sort_idx]
+    setdiff(rm_idx)
 
-  # update x$cards
   if ("tmp" %in% names(x_ard_sort)) {
     x_ard_sort <- x_ard_sort |>
       dplyr::filter(is.na(tmp)) |>
       select(-"tmp")
   }
-  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_sort")
+
+  # update x$cards
+  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_unsort")
 
   # update x$table_body
-  x$table_body <- x$table_body[idx, ]
+  x$table_body <- x$table_body[match(idx_sort, x$table_body$idx_unsort), ] |> select(-"idx_unsort")
 
   x
 }
diff --git a/tests/testthat/test-tbl_sort.R b/tests/testthat/test-tbl_sort.R
index 0621a07f5..1923d7015 100644
--- a/tests/testthat/test-tbl_sort.R
+++ b/tests/testthat/test-tbl_sort.R
@@ -57,10 +57,17 @@ test_that("tbl_sort.tbl_hierarchical(sort = 'alphanumeric') works", {
   # ascending (A to Z)
   expect_silent(result <- tbl_sort(result, sort = "alphanumeric"))
 
-  # results match with tbl_hierarchical which sorts A to Z by default
+  # results are ordered correctly
   expect_equal(
-    result |> as.data.frame(),
-    tbl |> as.data.frame()
+    as.data.frame(result)[[1]],
+    c(
+      "Number of patients with event", "F", "BLACK OR AFRICAN AMERICAN", "APPLICATION SITE PRURITUS",
+      "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA", "ERYTHEMA", "WHITE", "APPLICATION SITE ERYTHEMA",
+      "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "M", "AMERICAN INDIAN OR ALASKA NATIVE", "ERYTHEMA",
+      "BLACK OR AFRICAN AMERICAN", "APPLICATION SITE PRURITUS", "DIARRHOEA", "ERYTHEMA", "WHITE",
+      "APPLICATION SITE ERYTHEMA", "APPLICATION SITE PRURITUS", "ATRIOVENTRICULAR BLOCK SECOND DEGREE", "DIARRHOEA",
+      "ERYTHEMA"
+    )
   )
 })
 
@@ -85,7 +92,7 @@ test_that("tbl_sort.tbl_hierarchical() works when there is no overall row in x",
   expect_silent(tbl_no_overall <- tbl_sort(tbl_no_overall, sort = "alphanumeric"))
   expect_equal(
     tbl_no_overall$table_body,
-    tbl$table_body[-1, ]
+    tbl_sort(tbl, sort = "alphanumeric")$table_body[-1, ]
   )
 })
 

From 07646b7121e578ef21e165e193b026e7900d1d87 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Fri, 14 Feb 2025 17:27:06 -0500
Subject: [PATCH 20/25] temp

---
 R/tbl_filter.R                      | 92 ++++++++++++++++++-----------
 tests/testthat/_snaps/tbl_filter.md | 28 +--------
 tests/testthat/test-tbl_filter.R    | 46 +++++----------
 3 files changed, 72 insertions(+), 94 deletions(-)

diff --git a/R/tbl_filter.R b/R/tbl_filter.R
index 24b25872d..781ed1ec8 100644
--- a/R/tbl_filter.R
+++ b/R/tbl_filter.R
@@ -6,7 +6,7 @@
 #'
 #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
-#' @inheritParams cards::ard_sort
+#' @inheritParams cards::ard_filter
 #' @inheritParams rlang::args_dots_empty
 #'
 #' @details
@@ -68,10 +68,16 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
   by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
   x_ard <- x$cards$tbl_hierarchical
 
-  # add dummy rows for variables not in include so their label rows are ordered correctly
+  # add dummy rows for variables not in include so their label rows are filtered correctly
   not_incl <- setdiff(ard_args$variables, ard_args$include)
   if (length(not_incl) > 0) {
-    x_ard <- x_ard |> mutate(idx_o = seq_len(nrow(x$cards$tbl_hierarchical)))
+    cli::cli_inform(
+      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
+      These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
+      for this hierarchy section will be used instead. To use true event rates for all sections of the table,
+      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
+    )
+
     for (v in not_incl) {
       i <- length(ard_args$by) + which(ard_args$variables == v)
       x_sum_rows <- x_ard |>
@@ -85,75 +91,89 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
               variable_level = .g[[ncol(.g)]],
               stat_name = "no_stat",
               stat = list(0),
-              idx_o = min(.df$idx_o) + 1,
               tmp = TRUE
             )
           } else {
             NULL
           }
         }, .keep = TRUE)
-      sum_row_pos <- dplyr::bind_rows(x_sum_rows) |> dplyr::pull(idx_o)
-      # adjust prior row indices to add in dummy summary rows
-      x_ard <- x_ard |>
-        dplyr::bind_rows(x_sum_rows) |>
-        dplyr::rowwise() |>
-        mutate(idx_o = .data$idx_o + sum(sum_row_pos > .data$idx_o)) |>
-        dplyr::ungroup()
+
+      x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows)
     }
-    x_ard <- x_ard |>
-      dplyr::arrange(idx_o, tmp) |>
-      select(-"idx_o")
   }
 
   # add indices to ARD
   x_ard <- x_ard |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::mutate(idx_filter = dplyr::cur_group_id()) |>
-    dplyr::ungroup()
+    dplyr::mutate(idx_nofilter = dplyr::cur_group_id())
+
+  gps <- x_ard |>
+    dplyr::group_keys() |>
+    dplyr::mutate(idx_nofilter = dplyr::row_number()) |>
+    cards::as_card() |>
+    cards::rename_ard_groups_shift(shift = -1) |>
+    dplyr::filter(!variable %in% ard_args$by) |>
+    dplyr::rename(label = variable_level)
+
+  overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."]
+  if (length(overall_lbl) > 0) {
+    gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl
+    if (length(ard_args$variables) > 1) {
+      gps$group1[gps$variable == "..ard_hierarchical_overall.."] <- "..ard_hierarchical_overall.."
+    }
+  }
+
+  # match structure of ARD grouping columns to x$table_body grouping columns
+  gps <- gps |> tidyr::unnest(everything())
+  outer_cols <- if (length(ard_args$variables) > 1) {
+    ard_args$variables |>
+      utils::head(-1) |>
+      stats::setNames(paste0("group", seq_len(length(ard_args$variables) - 1)))
+  } else {
+    NULL
+  }
+  for (g in names(outer_cols)) {
+    which_g <- gps$variable == outer_cols[g]
+    gps[g][which_g, ] <- gps$variable[which_g]
+    gps[paste0(g, "_level")][which_g, ] <- gps$label[which_g]
+  }
+  x$table_body <- x$table_body |> dplyr::left_join(gps, by = names(gps) |> utils::head(-1))
 
   # re-add dropped args attribute
-  x_ard <- x_ard |> cards::as_card()
+  x_ard <- x_ard |>
+    dplyr::ungroup() |>
+    cards::as_card()
   attr(x_ard, "args") <- ard_args
 
   # get `by` variable count rows (do not correspond to a table row)
   rm_idx <- x_ard |>
     dplyr::filter(is.na(group1)) |>
-    dplyr::pull("idx_filter") |>
+    dplyr::pull("idx_nofilter") |>
     unique()
 
-  # pull index order (each corresponding to one row of x$table_body)
-  pre_filter_idx <- x_ard |>
-    dplyr::pull("idx_filter") |>
-    unique() |>
-    setdiff(rm_idx) |>
-    as.character()
-
-  browser()
   # apply filtering
   x_ard_filter <- x_ard |> cards::ard_filter({{ filter }})
 
   # pull updated index order after filtering
-  post_filter_idx <- x_ard_filter |>
-    dplyr::pull("idx_filter") |>
+  idx_filter <- x_ard_filter |>
+    dplyr::pull("idx_nofilter") |>
     unique() |>
-    setdiff(rm_idx) |>
-    as.character()
-
-  # get updated (relative) row positions
-  idx <- (seq_len(length(pre_filter_idx)) |> stats::setNames(pre_filter_idx))[post_filter_idx]
+    setdiff(rm_idx)
 
-  # update x$cards
   if ("tmp" %in% names(x_ard_filter)) {
     x_ard_filter <- x_ard_filter |>
       dplyr::filter(is.na(tmp)) |>
       select(-"tmp")
   }
-  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_filter")
+
+  # update x$cards
+  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter")
 
   # update x$table_body
-  x$table_body <- x$table_body[idx, ]
+  x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] |> select(-"idx_nofilter")
 
   x
+
   # if (nrow(x$table_body) > 0) {
   #     cli::cli_inform(
   #       "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md
index b9f41f6dc..82b4ca8d0 100644
--- a/tests/testthat/_snaps/tbl_filter.md
+++ b/tests/testthat/_snaps/tbl_filter.md
@@ -1,7 +1,7 @@
 # tbl_filter.tbl_hierarchical() error messaging works
 
     Code
-      tbl_filter(data.frame(), t = 10)
+      tbl_filter(data.frame(), sum(n) > 10)
     Condition
       Error in `check_class()`:
       ! The `x` argument must be class <gtsummary>, not a data frame.
@@ -9,31 +9,7 @@
 ---
 
     Code
-      tbl_filter(tbl, t = "10")
-    Condition
-      Error in `tbl_filter()`:
-      ! `filter` must be an expression.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10", gt = "yes")
-    Condition
-      Error in `tbl_filter()`:
-      ! `filter` must be an expression.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10", eq = "no")
-    Condition
-      Error in `tbl_filter()`:
-      ! `filter` must be an expression.
-
----
-
-    Code
-      tbl_filter(tbl, t = "10", .stat = "pct")
+      tbl_filter(tbl, 10)
     Condition
       Error in `tbl_filter()`:
       ! `filter` must be an expression.
diff --git a/tests/testthat/test-tbl_filter.R b/tests/testthat/test-tbl_filter.R
index c06ef0c9b..cad926ba8 100644
--- a/tests/testthat/test-tbl_filter.R
+++ b/tests/testthat/test-tbl_filter.R
@@ -16,17 +16,17 @@ test_that("tbl_filter.tbl_hierarchical() works", {
   withr::local_options(width = 200)
 
   # no errors
-  expect_silent(tbl <- tbl_filter(tbl, n > 10))
+  expect_silent(tbl <- tbl_filter(tbl, sum(n) > 10))
   expect_snapshot(tbl |> as.data.frame())
-  expect_silent(tbl <- tbl_filter(tbl, p > 10))
+  expect_silent(tbl <- tbl_filter(tbl, p > 0.05))
 })
 
 test_that("tbl_filter.tbl_hierarchical(gt) works", {
   # gt = TRUE
-  expect_silent(tbl_gt <- tbl_filter(tbl, t = 10))
+  expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 10))
 
   # gt = FALSE
-  expect_message(tbl_lt <- tbl_filter(tbl, t = 10, gt = FALSE))
+  expect_message(tbl_lt <- tbl_filter(tbl, sum(n) < 10))
 
   expect_equal(
     dplyr::inner_join(
@@ -56,22 +56,22 @@ test_that("tbl_filter.tbl_hierarchical(gt) works", {
 
 test_that("tbl_filter.tbl_hierarchical(eq) works", {
   # gt = TRUE, eq = FALSE
-  expect_silent(tbl_gt <- tbl_filter(tbl, t = 12))
+  expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 12))
 
   # gt = TRUE, eq = TRUE
-  expect_silent(tbl_geq <- tbl_filter(tbl, t = 12, eq = TRUE))
+  expect_silent(tbl_geq <- tbl_filter(tbl, sum(n) >= 12))
   expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body))
 
   # gt = FALSE, eq = FALSE
-  expect_silent(tbl_lt <- tbl_filter(tbl, t = 12, gt = FALSE))
+  expect_silent(tbl_lt <- tbl_filter(tbl, sum(n) < 12))
 
   # gt = TRUE, eq = TRUE
-  expect_silent(tbl_leq <- tbl_filter(tbl, t = 12, gt = FALSE, eq = TRUE))
+  expect_silent(tbl_leq <- tbl_filter(tbl, sum(n) <= 12))
   expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body))
 })
 
 test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", {
-  expect_silent(tbl <- tbl_filter(tbl, t = 200))
+  expect_silent(tbl <- tbl_filter(tbl, sum(n) > 200))
   expect_equal(nrow(tbl$table_body), 0)
 })
 
@@ -85,7 +85,7 @@ test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", {
     overall_row = TRUE
   )
 
-  expect_silent(tbl_single <- tbl_filter(tbl_single, t = 20))
+  expect_silent(tbl_single <- tbl_filter(tbl_single, sum(n) > 20))
   expect_equal(nrow(tbl_single$table_body), 4)
 })
 
@@ -100,37 +100,19 @@ test_that("tbl_filter.tbl_hierarchical() works when some variables not included
     overall_row = TRUE
   )
 
-  expect_message(tbl_filter(tbl, t = 10))
+  expect_message(tbl_filter(tbl, sum(n) > 10))
 })
 
 test_that("tbl_filter.tbl_hierarchical() error messaging works", {
   # invalid x input
   expect_snapshot(
-    tbl_filter(data.frame(), t = 10),
+    tbl_filter(data.frame(), sum(n) > 10),
     error = TRUE
   )
 
-  # invalid t input
+  # invalid filter input
   expect_snapshot(
-    tbl_filter(tbl, t = "10"),
-    error = TRUE
-  )
-
-  # invalid gt input
-  expect_snapshot(
-    tbl_filter(tbl, t = "10", gt = "yes"),
-    error = TRUE
-  )
-
-  # invalid eq input
-  expect_snapshot(
-    tbl_filter(tbl, t = "10", eq = "no"),
-    error = TRUE
-  )
-
-  # invalid .stat input
-  expect_snapshot(
-    tbl_filter(tbl, t = "10", .stat = "pct"),
+    tbl_filter(tbl, 10),
     error = TRUE
   )
 })

From 0168a6db562df5b6bf634b9c1a8e22f497425207 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 18 Feb 2025 19:15:13 -0500
Subject: [PATCH 21/25] Fix filtering function, add parameter

---
 R/tbl_filter.R                      | 85 +++++++++++-----------------
 R/tbl_sort.R                        | 87 ++++++++++++++++-------------
 man/tbl_filter.Rd                   | 21 ++++---
 tests/testthat/_snaps/tbl_filter.md | 22 ++++++++
 4 files changed, 117 insertions(+), 98 deletions(-)

diff --git a/R/tbl_filter.R b/R/tbl_filter.R
index 781ed1ec8..5095fe30a 100644
--- a/R/tbl_filter.R
+++ b/R/tbl_filter.R
@@ -2,10 +2,13 @@
 #'
 #' @description `r lifecycle::badge('experimental')`\cr
 #'
-#' This function is used to filter hierarchical table rows.
+#' This function is used to filter hierarchical table rows. Filters are not applied to summary or overall rows.
 #'
 #' @param x (`tbl_hierarchical`, `tbl_hierarchical_count`)\cr
 #'   A hierarchical gtsummary table of class `'tbl_hierarchical'` or `'tbl_hierarchical_count'`.
+#' @param keep_empty_summary (scalar `logical`)\cr
+#'   Logical argument indicating whether to retain summary rows corresponding to table hierarchy sections that have had
+#'   all rows filtered out. Default is `TRUE`.
 #' @inheritParams cards::ard_filter
 #' @inheritParams rlang::args_dots_empty
 #'
@@ -13,15 +16,15 @@
 #' The `filter` argument can be used to filter out rows of a table which do not meet the criteria provided as an
 #' expression. Rows can be filtered on the values of any of the possible statistics (`n`, `p`, and `N`) provided they
 #' are included at least once in the table, as well as the values of any `by` variables. Filtering is only applied to
-#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at
-#' least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria
-#' themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e.
-#' across all `by` variable values) by using aggregate functions such as `sum()` and `mean()`.
+#' rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows are kept
+#' regardless of whether they meet the filtering criteria themselves. In addition to filtering on individual statistic
+#' values, filters can be applied across the row (i.e. across all `by` variable values) by using aggregate functions
+#' such as `sum()` and `mean()`.
 #'
 #' Some examples of possible filters:
 #' - `filter = n > 5`
 #' - `filter = n == 2 & p < 0.05`
-#' - `filter = sum(n) > 4`
+#' - `filter = sum(n) >= 4`
 #' - `filter = mean(n) > 4 | n > 3`
 #' - `filter = any(n > 2 & TRTA == "Xanomeline High Dose")`
 #'
@@ -61,7 +64,7 @@ tbl_filter <- function(x, ...) {
 
 #' @export
 #' @rdname tbl_filter
-tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
+tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, ...) {
   set_cli_abort_call()
 
   ard_args <- attributes(x$cards$tbl_hierarchical)$args
@@ -69,38 +72,7 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
   x_ard <- x$cards$tbl_hierarchical
 
   # add dummy rows for variables not in include so their label rows are filtered correctly
-  not_incl <- setdiff(ard_args$variables, ard_args$include)
-  if (length(not_incl) > 0) {
-    cli::cli_inform(
-      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
-      These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
-      for this hierarchy section will be used instead. To use true event rates for all sections of the table,
-      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
-    )
-
-    for (v in not_incl) {
-      i <- length(ard_args$by) + which(ard_args$variables == v)
-      x_sum_rows <- x_ard |>
-        dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
-        dplyr::group_map(function(.df, .g) {
-          g_cur <- .g[[ncol(.g) - 1]]
-          if (!is.na(g_cur) && g_cur == v) {
-            # dummy summary row to add in
-            .df[1, ] |> mutate(
-              variable = g_cur,
-              variable_level = .g[[ncol(.g)]],
-              stat_name = "no_stat",
-              stat = list(0),
-              tmp = TRUE
-            )
-          } else {
-            NULL
-          }
-        }, .keep = TRUE)
-
-      x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows)
-    }
-  }
+  x_ard <- x_ard |> .append_not_incl(ard_args)
 
   # add indices to ARD
   x_ard <- x_ard |>
@@ -160,26 +132,37 @@ tbl_filter.tbl_hierarchical <- function(x, filter, ...) {
     unique() |>
     setdiff(rm_idx)
 
+  # apply filtering while retaining original row order
+  idx_filter <- intersect(x$table_body$idx_nofilter, idx_filter)
+  x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ]
+
   if ("tmp" %in% names(x_ard_filter)) {
     x_ard_filter <- x_ard_filter |>
       dplyr::filter(is.na(tmp)) |>
       select(-"tmp")
   }
 
-  # update x$cards
-  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter")
+  # remove summary rows from empty sections if requested
+  if (!keep_empty_summary) {
+    if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) {
+      x$table_body <- x$table_body |> dplyr::filter(!variable %in% outer_cols)
+      x_ard_filter <- x_ard_filter |> dplyr::filter(!variable %in% outer_cols)
+    } else {
+      for (v in rev(outer_cols)) {
+        empty_rows <- x$table_body |>
+          dplyr::filter(variable == dplyr::lead(variable) & variable == v) |>
+          dplyr::pull("idx_nofilter")
+        x$table_body <- x$table_body |> dplyr::filter(!idx_nofilter %in% empty_rows)
+        x_ard_filter <- x_ard_filter |> dplyr::filter(!idx_nofilter %in% empty_rows)
+      }
+    }
+  }
 
   # update x$table_body
-  x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ] |> select(-"idx_nofilter")
+  x$table_body <- x$table_body |> select(-"idx_nofilter")
 
-  x
+  # update x$cards
+  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter")
 
-  # if (nrow(x$table_body) > 0) {
-  #     cli::cli_inform(
-  #       "For readability, all summary rows preceding at least one row that meets the filtering criteria are kept
-  #       regardless of whether they meet the filtering criteria themselves.",
-  #       .frequency = "once",
-  #       .frequency_id = "sum_rows_lt"
-  #     )
-  #   }
+  x
 }
diff --git a/R/tbl_sort.R b/R/tbl_sort.R
index 04f5bc0d7..0f2b6aa76 100644
--- a/R/tbl_sort.R
+++ b/R/tbl_sort.R
@@ -60,46 +60,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
   x_ard <- x$cards$tbl_hierarchical
 
   # add dummy rows for variables not in include so their label rows are sorted correctly
-  not_incl <- setdiff(ard_args$variables, ard_args$include)
-  if (length(not_incl) > 0) {
-    cli::cli_inform(
-      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
-      These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
-      for this hierarchy section will be used instead. To use true event rates for all sections of the table,
-      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
-    )
-
-    for (v in not_incl) {
-      i <- length(ard_args$by) + which(ard_args$variables == v)
-      x_sum_rows <- x_ard |>
-        dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
-        dplyr::group_map(function(.df, .g) {
-          # get pseudo-summary row stat value for descending sort
-          if (sort == "descending") {
-            stat_nm <- setdiff(.df$stat_name, "N")[1]
-            sum <- .df |>
-              dplyr::filter(stat_name == !!stat_nm) |>
-              dplyr::summarize(s = sum(unlist(stat))) |>
-              dplyr::pull(s)
-          }
-          g_cur <- .g[[ncol(.g) - 1]]
-          if (!is.na(g_cur) && g_cur == v) {
-            # dummy summary row to add in
-            .df[1, ] |> mutate(
-              variable = g_cur,
-              variable_level = .g[[ncol(.g)]],
-              stat_name = if (sort == "descending") stat_nm else "no_stat",
-              stat = if (sort == "descending") list(sum) else list(0),
-              tmp = TRUE
-            )
-          } else {
-            NULL
-          }
-        }, .keep = TRUE)
-
-      x_ard <- x_ard |> dplyr::bind_rows(x_sum_rows)
-    }
-  }
+  x_ard <- x_ard |> .append_not_incl(ard_args, sort)
 
   # add indices to ARD
   x_ard <- x_ard |>
@@ -173,3 +134,49 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
 
   x
 }
+
+.append_not_incl <- function(x, ard_args, sort = NULL) {
+  # add dummy rows for variables not in include so their label rows are sorted correctly
+  not_incl <- setdiff(ard_args$variables, ard_args$include)
+  if (length(not_incl) > 0) {
+    cli::cli_inform(
+      "Not all hierarchy variables present in the table were included in the {.arg include} argument.
+      These variables ({not_incl}) do not have event rate data available so the total sum of the event rates
+      for this hierarchy section will be used instead. To use true event rates for all sections of the table,
+      set {.code include = everything()} when creating your table via {.fun tbl_hierarchical}."
+    )
+
+    for (v in not_incl) {
+      i <- length(ard_args$by) + which(ard_args$variables == v)
+      x_sum_rows <- x |>
+        dplyr::group_by(across(all_of(cards::all_ard_group_n((length(ard_args$by) + 1):i)))) |>
+        dplyr::group_map(function(.df, .g) {
+          # get pseudo-summary row stat value for descending sort
+          if (!is.null(sort) && sort == "descending") {
+            stat_nm <- setdiff(.df$stat_name, "N")[1]
+            sum <- .df |>
+              dplyr::filter(stat_name == !!stat_nm) |>
+              dplyr::summarize(s = sum(unlist(stat))) |>
+              dplyr::pull(s)
+          }
+          g_cur <- .g[[ncol(.g) - 1]]
+          if (!is.na(g_cur) && g_cur == v) {
+            # dummy summary row to add in
+            .df[1, ] |> mutate(
+              variable = g_cur,
+              variable_level = .g[[ncol(.g)]],
+              stat_name = if (!is.null(sort) && sort == "descending") stat_nm else "no_stat",
+              stat = if (!is.null(sort) && sort == "descending") list(sum) else list(0),
+              tmp = TRUE
+            )
+          } else {
+            NULL
+          }
+        }, .keep = TRUE)
+
+      x <- x |> dplyr::bind_rows(x_sum_rows)
+    }
+  }
+
+  x
+}
diff --git a/man/tbl_filter.Rd b/man/tbl_filter.Rd
index 640ec7264..0de42e998 100644
--- a/man/tbl_filter.Rd
+++ b/man/tbl_filter.Rd
@@ -7,13 +7,20 @@
 \usage{
 tbl_filter(x, ...)
 
-\method{tbl_filter}{tbl_hierarchical}(x, filter, ...)
+\method{tbl_filter}{tbl_hierarchical}(x, filter, keep_empty_summary = TRUE, ...)
 }
 \arguments{
 \item{x}{(\code{tbl_hierarchical}, \code{tbl_hierarchical_count})\cr
 A hierarchical gtsummary table of class \code{'tbl_hierarchical'} or \code{'tbl_hierarchical_count'}.}
 
 \item{...}{These dots are for future extensions and must be empty.}
+
+\item{filter}{(\code{expression})\cr an expression that is used to filter rows of the hierarchical ARD. See the Details
+section below for more information.}
+
+\item{keep_empty_summary}{(scalar \code{logical})\cr
+Logical argument indicating whether to retain summary rows corresponding to table hierarchy sections that have had
+all rows filtered out. Default is \code{TRUE}.}
 }
 \value{
 A \code{gtsummary} of the same class as \code{x}.
@@ -21,22 +28,22 @@ A \code{gtsummary} of the same class as \code{x}.
 \description{
 \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}\cr
 
-This function is used to filter hierarchical table rows.
+This function is used to filter hierarchical table rows. Filters are not applied to summary or overall rows.
 }
 \details{
 The \code{filter} argument can be used to filter out rows of a table which do not meet the criteria provided as an
 expression. Rows can be filtered on the values of any of the possible statistics (\code{n}, \code{p}, and \code{N}) provided they
 are included at least once in the table, as well as the values of any \code{by} variables. Filtering is only applied to
-rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows preceding at
-least one inner row that meets the filtering criteria are kept regardless of whether they meet the filtering criteria
-themselves. In addition to filtering on individual statistic values, filters can be applied across the row (i.e.
-across all \code{by} variable values) by using aggregate functions such as \code{sum()} and \code{mean()}.
+rows that correspond to the innermost variable in the hierarchy - all outer variable (summary) rows are kept
+regardless of whether they meet the filtering criteria themselves. In addition to filtering on individual statistic
+values, filters can be applied across the row (i.e. across all \code{by} variable values) by using aggregate functions
+such as \code{sum()} and \code{mean()}.
 
 Some examples of possible filters:
 \itemize{
 \item \code{filter = n > 5}
 \item \code{filter = n == 2 & p < 0.05}
-\item \code{filter = sum(n) > 4}
+\item \code{filter = sum(n) >= 4}
 \item \code{filter = mean(n) > 4 | n > 3}
 \item \code{filter = any(n > 2 & TRTA == "Xanomeline High Dose")}
 }
diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md
index 82b4ca8d0..cdca373e9 100644
--- a/tests/testthat/_snaps/tbl_filter.md
+++ b/tests/testthat/_snaps/tbl_filter.md
@@ -1,3 +1,25 @@
+# tbl_filter.tbl_hierarchical() works
+
+    Code
+      as.data.frame(tbl)
+    Output
+         **Sex**  \n    **Race**  \n        **Reported Term for the Adverse Event** **Placebo**  \nN = 86 **Xanomeline High Dose**  \nN = 84 **Xanomeline Low Dose**  \nN = 84
+      1                                               Number of patients with event              26 (30%)                           42 (50%)                          40 (48%)
+      2                                                                           F              13 (25%)                           18 (45%)                          23 (46%)
+      3                                                   BLACK OR AFRICAN AMERICAN               3 (60%)                            4 (67%)                           3 (50%)
+      4                                                                       WHITE              10 (21%)                           14 (41%)                          20 (45%)
+      5                                                   APPLICATION SITE PRURITUS              2 (4.2%)                            8 (24%)                          10 (23%)
+      6                                                                    ERYTHEMA               6 (13%)                            6 (18%)                           8 (18%)
+      7                                                   APPLICATION SITE ERYTHEMA              2 (4.2%)                            5 (15%)                           5 (11%)
+      8                                                                           M              13 (39%)                           24 (55%)                          17 (50%)
+      9                                            AMERICAN INDIAN OR ALASKA NATIVE               0 (NA%)                           1 (100%)                           0 (NA%)
+      10                                                  BLACK OR AFRICAN AMERICAN               1 (33%)                            1 (33%)                           0 (NA%)
+      11                                                                      WHITE              12 (40%)                           22 (55%)                          17 (50%)
+      12                                                  APPLICATION SITE PRURITUS              1 (3.3%)                           12 (30%)                          10 (29%)
+      13                                                                  DIARRHOEA               6 (20%)                           3 (7.5%)                          2 (5.9%)
+      14                                                                   ERYTHEMA               3 (10%)                            5 (13%)                           6 (18%)
+      15                                                  APPLICATION SITE ERYTHEMA              1 (3.3%)                           10 (25%)                           7 (21%)
+
 # tbl_filter.tbl_hierarchical() error messaging works
 
     Code

From 73861f16e70a24e8863357f97afd9f67a64fa972 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 18 Feb 2025 19:59:39 -0500
Subject: [PATCH 22/25] Update tests

---
 tests/testthat/_snaps/tbl_filter.md | 19 +++++++
 tests/testthat/test-tbl_filter.R    | 77 ++++++++++++++++++-----------
 2 files changed, 66 insertions(+), 30 deletions(-)

diff --git a/tests/testthat/_snaps/tbl_filter.md b/tests/testthat/_snaps/tbl_filter.md
index cdca373e9..f1386cf8a 100644
--- a/tests/testthat/_snaps/tbl_filter.md
+++ b/tests/testthat/_snaps/tbl_filter.md
@@ -20,6 +20,25 @@
       14                                                                   ERYTHEMA               3 (10%)                            5 (13%)                           6 (18%)
       15                                                  APPLICATION SITE ERYTHEMA              1 (3.3%)                           10 (25%)                           7 (21%)
 
+# tbl_filter.tbl_hierarchical() works with various different filter conditions
+
+    Code
+      as.data.frame(tbl_f)
+    Output
+         **Sex**  \n    **Race**  \n        **Reported Term for the Adverse Event** **Placebo**  \nN = 86 **Xanomeline High Dose**  \nN = 84 **Xanomeline Low Dose**  \nN = 84
+      1                                               Number of patients with event              26 (30%)                           42 (50%)                          40 (48%)
+      2                                                                           F              13 (25%)                           18 (45%)                          23 (46%)
+      3                                                                       WHITE              10 (21%)                           14 (41%)                          20 (45%)
+      4                                                   APPLICATION SITE PRURITUS              2 (4.2%)                            8 (24%)                          10 (23%)
+      5                                                                    ERYTHEMA               6 (13%)                            6 (18%)                           8 (18%)
+      6                                                   APPLICATION SITE ERYTHEMA              2 (4.2%)                            5 (15%)                           5 (11%)
+      7                                                                           M              13 (39%)                           24 (55%)                          17 (50%)
+      8                                                                       WHITE              12 (40%)                           22 (55%)                          17 (50%)
+      9                                                   APPLICATION SITE PRURITUS              1 (3.3%)                           12 (30%)                          10 (29%)
+      10                                                                  DIARRHOEA               6 (20%)                           3 (7.5%)                          2 (5.9%)
+      11                                                                   ERYTHEMA               3 (10%)                            5 (13%)                           6 (18%)
+      12                                                  APPLICATION SITE ERYTHEMA              1 (3.3%)                           10 (25%)                           7 (21%)
+
 # tbl_filter.tbl_hierarchical() error messaging works
 
     Code
diff --git a/tests/testthat/test-tbl_filter.R b/tests/testthat/test-tbl_filter.R
index cad926ba8..2498d4e3a 100644
--- a/tests/testthat/test-tbl_filter.R
+++ b/tests/testthat/test-tbl_filter.R
@@ -17,28 +17,34 @@ test_that("tbl_filter.tbl_hierarchical() works", {
 
   # no errors
   expect_silent(tbl <- tbl_filter(tbl, sum(n) > 10))
+
+  # row order is retained
   expect_snapshot(tbl |> as.data.frame())
-  expect_silent(tbl <- tbl_filter(tbl, p > 0.05))
 })
 
-test_that("tbl_filter.tbl_hierarchical(gt) works", {
-  # gt = TRUE
-  expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 10))
+test_that("tbl_filter.tbl_hierarchical(keep_empty_summary) works", {
+  tbl2 <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = c(SEX, RACE, AEBODSYS, AETERM),
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID
+  )
 
-  # gt = FALSE
-  expect_message(tbl_lt <- tbl_filter(tbl, sum(n) < 10))
+  # keep summary rows
+  expect_silent(tbl_f <- tbl_filter(tbl2, sum(n) > 10, keep_empty_summary = TRUE))
+  expect_equal(nrow(tbl_f$table_body), 29)
 
-  expect_equal(
-    dplyr::inner_join(
-      tbl_gt$table_body,
-      tbl_lt$table_body,
-      by = names(tbl_gt$table_body)
-    ) |>
-      dplyr::filter(variable == "AETERM") |>
-      nrow(),
-    0
-  )
+  # remove summary rows
+  expect_silent(tbl_f <- tbl_filter(tbl2, sum(n) > 10, keep_empty_summary = FALSE))
+  expect_equal(nrow(tbl_f$table_body), 22)
+})
+
+test_that("tbl_filter.tbl_hierarchical() works with various different filter conditions", {
+  withr::local_options(width = 200)
 
+  expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 10))
+  expect_silent(tbl_lt <- tbl_filter(tbl, sum(n) <= 10))
   expect_equal(
     sum(
       tbl_gt$table_body |>
@@ -52,27 +58,38 @@ test_that("tbl_filter.tbl_hierarchical(gt) works", {
       dplyr::filter(variable == "AETERM") |>
       nrow()
   )
-})
 
-test_that("tbl_filter.tbl_hierarchical(eq) works", {
-  # gt = TRUE, eq = FALSE
-  expect_silent(tbl_gt <- tbl_filter(tbl, sum(n) > 12))
+  expect_silent(tbl_f <- tbl_filter(tbl, n > 5))
+  expect_equal(nrow(tbl_f$table_body), 14)
 
-  # gt = TRUE, eq = TRUE
-  expect_silent(tbl_geq <- tbl_filter(tbl, sum(n) >= 12))
-  expect_gt(nrow(tbl_geq$table_body), nrow(tbl_gt$table_body))
+  expect_silent(tbl_f <- tbl_filter(tbl, p > 0.05))
+  expect_equal(nrow(tbl_f$table_body), 25)
 
-  # gt = FALSE, eq = FALSE
-  expect_silent(tbl_lt <- tbl_filter(tbl, sum(n) < 12))
+  expect_silent(tbl_f <- tbl_filter(tbl, n == 2 & p < 0.05))
+  expect_equal(nrow(tbl_f$table_body), 11)
 
-  # gt = TRUE, eq = TRUE
-  expect_silent(tbl_leq <- tbl_filter(tbl, sum(n) <= 12))
-  expect_lt(nrow(tbl_lt$table_body), nrow(tbl_leq$table_body))
+  expect_silent(tbl_f <- tbl_filter(tbl, mean(n) > 4 | n > 3))
+  expect_equal(nrow(tbl_f$table_body), 15)
+
+  expect_silent(tbl_f <- tbl_filter(tbl, any(n > 2 & TRTA == "Xanomeline High Dose"), keep_empty_summary = FALSE))
+  expect_snapshot(tbl_f |> as.data.frame())
 })
 
 test_that("tbl_filter.tbl_hierarchical() returns empty table when all rows filtered out", {
-  expect_silent(tbl <- tbl_filter(tbl, sum(n) > 200))
-  expect_equal(nrow(tbl$table_body), 0)
+  tbl2 <- tbl_hierarchical(
+    data = ADAE_subset,
+    variables = c(SEX, RACE, AETERM),
+    by = TRTA,
+    denominator = cards::ADSL |> mutate(TRTA = ARM),
+    id = USUBJID
+  )
+
+  expect_silent(tbl_f <- tbl_filter(tbl2, sum(n) > 200, keep_empty_summary = FALSE))
+  expect_equal(nrow(tbl_f$table_body), 0)
+
+  # overall row present
+  expect_silent(tbl_f <- tbl_filter(tbl, sum(n) > 200, keep_empty_summary = FALSE))
+  expect_equal(nrow(tbl_f$table_body), 1)
 })
 
 test_that("tbl_filter.tbl_hierarchical() works with only one variable in x", {

From a9737e41930c46bb9c3654a21150e61b36dbfffc Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 18 Feb 2025 20:47:20 -0500
Subject: [PATCH 23/25] Fix checks

---
 R/tbl_filter.R | 88 ++++++++++++++------------------------------------
 R/tbl_sort.R   | 84 ++++++++++++++++++++++++++---------------------
 2 files changed, 73 insertions(+), 99 deletions(-)

diff --git a/R/tbl_filter.R b/R/tbl_filter.R
index 5095fe30a..2b065dfd6 100644
--- a/R/tbl_filter.R
+++ b/R/tbl_filter.R
@@ -71,56 +71,15 @@ tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, ..
   by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
   x_ard <- x$cards$tbl_hierarchical
 
-  # add dummy rows for variables not in include so their label rows are filtered correctly
-  x_ard <- x_ard |> .append_not_incl(ard_args)
-
-  # add indices to ARD
-  x_ard <- x_ard |>
-    dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::mutate(idx_nofilter = dplyr::cur_group_id())
-
-  gps <- x_ard |>
-    dplyr::group_keys() |>
-    dplyr::mutate(idx_nofilter = dplyr::row_number()) |>
-    cards::as_card() |>
-    cards::rename_ard_groups_shift(shift = -1) |>
-    dplyr::filter(!variable %in% ard_args$by) |>
-    dplyr::rename(label = variable_level)
-
-  overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."]
-  if (length(overall_lbl) > 0) {
-    gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl
-    if (length(ard_args$variables) > 1) {
-      gps$group1[gps$variable == "..ard_hierarchical_overall.."] <- "..ard_hierarchical_overall.."
-    }
-  }
-
-  # match structure of ARD grouping columns to x$table_body grouping columns
-  gps <- gps |> tidyr::unnest(everything())
-  outer_cols <- if (length(ard_args$variables) > 1) {
-    ard_args$variables |>
-      utils::head(-1) |>
-      stats::setNames(paste0("group", seq_len(length(ard_args$variables) - 1)))
-  } else {
-    NULL
-  }
-  for (g in names(outer_cols)) {
-    which_g <- gps$variable == outer_cols[g]
-    gps[g][which_g, ] <- gps$variable[which_g]
-    gps[paste0(g, "_level")][which_g, ] <- gps$label[which_g]
-  }
-  x$table_body <- x$table_body |> dplyr::left_join(gps, by = names(gps) |> utils::head(-1))
-
-  # re-add dropped args attribute
-  x_ard <- x_ard |>
-    dplyr::ungroup() |>
-    cards::as_card()
-  attr(x_ard, "args") <- ard_args
+  # add row indices to match structure of ard to x$table_body
+  reshape_x <- .reshape_ard_compare(x, x_ard, ard_args)
+  x <- reshape_x$x
+  x_ard <- reshape_x$x_ard
 
   # get `by` variable count rows (do not correspond to a table row)
   rm_idx <- x_ard |>
-    dplyr::filter(is.na(group1)) |>
-    dplyr::pull("idx_nofilter") |>
+    dplyr::filter(is.na(.data$group1)) |>
+    dplyr::pull("pre_idx") |>
     unique()
 
   # apply filtering
@@ -128,41 +87,44 @@ tbl_filter.tbl_hierarchical <- function(x, filter, keep_empty_summary = TRUE, ..
 
   # pull updated index order after filtering
   idx_filter <- x_ard_filter |>
-    dplyr::pull("idx_nofilter") |>
+    dplyr::pull("pre_idx") |>
     unique() |>
     setdiff(rm_idx)
 
   # apply filtering while retaining original row order
-  idx_filter <- intersect(x$table_body$idx_nofilter, idx_filter)
-  x$table_body <- x$table_body[match(idx_filter, x$table_body$idx_nofilter), ]
+  idx_filter <- intersect(x$table_body$pre_idx, idx_filter)
+  x$table_body <- x$table_body[match(idx_filter, x$table_body$pre_idx), ]
 
   if ("tmp" %in% names(x_ard_filter)) {
     x_ard_filter <- x_ard_filter |>
-      dplyr::filter(is.na(tmp)) |>
+      dplyr::filter(is.na(.data$tmp)) |>
       select(-"tmp")
   }
 
   # remove summary rows from empty sections if requested
   if (!keep_empty_summary) {
-    if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) {
-      x$table_body <- x$table_body |> dplyr::filter(!variable %in% outer_cols)
-      x_ard_filter <- x_ard_filter |> dplyr::filter(!variable %in% outer_cols)
-    } else {
-      for (v in rev(outer_cols)) {
-        empty_rows <- x$table_body |>
-          dplyr::filter(variable == dplyr::lead(variable) & variable == v) |>
-          dplyr::pull("idx_nofilter")
-        x$table_body <- x$table_body |> dplyr::filter(!idx_nofilter %in% empty_rows)
-        x_ard_filter <- x_ard_filter |> dplyr::filter(!idx_nofilter %in% empty_rows)
+    if (length(ard_args$variables) > 1) {
+      outer_cols <- ard_args$variables |> utils::head(-1)
+      if (!dplyr::last(ard_args$variables) %in% x$table_body$variable) {
+        x$table_body <- x$table_body |> dplyr::filter(!.data$variable %in% outer_cols)
+        x_ard_filter <- x_ard_filter |> dplyr::filter(!.data$variable %in% outer_cols)
+      } else {
+        for (v in rev(outer_cols)) {
+          empty_rows <- x$table_body |>
+            dplyr::filter(.data$variable == dplyr::lead(.data$variable) & .data$variable == v) |>
+            dplyr::pull("pre_idx")
+          x$table_body <- x$table_body |> dplyr::filter(!.data$pre_idx %in% empty_rows)
+          x_ard_filter <- x_ard_filter |> dplyr::filter(!.data$pre_idx %in% empty_rows)
+        }
       }
     }
   }
 
   # update x$table_body
-  x$table_body <- x$table_body |> select(-"idx_nofilter")
+  x$table_body <- x$table_body |> select(-"pre_idx")
 
   # update x$cards
-  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"idx_nofilter")
+  x$cards$tbl_hierarchical <- x_ard_filter |> select(-"pre_idx")
 
   x
 }
diff --git a/R/tbl_sort.R b/R/tbl_sort.R
index 0f2b6aa76..2a84e070a 100644
--- a/R/tbl_sort.R
+++ b/R/tbl_sort.R
@@ -56,25 +56,64 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
   set_cli_abort_call()
 
   ard_args <- attributes(x$cards$tbl_hierarchical)$args
-  by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
   x_ard <- x$cards$tbl_hierarchical
 
+  # add row indices match structure of ard to x$table_body
+  reshape_x <- .reshape_ard_compare(x, x_ard, ard_args, sort)
+  x <- reshape_x$x
+  x_ard <- reshape_x$x_ard
+
+  # get `by` variable count rows (do not correspond to a table row)
+  rm_idx <- x_ard |>
+    dplyr::filter(is.na(.data$group1)) |>
+    dplyr::pull("pre_idx") |>
+    unique()
+
+  # apply sorting
+  x_ard_sort <- x_ard |> cards::ard_sort(sort)
+
+  # pull updated index order after sorting
+  idx_sort <- x_ard_sort |>
+    dplyr::pull("pre_idx") |>
+    unique() |>
+    setdiff(rm_idx)
+
+  if ("tmp" %in% names(x_ard_sort)) {
+    x_ard_sort <- x_ard_sort |>
+      dplyr::filter(is.na(.data$tmp)) |>
+      select(-"tmp")
+  }
+
+  # update x$cards
+  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"pre_idx")
+
+  # update x$table_body
+  x$table_body <- x$table_body[match(idx_sort, x$table_body$pre_idx), ] |> select(-"pre_idx")
+
+  x
+}
+
+.reshape_ard_compare <- function(x, x_ard, ard_args, sort = NULL) {
+  by_cols <- paste0("group", seq_along(length(ard_args$by)), c("", "_level"))
+
   # add dummy rows for variables not in include so their label rows are sorted correctly
   x_ard <- x_ard |> .append_not_incl(ard_args, sort)
 
   # add indices to ARD
   x_ard <- x_ard |>
     dplyr::group_by(across(c(cards::all_ard_groups(), cards::all_ard_variables(), -all_of(by_cols)))) |>
-    dplyr::mutate(idx_unsort = dplyr::cur_group_id())
+    dplyr::mutate(pre_idx = dplyr::cur_group_id())
 
+  # get grouping structure
   gps <- x_ard |>
     dplyr::group_keys() |>
-    dplyr::mutate(idx_unsort = dplyr::row_number()) |>
+    dplyr::mutate(pre_idx = dplyr::row_number()) |>
     cards::as_card() |>
     cards::rename_ard_groups_shift(shift = -1) |>
-    dplyr::filter(!variable %in% ard_args$by) |>
-    dplyr::rename(label = variable_level)
+    dplyr::filter(!.data$variable %in% ard_args$by) |>
+    dplyr::rename(label = "variable_level")
 
+  # match overall row if present
   overall_lbl <- x$table_body$label[x$table_body$variable == "..ard_hierarchical_overall.."]
   if (length(overall_lbl) > 0) {
     gps$label[gps$variable == "..ard_hierarchical_overall.."] <- overall_lbl
@@ -105,34 +144,7 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
     cards::as_card()
   attr(x_ard, "args") <- ard_args
 
-  # get `by` variable count rows (do not correspond to a table row)
-  rm_idx <- x_ard |>
-    dplyr::filter(is.na(group1)) |>
-    dplyr::pull("idx_unsort") |>
-    unique()
-
-  # apply sorting
-  x_ard_sort <- x_ard |> cards::ard_sort(sort)
-
-  # pull updated index order after sorting
-  idx_sort <- x_ard_sort |>
-    dplyr::pull("idx_unsort") |>
-    unique() |>
-    setdiff(rm_idx)
-
-  if ("tmp" %in% names(x_ard_sort)) {
-    x_ard_sort <- x_ard_sort |>
-      dplyr::filter(is.na(tmp)) |>
-      select(-"tmp")
-  }
-
-  # update x$cards
-  x$cards$tbl_hierarchical <- x_ard_sort |> select(-"idx_unsort")
-
-  # update x$table_body
-  x$table_body <- x$table_body[match(idx_sort, x$table_body$idx_unsort), ] |> select(-"idx_unsort")
-
-  x
+  list(x = x, x_ard = x_ard)
 }
 
 .append_not_incl <- function(x, ard_args, sort = NULL) {
@@ -155,9 +167,9 @@ tbl_sort.tbl_hierarchical <- function(x, sort = "descending", ...) {
           if (!is.null(sort) && sort == "descending") {
             stat_nm <- setdiff(.df$stat_name, "N")[1]
             sum <- .df |>
-              dplyr::filter(stat_name == !!stat_nm) |>
-              dplyr::summarize(s = sum(unlist(stat))) |>
-              dplyr::pull(s)
+              dplyr::filter(.data$stat_name == !!stat_nm) |>
+              dplyr::summarize(sum_stat = sum(unlist(.data$stat))) |>
+              dplyr::pull("sum_stat")
           }
           g_cur <- .g[[ncol(.g) - 1]]
           if (!is.na(g_cur) && g_cur == v) {

From ae322a1b608a8ceeee5d1c20d0fdec96221e2785 Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 18 Feb 2025 20:47:46 -0500
Subject: [PATCH 24/25] Add to author list

---
 DESCRIPTION              | 2 ++
 man/gtsummary-package.Rd | 1 +
 2 files changed, 3 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 674e79f8f..587d9c48e 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,6 +14,8 @@ Authors@R: c(
            comment = c(ORCID = "0000-0002-4683-1868")),
     person("Emily C.", "Zabor", role = "aut",
            comment = c(ORCID = "0000-0002-1402-4498")),
+    person("Emily", "de la Rua", , role = "aut",
+           comment = c(ORCID = "0009-0000-8738-5561")),
     person("Xing", "Bai", role = "ctb"),
     person("Esther", "Drill", role = "ctb",
            comment = c(ORCID = "0000-0002-3315-4538")),
diff --git a/man/gtsummary-package.Rd b/man/gtsummary-package.Rd
index c4d8eeb1e..44b7086a4 100644
--- a/man/gtsummary-package.Rd
+++ b/man/gtsummary-package.Rd
@@ -29,6 +29,7 @@ Authors:
   \item Jessica Lavery (\href{https://orcid.org/0000-0002-2746-5647}{ORCID})
   \item Karissa Whiting (\href{https://orcid.org/0000-0002-4683-1868}{ORCID})
   \item Emily C. Zabor (\href{https://orcid.org/0000-0002-1402-4498}{ORCID})
+  \item Emily de la Rua (\href{https://orcid.org/0009-0000-8738-5561}{ORCID})
 }
 
 Other contributors:

From b8bac66303a9baf872d2953b0a0260368336609c Mon Sep 17 00:00:00 2001
From: Emily de la Rua <emily.de_la_rua@contractors.roche.com>
Date: Tue, 18 Feb 2025 21:00:50 -0500
Subject: [PATCH 25/25] Add test

---
 DESCRIPTION                       |  1 +
 R/tbl_hierarchical.R              |  2 +-
 tests/testthat/_snaps/tbl_sort.md | 16 ++++++++++++++++
 tests/testthat/test-tbl_sort.R    | 14 ++++++++++++++
 4 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 587d9c48e..68d2210f2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -88,6 +88,7 @@ Suggests:
     testthat (>= 3.2.0),
     withr (>= 2.5.0),
     workflows (>= 0.2.4)
+Remotes: insightsengineering/cards@301_sort_filter_ard_stack_hierarchical
 VignetteBuilder: 
     knitr
 Config/Needs/check: hms
diff --git a/R/tbl_hierarchical.R b/R/tbl_hierarchical.R
index 790bcca86..270b0521e 100644
--- a/R/tbl_hierarchical.R
+++ b/R/tbl_hierarchical.R
@@ -214,7 +214,7 @@ internal_tbl_hierarchical <- function(data,
   if ("..ard_hierarchical_overall.." %in% variables) {
     cli::cli_abort("The {.arg variables} argument cannot include a column named {.val ..ard_hierarchical_overall..}.")
   }
-  if (!all(variables == unique(variables))) {
+  if (length(variables) != length(unique(variables))) {
     cli::cli_abort("The {.arg variables} argument cannot contain repeated variables.")
   }
 
diff --git a/tests/testthat/_snaps/tbl_sort.md b/tests/testthat/_snaps/tbl_sort.md
index fc2d4d810..58388091c 100644
--- a/tests/testthat/_snaps/tbl_sort.md
+++ b/tests/testthat/_snaps/tbl_sort.md
@@ -30,3 +30,19 @@
       24                                           AMERICAN INDIAN OR ALASKA NATIVE               0 (NA%)                           1 (100%)                           0 (NA%)
       25                                                                   ERYTHEMA               0 (NA%)                           1 (100%)                           0 (NA%)
 
+# tbl_sort.tbl_hierarchical() error messaging works
+
+    Code
+      tbl_sort(data.frame())
+    Condition
+      Error in `check_class()`:
+      ! The `x` argument must be class <gtsummary>, not a data frame.
+
+---
+
+    Code
+      tbl_sort(tbl, 10)
+    Condition
+      Error in `tbl_sort()`:
+      ! The `sort` argument must be a string, not a number.
+
diff --git a/tests/testthat/test-tbl_sort.R b/tests/testthat/test-tbl_sort.R
index 1923d7015..9c32dd0ec 100644
--- a/tests/testthat/test-tbl_sort.R
+++ b/tests/testthat/test-tbl_sort.R
@@ -141,3 +141,17 @@ test_that("tbl_sort.tbl_hierarchical() works when some variables not included in
 
   expect_message(tbl_sort(tbl))
 })
+
+test_that("tbl_sort.tbl_hierarchical() error messaging works", {
+  # invalid x input
+  expect_snapshot(
+    tbl_sort(data.frame()),
+    error = TRUE
+  )
+
+  # invalid sort input
+  expect_snapshot(
+    tbl_sort(tbl, 10),
+    error = TRUE
+  )
+})