diff --git a/DESCRIPTION b/DESCRIPTION index a726d03778..0f44ab0101 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -92,7 +92,7 @@ Remotes: github::insightsengineering/cards, github::insightsengineering/cardx@svy-ard_categorical_unit_tests Config/Needs/check: broom, broom.helpers, broom.mixed, lme4, effectsize, - emmeans, smd + emmeans, smd, forcats, scales Config/testthat/edition: 3 Config/testthat/parallel: true Encoding: UTF-8 diff --git a/NEWS.md b/NEWS.md index 9b8b4d40cf..c37b74e68e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -46,6 +46,8 @@ #### Internal Updates +* The "FAQ+Gallery", `tbl_summary()` Tutorial, `tbl_regression()`, and Quarto+R Markdown vignettes have been converted to articles. The URLs on the website have not changed for these pages, but the vignettes are no longer bundled in the package. This change allows us to provide better documentation utilizing more tools that don't need to be bundled with the package. + * Greater consistency has been put in place for all calculated statistics in gtsummary. Previously, each function handled its own calculations and transforming these statistics into data frames that would be printed. Now each function will first prepare an Analysis Result Dataset (ARD), and ARDs are converted to gtsummary structures using bridge functions (prefixed with `brdg_*()`). The bridge functions will be exported to allow anyone to more easily extend gtsummary functions. ### Bug Fixes diff --git a/R/add_overall.R b/R/add_overall.R index 826e1fb39c..3a3515e5df 100644 --- a/R/add_overall.R +++ b/R/add_overall.R @@ -1,40 +1,26 @@ -#' Add overall column -#' -#' - [`add_overall.tbl_summary()`] -#' -#' @param x (`gtsummary`)\cr -#' Object with class 'gtsummary' -#' @param ... Passed to other methods. -#' @keywords internal -#' @author Daniel D. Sjoberg -#' @export -#' -#' @seealso [`add_overall.tbl_summary()`] -add_overall <- function(x, ...) 
{ - check_not_missing(x) - check_class(x, "gtsummary") - UseMethod("add_overall") -} - #' Add overall column #' #' Adds a column with overall summary statistics to tables -#' created by `tbl_summary`, `tbl_svysummary`, `tbl_continuous` or -#' `tbl_custom_summary`. +#' created by `tbl_summary()`, `tbl_svysummary()`, `tbl_continuous()` or +#' `tbl_custom_summary()`. #' -#' @param x (`tbl_summary`/`tbl_svysummary`/`tbl_continuous`/`tbl_custom_summary`)\cr +#' @param x (`tbl_summary`, `tbl_svysummary`, `tbl_continuous`, `tbl_custom_summary`)\cr #' A stratified 'gtsummary' table -#' @param last Logical indicator to display overall column last in table. -#' Default is `FALSE`, which will display overall column first. -#' @param col_label String indicating the column label. Default is `"**Overall** \nN = {N}"` -#' @param statistic Override the statistic argument in initial `tbl_*` function. -#' call. Default is `NULL`. -#' @param digits Override the digits argument in initial `tbl_*` function -#' call. Default is `NULL`. +#' @param last (scalar `logical`)\cr +#' Logical indicator to display overall column last in table. +#' Default is `FALSE`, which will display overall column first. +#' @param col_label (`string`)\cr +#' String indicating the column label. Default is `"**Overall** \nN = {style_number(N)}"` +#' @param statistic ([`formula-list-selector`][syntax])\cr +#' Override the statistic argument in initial `tbl_*` function +#' call. Default is `NULL`. +#' @param digits ([`formula-list-selector`][syntax])\cr +#' Override the digits argument in initial `tbl_*` function +#' call. Default is `NULL`. #' @inheritParams rlang::args_dots_empty #' #' @author Daniel D. Sjoberg -#' @export +#' @name add_overall #' @return A `gtsummary` of same class as `x` #' @examples #' # Example 1 ---------------------------------- @@ -65,14 +51,26 @@ add_overall <- function(x, ...) 
{ #' include = grade #' ) |> #' add_overall(last = TRUE) -add_overall.tbl_summary <- function(x, last = FALSE, col_label = "**Overall** \nN = {N}", +NULL + +#' @rdname add_overall +#' @export +add_overall <- function(x, ...) { + check_not_missing(x) + check_class(x, "gtsummary") + UseMethod("add_overall") +} + +#' @rdname add_overall +#' @export +add_overall.tbl_summary <- function(x, last = FALSE, col_label = "**Overall** \nN = {style_number(N)}", statistic = NULL, digits = NULL, ...) { set_cli_abort_call() check_dots_empty() # translating the col_label, if nothing passed by user if (missing(col_label)) { - paste0("**", translate_string("Overall"), "** \nN = {N}") + col_label <- paste0("**", translate_string("Overall"), "** \nN = {style_number(N)}") } add_overall_generic( @@ -191,10 +189,8 @@ add_overall_merge <- function(x, tbl_overall, last, col_label, calling_fun) { dplyr::filter(.data$column %in% "stat_0") ) - # Add + # Add header to overall column x <- modify_header(x, stat_0 = col_label) - - x } diff --git a/R/custom_tidiers.R b/R/custom_tidiers.R index f57a93fb6c..7c510b2007 100644 --- a/R/custom_tidiers.R +++ b/R/custom_tidiers.R @@ -27,8 +27,7 @@ #' The tidier uses the output from `parameters::model_parameters()`, and #' merely takes the result and puts it in `broom::tidy()` format. To use this #' function with `tbl_regression()`, pass a function with the arguments for -#' `tidy_robust()` populated. This is easily done using `purrr::partial()` e.g. -#' `tbl_regression(tidy_fun = partial(tidy_robust, vcov = "CL"))` +#' `tidy_robust()` populated. #' #' - `pool_and_tidy_mice()` tidier to report models resulting from multiply imputed data #' using the mice package. 
Pass the mice model object *before* the model results diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000000..7d0f588375 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,20 @@ +citHeader("To cite gtsummary in publications use:") + +bibentry( + bibtype = "Article", + key = "gtsummary", + author = c(person("Daniel D.", "Sjoberg"), + person("Karissa", "Whiting"), + person("Michael", "Curry"), + person("Jessica A.", "Lavery"), + person("Joseph", "Larmarange")), + title = "Reproducible Summary Tables with the gtsummary Package", + journal = "{The R Journal}", + year = "2021", + url = "https://doi.org/10.32614/RJ-2021-053", + doi = "10.32614/RJ-2021-053", + volume = "13", + issue = "1", + pages = "570-580", + textVersion = 'Sjoberg DD, Whiting K, Curry M, Lavery JA, Larmarange J. Reproducible summary tables with the gtsummary package. The R Journal 2021;13:570–80. https://doi.org/10.32614/RJ-2021-053.' +) diff --git a/inst/WORDLIST b/inst/WORDLIST new file mode 100644 index 0000000000..02cd8668ac --- /dev/null +++ b/inst/WORDLIST @@ -0,0 +1,94 @@ +ANCOVA +Biostatistics +CMD +Codecov +DOI +GGally +GVIF +Gehan +Hmisc +JAMA +Kaplan +MacOS +McNemar +McNemar's +NEJM +Ns +ORCID +Peto +README +RJ +RStudio +RTF +Rao +SHA +Tarone +Tidiers +VIF +Waerden's +Wainberg +YAML +bstfun +cli +codebase +coef +conf +coxph +crosstab +customizability +customizable +der +df +dichotomously +dplyr +effectsize +emmeans +exponentiate +flextable +forcats +ftExtra +ggplot +ggstats +glm +huxtable +kable +kableExtra +knitr +labelled +lifecycle +lm +lme +logLik +mL +mira +nevent +ng +nnet +obejcts +pre +pvalue +quosure +quosures +saddlepoint +smd +srvyr +styler +survfit +svysummary +tbl +th +tibble +tibbles +tidiers +tidyr +tidyselect +tidyselect's +tidyverse +un +unhide +unicode +univariable +unstratified +usethis +uvregression +yaml diff --git a/inst/rmarkdown_example/.gitignore b/inst/rmarkdown_example/.gitignore new file mode 100644 index 0000000000..a70fc31d06 --- /dev/null 
+++ b/inst/rmarkdown_example/.gitignore @@ -0,0 +1,9 @@ +# ignoring Rmarkdown output +*.html +*.pdf +*.docx +*.doc +*.rtf +~*.docx +~*.doc +~*.rtf diff --git a/inst/rmarkdown_example/gtsummary_rmarkdown_html.Rmd b/inst/rmarkdown_example/gtsummary_rmarkdown_html.Rmd new file mode 100644 index 0000000000..c4df800bac --- /dev/null +++ b/inst/rmarkdown_example/gtsummary_rmarkdown_html.Rmd @@ -0,0 +1,54 @@ +--- +title: "gtsummary + R Markdown" +output: html_document +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set(echo = TRUE, warning = FALSE) +``` + +```{r libraries, message=FALSE} +library(gtsummary) +library(tidyverse) +library(survival) +``` + +## gtsummary tables + +Tables created with {gtsummary} can be integrated into R markdown documents. +The {gtsummary} package was written to be a companion to the [{gt} package](https://gt.rstudio.com/) from Posit. + +```{r example} +patient_characteristics <- + trial |> + tbl_summary(by = trt, include = c(age, grade, response)) +patient_characteristics +``` + +With HTML output, you can include complex tables with footnotes, indentation, and spanning table headers. + +```{r} +# Side-by-side Regression Models +# logistic regression model +t1 <- + glm(response ~ trt + grade + age, trial, family = binomial) %>% + tbl_regression(exponentiate = TRUE) +# time to death Cox model +t2 <- + coxph(Surv(ttdeath, death) ~ trt + grade + age, trial) %>% + tbl_regression(exponentiate = TRUE) + +# printing merged table +tbl_merge( + tbls = list(t1, t2), + tab_spanner = c("**Tumor Response**", "**Time to Death**") +) +``` + +## inline reporting + +Any number/statistic from a {gtsummary} table can be reported inline in a R markdown document using the `inline_text()` function. See example below: + +> Among patients who received Drug A, `r inline_text(patient_characteristics, variable = grade, level = "III", column = "Drug A")` had grade III tumors. 
+ +For detailed examples using functions from {gtsummary}, visit the [{gtsummary} website](https://www.danieldsjoberg.com/gtsummary/). diff --git a/man/add_overall.Rd b/man/add_overall.Rd index 93b74d931d..908e345ed3 100644 --- a/man/add_overall.Rd +++ b/man/add_overall.Rd @@ -2,6 +2,7 @@ % Please edit documentation in R/add_overall.R \name{add_overall} \alias{add_overall} +\alias{add_overall.tbl_summary} \alias{add_overall.tbl_continuous} \alias{add_overall.tbl_svysummary} \alias{add_overall.tbl_custom_summary} @@ -9,10 +10,19 @@ \usage{ add_overall(x, ...) +\method{add_overall}{tbl_summary}( + x, + last = FALSE, + col_label = "**Overall** \\nN = {style_number(N)}", + statistic = NULL, + digits = NULL, + ... +) + \method{add_overall}{tbl_continuous}( x, last = FALSE, - col_label = "**Overall** \\nN = {N}", + col_label = "**Overall** \\nN = {style_number(N)}", statistic = NULL, digits = NULL, ... @@ -21,7 +31,7 @@ add_overall(x, ...) \method{add_overall}{tbl_svysummary}( x, last = FALSE, - col_label = "**Overall** \\nN = {N}", + col_label = "**Overall** \\nN = {style_number(N)}", statistic = NULL, digits = NULL, ... @@ -30,27 +40,71 @@ add_overall(x, ...) \method{add_overall}{tbl_custom_summary}( x, last = FALSE, - col_label = "**Overall** \\nN = {N}", + col_label = "**Overall** \\nN = {style_number(N)}", statistic = NULL, digits = NULL, ... ) } \arguments{ -\item{x}{(\code{gtsummary})\cr -Object with class 'gtsummary'} +\item{x}{(\code{tbl_summary}, \code{tbl_svysummary}, \code{tbl_continuous}, \code{tbl_custom_summary})\cr +A stratified 'gtsummary' table} + +\item{...}{These dots are for future extensions and must be empty.} -\item{...}{Passed to other methods.} +\item{last}{(scalar \code{logical})\cr +Logical indicator to display overall column last in table. +Default is \code{FALSE}, which will display overall column first.} + +\item{col_label}{(\code{string})\cr +String indicating the column label. 
Default is \code{"**Overall** \\nN = {style_number(N)}"}} + +\item{statistic}{(\code{\link[=syntax]{formula-list-selector}})\cr +Override the statistic argument in initial \verb{tbl_*} function +call. Default is \code{NULL}.} + +\item{digits}{(\code{\link[=syntax]{formula-list-selector}})\cr +Override the digits argument in initial \verb{tbl_*} function +call. Default is \code{NULL}.} } -\description{ -\itemize{ -\item \code{\link[=add_overall.tbl_summary]{add_overall.tbl_summary()}} +\value{ +A \code{gtsummary} of same class as \code{x} } +\description{ +Adds a column with overall summary statistics to tables +created by \code{tbl_summary()}, \code{tbl_svysummary()}, \code{tbl_continuous()} or +\code{tbl_custom_summary()}. } -\seealso{ -\code{\link[=add_overall.tbl_summary]{add_overall.tbl_summary()}} +\examples{ +# Example 1 ---------------------------------- +trial |> + tbl_summary(include = c(age, grade), by = trt) |> + add_overall() + +# Example 2 ---------------------------------- +trial |> + tbl_summary( + include = grade, + by = trt, + percent = "row", + statistic = ~"{p}\%", + digits = ~1 + ) |> + add_overall( + last = TRUE, + statistic = ~"{p}\% (n={n})", + digits = ~ c(1, 0) + ) + +# Example 3 ---------------------------------- +trial |> + tbl_continuous( + variable = age, + by = trt, + include = grade + ) |> + add_overall(last = TRUE) } \author{ Daniel D. Sjoberg } -\keyword{internal} diff --git a/man/add_overall.tbl_summary.Rd b/man/add_overall.tbl_summary.Rd deleted file mode 100644 index fb4fb06e73..0000000000 --- a/man/add_overall.tbl_summary.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_overall.R -\name{add_overall.tbl_summary} -\alias{add_overall.tbl_summary} -\title{Add overall column} -\usage{ -\method{add_overall}{tbl_summary}( - x, - last = FALSE, - col_label = "**Overall** \\nN = {N}", - statistic = NULL, - digits = NULL, - ... 
-) -} -\arguments{ -\item{x}{(\code{tbl_summary}/\code{tbl_svysummary}/\code{tbl_continuous}/\code{tbl_custom_summary})\cr -A stratified 'gtsummary' table} - -\item{last}{Logical indicator to display overall column last in table. -Default is \code{FALSE}, which will display overall column first.} - -\item{col_label}{String indicating the column label. Default is \code{"**Overall** \\nN = {N}"}} - -\item{statistic}{Override the statistic argument in initial \verb{tbl_*} function. -call. Default is \code{NULL}.} - -\item{digits}{Override the digits argument in initial \verb{tbl_*} function -call. Default is \code{NULL}.} - -\item{...}{These dots are for future extensions and must be empty.} -} -\value{ -A \code{gtsummary} of same class as \code{x} -} -\description{ -Adds a column with overall summary statistics to tables -created by \code{tbl_summary}, \code{tbl_svysummary}, \code{tbl_continuous} or -\code{tbl_custom_summary}. -} -\examples{ -# Example 1 ---------------------------------- -trial |> - tbl_summary(include = c(age, grade), by = trt) |> - add_overall() - -# Example 2 ---------------------------------- -trial |> - tbl_summary( - include = grade, - by = trt, - percent = "row", - statistic = ~"{p}\%", - digits = ~1 - ) |> - add_overall( - last = TRUE, - statistic = ~"{p}\% (n={n})", - digits = ~ c(1, 0) - ) - -# Example 3 ---------------------------------- -trial |> - tbl_continuous( - variable = age, - by = trt, - include = grade - ) |> - add_overall(last = TRUE) -} -\author{ -Daniel D. Sjoberg -} diff --git a/man/custom_tidiers.Rd b/man/custom_tidiers.Rd index 75e8efa02e..cc6a269350 100644 --- a/man/custom_tidiers.Rd +++ b/man/custom_tidiers.Rd @@ -108,8 +108,7 @@ package includes a wonderful function to calculate robust standard errors, confi The tidier uses the output from \code{parameters::model_parameters()}, and merely takes the result and puts it in \code{broom::tidy()} format. 
To use this function with \code{tbl_regression()}, pass a function with the arguments for -\code{tidy_robust()} populated. This is easily done using \code{purrr::partial()} e.g. -\code{tbl_regression(tidy_fun = partial(tidy_robust, vcov = "CL"))} +\code{tidy_robust()} populated. \item \code{pool_and_tidy_mice()} tidier to report models resulting from multiply imputed data using the mice package. Pass the mice model object \emph{before} the model results have been pooled. See example. diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 6ccee053cd..1a95e4523a 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -31,22 +31,22 @@ navbar: href: reference/index.html - text: Articles menu: - # - text: "tbl_summary() tutorial" - # href: articles/tbl_summary.html - # - text: "tbl_regression() tutorial" - # href: articles/tbl_regression.html - # - text: "inline_text() tutorial" - # href: articles/inline_text.html - # - text: "gtsummary + Quarto/Rmarkdown" - # href: articles/rmarkdown.html - # - text: "gtsummary + Shiny" - # href: articles/shiny.html + - text: "tbl_summary() tutorial" + href: articles/tbl_summary.html + - text: "tbl_regression() tutorial" + href: articles/tbl_regression.html + - text: "inline_text() tutorial" + href: articles/inline_text.html + - text: "gtsummary + Quarto/Rmarkdown" + href: articles/rmarkdown.html + - text: "gtsummary + Shiny" + href: articles/shiny.html - text: "gtsummary + themes" href: articles/themes.html - text: "cheat sheet" href: https://raw.githubusercontent.com/rstudio/cheatsheets/main/gtsummary.pdf - # - text: "FAQ + gallery" - # href: articles/gallery.html + - text: "FAQ + gallery" + href: articles/gallery.html - text: News href: news/index.html diff --git a/vignettes/articles/gallery.Rmd b/vignettes/articles/gallery.Rmd new file mode 100644 index 0000000000..e1939f244d --- /dev/null +++ b/vignettes/articles/gallery.Rmd @@ -0,0 +1,428 @@ +--- +title: "FAQ + Gallery" +--- + +```{r, include = FALSE} 
+knitr::opts_chunk$set( + collapse = TRUE, + eval = TRUE, + warning = FALSE, + comment = "#>" +) +``` + +FAQ and Gallery showing various tables possible with the {gtsummary} package. + +```{r setup, message = FALSE, warning=FALSE} +library(gtsummary) +``` + +
+ +## Frequently Asked Questions + +### Data Summary Tables + +Headers, Labels and Formatting + +1. [How do I **modify column headers** in a table?](#table-header) + +1. [How do I **change the value** of the by levels?](#modify-pvalue) + +1. [How do I add a **spanning header** row to a table?](#table-header) + +1. [How do I change **variable labels**?](#modify-pvalue) + +1. [How do I **italicize or bold labels** in a table?](#table-header) + +1. [How do I **italicize or bold levels** in a table?](#continuous2) + +1. [How do I reduce **font size and cell padding** in a table?](#side-by-side) + +Adding and Modifying Statistics + +8. [How do I add the **number of observations** to a summary table?](#table-header) + +1. [How do I show **additional summary statistics** as a new row?](#continuous2) + +1. [How do I include a column for **missing values of a grouping variable**?](#include-missing) + +1. [How do I add a column with the **confidence interval around the mean**?](#mean-ci) + +1. [How do I add a column for the **difference between groups**?](#add_difference) + +1. [How do I summarize a continuous variable by one, two (or more) categorical variables?](#cross_tab_continuous) + +1. [How do I **stratify a summary table** by more than one variable?](#tbl_strata) + +1. [How do I change the **p-values format**?](#modify-pvalue) + +1. [How do I add a p-value for each group compared to a **single reference group**?](#compare-to-ref) + +1. [How do I add a correction for **multiple testing**?](#modify-pvalue) + +1. [How do I **combine** a summary table with a regression table?](#side-by-side) + +Statistical Tests + +19. [How do I do a **paired** t-test or McNemar's test?](#paired-test) + +
+ +### Regression Tables + +Headers, Labels and Formatting + +1. [How do I change the text in a **footnote**?](#tbl_uvregression-x) + +1. [How do I add **significance stars** for low p-values?](#add_significance_stars) + +1. [How do I **reduce font size** and cell padding in a table?](#side-by-side) + +Creating and Combining Tables + +4. [How do I **combine** the results of two related regression models into one table?](#side-by-side) + +1. [How do I **combine** a regression table with a summary table?](#side-by-side) + +1. [How do I create a regression table for **multiple models** with the same covariate(s) and different outcomes?](#tbl_uvregression-x) + +Adding and Modifying Statistics + +7. [How do I add total **event numbers** to a regression table?](#add_nevent) + +1. [How do I add event numbers for **each level** of a categorical covariate?](#add_nevent-levels) + +1. [How do I format a **Wald confidence interval**?](#wald-ci) + +
+ +## Summary Tables + + +Add a spanning header over the group columns for increased clarity, and modify column headers. Using `bold_labels()` formats the labels as bold, but labels can also be italicized using `italicize_labels()`, or combined to format with both bold and italics. + +```{r} +trial |> + tbl_summary( + by = trt, + include = c(age, grade), + missing = "no", + statistic = all_continuous() ~ "{median} ({p25}, {p75})" + ) |> + modify_header(all_stat_cols() ~ "**{level}** \nN = {n} ({style_percent(p)}%)") |> + add_n() |> + bold_labels() |> + modify_spanning_header(all_stat_cols() ~ "**Chemotherapy Treatment**") +``` + +
+ + +Show continuous summary statistics on multiple lines. Levels are italicized here using the `italicize_levels()` function, but the `bold_levels()` function can be used instead to create bold text, or both functions can be used together to get text that is both bold and in italics. + +```{r} +trial |> + tbl_summary( + by = trt, + include = c(age, marker), + type = all_continuous() ~ "continuous2", + statistic = + all_continuous() ~ c("{N_nonmiss}", + "{mean} ({sd})", + "{median} ({p25}, {p75})", + "{min}, {max}"), + missing = "no" + ) |> + italicize_levels() +``` + +
+ + +Modify the function that formats the p-values, change variable labels, update the tumor response header, and add a correction for multiple testing. + +```{r, message = FALSE} +trial |> + mutate(response = factor(response, labels = c("No Tumor Response", "Tumor Responded"))) |> + tbl_summary( + by = response, + include = c(age, grade), + missing = "no", + label = list(age ~ "Patient Age", grade ~ "Tumor Grade") + ) |> + add_p(pvalue_fun = label_style_pvalue(digits = 2)) |> + add_q() +``` + +
+ + +Include missing tumor response as column using `forcats::fct_na_value_to_level()`. + +```{r} +trial |> + mutate( + response = + factor(response, labels = c("No Tumor Response", "Tumor Responded")) |> + forcats::fct_na_value_to_level(level = "Missing Response Status") + ) |> + tbl_summary( + by = response, + include = c(age, grade), + label = list(age ~ "Patient Age", grade ~ "Tumor Grade") + ) +``` + +
+ + +Report treatment differences between two groups. +This is often needed in randomized trials. +In this example, we report the difference in tumor response and marker level between two chemotherapy treatments. + +```{r} +trial |> + tbl_summary( + by = trt, + include = c(response, marker), + statistic = list( + all_continuous() ~ "{mean} ({sd})", + all_categorical() ~ "{p}%" + ), + missing = "no" + ) |> + add_difference() |> + add_n() |> + modify_header(all_stat_cols() ~ "**{level}**") +``` + +
+ + +Paired t-test and McNemar's test. The data is expected in a long format with 2 rows per participant. + +```{r} +# imagine that each patient received Drug A and Drug B (adding ID showing their paired measurements) +trial_paired <- + trial |> + select(trt, marker, response) |> + mutate(.by = trt, id = dplyr::row_number()) + +# you must first delete incomplete pairs from the data, then you can build the table +trial_paired |> + # delete missing values + tidyr::drop_na() |> + # keep IDs with both measurements + dplyr::filter(.by = id, dplyr::n() == 2) |> + # summarize data + tbl_summary(by = trt, include = -id) |> + add_p( + test = list(marker ~ "paired.t.test", + response ~ "mcnemar.test"), + group = id + ) +``` + +
+ + +Include p-values comparing all groups to a single reference group. + +```{r} +# table summarizing data with no p-values +small_trial <- trial |> select(grade, age, response) +t0 <- small_trial |> + tbl_summary(by = grade, missing = "no") |> + modify_header(all_stat_cols() ~ "**{level}**") + +# table comparing grade I and II +t1 <- small_trial |> + dplyr::filter(grade %in% c("I", "II")) |> + tbl_summary(by = grade, missing = "no") |> + add_p() |> + modify_header(p.value ~ "**I vs. II**") |> + # hide summary stat columns + modify_column_hide(all_stat_cols()) + +# table comparing grade I and III +t2 <- small_trial |> + dplyr::filter(grade %in% c("I", "III")) |> + tbl_summary(by = grade, missing = "no") |> + add_p() |> + modify_header(p.value = "**I vs. III**") |> + # hide summary stat columns + modify_column_hide(all_stat_cols()) + +# merging the 3 tables together, and adding additional gt formatting +tbl_merge(list(t0, t1, t2)) |> + modify_spanning_header( + all_stat_cols() ~ "**Tumor Grade**", + starts_with("p.value") ~ "**p-values**" + ) +``` + +
+ + +Add a 95% confidence interval around the mean as an additional column. + +```{r} +trial |> + tbl_summary( + include = c(age, marker), + statistic = all_continuous() ~ "{mean} ({sd})", + missing = "no" + ) |> + modify_header(stat_0 = "**Mean (SD)**") |> + modify_footnote(stat_0 = NA) |> + add_ci() +``` + +
+ + +It's often needed to summarize a continuous variable by one, two, or more categorical variables. The example below shows a table summarizing a continuous variable by two categorical variables. To summarize by more than two categorical variables, use `tbl_continuous` in conjunction with `tbl_strata` (see an example of `tbl_strata` [here](#tbl_strata)). + +```{r} +trial |> + tbl_continuous(variable = marker, by = trt, include = grade) |> + modify_spanning_header(all_stat_cols() ~ "**Treatment Assignment**") +``` + +
+ + +Build a summary table stratified by more than one variable. + +```{r} +trial |> + select(trt, grade, age, stage) |> + mutate(grade = paste("Grade", grade)) |> + tbl_strata( + strata = grade, + ~ .x |> + tbl_summary(by = trt, missing = "no") |> + modify_header(all_stat_cols() ~ "**{level}**") + ) +``` + +
+ +## Regression Tables + + +Include number of observations and the number of events in a univariate regression table. + +```{r} +trial |> + tbl_uvregression( + method = glm, + y = response, + include = c(age, grade), + method.args = list(family = binomial), + exponentiate = TRUE + ) |> + add_nevent() +``` + +
+ + +Include two related models side-by-side with descriptive statistics. +We also use the compact table theme that reduces cell padding and font size. + +```{r} +gt_r1 <- glm(response ~ trt + grade, trial, family = binomial) |> + tbl_regression(exponentiate = TRUE) +gt_r2 <- survival::coxph(survival::Surv(ttdeath, death) ~ trt + grade, trial) |> + tbl_regression(exponentiate = TRUE) +gt_t1 <- trial |> + tbl_summary(include = c(trt, grade), missing = "no") |> + add_n() |> + modify_header(stat_0 = "**n (%)**") |> + modify_footnote(stat_0 = NA_character_) + +theme_gtsummary_compact() +tbl_merge( + list(gt_t1, gt_r1, gt_r2), + tab_spanner = c(NA_character_, "**Tumor Response**", "**Time to Death**") +) +``` + +```{r, echo=FALSE} +reset_gtsummary_theme() +``` + +
+ + +Include the number of events at each level of a categorical predictor. + +```{r} +trial |> + tbl_uvregression( + method = survival::coxph, + y = survival::Surv(ttdeath, death), + include = c(stage, grade), + exponentiate = TRUE, + hide_n = TRUE + ) |> + add_nevent(location = "level") +``` + +
+ + +Regression model where the covariate remains the same, and the outcome changes. + +```{r} +trial |> + tbl_uvregression( + method = lm, + x = trt, + show_single_row = "trt", + hide_n = TRUE, + include = c(age, marker) + ) |> + modify_header(label = "**Model Outcome**", + estimate = "**Treatment Coef.**") |> + modify_footnote(estimate = "Values larger than 0 indicate larger values in the Drug B group.") +``` + +
+ + +Implement a custom tidier to report Wald confidence intervals. +The Wald confidence intervals are calculated using `confint.default()`. + +```{r} +my_tidy <- function(x, exponentiate = FALSE, conf.level = 0.95, ...) { + dplyr::bind_cols( + broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE), + # calculate the confidence intervals, and save them in a tibble + confint.default(x) |> + dplyr::as_tibble() |> + rlang::set_names(c("conf.low", "conf.high")) + ) +} + +lm(age ~ grade + marker, trial) |> + tbl_regression(tidy_fun = my_tidy) +``` + +
+ + +Use significance stars on estimates with low p-values. + +```{r} +trial |> + tbl_uvregression( + method = survival::coxph, + y = survival::Surv(ttdeath, death), + include = c(stage, grade), + exponentiate = TRUE, + ) |> + add_significance_stars() +``` diff --git a/vignettes/articles/img/icons8-confused-100.png b/vignettes/articles/img/icons8-confused-100.png new file mode 100644 index 0000000000..424cec89e8 Binary files /dev/null and b/vignettes/articles/img/icons8-confused-100.png differ diff --git a/vignettes/articles/img/icons8-disappointed-100.png b/vignettes/articles/img/icons8-disappointed-100.png new file mode 100644 index 0000000000..bfb3698e1d Binary files /dev/null and b/vignettes/articles/img/icons8-disappointed-100.png differ diff --git a/vignettes/articles/img/icons8-neutral-100.png b/vignettes/articles/img/icons8-neutral-100.png new file mode 100644 index 0000000000..68ad161b4c Binary files /dev/null and b/vignettes/articles/img/icons8-neutral-100.png differ diff --git a/vignettes/articles/img/icons8-no-entry-100.png b/vignettes/articles/img/icons8-no-entry-100.png new file mode 100644 index 0000000000..0f0e8e8249 Binary files /dev/null and b/vignettes/articles/img/icons8-no-entry-100.png differ diff --git a/vignettes/articles/img/icons8-smiling-100.png b/vignettes/articles/img/icons8-smiling-100.png new file mode 100644 index 0000000000..c78d420b52 Binary files /dev/null and b/vignettes/articles/img/icons8-smiling-100.png differ diff --git a/vignettes/articles/img/icons8-under-construction-100.png b/vignettes/articles/img/icons8-under-construction-100.png new file mode 100644 index 0000000000..2d5c8a679c Binary files /dev/null and b/vignettes/articles/img/icons8-under-construction-100.png differ diff --git a/vignettes/articles/rmarkdown.Rmd b/vignettes/articles/rmarkdown.Rmd new file mode 100644 index 0000000000..3f87e52e1a --- /dev/null +++ b/vignettes/articles/rmarkdown.Rmd @@ -0,0 +1,233 @@ +--- +title: "rmarkdown" +--- + +```{r, include = 
FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + warning = FALSE, + comment = "#>" +) +library(gt) +``` + +The **{gtsummary}** package was written to be a companion to the **{gt}** package from RStudio. +But not all output types are supported by the **{gt}** package. +Therefore, we have made it possible to print **{gtsummary}** tables with various engines. + +## Output Types + +Here's a summary of the various Quarto and R Markdown output types and the print engines that support them. + + +```{r, echo=FALSE, warning=FALSE} + +# list of all the icons used in table +path_figure <- list( + "img/icons8-smiling-100.png", + "img/icons8-neutral-100.png", + "img/icons8-disappointed-100.png", + "img/icons8-no-entry-100.png", + "img/icons8-under-construction-100.png" +) + +# first make the key to go in the footnote +gt_markdown_key <- + dplyr::tibble( + figure = 1:5, + desc = c( + "Output fully supported", + "Missing indentation, footnotes, spanning headers", + "No formatted output", + "Output not supported", + "Under development, missing indentation" + ) + ) %>% + gt() %>% + cols_label(figure = md("**Key**"), desc = "") %>% + data_color( + columns = c(figure), + colors = scales::col_factor( + palette = c("#bae1ff", "#ffb3ba", "#ffdfba", "#ffffba", "#baffc9"), + domain = NULL, + reverse = TRUE + ), + alpha = 0.8 + ) %>% + text_transform( + locations = cells_body(columns = c(figure)), + fn = function(x) { + dplyr::case_when( + x == 1 ~ local_image(filename = path_figure[[1]], height = 20), + x == 2 ~ local_image(filename = path_figure[[2]], height = 20), + x == 3 ~ local_image(filename = path_figure[[3]], height = 20), + x == 4 ~ local_image(filename = path_figure[[4]], height = 20), + x == 5 ~ local_image(filename = path_figure[[5]], height = 20) + ) + } + ) %>% + tab_options( + table.font.size = "x-small", + data_row.padding = px(3), + column_labels.hidden = TRUE + ) %>% + opt_table_lines(extent = "none") %>% + tab_options(table.align = "left") + +# making table with gt 
+tidyr::expand_grid( + printer = c("gt", "flextable", "huxtable", "kableExtra", "kable", "tibble"), + output = c("HTML", "PDF", "RTF", "Word") +) |> + dplyr::mutate( + rating = dplyr::case_when( + printer == "gt" & output %in% c("HTML", "Word", "PDF") ~ 1, # good output + printer == "gt" & output %in% c("RTF") ~ 5, # under construction + printer == "kable" ~ 2, # ok output + printer == "flextable" & output != "RTF" ~ 1, # good output + printer == "flextable" & output == "RTF" ~ 4, # not supported + printer == "kableExtra" & output %in% c("PDF", "HTML") ~ 1, # good output + printer == "kableExtra" & output %in% c("RTF", "Word") ~ 4, # not supported + printer == "huxtable" ~ 1, # good output + printer == "tibble" ~ 3 # not great + ) %>% + factor() + ) %>% + tidyr::pivot_wider(id_cols = printer, names_from = output, values_from = rating) %>% + dplyr::mutate( + link = dplyr::case_when( + printer == "gt" ~ + "[gt](https://gt.rstudio.com/index.html)", + printer == "kable" ~ + "[kable](https://bookdown.org/yihui/rmarkdown-cookbook/kable.html)", + printer == "flextable" ~ + "[flextable](https://davidgohel.github.io/flextable/articles/overview.html)", + printer == "kableExtra" ~ + "[kableExtra](http://haozhu233.github.io/kableExtra/)", + printer == "huxtable" ~ + "[huxtable](https://hughjonesd.github.io/huxtable/)", + printer == "tibble" ~ + "[tibble](https://tibble.tidyverse.org/)" + ), + fns = dplyr::case_when( + printer == "gt" ~ "`as_gt()`", + printer == "kable" ~ "`as_kable()`", + printer == "flextable" ~ "`as_flex_table()`", + printer == "kableExtra" ~ "`as_kable_extra()`", + printer == "huxtable" ~ "`as_hux_table()`", + printer == "tibble" ~ "`as_tibble()`" + ) + ) %>% + gt() %>% + cols_move_to_start(columns = c(link, fns, HTML, Word, PDF, RTF)) %>% + cols_hide(columns = c(printer)) %>% + cols_label( + link = md("**Print Engine**"), + fns = md("**Function**"), + HTML = md("**HTML**"), PDF = md("**PDF**"), + RTF = md("**RTF**"), Word = md("**Word**") + ) %>% + 
fmt_markdown(columns = c(fns, link)) %>% + data_color( + columns = c(HTML, PDF, RTF, Word), + colors = scales::col_factor( + palette = c("#bae1ff", "#ffb3ba", "#ffdfba", "#ffffba", "#baffc9"), + domain = NULL, + reverse = TRUE + ), + alpha = 0.8 + ) %>% + text_transform( + locations = cells_body(columns = c(HTML, PDF, RTF, Word)), + fn = function(x) { + dplyr::case_when( + x == 1 ~ local_image(filename = path_figure[[1]]), + x == 2 ~ local_image(filename = path_figure[[2]]), + x == 3 ~ local_image(filename = path_figure[[3]]), + x == 4 ~ local_image(filename = path_figure[[4]]), + x == 5 ~ local_image(filename = path_figure[[5]]) + ) + } + ) %>% + cols_width( + c(HTML, PDF, RTF, Word) ~ px(60), + c(link) ~ px(110), + c(link, fns) ~ px(140) + ) %>% + gt::tab_source_note( + source_note = gt_markdown_key %>% as_raw_html() %>% html() + ) %>% + tab_options( + table.border.bottom.width = 0 + ) +``` + +Any **{gtsummary}** table can be converted to one of the types in the table above. +For example, the code below prints a **{gtsummary}** table as a **{flextable}** table, instead of the default **{gt}** table. + +```{r, eval=FALSE} +tbl_summary(trial) %>% + as_flex_table() +``` + +## Example R Markdown Report + +An example R markdown report using **{gtsummary}** has been included with the package. +To open the example file, run the following command in the R console. + +```{r, eval = FALSE} +library(gtsummary) +system.file(package = "gtsummary") %>% + file.path("rmarkdown_example/gtsummary_rmarkdown_html.Rmd") %>% + file.edit() +``` + +## LaTeX + +To print {gtsummary} tables using LaTeX, utilize one of the supporting print engines. 
+ +```{r, eval = FALSE} +# build gtsummary table +tbl <- tbl_summary(trial) + +# using the {gt} package +as_gt(tbl) %>% gt::as_latex() + +# using the {huxtable} package +as_hux_table(tbl) %>% huxtable::to_latex() + +# using the {kableExtra} package +as_kable_extra(tbl, format = "latex") + +# using the knitr::kable function +as_kable(tbl, format = "latex") +``` + +## Images + +Use the {gt} package's `gt::gtsave()` function to save images of {gtsummary} tables. + +```{r, eval = FALSE} +tbl_summary(trial) |> # build gtsummary table + as_gt() |> # convert to gt table + gt::gtsave( # save table as image + filename = "my_table_image.png" + ) +``` + +## Tips + +When printing {gt} or {gtsummary} tables in a loop, use `print()` and `results = 'asis'` in the R markdown chunk. + +````r +`r ''````{r loop_print, results = 'asis'} +for (i in 1) { + tbl <- tbl_summary(trial) # build gtsummary table + print(tbl) # print table +} +``` +```` + +If `print(tbl)` does not work for you, try either `knitr::knit_print(tbl)` or `cat(knitr::knit_print(tbl))`. + + Icons from [icons8](https://icons8.com/) diff --git a/vignettes/articles/shiny.Rmd b/vignettes/articles/shiny.Rmd new file mode 100644 index 0000000000..5de69cddec --- /dev/null +++ b/vignettes/articles/shiny.Rmd @@ -0,0 +1,33 @@ +--- +title: "gtsummary + Shiny" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +Any gtsummary table can be placed into a [Shiny application](https://shiny.rstudio.com/). +The trick is that the gtsummary must first be converted into a gt table using the `gtsummary::as_gt()` function. +Once the table is a proper gt table, we can use the methods from the gt package to place the table in the Shiny app. +Read more about creating Shiny applications with gt tables here: [https://gt.rstudio.com/reference/gt_output.html](https://gt.rstudio.com/reference/gt_output.html). 
+ +# Shiny Example + +Below is an example of a Shiny app using the gtsummary package to provide dynamic patient characteristic summaries. + + +```{=html} + +``` + +If the Shiny app has run out of its free server usage, see below for instructions on running the app locally on your machine. + +# Shiny Code + +```{r, eval=FALSE, file='https://raw.githubusercontent.com/ddsjoberg/gtsummary-shiny-example/main/app.R'} +``` + +To run this Shiny app locally on your machine, save the script above as `app.R` in a R Project, open in RStudio, and run the application. diff --git a/vignettes/articles/tbl_regression.Rmd b/vignettes/articles/tbl_regression.Rmd new file mode 100644 index 0000000000..5f7620bea3 --- /dev/null +++ b/vignettes/articles/tbl_regression.Rmd @@ -0,0 +1,270 @@ +--- +title: "tbl_regression" +--- + +```{r setup, include = FALSE} +# sourcing the purrr shims from rlang +library(rlang) +source("https://raw.githubusercontent.com/r-lib/rlang/main/R/standalone-purrr.R") + +knitr::opts_chunk$set( + collapse = TRUE, + warning = FALSE, + comment = "#>" +) + +gt_compact_fun <- function(x) { + gt::tab_options(x, + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +} + +# exit if car package not installed (added to pass Cmd Checks on old R versions) +if (!requireNamespace("car")) knitr::knit_exit() +``` + +## Introduction + +The [`tbl_regression()`](https://www.danieldsjoberg.com/gtsummary/reference/tbl_regression.html) function takes a **regression model object** in **R** and returns a **formatted table of regression model results** that is publication-ready. +It is a simple way to summarize and present your analysis results using **R**! 
+Like [`tbl_summary()`](https://www.danieldsjoberg.com/gtsummary/reference/tbl_summary.html), `tbl_regression()` creates highly customizable analytic tables with sensible defaults. + +This vignette will walk a reader through the `tbl_regression()` function, and the various functions available to modify and make additions to an existing formatted regression table. + +animated + +*Behind the scenes:* `tbl_regression()` uses `broom::tidy()` to perform the initial model formatting, and can accommodate many different model types (e.g. `lm()`, `glm()`, `survival::coxph()`, `survival::survreg()` and others are [supported models](#supported-models) known to work with {gtsummary}). +It is also possible to specify your own function to tidy the model results if needed. + +## Setup + +Before going through the tutorial, install and load {gtsummary}. + +```{r message = FALSE, warning=FALSE} +# install.packages("gtsummary") +library(gtsummary) +``` + +## Example data set + +In this vignette we'll be using the [**`trial`**](https://www.danieldsjoberg.com/gtsummary/reference/trial.html) data set which is included in the {gtsummary} package. + +* This data set contains information from `r nrow(trial)` patients who received one of two types of chemotherapy (Drug A or Drug B). + +* The outcomes are tumor response and death. + +* Each variable in the data frame has been assigned an **attribute label** (i.e. `attr(trial$trt, "label") == "Chemotherapy Treatment")` with the **[labelled](http://larmarange.github.io/labelled/)** package, which we highly recommend using. These labels are displayed in the {gtsummary} output table by default. Using {gtsummary} on a data frame without labels will simply print variable names, or there is an option to add labels later. 
+ +```{r, echo = FALSE} +trial |> + imap( + ~ dplyr::tibble( + Variable = glue::glue("`{.y}`"), + Class = class(.x), + Label = attr(.x, "label") + ) + ) |> + dplyr::bind_rows() |> + gt::gt() |> + gt::tab_source_note("Includes mix of continuous, dichotomous, and categorical variables") |> + gt::fmt_markdown(columns = c(Variable)) |> + gt::cols_align("left") |> + gt_compact_fun() +``` + +## Basic Usage + +The default output from `tbl_regression()` is meant to be publication ready. + +* Let's start by creating a **logistic regression model** to **predict tumor response** using the variables age and stage from the [`trial`](https://www.danieldsjoberg.com/gtsummary/reference/trial.html) data set. +```{r, message=FALSE} +# build logistic regression model +m1 <- glm(response ~ age + stage, trial, family = binomial) + +# view raw model results +summary(m1)$coefficients +``` + +* We will then create a **regression model table** to summarize and present these results in just one line of code from {gtsummary}. + +```{r, message=FALSE} +tbl_regression(m1, exponentiate = TRUE) +``` + +Note the sensible defaults with this basic usage (that can be customized later): + +* The model was recognized as logistic regression with coefficients exponentiated, so the **header displayed "OR"** for odds ratio. + +* **Variable types** are automatically detected and **reference rows** are added for categorical variables. + +* **Model estimates** and **confidence intervals** are **rounded** and formatted. + +* Because the variables in the data set were [labelled](http://larmarange.github.io/labelled/), the **labels were carried through** into the {gtsummary} output table. Had the data not been labelled, the default is to display the variable name. + +* Variable levels are **indented** and **footnotes** added. + +## Customize Output + +There are four primary ways to customize the output of the regression model table. + +1. Modify `tbl_regression()` function input arguments +1. 
Add additional data/information to a summary table with `add_*()` functions +1. Modify summary table appearance with the {gtsummary} functions +1. Modify table appearance with {gt} package functions + +### Modifying function arguments + +The `tbl_regression()` function includes many arguments for modifying the appearance. + +```{r, echo = FALSE} +dplyr::tribble( + ~Argument, ~Description, + "`label=`", "modify variable labels in table", + "`exponentiate=`", "exponentiate model coefficients", + "`include=`", "names of variables to include in output. Default is all variables", + "`show_single_row=`", "By default, categorical variables are printed on multiple rows. If a variable is dichotomous and you wish to print the regression coefficient on a single row, include the variable name(s) here.", + "`conf.level=`", "confidence level of confidence interval", + "`intercept=`", "indicates whether to include the intercept", + "`estimate_fun=`", "function to round and format coefficient estimates", + "`pvalue_fun=`", "function to round and format p-values", + "`tidy_fun=`", "function to specify/customize tidier function" +) |> + gt::gt() |> + gt::fmt_markdown(columns = c(Argument)) |> + gt_compact_fun() +``` + +### {gtsummary} functions to add information + +The {gtsummary} package has built-in functions for adding to results from `tbl_regression()`. +The following functions add columns and/or information to the regression table. 
+ +```{r echo = FALSE} +dplyr::tribble( + ~Function, ~Description, + "`add_global_p()`", "adds the global p-value for a categorical variables", + "`add_glance_source_note()`", "adds statistics from `broom::glance()` as source note", + "`add_vif()`", "adds column of the variance inflation factors (VIF)", + "`add_q()`", "add a column of q values to control for multiple comparisons" +) |> + gt::gt() |> + gt::fmt_markdown(columns = c(Function)) |> + gt_compact_fun() +``` + +### {gtsummary} functions to format table + +The {gtsummary} package comes with functions specifically made to modify and format summary tables. +```{r echo = FALSE} +dplyr::tribble( + ~Function, ~Description, + "`modify_header()`", "update column headers", + "`modify_footnote()`", "update column footnote", + "`modify_spanning_header()`", "update spanning headers", + "`modify_caption()`", "update table caption/title", + "`bold_labels()`", "bold variable labels", + "`bold_levels()`", "bold variable levels", + "`italicize_labels()`", "italicize variable labels", + "`italicize_levels()`", "italicize variable levels", + "`bold_p()`", "bold significant p-values" +) |> + gt::gt() |> + gt::fmt_markdown(columns = c(Function)) |> + gt_compact_fun() +``` + +### {gt} functions to format table + +The [{gt} package](https://gt.rstudio.com/index.html) is packed with many great functions for modifying table output---too many to list here. +Review the package's website for a full listing. + +To use the {gt} package functions with {gtsummary} tables, the regression table must first be converted into a {gt} object. +To this end, use the `as_gt()` function after modifications have been completed with {gtsummary} functions. + +```{r} +m1 |> + tbl_regression(exponentiate = TRUE) |> + as_gt() |> + gt::tab_source_note(gt::md("*This data is simulated*")) +``` + +### Example + +There are formatting options available, such as adding bold and italics to text. 
+In the example below, + - Coefficients are **exponentiated** to give odds ratios + - **Global p-values** for Stage are reported + - Large p-values are rounded to **two decimal places** + - P-values less than 0.10 are **bold** + - Variable labels are **bold** + - Variable levels are **italicized** + +```{r, eval=TRUE} +# format results into data frame with global p-values +m1 |> + tbl_regression( + exponentiate = TRUE, + pvalue_fun = label_style_pvalue(digits = 2), + ) |> + add_global_p() |> + bold_p(t = 0.10) |> + bold_labels() |> + italicize_levels() +``` + +## Univariate Regression {#tbl_uvregression} + +The `tbl_uvregression()` function produces a table of univariate regression models. +The function is a wrapper for `tbl_regression()`, and as a result, accepts nearly identical function arguments. +The function's results can be modified in similar ways to `tbl_regression()`. + +```{r tbl_uvregression} +trial |> + tbl_uvregression( + method = glm, + y = response, + include = c(age, grade), + method.args = list(family = binomial), + exponentiate = TRUE, + pvalue_fun = label_style_pvalue(digits = 2) + ) |> + add_global_p() |> # add global p-value + add_nevent() |> # add number of events of the outcome + add_q() |> # adjusts global p-values for multiple testing + bold_p() |> # bold p-values under a given threshold (default 0.05) + bold_p(t = 0.10, q = TRUE) |> # now bold q-values under the threshold of 0.10 + bold_labels() +``` + +## Setting Default Options {#options} + +The {gtsummary} regression functions and their related functions have sensible defaults for rounding and formatting results. +If you, however, would like to change the defaults there are a few options. +The default options can be changed using the {gtsummary} themes function `set_gtsummary_theme()`. +The package includes pre-specified themes, and you can also create your own. 
+Themes can control baseline behavior, for example, how p-values are rounded, coefficients are rounded, default headers, confidence levels, etc. +For details on creating a theme and setting personal defaults, visit the [themes vignette](https://www.danieldsjoberg.com/gtsummary/articles/themes.html). + +## Supported Models {#supported-models} + +Below is a listing of known and tested models supported by `tbl_regression()`. +If a model follows a standard format and has a tidier, it's likely to be supported as well, even if not listed below. + +```{r supported_models, echo = FALSE} +broom.helpers::supported_models |> + gt::gt() |> + gt::cols_label(model = gt::md("Model"), notes = gt::md("Details")) |> + gt::fmt_markdown(columns = everything()) |> + gt::tab_options( + table.font.size = 11, data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` diff --git a/vignettes/articles/tbl_summary.Rmd b/vignettes/articles/tbl_summary.Rmd new file mode 100644 index 0000000000..d9f2523717 --- /dev/null +++ b/vignettes/articles/tbl_summary.Rmd @@ -0,0 +1,436 @@ +--- +title: "Tutorial: tbl_summary" +--- + +```{r setup, include = FALSE} +# sourcing the purrr shims from rlang +library(rlang) +source("https://raw.githubusercontent.com/r-lib/rlang/main/R/standalone-purrr.R") + +knitr::opts_chunk$set( + collapse = TRUE, + warning = FALSE, + comment = "#>" +) + +gt_compact_fun <- function(x) { + gt::tab_options(x, + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +} +``` + +## Introduction + +The `tbl_summary()` function calculates **descriptive statistics** for continuous, categorical, and dichotomous variables in **R**, and presents the results 
in a **beautiful, customizable summary table** ready for publication (for example, Table 1 or demographic tables). + +This vignette will walk a reader through the `tbl_summary()` function, and the various functions available to modify and make additions to an existing table summary object. + +## Setup + +Before going through the tutorial, install and load {gtsummary}. + +```{r message = FALSE, warning=FALSE} +# install.packages("gtsummary") +library(gtsummary) +``` + +## Example data set + +We'll be using the [`trial`](https://www.danieldsjoberg.com/gtsummary/reference/trial.html) data set throughout this example. + +* This set contains data from `r nrow(trial)` patients who received one of two types of chemotherapy (Drug A or Drug B). +The outcomes are tumor response and death. + +* Each variable in the data frame has been assigned an **attribute label** (i.e. `attr(trial$trt, "label") == "Chemotherapy Treatment")` with the **[labelled](http://larmarange.github.io/labelled/)** package. +These labels are displayed in the {gtsummary} output table by default. +Using {gtsummary} on a data frame without labels will simply print variable names in place of variable labels; there is also an option to add labels later. + +```{r, echo = FALSE} +trial |> + imap( + ~ dplyr::tibble( + Variable = glue::glue("`{.y}`"), + Class = class(.x), + Label = attr(.x, "label") + ) + ) |> + dplyr::bind_rows() |> + gt::gt() |> + gt::tab_source_note("Includes mix of continuous, dichotomous, and categorical variables") |> + gt::fmt_markdown(columns = Variable) |> + gt::cols_align("left") |> + gt_compact_fun() +``` + +```{r, message=FALSE} +head(trial) +``` + +For brevity, in this tutorial we'll use a subset of the variables from the trial data set. + +```{r} +trial2 <- trial |> select(trt, age, grade) +``` + +## Basic Usage + +The default output from `tbl_summary()` is meant to be **publication ready**. + +Let's start by creating a table of summary statistics from the `trial` data set. 
+The `tbl_summary()` function can take, at minimum, a data frame as the only input, and returns descriptive statistics for each column in the data frame. + + +```{r, message=FALSE} +trial2 |> tbl_summary() +``` + +Note the sensible defaults with this basic usage; each of the defaults may be customized. + +* **Variable types** are automatically detected so that appropriate descriptive statistics are calculated. + +* **Label attributes** from the data set are automatically printed. + +* **Missing values** are listed as "Unknown" in the table. + +* Variable levels are **indented** and **footnotes** are added. + +For this study data the summary statistics should be **split by treatment group**, which can be done by using the **`by=`** argument. +To compare two or more groups, include [`add_p()`](https://www.danieldsjoberg.com/gtsummary/reference/add_p.html) with the function call, which detects variable type and uses an appropriate statistical test. + +```{r} +trial2 |> + tbl_summary(by = trt) |> + add_p() +``` + +## Customize Output + +There are four primary ways to customize the output of the summary table. + +1. Use `tbl_summary()` function arguments +1. Add additional data/information to a summary table with `add_*()` functions +1. Modify summary table appearance with the {gtsummary} functions +1. Modify table appearance with {gt} package functions + +### Modifying `tbl_summary()` function arguments + +The `tbl_summary()` function includes many input options for modifying the appearance. + +```{r echo=FALSE} +dplyr::tribble( + ~Argument, ~Description, + "`label=`", "specify the variable labels printed in table", + "`type=`", "specify the variable type (e.g. 
continuous, categorical, etc.)", + "`statistic=`", "change the summary statistics presented", + "`digits=`", "number of digits the summary statistics will be rounded to", + "`missing=`", "whether to display a row with the number of missing observations", + "`missing_text=`", "text label for the missing number row", + "`sort=`", "change the sorting of categorical levels by frequency", + "`percent=`", "print column, row, or cell percentages", + "`include=`", "list of variables to include in summary table" +) |> + gt::gt() |> + gt::fmt_markdown(columns = Argument) |> + gt_compact_fun() +``` + +Example modifying `tbl_summary()` arguments. + +```{r, eval = TRUE} +trial2 |> + tbl_summary( + by = trt, + statistic = list( + all_continuous() ~ "{mean} ({sd})", + all_categorical() ~ "{n} / {N} ({p}%)" + ), + digits = all_continuous() ~ 2, + label = grade ~ "Tumor Grade", + missing_text = "(Missing)" + ) +``` + +There are multiple ways to specify the `statistic=` argument: using a single formula, a list of formulas, or a named list. +The following table shows equivalent ways to specify the mean statistic for continuous variables `age` and `marker`. +Any {gtsummary} function argument that accepts formulas will accept each of these variations. 
+ +```{r, echo = FALSE} +dplyr::tribble( + ~`select_helper`, ~`varname`, ~`named_list`, + '`all_continuous() ~ "{mean}"`', '`c("age", "marker") ~ "{mean}"`', '`list(age = "{mean}", marker = "{mean}")`', + '`list(all_continuous() ~ "{mean}")`', '`c(age, marker) ~ "{mean}"`', NA_character_, + NA_character_, '`list(c(age, marker) ~ "{mean}")`', NA_character_ +) |> + gt::gt() |> + gt::fmt_markdown(everything()) |> + gt::cols_label( + select_helper = gt::md("**Select with Helpers**"), + varname = gt::md("**Select by Variable Name**"), + named_list = gt::md("**Select with Named List**") + ) |> + gt::sub_missing(columns = everything(), missing_text = "---") |> + gt::tab_options(table.font.size = "85%") |> + gt::cols_width(everything() ~ gt::px(390)) +``` + +```{r out.width = "80%", echo = FALSE, fig.align='center'} +# print picture of slide if in pkgdown so not included in CRAN +if (identical(Sys.getenv("IN_PKGDOWN"), "true")) { + knitr::include_graphics("https://github.com/ddsjoberg/gtsummary/raw/main/data-raw/crayon_images/crayon-selectors.png") +} +``` + +### {gtsummary} functions to add information + +The {gtsummary} package has functions for adding information or statistics to `tbl_summary()` tables. 
+ +```{r echo = FALSE} +dplyr::tribble( + ~Function, ~Description, + "`add_p()`", "add p-values to the output comparing values across groups", + "`add_overall()`", "add a column with overall summary statistics", + "`add_n()`", "add a column with N (or N missing) for each variable", + "`add_difference()`", "add column for difference between two group, confidence interval, and p-value", + "`add_stat_label()`", "add label for the summary statistics shown in each row", + "`add_stat()`", "generic function to add a column with user-defined values", + "`add_q()`", "add a column of q values to control for multiple comparisons" +) |> + gt::gt() |> + gt::fmt_markdown(columns = Function) |> + gt_compact_fun() +``` + +### {gtsummary} functions to format table + +The {gtsummary} package comes with functions specifically made to modify and format summary tables. +```{r echo = FALSE} +dplyr::tribble( + ~Function, ~Description, + "`modify_header()`", "update column headers", + "`modify_footnote()`", "update column footnote", + "`modify_spanning_header()`", "update spanning headers", + "`modify_caption()`", "update table caption/title", + "`bold_labels()`", "bold variable labels", + "`bold_levels()`", "bold variable levels", + "`italicize_labels()`", "italicize variable labels", + "`italicize_levels()`", "italicize variable levels", + "`bold_p()`", "bold significant p-values" +) |> + gt::gt() |> + gt::fmt_markdown(columns = Function) |> + gt_compact_fun() +``` + +Example adding `tbl_summary()`-family functions + +```{r, eval = TRUE} +trial2 |> + tbl_summary(by = trt) |> + add_p(pvalue_fun = label_style_pvalue(digits = 2)) |> + add_overall() |> + add_n() |> + modify_header(label ~ "**Variable**") |> + modify_spanning_header(c("stat_1", "stat_2") ~ "**Treatment Received**") |> + modify_footnote( + all_stat_cols() ~ "Median (IQR) or Frequency (%)" + ) |> + modify_caption("**Table 1. 
Patient Characteristics**") |> + bold_labels() +``` + +### {gt} functions to format table + +The [{gt} package](https://gt.rstudio.com/index.html) is packed with many great functions for modifying table output---too many to list here. +Review the package's website for a full listing. + +To use the {gt} package functions with {gtsummary} tables, the summary table must first be converted into a `gt` object. +To this end, use the `as_gt()` function after modifications have been completed with {gtsummary} functions. + +```{r} +trial2 |> + tbl_summary(by = trt, missing = "no") |> + add_n() |> + as_gt() |> + gt::tab_source_note(gt::md("*This data is simulated*")) +``` + +## Select Helpers {#select_helpers} + +There is flexibility in how you select variables for {gtsummary} arguments, which allows for many customization opportunities! +For example, if you want to show age and the marker levels to one decimal place in `tbl_summary()`, you can pass `digits = c(age, marker) ~ 1`. +The selecting input is flexible, and you may also pass quoted column names. + +Going beyond typing out specific variables in your data set, you can use: + +1. All [**{tidyselect} helpers**](https://tidyselect.r-lib.org/reference/index.html) available throughout the tidyverse, such as `starts_with()`, `contains()`, and `everything()` (i.e. anything you can use with the `dplyr::select()` function), can be used with {gtsummary}. + + +1. Additional [**{gtsummary} selectors**](https://www.danieldsjoberg.com/gtsummary/reference/select_helpers.html) that are included in the package to supplement tidyselect functions. + + + **Summary type** There are two primary ways to select variables by their summary type. This is useful, for example, when you wish to report the mean and standard deviation for all continuous variables: `statistic = all_continuous() ~ "{mean} ({sd})"`. 
+ + ```{r, eval=FALSE} + all_continuous() + all_categorical() + ``` + Dichotomous variables are, by default, included with `all_categorical()`. + + +## Multi-line Continuous Summaries {#continuous2} + +Continuous variables may also be summarized on multiple lines---a common format in some journals. +To update the continuous variables to summarize on multiple lines, update the summary type to `"continuous2"` (for summaries on two or more lines). + +```{r} +trial2 |> + select(age, trt) |> + tbl_summary( + by = trt, + type = all_continuous() ~ "continuous2", + statistic = all_continuous() ~ c( + "{N_nonmiss}", + "{median} ({p25}, {p75})", + "{min}, {max}" + ), + missing = "no" + ) |> + add_p(pvalue_fun = label_style_pvalue(digits = 2)) +``` + +## Advanced Customization {#advanced} + +*The information in this section applies to all {gtsummary} objects.* + +The {gtsummary} table has two important internal objects: + +```{r, echo = FALSE} +dplyr::tribble( + ~`Internal Object`, ~Description, + "`.$table_body`", "data frame that is printed as the gtsummary output table", + "`.$table_styling`", "contains instructions for styling `.$table_body` when printed" +) |> + gt::gt() |> + gt::fmt_markdown(columns = everything()) |> + gt_compact_fun() +``` + +When you print output from the `tbl_summary()` function into the R console or into an R markdown document, the `.$table_body` data frame is formatted using the instructions listed in `.$table_styling`. +The default printer converts the {gtsummary} object to a {gt} object with `as_gt()` via a sequence of {gt} commands executed on `.$table_body`. +Here's an example of the first few calls saved with `tbl_summary()`: + +```{r} +tbl_summary(trial2) |> + as_gt(return_calls = TRUE) |> + head(n = 4) +``` + +The {gt} functions are called in the order they appear, beginning with `gt::gt()`. + +If the user does not want a specific {gt} function to run (i.e. 
would like to change default printing), any {gt} call can be excluded in the `as_gt()` function. +In the example below, the default alignment is restored. + +After the `as_gt()` function is run, additional formatting may be added to the table using {gt} functions. +In the example below, a source note is added to the table. + +```{r as_gt2} +tbl_summary(trial2, by = trt) |> + as_gt(include = -cols_align) |> + gt::tab_source_note(gt::md("*This data is simulated*")) +``` + +## Set Default Options with Themes {#options} + +The {gtsummary} `tbl_summary()` function and the related functions have sensible defaults for rounding and presenting results. +If you, however, would like to change the defaults there are a few options. +The default options can be changed using the {gtsummary} themes function `set_gtsummary_theme()`. +The package includes prespecified themes, and you can also create your own. +Themes can control baseline behavior, for example, how p-values and percentages are rounded, which statistics are presented in `tbl_summary()`, default statistical tests in `add_p()`, etc. + +For details on creating a theme and setting personal defaults, review the [themes vignette](https://www.danieldsjoberg.com/gtsummary/dev/articles/themes.html). + +## Survey Data {#tbl_svysummary} + +The {gtsummary} package also supports survey data (objects created with the [{survey}](https://CRAN.R-project.org/package=survey) package) via the `tbl_svysummary()` function. +The syntax for `tbl_svysummary()` and `tbl_summary()` is nearly identical, and the examples above apply to survey summaries as well. + +To begin, install the {survey} package and load the `apiclus1` data set. + +```r +install.packages("survey") +``` +```{r} +# loading the api data set +data(api, package = "survey") +``` + +Before we begin, we convert the data frame to a survey object, registering the ID and weighting columns, and setting the finite population correction column. 
+ +```{r} +svy_apiclus1 <- + survey::svydesign( + id = ~dnum, + weights = ~pw, + data = apiclus1, + fpc = ~fpc + ) +``` + +After creating the survey object, we can now summarize it similarly to a standard data frame using `tbl_svysummary()`. +Like `tbl_summary()`, `tbl_svysummary()` accepts the `by=` argument and works with the `add_p()` and `add_overall()` functions. + +It is not possible to pass custom functions to the `statistic=` argument of `tbl_svysummary()`. +You must use one of the [pre-defined summary statistic functions](https://www.danieldsjoberg.com/gtsummary/dev/reference/tbl_svysummary.html#statistic-argument) (e.g. `{mean}`, `{median}`) which leverage functions from the {survey} package to calculate weighted statistics. + +```{r} +svy_apiclus1 |> + tbl_svysummary( + # stratify summary statistics by the "both" column + by = both, + # summarize a subset of the columns + include = c(api00, api99, both), + # adding labels to table + label = list(api00 = "API in 2000", + api99 = "API in 1999") + ) |> + add_p() |> # comparing values by "both" column + add_overall() |> + # adding spanning header + modify_spanning_header(c("stat_1", "stat_2") ~ "**Met Both Targets**") +``` + +`tbl_svysummary()` can also handle weighted survey data where each row represents several individuals: + +```{r} +Titanic |> + as_tibble() |> + survey::svydesign(data = _, ids = ~1, weights = ~n) |> + tbl_svysummary(include = c(Age, Survived)) +``` + +## Cross Tables {#tbl_cross} + +Use `tbl_cross()` to compare two categorical variables in your data. +`tbl_cross()` is a wrapper for `tbl_summary()` that: + + * Automatically adds a spanning header to your table with the name or label of your comparison variable. + * Uses `percent = "cell"` by default. + * Adds row and column margin totals (customizable through the `margin` argument). + * Displays missing data in both row and column variables (customizable through the `missing` argument). 
+ + +```{r} +trial |> + tbl_cross( + row = stage, + col = trt, + percent = "cell" + ) |> + add_p() +``` diff --git a/vignettes/gtsummary_definition.Rmd b/vignettes/gtsummary_definition.Rmd new file mode 100644 index 0000000000..221ef900ff --- /dev/null +++ b/vignettes/gtsummary_definition.Rmd @@ -0,0 +1,338 @@ +--- +title: "Definition of a gtsummary Object" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Definition of a gtsummary Object} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + warning = FALSE, + comment = "#>" +) +``` + +This vignette is meant for those who wish to contribute to {gtsummary}, or users who wish to gain an understanding of the inner-workings of a {gtsummary} object so they may more easily modify them to suit their own needs. +If this does not describe you, please refer to the [{gtsummary} website](https://www.danieldsjoberg.com/gtsummary/) for an introduction on how to use the package's functions and tutorials on advanced use. + +## Introduction + +Every {gtsummary} table has a few characteristics common among all tables created with the package. +Here, we review those characteristics, and provide instructions on how to construct a {gtsummary} object. + +```{r setup, message=FALSE} +library(gtsummary) + +tbl_regression_ex <- + lm(age ~ grade + marker, trial) %>% + tbl_regression() %>% + bold_p(t = 0.5) + +tbl_summary_ex <- + trial %>% + select(trt, age, grade, response) %>% + tbl_summary(by = trt) +``` + +## Structure of a {gtsummary} object + +Every {gtsummary} object is a list comprising, at minimum, these elements: + +```r +.$table_body .$table_styling +``` + +#### table_body + +The `.$table_body` object is the data frame that will ultimately be printed as the output. +The table must include columns `"label"`, `"row_type"`, and `"variable"`. +The `"label"` column is printed, and the other two are hidden from the final output.
+ +```{r} +tbl_summary_ex$table_body +``` + +#### table_styling + +The `.$table_styling` object is a list of data frames containing information about how `.$table_body` is printed, formatted, and styled. +The list contains the following data frames `header`, `footnote`, `footnote_abbrev`, `fmt_fun`, `text_format`, `fmt_missing`, `cols_merge` and the following objects `source_note`, `caption`, `horizontal_line_above`. + +**`header`** + +The `header` table has the following columns and has one row per column found in `.$table_body`. +The table contains styling information that applies to an entire column or to the column headers. + +```{r, echo=FALSE} +tibble::tribble( + ~Column, ~Description, + "column", "Column name from `.$table_body`", + "hide", "Logical indicating whether the column is hidden in the output. This column is also scoped in `modify_header()` (and friends) to be used in a selecting environment", + "align", "Specifies the alignment/justification of the column, e.g. 'center' or 'left'", + "label", "Label that will be displayed (if column is displayed in output)", + "interpret_label", "the {gt} function that is used to interpret the column label, `gt::md()` or `gt::html()`", + "spanning_header", "Includes text printed above columns as spanning headers.", + "interpret_spanning_header", "the {gt} function that is used to interpret the column spanning headers, `gt::md()` or `gt::html()`", + "modify_stat_{*}", "any column beginning with `modify_stat_` is a statistic available to report in `modify_header()` (and others)", + "modify_selector_{*}", "any column beginning with `modify_selector_` is a column that is scoped in `modify_header()` (and friends) to be used in a selecting environment" +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), +
source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +**`footnote` & `footnote_abbrev`** + +Each {gtsummary} table may contain a single footnote per header and cell within the table. +Footnotes and footnote abbreviations are handled separately. +Updates/changes to footnote are appended to the bottom of the tibble. +A footnote of `NA_character_` deletes an existing footnote. + +```{r, echo=FALSE} +tibble::tribble( + ~Column, ~Description, + "column", "Column name from `.$table_body`", + "rows", "expression selecting rows in `.$table_body`, `NA` indicates to add footnote to header", + "footnote", "string containing footnote to add to column/row" +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +**`fmt_fun`** + +Numeric columns/rows are styled with the functions stored in `fmt_fun`. +Updates/changes to styling functions are appended to the bottom of the tibble. + + +```{r, echo=FALSE} +tibble::tribble( + ~Column, ~Description, + "column", "Column name from `.$table_body`", + "rows", "expression selecting rows in `.$table_body`", + "fmt_fun", "list of formatting/styling functions" +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +**`text_format`** + +Columns/rows are styled with bold, italic, or indenting stored in `text_format`. +Updates/changes to styling functions are appended to the bottom of the tibble. 
+ + +```{r, echo=FALSE} +tibble::tribble( + ~Column, ~Description, + "column", "Column name from `.$table_body`", + "rows", "expression selecting rows in `.$table_body`", + "format_type", "one of `c('bold', 'italic', 'indent')`", + "undo_text_format", "logical indicating where the formatting indicated should be undone/removed." +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +**`fmt_missing`** + +By default, all `NA` values are shown blanks. +Missing values in columns/rows are replaced with the `symbol`. +For example, reference rows in `tbl_regression()` are shown with an em-dash. +Updates/changes to styling functions are appended to the bottom of the tibble. + +```{r, echo=FALSE} +tibble::tribble( + ~Column, ~Description, + "column", "Column name from `.$table_body`", + "rows", "expression selecting rows in `.$table_body`", + "symbol", "string to replace missing values with, e.g. an em-dash" +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +**`cols_merge`** + +This object is _experimental_ and may change in the future. +This tibble gives instructions for merging columns into a single column. +The implementation in `as_gt()` will be updated after `gt::cols_label()` gains a `rows=` argument. 
+ +```{r, echo=FALSE} +tibble::tribble( + ~Column, ~Description, + "column", "Column name from `.$table_body`", + "rows", "expression selecting rows in `.$table_body`", + "pattern", "glue pattern directing how to combine/merge columns. The merged columns will replace the column indicated in 'column'." +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +**`source_note`** + +String that is made into the table source note. +The attribute `"text_interpret"` is either `"md"` or `"html"`. + +**`caption`** + +String that is made into the table caption. +The attribute `"text_interpret"` is either `"md"` or `"html"`. + +**`horizontal_line_above`** + +Expression identifying a row above which a horizontal line is placed in the table. + +Example from `tbl_regression()` + +```{r} +tbl_regression_ex$table_styling +``` + +## Constructing a {gtsummary} object + +#### table_body + +When constructing a {gtsummary} object, the author will begin with the `.$table_body` object. +Recall the `.$table_body` data frame must include columns `"label"`, `"row_type"`, and `"variable"`. +Of these columns, only the `"label"` column will be printed with the final results. +The `"row_type"` column typically will control whether or not the label column is indented. +The `"variable"` column is often used in the `inline_text()` family of functions, and merging {gtsummary} tables with `tbl_merge()`. + +```{r} +tbl_regression_ex %>% + getElement("table_body") %>% + select(variable, row_type, label) +``` + +The other columns in `.$table_body` are created by the user and are likely printed in the output. +Formatting and printing instructions for these columns are stored in `.$table_styling`.
+ +#### table_styling + +There are a few internal {gtsummary} functions to assist in constructing and modifying the `.$table_styling` list of data frames. + +1. `.create_gtsummary_object(table_body)` After a user creates a `table_body`, pass it to this function and the skeleton of a gtsummary object is created and returned (including the full `table_styling` list of tables). + +1. `.update_table_styling()` After columns are added or removed from `table_body`, run this function to update `.$table_styling` to include or remove styling instructions for the columns. FYI the default styling for each new column is to hide it. + +1. `modify_table_styling()` This exported function modifies the printing instructions for a single column or groups of columns. + +1. `modify_table_body()` This exported function helps users make changes to `.$table_body`. The function runs `.update_table_styling()` internally to maintain internal validity with the printing instructions. + +## Printing a {gtsummary} object + +All {gtsummary} objects are printed with `print.gtsummary()`. +Before a {gtsummary} object is printed, it is converted to a {gt} object using `as_gt()`. +This function takes the {gtsummary} object as its input, and uses the information in `.$table_styling` to construct a list of {gt} calls that will be executed on `.$table_body`. +After the {gtsummary} object is converted to {gt}, it is then printed as any other {gt} object. + +In some cases, the package defaults to printing with other engines, such as flextable (`as_flex_table()`), huxtable (`as_hux_table()`), kableExtra (`as_kable_extra()`), and kable (`as_kable()`).
+The default print engine is set with the theme element `"pkgwide-str:print_engine"`. + +While the actual print function is slightly more involved, it is basically this: + +```{r, eval = FALSE} +print.gtsummary <- function(x) { + get_theme_element("pkgwide-str:print_engine") %>% + switch( + "gt" = as_gt(x), + "flextable" = as_flex_table(x), + "huxtable" = as_hux_table(x), + "kable_extra" = as_kable_extra(x), + "kable" = as_kable(x) + ) %>% + print() +} +``` + +## The `.$meta_data$df_stats` tibble + +Some {gtsummary} tables contain an internal object called `.$meta_data` containing a list column called `"df_stats"`. +The column is a list of tibbles with each tibble containing the summary statistics presented in the final gtsummary table. +While the statistics contained in each `"df_stats"` tibble can vary within a single gtsummary object, all the tibbles have a few common characteristics. + +Each tibble contains the following columns: +```{r, echo = FALSE} +tibble::tribble( + ~Column, ~Description, + "`variable`", "String of the variable name", + "`label`", "String matching the variable's values in `.$table_body$label`", + "`col_name`", "The column name the statistics appear under in `.$table_body`, e.g. `'stat_0'`, `'stat_1'`", + "`variable_levels`", "This column appears if and only if the variable being summarized has multiple levels. The column is equal to the variable's levels.", + "``", "Primarily, the tibble stores the summary statistics for each variable. For example, when the mean is requested in `tbl_summary()`, there will be a column called `'mean'`."
+) %>% + gt::gt() %>% + gt::fmt_markdown(columns = everything()) %>% + gt::tab_options( + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +``` + +The statistics columns each have an attribute called `"fmt_fun"` containing the formatting function that will be applied before the statistic is placed in `.$table_body`. + + + + + diff --git a/vignettes/inline_text.Rmd b/vignettes/inline_text.Rmd new file mode 100644 index 0000000000..f9e4d069f1 --- /dev/null +++ b/vignettes/inline_text.Rmd @@ -0,0 +1,162 @@ +--- +title: "Tutorial: inline_text" +date: "Last Updated: September 13, 2020" +output: rmarkdown::html_vignette +vignette: > + %\VignetteEncoding{UTF-8} + %\VignetteIndexEntry{Tutorial: inline_text} + %\VignetteEngine{knitr::rmarkdown} + chunk_output_type: console +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + warning = FALSE, + comment = "#>" +) +gt_compact_fun <- function(x) { + gt::tab_options(x, + table.font.size = "small", + data_row.padding = gt::px(1), + summary_row.padding = gt::px(1), + grand_summary_row.padding = gt::px(1), + footnotes.padding = gt::px(1), + source_notes.padding = gt::px(1), + row_group.padding = gt::px(1) + ) +} +``` + +```{r, echo = FALSE, results = 'asis'} +# we do NOT want the vignette to build on CRAN...it's taking too long +if (!identical(Sys.getenv("IN_PKGDOWN"), "true") && + !tolower(as.list(Sys.info())$user) %in% c("sjobergd", "currym", "whitingk", "whiting")) { + msg <- + paste( + "View this vignette on the", + "[package website](https://www.danieldsjoberg.com/gtsummary/articles/inline_text.html)." + ) + cat(msg) + knitr::knit_exit() +} +``` + +## Introduction + +**Reproducible reports** are an important part of good practices. 
We often need to report the **results from a table** in the text of an R markdown report. +**Inline reporting** has been made simple with [`inline_text()`](https://www.danieldsjoberg.com/gtsummary/reference/inline_text.tbl_summary.html). +The `inline_text()` function reports statistics from {gtsummary} tables inline in an [R markdown](https://rmarkdown.rstudio.com/lesson-1.html) report. + +## Setup + +Before going through the tutorial, install and load {gtsummary}. + +```{r message = FALSE, warning=FALSE} +# install.packages("gtsummary") +library(gtsummary) +``` + + +## Example data set + +We'll be using the [`trial`](https://www.danieldsjoberg.com/gtsummary/reference/trial.html) data set throughout this example. + +* This set contains data from `r nrow(trial)` patients who received one of two types of chemotherapy (Drug A or Drug B). +The outcomes are tumor response and death. + +For brevity in the tutorial, let's keep a subset of the variables from the trial data set. + +```{r} +trial2 <- + trial %>% + select(trt, marker, stage) +``` + + +## Inline results from tbl_summary() {#inline_text_tbl_summary} + +First create a basic summary table using [`tbl_summary()`](https://www.danieldsjoberg.com/gtsummary/reference/tbl_summary.html) (review [`tbl_summary()` vignette](https://www.danieldsjoberg.com/gtsummary/articles/tbl_summary.html) for detailed overview of this function if needed). + +```{r} +tab1 <- tbl_summary(trial2, by = trt) +tab1 +``` + +To report the median (IQR) of the marker levels in each group, use the following commands inline. + +> The median (IQR) marker level in the Drug A and Drug B groups are `` `r +inline_text(tab1, variable = marker, column = "Drug A")` `` and `` `r +inline_text(tab1, variable = marker, column = "Drug B")` ``, respectively. + +Here's how the line will appear in your report. 
+ +> The median (IQR) marker level in the Drug A and Drug B groups are `r inline_text(tab1, variable = marker, column = "Drug A")` and `r inline_text(tab1, variable = marker, column = "Drug B")`, respectively. + +If you display a statistic from a categorical variable, include the `level` argument. + + +> `` `r +inline_text(tab1, variable = stage, level = "T1", column = "Drug B")` `` resolves to "`r inline_text(tab1, variable = stage, level = "T1", column = "Drug B")`" + + + + +## Inline results from tbl_regression() {#inline_text_tbl_regression} + +Similar syntax is used to report results from [`tbl_regression()`](https://www.danieldsjoberg.com/gtsummary/reference/tbl_regression.html) and [`tbl_uvregression()`](https://www.danieldsjoberg.com/gtsummary/reference/tbl_uvregression.html) tables. +Refer to the [`tbl_regression()` vignette](https://www.danieldsjoberg.com/gtsummary/articles/tbl_regression.html) if you need detailed guidance on using these functions. + +Let's first create a regression model. + +```{r} +# build logistic regression model +m1 <- glm(response ~ age + stage, trial, family = binomial(link = "logit")) +``` + +Now summarize the results with `tbl_regression()`; exponentiate to get the odds ratios. +```{r} +tbl_m1 <- tbl_regression(m1, exponentiate = TRUE) +tbl_m1 +``` + +To report the result for `age`, use the following commands inline. + +> `` `r +inline_text(tbl_m1, variable = age)` `` + +Here's how the line will appear in your report. + +> `r inline_text(tbl_m1, variable = age)` + +It is likely that you'll need to modify the text. +To do this, use the `pattern` argument. +The `pattern` argument syntax follows `glue::glue()` format with referenced R objects being inserted between curly brackets. +The default is `pattern = "{estimate} ({conf.level*100}% CI {conf.low}, {conf.high}; {p.value})"`. You have access to the following fields within the `pattern` argument.
+ +```{r, echo = FALSE} +tibble::tribble( + ~Parameter, ~Description, + "`{estimate}`", "primary estimate (e.g. model coefficient, odds ratio)", + "`{conf.low}`", "lower limit of confidence interval", + "`{conf.high}`", "upper limit of confidence interval", + "`{p.value}`", "p-value", + "`{conf.level}`", "confidence level of interval", + "`{N}`", "number of observations" +) %>% + gt::gt() %>% + gt::fmt_markdown(columns = c(Parameter)) %>% + gt_compact_fun() +``` + +> Age was not significantly associated with tumor response `` `r +inline_text(tbl_m1, variable = age, pattern = "(OR {estimate}; 95% CI {conf.low}, {conf.high}; {p.value})")` ``. + +> Age was not significantly associated with tumor response `r inline_text(tbl_m1, variable = age, pattern = "(OR {estimate}; 95% CI {conf.low}, {conf.high}; {p.value})")`. + +If you're printing results from a categorical variable, include the `level` argument, e.g. `inline_text(tbl_m1, variable = stage, level = "T3")` resolves to "`r inline_text(tbl_m1, variable = stage, level = "T3")`". + +The `inline_text()` function has arguments for rounding the p-value (`pvalue_fun`) and the coefficients and confidence interval (`estimate_fun`). +These default to the same rounding performed in the table, but can be modified when reporting inline. + +For more details about inline code, refer to the [RStudio documentation page](https://rmarkdown.rstudio.com/lesson-4.html).