diff --git a/DESCRIPTION b/DESCRIPTION
index 04be3e39..7ad0c624 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: report
Type: Package
Title: Automated Reporting of Results and Statistical Models
-Version: 0.5.8.2
+Version: 0.5.8.3
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
@@ -150,3 +150,4 @@ Collate:
'utils_grouped_df.R'
'zzz.R'
Roxygen: list(markdown = TRUE)
+Remotes: easystats/insight, easystats/datawizard, easystats/parameters, easystats/performance, easystats/modelbased
diff --git a/NEWS.md b/NEWS.md
index 07728045..1d5f9e21 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,14 @@
# report 0.5.9
+Breaking
+
+* Arguments named `group`, `at` and `group_by` will be deprecated in future
+ releases of _easystats_ packages. Please use `by` instead. This affects the
+ following functions in *report*:
+
+ * `report_participants()`
+ * `report_sample()`
+
Minor changes
* `report` now supports reporting of Bayesian model comparison with variables of class `brms::loo_compare`.
diff --git a/R/report.lm.R b/R/report.lm.R
index b13449eb..904c8ce1 100644
--- a/R/report.lm.R
+++ b/R/report.lm.R
@@ -483,7 +483,7 @@ report_info.lm <- function(x,
effectsize <- report_effectsize(x, ...)
}
- text <- .info_effectsize(x, effectsize = effectsize, include_effectsize = include_effectsize)
+ info_text <- .info_effectsize(x, effectsize = effectsize, include_effectsize = include_effectsize)
if (is.null(parameters)) {
parameters <- report_parameters(x, ...)
@@ -495,7 +495,7 @@ report_info.lm <- function(x,
}
if ("ci_method" %in% names(att)) {
- text <- paste0(text, " ", .info_df(
+ info_text <- paste0(info_text, " ", .info_df(
ci = att$ci,
ci_method = att$ci_method,
test_statistic = att$test_statistic,
@@ -508,7 +508,7 @@ report_info.lm <- function(x,
# }
- as.report_info(text)
+ as.report_info(info_text)
}
@@ -538,25 +538,25 @@ report_text.lm <- function(x, table = NULL, ...) {
model,
". ",
perf,
- ifelse(nchar(perf) > 0, ". ", ""),
+ ifelse(nzchar(perf, keepNA = TRUE), ". ", ""),
intercept,
params_text_full,
"\n\n",
info
)
- text <- paste0(
+ summary_text <- paste0(
"We fitted a ",
summary(model),
". ",
summary(perf),
- ifelse(nchar(perf) > 0, ". ", ""),
+ ifelse(nzchar(perf, keepNA = TRUE), ". ", ""),
summary(intercept),
params_text
)
- as.report_text(text_full, summary = text)
+ as.report_text(text_full, summary = summary_text)
}
@@ -569,7 +569,7 @@ report_text.lm <- function(x, table = NULL, ...) {
if (!is.null(coefname) && coefname %in% names(table)) {
estimate <- attributes(table)$coefficient_name
} else {
- estimate <- datawizard::data_find(table, candidates, regex = TRUE, verbose = FALSE)[1]
+ estimate <- datawizard::extract_column_names(table, candidates, regex = TRUE, verbose = FALSE)[1]
}
estimate
}
diff --git a/R/report_htest_ttest.R b/R/report_htest_ttest.R
index 0d5de125..0fb20463 100644
--- a/R/report_htest_ttest.R
+++ b/R/report_htest_ttest.R
@@ -33,25 +33,25 @@
.report_table_ttest <- function(table_full, effsize) {
table_full <- cbind(table_full, attributes(effsize)$table)
- table <- datawizard::data_remove(
+ table_small <- datawizard::data_remove(
table_full,
c("Parameter", "Group", "Mean_Group1", "Mean_Group2", "Method", "d_CI_low", "d_CI_high")
)
- list(table = table, table_full = table_full)
+ list(table = table_small, table_full = table_full)
}
# report_effectsize ---------------------
.report_effectsize_ttest <- function(x, table, dot_args, type, rules = "cohen1988") {
- args <- c(list(x), dot_args)
- table <- do.call(effectsize::effectsize, args)
+ my_args <- c(list(x), dot_args)
+ table <- do.call(effectsize::effectsize, my_args)
ci <- attributes(table)$ci
estimate <- names(table)[1]
rules <- ifelse(is.null(dot_args$rules), rules, dot_args$rules)
- args <- list(table, rules = rules, dot_args)
- interpretation <- do.call(effectsize::interpret, args)$Interpretation
+ my_args <- list(table, rules = rules, dot_args)
+ interpretation <- do.call(effectsize::interpret, my_args)$Interpretation
rules <- .text_effectsize(attr(attr(interpretation, "rules"), "rule_name"))
if (estimate %in% c("d", "Cohens_d")) {
@@ -88,7 +88,7 @@
.report_model_ttest <- function(x, table) {
# If against mu
if (names(x$null.value) == "mean") {
- # TODO: @DominiqueMakowski why do we need "table" here?
+ # TODO: @DominiqueMakowski why do we need "table" here??
table$Difference <- x$estimate - x$null.value
means <- paste0(" (mean = ", insight::format_value(x$estimate), ")")
@@ -106,12 +106,12 @@
vars <- paste0(x$data.name)
}
- text <- paste0(
+ final_text <- paste0(
trimws(x$method),
" testing the difference ",
ifelse(grepl(" by ", x$data.name, fixed = TRUE), "of ", "between "),
vars_full
)
- text
+ final_text
}
diff --git a/R/report_participants.R b/R/report_participants.R
index 885d1eb1..40e6dfc5 100644
--- a/R/report_participants.R
+++ b/R/report_participants.R
@@ -21,11 +21,12 @@
#' so countries that represent less than 10% will be combined in the "other" category).
#' @param participants The name of the participants' identifier column (for
#' instance in the case of repeated measures).
-#' @param group A character vector indicating the name(s) of the column(s) used
+#' @param by A character vector indicating the name(s) of the column(s) used
#' for stratified description.
#' @param spell_n Logical, fully spell the sample size (`"Three participants"`
#' instead of `"3 participants"`).
#' @inheritParams report.numeric
+#' @param group Deprecated. Use `by` instead.
#'
#' @return A character vector with description of the "participants", based on
#' the information provided in `data`.
@@ -106,7 +107,7 @@
#' sex = "Sex",
#' gender = "Gender",
#' participants = "Participant",
-#' group = "Condition"
+#' by = "Condition"
#' )
#'
#' # Spell sample size
@@ -123,14 +124,21 @@ report_participants <- function(data,
country = NULL,
race = NULL,
participants = NULL,
- group = NULL,
+ by = NULL,
spell_n = FALSE,
digits = 1,
threshold = 10,
+ group = NULL,
...) {
+ ## TODO: deprecate later
+ if (!is.null(group)) {
+ insight::format_warning("Argument `group` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint
+ by <- group
+ }
+
# Convert empty strings to NA
data_list <- lapply(data, function(x) {
- x[which(x == "")] <- NA
+ x[which(x == "")] <- NA # nolint
x
})
data <- as.data.frame(data_list, stringsAsFactors = FALSE)
@@ -165,8 +173,8 @@ report_participants <- function(data,
race <- .find_race_in_data(data)
}
- if (is.null(group)) {
- text <- .report_participants(
+ if (is.null(by)) {
+ final_text <- .report_participants(
data,
age = age,
sex = sex,
@@ -181,9 +189,9 @@ report_participants <- function(data,
...
)
} else {
- text <- NULL
- data[[group]] <- as.character(data[[group]])
- for (i in split(data, data[group])) {
+ final_text <- NULL
+ data[[by]] <- as.character(data[[by]])
+ for (i in split(data, data[by])) {
current_text <- .report_participants(
i,
age = age,
@@ -200,15 +208,15 @@ report_participants <- function(data,
pre_text <- paste0(
"the '",
- paste0(names(i[group]), " - ", vapply(i[group], unique, "character"), collapse = " and "),
+ paste0(names(i[by]), " - ", vapply(i[by], unique, "character"), collapse = " and "),
"' group: "
)
- text <- c(text, paste0(pre_text, current_text))
+ final_text <- c(final_text, paste0(pre_text, current_text))
}
- text <- paste("For", datawizard::text_concatenate(text, sep = ", for ", last = " and for "))
+ final_text <- paste("For", datawizard::text_concatenate(final_text, sep = ", for ", last = " and for "))
}
- text
+ final_text
}
#' @keywords internal
@@ -338,9 +346,7 @@ report_participants <- function(data,
) %in% c("male", "m", "female", "f", NA, "na")]) /
nrow(data) * 100, digits = digits),
"% other",
- if (!insight::format_value(length(data[[sex]][tolower(
- data[[sex]]
- ) %in% c(NA, "na")]) / nrow(data) * 100) == "0.00") {
+ if (insight::format_value(length(data[[sex]][tolower(data[[sex]]) %in% c(NA, "na")]) / nrow(data) * 100) != "0.00") { # nolint
paste0(", ", insight::format_value(length(data[[sex]][tolower(
data[[sex]]
) %in% c(NA, "na")]) / nrow(data) * 100), "% missing")
@@ -375,9 +381,9 @@ report_participants <- function(data,
data[[gender]]
) %in% both_genders]) /
nrow(data) * 100), "% non-binary",
- if (!insight::format_value(length(data[[gender]][tolower(
+ if (insight::format_value(length(data[[gender]][tolower(
data[[gender]]
- ) %in% c(NA, "na")]) / nrow(data) * 100) == "0.00") {
+ ) %in% c(NA, "na")]) / nrow(data) * 100) != "0.00") {
paste0(", ", insight::format_value(length(data[[gender]][tolower(
data[[gender]]
) %in% c(NA, "na")]) / nrow(data) * 100), "% missing")
@@ -387,31 +393,29 @@ report_participants <- function(data,
if (all(is.na(data[[education]]))) {
text_education <- ""
- } else {
- if (is.numeric(data[[education]])) {
- text_education <- summary(
- report_statistics(
- data[[education]],
- n = FALSE,
- centrality = "mean",
- missing_percentage = NULL,
- digits = digits,
- ...
- )
- )
-
- text_education <- sub("Mean =", "Mean education =", text_education, fixed = TRUE)
- } else {
- data[which(data[[education]] %in% c(NA, "NA")), education] <- "missing"
- txt <- summary(report_statistics(
- as.factor(data[[education]]),
- levels_percentage = TRUE,
+ } else if (is.numeric(data[[education]])) {
+ text_education <- summary(
+ report_statistics(
+ data[[education]],
+ n = FALSE,
+ centrality = "mean",
+ missing_percentage = NULL,
digits = digits,
...
- ))
+ )
+ )
- text_education <- paste0("Education: ", txt)
- }
+ text_education <- sub("Mean =", "Mean education =", text_education, fixed = TRUE)
+ } else {
+ data[which(data[[education]] %in% c(NA, "NA")), education] <- "missing"
+ txt <- summary(report_statistics(
+ as.factor(data[[education]]),
+ levels_percentage = TRUE,
+ digits = digits,
+ ...
+ ))
+
+ text_education <- paste0("Education: ", txt)
}
text_country <- if (all(is.na(data[[country]]))) {
@@ -468,6 +472,7 @@ report_participants <- function(data,
text_race <- paste("Race:", value_string)
}
+ # nolint start
paste0(
size,
" participants (",
@@ -491,6 +496,7 @@ report_participants <- function(data,
), text_race)),
")"
)
+ # nolint end
}
#' @keywords internal
diff --git a/R/report_sample.R b/R/report_sample.R
index 8518e61b..41d6a8f8 100644
--- a/R/report_sample.R
+++ b/R/report_sample.R
@@ -3,7 +3,7 @@
#' Create sample description table (also referred to as "Table 1").
#'
#' @param data A data frame for which descriptive statistics should be created.
-#' @param group_by Character vector, indicating the column(s) for possible grouping
+#' @param by Character vector, indicating the column(s) for possible grouping
#' of the descriptive table. Note that weighting (see `weights`) does not work
#' with more than one grouping column.
#' @param centrality Character, indicates the statistics that should be
@@ -43,6 +43,7 @@
#' @param digits Number of decimals.
#' @param n Logical, actual sample size used in the calculation of the
#' reported descriptive statistics (i.e., without the missing values).
+#' @param group_by Deprecated. Use `by` instead.
#' @inheritParams report.data.frame
#'
#' @return A data frame of class `report_sample` with variable names and
@@ -61,8 +62,8 @@
#'
#' report_sample(iris[, 1:4])
#' report_sample(iris, select = c("Sepal.Length", "Petal.Length", "Species"))
-#' report_sample(iris, group_by = "Species")
-#' report_sample(airquality, group_by = "Month", n = TRUE, total = FALSE)
+#' report_sample(iris, by = "Species")
+#' report_sample(airquality, by = "Month", n = TRUE, total = FALSE)
#'
#' # confidence intervals for proportions
#' set.seed(123)
@@ -72,7 +73,7 @@
#' report_sample(d, ci = 0.95, ci_correct = TRUE) # continuity correction
#' @export
report_sample <- function(data,
- group_by = NULL,
+ by = NULL,
centrality = "mean",
ci = NULL,
ci_method = "wilson",
@@ -83,7 +84,14 @@ report_sample <- function(data,
total = TRUE,
digits = 2,
n = FALSE,
+ group_by = NULL,
...) {
+ ## TODO: deprecate later
+ if (!is.null(group_by)) {
+ insight::format_warning("Argument `group_by` is deprecated and will be removed in a future release. Please use `by` instead.") # nolint
+ by <- group_by
+ }
+
# check for correct input type
if (!is.data.frame(data)) {
data <- tryCatch(
@@ -119,16 +127,16 @@ report_sample <- function(data,
variables <- setdiff(variables, exclude)
}
- # for grouped data frames, use groups as group_by argument
- if (inherits(data, "grouped_df") && is.null(group_by)) {
- group_by <- setdiff(colnames(attributes(data)$groups), ".rows")
+ # for grouped data frames, use the grouping variables as the `by` argument
+ if (inherits(data, "grouped_df") && is.null(by)) {
+ by <- setdiff(colnames(attributes(data)$groups), ".rows")
}
# grouped by?
- grouping <- !is.null(group_by) && all(group_by %in% colnames(data))
+ do_grouping <- !is.null(by) && all(by %in% colnames(data))
# sanity check - weights and grouping
- if (!is.null(group_by) && length(group_by) > 1 && !is.null(weights)) {
+ if (!is.null(by) && length(by) > 1 && !is.null(weights)) {
insight::format_error("Cannot apply `weights` when grouping is done by more than one variable.")
}
@@ -143,12 +151,12 @@ report_sample <- function(data,
i
})
- # coerce group_by columns to factor
- groups <- as.data.frame(lapply(data[group_by], factor))
+ # coerce the `by` columns to factor
+ groups <- as.data.frame(lapply(data[by], factor))
- out <- if (isTRUE(grouping)) {
+ out <- if (isTRUE(do_grouping)) {
result <- lapply(split(data[variables], groups), function(x) {
- x[group_by] <- NULL
+ x[by] <- NULL
.generate_descriptive_table(
x,
centrality,
@@ -162,7 +170,7 @@ report_sample <- function(data,
})
# for more than one group, fix column names. we don't want "a.b (n=10)",
# but rather ""a, b (n=10)""
- if (length(group_by) > 1) {
+ if (length(by) > 1) {
old_names <- datawizard::data_unite(
unique(groups),
new_column = ".old_names",
@@ -179,9 +187,9 @@ report_sample <- function(data,
variable <- result[[1]]["Variable"]
# number of observation, based on weights
if (!is.null(weights)) {
- n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[group_by]])))
+ n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[by]])))
} else {
- n_obs <- as.vector(table(data[group_by]))
+ n_obs <- as.vector(table(data[by]))
}
# column names for groups
cn <- sprintf("%s (n=%g)", names(result), n_obs)
@@ -189,13 +197,13 @@ report_sample <- function(data,
summaries <- do.call(cbind, lapply(result, function(i) i["Summary"]))
colnames(summaries) <- cn
# generate data for total column, but make sure to remove missings
- total_data <- data[stats::complete.cases(data[group_by]), unique(c(variables, group_by))]
+ total_data <- data[stats::complete.cases(data[by]), unique(c(variables, by))]
# bind all together, including total column
final <- cbind(
variable,
summaries,
Total = .generate_descriptive_table(
- total_data[setdiff(variables, group_by)],
+ total_data[setdiff(variables, by)],
centrality,
weights,
digits,
@@ -211,9 +219,9 @@ report_sample <- function(data,
}
# define total N, based on weights
if (!is.null(weights)) {
- total_n <- round(sum(as.vector(table(data[group_by]))) * mean(data[[weights]], na.rm = TRUE))
+ total_n <- round(sum(as.vector(table(data[by]))) * mean(data[[weights]], na.rm = TRUE))
} else {
- total_n <- sum(as.vector(table(data[group_by])))
+ total_n <- sum(as.vector(table(data[by])))
}
# add N to column name
colnames(final)[ncol(final)] <- sprintf(
@@ -335,36 +343,36 @@ report_sample <- function(data,
weights[is.na(x)] <- NA
weights <- stats::na.omit(weights)
x <- stats::na.omit(x)
- proportions <- prop.table(stats::xtabs(weights ~ x))
+ table_proportions <- prop.table(stats::xtabs(weights ~ x))
} else {
- proportions <- prop.table(table(x))
+ table_proportions <- prop.table(table(x))
}
# for binary factors, just need one level
if (nlevels(x) == 2) {
- proportions <- proportions[2]
+ table_proportions <- table_proportions[2]
}
# CI for proportions?
if (!is.null(ci)) {
- ci_low_high <- .ci_proportion(x, proportions, weights, ci, ci_method, ci_correct)
+ ci_low_high <- .ci_proportion(x, table_proportions, weights, ci, ci_method, ci_correct)
.summary <- sprintf(
"%.1f [%.1f, %.1f]",
- 100 * proportions,
+ 100 * table_proportions,
100 * ci_low_high$ci_low,
100 * ci_low_high$ci_high
)
} else {
- .summary <- sprintf("%.1f", 100 * proportions)
+ .summary <- sprintf("%.1f", 100 * table_proportions)
}
if (isTRUE(n)) {
- .summary <- paste0(.summary, ", ", round(sum(!is.na(x)) * as.vector(proportions)))
+ .summary <- paste0(.summary, ", ", round(sum(!is.na(x)) * as.vector(table_proportions)))
}
n_label <- ifelse(n, ", n", "")
data.frame(
- Variable = sprintf("%s [%s], %%%s", column, names(proportions), n_label),
+ Variable = sprintf("%s [%s], %%%s", column, names(table_proportions), n_label),
Summary = as.vector(.summary),
stringsAsFactors = FALSE
)
@@ -377,12 +385,12 @@ report_sample <- function(data,
# Standard error for confidence interval of proportions ----
-.ci_proportion <- function(x, proportions, weights, ci, ci_method, ci_correct) {
+.ci_proportion <- function(x, table_proportions, weights, ci, ci_method, ci_correct) {
ci_method <- match.arg(tolower(ci_method), c("wald", "wilson"))
# variables
- p <- as.vector(proportions)
- q <- 1 - p
+ p <- as.vector(table_proportions)
+ quant <- 1 - p
n <- length(stats::na.omit(x))
z <- stats::qnorm((1 + ci) / 2)
@@ -399,21 +407,21 @@ report_sample <- function(data,
if (ci_method == "wilson") {
# Wilson CIs -------------------
if (isTRUE(ci_correct)) {
- ci_low <- (2 * n * p + z^2 - 1 - z * sqrt(z^2 - 2 - 1 / n + 4 * p * (n * q + 1))) / (2 * (n + z^2))
- ci_high <- (2 * n * p + z^2 + 1 + z * sqrt(z^2 + 2 - 1 / n + 4 * p * (n * q - 1))) / (2 * (n + z^2))
+ ci_low <- (2 * n * p + z^2 - 1 - z * sqrt(z^2 - 2 - 1 / n + 4 * p * (n * quant + 1))) / (2 * (n + z^2))
+ ci_high <- (2 * n * p + z^2 + 1 + z * sqrt(z^2 + 2 - 1 / n + 4 * p * (n * quant - 1))) / (2 * (n + z^2))
# close to 0 or 1, then CI is 0 resp. 1
- fix <- p < 0.00001 | ci_low < 0.00001
- if (any(fix)) {
- ci_low[fix] <- 0
+ fix_ci <- p < 0.00001 | ci_low < 0.00001
+ if (any(fix_ci)) {
+ ci_low[fix_ci] <- 0
}
- fix <- p > 0.99999 | ci_high > 0.99999
- if (any(fix)) {
- ci_high[fix] <- 1
+ fix_ci <- p > 0.99999 | ci_high > 0.99999
+ if (any(fix_ci)) {
+ ci_high[fix_ci] <- 1
}
out <- list(ci_low = ci_low, ci_high = ci_high)
} else {
prop <- (2 * n * p) + z^2
- moe <- z * sqrt(z^2 + 4 * n * p * q)
+ moe <- z * sqrt(z^2 + 4 * n * p * quant)
correction <- 2 * (n + z^2)
out <- list(
ci_low = (prop - moe) / correction,
@@ -422,7 +430,7 @@ report_sample <- function(data,
}
} else {
# Wald CIs -------------------
- moe <- z * suppressWarnings(sqrt(p * q / n))
+ moe <- z * suppressWarnings(sqrt(p * quant / n))
if (isTRUE(ci_correct)) {
moe <- moe + 1 / (2 * n)
}
@@ -511,9 +519,9 @@ print_md.report_sample <- function(x, layout = "horizontal", ...) {
weights[is.na(x)] <- NA
weights <- stats::na.omit(weights)
x <- stats::na.omit(x)
- order <- order(x)
- x <- x[order]
- weights <- weights[order]
+ x_order <- order(x)
+ x <- x[x_order]
+ weights <- weights[x_order]
rw <- cumsum(weights) / sum(weights)
md_values <- min(which(rw >= p))
if (rw[md_values] == p) {
diff --git a/R/report_text.R b/R/report_text.R
index 7b29e9ba..2c7faa9b 100644
--- a/R/report_text.R
+++ b/R/report_text.R
@@ -103,6 +103,6 @@ summary.report_text <- function(object, ...) {
#' @export
print.report_text <- function(x, width = NULL, ...) {
- x <- datawizard::format_text(as.character(x), width = width, ...)
+ x <- datawizard::text_format(as.character(x), width = width, ...)
cat(x)
}
diff --git a/README.Rmd b/README.Rmd
index 7bca5c21..58dc8d13 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -262,11 +262,11 @@ paste(
Report can also help you create a sample description table (also referred to as **Table 1**).
```{r, eval=FALSE}
-report_sample(iris, group_by = "Species")
+report_sample(iris, by = "Species")
```
```{r, echo=FALSE}
-knitr::kable(report_sample(iris, group_by = "Species"))
+knitr::kable(report_sample(iris, by = "Species"))
```
### Report system and packages
diff --git a/README.md b/README.md
index b040f241..5faabf46 100644
--- a/README.md
+++ b/README.md
@@ -78,7 +78,7 @@ The package documentation can be found
## Report all the things
-
+
### General Workflow
@@ -262,28 +262,28 @@ report(model)
# of 1000 iterations and a warmup of 500) to predict mpg with qsec and wt
# (formula: mpg ~ qsec + wt). Priors over parameters were all set as normal (mean
# = 0.00, SD = 8.43; mean = 0.00, SD = 15.40) distributions. The model's
- # explanatory power is substantial (R2 = 0.81, 95% CI [0.69, 0.89], adj. R2 =
- # 0.79). The model's intercept, corresponding to qsec = 0 and wt = 0, is at 19.56
- # (95% CI [9.60, 30.59]). Within this model:
+ # explanatory power is substantial (R2 = 0.81, 95% CI [0.70, 0.90], adj. R2 =
+ # 0.79). The model's intercept, corresponding to qsec = 0 and wt = 0, is at 19.80
+ # (95% CI [8.93, 29.80]). Within this model:
#
- # - The effect of qsec (Median = 0.94, 95% CI [0.38, 1.45]) has a 99.90%
- # probability of being positive (> 0), 98.80% of being significant (> 0.30), and
- # 0.05% of being large (> 1.81). The estimation successfully converged (Rhat =
- # 1.001) and the indices are reliable (ESS = 1921)
- # - The effect of wt (Median = -5.05, 95% CI [-6.01, -4.05]) has a 100.00%
+ # - The effect of qsec (Median = 0.93, 95% CI [0.40, 1.49]) has a 100.00%
+ # probability of being positive (> 0), 99.05% of being significant (> 0.30), and
+ # 0.25% of being large (> 1.81). The estimation successfully converged (Rhat =
+ # 1.000) and the indices are reliable (ESS = 1864)
+ # - The effect of wt (Median = -5.04, 95% CI [-5.99, -4.08]) has a 100.00%
# probability of being negative (< 0), 100.00% of being significant (< -0.30),
# and 100.00% of being large (< -1.81). The estimation successfully converged
- # (Rhat = 1.000) and the indices are reliable (ESS = 2020)
+ # (Rhat = 0.999) and the indices are reliable (ESS = 2424)
#
# Following the Sequential Effect eXistence and sIgnificance Testing (SEXIT)
# framework, we report the median of the posterior distribution and its 95% CI
# (Highest Density Interval), along the probability of direction (pd), the
# probability of significance and the probability of being large. The thresholds
# beyond which the effect is considered as significant (i.e., non-negligible) and
- # large are |0.30| and |1.81|. Convergence and stability of the Bayesian sampling
- # has been assessed using R-hat, which should be below 1.01 (Vehtari et al.,
- # 2019), and Effective Sample Size (ESS), which should be greater than 1000
- # (Burkner, 2017).
+ # large are |0.30| and |1.81| (corresponding respectively to 0.05 and 0.30 of the
+ # outcome's SD). Convergence and stability of the Bayesian sampling has been
+ # assessed using R-hat, which should be below 1.01 (Vehtari et al., 2019), and
+ # Effective Sample Size (ESS), which should be greater than 1000 (Burkner, 2017).
## Other types of reports
@@ -296,10 +296,16 @@ model <- lm(Sepal.Length ~ Species, data = iris)
report_model(model)
# linear model (estimated using OLS) to predict Sepal.Length with Species (formula: Sepal.Length ~ Species)
+```
+
+``` r
report_performance(model)
# The model explains a statistically significant and substantial proportion of
# variance (R2 = 0.62, F(2, 147) = 119.26, p < .001, adj. R2 = 0.61)
+```
+
+``` r
report_statistics(model)
# beta = 5.01, 95% CI [4.86, 5.15], t(147) = 68.76, p < .001; Std. beta = -1.01, 95% CI [-1.18, -0.84]
@@ -334,7 +340,7 @@ Report can also help you create a sample description table (also
referred to as **Table 1**).
``` r
-report_sample(iris, group_by = "Species")
+report_sample(iris, by = "Species")
```
| Variable | setosa (n=50) | versicolor (n=50) | virginica (n=50) | Total (n=150) |
@@ -353,32 +359,35 @@ analysis paragraph about the tools used.
report(sessionInfo())
```
- # Analyses were conducted using the R Statistical language (version 4.2.2; R Core
- # Team, 2022) on macOS Ventura 13.1, using the packages lme4 (version 1.1.32;
- # Bates D et al., 2015), Matrix (version 1.5.3; Bates D et al., 2022), Rcpp
- # (version 1.0.10; Eddelbuettel D, François R, 2011), rstanarm (version 2.21.3;
- # Goodrich B et al., 2022), report (version 0.5.7; Makowski D et al., 2023) and
- # dplyr (version 1.1.0; Wickham H et al., 2023).
+ # Analyses were conducted using the R Statistical language (version 4.4.0; R Core
+ # Team, 2024) on Windows 11 x64 (build 22631), using the packages lme4 (version
+ # 1.1.35.3; Bates D et al., 2015), Matrix (version 1.7.0; Bates D et al., 2024),
+ # Rcpp (version 1.0.12; Eddelbuettel D et al., 2024), rstanarm (version 2.32.1;
+ # Goodrich B et al., 2024), report (version 0.5.8.3; Makowski D et al., 2023) and
+ # dplyr (version 1.1.4; Wickham H et al., 2023).
#
# References
# ----------
# - Bates D, Mächler M, Bolker B, Walker S (2015). "Fitting Linear Mixed-Effects
# Models Using lme4." _Journal of Statistical Software_, *67*(1), 1-48.
# doi:10.18637/jss.v067.i01 .
- # - Bates D, Maechler M, Jagan M (2022). _Matrix: Sparse and Dense Matrix Classes
- # and Methods_. R package version 1.5-3,
+ # - Bates D, Maechler M, Jagan M (2024). _Matrix: Sparse and Dense Matrix Classes
+ # and Methods_. R package version 1.7-0,
# .
- # - Eddelbuettel D, François R (2011). "Rcpp: Seamless R and C++ Integration."
- # _Journal of Statistical Software_, *40*(8), 1-18. doi:10.18637/jss.v040.i08
+ # - Eddelbuettel D, Francois R, Allaire J, Ushey K, Kou Q, Russell N, Ucar I,
+ # Bates D, Chambers J (2024). _Rcpp: Seamless R and C++ Integration_. R package
+ # version 1.0.12, . Eddelbuettel D,
+ # François R (2011). "Rcpp: Seamless R and C++ Integration." _Journal of
+ # Statistical Software_, *40*(8), 1-18. doi:10.18637/jss.v040.i08
# . Eddelbuettel D (2013). _Seamless R and
# C++ Integration with Rcpp_. Springer, New York. doi:10.1007/978-1-4614-6868-4
# , ISBN 978-1-4614-6867-7.
- # Eddelbuettel D, Balamuta JJ (2018). "Extending extitR with extitC++: A Brief
- # Introduction to extitRcpp." _The American Statistician_, *72*(1), 28-36.
+ # Eddelbuettel D, Balamuta J (2018). "Extending R with C++: A Brief Introduction
+ # to Rcpp." _The American Statistician_, *72*(1), 28-36.
# doi:10.1080/00031305.2017.1375990
# .
- # - Goodrich B, Gabry J, Ali I, Brilleman S (2022). "rstanarm: Bayesian applied
- # regression modeling via Stan." R package version 2.21.3,
+ # - Goodrich B, Gabry J, Ali I, Brilleman S (2024). "rstanarm: Bayesian applied
+ # regression modeling via Stan." R package version 2.32.1,
# . Brilleman S, Crowther M, Moreno-Betancur M,
# Buros Novik J, Wolfe R (2018). "Joint longitudinal and time-to-event models via
# Stan." StanCon 2018. 10-12 Jan 2018. Pacific Grove, CA, USA.,
@@ -387,11 +396,11 @@ report(sessionInfo())
# "Automated Results Reporting as a Practical Tool to Improve Reproducibility and
# Methodological Best Practices Adoption." _CRAN_.
# .
- # - R Core Team (2022). _R: A Language and Environment for Statistical
+ # - R Core Team (2024). _R: A Language and Environment for Statistical
# Computing_. R Foundation for Statistical Computing, Vienna, Austria.
# .
# - Wickham H, François R, Henry L, Müller K, Vaughan D (2023). _dplyr: A Grammar
- # of Data Manipulation_. R package version 1.1.0,
+ # of Data Manipulation_. R package version 1.1.4,
# .
## Credits
@@ -401,7 +410,6 @@ as follows:
``` r
citation("report")
-
To cite in publications use:
Makowski, D., Lüdecke, D., Patil, I., Thériault, R., Ben-Shachar,
diff --git a/man/report_participants.Rd b/man/report_participants.Rd
index 2ce58b14..e1704666 100644
--- a/man/report_participants.Rd
+++ b/man/report_participants.Rd
@@ -13,10 +13,11 @@ report_participants(
country = NULL,
race = NULL,
participants = NULL,
- group = NULL,
+ by = NULL,
spell_n = FALSE,
digits = 1,
threshold = 10,
+ group = NULL,
...
)
}
@@ -44,7 +45,7 @@ individuals in those groups as \code{"Non-Binary"}.}
\item{participants}{The name of the participants' identifier column (for
instance in the case of repeated measures).}
-\item{group}{A character vector indicating the name(s) of the column(s) used
+\item{by}{A character vector indicating the name(s) of the column(s) used
for stratified description.}
\item{spell_n}{Logical, fully spell the sample size (\code{"Three participants"}
@@ -55,6 +56,8 @@ instead of \code{"3 participants"}).}
\item{threshold}{Percentage after which to combine, e.g., countries (default is 10\%,
so countries that represent less than 10\% will be combined in the "other" category).}
+\item{group}{Deprecated. Use \code{by} instead.}
+
\item{...}{Arguments passed to or from other methods.}
}
\value{
@@ -141,7 +144,7 @@ report_participants(data,
sex = "Sex",
gender = "Gender",
participants = "Participant",
- group = "Condition"
+ by = "Condition"
)
# Spell sample size
diff --git a/man/report_sample.Rd b/man/report_sample.Rd
index 7a66569a..08751462 100644
--- a/man/report_sample.Rd
+++ b/man/report_sample.Rd
@@ -6,7 +6,7 @@
\usage{
report_sample(
data,
- group_by = NULL,
+ by = NULL,
centrality = "mean",
ci = NULL,
ci_method = "wilson",
@@ -17,13 +17,14 @@ report_sample(
total = TRUE,
digits = 2,
n = FALSE,
+ group_by = NULL,
...
)
}
\arguments{
\item{data}{A data frame for which descriptive statistics should be created.}
-\item{group_by}{Character vector, indicating the column(s) for possible grouping
+\item{by}{Character vector, indicating the column(s) for possible grouping
of the descriptive table. Note that weighting (see \code{weights}) does not work
with more than one grouping column.}
@@ -75,6 +76,8 @@ weight-variable. Reported descriptive statistics will be weighted by
\item{n}{Logical, actual sample size used in the calculation of the
reported descriptive statistics (i.e., without the missing values).}
+\item{group_by}{Deprecated. Use \code{by} instead.}
+
\item{...}{Arguments passed to or from other methods.}
}
\value{
@@ -89,8 +92,8 @@ library(report)
report_sample(iris[, 1:4])
report_sample(iris, select = c("Sepal.Length", "Petal.Length", "Species"))
-report_sample(iris, group_by = "Species")
-report_sample(airquality, group_by = "Month", n = TRUE, total = FALSE)
+report_sample(iris, by = "Species")
+report_sample(airquality, by = "Month", n = TRUE, total = FALSE)
# confidence intervals for proportions
set.seed(123)
diff --git a/tests/testthat/_snaps/windows/report_sample.md b/tests/testthat/_snaps/windows/report_sample.md
index d9ad1fb8..ffec20c1 100644
--- a/tests/testthat/_snaps/windows/report_sample.md
+++ b/tests/testthat/_snaps/windows/report_sample.md
@@ -180,10 +180,10 @@
-------------------------
x [1], % | 2.9 [2.0, 4.2]
-# report_sample group_by
+# report_sample by
Code
- report_sample(airquality, group_by = "Month")
+ report_sample(airquality, by = "Month")
Output
# Descriptive Statistics
@@ -198,7 +198,7 @@
---
Code
- report_sample(mtcars, group_by = "cyl")
+ report_sample(mtcars, by = "cyl")
Output
# Descriptive Statistics
@@ -218,7 +218,7 @@
---
Code
- report_sample(iris, group_by = "Species")
+ report_sample(iris, by = "Species")
Output
# Descriptive Statistics
@@ -456,7 +456,7 @@
---
Code
- report_sample(airquality, group_by = "Month", total = TRUE)
+ report_sample(airquality, by = "Month", total = TRUE)
Output
# Descriptive Statistics
@@ -471,7 +471,7 @@
---
Code
- report_sample(airquality, group_by = "Month", total = FALSE)
+ report_sample(airquality, by = "Month", total = FALSE)
Output
# Descriptive Statistics
@@ -486,7 +486,7 @@
---
Code
- report_sample(airquality, group_by = "Month", total = FALSE, n = TRUE)
+ report_sample(airquality, by = "Month", total = FALSE, n = TRUE)
Output
# Descriptive Statistics
@@ -501,7 +501,7 @@
---
Code
- report_sample(airquality, group_by = "Month", total = TRUE, n = TRUE)
+ report_sample(airquality, by = "Month", total = TRUE, n = TRUE)
Output
# Descriptive Statistics
diff --git a/tests/testthat/test-report_participants.R b/tests/testthat/test-report_participants.R
index 20319ddc..d8231254 100644
--- a/tests/testthat/test-report_participants.R
+++ b/tests/testthat/test-report_participants.R
@@ -14,7 +14,7 @@ test_that("report_participants, argument gender works", {
"Gender: 12.5% women, 37.5% men, 50.00% non-binary)"
)
)
- out <- report_participants(data, group = "Condition")
+ out <- report_participants(data, by = "Condition")
expect_identical(
out,
paste(
@@ -26,7 +26,7 @@ test_that("report_participants, argument gender works", {
)
)
# works when lowercase
- out <- report_participants(data, group = "Condition")
+ out <- report_participants(data, by = "Condition")
expect_identical(
out,
paste(
diff --git a/tests/testthat/test-report_sample.R b/tests/testthat/test-report_sample.R
index d37c53dd..a012f969 100644
--- a/tests/testthat/test-report_sample.R
+++ b/tests/testthat/test-report_sample.R
@@ -6,8 +6,8 @@ test_that("report_sample weights, coorect weighted N", {
stringsAsFactors = FALSE
)
- out1 <- report_sample(d, select = "x", group_by = "g")
- out2 <- report_sample(d, select = "x", group_by = "g", weights = "w")
+ out1 <- report_sample(d, select = "x", by = "g")
+ out2 <- report_sample(d, select = "x", by = "g", weights = "w")
expect_identical(
capture.output(print(out1)),
c(
@@ -41,7 +41,7 @@ test_that("report_sample weights, coorect weighted N", {
stringsAsFactors = FALSE
)
expect_error(
- report_sample(d, select = "x", group_by = c("g1", "g2"), weights = "w"),
+ report_sample(d, select = "x", by = c("g1", "g2"), weights = "w"),
regex = "Cannot apply"
)
})
@@ -123,18 +123,18 @@ test_that("report_sample CI", {
expect_warning(report_sample(d, ci = 0.95, weights = "w", ci_method = "wald"), regex = "accurate")
})
-test_that("report_sample group_by", {
+test_that("report_sample by", {
expect_snapshot(
variant = "windows",
- report_sample(airquality, group_by = "Month")
+ report_sample(airquality, by = "Month")
)
expect_snapshot(
variant = "windows",
- report_sample(mtcars, group_by = "cyl")
+ report_sample(mtcars, by = "cyl")
)
expect_snapshot(
variant = "windows",
- report_sample(iris, group_by = "Species")
+ report_sample(iris, by = "Species")
)
})
@@ -206,19 +206,19 @@ test_that("report_sample total", {
)
expect_snapshot(
variant = "windows",
- report_sample(airquality, group_by = "Month", total = TRUE)
+ report_sample(airquality, by = "Month", total = TRUE)
)
expect_snapshot(
variant = "windows",
- report_sample(airquality, group_by = "Month", total = FALSE)
+ report_sample(airquality, by = "Month", total = FALSE)
)
expect_snapshot(
variant = "windows",
- report_sample(airquality, group_by = "Month", total = FALSE, n = TRUE)
+ report_sample(airquality, by = "Month", total = FALSE, n = TRUE)
)
expect_snapshot(
variant = "windows",
- report_sample(airquality, group_by = "Month", total = TRUE, n = TRUE)
+ report_sample(airquality, by = "Month", total = TRUE, n = TRUE)
)
})
@@ -248,7 +248,7 @@ test_that("report_sample grouped data frames", {
data(mtcars)
mtcars_grouped <- datawizard::data_group(mtcars, "gear")
out1 <- report_sample(mtcars_grouped, select = c("hp", "mpg"))
- out2 <- report_sample(mtcars, group_by = "gear", select = c("hp", "mpg"))
+ out2 <- report_sample(mtcars, by = "gear", select = c("hp", "mpg"))
expect_identical(out1, out2)
})
@@ -258,7 +258,7 @@ test_that("report_sample, with more than one grouping variable", {
iris$grp <- sample(letters[1:3], nrow(iris), TRUE)
out <- report_sample(
iris,
- group_by = c("Species", "grp"),
+ by = c("Species", "grp"),
select = c("Sepal.Length", "Sepal.Width")
)
# verified against