Skip to content

Commit

Permalink
Check for misspelled colnames in report_sample() (#434)
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke authored May 31, 2024
1 parent bf636ee commit f9cc36b
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 1 deletion.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: report
Type: Package
Title: Automated Reporting of Results and Statistical Models
Version: 0.5.8.3
Version: 0.5.8.4
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
Expand Down Expand Up @@ -148,6 +148,7 @@ Collate:
'report_table.R'
'utils_error_message.R'
'utils_grouped_df.R'
'utils_misspelled_variables.R'
'zzz.R'
Roxygen: list(markdown = TRUE)
Remotes: easystats/insight, easystats/datawizard, easystats/parameters, easystats/performance, easystats/modelbased
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Minor changes

* `report` now supports reporting of Bayesian model comparison with variables of class `brms::loo_compare`.
* `report` now supports reporting of BayesFactor objects with variables of class `BFBayesFactor`.
* `report_sample()` now suggests valid column names for misspelled columns in the `select`, `by`, `weights` and `exclude` arguments.

# report 0.5.8

Expand Down
6 changes: 6 additions & 0 deletions R/report_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ report_sample <- function(data,
select <- colnames(data)[select]
}

# sanity check for existing columns
.check_spelling(data, select)
.check_spelling(data, exclude)
.check_spelling(data, by)
.check_spelling(data, weights)

# variables to keep
if (!is.null(weights)) {
select <- unique(c(select, weights))
Expand Down
75 changes: 75 additions & 0 deletions R/utils_misspelled_variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Validate user-supplied column names against the columns of `data`.
#
# `data` is the data frame whose column names are the reference. `select` holds
# the column names the user passed for one argument; `NULL` skips the check and
# empty strings are ignored. The argument name shown in the error message is
# taken from the expression used in the call, so call this as
# `.check_spelling(data, by)` with the argument variable itself.
#
# Returns `NULL` invisibly when every name exists in `data`. Otherwise it calls
# `insight::format_error()` with a message listing the unknown names plus the
# suggestion text produced by `.misspelled_string()`.
.check_spelling <- function(data, select) {
  # capture the caller's expression first; deparse() may return several
  # strings for long expressions, so collapse them into one label
  wrong_arg <- paste0(
    "specified in `",
    paste(deparse(substitute(select)), collapse = ""),
    "` "
  )
  if (is.null(select)) {
    return(invisible(NULL))
  }
  # check every element: `isTRUE(nzchar(select))` is only TRUE for a single
  # string, which silently skipped the check whenever two or more names
  # were supplied
  candidates <- select[nzchar(select)]
  not_found <- candidates[!candidates %in% colnames(data)]
  if (length(not_found) > 0) {
    insight::format_error(
      paste0(
        sprintf("The following column(s) %sdon't exist in the dataset: ", wrong_arg),
        datawizard::text_concatenate(not_found), "."
      ),
      .misspelled_string(colnames(data), not_found, "Possibly misspelled?")
    )
  }
  invisible(NULL)
}


#' Fuzzy grep, matches pattern that are close, but not identical
#'
#' @param x Character vector that is searched.
#' @param pattern Single string to look for. It is treated as literal text:
#'   regular-expression metacharacters are escaped before matching.
#' @param precision Number used as the `{~n}` bound of the approximate regular
#'   expression. Defaults to one third of the number of characters in
#'   `pattern`, rounded.
#' @return Integer indices of the elements of `x` that match, or `NULL` if
#'   `precision` is larger than the number of characters in `pattern`.
#' @examples
#' colnames(iris)
#' p <- sprintf("(%s){~%i}", "Spela", 2)
#' grep(pattern = p, x = colnames(iris), ignore.case = FALSE)
#' @keywords internal
#' @noRd
.fuzzy_grep <- function(x, pattern, precision = NULL) {
  if (is.null(precision)) {
    precision <- round(nchar(pattern) / 3)
  }
  if (precision > nchar(pattern)) {
    return(NULL)
  }
  # the search term is user-typed text, not a regex: escape metacharacters so
  # names such as "a[b" or "income (USD)" neither break grep() nor change the
  # meaning of the pattern
  literal <- gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", pattern)
  p <- sprintf("(%s){~%i}", literal, precision)
  grep(pattern = p, x = x, ignore.case = FALSE)
}


#' Create a message string to tell user about matches that could possibly
#' be the string they were looking for
#'
#' @param source Character vector of valid names to search in.
#' @param searchterm Character vector of names that were not found.
#' @param default_message Value used when `searchterm` is `NULL` or empty, or
#'   when no close match is found in `source`.
#' @return `default_message` unchanged if `searchterm` is `NULL` or empty.
#'   Otherwise the result of `insight::trim_ws()` applied to either a
#'   "Did you mean ...?" suggestion (at most five names, each listed once) or
#'   `default_message`.
#'
#' @keywords internal
#' @noRd
.misspelled_string <- function(source, searchterm, default_message = NULL) {
  if (is.null(searchterm) || length(searchterm) < 1) {
    return(default_message)
  }
  # guess the misspelled string; several search terms can hit the same name,
  # so de-duplicate to avoid repeated suggestions and an inflated count
  possible_strings <- unique(unlist(lapply(searchterm, function(s) {
    source[.fuzzy_grep(source, s)] # nolint
  }), use.names = FALSE))
  if (length(possible_strings) == 0) {
    msg <- default_message
  } else {
    # used for many matches
    more_found <- ""
    if (length(possible_strings) == 1) {
      msg <- paste0("Did you mean ", "\"", possible_strings, "\"")
    } else {
      # make sure we don't print dozens of alternatives for larger data frames
      if (length(possible_strings) > 5) {
        more_found <- sprintf(
          " We even found %i more possible matches, not shown here.",
          length(possible_strings) - 5
        )
        possible_strings <- possible_strings[seq_len(5)]
      }
      msg <- paste0(
        "Did you mean ",
        "one of ",
        datawizard::text_concatenate(possible_strings, enclose = "\"", last = " or ")
      )
    }
    msg <- paste0(msg, "?", more_found)
  }
  # no double white space
  insight::trim_ws(msg)
}
2 changes: 2 additions & 0 deletions tests/testthat/test-report_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ test_that("report_sample check input", {
data(iris)
expect_error(report_sample(lm(Sepal.Length ~ Species, data = iris)))
expect_silent(report_sample(iris$Species))
expect_error(report_sample(iris, by = "Spedies"), regex = "The following column")
expect_error(report_sample(iris, select = "Spedies"), regex = "The following column")
})

test_that("report_sample default", {
Expand Down

0 comments on commit f9cc36b

Please sign in to comment.