r-lib · MichaelChirico · May 7, 2025 · May 7, 2025 · May 7, 2025 · May 7, 2025
diff --git a/NEWS.md b/NEWS.md
@@ -39,6 +39,7 @@
 * `boolean_arithmetic_linter()` finds many more cases like `sum(x | y) == 0` where the total of a known-logical vector is compared to 0 (#1580, @MichaelChirico).
 * `expect_lint()` has a new argument `ignore_order` (default `FALSE`), which, if `TRUE`, allows the `checks=` to be provided in arbitary order vs. how `lint()` produces them (@MichaelChirico).
 * `undesirable_function_linter()` accepts unnamed entries, treating them as undesirable functions, e.g. `undesirable_function_linter("sum")` (#2536, @MichaelChirico).
+* `fixed_regex_linter()` recognizes usage of the new (R 4.5.0) `grepv()` wrapper of `grep()`; `regex_subset_linter()` also recommends `grepv()` alternatives (#2855, @MichaelChirico).
 
 ### New linters
 

diff --git a/R/fixed_regex_linter.R b/R/fixed_regex_linter.R
@@ -77,7 +77,7 @@
 fixed_regex_linter <- function(allow_unescaped = FALSE) {
   # regular expression pattern is the first argument
   pos_1_regex_funs <- c(
-    "grep", "gsub", "sub", "regexec", "grepl", "regexpr", "gregexpr"
+    "grep", "grepv", "gsub", "sub", "regexec", "grepl", "regexpr", "gregexpr"
   )
 
   # regular expression pattern is the second argument

diff --git a/R/regex_subset_linter.R b/R/regex_subset_linter.R
@@ -1,8 +1,8 @@
 #' Require usage of direct methods for subsetting strings via regex
 #'
-#' Using `value = TRUE` in [grep()] returns the subset of the input that matches
-#'   the pattern, e.g. `grep("[a-m]", letters, value = TRUE)` will return the
-#'   first 13 elements (`a` through `m`).
+# TODO(R>=4.5.0): Just use [grepv()] directly. Need this while ?grepv doesn't exist.
+#' Using [`grepv()`][grep] returns the subset of the input that matches the pattern,
+#'   e.g. `grepv("[a-m]", letters)` will return the first 13 elements (`a` through `m`).
 #'
 #' `letters[grep("[a-m]", letters)]` and `letters[grepl("[a-m]", letters)]`
 #'   both return the same thing, but more circuitously and more verbosely.
@@ -12,12 +12,12 @@
 #'   `str_detect()` and `str_which()`.
 #'
 #' @section Exceptions:
-#'   Note that `x[grep(pattern, x)]` and `grep(pattern, x, value = TRUE)`
-#'   are not _completely_ interchangeable when `x` is not character
-#'   (most commonly, when `x` is a factor), because the output of the
-#'   latter will be a character vector while the former remains a factor.
-#'   It still may be preferable to refactor such code, as it may be faster
-#'   to match the pattern on `levels(x)` and use that to subset instead.
+#'   Note that `x[grep(pattern, x)]` and `grepv(pattern, x)` are not
+#'    _completely_ interchangeable when `x` is not character (most commonly,
+#'   when `x` is a factor), because the output of the latter will be a
+#'   character vector while the former remains a factor. It still may be
+#'   preferable to refactor such code, as it may be faster to match the
+#'   pattern on `levels(x)` and use that to subset instead.
 #'
 #' @evalRd rd_tags("regex_subset_linter")
 #'
@@ -35,7 +35,7 @@
 #'
 #' # okay
 #' lint(
-#'   text = "grep(pattern, x, value = TRUE)",
+#'   text = "grepv(pattern, x)",
 #'   linters = regex_subset_linter()
 #' )
 #'
@@ -72,7 +72,7 @@ regex_subset_linter <- function() {
       grep_expr,
       source_expression = source_expression,
       lint_message =
-        "Prefer grep(pattern, x, ..., value = TRUE) over x[grep(pattern, x, ...)] and x[grepl(pattern, x, ...)].",
+        "Prefer grepv(pattern, x, ...) over x[grep(pattern, x, ...)] and x[grepl(pattern, x, ...)].",
       type = "warning"
     )
 

diff --git a/man/regex_subset_linter.Rd b/man/regex_subset_linter.Rd
diff --git a/tests/testthat/test-fixed_regex_linter.R b/tests/testthat/test-fixed_regex_linter.R
@@ -1,30 +1,31 @@
 test_that("fixed_regex_linter skips allowed usages", {
   linter <- fixed_regex_linter()
 
-  expect_lint("gsub('^x', '', y)", NULL, linter)
-  expect_lint("grep('x$', '', y)", NULL, linter)
-  expect_lint("sub('[a-zA-Z]', '', y)", NULL, linter)
-  expect_lint("grepl(fmt, y)", NULL, linter)
-  expect_lint(R"{regexec('\\s', '', y)}", NULL, linter)
-  expect_lint("grep('a(?=b)', x, perl = TRUE)", NULL, linter)
-  expect_lint("grep('0+1', x, perl = TRUE)", NULL, linter)
-  expect_lint("grep('1*2', x)", NULL, linter)
-  expect_lint("grep('a|b', x)", NULL, linter)
-  expect_lint(R"{grep('\\[|\\]', x)}", NULL, linter)
+  expect_no_lint("gsub('^x', '', y)", linter)
+  expect_no_lint("grep('x$', y)", linter)
+  expect_no_lint("grepv('x$', y)", linter)
+  expect_no_lint("sub('[a-zA-Z]', '', y)", linter)
+  expect_no_lint("grepl(fmt, y)", linter)
+  expect_no_lint(R"{regexec('\\s', '', y)}", linter)
+  expect_no_lint("grep('a(?=b)', x, perl = TRUE)", linter)
+  expect_no_lint("grep('0+1', x, perl = TRUE)", linter)
+  expect_no_lint("grep('1*2', x)", linter)
+  expect_no_lint("grep('a|b', x)", linter)
+  expect_no_lint(R"{grep('\\[|\\]', x)}", linter)
 
   # if fixed=TRUE is already set, regex patterns don't matter
-  expect_lint(R"{gsub('\\.', '', y, fixed = TRUE)}", NULL, linter)
+  expect_no_lint(R"{gsub('\\.', '', y, fixed = TRUE)}", linter)
 
   # ignore.case=TRUE implies regex interpretation
-  expect_lint("gsub('abcdefg', '', y, ignore.case = TRUE)", NULL, linter)
+  expect_no_lint("gsub('abcdefg', '', y, ignore.case = TRUE)", linter)
 
   # char classes starting with [] might contain other characters -> not fixed
-  expect_lint("sub('[][]', '', y)", NULL, linter)
-  expect_lint("sub('[][ ]', '', y)", NULL, linter)
-  expect_lint("sub('[],[]', '', y)", NULL, linter)
+  expect_no_lint("sub('[][]', '', y)", linter)
+  expect_no_lint("sub('[][ ]', '', y)", linter)
+  expect_no_lint("sub('[],[]', '', y)", linter)
 
   # wrapper functions don't throw
-  expect_lint("gregexpr(pattern = pattern, data, perl = TRUE, ...)", NULL, linter)
+  expect_no_lint("gregexpr(pattern = pattern, data, perl = TRUE, ...)", linter)
 })
 
 test_that("fixed_regex_linter blocks simple disallowed usages", {
@@ -36,6 +37,7 @@ test_that("fixed_regex_linter blocks simple disallowed usages", {
   expect_lint("gregexpr('a-z', y)", lint_msg, linter)
   expect_lint(R"{regexec('\\$', x)}", lint_msg, linter)
   expect_lint("grep('\n', x)", lint_msg, linter)
+  expect_lint("grepv('\n', x)", lint_msg, linter)
 
   # naming the argument doesn't matter (if it's still used positionally)
   expect_lint("gregexpr(pattern = 'a-z', y)", lint_msg, linter)
@@ -77,19 +79,19 @@ test_that("fixed_regex_linter catches regex like [.] or [$]", {
 test_that("fixed_regex_linter catches null calls to strsplit as well", {
   linter <- fixed_regex_linter()
 
-  expect_lint("strsplit(x, '^x')", NULL, linter)
-  expect_lint(R"{strsplit(x, '\\s')}", NULL, linter)
-  expect_lint("strsplit(x, 'a(?=b)', perl = TRUE)", NULL, linter)
-  expect_lint("strsplit(x, '0+1', perl = TRUE)", NULL, linter)
-  expect_lint("strsplit(x, 'a|b')", NULL, linter)
+  expect_no_lint("strsplit(x, '^x')", linter)
+  expect_no_lint(R"{strsplit(x, '\\s')}", linter)
+  expect_no_lint("strsplit(x, 'a(?=b)', perl = TRUE)", linter)
+  expect_no_lint("strsplit(x, '0+1', perl = TRUE)", linter)
+  expect_no_lint("strsplit(x, 'a|b')", linter)
 
-  expect_lint("tstrsplit(x, '1*2')", NULL, linter)
-  expect_lint("tstrsplit(x, '[a-zA-Z]')", NULL, linter)
-  expect_lint("tstrsplit(x, fmt)", NULL, linter)
+  expect_no_lint("tstrsplit(x, '1*2')", linter)
+  expect_no_lint("tstrsplit(x, '[a-zA-Z]')", linter)
+  expect_no_lint("tstrsplit(x, fmt)", linter)
 
   # if fixed=TRUE is already set, regex patterns don't matter
-  expect_lint(R"{strsplit(x, '\\.', fixed = TRUE)}", NULL, linter)
-  expect_lint(R"{strsplit(x, '\\.', fixed = T)}", NULL, linter)
+  expect_no_lint(R"{strsplit(x, '\\.', fixed = TRUE)}", linter)
+  expect_no_lint(R"{strsplit(x, '\\.', fixed = T)}", linter)
 })
 
 test_that("fixed_regex_linter catches calls to strsplit as well", {
@@ -106,26 +108,26 @@ test_that("fixed_regex_linter is more exact about distinguishing \\s from \\:",
   linter <- fixed_regex_linter()
   lint_msg <- rex::rex("This regular expression is static")
 
-  expect_lint(R"{grep('\\s', '', x)}", NULL, linter)
+  expect_no_lint(R"{grep('\\s', '', x)}", linter)
   expect_lint(R"{grep('\\:', '', x)}", lint_msg, linter)
 })
 
 ## tests for stringr functions
 test_that("fixed_regex_linter skips allowed stringr usages", {
   linter <- fixed_regex_linter()
 
-  expect_lint("str_replace(y, '[a-zA-Z]', '')", NULL, linter)
-  expect_lint("str_replace_all(y, '^x', '')", NULL, linter)
-  expect_lint("str_detect(y, fmt)", NULL, linter)
-  expect_lint(R"{str_extract(y, '\\s')}", NULL, linter)
-  expect_lint(R"{str_extract_all(y, '\\s')}", NULL, linter)
-  expect_lint("str_which(x, '1*2')", NULL, linter)
+  expect_no_lint("str_replace(y, '[a-zA-Z]', '')", linter)
+  expect_no_lint("str_replace_all(y, '^x', '')", linter)
+  expect_no_lint("str_detect(y, fmt)", linter)
+  expect_no_lint(R"{str_extract(y, '\\s')}", linter)
+  expect_no_lint(R"{str_extract_all(y, '\\s')}", linter)
+  expect_no_lint("str_which(x, '1*2')", linter)
 
   # if fixed() is already set, regex patterns don't matter
-  expect_lint(R"{str_replace(y, fixed('\\.'), '')}", NULL, linter)
+  expect_no_lint(R"{str_replace(y, fixed('\\.'), '')}", linter)
 
   # namespace qualification doesn't matter
-  expect_lint("stringr::str_replace(y, stringr::fixed('abcdefg'), '')", NULL, linter)
+  expect_no_lint("stringr::str_replace(y, stringr::fixed('abcdefg'), '')", linter)
 })
 
 test_that("fixed_regex_linter blocks simple disallowed usages of stringr functions", {
@@ -148,11 +150,11 @@ test_that("fixed_regex_linter catches calls to str_split as well", {
   linter <- fixed_regex_linter()
   lint_msg <- rex::rex("This regular expression is static")
 
-  expect_lint("str_split(x, '^x')", NULL, linter)
-  expect_lint("str_split(x, fmt)", NULL, linter)
+  expect_no_lint("str_split(x, '^x')", linter)
+  expect_no_lint("str_split(x, fmt)", linter)
 
   # if fixed() is already set, regex patterns don't matter
-  expect_lint(R"{str_split(x, fixed('\\.'))}", NULL, linter)
+  expect_no_lint(R"{str_split(x, fixed('\\.'))}", linter)
   expect_lint(R"{str_split(x, '\\.')}", lint_msg, linter)
   expect_lint("str_split(x, '[.]')", lint_msg, linter)
 })
@@ -163,8 +165,8 @@ test_that("str_replace_all's multi-replacement version is handled", {
   # While each of the replacements is fixed, and this _could_ in principle be replaced by
   #   a pipeline where each step does one of the replacements and fixed() is used, this is overkill.
   #   Instead, ensure that no lint is returned for this case
-  expect_lint('str_replace_all(x, c("one" = "1", "two" = "2", "three" = "3"))', NULL, linter)
-  expect_lint('grepl(c("a" = "b"), x)', NULL, linter)
+  expect_no_lint('str_replace_all(x, c("one" = "1", "two" = "2", "three" = "3"))', linter)
+  expect_no_lint('grepl(c("a" = "b"), x)', linter)
 })
 
 test_that("1- or 2-width octal escape sequences are handled", {
@@ -209,20 +211,20 @@ test_that("bracketed unicode escapes are caught", {
 
 test_that("escaped characters are handled correctly", {
   linter <- fixed_regex_linter()
-  expect_lint(R"{gsub('\n+', '', sql)}", NULL, linter)
-  expect_lint('gsub("\\n{2,}", "\n", D)', NULL, linter)
-  expect_lint(R'{gsub("[\r\n]", "", x)}', NULL, linter)
-  expect_lint(R'{gsub("\n $", "", y)}', NULL, linter)
-  expect_lint(R'{gsub("```\n*```r*\n*", "", x)}', NULL, linter)
-  expect_lint('strsplit(x, "(;|\n)")', NULL, linter)
-  expect_lint(R'{strsplit(x, "(;|\n)")}', NULL, linter)
-  expect_lint(R'{grepl("[\\W]", x, perl = TRUE)}', NULL, linter)
-  expect_lint(R'{grepl("[\\W]", x)}', NULL, linter)
+  expect_no_lint(R"{gsub('\n+', '', sql)}", linter)
+  expect_no_lint('gsub("\\n{2,}", "\n", D)', linter)
+  expect_no_lint(R'{gsub("[\r\n]", "", x)}', linter)
+  expect_no_lint(R'{gsub("\n $", "", y)}', linter)
+  expect_no_lint(R'{gsub("```\n*```r*\n*", "", x)}', linter)
+  expect_no_lint('strsplit(x, "(;|\n)")', linter)
+  expect_no_lint(R'{strsplit(x, "(;|\n)")}', linter)
+  expect_no_lint(R'{grepl("[\\W]", x, perl = TRUE)}', linter)
+  expect_no_lint(R'{grepl("[\\W]", x)}', linter)
 })
 
 # make sure the logic is properly vectorized
 test_that("single expression with multiple regexes is OK", {
-  expect_lint('c(grep("^a", x), grep("b$", x))', NULL, fixed_regex_linter())
+  expect_no_lint('c(grep("^a", x), grep("b$", x))', fixed_regex_linter())
 })
 
 test_that("fixed replacements vectorize and recognize str_detect", {
@@ -344,8 +346,8 @@ local({
 test_that("'unescaped' regex can optionally be skipped", {
   linter <- fixed_regex_linter(allow_unescaped = TRUE)
 
-  expect_lint("grepl('a', x)", NULL, linter)
-  expect_lint("str_detect(x, 'a')", NULL, linter)
+  expect_no_lint("grepl('a', x)", linter)
+  expect_no_lint("str_detect(x, 'a')", linter)
   expect_lint("grepl('[$]', x)", rex::rex('Use "$" with fixed = TRUE'), linter)
 })
 
@@ -358,18 +360,18 @@ local({
       lint_msg <- "This regular expression is static"
 
       expect_lint(paste("x", pipe, "grepl(pattern = 'a')"), lint_msg, linter)
-      expect_lint(paste("x", pipe, "grepl(pattern = '^a')"), NULL, linter)
-      expect_lint(paste("x", pipe, "grepl(pattern = 'a', fixed = TRUE)"), NULL, linter)
+      expect_no_lint(paste("x", pipe, "grepl(pattern = '^a')"), linter)
+      expect_no_lint(paste("x", pipe, "grepl(pattern = 'a', fixed = TRUE)"), linter)
       expect_lint(paste("x", pipe, "str_detect('a')"), lint_msg, linter)
-      expect_lint(paste("x", pipe, "str_detect('^a')"), NULL, linter)
-      expect_lint(paste("x", pipe, "str_detect(fixed('a'))"), NULL, linter)
+      expect_no_lint(paste("x", pipe, "str_detect('^a')"), linter)
+      expect_no_lint(paste("x", pipe, "str_detect(fixed('a'))"), linter)
 
       expect_lint(paste("x", pipe, "gsub(pattern = 'a', replacement = '')"), lint_msg, linter)
-      expect_lint(paste("x", pipe, "gsub(pattern = '^a', replacement = '')"), NULL, linter)
-      expect_lint(paste("x", pipe, "gsub(pattern = 'a', replacement = '', fixed = TRUE)"), NULL, linter)
+      expect_no_lint(paste("x", pipe, "gsub(pattern = '^a', replacement = '')"), linter)
+      expect_no_lint(paste("x", pipe, "gsub(pattern = 'a', replacement = '', fixed = TRUE)"), linter)
       expect_lint(paste("x", pipe, "str_replace('a', '')"), lint_msg, linter)
-      expect_lint(paste("x", pipe, "str_replace('^a', '')"), NULL, linter)
-      expect_lint(paste("x", pipe, "str_replace(fixed('a'), '')"), NULL, linter)
+      expect_no_lint(paste("x", pipe, "str_replace('^a', '')"), linter)
+      expect_no_lint(paste("x", pipe, "str_replace(fixed('a'), '')"), linter)
     },
     pipe = pipes,
     .test_name = names(pipes)

diff --git a/tests/testthat/test-regex_subset_linter.R b/tests/testthat/test-regex_subset_linter.R
@@ -5,7 +5,7 @@ test_that("regex_subset_linter skips allowed usages", {
 
 test_that("regex_subset_linter blocks simple disallowed usages", {
   linter <- regex_subset_linter()
-  lint_msg <- rex::rex("Prefer grep(pattern, x, ..., value = TRUE)")
+  lint_msg <- rex::rex("Prefer grepv(pattern, x, ...)")
 
   expect_lint("x[grep(ptn, x)]", lint_msg, linter)
   expect_lint("names(y)[grepl(ptn, names(y), perl = TRUE)]", lint_msg, linter)