From 73985f967306d420c5cb0228a0feceb41ae5792b Mon Sep 17 00:00:00 2001 From: TylerRinker Date: Mon, 11 Oct 2021 19:36:04 -0400 Subject: [PATCH] Add `, perl = TRUE` to `grepl()` calls per #51 --- R/check_text_logicals.R | 20 ++++++++++---------- R/drop_row.R | 2 +- R/has_endmark.R | 2 +- R/make_plural.R | 2 +- R/match_tokens.R | 2 +- R/replace_money.R | 4 ++-- R/replace_number.R | 4 ++-- R/replace_word_elongation.R | 2 +- R/utils.R | 2 +- man/replace_word_elongation.Rd | 2 +- 10 files changed, 21 insertions(+), 21 deletions(-) diff --git a/R/check_text_logicals.R b/R/check_text_logicals.R index 2ebc770..fd254ac 100644 --- a/R/check_text_logicals.R +++ b/R/check_text_logicals.R @@ -326,7 +326,7 @@ contraction <- function(x){ "([a-z]'(nt|t|ve|d|ll|m|re))|('(cause|tis|twas))|(\\b(he|how|it|", "let|she|that|there|what|when|where|who|why)'s)" ), - x, ignore.case = TRUE + x, ignore.case = TRUE, perl = TRUE ) } #contraction(c('jon\'s a good man', "'cause I want to", '4was\'nt', 'the dog', @@ -337,7 +337,7 @@ date <- qr2fun('rm_date') ## digits digit <- function(x) { - grepl('\\d', x) + grepl('\\d', x, perl = TRUE) } ## email addresses @@ -350,12 +350,12 @@ emoticon <- qr2fun('rm_emoticon') ## just white space empty <- function(x) { #any(grepl("^\\s*$", stats::na.omit(x))) - grepl("^\\s*$", x) + grepl("^\\s*$", x, perl = TRUE) } ## are there escaped backslashes escaped <- function(x) { - grepl("[\\\\]", x) & !grepl("\\\"|\\\'|\\\`", x) + grepl("[\\\\]", x) & !grepl("\\\"|\\\'|\\\`", x, perl = TRUE) } @@ -366,7 +366,7 @@ hash <- qr2fun('rm_hash') ## contains html html <- function(x) { pat <- paste0("<[^>]+>|", paste(html_symbols[['html']], collapse ="|")) - grepl(pat, x) + grepl(pat, x, perl = TRUE) } ## incomplete sentences usually indicated by 2-4 enmarks that are @@ -376,12 +376,12 @@ incomplete <- function(x) { "\\?*\\?[.]+|[.?!]*\\? [.][.?!]+|[.?!]*\\. [.?!]+|[.?!]+\\. [.?!]*|", "[.?!]+\\.[.?!]*|[.?!]*\\.[.?!]+" ) - grepl(pat, x) + grepl(pat, x, perl = TRUE) } ## contains kerning kern <- function(x) { - grepl('(([A-Z]\\s+){2,}[A-Z])', x) + grepl('(([A-Z]\\s+){2,}[A-Z])', x, perl = TRUE) } ## check if something is a list of vectors @@ -419,7 +419,7 @@ misspelled <- function(x){ ## Does it have no letters no_alpha <- function(x) { - !is.na(x) & !grepl("[a-zA-Z]", x) + !is.na(x) & !grepl("[a-zA-Z]", x, perl = TRUE) } @@ -430,14 +430,14 @@ no_endmark <- function(x) { ## is comma with no space no_space_after_comma <- function(x) { - grepl("(,)([^ ])", x) + grepl("(,)([^ ])", x, perl = TRUE) } ## are there any non ascii characters non_ascii <- function(x) { - grepl("[^ -~]", x) & !is.na(x) & !grepl("^\\s*$", x) + grepl("[^ -~]", x, perl = TRUE) & !is.na(x) & !grepl("^\\s*$", x, perl = TRUE) } ## not character diff --git a/R/drop_row.R b/R/drop_row.R index 382063e..a9ebeff 100644 --- a/R/drop_row.R +++ b/R/drop_row.R @@ -79,7 +79,7 @@ drop_empty_row <- function(dataframe) { x <- apply(dataframe, 1, function(x) { paste(stats::na.omit(x), collapse = "") }) - return(dataframe[!grepl("^\\s*$", x), ,drop = FALSE] ) + return(dataframe[!grepl("^\\s*$", x, perl = TRUE), ,drop = FALSE] ) } diff --git a/R/has_endmark.R b/R/has_endmark.R index 0189ce3..b638b02 100644 --- a/R/has_endmark.R +++ b/R/has_endmark.R @@ -21,7 +21,7 @@ has_endmark <- function(x, endmarks = c('?', '.', '!'), ...){ !is.na(x) & grepl( sprintf('[%s]\\s*$', paste(endmarks, collapse = "")), - x, + x, perl = TRUE, ... ) } diff --git a/R/make_plural.R b/R/make_plural.R index 76e7cad..76653a3 100644 --- a/R/make_plural.R +++ b/R/make_plural.R @@ -22,7 +22,7 @@ make_plural <- function (x, keep.original = FALSE, hits <- match(tolower(x), tolower(irregular[[1]])) ends <- "(sh?|x|z|ch)$" - pluralify <- ifelse(grepl(ends, x), "es", "s") + pluralify <- ifelse(grepl(ends, x, perl = TRUE), "es", "s") out <- gsub("ys$", "ies", paste0(x, pluralify)) out[which(!is.na(hits))] <- irregular[[2]][hits[which(!is.na(hits))]] diff --git a/R/match_tokens.R b/R/match_tokens.R index 06b6802..1ed2cab 100644 --- a/R/match_tokens.R +++ b/R/match_tokens.R @@ -23,7 +23,7 @@ match_tokens <- function(x, pattern, ignore.case = TRUE, ...){ y <- rm_na(unique(unlist(textshape::split_token(x, lower = ignore.case)))) if (isTRUE(ignore.case)) pattern <- tolower(pattern) - y[grepl(paste(paste0('(', pattern, ')'), collapse = '|'), y)] + y[grepl(paste(paste0('(', pattern, ')'), collapse = '|'), y, perl = TRUE)] } diff --git a/R/replace_money.R b/R/replace_money.R index 9e1590b..3f0919d 100644 --- a/R/replace_money.R +++ b/R/replace_money.R @@ -37,8 +37,8 @@ replace_money <- function(x, pattern = '(-?)([$])([0-9,]+)(\\.\\d{2})?', replace_money_fun <- function(x, decimal = ' and '){ - sign <- ifelse(grepl('^-', x), 'negative ', '') - if (grepl('\\.', x)) { + sign <- ifelse(grepl('^-', x, perl = TRUE), 'negative ', '') + if (grepl('\\.', x, perl = TRUE)) { number <- replace_number( gsub( '\\.', diff --git a/R/replace_number.R b/R/replace_number.R index c91cfa9..6f43cf1 100644 --- a/R/replace_number.R +++ b/R/replace_number.R @@ -110,8 +110,8 @@ replace_number <- function(x, num.paste = FALSE, remove = FALSE, ...) { num_df[['den']][is.na(num_df[['den']])] <- "" num_df[['int']] <- eng(num_df[['integer']], ...) - is_decimal <- grepl("\\.", num_df[[1]]) - not_integer_decimal <- !grepl('\\d\\.', num_df[[1]]) + is_decimal <- grepl("\\.", num_df[[1]], perl = TRUE) + not_integer_decimal <- !grepl('\\d\\.', num_df[[1]], perl = TRUE) num_df[['int']][is_decimal & not_integer_decimal] <- "" diff --git a/R/replace_word_elongation.R b/R/replace_word_elongation.R index 967b4cf..5fb4028 100644 --- a/R/replace_word_elongation.R +++ b/R/replace_word_elongation.R @@ -28,7 +28,7 @@ #' 2011 Conference on Empirical Methods in Natural Language Processing (pp. #' 562-570). Edinburgh, Scotland. Retrieved from #' http://www.aclweb.org/anthology/D11-1052 \cr \cr -#' \url{http://storage.googleapis.com/books/ngrams/books/datasetsv2.html} \cr \cr +#' \url{https://storage.googleapis.com/books/ngrams/books/datasetsv2.html} \cr \cr #' \url{https://www.theatlantic.com/magazine/archive/2013/03/dragging-it-out/309220} \cr \cr #' \url{https://english.stackexchange.com/questions/189517/is-there-a-name-term-for-multiplied-vowels} #' @export diff --git a/R/utils.R b/R/utils.R index d6888bf..46bdab2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -236,7 +236,7 @@ drop_sci_note <- function(x, ...){ x <- as.character(as.numeric(x)) - locs <- grepl('e\\+', x) + locs <- grepl('e\\+', x, perl = TRUE) x[locs] <- unlist(Map(function(b, e) { diff --git a/man/replace_word_elongation.Rd b/man/replace_word_elongation.Rd index 76074a1..7d52e81 100644 --- a/man/replace_word_elongation.Rd +++ b/man/replace_word_elongation.Rd @@ -49,7 +49,7 @@ characteristics of small business information systems. Proceedings of the 2011 Conference on Empirical Methods in Natural Language Processing (pp. 562-570). Edinburgh, Scotland. Retrieved from http://www.aclweb.org/anthology/D11-1052 \cr \cr -\url{http://storage.googleapis.com/books/ngrams/books/datasetsv2.html} \cr \cr +\url{https://storage.googleapis.com/books/ngrams/books/datasetsv2.html} \cr \cr \url{https://www.theatlantic.com/magazine/archive/2013/03/dragging-it-out/309220} \cr \cr \url{https://english.stackexchange.com/questions/189517/is-there-a-name-term-for-multiplied-vowels} }