-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
512 lines (512 loc) · 18.8 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
s_cut <- string_cut
#' @rdname string_trim
string_trim <- function(s, which = c('both', 'left', 'right'), whitespace = '[ \t\r\n]') {
# Make sure arguments match "which".
which <- match.arg(which)
# Define functions for easier referencing.
## Remove specific patterns
sub_out <- function(x, r) gsub(r, '', x, perl = TRUE)
## vectorize switch so that we can apply the substitutions in a vectorized manner.
switch_out <- function(x, ...) sapply(x, switch, ...)
# Conditions for each "which" input.
cond_beg <- '^' %&% whitespace %&% '+'
cond_end <- whitespace %&% '+$'
cond_both <- "^" %&% whitespace %&% "*|" %&% whitespace %&% "*+$"
# Output should be based on user input for "which".
output <- switch_out(which,
left = sub_out(s, cond_beg),
right = sub_out(s, cond_end),
both = sub_out(s, cond_both))
# Initial output is a matrix, so flatten it to a vector so that
# users can use string_trim() on dataframe columns, for example.
output <- as.vector(output)
output
}
s_trim <- string_trim
trim <- string_trim
string_cull(rownames(mtcars), '^M|a')
#' Spot patterns in strings
#'
#' @description Spot patterns in strings.
#'
#' @usage string_spot(s, pattern, value = TRUE, ...)
#' string_spoti(s, pattern, value = FALSE, ...)
#' string_spotl(s, pattern, ...)
#' string_spotm(s, pattern, invert = FALSE, ...)
#' string_replace(s, search, replace, ...)
#' string_remove(s, remove, ...)
#' string_countm(s, pattern, ...)
#'
#' @param s A string (character) vector.
#' @param pattern A regular expression pattern.
#' @param value Boolean value (\code{TRUE/FALSE}) for whether to return the actual values (the former) or the indices (the latter).
#' @param invert Boolean value (\code{TRUE/FALSE}) for whether to find actual matches (\code{FALSE}; default) or non-matches (\code{TRUE}).
#' @param search A pattern to search.
#' @param replace The string to replace the searched pattern.
#' @param remove The string to remove.
#' @param ... Parameters to pass to \code{grep()}, \code{grepl()}, \code{gsub()}, or \code{gregexpr()}.
#'
#' @return Character vector for all functions except the \code{string_spotl()}, \code{string_spoti()}, and \code{string_countm()} functions, which produce Boolean, numeric, and numeric vectors respectively.
#'
#' @details The function \code{string_spot()} subsets a vector to the values matching a given pattern. Optional inputs get passed to \code{grep()}. Synonyms are \code{s_spot()} and \code{spot()}.
#'
#' The function \code{string_spoti()} subsets a vector to the indices matching a given pattern. Optional inputs get passed to \code{grep()}. Synonyms are \code{s_spoti()} and \code{spoti()}.
#'
#' The function \code{string_spotl()} detects whether a pattern exists in a vector, outputting a Boolean value (\code{TRUE/FALSE}). Optional inputs get passed to \code{grepl()}. Synonyms are \code{s_spotl()} and \code{spotl()}.
#'
#' The \code{string_spotm()} function spots pattern matches and returns \code{NA} if none are found. Optional inputs get passed to \code{grepl()}. Synonyms are \code{s_spotm()} and \code{spotm()}.
#'
#' The \code{string_replace()} function acts the same as \code{gsub()} with the inputs ordered differently. Optional inputs get passed to \code{gsub()}. Synonyms are \code{s_replace()}, \code{search_replace()}, \code{sr()}, \code{find_replace()}, and \code{fr()}.
#'
#' The \code{string_remove()} function blanks out a matching pattern. Optional inputs get passed to \code{gsub()}. Synonym is \code{s_remove()}.
#'
#' The \code{string_countm()} counts the number of matches in a string. Optional inputs get passed to \code{gregexpr()}. Synonyms are \code{s_countm()} and \code{countm()}.
#'
#' @examples
#' rn <- rownames(mtcars)
#'
#' string_spot(rn, "^M")
#' string_spot(rn, '^M', invert = TRUE)
#' string_spoti(rn, "^M")
#' string_spotl(rn, "^M")
#' string_spotm(rn, "^M")
#'
#' string_replace(rn, "^M", "Z")
#' string_remove(rn, "^M")
#' string_countm(rn, 'a')
#'
#'
#' @seealso \url{https://github.com/robertschnitman/stringops}
#' @rdname string_spots
#' @rdname string_spot
string_spot <- function(s, pattern, value = TRUE, ...) {
# String vector length must be greater than 0.
stopifnot(length(s) > 0)
output <- grep(pattern, s, value = value, ...)
output
}
s_spot <- string_spot
spot <- string_spot
#' @rdname string_spoti
string_spoti <- function(s, pattern, value = FALSE, ...) string_spot(s, pattern, value = value, ...)
s_spoti <- string_spoti
spoti <- string_spoti
#' @rdname string_spotl
string_spotl <- function(s, pattern, ...) {
output <- grepl(pattern, s, ...)
output
}
s_spotl <- string_spotl
spotl <- string_spotl
#' @rdname string_spotm
string_spotm <- function(s, pattern, invert = FALSE, ...) {
if (invert == FALSE) {
output <- ifelse(grepl(pattern, s), s, NA_character_)
} else {
output <- ifelse(!grepl(pattern, s), s, NA_character_)
}
output
}
s_spotm <- string_spotm
spotm <- string_spotm
#' @rdname string_replace
string_replace <- function(s, search, replace, ...) gsub(search, replace, s, ...)
s_replace <- string_replace
find_replace <- string_replace
fr <- string_replace
search_replace <- string_replace
sr <- string_replace
#' @rdname string_remove
string_remove <- function(s, remove, ...) gsub(remove, "", s, ...)
s_remove <- string_remove
#' @rdname string_count
string_countm <- function(s, pattern, ...) {
# https://stringr.tidyverse.org/articles/from-base.html#overall-differences-1
locations <- gregexpr(pattern = pattern, text = s, ...)
matches <- lapply(locations, function(x) attr(x, "match.length"))
# -1 for no matches
output <- sapply(matches, function(x) sum(ifelse(x == -1, 0, 1)))
output
}
s_countm <- string_countm
countm <- string_countm
string_cull(rownames(mtcars), '^M|a')
#' Concatenate strings together
#'
#' @description Concatenate strings together.
#'
#' @usage %&%
#' string_concat(a, b)
#' string_prefix(s, prefix)
#' string_suffix(s, suffix)
#' string_flag(s, flag, width, which = c('left', 'right', 'both'))
#' string_flagv(s, ...)
#'
#' @param s A string (character) vector.
#' @param a A string (character) vector.
#' @param b A string (character) vector.
#' @param collapse A string by which to separate the elements of a vector.
#' @param flag Character or number to append to a string.
#' @param width Number of characters a string should be.
#' @param which Side to flag a string.
#' @param ... Parameters passed to \code{string_flag()}.
#'
#' @return Character vector.
#'
#' @details The \code{\%&\%} operator acts similar to BASIC's \code{&}, concatenating two elements together.
#'
#' \code{string_prefix()} and its synonyms prefix a string to a vector, while \code{string_suffix()} and its synonyms suffix a string to a vector.
#'
#' \code{string_flag()} is a scalar function that appends a character or number to a string if it does not meet a specified width.
#'
#' \code{string_flagv()} is a vectorized version of \code{string_flag()}.
#'
#' The synonym pattern of these functions are \code{s_*()} and \code{*()} (replace asterisks with prefix, suffix, and flag(v)).
#'
#' @examples
#' "a" %&% "b"
#' string_prefix(rownames(mtcars), "A")
#' string_suffix(rownames(mtcars), "Z")
#' string_flag('123456789', '0', 10)
#'
#' @seealso \url{https://github.com/robertschnitman/stringops}
#' @rdname string_concat
string_concat <- function(a, b) paste0(a, b)
#' @rdname string_concat2
`%&%` <- string_concat
#' @rdname string_prefix
string_prefix <- function(s, prefix) prefix %&% s
s_prefix <- string_prefix
prefix <- string_prefix
#' @rdname string_suffix
string_suffix <- function(s, suffix) s %&% suffix
s_suffix <- string_suffix
suffix <- string_suffix
#' @rdname string_flag
string_flag <- function(s, flag, width, which = c('left', 'right', 'both')) {
# string_flag() is a scalar function
# Check which input
which <- match.arg(which)
# Repeat flag a specified number of times.
diff <- width - nchar(s)
diff_adj <- ifelse(diff <= 0, 1, diff)
# rep() is a vector: we need a single string of the flags.
flags <- string_join(rep(flag, diff_adj))
# Remove flags if the length of the string is greater than the specified width.
flags <- ifelse(nchar(s) > width,
string_remove(flags, flags),
flags)
# Special case for "both".
if (which == 'both') {
flags <- list(l = string_join(rep(flag, diff_adj/2)),
r = string_join(rep(flag, diff_adj/2)))
}
# Output should be a string vector based on the "which" input.
output <- switch(which,
left = flags %&% s,
right = s %&% flags,
both = flags$l %&% s %&% flags$r)
output
}
#' @rdname string_flagv
string_flagv <- function(s, ...) {
output <- sapply(s, function(x) string_flag(x, ...))
names(output) <- NULL
output
}
s_flagv <- string_flagv
flagv <- string_flagv
string_cull(rownames(mtcars), '^M|a')
roxygen2::roxygenise()
roxygen2::roxygenise()
pkgdown::build_site()
roxygen2::roxygenise()
warnings()
warnings()
roxygen2::roxygenise()
pkgdown::build_site()
#' Spot patterns in strings
#'
#' @description Spot patterns in strings.
#'
#' @usage string_spot(s, pattern, value = TRUE, ...)
#' string_spoti(s, pattern, value = FALSE, ...)
#' string_spotl(s, pattern, ...)
#' string_spotm(s, pattern, invert = FALSE, ...)
#' string_replace(s, search, replace, ...)
#' string_remove(s, remove, ...)
#' string_countm(s, pattern, ...)
#'
#' @param s A string (character) vector.
#' @param pattern A regular expression pattern.
#' @param value Boolean value (\code{TRUE/FALSE}) for whether to return the actual values (the former) or the indices (the latter).
#' @param invert Boolean value (\code{TRUE/FALSE}) for whether to find actual matches (\code{FALSE}; default) or non-matches (\code{TRUE}).
#' @param search A pattern to search.
#' @param replace The string to replace the searched pattern.
#' @param remove The string to remove.
#' @param ... Parameters to pass to \code{grep()}, \code{grepl()}, \code{gsub()}, or \code{gregexpr()}.
#'
#' @return Character vector for all functions except the \code{string_spotl()}, \code{string_spoti()}, and \code{string_countm()} functions, which produce Boolean, numeric, and numeric vectors respectively.
#'
#' @details The function \code{string_spot()} subsets a vector to the values matching a given pattern. Optional inputs get passed to \code{grep()}. Synonyms are \code{s_spot()} and \code{spot()}.
#'
#' The function \code{string_spoti()} subsets a vector to the indices matching a given pattern. Optional inputs get passed to \code{grep()}. Synonyms are \code{s_spoti()} and \code{spoti()}.
#'
#' The function \code{string_spotl()} detects whether a pattern exists in a vector, outputting a Boolean value (\code{TRUE/FALSE}). Optional inputs get passed to \code{grepl()}. Synonyms are \code{s_spotl()} and \code{spotl()}.
#'
#' The \code{string_spotm()} function spots pattern matches and returns \code{NA} if none are found. Optional inputs get passed to \code{grepl()}. Synonyms are \code{s_spotm()} and \code{spotm()}.
#'
#' The \code{string_replace()} function acts the same as \code{gsub()} with the inputs ordered differently. Optional inputs get passed to \code{gsub()}. Synonyms are \code{s_replace()}, \code{search_replace()}, \code{sr()}, \code{find_replace()}, and \code{fr()}.
#'
#' The \code{string_remove()} function blanks out a matching pattern. Optional inputs get passed to \code{gsub()}. Synonym is \code{s_remove()}.
#'
#' The \code{string_countm()} counts the number of matches in a string. Optional inputs get passed to \code{gregexpr()}. Synonyms are \code{s_countm()} and \code{countm()}.
#'
#' @examples
#' rn <- rownames(mtcars)
#'
#' string_spot(rn, "^M")
#' string_spot(rn, '^M', invert = TRUE)
#' string_spoti(rn, "^M")
#' string_spotl(rn, "^M")
#' string_spotm(rn, "^M")
#'
#' string_replace(rn, "^M", "Z")
#' string_remove(rn, "^M")
#' string_countm(rn, 'a')
#'
#'
#' @seealso \url{https://github.com/robertschnitman/stringops}
#' @rdname string_spots
#' @rdname string_spot
string_spot <- function(s, pattern, value = TRUE, ...) {
# String vector length must be greater than 0.
stopifnot(length(s) > 0)
output <- grep(pattern, s, value = value, ...)
output
}
s_spot <- string_spot
spot <- string_spot
#' @rdname string_spoti
string_spoti <- function(s, pattern, value = FALSE, ...) string_spot(s, pattern, value = value, ...)
s_spoti <- string_spoti
spoti <- string_spoti
#' @rdname string_spotl
string_spotl <- function(s, pattern, ...) {
output <- grepl(pattern, s, ...)
output
}
s_spotl <- string_spotl
spotl <- string_spotl
#' @rdname string_spotm
string_spotm <- function(s, pattern, invert = FALSE, ...) {
if (invert == FALSE) {
output <- ifelse(grepl(pattern, s), s, NA_character_)
} else {
output <- ifelse(!grepl(pattern, s), s, NA_character_)
}
output
}
s_spotm <- string_spotm
spotm <- string_spotm
#' @rdname string_replace
string_replace <- function(s, search, replace, ...) gsub(search, replace, s, ...)
s_replace <- string_replace
find_replace <- string_replace
fr <- string_replace
search_replace <- string_replace
sr <- string_replace
#' @rdname string_remove
string_remove <- function(s, remove, ...) gsub(remove, "", s, ...)
s_remove <- string_remove
#' @rdname string_count
string_countm <- function(s, pattern, ...) {
# https://stringr.tidyverse.org/articles/from-base.html#overall-differences-1
locations <- gregexpr(pattern = pattern, text = s, ...)
matches <- lapply(locations, function(x) attr(x, "match.length"))
# -1 for no matches
output <- sapply(matches, function(x) sum(ifelse(x == -1, 0, 1)))
output
}
s_countm <- string_countm
countm <- string_countm
#' Join elements into a single string or vector.
#'
#' @description Join (collapse) elements into a single string or vector.
#'
#' @usage string_join(s, collapse)
#' string_flatten(l)
#'
#' @param s A string (character) vector.
#' @param l A list.
#' @param collapse A string by which to separate the elements of a vector.
#'
#' @return Character vector.
#'
#' @details
#' \code{string_join()} joins vector elements into a single string.
#'
#' \code{string_flatten()} joins list elements into a vector.
#'
#' The synonym pattern of these functions are \code{s_*()} and \code{*()} (replace asterisks with join and flatten).
#'
#' @examples
#' string_join(rownames(mtcars), ", ")
#' string_flatten(list('a', 'b', 'c'))
#' @seealso \url{https://github.com/robertschnitman/stringops}
#' @rdname string_join
string_join <- function(s, collapse = "") {
output <- paste0(s, collapse = collapse)
if (collapse != "") {
# Remove last separator.
output <- string_remove(output, collapse %&% "$")
}
output
}
s_join <- string_join
join <- string_join
#' @rdname string_flatten
string_flatten <- function(l) {
# Type-check
if (typeof(l) != 'list') {
stop('Input detected is of type ' %&% typeof(l) %&% " instead of list. Please use a list.")
}
# Output
output <- paste0(l, sep = "")
output
}
s_flatten <- string_flatten
flatten <- string_flatten
string_spotm(rownames(mtcars), '^M')
string_spotm(rownames(mtcars), '^M.*')
string_spotm(rownames(mtcars), '^Mg$')
string_spotm(rownames(mtcars), '^M g$')
string_spotm(rownames(mtcars), '^M|g$')
string_remove
#' @rdname string_cull
string_cull <- function(s, pattern, get_all = FALSE, collapse = ',', ...) {
# Find patterns
greg <- gregexpr(pattern, s, ...)
# Get matches
regm <- regmatches(s, greg)
# Flatten matched patterns
flat <- sapply(regm, string_join, collapse = collapse)
# Get only first match, or all?
if (get_all == FALSE) {
flat <- string_remove(flat, ",.*")
} else {
flat <- flat
}
# Output should be missing if no matches are found.
output <- ifelse(flat == "", NA_character_, flat)
output
}
string_cull(rownames(mtcars), '^M')
#' Concatenate strings together
#'
#' @description Concatenate strings together.
#'
#' @usage %&%
#' string_concat(a, b)
#' string_prefix(s, prefix)
#' string_suffix(s, suffix)
#' string_flag(s, flag, width, which = c('left', 'right', 'both'))
#' string_flagv(s, ...)
#'
#' @param s A string (character) vector.
#' @param a A string (character) vector.
#' @param b A string (character) vector.
#' @param collapse A string by which to separate the elements of a vector.
#' @param flag Character or number to append to a string.
#' @param width Number of characters a string should be.
#' @param which Side to flag a string.
#' @param ... Parameters passed to \code{string_flag()}.
#'
#' @return Character vector.
#'
#' @details The \code{\%&\%} operator acts similar to BASIC's \code{&}, concatenating two elements together.
#'
#' \code{string_prefix()} and its synonyms prefix a string to a vector, while \code{string_suffix()} and its synonyms suffix a string to a vector.
#'
#' \code{string_flag()} is a scalar function that appends a character or number to a string if it does not meet a specified width.
#'
#' \code{string_flagv()} is a vectorized version of \code{string_flag()}.
#'
#' The synonym pattern of these functions are \code{s_*()} and \code{*()} (replace asterisks with prefix, suffix, and flag(v)).
#'
#' @examples
#' "a" %&% "b"
#' string_prefix(rownames(mtcars), "A")
#' string_suffix(rownames(mtcars), "Z")
#' string_flag('123456789', '0', 10)
#'
#' @seealso \url{https://github.com/robertschnitman/stringops}
#' @rdname string_concat
string_concat <- function(a, b) paste0(a, b)
#' @rdname string_concat2
`%&%` <- string_concat
#' @rdname string_prefix
string_prefix <- function(s, prefix) prefix %&% s
s_prefix <- string_prefix
prefix <- string_prefix
#' @rdname string_suffix
string_suffix <- function(s, suffix) s %&% suffix
s_suffix <- string_suffix
suffix <- string_suffix
#' @rdname string_flag
string_flag <- function(s, flag, width, which = c('left', 'right', 'both')) {
# string_flag() is a scalar function
# Check which input
which <- match.arg(which)
# Repeat flag a specified number of times.
diff <- width - nchar(s)
diff_adj <- ifelse(diff <= 0, 1, diff)
# rep() is a vector: we need a single string of the flags.
flags <- string_join(rep(flag, diff_adj))
# Remove flags if the length of the string is greater than the specified width.
flags <- ifelse(nchar(s) > width,
string_remove(flags, flags),
flags)
# Special case for "both".
if (which == 'both') {
flags <- list(l = string_join(rep(flag, diff_adj/2)),
r = string_join(rep(flag, diff_adj/2)))
}
# Output should be a string vector based on the "which" input.
output <- switch(which,
left = flags %&% s,
right = s %&% flags,
both = flags$l %&% s %&% flags$r)
output
}
#' @rdname string_flagv
string_flagv <- function(s, ...) {
output <- sapply(s, function(x) string_flag(x, ...))
names(output) <- NULL
output
}
s_flagv <- string_flagv
flagv <- string_flagv
string_cull(rownames(mtcars), '^M')
string_cull(rownames(mtcars), '^M|a')
string_cull(rownames(mtcars), '^M', TRUE)
args(string_cull)
string_cull(rownames(mtcars), '^M|a', TRUE)
roxygen2::roxygenise()
pkgdown::build_site()
cull
s_cull <- string_cull
cull <- string_cull
cull
string_cull
dim(rownames(mtcars))
dim(mtcars(mpg))
dim(mtcars$mpg)
dim(mtcars$mpg) == 2
dim(as.vector(mtcars$mpg)) == 2
dim(as.vector(mtcars$mpg))
is.vector(rownames(mtcars))
is.vector(mtcars$mpg)
substr(mtcars, 1, 2)
roxygen2::roxygenise()
pkgdown::build_site()