tidymodels · EmilHvitfeldt · Apr 1, 2024 · Mar 29, 2024 · Mar 29, 2024 · Mar 29, 2024
diff --git a/R/clean_levels.R b/R/clean_levels.R
@@ -24,7 +24,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `original`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/clean_names.R b/R/clean_names.R
@@ -19,7 +19,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/dummy_hash.R b/R/dummy_hash.R
@@ -36,15 +36,15 @@
 #' The argument `num_terms` controls the number of indices that the hashing
 #' function will map to. This is the tuning parameter for this transformation.
 #' Since the hashing function can map two different tokens to the same index,
-#' will a higher value of `num_terms` result in a lower chance of collision.
+#' a higher value of `num_terms` will result in a lower chance of collision.
 #'
 #' @template details-prefix
 #'
 #' @details
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, `num_terms`, `collapse`, and `id`:
 #' 
 #' \describe{

diff --git a/R/lda.R b/R/lda.R
@@ -9,10 +9,10 @@
 #' @template args-trained
 #' @template args-columns
 #' @param lda_models A WarpLDA model object from the text2vec package. If left
-#'   to NULL, the default, will it train its model based on the training data.
+#'   to NULL, the default, it will train its model based on the training data.
 #'   Look at the examples for how to fit a WarpLDA model.
 #' @param num_topics integer desired number of latent topics.
-#' @param prefix A prefix for generated column names, default to "lda".
+#' @param prefix A prefix for generated column names, defaults to "lda".
 #' @template args-keep_original_cols
 #' @template args-skip
 #' @template args-id
@@ -21,7 +21,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `num_topics`, and `id`:
 #' 
 #' \describe{

diff --git a/R/lemma.R b/R/lemma.R
@@ -23,7 +23,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/ngram.R b/R/ngram.R
@@ -23,14 +23,14 @@
 #' @details
 #'
 #' The use of this step will leave the ordering of the tokens meaningless. If
-#' `min_num_tokens <  num_tokens` then the tokens order in increasing fashion
-#' with respect to the number of tokens in the n-gram. If `min_num_tokens = 1`
-#' and `num_tokens = 3` then the output contains all the 1-grams followed by all
+#' `min_num_tokens <  num_tokens` then the tokens will be ordered in increasing 
+#' fashion with respect to the number of tokens in the n-gram. If `min_num_tokens = 1`
+#' and `num_tokens = 3` then the output will contain all the 1-grams followed by all
 #' the 2-grams followed by all the 3-grams.
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/pos_filter.R b/R/pos_filter.R
@@ -25,7 +25,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/sequence_onehot.R b/R/sequence_onehot.R
@@ -18,7 +18,7 @@
 #' @param vocabulary A character vector, characters to be mapped to integers.
 #'   Characters not in the vocabulary will be encoded as 0. Defaults to
 #'   `letters`.
-#' @param prefix A prefix for generated column names, default to "seq1hot".
+#' @param prefix A prefix for generated column names, defaults to "seq1hot".
 #' @template args-keep_original_cols
 #' @template args-skip
 #' @template args-id
@@ -33,12 +33,12 @@
 #'
 #' The string will be capped by the sequence_length argument, strings shorter
 #' than sequence_length will be padded with empty characters. The encoding will
-#' assign a integer to each character in the vocabulary, and will encode
+#' assign an integer to each character in the vocabulary, and will encode
 #' accordingly. Characters not in the vocabulary will be encoded as 0.
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `vocabulary`, `token`, and `id`:
 #' 
 #' \describe{

diff --git a/R/show_tokens.R b/R/show_tokens.R
@@ -1,7 +1,7 @@
 #' Show token output of recipe
 #'
-#' Returns the tokens as a list of character vector of a recipe. This function
-#' can be useful for diagnostics doing recipe construction but should not be
+#' Returns the tokens as a list of character vectors of a recipe. This function
+#' can be useful for diagnostics during recipe construction but should not be
 #' used in final recipe steps. Note that this function will both prep() and
 #' bake() the recipe it is used on.
 #'

diff --git a/R/stem.R b/R/stem.R
@@ -28,7 +28,7 @@
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `is_custom_stemmer`, and `id`:
 #' 
 #' \describe{

diff --git a/R/stopwords.R b/R/stopwords.R
@@ -23,18 +23,18 @@
 #'
 #' @details
 #'
-#' Stop words are words which sometimes are remove before natural language
+#' Stop words are words which sometimes are removed before natural language
 #' processing tasks. While stop words usually refer to the most common words in
 #' the language, there is no universal stop word list.
 #'
 #' The argument `custom_stopword_source` allows you to pass a character vector
-#' to filter against. With the `keep` argument one can specify to keep the words
+#' to filter against. With the `keep` argument one can specify words to keep
 #' instead of removing them, thus allowing you to select words with a combination of
 #' these two arguments.
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, `keep`, and `id`:
 #' 
 #' \describe{

diff --git a/R/text_normalization.R b/R/text_normalization.R
@@ -21,7 +21,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `normalization_form`, and `id`:
 #' 
 #' \describe{

diff --git a/R/textfeature.R b/R/textfeature.R
@@ -9,8 +9,8 @@
 #' @template args-trained
 #' @template args-columns
 #' @param extract_functions A named list of feature extracting functions.
-#'   default to `count_functions`. See details for more information.
-#' @param prefix A prefix for generated column names, default to "textfeature".
+#'   Defaults to `count_functions`. See details for more information.
+#' @param prefix A prefix for generated column names, defaults to "textfeature".
 #' @template args-keep_original_cols
 #' @template args-skip
 #' @template args-id
@@ -29,7 +29,7 @@
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `functions`, and `id`:
 #' 
 #' \describe{

diff --git a/R/texthash.R b/R/texthash.R
@@ -37,7 +37,7 @@
 #'
 #' @details # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/tf.R b/R/tf.R
@@ -33,12 +33,12 @@
 #' issues. A good strategy is to start with a low token count and go up
 #' according to how much RAM you want to use.
 #'
-#' Term frequency is a weight of how many times each token appear in each
+#' Term frequency is a weight of how many times each token appears in each
 #' observation. There are different ways to calculate the weight and this step
 #' can do it in a couple of ways. Setting the argument `weight_scheme` to
 #' "binary" will result in a set of binary variables denoting if a token is
 #' present in the observation. "raw count" will count the times a token is
-#' present in the observation. "term frequency" will divide the count with the
+#' present in the observation. "term frequency" will divide the count by the
 #' total number of words in the document to limit the effect of the document
 #' length as longer documents tend to have the word present more times but not
 #' necessarily at a higher percentage. "log normalization" takes the log of 1
@@ -54,7 +54,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/tfidf.R b/R/tfidf.R
@@ -51,7 +51,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `token`, `weight`, and `id`:
 #' 
 #' \describe{

diff --git a/R/tokenfilter.R b/R/tokenfilter.R
@@ -29,7 +29,7 @@
 #'
 #' @details
 #'
-#' This step allow you to limit the tokens you are looking at by filtering on
+#' This step allows you to limit the tokens you are looking at by filtering on
 #' their occurrence in the corpus. You are able to exclude tokens if they appear
 #' too many times or too few times in the data. It can be specified as counts
 #' using `max_times` and `min_times` or as percentages by setting `percentage`
@@ -44,7 +44,7 @@
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/tokenize.R b/R/tokenize.R
@@ -29,16 +29,16 @@
 #' options(width = 55)
 #' ```
 #'
-#' Tokenization is the act of splitting a character string into smaller parts to
+#' Tokenization is the act of splitting a character vector into smaller parts to
 #' be further analyzed. This step uses the `tokenizers` package which includes
 #' heuristics on how to split the text into paragraph tokens, word tokens,
 #' among others. `textrecipes` keeps the tokens as a [`token`][tokenlist()]
 #' variable and other steps will do their tasks on those [`token`][tokenlist()]
-#' variable before transforming them back to numeric variables.
+#' variables before transforming them back to numeric variables.
 #'
-#' Working will `textrecipes` will almost always start by calling
+#' Working with `textrecipes` will almost always start by calling
 #' `step_tokenize` followed by modifying and filtering steps. This is not always
-#' the case as you sometimes want to do apply pre-tokenization steps, this can
+#' the case as you sometimes want to apply pre-tokenization steps; this can
 #' be done with [recipes::step_mutate()].
 #'
 #' # Engines
@@ -182,7 +182,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/tokenize_bpe.R b/R/tokenize_bpe.R
@@ -23,7 +23,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/tokenize_sentencepiece.R b/R/tokenize_sentencepiece.R
@@ -28,7 +28,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/tokenize_wordpiece.R b/R/tokenize_wordpiece.R
@@ -22,7 +22,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/tokenmerge.R b/R/tokenmerge.R
@@ -9,7 +9,7 @@
 #' @template args-role_predictors
 #' @template args-trained
 #' @template args-columns
-#' @param prefix A prefix for generated column names, default to "tokenmerge".
+#' @param prefix A prefix for generated column names, defaults to "tokenmerge".
 #' @template args-keep_original_cols
 #' @template args-skip
 #' @template args-id
@@ -20,7 +20,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms` and `id`:
 #' 
 #' \describe{

diff --git a/R/untokenize.R b/R/untokenize.R
@@ -23,7 +23,7 @@
 #'
 #' # Tidying
 #'
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `value`, and `id`:
 #' 
 #' \describe{

diff --git a/R/word_embeddings.R b/R/word_embeddings.R
@@ -44,7 +44,7 @@
 #'
 #' # Tidying
 #' 
-#' When you [`tidy()`][tidy.recipe()] this step, a tibble is retruned with
+#' When you [`tidy()`][tidy.recipe()] this step, a tibble is returned with
 #' columns `terms`, `embedding_rows`, `aggregation`, and `id`:
 #' 
 #' \describe{

diff --git a/man/show_tokens.Rd b/man/show_tokens.Rd
diff --git a/man/step_clean_levels.Rd b/man/step_clean_levels.Rd
diff --git a/man/step_clean_names.Rd b/man/step_clean_names.Rd
diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd