From 201db60f0ae592745451ba42dfae2c9524b8acbe Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 17 Oct 2023 14:23:21 -0700 Subject: [PATCH 01/12] update news --- NEWS.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 24478d36..1d5b1af7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,6 @@ # textrecipes (development version) -* `step_untokenize()` and `step_normalization()` now returns factors instead of strings. - +* `step_untokenize()` and `step_normalization()` now returns factors instead of strings. (#247) # textrecipes 1.0.4 ## Improvements From 32253c55d6ecf16ddb9fb6872134e075da3be6ac Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 17 Oct 2023 14:30:08 -0700 Subject: [PATCH 02/12] update cran-comments --- revdep/README.md | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/revdep/README.md b/revdep/README.md index 8614cbd2..89f01f52 100644 --- a/revdep/README.md +++ b/revdep/README.md @@ -1,33 +1,32 @@ # Platform -|field |value | -|:--------|:------------------------------------------------------------------------------------------| -|version |R version 4.3.0 (2023-04-21) | -|os |macOS Ventura 13.5 | -|system |aarch64, darwin20 | -|ui |RStudio | -|language |(EN) | -|collate |en_US.UTF-8 | -|ctype |en_US.UTF-8 | -|tz |America/Los_Angeles | -|date |2023-08-14 | -|rstudio |2023.09.0-daily+310 Desert Sunflower (desktop) | -|pandoc |3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown) | +|field |value | +|:--------|:--------------------------------------------------| +|version |R version 4.3.1 (2023-06-16) | +|os |macOS Ventura 13.6 | +|system |aarch64, darwin20 | +|ui |X11 | +|language |(EN) | +|collate |en_US.UTF-8 | +|ctype |en_US.UTF-8 | +|tz |America/Los_Angeles | +|date |2023-10-17 | +|pandoc |3.1.3 @ /Users/emilhvitfeldt/miniforge3/bin/pandoc | # Dependencies |package |old |new |Δ | |:------------|:----------|:----------|:--| -|textrecipes |1.0.3 |1.0.3.9000 |* | +|textrecipes |1.0.4 |1.0.4.9000 |* | |cli |3.6.1 |3.6.1 | | |clock |0.7.0 |0.7.0 | | |cpp11 |0.4.6 |0.4.6 | | |data.table |1.14.8 |1.14.8 | | |diagram |1.6.5 |1.6.5 | | |digest |0.6.33 |0.6.33 | | -|dplyr |1.1.2 |1.1.2 | | +|dplyr |1.1.3 |1.1.3 | | |ellipsis |0.3.2 |0.3.2 | | -|fansi |1.0.4 |1.0.4 | | +|fansi |1.0.5 |1.0.5 | | |future |1.33.0 |1.33.0 | | |future.apply |1.11.0 |1.11.0 | | |generics |0.1.3 |0.1.3 | | @@ -39,18 +38,18 @@ |lava |1.7.2.1 |1.7.2.1 | | |lifecycle |1.0.3 |1.0.3 | | |listenv |0.9.0 |0.9.0 | | -|lubridate |1.9.2 |1.9.2 | | +|lubridate |1.9.3 |1.9.3 | | |magrittr |2.0.3 |2.0.3 | | |numDeriv |2016.8-1.1 |2016.8-1.1 | | |parallelly |1.36.0 |1.36.0 | | |pillar |1.9.0 |1.9.0 | | |pkgconfig |2.0.3 |2.0.3 | | -|prodlim |2023.03.31 |2023.03.31 | | +|prodlim |2023.08.28 |2023.08.28 | | |progressr |0.14.0 |0.14.0 | | |purrr |1.0.2 |1.0.2 | | |R6 |2.5.1 |2.5.1 | | |Rcpp |1.0.11 |1.0.11 | | -|recipes |1.0.7 |1.0.7 | | +|recipes |1.0.8 |1.0.8 | | |rlang |1.1.1 |1.1.1 | | |shape |1.4.6 |1.4.6 | | |SnowballC |0.7.1 |0.7.1 | | @@ -65,8 +64,8 @@ |tokenizers |0.3.0 |0.3.0 | | |tzdb |0.4.0 |0.4.0 | | |utf8 |1.2.3 |1.2.3 | | -|vctrs |0.6.3 |0.6.3 | | -|withr |2.5.0 |2.5.0 | | +|vctrs |0.6.4 |0.6.4 | | +|withr |2.5.1 |2.5.1 | | # Revdeps From fe5c2b5a49bf7787292d716690721fd5303aeacb Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Tue, 17 Oct 2023 14:30:27 -0700 Subject: [PATCH 03/12] update cran comments --- cran-comments.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cran-comments.md b/cran-comments.md index d7f18c07..99826a7a 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,6 +1,6 @@ ## Release Summary -This is the 18th CRAN release of textrecipes. +This is the 19th CRAN release of textrecipes. ## R CMD check results From 1dec6ba7fd3ff639300d84b0f2bbdcb5122d4bb7 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 18 Oct 2023 12:23:10 -0700 Subject: [PATCH 04/12] Increment version number to 1.0.5 --- DESCRIPTION | 2 +- NEWS.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 538938f4..1a35c6bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: textrecipes Title: Extra 'Recipes' for Text Processing -Version: 1.0.4.9000 +Version: 1.0.5 Authors@R: c( person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0679-1945")), diff --git a/NEWS.md b/NEWS.md index 1d5b1af7..0244a62c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# textrecipes (development version) +# textrecipes 1.0.5 * `step_untokenize()` and `step_normalization()` now returns factors instead of strings. (#247) # textrecipes 1.0.4 From d2141ecad92b7d0d01ba01978c420fafcb881e04 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 18 Oct 2023 13:35:01 -0700 Subject: [PATCH 05/12] add data.table as suggested --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 1a35c6bd..fce28e64 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,6 +32,7 @@ Imports: glue Suggests: covr, + data.table, dials (>= 1.2.0), hardhat, janitor, From f45f8487613d6d33dea4ec9985dd60cafdeaac38 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 18 Oct 2023 13:41:44 -0700 Subject: [PATCH 06/12] use data.table::setDTthreads() --- R/dummy_hash.R | 3 ++- R/lda.R | 3 ++- R/texthash.R | 3 ++- man/step_dummy_hash.Rd | 3 ++- man/step_lda.Rd | 3 ++- man/step_texthash.Rd | 3 ++- tests/testthat/test-dummy_hash.R | 27 ++++++++++++++++++--------- tests/testthat/test-lda.R | 21 ++++++++++++++------- tests/testthat/test-texthash.R | 18 ++++++++++++------ 9 files changed, 56 insertions(+), 28 deletions(-) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 0921aafa..d5eb26a8 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -66,8 +66,9 @@ #' @seealso [recipes::step_dummy()] #' @family Steps for Numeric Variables From Characters #' -#' @examplesIf rlang::is_installed("text2vec") +#' @examplesIf rlang::is_installed(c("text2vec", "data.table")) #' \dontrun{ +#' \dontshow{data.table::setDTthreads(2)} #' library(recipes) #' library(modeldata) #' data(grants) diff --git a/R/lda.R b/R/lda.R index 1497b5a4..12ad90b1 100644 --- a/R/lda.R +++ b/R/lda.R @@ -32,7 +32,8 @@ #' #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf rlang::is_installed("text2vec") +#' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' \dontshow{data.table::setDTthreads(2)} #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/texthash.R b/R/texthash.R index 16a9b8e8..2acb6f92 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -55,7 +55,8 @@ #' [step_text_normalization()] to perform text normalization. #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf rlang::is_installed("text2vec") +#' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' \dontshow{data.table::setDTthreads(2)} #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index dfd1cbe7..2f7ba4a4 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -115,8 +115,9 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("text2vec")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ +\dontshow{data.table::setDTthreads(2)} library(recipes) library(modeldata) data(grants) diff --git a/man/step_lda.Rd b/man/step_lda.Rd index 323513f4..7c34db67 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -80,7 +80,8 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("text2vec")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{data.table::setDTthreads(2)} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index 66eba420..18c86204 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -106,7 +106,8 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed("text2vec")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{data.table::setDTthreads(2)} library(recipes) library(modeldata) data(tate_text) diff --git a/tests/testthat/test-dummy_hash.R b/tests/testthat/test-dummy_hash.R index b38dda4c..7f48cf67 100644 --- a/tests/testthat/test-dummy_hash.R +++ b/tests/testthat/test-dummy_hash.R @@ -9,7 +9,8 @@ rec <- recipe(~., data = test_data) test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_dummy_hash(sponsor_code) @@ -30,7 +31,8 @@ test_that("hashing gives double outputs", { }) test_that("hashing multiple factors", { - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default res <- rec %>% step_dummy_hash(all_nominal_predictors(), num_terms = 12) %>% @@ -43,7 +45,8 @@ test_that("hashing multiple factors", { }) test_that("hashing collapsed multiple factors", { - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default res <- rec %>% step_dummy_hash(all_nominal_predictors(), num_terms = 4, collapse = TRUE) %>% @@ -56,7 +59,8 @@ test_that("hashing collapsed multiple factors", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_dummy_hash(sponsor_code, num_terms = 256) %>% @@ -72,7 +76,8 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default signed <- recipe(~., data = test_data) %>% step_dummy_hash(all_predictors(), num_terms = 2) %>% @@ -92,7 +97,8 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("check_name() is used", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default dat <- test_data dat$text <- dat$sponsor_code @@ -183,7 +189,8 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default new_names <- paste0("dummyhash_sponsor_code_", 1:5) @@ -212,7 +219,8 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- recipe(~ sponsor_code, data = test_data) %>% step_dummy_hash(sponsor_code) @@ -232,7 +240,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { test_that("printing", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_dummy_hash(sponsor_code) diff --git a/tests/testthat/test-lda.R b/tests/testthat/test-lda.R index fb896721..8f3d37b3 100644 --- a/tests/testthat/test-lda.R +++ b/tests/testthat/test-lda.R @@ -9,7 +9,8 @@ rec <- recipe(~ medium + artist, data = tate_text[seq_len(n_rows), ]) test_that("step_lda works as intended", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default n_top <- 10 rec1 <- rec %>% @@ -27,7 +28,8 @@ test_that("step_lda works as intended", { test_that("step_lda works with num_topics argument", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default n_top <- 100 rec1 <- rec %>% @@ -42,7 +44,8 @@ test_that("step_lda works with num_topics argument", { test_that("check_name() is used", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default dat <- tate_text[seq_len(100), ] dat$text <- dat$medium @@ -62,7 +65,8 @@ test_that("check_name() is used", { test_that("bake method errors when needed non-standard role columns are missing", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default tokenized_test_data <- rec %>% step_tokenize(medium) %>% @@ -126,7 +130,8 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default new_names <- paste0("lda_medium_", 1:10) @@ -157,7 +162,8 @@ test_that("keep_original_cols works", { test_that("keep_original_cols - can prep recipes with it missing", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- recipe(~ medium, data = tate_text[seq_len(n_rows), ]) %>% step_tokenize(medium) %>% @@ -178,7 +184,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { test_that("printing", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_tokenize(medium) %>% diff --git a/tests/testthat/test-texthash.R b/tests/testthat/test-texthash.R index 73d38b71..6ddaebec 100644 --- a/tests/testthat/test-texthash.R +++ b/tests/testthat/test-texthash.R @@ -12,7 +12,8 @@ rec <- recipe(~., data = test_data) test_that("hashing gives double outputs", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_tokenize(text) %>% @@ -35,7 +36,8 @@ test_that("hashing gives double outputs", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_tokenize(text) %>% @@ -52,7 +54,8 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("hashing output width changes accordingly with num_terms", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default signed <- recipe(~., data = test_data) %>% step_tokenize(all_predictors()) %>% @@ -74,7 +77,8 @@ test_that("hashing output width changes accordingly with num_terms", { test_that("check_name() is used", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default dat <- test_data dat$texthash_text_0001 <- dat$text @@ -170,7 +174,8 @@ test_that("empty selection tidy method works", { test_that("keep_original_cols works", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default new_names <- paste0("texthash_text_", 1:5) @@ -218,7 +223,8 @@ test_that("keep_original_cols - can prep recipes with it missing", { test_that("printing", { skip_if_not_installed("text2vec") - skip_on_cran() # because data.table uses all cores by default + skip_if_not_installed("data.table") + data.table::setDTthreads(2) # because data.table uses all cores by default rec <- rec %>% step_tokenize(text) %>% From 815b535ce0e3ce374e77286f4b6316d023c00e28 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 18 Oct 2023 14:12:26 -0700 Subject: [PATCH 07/12] add library() call to data.table --- R/dummy_hash.R | 1 + R/lda.R | 1 + R/texthash.R | 1 + man/step_dummy_hash.Rd | 1 + man/step_lda.Rd | 1 + man/step_texthash.Rd | 1 + 6 files changed, 6 insertions(+) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index d5eb26a8..4fba1026 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -68,6 +68,7 @@ #' #' @examplesIf rlang::is_installed(c("text2vec", "data.table")) #' \dontrun{ +#' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' library(recipes) #' library(modeldata) diff --git a/R/lda.R b/R/lda.R index 12ad90b1..051e71d7 100644 --- a/R/lda.R +++ b/R/lda.R @@ -33,6 +33,7 @@ #' @family Steps for Numeric Variables From Tokens #' #' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' library(recipes) #' library(modeldata) diff --git a/R/texthash.R b/R/texthash.R index 2acb6f92..984c854e 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -56,6 +56,7 @@ #' @family Steps for Numeric Variables From Tokens #' #' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' library(recipes) #' library(modeldata) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 2f7ba4a4..0173c128 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -117,6 +117,7 @@ The underlying operation does not allow for case weights. \examples{ \dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontrun{ +\dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} library(recipes) library(modeldata) diff --git a/man/step_lda.Rd b/man/step_lda.Rd index 7c34db67..c69429ce 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -81,6 +81,7 @@ The underlying operation does not allow for case weights. \examples{ \dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} library(recipes) library(modeldata) diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index 18c86204..82ac9acc 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -107,6 +107,7 @@ The underlying operation does not allow for case weights. \examples{ \dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} library(recipes) library(modeldata) From 7decca2a04ddea461f6d8bedd29495fe1ac4d640 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Wed, 18 Oct 2023 14:59:10 -0700 Subject: [PATCH 08/12] try setting OMP_THREAD_LIMIT env var --- R/dummy_hash.R | 3 +-- R/lda.R | 1 + R/texthash.R | 1 + man/step_dummy_hash.Rd | 3 +-- man/step_lda.Rd | 1 + man/step_texthash.Rd | 1 + 6 files changed, 6 insertions(+), 4 deletions(-) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 4fba1026..34482c48 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -67,9 +67,9 @@ #' @family Steps for Numeric Variables From Characters #' #' @examplesIf rlang::is_installed(c("text2vec", "data.table")) -#' \dontrun{ #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} +#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} #' library(recipes) #' library(modeldata) #' data(grants) @@ -84,7 +84,6 @@ #' #' tidy(grants_rec, number = 1) #' tidy(grants_obj, number = 1) -#' } #' @export step_dummy_hash <- function(recipe, diff --git a/R/lda.R b/R/lda.R index 051e71d7..d6737ee1 100644 --- a/R/lda.R +++ b/R/lda.R @@ -35,6 +35,7 @@ #' @examplesIf rlang::is_installed(c("text2vec", "data.table")) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} +#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/R/texthash.R b/R/texthash.R index 984c854e..0769cf92 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -58,6 +58,7 @@ #' @examplesIf rlang::is_installed(c("text2vec", "data.table")) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} +#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} #' library(recipes) #' library(modeldata) #' data(tate_text) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 0173c128..cf4f58e4 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -116,9 +116,9 @@ The underlying operation does not allow for case weights. \examples{ \dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -\dontrun{ \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} +\dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} library(recipes) library(modeldata) data(grants) @@ -133,7 +133,6 @@ bake(grants_obj, grants_test) tidy(grants_rec, number = 1) tidy(grants_obj, number = 1) -} \dontshow{\}) # examplesIf} } \references{ diff --git a/man/step_lda.Rd b/man/step_lda.Rd index c69429ce..b41c8a45 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -83,6 +83,7 @@ The underlying operation does not allow for case weights. \dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} +\dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} library(recipes) library(modeldata) data(tate_text) diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index 82ac9acc..08065a3f 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -109,6 +109,7 @@ The underlying operation does not allow for case weights. \dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} +\dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} library(recipes) library(modeldata) data(tate_text) From fc46a5c1b560dcc43588e43e4f505b92ccf3d398 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Thu, 19 Oct 2023 09:03:04 -0700 Subject: [PATCH 09/12] try setting text2vec.mc.cores as well --- R/dummy_hash.R | 3 +++ man/step_dummy_hash.Rd | 3 +++ 2 files changed, 6 insertions(+) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 34482c48..44963ee4 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -70,6 +70,9 @@ #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} +#' \dontshow{library(text2vec)} +#' \dontshow{options("text2vec.mc.cores" = 1)} +#' #' library(recipes) #' library(modeldata) #' data(grants) diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index cf4f58e4..7227c2ad 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -119,6 +119,9 @@ The underlying operation does not allow for case weights. \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} +\dontshow{library(text2vec)} +\dontshow{options("text2vec.mc.cores" = 1)} + library(recipes) library(modeldata) data(grants) From 1b8766ddf1298b22ddbbcc3975e5b4826e9456a2 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 20 Oct 2023 12:41:44 -0700 Subject: [PATCH 10/12] trying more options --- R/dummy_hash.R | 6 +++++- R/lda.R | 2 +- R/texthash.R | 2 +- man/step_dummy_hash.Rd | 6 +++++- man/step_lda.Rd | 2 +- man/step_texthash.Rd | 2 +- 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 44963ee4..9e0a05eb 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -66,11 +66,15 @@ #' @seealso [recipes::step_dummy()] #' @family Steps for Numeric Variables From Characters #' -#' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} +#' \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} +#' \dontshow{options(rsparse_omp_threads = 1L)} #' \dontshow{library(text2vec)} +#' \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} +#' \dontshow{options(rsparse_omp_threads = 1L)} #' \dontshow{options("text2vec.mc.cores" = 1)} #' #' library(recipes) diff --git a/R/lda.R b/R/lda.R index d6737ee1..e3841f18 100644 --- a/R/lda.R +++ b/R/lda.R @@ -32,7 +32,7 @@ #' #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/R/texthash.R b/R/texthash.R index 0769cf92..ac5832c6 100644 --- a/R/texthash.R +++ b/R/texthash.R @@ -55,7 +55,7 @@ #' [step_text_normalization()] to perform text normalization. #' @family Steps for Numeric Variables From Tokens #' -#' @examplesIf rlang::is_installed(c("text2vec", "data.table")) +#' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} #' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 7227c2ad..658063e4 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -115,11 +115,15 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} +\dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} +\dontshow{options(rsparse_omp_threads = 1L)} \dontshow{library(text2vec)} +\dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} +\dontshow{options(rsparse_omp_threads = 1L)} \dontshow{options("text2vec.mc.cores" = 1)} library(recipes) diff --git a/man/step_lda.Rd b/man/step_lda.Rd index b41c8a45..edca4dc3 100644 --- a/man/step_lda.Rd +++ b/man/step_lda.Rd @@ -80,7 +80,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} diff --git a/man/step_texthash.Rd b/man/step_texthash.Rd index 08065a3f..edb1dc93 100644 --- a/man/step_texthash.Rd +++ b/man/step_texthash.Rd @@ -106,7 +106,7 @@ The underlying operation does not allow for case weights. } \examples{ -\dontshow{if (rlang::is_installed(c("text2vec", "data.table"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} From 233a5fc6965799f941ad96313f8030ba12a1853c Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 20 Oct 2023 15:11:41 -0700 Subject: [PATCH 11/12] final solution --- R/dummy_hash.R | 5 ++++- man/step_dummy_hash.Rd | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R/dummy_hash.R b/R/dummy_hash.R index 9e0a05eb..c83a9a3b 100644 --- a/R/dummy_hash.R +++ b/R/dummy_hash.R @@ -69,10 +69,13 @@ #' @examplesIf all(c("text2vec", "data.table") %in% rownames(installed.packages())) #' \dontshow{library(data.table)} #' \dontshow{data.table::setDTthreads(2)} -#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} +#' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} +#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 1)} #' \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} #' \dontshow{options(rsparse_omp_threads = 1L)} #' \dontshow{library(text2vec)} +#' \dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} +#' \dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 1)} #' \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} #' \dontshow{options(rsparse_omp_threads = 1L)} #' \dontshow{options("text2vec.mc.cores" = 1)} diff --git a/man/step_dummy_hash.Rd b/man/step_dummy_hash.Rd index 658063e4..d730b3ba 100644 --- a/man/step_dummy_hash.Rd +++ b/man/step_dummy_hash.Rd @@ -118,10 +118,13 @@ The underlying operation does not allow for case weights. \dontshow{if (all(c("text2vec", "data.table") \%in\% rownames(installed.packages()))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \dontshow{library(data.table)} \dontshow{data.table::setDTthreads(2)} -\dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 2)} +\dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} +\dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 1)} \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} \dontshow{options(rsparse_omp_threads = 1L)} \dontshow{library(text2vec)} +\dontshow{Sys.setenv("OMP_NUM_THREADS" = 1)} +\dontshow{Sys.setenv("OMP_THREAD_LIMIT" = 1)} \dontshow{Sys.setenv("rsparse_omp_threads" = 1L)} \dontshow{options(rsparse_omp_threads = 1L)} \dontshow{options("text2vec.mc.cores" = 1)} From 59e6775e93655bd9a234787b29c3b4017c3dd831 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Fri, 20 Oct 2023 15:15:18 -0700 Subject: [PATCH 12/12] Increment version number to 1.0.5.9000 --- DESCRIPTION | 2 +- NEWS.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fce28e64..441127c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: textrecipes Title: Extra 'Recipes' for Text Processing -Version: 1.0.5 +Version: 1.0.5.9000 Authors@R: c( person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0679-1945")), diff --git a/NEWS.md b/NEWS.md index 0244a62c..cf77a8c7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# textrecipes (development version) + # textrecipes 1.0.5 * `step_untokenize()` and `step_normalization()` now returns factors instead of strings. (#247)