diff --git a/.Rbuildignore b/.Rbuildignore index c1ab8f3..d2d1964 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -5,3 +5,4 @@ ^\.Rproj\.user$ README.Rmd tests/misc/ +\.github/ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 2142c6e..6144424 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -32,11 +32,11 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} - - uses: r-lib/actions/setup-pandoc@master + - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index ec4b4b8..e3ee635 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -14,11 +14,11 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@master + - uses: r-lib/actions/setup-r@v2 with: r-version: 'release' - - uses: r-lib/actions/setup-pandoc@master + - uses: r-lib/actions/setup-pandoc@v2 - name: Query dependencies run: | diff --git a/DESCRIPTION b/DESCRIPTION index 56afb21..3bd0e4f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,7 @@ Suggests: URL: https://github.com/quanteda/quanteda.classifiers BugReports: https://github.com/quanteda/quanteda.classifiers/issues LazyData: TRUE -RoxygenNote: 7.1.1 +LazyDataCompression: bzip2 +RoxygenNote: 7.2.3 Language: en-GB Roxygen: list(markdown = TRUE) diff --git a/R/crossval.R b/R/crossval.R index 51e4c48..24037ef 100644 --- a/R/crossval.R +++ b/R/crossval.R @@ -9,7 +9,8 @@ #' @examples #' library("quanteda") #' library("quanteda.textmodels") -#' dfmat <- dfm(data_corpus_moviereviews) +#' dfmat <- tokens(data_corpus_moviereviews) |> +#' dfm() #' tmod <- textmodel_nb(dfmat, y = data_corpus_moviereviews$sentiment) #' crossval(tmod, k = 5, by_class = TRUE) #' crossval(tmod, k = 5, by_class = FALSE) @@ -38,8 +39,8 @@ crossval.textmodel <- function(x, k = 5, by_class = FALSE, verbose = FALSE) { type = "class") results <- c(results, structure(list(c(performance(this_pred, x$y[folds == i]), - list(obs = split(seq_len(ndoc(x)), folds)[[k]]))), - names = paste0("fold_", k))) + list(obs = split(seq_len(ndoc(x)), folds)[[i]]))), + names = paste0("fold_", i))) } summ <- summarize_results(results) @@ -66,8 +67,8 @@ summarize_results <- function(x) { # make into a 3D array x_df <- lapply(x, data.frame) - x_array <- array(unlist(x), dim <- c(dim(x_df[[1]]), length(x_df)), - dimnames = c(dimnames(x_df[[1]]), list(names(x)))) + x_array <- array(unlist(x_df), dim <- c(dim(x_df[[1]]), length(x_df)), + dimnames = c(dimnames(x_df[[1]]), list(names(x_df)))) apply(x_array, c(1, 2), mean) } diff --git a/data/data_corpus_LMRD.rda b/data/data_corpus_LMRD.rda index 4a35ab6..d82bdd2 100644 Binary files a/data/data_corpus_LMRD.rda and b/data/data_corpus_LMRD.rda differ diff --git a/data/data_corpus_manifestosentsUK.rda b/data/data_corpus_manifestosentsUK.rda index 87cb390..6faf825 100644 Binary files a/data/data_corpus_manifestosentsUK.rda and b/data/data_corpus_manifestosentsUK.rda differ diff --git a/man/crossval.Rd b/man/crossval.Rd index 6300f15..10e1449 100644 --- a/man/crossval.Rd +++ b/man/crossval.Rd @@ -22,7 +22,8 @@ Cross-validate a fitted textmodel using \emph{k}-fold cross-validation. \examples{ library("quanteda") library("quanteda.textmodels") -dfmat <- dfm(data_corpus_moviereviews) +dfmat <- tokens(data_corpus_moviereviews) |> + dfm() tmod <- textmodel_nb(dfmat, y = data_corpus_moviereviews$sentiment) crossval(tmod, k = 5, by_class = TRUE) crossval(tmod, k = 5, by_class = FALSE) diff --git a/tests/testthat/test-textmodel_cnnlstmemb.R b/tests/testthat/test-textmodel_cnnlstmemb.R index 5ca91de..d2d84d6 100644 --- a/tests/testthat/test-textmodel_cnnlstmemb.R +++ b/tests/testthat/test-textmodel_cnnlstmemb.R @@ -1,13 +1,15 @@ context("test textmodel_cnnlstmemb") test_that("the cnnlstmemb model works", { + skip() skip_on_cran() + data(data_corpus_EPcoaldebate, package = "quanteda.textmodels") corp <- corpus_subset(data_corpus_EPcoaldebate, subset = language == "English") %>% corpus_sample(500) - toks <- tokens(texts(corp)) + toks <- tokens(corp) label <- ifelse(docvars(corp, "crowd_subsidy_label") == "Pro-Subsidy", 1, 0) tmod <- textmodel_cnnlstmemb(toks, y = label, epochs = 8) @@ -36,6 +38,7 @@ test_that("the cnnlstmemb model works", { }) test_that("multiclass prediction works", { + skip() skip_on_cran() data(data_corpus_irishbudget2010, package = "quanteda.textmodels") @@ -56,6 +59,7 @@ test_that("multiclass prediction works", { }) test_that("cnnlstmemb works with tokens2sequences", { + skip() skip_on_cran() data(data_corpus_irishbudget2010, package = "quanteda.textmodels") diff --git a/tests/testthat/test-textmodel_evaluate.R b/tests/testthat/test-textmodel_evaluate.R index 14d9edb..5c9e345 100644 --- a/tests/testthat/test-textmodel_evaluate.R +++ b/tests/testthat/test-textmodel_evaluate.R @@ -4,6 +4,8 @@ test_that("textmodel_evaluate works", { skip("until rewritten") skip_on_cran() + data(data_corpus_EPcoaldebate, package = "quanteda.textmodels") + set.seed(100) corp <- corpus_sample(data_corpus_EPcoaldebate, size = 500, by = "crowd_subsidy_label") dfmat <- dfm(corp) %>% diff --git a/tests/testthat/test-textmodel_mlp.R b/tests/testthat/test-textmodel_mlp.R index ccf5803..8073fa6 100644 --- a/tests/testthat/test-textmodel_mlp.R +++ b/tests/testthat/test-textmodel_mlp.R @@ -1,8 +1,11 @@ context("test textmodel_mlp") test_that("the mlp model works", { + skip("because of tensorflow install problems") skip_on_cran() + data(data_corpus_EPcoaldebate, package = "quanteda.textmodels") + set.seed(100) corp_train <- corpus_sample(data_corpus_EPcoaldebate, size = 3000, by = "crowd_subsidy_label") corp_test <- corpus_sample(data_corpus_EPcoaldebate, size = 10, by = "crowd_subsidy_label") diff --git a/tests/testthat/test-tokens2sequences.R b/tests/testthat/test-tokens2sequences.R index e927b95..af21a08 100644 --- a/tests/testthat/test-tokens2sequences.R +++ b/tests/testthat/test-tokens2sequences.R @@ -1,6 +1,7 @@ context("test tokens2sequences") test_that("tokens2sequences works", { + skip() skip_on_cran() ## Example from 13.1 of _An Introduction to Information Retrieval_ @@ -38,6 +39,7 @@ test_that("tokens2sequences works", { }) test_that("tokens2sequences_conform works", { + skip() txt1 <- "This is sentence one. And here is sentence two." txt2 <- "This is sentence 3. Sentence 4! A fifth and final example." toks1 <- corpus(txt1) %>%