From 71cc0551e2d5219c98999d18704699a1d7ac6d49 Mon Sep 17 00:00:00 2001
From: Kohei Watanabe <watanabe.kohei@gmail.com>
Date: Thu, 22 Aug 2024 17:39:07 +0900
Subject: [PATCH 1/3] Drop docvars from input object

---
 R/seededlda.R                  |  1 +
 tests/testthat/test-internal.R | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)
diff --git a/R/seededlda.R b/R/seededlda.R
index 56414b1..f5a509a 100644
--- a/R/seededlda.R
+++ b/R/seededlda.R
@@ -189,6 +189,7 @@ tfm <- function(x, dictionary, levels = 1,
     if (!quanteda::is.dictionary(dictionary))
         stop("dictionary must be a dictionary object", call. = FALSE)
 
+    docvars(x) <- NULL # sanitize dfm
     dict <- flatten_dictionary(dictionary, levels)
     key <- names(dict)
     feat <- featnames(x)
diff --git a/tests/testthat/test-internal.R b/tests/testthat/test-internal.R
index 0b8f766..882e3d4 100644
--- a/tests/testthat/test-internal.R
+++ b/tests/testthat/test-internal.R
@@ -116,6 +116,24 @@ test_that("tfm works with ngrams", {
                  c("un" = 2, "icc" = 2, "other" = 0))
 })
 
+test_that("tfm works with dfm with x in docvars (#87)", {
+	# Regression test for #87: the input dfm carries a docvar named "x".
+	dict <- dictionary(list("A" = "a", "B" = "b"))
+	dat <- data.frame(text = c("a b c", "A B C"),
+					  x = c(1, 2))
+	corp <- corpus(dat)
+	toks <- tokens(corp)
+	dfmt <- dfm(toks)
+
+	expect_equal(
+		as.matrix(seededlda:::tfm(dfmt, dict, residual = 1)),
+		matrix(c(2, 0, 0, 0, 2, 0, 0, 0, 0), nrow = 3,
+			   dimnames = list(c("A", "B", "other"), c("a", "b", "c")))
+	)
+
+})
+
+
 test_that("levels is working", {
 
 	dict <- dictionary(list(A = list(

From 1f8e773fec8776f91b5dde1e53320508038cc8e5 Mon Sep 17 00:00:00 2001
From: Kohei Watanabe <watanabe.kohei@gmail.com>
Date: Mon, 26 Aug 2024 14:58:39 +0900
Subject: [PATCH 2/3] Save adjust_alpha in the object

---
 R/lda.R                                   | 3 ++-
 man/textmodel_lda.Rd                      | 3 ++-
 src/lda.cpp                               | 1 +
 tests/testthat/test-textmodel_lda.R       | 4 +++-
 tests/testthat/test-textmodel_seededlda.R | 5 +++--
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/R/lda.R b/R/lda.R
index 024978f..0d2fb17 100644
--- a/R/lda.R
+++ b/R/lda.R
@@ -54,7 +54,8 @@
 #'   \item{k}{the number of topics.}
 #'   \item{last_iter}{the number of iterations in Gibbs sampling.}
 #'   \item{max_iter}{the maximum number of iterations in Gibbs sampling.}
-#'   \item{auto_iter}{`auto_iter` is used if `TRUE`.}
+#'   \item{auto_iter}{whether `auto_iter` is used.}
+#'   \item{adjust_alpha}{the value of `adjust_alpha`.}
 #'   \item{alpha}{the smoothing parameter for `theta`.}
 #'   \item{beta}{the smoothing parameter for `phi`.}
 #'   \item{epsilon}{the amount of adjustment for `adjust_alpha`.}
diff --git a/man/textmodel_lda.Rd b/man/textmodel_lda.Rd
index 50976e1..237f80a 100644
--- a/man/textmodel_lda.Rd
+++ b/man/textmodel_lda.Rd
@@ -59,7 +59,8 @@ Returns a list of model parameters:
 \item{k}{the number of topics.}
 \item{last_iter}{the number of iterations in Gibbs sampling.}
 \item{max_iter}{the maximum number of iterations in Gibbs sampling.}
-\item{auto_iter}{\code{auto_iter} is used if \code{TRUE}.}
+\item{auto_iter}{whether \code{auto_iter} is used.}
+\item{adjust_alpha}{the value of \code{adjust_alpha}.}
 \item{alpha}{the smoothing parameter for \code{theta}.}
 \item{beta}{the smoothing parameter for \code{phi}.}
 \item{epsilon}{the amount of adjustment for \code{adjust_alpha}.}
diff --git a/src/lda.cpp b/src/lda.cpp
index 254c314..d50d85d 100644
--- a/src/lda.cpp
+++ b/src/lda.cpp
@@ -38,6 +38,7 @@ List cpp_lda(arma::sp_mat &mt, int k, int max_iter, double min_delta,
                         Rcpp::Named("max_iter") = lda.max_iter,
                         Rcpp::Named("last_iter") = lda.iter,
                         Rcpp::Named("auto_iter") = (lda.min_delta == 0),
+                        Rcpp::Named("adjust_alpha") = lda.adjust,
                         Rcpp::Named("alpha") = as<NumericVector>(wrap(lda.alpha)),
                         Rcpp::Named("beta") = as<NumericVector>(wrap(lda.beta)),
                         Rcpp::Named("epsilon") = as<NumericVector>(wrap(lda.epsilon)),
diff --git a/tests/testthat/test-textmodel_lda.R b/tests/testthat/test-textmodel_lda.R
index 4e938ff..42775c0 100644
--- a/tests/testthat/test-textmodel_lda.R
+++ b/tests/testthat/test-textmodel_lda.R
@@ -72,7 +72,8 @@ test_that("LDA is working", {
     )
     expect_equal(
         names(lda),
-        c("k", "max_iter", "last_iter", "auto_iter", "alpha", "beta", "epsilon", "gamma",
+        c("k", "max_iter", "last_iter", "auto_iter", "adjust_alpha",
+          "alpha", "beta", "epsilon", "gamma",
           "phi", "theta", "words", "data", "batch_size", "call", "version")
     )
     expect_equal(lda$last_iter, 200)
@@ -120,6 +121,7 @@ test_that("adjust_alpha works", {
 
 	set.seed(1234)
 	lda <- textmodel_lda(dfmt, max_iter = 200, adjust_alpha = 0.5)
+	expect_equal(lda$adjust_alpha, 0.5)
 	expect_true(all(lda$alpha != 0.5))
 	expect_true(all(lda$alpha > 0.25))
 	expect_true(all(lda$epsilon > 0))
diff --git a/tests/testthat/test-textmodel_seededlda.R b/tests/testthat/test-textmodel_seededlda.R
index aa36486..3b4b685 100644
--- a/tests/testthat/test-textmodel_seededlda.R
+++ b/tests/testthat/test-textmodel_seededlda.R
@@ -104,8 +104,9 @@ test_that("seeded LDA is working", {
     )
     expect_equal(
         names(lda),
-        c("k", "max_iter", "last_iter", "auto_iter", "alpha", "beta", "epsilon", "gamma",
-          "phi", "theta", "words", "data", "batch_size", "call", "version",
+        c("k", "max_iter", "last_iter", "auto_iter", "adjust_alpha",
+          "alpha", "beta", "epsilon", "gamma", "phi", "theta",
+          "words", "data", "batch_size", "call", "version",
           "dictionary", "valuetype", "case_insensitive", "seeds",
           "residual", "weight")
     )

From f212d75a5405dd49e8a2d566d624c3ae05e21e2e Mon Sep 17 00:00:00 2001
From: Kohei Watanabe <watanabe.kohei@gmail.com>
Date: Wed, 4 Sep 2024 19:58:43 +0900
Subject: [PATCH 3/3] Include RcppArmadillo

---
 src/lda.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lda.cpp b/src/lda.cpp
index d50d85d..7a3b080 100644
--- a/src/lda.cpp
+++ b/src/lda.cpp
@@ -1,3 +1,4 @@
+#include <RcppArmadillo.h>
 #include "lib.h"
 #include "dev.h"
 #include "lda.h"