Skip to content

Commit

Permalink
Update test
Browse files Browse the repository at this point in the history
  • Loading branch information
koheiw committed Sep 21, 2023
1 parent 2aebb0e commit a4d36ba
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions tests/train.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,24 @@ toks <- tokens(corp, remove_punct = TRUE, remove_symbols = TRUE)
# Build the two input forms used below: `lis` is the tokens object `toks`
# converted to a plain list, and `txt` joins the elements of each list entry
# with a single space (one string per list entry).
lis <- as.list(toks)
txt <- stringi::stri_c_list(lis, " ")

# Fit word2vec on the list input `lis`.
# NOTE(review): `mod_lis` is assigned twice in a row with different settings
# (dim 15 / iter 20 / min_count 2 / 1 thread, then dim 50 / iter 5 /
# min_count 5 / 4 threads). This looks like the removed and added versions of
# the same call shown together by the diff view -- confirm against the
# repository before running; as written the second assignment overwrites the
# first.
mod_lis <- word2vec(lis, dim = 15, iter = 20, min_count = 2,
verbose = FALSE, threads = 1)
#emb_lis <- as.matrix(mod_lis)
mod_lis <- word2vec(lis, dim = 50, iter = 5, min_count = 5,
verbose = TRUE, threads = 4)
# Convert the fitted model to a matrix and print its dimensions.
emb_lis <- as.matrix(mod_lis)
dim(emb_lis)
# Query the model for "people" and "American" with type = "nearest".
predict(mod_lis, c("people", "American"), type = "nearest")

# Fit word2vec on the character input `txt`, using the same settings as the
# list-based fit plus an explicit `split` argument.
# NOTE(review): presumably the first `split` pattern separates words and the
# second separates sentences -- confirm against the word2vec documentation.
# NOTE(review): as with `mod_lis`, the two consecutive assignments to
# `mod_txt` look like the removed and added versions of one call shown
# together by the diff view; the second overwrites the first if both run.
mod_txt <- word2vec(txt, dim = 15, iter = 20, split = c("[ \n]", "\n"), min_count = 2,
verbose = FALSE, threads = 1)
mod_txt <- word2vec(txt, dim = 50, iter = 5, split = c("[ \n]", "\n"), min_count = 5,
verbose = TRUE, threads = 4)
# Convert the fitted model to a matrix.
emb_txt <- as.matrix(mod_txt)
# NOTE(review): two `predict()` calls with different query terms follow; the
# first ("citizen", "country") is likely the removed line and the second
# ("people", "American") its replacement -- confirm.
predict(mod_txt, c("citizen", "country"), type = "nearest")
dim(emb_txt)
predict(mod_txt, c("people", "American"), type = "nearest")


# Time word2vec training on the list input ("lis") against the character
# input ("txt") with otherwise matching settings; each expression is evaluated
# 10 times (times = 10).
# Both calls request threads = 10 and run with verbose = FALSE.
microbenchmark::microbenchmark(
"lis" = word2vec(lis, dim = 50, iter = 5, min_count = 5,
verbose = FALSE, threads = 10),
"txt" = word2vec(txt, dim = 50, iter = 5, split = c("[ \n]", "\n"), min_count = 5,
verbose = FALSE, threads = 10),
times = 10
)

# Scratch arithmetic: with n = 100, thread = 3 and id = 1, the two expressions
# below evaluate to 33 and 65 respectively.
# NOTE(review): presumably this checks the first and last zero-based index of
# the slice of `n` items assigned to worker `id` when work is split across
# `thread` workers -- confirm against the package's threading code.
n <- 100
thread <- 3
id <- 1
floor((n / thread) * id)
floor((n / thread) * (id + 1)) - 1

0 comments on commit a4d36ba

Please sign in to comment.