This benchmark compares the performance of the Wordfish computation on a dense dfm, on a sparse dfm, and on a sparse dfm using multiple threads.
# Attach quanteda. `library()` stops with an error when the package is not
# installed, whereas `require(..., quietly = TRUE)` only returns FALSE and
# lets the script fail later at the first quanteda call.
library(quanteda, quietly = TRUE, warn.conflicts = FALSE)
## quanteda version 0.9.9.43
## Using 7 of 8 cores for parallel computing
# Document-feature matrix built from `data_corpus_irishbudget2010`; it is the
# input passed to every benchmarked expression further down.
ie2010dfm <- dfm(data_corpus_irishbudget2010, verbose = FALSE)
# Fit a Wordfish model to `x`, explicitly requesting a single thread.
#
# x: object forwarded unchanged as the first argument of textmodel_wordfish().
# Returns whatever textmodel_wordfish(x, dir = c(6, 5), threads = 1) returns.
wfm_sparse <- function(x) {
  fit <- textmodel_wordfish(x, dir = c(6, 5), threads = 1)
  fit
}
# Fit a Wordfish model to `x` after asking quanteda to use all but one of the
# threads reported by RcppParallel::defaultNumThreads().
#
# x: object forwarded unchanged as the first argument of textmodel_wordfish().
# Returns whatever textmodel_wordfish(x, dir = c(6, 5)) returns.
wfm_sparse_mt <- function(x) {
  # Remember the current setting and put it back on exit, so this function
  # does not leave a changed thread count behind for later quanteda calls.
  previous <- quanteda_options("threads")
  if (!is.null(previous)) {
    on.exit(quanteda_options(threads = previous), add = TRUE)
  }
  # Leave one thread free, but never request fewer than one: on a machine
  # that reports a single thread, `n - 1` would otherwise be zero.
  threads <- max(1L, RcppParallel::defaultNumThreads() - 1L)
  quanteda_options(threads = threads)
  textmodel_wordfish(x, dir = c(6, 5))
}
# Time three ways of fitting the model on the same dfm, 20 evaluations each,
# and report the timings in relative units:
#   wfm_s_mt - wfm_sparse_mt() wrapper
#   wfm_s    - wfm_sparse() wrapper (threads = 1)
#   wfm_d    - direct call with sparse = FALSE
# NOTE(review): wfm_d passes no `threads` argument, so it presumably runs with
# whatever thread setting is in effect for the session - confirm.
microbenchmark::microbenchmark(
  wfm_s_mt = wfm_sparse_mt(ie2010dfm),
  wfm_s    = wfm_sparse(ie2010dfm),
  wfm_d    = textmodel_wordfish(ie2010dfm, dir = c(6, 5), sparse = FALSE),
  times = 20,
  unit = "relative"
)
## Unit: relative
## expr min lq mean median uq max neval
## wfm_s_mt 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 20
## wfm_s 3.233257 3.232424 3.233229 3.229867 3.227946 3.269056 20
## wfm_d 3.764306 3.761309 3.761267 3.762677 3.759394 3.765570 20