This benchmark tests the performance of the calculation when applied to a dense dfm matrix versus a sparse dfm matrix (with truncated SVD).
# Attach quanteda with library() so that a missing package stops the script
# here, instead of require() returning FALSE and the failure surfacing later
# at the first quanteda call.
library(quanteda, quietly = TRUE, warn.conflicts = FALSE)
## quanteda version 0.9.9.46
## Using 7 of 8 cores for parallel computing
# Build the dfm from data_corpus_irishbudget2010, plus a dense base-R matrix
# copy of it to feed to ca::ca().
ie2010dfm <- dfm(data_corpus_irishbudget2010, verbose = FALSE)
ie2010dfm_dense <- as.matrix(ie2010dfm)

# Run each expression 10 times and report the timings in relative units.
microbenchmark::microbenchmark(
  ca = ca::ca(ie2010dfm_dense),
  ca_textmodel = textmodel_ca(ie2010dfm),
  times = 10,
  unit = "relative"
)
## Unit: relative
## expr min lq mean median uq max neval
## ca 1.842227 1.675758 1.354429 1.758322 1.731018 1.100471 10
## ca_textmodel 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 10
# Load SOTUCorpus from the quantedaData package, then build its dfm and a
# dense base-R matrix copy to feed to ca::ca().
data(SOTUCorpus, package = "quantedaData")
SOTUdfm <- dfm(SOTUCorpus)
SOTUdfm_dense <- as.matrix(SOTUdfm)

# Run each expression 10 times and report the timings in relative units.
microbenchmark::microbenchmark(
  ca = ca::ca(SOTUdfm_dense),
  ca_textmodel = textmodel_ca(SOTUdfm),
  times = 10,
  unit = "relative"
)
## Unit: relative
## expr min lq mean median uq max neval
## ca 1.902012 1.805813 1.7407 1.742105 1.702073 1.576118 10
## ca_textmodel 1.000000 1.000000 1.0000 1.000000 1.000000 1.000000 10