The following benchmarks test the performance of similarity/distance calculation using the vectorized method versus C-loop methods.
# Benchmark: Euclidean distance between documents, comparing quanteda's
# textstat_dist() with proxy::dist() and stats::dist().
# library() is used rather than require(): a missing package stops the script
# here instead of returning FALSE and failing later inside dfm().
library(quanteda, quietly = TRUE, warn.conflicts = FALSE)
## quanteda version 0.9.8.9024
data(SOTUCorpus, package = "quantedaData")
SOTUDfm <- dfm(SOTUCorpus, remove = stopwords("english"), stem = TRUE, verbose = FALSE)
##
# The as.matrix() conversion is evaluated inside the timed expressions, so its
# cost is included in the eucProxy and eucStats timings.
# unit = "relative" reports every timing as a multiple of the fastest entry.
microbenchmark::microbenchmark(
  eucQuanteda = textstat_dist(SOTUDfm, method = "euclidean", margin = "documents"),
  eucProxy = proxy::dist(as.matrix(SOTUDfm), "euclidean",
                         diag = FALSE, upper = FALSE, p = 2),
  # Namespace-qualified so this entry always measures the stats implementation.
  eucStats = stats::dist(as.matrix(SOTUDfm), method = "euclidean",
                         diag = FALSE, upper = FALSE, p = 2),
  times = 20,
  unit = "relative"
)
## Unit: relative
## expr min lq mean median uq max neval
## eucQuanteda 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 20
## eucProxy 41.88628 42.00283 41.19271 42.63788 41.55125 36.42565 20
## eucStats 97.81379 95.99923 90.97174 95.91534 91.84129 64.85205 20
# Benchmark: Jaccard similarity between documents, comparing quanteda's
# textstat_simil() with proxy::simil().
# library() is used rather than require(): a missing package stops the script
# here instead of returning FALSE and failing later inside dfm().
library(quanteda, quietly = TRUE, warn.conflicts = FALSE)
data(SOTUCorpus, package = "quantedaData")
SOTUDfm <- dfm(SOTUCorpus, remove = stopwords("english"), stem = TRUE, verbose = FALSE)
##
# The as.matrix() conversion is evaluated inside the timed expression, so its
# cost is included in the jacProxy timing.
# NOTE(review): the quanteda call passes upper = TRUE while the proxy call
# passes upper = FALSE; the arguments are left as-is so the recorded timings
# below still correspond to this code -- confirm whether they should match.
microbenchmark::microbenchmark(
  jacQuanteda = textstat_simil(SOTUDfm, method = "jaccard",
                               margin = "documents", upper = TRUE),
  jacProxy = proxy::simil(as.matrix(SOTUDfm), "jaccard",
                          diag = FALSE, upper = FALSE),
  times = 10,
  unit = "relative"
)
## Unit: relative
## expr min lq mean median uq max neval
## jacQuanteda 1.0000 1.00000 1.00000 1.00000 1.00000 1.00000 10
## jacProxy 33.1578 32.97427 32.20105 33.69403 34.10281 26.24232 10
The following benchmark tests the performance of similarity/distance calculation using the parallel method versus the serial method.
# Benchmark: Manhattan distance between documents, comparing quanteda's
# textstat_dist() with proxy::dist().
# library() is used rather than require(): a missing package stops the script
# here instead of returning FALSE and failing later inside dfm().
library(quanteda, quietly = TRUE, warn.conflicts = FALSE)
data(SOTUCorpus, package = "quantedaData")
SOTUDfm <- dfm(SOTUCorpus, remove = stopwords("english"), stem = TRUE, verbose = FALSE)
##
# The as.matrix() conversion is evaluated inside the timed expression, so its
# cost is included in the manhaProxy timing.
# unit = "relative" reports every timing as a multiple of the fastest entry.
microbenchmark::microbenchmark(
  manhaQuanteda = textstat_dist(SOTUDfm, method = "manhattan",
                                margin = "documents", upper = FALSE),
  manhaProxy = proxy::dist(as.matrix(SOTUDfm), "Manhattan",
                           diag = FALSE, upper = FALSE),
  times = 10,
  unit = "relative"
)
## Unit: relative
## expr min lq mean median uq max neval
## manhaQuanteda 1.00000 1.00000 1.00000 1.0000 1.00000 1.00000 10
## manhaProxy 13.70755 12.88241 12.66993 12.4481 12.49964 12.43336 10