# Attach quanteda with library() rather than require(): library() stops with
# an error when the package is missing, whereas require() only warns and
# returns FALSE, letting the script run on to a less obvious failure below.
library(quanteda)
## Package version: 1.3.18
## Parallel computing: 2 of 8 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View
# Raise quanteda's thread count from the 2 reported at attach time to 8.
quanteda_options(threads = 8)
# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: KDE neon User Edition 5.14
## 
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] quanteda_1.3.18
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.0         pillar_1.3.0       compiler_3.4.4    
##  [4] plyr_1.8.4         bindr_0.1.1        tools_3.4.4       
##  [7] stopwords_0.9.0    digest_0.6.18      lubridate_1.7.4   
## [10] evaluate_0.12      tibble_1.4.2       gtable_0.2.0      
## [13] lattice_0.20-35    pkgconfig_2.0.2    rlang_0.3.0.1     
## [16] fastmatch_1.1-0    Matrix_1.2-12      yaml_2.2.0        
## [19] xfun_0.4           bindrcpp_0.2.2     stringr_1.3.1     
## [22] dplyr_0.7.8        knitr_1.21         grid_3.4.4        
## [25] tidyselect_0.2.5   glue_1.3.0         data.table_1.11.8 
## [28] R6_2.3.0           rmarkdown_1.11     spacyr_0.9.91     
## [31] ggplot2_3.1.0      purrr_0.2.5        magrittr_1.5      
## [34] scales_1.0.0       htmltools_0.3.6    assertthat_0.2.0  
## [37] colorspace_1.3-2   stringi_1.2.4      lazyeval_0.2.1    
## [40] RcppParallel_4.4.2 munsell_0.5.0      crayon_1.3.4
# Load the serialized corpus object from disk (presumably a quanteda corpus of
# Guardian articles, judging by the file name).
# NOTE(review): this is an absolute path on the original author's machine;
# it must be adjusted before the script can run anywhere else.
corp <- readRDS("/home/kohei/Documents/Brexit/Data/data_corpus_guardian.RDS")

# Benchmark: corpus_sample() with size 5000 on the full corpus.
system.time({
  corp2 <- corpus_sample(corp, size = 5000)
})
##    user  system elapsed 
##    0.01    0.00    0.01
# Benchmark: tokens() on the full corpus (about 198 s elapsed in the recorded
# run below).
system.time({
  toks <- tokens(corp)
})
##    user  system elapsed 
## 214.611   6.907 198.440

# Benchmark: tokens_sample() with size 5000 on the full tokens object.
system.time({
  toks2 <- tokens_sample(toks, size = 5000)
})
##    user  system elapsed 
##   0.880   0.019   0.882

# Benchmark: tokens_remove() with the English stopword list as the pattern,
# applied to the full tokens object.
system.time({
  toks3 <- tokens_remove(toks, pattern = stopwords("en"))
})
##    user  system elapsed 
##  25.145   0.491   9.432
# Benchmark: dfm() on the full tokens object (`toks`, not the
# stopword-filtered `toks3`).
system.time({
  mt <- dfm(toks)
})
##    user  system elapsed 
##  23.147   3.014  25.301

# Benchmark: dfm_group() with groups "date" (presumably the name of a
# document variable in the corpus; confirm against the data).
system.time({
  mt2 <- dfm_group(mt, groups = "date")
})
##    user  system elapsed 
##   9.426   1.753  11.183

# Benchmark: dfm_remove() with the English stopword list as the pattern,
# applied to the ungrouped dfm.
system.time({
  mt3 <- dfm_remove(mt, pattern = stopwords("en"))
})
##    user  system elapsed 
##   2.360   0.223   2.586