# Attach quanteda with library() so that a missing package stops the script
# right here; require() would only return FALSE with a warning and let the
# first quanteda call further down fail instead.
# NOTE: the console log below was recorded from a run that used require().
library(quanteda)
## Loading required package: quanteda
## Package version: 1.3.18
## Parallel computing: 2 of 8 threads used.
## See https://quanteda.io for tutorials and examples.
## 
## Attaching package: 'quanteda'
## The following object is masked from 'package:utils':
## 
##     View
# Set quanteda's `threads` option to 8 before any timing is taken (the load
# message above reported 2 of 8 threads in use).
quanteda_options(threads = 8)
# Print the R version, platform, locale and package versions for this run.
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: KDE neon User Edition 5.14
## 
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] quanteda_1.3.18
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.0         pillar_1.3.0       compiler_3.4.4    
##  [4] plyr_1.8.4         bindr_0.1.1        tools_3.4.4       
##  [7] stopwords_0.9.0    digest_0.6.18      evaluate_0.12     
## [10] tibble_1.4.2       gtable_0.2.0       lattice_0.20-35   
## [13] pkgconfig_2.0.2    rlang_0.3.0.1      fastmatch_1.1-0   
## [16] Matrix_1.2-12      yaml_2.2.0         xfun_0.4          
## [19] bindrcpp_0.2.2     stringr_1.3.1      dplyr_0.7.8       
## [22] knitr_1.21         grid_3.4.4         tidyselect_0.2.5  
## [25] glue_1.3.0         data.table_1.11.8  R6_2.3.0          
## [28] rmarkdown_1.11     spacyr_0.9.91      ggplot2_3.1.0     
## [31] purrr_0.2.5        magrittr_1.5       scales_1.0.0      
## [34] htmltools_0.3.6    assertthat_0.2.0   colorspace_1.3-2  
## [37] stringi_1.2.4      lazyeval_0.2.1     RcppParallel_4.4.2
## [40] munsell_0.5.0      crayon_1.3.4
# Load the serialized corpus object that every benchmark below operates on.
# NOTE(review): the path is machine-specific; presumably a quanteda corpus of
# Guardian articles, judging by the file name -- confirm.
corp <- readRDS(
  file = "/home/kohei/Documents/Brexit/Data/data_corpus_guardian.RDS"
)

# Benchmark: time a sample of size 5000 drawn from the full corpus.
# The assignment happens inside the timed expression, so `corp2` is still
# available afterwards.
system.time({
  corp2 <- corpus_sample(corp, size = 5000)
})
##    user  system elapsed 
##  10.126   0.539  10.669
# Benchmark: time tokenization of the full corpus (`corp`, not the sampled
# `corp2`). The resulting `toks` object feeds the later benchmarks.
system.time({
  toks <- tokens(x = corp)
})
##    user  system elapsed 
## 219.813   4.588 202.292
# Benchmark: time a sample of size 5000 drawn from the tokens object, for
# comparison with sampling the raw corpus.
system.time({
  toks2 <- tokens_sample(toks, size = 5000)
})
##    user  system elapsed 
##   1.028   0.001   1.009
# Benchmark: time removal of the stopwords("en") list from the tokens object.
# The stopword lookup stays inside the timed expression.
system.time({
  toks3 <- tokens_remove(toks, pattern = stopwords("en"))
})
##    user  system elapsed 
##  25.548   0.573   9.560
# Benchmark: time construction of a document-feature matrix from the full
# tokens object. `mt` is reused by the two dfm benchmarks that follow.
system.time({
  mt <- dfm(x = toks)
})
##    user  system elapsed 
##  24.539   2.472  26.250
# Benchmark: time grouping of the dfm by "date".
# NOTE(review): "date" is presumably the name of a document variable carried
# over from the corpus -- confirm against the data.
system.time({
  mt2 <- dfm_group(mt, groups = "date")
})
##    user  system elapsed 
##   8.049   1.599   9.669
# Benchmark: time removal of the stopwords("en") features from the dfm, the
# matrix-level counterpart of the tokens_remove() benchmark.
system.time({
  mt3 <- dfm_remove(mt, pattern = stopwords("en"))
})
##    user  system elapsed 
##   2.244   0.232   2.477