Recombine the tokens of documents by groups
tokens_group(x, groups = NULL)
Argument | Description |
---|---|
x | a tokens object |
groups | either: a character vector containing the names of document variables to be used for grouping; or a factor or object that can be coerced into a factor equal in length or rows to the number of documents. See groups for details. |
# tokens_group examples
corp <- corpus(c("a a b", "a b c c", "a c d d", "a c c d"),
               docvars = data.frame(grp = c("grp1", "grp1", "grp2", "grp2")))
toks <- tokens(corp)
quanteda:::tokens_group(toks, groups = "grp")
#> tokens from 2 documents.
#> grp1 :
#> [1] "a" "a" "b" "a" "b" "c" "c"
#>
#> grp2 :
#> [1] "a" "c" "d" "d" "a" "c" "c" "d"
#>
quanteda:::tokens_group(toks, groups = c(1, 1, 2, 2))
#> tokens from 2 documents.
#> 1 :
#> [1] "a" "a" "b" "a" "b" "c" "c"
#>
#> 2 :
#> [1] "a" "c" "d" "d" "a" "c" "c" "d"
#>