Recombine documents' tokens by groups

tokens_group(x, groups = NULL)

Arguments

x

tokens object

groups

either: a character vector containing the names of document variables to be used for grouping; or a factor or object that can be coerced into a factor equal in length or rows to the number of documents. See groups for details.

Examples

# tokens_group examples
corp <- corpus(c("a a b", "a b c c", "a c d d", "a c c d"),
               docvars = data.frame(grp = c("grp1", "grp1", "grp2", "grp2")))
toks <- tokens(corp)
quanteda:::tokens_group(toks, groups = "grp")
#> tokens from 2 documents.
#> grp1 :
#> [1] "a" "a" "b" "a" "b" "c" "c"
#>
#> grp2 :
#> [1] "a" "c" "d" "d" "a" "c" "c" "d"
#>
quanteda:::tokens_group(toks, groups = c(1, 1, 2, 2))
#> tokens from 2 documents.
#> 1 :
#> [1] "a" "a" "b" "a" "b" "c" "c"
#>
#> 2 :
#> [1] "a" "c" "d" "d" "a" "c" "c" "d"
#>