Split a data frame z
into two data frames x
and y
so
that merge(x, y)
is z
.
unmerge(z, by)
z | data frame |
---|---|
by | vector of names of columns in z |
list with two elements x
and y
each of which is a data
frame containing at least the columns given in by
.
z <- data.frame(
  name = c("peter", "peter", "paul", "mary", "paul", "mary"),
  age = c(42, 42, 31, 28, 31, 28),
  height = c(181, 181, 178, 172, 178, 172),
  subject = c("maths", "bio", "bio", "bio", "chem", "maths"),
  year = c(2016, 2017, 2017, 2017, 2015, 2016),
  mark = c("A", "B", "B", "A", "C", "b")
)

# What fields seem to be properties of objects identified by name?
# -> Age and height are fixed properties of the persons identified by name
(result1 <- unmerge(z, "name"))
#> $x
#>    name age height
#> 1 peter  42    181
#> 3  paul  31    178
#> 4  mary  28    172
#>
#> $y
#>    name subject year mark
#> 1 peter   maths 2016    A
#> 2 peter     bio 2017    B
#> 3  paul     bio 2017    B
#> 4  mary     bio 2017    A
#> 5  paul    chem 2015    C
#> 6  mary   maths 2016    b
#>

# What fields seem to be properties of objects identified by subject?
# -> It seems that the subjects have been tested in different years
(result2 <- unmerge(z, "subject"))
#> $x
#>   subject year
#> 1   maths 2016
#> 2     bio 2017
#> 5    chem 2015
#>
#> $y
#>   subject  name age height mark
#> 1   maths peter  42    181    A
#> 2     bio peter  42    181    B
#> 3     bio  paul  31    178    B
#> 4     bio  mary  28    172    A
#> 5    chem  paul  31    178    C
#> 6   maths  mary  28    172    b
#>

# Test if merge(result$x, result$y) results in z
y1 <- merge(result1$x, result1$y)
y2 <- merge(result2$x, result2$y)

columns <- sort(names(z))

identical(fullySorted(z[, columns]), fullySorted(y1[, columns])) # TRUE
#> [1] TRUE
identical(fullySorted(z[, columns]), fullySorted(y2[, columns])) # TRUE
#> [1] TRUE