add_cols()
: Add one or more columns to an existing data frame. If
specified .before
or .after
columns do not exist, columns are
appended at the end of the data. Return a data frame with all the original
columns in .data
plus the columns declared in ...
. In
add_cols()
columns in .data
are available for the expressions.
So, it is possible to add a column based on existing data.
add_rows()
: Add one or more rows to an existing data frame. If
specified .before
or .after
rows do not exist, rows are
appended at the end of the data. Return a data frame with all the original
rows in .data
plus the rows declared in ...
.
all_pairs()
: Get all the possible pairs between the levels of a
factor.
colnames_to_lower()
: Translate all column names to lower case.
colnames_to_upper()
: Translate all column names to upper case.
colnames_to_title()
: Translate all column names to title case.
column_exists()
: Checks if a column exists in a data frame. Return a
logical value.
column_to_first()
: Move columns to first positions in .data
.
column_to_last()
: Move columns to last positions in .data
.
concatenate()
: Concatenate columns of a data frame. If drop =
TRUE
then the existing variables are dropped. If pull = TRUE
then the
concatenated variable is pulled out to a vector. This is especially useful when
using concatenate
to add columns to a data frame with add_cols()
.
get_levels()
: Get the levels of a factor variable.
get_level_size()
: Get the size of each level of a factor variable.
remove_cols()
: Remove one or more columns from a data frame.
remove_rows()
: Remove one or more rows from a data frame.
reorder_cols()
: Reorder columns in a data frame.
select_cols()
: Select one or more columns from a data frame.
select_first_col()
: Select first variable, possibly with an offset.
select_last_col()
: Select last variable, possibly with an offset.
select_numeric_cols()
: Select all the numeric columns of a data
frame.
select_non_numeric_cols()
: Select all the non-numeric columns of a
data frame.
select_rows()
: Select one or more rows from a data frame.
add_cols(.data, ..., .before = NULL, .after = NULL) add_rows(.data, ..., .before = NULL, .after = NULL) all_pairs(.data, levels) colnames_to_lower(.data) colnames_to_upper(.data) colnames_to_title(.data) column_to_first(.data, ...) column_to_last(.data, ...) column_exists(.data, cols) concatenate( .data, ..., new_var = new_var, sep = "_", drop = FALSE, pull = FALSE, .before = NULL, .after = NULL ) get_levels(.data, group) get_level_size(.data, group) reorder_cols(.data, ..., .before = NULL, .after = NULL) remove_cols(.data, ...) remove_rows(.data, ...) select_first_col(.data, offset = NULL) select_last_col(.data, offset = NULL) select_numeric_cols(.data) select_non_numeric_cols(.data) select_cols(.data, ...) select_rows(.data, ...)
.data | A data frame |
---|---|
... | The argument depends on the function used.
|
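.before, .after | For add_cols() and concatenate(), the column before or after which the new columns are placed; for reorder_cols(), the column before or after which the selected columns are moved; for add_rows(), the row before or after which the new rows are placed. Defaults to NULL. |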
levels | The levels of a factor or a numeric vector. |
cols | A quoted variable name to check if it exists in .data. |
new_var | The name of the new variable containing the concatenated
values. Defaults to new_var. |
sep | The separator to appear between concatenated variables. Defaults to "_". |
drop | Logical argument. If TRUE, the existing variables are dropped. Defaults to FALSE. |
pull | Logical argument. If TRUE, the concatenated variable is pulled out to a vector. Defaults to FALSE. |
group | A factor variable to get the levels. |
offset | Set it to n to select the nth variable from the
end (for select_last_col()) or from the beginning (for select_first_col()). |
Tiago Olivoto tiagoolivoto@gmail.com
# \donttest{ library(metan) ################# Adding columns ################# # Variables x and y .after last column data_ge %>% add_cols(x = 10, y = 30)#> # A tibble: 420 x 7 #> ENV GEN REP GY HM x y #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> #> 1 E1 G1 1 2.17 44.9 10 30 #> 2 E1 G1 2 2.50 46.9 10 30 #> 3 E1 G1 3 2.43 47.8 10 30 #> 4 E1 G2 1 3.21 45.2 10 30 #> 5 E1 G2 2 2.93 45.3 10 30 #> 6 E1 G2 3 2.56 45.5 10 30 #> 7 E1 G3 1 2.77 46.7 10 30 #> 8 E1 G3 2 3.62 43.2 10 30 #> 9 E1 G3 3 2.28 47.8 10 30 #> 10 E1 G4 1 2.36 47.9 10 30 #> # ... with 410 more rows# Variables x and y .before the variable GEN data_ge %>% add_cols(x = 10, y = 30, .before = GEN)#> # A tibble: 420 x 7 #> ENV x y GEN REP GY HM #> <fct> <dbl> <dbl> <fct> <fct> <dbl> <dbl> #> 1 E1 10 30 G1 1 2.17 44.9 #> 2 E1 10 30 G1 2 2.50 46.9 #> 3 E1 10 30 G1 3 2.43 47.8 #> 4 E1 10 30 G2 1 3.21 45.2 #> 5 E1 10 30 G2 2 2.93 45.3 #> 6 E1 10 30 G2 3 2.56 45.5 #> 7 E1 10 30 G3 1 2.77 46.7 #> 8 E1 10 30 G3 2 3.62 43.2 #> 9 E1 10 30 G3 3 2.28 47.8 #> 10 E1 10 30 G4 1 2.36 47.9 #> # ... with 410 more rows# Creating a new variable based on the existing ones. data_ge %>% add_cols(GY2 = GY^2, GY2_HM = GY2 + HM, .after = GY)#> # A tibble: 420 x 7 #> ENV GEN REP GY GY2 GY2_HM HM #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> #> 1 E1 G1 1 2.17 4.70 49.6 44.9 #> 2 E1 G1 2 2.50 6.27 53.2 46.9 #> 3 E1 G1 3 2.43 5.89 53.6 47.8 #> 4 E1 G2 1 3.21 10.3 55.5 45.2 #> 5 E1 G2 2 2.93 8.60 53.9 45.3 #> 6 E1 G2 3 2.56 6.58 52.1 45.5 #> 7 E1 G3 1 2.77 7.67 54.4 46.7 #> 8 E1 G3 2 3.62 13.1 56.3 43.2 #> 9 E1 G3 3 2.28 5.18 52.9 47.8 #> 10 E1 G4 1 2.36 5.57 53.5 47.9 #> # ... with 410 more rows############### Reordering columns ############### reorder_cols(data_ge2, NKR, .before = ENV)#> # A tibble: 156 x 18 #> NKR ENV GEN REP PH EH EP EL ED CL CD CW KW #> <dbl> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 36.6 A1 H1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 
#> 2 31.4 A1 H1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. #> 3 31.8 A1 H1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. #> 4 32.8 A1 H10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. #> 5 28 A1 H10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. #> 6 32.8 A1 H10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. #> 7 34.6 A1 H11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. #> 8 34.4 A1 H11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. #> 9 34.8 A1 H11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. #> 10 31.6 A1 H12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. #> # ... with 146 more rows, and 5 more variables: NR <dbl>, CDED <dbl>, #> # PERK <dbl>, TKW <dbl>, NKE <dbl>#> # A tibble: 156 x 18 #> PH EH EP EL ED CL CD CW KW NR NKR CDED PERK #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6 36.6 0.538 89.6 #> 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16 31.4 0.551 89.5 #> 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2 31.8 0.561 89.7 #> 4 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6 32.8 0.586 87.9 #> 5 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6 28 0.607 89.7 #> 6 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8 32.8 0.577 88.5 #> 7 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8 34.6 0.594 89.1 #> 8 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6 34.4 0.608 88.3 #> 9 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2 34.8 0.576 89.0 #> 10 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8 31.6 0.597 88.7 #> # ... with 146 more rows, and 5 more variables: TKW <dbl>, NKE <dbl>, #> # ENV <fct>, GEN <fct>, REP <fct>######## Selecting and removing columns ########## select_cols(data_ge2, GEN, REP)#> # A tibble: 156 x 2 #> GEN REP #> <fct> <fct> #> 1 H1 1 #> 2 H1 2 #> 3 H1 3 #> 4 H10 1 #> 5 H10 2 #> 6 H10 3 #> 7 H11 1 #> 8 H11 2 #> 9 H11 3 #> 10 H12 1 #> # ... 
with 146 more rowsremove_cols(data_ge2, GEN, REP)#> # A tibble: 156 x 16 #> ENV PH EH EP EL ED CL CD CW KW NR NKR CDED #> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6 36.6 0.538 #> 2 A1 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16 31.4 0.551 #> 3 A1 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2 31.8 0.561 #> 4 A1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6 32.8 0.586 #> 5 A1 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6 28 0.607 #> 6 A1 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8 32.8 0.577 #> 7 A1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8 34.6 0.594 #> 8 A1 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 13.6 34.4 0.608 #> 9 A1 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2 34.8 0.576 #> 10 A1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8 31.6 0.597 #> # ... with 146 more rows, and 3 more variables: PERK <dbl>, TKW <dbl>, #> # NKE <dbl>########## Selecting and removing rows ########### select_rows(data_ge2, 2:3)#> # A tibble: 2 x 18 #> ENV GEN REP PH EH EP EL ED CL CD CW KW NR #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 21.4 184. 16 #> 2 A1 H1 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 24.0 208. 17.2 #> # ... with 5 more variables: NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, #> # NKE <dbl>remove_rows(data_ge2, 2:3)#> # A tibble: 154 x 18 #> ENV GEN REP PH EH EP EL ED CL CD CW KW NR #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 25.1 217. 15.6 #> 2 A1 H10 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 26.2 194. 15.6 #> 3 A1 H10 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 20.7 176. 17.6 #> 4 A1 H10 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 26.8 207. 16.8 #> 5 A1 H11 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 26.2 217. 16.8 #> 6 A1 H11 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 24.1 181. 
13.6 #> 7 A1 H11 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 20.5 166. 15.2 #> 8 A1 H12 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 20.1 161. 14.8 #> 9 A1 H12 2 2.56 1.56 0.616 15.7 49.9 29.9 16.2 24.0 188. 17.2 #> 10 A1 H12 3 2.79 1.53 0.546 15.0 52.7 31.4 15.2 32.9 193. 20 #> # ... with 144 more rows, and 5 more variables: NKR <dbl>, CDED <dbl>, #> # PERK <dbl>, TKW <dbl>, NKE <dbl>########### Concatenating columns ################ concatenate(data_ge, ENV, GEN, REP)#> # A tibble: 420 x 6 #> ENV GEN REP GY HM new_var #> <fct> <fct> <fct> <dbl> <dbl> <chr> #> 1 E1 G1 1 2.17 44.9 E1_G1_1 #> 2 E1 G1 2 2.50 46.9 E1_G1_2 #> 3 E1 G1 3 2.43 47.8 E1_G1_3 #> 4 E1 G2 1 3.21 45.2 E1_G2_1 #> 5 E1 G2 2 2.93 45.3 E1_G2_2 #> 6 E1 G2 3 2.56 45.5 E1_G2_3 #> 7 E1 G3 1 2.77 46.7 E1_G3_1 #> 8 E1 G3 2 3.62 43.2 E1_G3_2 #> 9 E1 G3 3 2.28 47.8 E1_G3_3 #> 10 E1 G4 1 2.36 47.9 E1_G4_1 #> # ... with 410 more rowsconcatenate(data_ge, ENV, GEN, REP, drop = TRUE)#> # A tibble: 420 x 1 #> new_var #> <chr> #> 1 E1_G1_1 #> 2 E1_G1_2 #> 3 E1_G1_3 #> 4 E1_G2_1 #> 5 E1_G2_2 #> 6 E1_G2_3 #> 7 E1_G3_1 #> 8 E1_G3_2 #> 9 E1_G3_3 #> 10 E1_G4_1 #> # ... 
with 410 more rows# Combine with add_cols() and replace_string() data_ge2 %>% add_cols(ENV_GEN = concatenate(., ENV, GEN, pull = TRUE), .after = GEN) %>% replace_string(ENV_GEN, pattern = "H", replacement = "HYB_", .after = ENV_GEN)#> # A tibble: 156 x 20 #> ENV GEN ENV_GEN new_var REP PH EH EP EL ED CL CD #> <fct> <fct> <chr> <chr> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 A1_H1 A1_HYB~ 1 2.61 1.71 0.658 16.1 52.2 28.1 16.3 #> 2 A1 H1 A1_H1 A1_HYB~ 2 2.87 1.76 0.628 14.2 50.3 27.6 14.5 #> 3 A1 H1 A1_H1 A1_HYB~ 3 2.68 1.58 0.591 16.0 50.7 28.4 16.4 #> 4 A1 H10 A1_H10 A1_HYB~ 1 2.83 1.64 0.581 16.7 54.1 31.7 17.4 #> 5 A1 H10 A1_H10 A1_HYB~ 2 2.79 1.71 0.616 14.9 52.7 32.0 15.5 #> 6 A1 H10 A1_H10 A1_HYB~ 3 2.72 1.51 0.554 16.7 52.7 30.4 17.5 #> 7 A1 H11 A1_H11 A1_HYB~ 1 2.75 1.51 0.549 17.4 51.7 30.6 18.0 #> 8 A1 H11 A1_H11 A1_HYB~ 2 2.72 1.56 0.573 16.7 47.2 28.7 17.2 #> 9 A1 H11 A1_H11 A1_HYB~ 3 2.77 1.67 0.600 15.8 47.9 27.6 16.4 #> 10 A1 H12 A1_H12 A1_HYB~ 1 2.73 1.54 0.563 14.9 47.5 28.2 15.5 #> # ... 
with 146 more rows, and 8 more variables: CW <dbl>, KW <dbl>, NR <dbl>, #> # NKR <dbl>, CDED <dbl>, PERK <dbl>, TKW <dbl>, NKE <dbl>########### formating column names ############### # Creating data with messy column names df <- head(data_ge, 3) colnames(df) <- c("Env", "gen", "Rep", "GY", "hm") df#> # A tibble: 3 x 5 #> Env gen Rep GY hm #> <fct> <fct> <fct> <dbl> <dbl> #> 1 E1 G1 1 2.17 44.9 #> 2 E1 G1 2 2.50 46.9 #> 3 E1 G1 3 2.43 47.8colnames_to_lower(df)#> # A tibble: 3 x 5 #> env gen rep gy hm #> <fct> <fct> <fct> <dbl> <dbl> #> 1 E1 G1 1 2.17 44.9 #> 2 E1 G1 2 2.50 46.9 #> 3 E1 G1 3 2.43 47.8colnames_to_upper(df)#> # A tibble: 3 x 5 #> ENV GEN REP GY HM #> <fct> <fct> <fct> <dbl> <dbl> #> 1 E1 G1 1 2.17 44.9 #> 2 E1 G1 2 2.50 46.9 #> 3 E1 G1 3 2.43 47.8colnames_to_title(df)#> # A tibble: 3 x 5 #> Env Gen Rep Gy Hm #> <fct> <fct> <fct> <dbl> <dbl> #> 1 E1 G1 1 2.17 44.9 #> 2 E1 G1 2 2.50 46.9 #> 3 E1 G1 3 2.43 47.8################### Adding rows ################## data_ge %>% add_rows(GY = 10.3, HM = 100.11, .after = 1)#> # A tibble: 421 x 5 #> ENV GEN REP GY HM #> <fct> <fct> <fct> <dbl> <dbl> #> 1 E1 G1 1 2.17 44.9 #> 2 NA NA NA 10.3 100. #> 3 E1 G1 2 2.50 46.9 #> 4 E1 G1 3 2.43 47.8 #> 5 E1 G2 1 3.21 45.2 #> 6 E1 G2 2 2.93 45.3 #> 7 E1 G2 3 2.56 45.5 #> 8 E1 G3 1 2.77 46.7 #> 9 E1 G3 2 3.62 43.2 #> 10 E1 G3 3 2.28 47.8 #> # ... 
with 411 more rows########## checking if a column exists ########### column_exists(data_g, "GEN")#> [1] TRUE####### get the levels and size of levels ######## get_levels(data_g, GEN)#> [1] "H1" "H10" "H11" "H12" "H13" "H2" "H3" "H4" "H5" "H6" "H7" "H8" #> [13] "H9"get_level_size(data_g, GEN)#> H1 H10 H11 H12 H13 H2 H3 H4 H5 H6 H7 H8 H9 #> 3 3 3 3 3 3 3 3 3 3 3 3 3############## all possible pairs ################ all_pairs(data_g, GEN)#> V1 V2 #> 1 H1 H10 #> 2 H1 H11 #> 3 H1 H12 #> 4 H1 H13 #> 5 H1 H2 #> 6 H1 H3 #> 7 H1 H4 #> 8 H1 H5 #> 9 H1 H6 #> 10 H1 H7 #> 11 H1 H8 #> 12 H1 H9 #> 13 H10 H11 #> 14 H10 H12 #> 15 H10 H13 #> 16 H10 H2 #> 17 H10 H3 #> 18 H10 H4 #> 19 H10 H5 #> 20 H10 H6 #> 21 H10 H7 #> 22 H10 H8 #> 23 H10 H9 #> 24 H11 H12 #> 25 H11 H13 #> 26 H11 H2 #> 27 H11 H3 #> 28 H11 H4 #> 29 H11 H5 #> 30 H11 H6 #> 31 H11 H7 #> 32 H11 H8 #> 33 H11 H9 #> 34 H12 H13 #> 35 H12 H2 #> 36 H12 H3 #> 37 H12 H4 #> 38 H12 H5 #> 39 H12 H6 #> 40 H12 H7 #> 41 H12 H8 #> 42 H12 H9 #> 43 H13 H2 #> 44 H13 H3 #> 45 H13 H4 #> 46 H13 H5 #> 47 H13 H6 #> 48 H13 H7 #> 49 H13 H8 #> 50 H13 H9 #> 51 H2 H3 #> 52 H2 H4 #> 53 H2 H5 #> 54 H2 H6 #> 55 H2 H7 #> 56 H2 H8 #> 57 H2 H9 #> 58 H3 H4 #> 59 H3 H5 #> 60 H3 H6 #> 61 H3 H7 #> 62 H3 H8 #> 63 H3 H9 #> 64 H4 H5 #> 65 H4 H6 #> 66 H4 H7 #> 67 H4 H8 #> 68 H4 H9 #> 69 H5 H6 #> 70 H5 H7 #> 71 H5 H8 #> 72 H5 H9 #> 73 H6 H7 #> 74 H6 H8 #> 75 H6 H9 #> 76 H7 H8 #> 77 H7 H9 #> 78 H8 H9########## select numeric variables only ######### select_numeric_cols(data_g)#> # A tibble: 39 x 15 #> PH EH EP EL ED CL CD CW KW NR NKR CDED PERK #> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 2.11 1.05 0.497 15.7 49.9 30.5 16.6 28.6 164. 15.6 31.2 0.612 85.1 #> 2 2.20 1.09 0.492 13.7 49.2 30.5 14.7 22.3 130. 16.4 24.8 0.619 85.2 #> 3 2.29 1.15 0.502 15.1 52.6 31.7 16.2 29.6 176. 15.6 29.2 0.603 85.9 #> 4 1.79 0.888 0.514 13.9 44.1 26.2 15.0 12.9 116. 14.8 33 0.596 89.8 #> 5 2.05 1.03 0.504 13.6 43.9 23.5 14.4 11.5 118. 
16 32.4 0.535 91.1 #> 6 2.27 1.11 0.491 14.5 43.7 24.6 16.1 12.5 128. 15.2 34.6 0.566 90.7 #> 7 1.71 0.808 0.489 15.5 45.2 25.0 16.7 15.2 140. 15.6 36 0.552 90.3 #> 8 2.09 1.06 0.509 12.2 46.9 26.5 14.3 13.5 114. 16.8 26.2 0.566 89.3 #> 9 2.5 1.44 0.577 15.0 49.0 27.5 15.2 19.4 168. 16.4 35 0.562 89.6 #> 10 2.52 1.52 0.601 14.4 49.2 28.4 15 18.2 153. 16.4 32 0.577 89.2 #> # ... with 29 more rows, and 2 more variables: TKW <dbl>, NKE <dbl>select_non_numeric_cols(data_g)#> # A tibble: 39 x 2 #> GEN REP #> <fct> <fct> #> 1 H1 1 #> 2 H1 2 #> 3 H1 3 #> 4 H10 1 #> 5 H10 2 #> 6 H10 3 #> 7 H11 1 #> 8 H11 2 #> 9 H11 3 #> 10 H12 1 #> # ... with 29 more rows# }