Select helpers

The package metan reexports the tidy select helpers and implements its own select helpers based on operations with prefixes and suffixes (difference_var(), intersect_var(), and union_var()), on the length of variable names (width_of(), width_greater_than(), and width_less_than()), and on case type (lower_case_only(), upper_case_only(), and title_case_only()).

Variables that start with a prefix and end with a suffix.

Here, we will select the variables from data_ge2 that start with “C” and end with “D”. To keep the outputs short, only the first three rows are selected.

library(metan)
# Keep only the first three rows so the rendered tables stay short.
data_sel <- head(data_ge2, n = 3)
# Columns whose names start with "C" and end with "D".
print_table(select_cols(data_sel, intersect_var("C", "D")))

Variables that start with a prefix OR end with a suffix.

The following code selects variables that start with “C” or end with “D”.

# Columns whose names start with "C" or end with "D".
print_table(select_cols(data_sel, union_var("C", "D")))

Variables that start with a prefix AND do NOT end with a suffix.

The following code selects variables that start with “C” and do not end with “D”.

# Columns whose names start with "C" but do not end with "D".
print_table(select_cols(data_sel, difference_var("C", "D")))

Selection based on length of column names.

  • Select variables with a specific name length (four letters)
# Columns whose names are exactly four characters long.
print_table(select_cols(data_sel, width_of(4)))
  • Select variables with width less than n.
# Columns whose name width is less than 3.
print_table(select_cols(data_sel, width_less_than(3)))
  • Select variables with width greater than n.
# Columns whose name width is greater than 2.
print_table(select_cols(data_sel, width_greater_than(2)))

Select variables by case type

Let’s create a data frame with ‘messy’ column names.

# First three rows of data_ge, renamed so the columns mix lower-case,
# upper-case, and title-case names.
df <- head(data_ge, n = 3)
names(df) <- c("Env", "gen", "Rep", "GY", "hm")

# One table per case-based helper.
df %>%
  select_cols(lower_case_only()) %>%
  print_table()
df %>%
  select_cols(upper_case_only()) %>%
  print_table()
df %>%
  select_cols(title_case_only()) %>%
  print_table()

Remove rows or columns with NA values

The functions remove_rows_na() and remove_cols_na() are used to remove rows and columns with NA values, respectively.

# Work on a copy of data_g and set columns 3-5 and 10-15 to NA in
# rows 1, 5, and 10.
data_with_na <- data_g
data_with_na[c(1, 5, 10), c(3:5, 10:15)] <- NA
print_table(data_with_na)

# Drop the columns that contain NA values.
data_with_na %>%
  remove_cols_na() %>%
  print_table()
# Warning: Column(s) PH, EH, EP, CW, KW, NR, NKR, CDED, PERK with NA values
# deleted.

# Drop the rows that contain NA values.
data_with_na %>%
  remove_rows_na() %>%
  print_table()
# Warning: Row(s) 1, 5, 10 with NA values deleted.

Bind cross-validation objects



# Cross-validation objects built on data_ge (ENV, GEN, REP as design columns,
# GY as the response). The three cv_ammi() calls differ only in `naxis`.
# NOTE(review): these functions presumably resample the data, so results may
# vary between runs unless a seed is set elsewhere -- confirm.
AMMI0 <- cv_ammi(data_ge, ENV, GEN, REP, GY, naxis = 0)
AMMI2 <- cv_ammi(data_ge, ENV, GEN, REP, GY, naxis = 2)
AMMI9 <- cv_ammi(data_ge, ENV, GEN, REP, GY, naxis = 9)
# AMMIF and BLUP_g are created here but are not passed to bind_cv() below.
AMMIF <- cv_ammif(data_ge, ENV, GEN, REP, GY)
BLUP_g <- cv_blup(data_ge, ENV, GEN, REP, GY)
# Bind the three cv_ammi objects: once with the default `bind` setting and
# once with bind = "means" (presumably binds summarised means -- confirm).
bind1 <- bind_cv(AMMI0, AMMI2, AMMI9)
bind2 <- bind_cv(AMMI0, AMMI2, AMMI9, bind = "means")

Split a dataframe into subsets grouped by one or more factors

Group data and exclude all non-numeric variables

# Split data_ge by ENV and confirm the class of the result.
g1 <- data_ge %>% split_factors(ENV)
is.split_factors(g1)
# [1] TRUE

Group data and keep all original variables

# Split by ENV and GEN, keeping the factor columns in each subset, then
# show the first subset.
g2 <- data_ge %>% split_factors(ENV, GEN, keep_factors = TRUE)
g2[[1]] %>% print_table()

Group a data frame using all factor variables

# Split the built-in CO2 data set using all of its factor columns; the
# subset names join the factor levels with " | ".
g3 <- CO2 %>% as.split_factors()
names(g3)
#  [1] "Qn1 | Quebec | nonchilled"      "Qn2 | Quebec | nonchilled"     
#  [3] "Qn3 | Quebec | nonchilled"      "Qc1 | Quebec | chilled"        
#  [5] "Qc3 | Quebec | chilled"         "Qc2 | Quebec | chilled"        
#  [7] "Mn3 | Mississippi | nonchilled" "Mn2 | Mississippi | nonchilled"
#  [9] "Mn1 | Mississippi | nonchilled" "Mc2 | Mississippi | chilled"   
# [11] "Mc3 | Mississippi | chilled"    "Mc1 | Mississippi | chilled"

Make a two-way table based on categorical and numerical arguments

# Show the long-format data, then build a two-way table with GEN in rows,
# ENV in columns, and GY as the cell value.
print_table(data_ge)
# Named `two_way` rather than `matrix` so the result does not mask
# base::matrix() in the session.
two_way <- make_mat(data_ge, row = GEN, col = ENV, val = GY)
print_table(two_way, rownames = TRUE)

Make upper and lower triangular matrices

# Correlation matrix for the selected traits; each variable is listed once.
cor_mat <- corr_coef(data_ge2, EP, EL, ED, CD, CL)$cor

# Upper triangular
upp_mat <- make_upper_tri(cor_mat)
print_table(upp_mat, rownames = TRUE)

# Lower triangular
low_mat <- make_lower_tri(cor_mat)
print_table(low_mat, rownames = TRUE)

Make a symmetric matrix

# Build a symmetric matrix from the lower-triangular matrix low_mat.
sym <- low_mat %>% make_sym()
sym %>% print_table(rownames = TRUE)

Reorder a correlation matrix

Reorder the correlation matrix according to the correlation coefficient by using hclust for hierarchical clustering order. This is useful to identify the hidden pattern in the matrix.

# Original ordering first, then the reordered matrix for comparison.
cor_mat %>% print_table(rownames = TRUE)
print_table(reorder_cormat(cor_mat), rownames = TRUE)

Compute harmonic and geometric means

# Harmonic and geometric means of a plain numeric vector.
num <- c(1:20, 30, 50)
num %>% hmean()
# [1] 6.025626
num %>% gmean()
# [1] 9.552141

# Applied to a data frame: hmean() with no variables named, then gmean()
# restricted to EP, EL, and CL.
round(hmean(data_ge2), 2)
# # A tibble: 1 x 15
#      PH    EH    EP    EL    ED    CL    CD    CW    KW    NR   NKR  CDED  PERK
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1  2.44  1.28  0.53  15.1  49.4  28.8  15.9  23.0  166.  16.0  31.9 0.580  87.4
# # ... with 2 more variables: TKW <dbl>, NKE <dbl>
data_ge2 %>% gmean(EP, EL, CL)
# # A tibble: 1 x 3
#      EP    EL    CL
#   <dbl> <dbl> <dbl>
# 1 0.534  15.1  28.9

Generate pairwise combinations of variables by applying one function to each pair

# Five columns (A-E) of five uniform random draws each. No seed is set in
# this snippet, so the values differ between runs unless one is set elsewhere.
data <- data.frame(A = runif(n = 5, min = 3, max = 30),
                   B = runif(n = 5, min = 1, max = 10),
                   C = runif(n = 5, min = 9, max = 90),
                   D = runif(n = 5, min = 1, max = 90),
                   E = runif(n = 5, min = 5, max = 10))
# Pairwise combinations with the default function and ordering.
c1 <- comb_vars(data)
print_table(c1)

# Same, but combining each pair with "*" and order = "first"
# (NOTE(review): see ?comb_vars for what `order` controls).
c2 <- comb_vars(data, FUN = "*", order = "first")
print_table(c2)

Combining data.frames by row, filling missing values

# Two data frames with no column in common.
df1 <- data.frame(v1 = c(1, 2), v2 = c(2, 3))
df2 <- data.frame(v3 = c(4, 5))
# Row-bind with the default fill, then with the character string "NA"
# passed as the fill value.
print_table(rbind_fill(df1, df2))
print_table(rbind_fill(df1, df2, fill = "NA"))

Rescale a continuous vector to have specified minimum and maximum values

Rescale a numeric vector

# With no new_min/new_max supplied, 1:5 is rescaled onto 0-100 (see output).
resca(values = 1:5)
# [1]   0  25  50  75 100

Rescale using a data frame and select rescaled variables only

# Rescale GY and HM to the range 0-1; keep = FALSE is used here so that only
# the rescaled variables are returned. Show the first rows.
data_ge %>%
  resca(GY, HM, new_min = 0, new_max = 1, keep = FALSE) %>%
  head() %>%
  print_table()

Rescale within factors

library(tidyverse)
  # Average every selected trait within each ENV x GEN combination, regroup
  # by ENV, and rescale the columns ending in "L" (the grouping is what makes
  # this a within-factor rescale). Only the first 13 rows are shown.
  # summarise(across()) replaces the superseded summarise_all(); "drop_last"
  # keeps the same ENV grouping that summarise_all() produced.
  data_ge2 %>%
    select(ENV, GEN, starts_with("N"), ends_with("L")) %>%
    group_by(ENV, GEN) %>%
    summarise(across(everything(), mean), .groups = "drop_last") %>%
    group_by(ENV) %>%
    resca(ends_with("L")) %>%
    head(n = 13) %>%
    print_table()

Rendering engine

This vignette was built with pkgdown. All tables were produced with the package DT using the following function.

library(DT) # Used to make the tables
# Function to make HTML tables
#' Render a table as an interactive DT widget
#'
#' @param table A data frame or matrix to display.
#' @param rownames Passed to `datatable()`; show row names?
#' @param digits Number of significant digits applied to numeric columns.
#' @param ... Further arguments forwarded to `datatable()`.
#' @return The `datatable()` widget, with `formatSignif()` applied to the
#'   columns of class "numeric" when there are any.
print_table <- function(table, rownames = FALSE, digits = 3, ...) {
  widget <- datatable(
    table,
    rownames = rownames,
    extensions = "Buttons",
    options = list(
      scrollX = TRUE,
      dom = "<<t>Bp>",
      buttons = c("copy", "excel", "pdf", "print")
    ),
    ...
  )
  # Inspect classes column by column. Coercing to a data frame first makes a
  # matrix iterate over its columns rather than its individual cells, and
  # vapply() always yields a logical vector, even for columns carrying more
  # than one class (e.g. ordered factors), where sapply(table, class) would
  # return a list that cannot be compared with "numeric".
  columns <- as.data.frame(table, stringsAsFactors = FALSE)
  is_num <- vapply(
    columns,
    function(col) identical(class(col), "numeric"),
    logical(1)
  )
  num_cols <- as.numeric(which(is_num))
  if (length(num_cols) > 0) {
    formatSignif(widget, columns = num_cols, digits = digits)
  } else {
    widget
  }
}