[Stable]

  • all_lower_case(): Translate all non-numeric strings of a data frame to lower case.

  • all_upper_case(): Translate all non-numeric strings of a data frame to upper case.

  • all_title_case(): Translate all non-numeric strings of a data frame to title case.

  • first_upper_case(): Translate the first character of a string to upper case.

  • extract_number(): Extract the number(s) of a string.

  • extract_string(): Extract all strings, ignoring case.

  • find_text_in_num(): Find text characters in a numeric sequence and return the row index.

  • has_text_in_num(): Inspect columns looking for text in numeric sequence and return a warning if text is found.

  • remove_space(): Remove all blank spaces of a string.

  • remove_strings(): Remove all strings of a variable.

  • replace_number(): Replace numbers with a replacement.

  • replace_string(): Replace all strings with a replacement, ignoring case.

  • round_cols(): Round a selected column or a whole data frame to a given number of decimal places.

  • tidy_strings(): Tidy up character strings, non-numeric columns, or any selected columns in a data frame by putting all words in upper case, replacing any space, tabulation, or punctuation character by '_', and putting '_' between lower and upper case. Suppose that str = c("Env1", "env 1", "env.1") (which by definition should represent a unique level in plant breeding trials, e.g., environment 1) is subjected to tidy_strings(str): the result will then be c("ENV_1", "ENV_1", "ENV_1"). See the Examples section for more examples.

all_upper_case(.data, ...)

all_lower_case(.data, ...)

all_title_case(.data, ...)

first_upper_case(.data, ...)

extract_number(.data, ..., pattern = NULL)

extract_string(.data, ..., pattern = NULL)

find_text_in_num(.data, ...)

has_text_in_num(.data)

remove_space(.data, ...)

remove_strings(.data, ...)

replace_number(
  .data,
  ...,
  pattern = NULL,
  replacement = "",
  ignore_case = FALSE
)

replace_string(
  .data,
  ...,
  pattern = NULL,
  replacement = "",
  ignore_case = FALSE
)

round_cols(.data, ..., digits = 2)

tidy_strings(.data, ..., sep = "_")

Arguments

.data

A data frame

...

The argument depends on the function used.

  • For round_cols() ... are the variables to round. If no variable is supplied, all the numeric variables from .data are used.

  • For all_lower_case(), all_upper_case(), all_title_case(), extract_number(), extract_string(), remove_strings(), and tidy_strings() ... are the variables to apply the function. If no variable is supplied, the function will be applied to all non-numeric variables in .data.

pattern

A string to be matched. Regular Expression Syntax is also allowed.

replacement

A string for replacement.

ignore_case

If FALSE (default), the pattern matching is case sensitive and if TRUE, case is ignored during matching.

digits

The number of decimal places to round to.

sep

A character string to separate the terms. Defaults to "_".

Author

Tiago Olivoto tiagoolivoto@gmail.com

Examples

# \donttest{
library(metan)

################ Rounding numbers ###############
# All numeric columns
round_cols(data_ge2, digits = 1)
#> # A tibble: 156 x 18 #> ENV GEN REP PH EH EP EL ED CL CD CW KW NR #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 1 2.6 1.7 0.7 16.1 52.2 28.1 16.3 25.1 216. 15.6 #> 2 A1 H1 2 2.9 1.8 0.6 14.2 50.3 27.6 14.5 21.4 184. 16 #> 3 A1 H1 3 2.7 1.6 0.6 16 50.7 28.4 16.4 24 208. 17.2 #> 4 A1 H10 1 2.8 1.6 0.6 16.7 54.1 31.7 17.4 26.2 194. 15.6 #> 5 A1 H10 2 2.8 1.7 0.6 14.9 52.7 32 15.5 20.7 176. 17.6 #> 6 A1 H10 3 2.7 1.5 0.6 16.7 52.7 30.4 17.5 26.8 207. 16.8 #> 7 A1 H11 1 2.8 1.5 0.5 17.4 51.7 30.6 18 26.2 217. 16.8 #> 8 A1 H11 2 2.7 1.6 0.6 16.7 47.2 28.7 17.2 24.1 181. 13.6 #> 9 A1 H11 3 2.8 1.7 0.6 15.8 47.9 27.6 16.4 20.5 166. 15.2 #> 10 A1 H12 1 2.7 1.5 0.6 14.9 47.5 28.2 15.5 20.1 161 14.8 #> # ... with 146 more rows, and 5 more variables: NKR <dbl>, CDED <dbl>, #> # PERK <dbl>, TKW <dbl>, NKE <dbl>
# Round specific columns round_cols(data_ge2, EP, digits = 1)
#> # A tibble: 156 x 18 #> ENV GEN REP PH EH EP EL ED CL CD CW KW NR #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 1 2.61 1.71 0.7 16.1 52.2 28.1 16.3 25.1 217. 15.6 #> 2 A1 H1 2 2.87 1.76 0.6 14.2 50.3 27.6 14.5 21.4 184. 16 #> 3 A1 H1 3 2.68 1.58 0.6 16.0 50.7 28.4 16.4 24.0 208. 17.2 #> 4 A1 H10 1 2.83 1.64 0.6 16.7 54.1 31.7 17.4 26.2 194. 15.6 #> 5 A1 H10 2 2.79 1.71 0.6 14.9 52.7 32.0 15.5 20.7 176. 17.6 #> 6 A1 H10 3 2.72 1.51 0.6 16.7 52.7 30.4 17.5 26.8 207. 16.8 #> 7 A1 H11 1 2.75 1.51 0.5 17.4 51.7 30.6 18.0 26.2 217. 16.8 #> 8 A1 H11 2 2.72 1.56 0.6 16.7 47.2 28.7 17.2 24.1 181. 13.6 #> 9 A1 H11 3 2.77 1.67 0.6 15.8 47.9 27.6 16.4 20.5 166. 15.2 #> 10 A1 H12 1 2.73 1.54 0.6 14.9 47.5 28.2 15.5 20.1 161. 14.8 #> # ... with 146 more rows, and 5 more variables: NKR <dbl>, CDED <dbl>, #> # PERK <dbl>, TKW <dbl>, NKE <dbl>
########### Extract or replace numbers ########## # Extract numbers extract_number(data_ge, GEN)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <dbl> <fct> <dbl> <dbl> #> 1 E1 1 1 2.17 44.9 #> 2 E1 1 2 2.50 46.9 #> 3 E1 1 3 2.43 47.8 #> 4 E1 2 1 3.21 45.2 #> 5 E1 2 2 2.93 45.3 #> 6 E1 2 3 2.56 45.5 #> 7 E1 3 1 2.77 46.7 #> 8 E1 3 2 3.62 43.2 #> 9 E1 3 3 2.28 47.8 #> 10 E1 4 1 2.36 47.9 #> # ... with 410 more rows
# Replace numbers replace_number(data_ge, GEN)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 G 1 2.17 44.9 #> 2 E1 G 2 2.50 46.9 #> 3 E1 G 3 2.43 47.8 #> 4 E1 G 1 3.21 45.2 #> 5 E1 G 2 2.93 45.3 #> 6 E1 G 3 2.56 45.5 #> 7 E1 G 1 2.77 46.7 #> 8 E1 G 2 3.62 43.2 #> 9 E1 G 3 2.28 47.8 #> 10 E1 G 1 2.36 47.9 #> # ... with 410 more rows
replace_number(data_ge, GEN, pattern = 1, replacement = "_one")
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 G_one 1 2.17 44.9 #> 2 E1 G_one 2 2.50 46.9 #> 3 E1 G_one 3 2.43 47.8 #> 4 E1 G2 1 3.21 45.2 #> 5 E1 G2 2 2.93 45.3 #> 6 E1 G2 3 2.56 45.5 #> 7 E1 G3 1 2.77 46.7 #> 8 E1 G3 2 3.62 43.2 #> 9 E1 G3 3 2.28 47.8 #> 10 E1 G4 1 2.36 47.9 #> # ... with 410 more rows
########## Extract, replace or remove strings ########## # Extract strings extract_string(data_ge, GEN)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 G 1 2.17 44.9 #> 2 E1 G 2 2.50 46.9 #> 3 E1 G 3 2.43 47.8 #> 4 E1 G 1 3.21 45.2 #> 5 E1 G 2 2.93 45.3 #> 6 E1 G 3 2.56 45.5 #> 7 E1 G 1 2.77 46.7 #> 8 E1 G 2 3.62 43.2 #> 9 E1 G 3 2.28 47.8 #> 10 E1 G 1 2.36 47.9 #> # ... with 410 more rows
# Replace strings replace_string(data_ge, GEN)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 1 1 2.17 44.9 #> 2 E1 1 2 2.50 46.9 #> 3 E1 1 3 2.43 47.8 #> 4 E1 2 1 3.21 45.2 #> 5 E1 2 2 2.93 45.3 #> 6 E1 2 3 2.56 45.5 #> 7 E1 3 1 2.77 46.7 #> 8 E1 3 2 3.62 43.2 #> 9 E1 3 3 2.28 47.8 #> 10 E1 4 1 2.36 47.9 #> # ... with 410 more rows
replace_string(data_ge, GEN, pattern = "G", replacement = "GENOTYPE_")
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 GENOTYPE_1 1 2.17 44.9 #> 2 E1 GENOTYPE_1 2 2.50 46.9 #> 3 E1 GENOTYPE_1 3 2.43 47.8 #> 4 E1 GENOTYPE_2 1 3.21 45.2 #> 5 E1 GENOTYPE_2 2 2.93 45.3 #> 6 E1 GENOTYPE_2 3 2.56 45.5 #> 7 E1 GENOTYPE_3 1 2.77 46.7 #> 8 E1 GENOTYPE_3 2 3.62 43.2 #> 9 E1 GENOTYPE_3 3 2.28 47.8 #> 10 E1 GENOTYPE_4 1 2.36 47.9 #> # ... with 410 more rows
# Remove strings remove_strings(data_ge)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 1 1 1 2.17 44.9 #> 2 1 1 2 2.50 46.9 #> 3 1 1 3 2.43 47.8 #> 4 1 2 1 3.21 45.2 #> 5 1 2 2 2.93 45.3 #> 6 1 2 3 2.56 45.5 #> 7 1 3 1 2.77 46.7 #> 8 1 3 2 3.62 43.2 #> 9 1 3 3 2.28 47.8 #> 10 1 4 1 2.36 47.9 #> # ... with 410 more rows
remove_strings(data_ge, ENV)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <dbl> <fct> <fct> <dbl> <dbl> #> 1 1 G1 1 2.17 44.9 #> 2 1 G1 2 2.50 46.9 #> 3 1 G1 3 2.43 47.8 #> 4 1 G2 1 3.21 45.2 #> 5 1 G2 2 2.93 45.3 #> 6 1 G2 3 2.56 45.5 #> 7 1 G3 1 2.77 46.7 #> 8 1 G3 2 3.62 43.2 #> 9 1 G3 3 2.28 47.8 #> 10 1 G4 1 2.36 47.9 #> # ... with 410 more rows
############ Find text in numeric sequences ###########
mixed_text <- data.frame(data_ge)
mixed_text[2, 4] <- "2..503"
mixed_text[3, 4] <- "3.2o75"
find_text_in_num(mixed_text, GY)
#> [1] 2 3
############# upper, lower and title cases ############ gen_text <- c("This is the first string.", "this is the second one") all_lower_case(gen_text)
#> [1] "this is the first string." "this is the second one"
all_upper_case(gen_text)
#> [1] "THIS IS THE FIRST STRING." "THIS IS THE SECOND ONE"
all_title_case(gen_text)
#> [1] "This Is The First String." "This Is The Second One"
first_upper_case(gen_text)
#> [1] "This is the first string." "This is the second one"
# A whole data frame all_lower_case(data_ge)
#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <chr> <chr> <chr> <dbl> <dbl> #> 1 e1 g1 1 2.17 44.9 #> 2 e1 g1 2 2.50 46.9 #> 3 e1 g1 3 2.43 47.8 #> 4 e1 g2 1 3.21 45.2 #> 5 e1 g2 2 2.93 45.3 #> 6 e1 g2 3 2.56 45.5 #> 7 e1 g3 1 2.77 46.7 #> 8 e1 g3 2 3.62 43.2 #> 9 e1 g3 3 2.28 47.8 #> 10 e1 g4 1 2.36 47.9 #> # ... with 410 more rows
############### Tidy up messy text string ############## messy_env <- c("ENV 1", "Env 1", "Env1", "env1", "Env.1", "Env_1") tidy_strings(messy_env)
#> [1] "ENV_1" "ENV_1" "ENV_1" "ENV_1" "ENV_1" "ENV_1"
messy_gen <- c("GEN1", "gen 2", "Gen.3", "gen-4", "Gen_5", "GEN_6") tidy_strings(messy_gen)
#> [1] "GEN_1" "GEN_2" "GEN_3" "GEN_4" "GEN_5" "GEN_6"
messy_int <- c("EnvGen", "Env_Gen", "env gen", "Env Gen", "ENV.GEN", "ENV_GEN") tidy_strings(messy_int)
#> [1] "ENV_GEN" "ENV_GEN" "ENV_GEN" "ENV_GEN" "ENV_GEN" "ENV_GEN"
library(tibble)
# Or a whole data frame
df <- tibble(Env = messy_env,
             gen = messy_gen,
             Env_GEN = interaction(Env, gen),
             y = rnorm(6, 300, 10))
df
#> # A tibble: 6 x 4 #> Env gen Env_GEN y #> <chr> <chr> <fct> <dbl> #> 1 ENV 1 GEN1 ENV 1.GEN1 306. #> 2 Env 1 gen 2 Env 1.gen 2 297. #> 3 Env1 Gen.3 Env1.Gen.3 290. #> 4 env1 gen-4 env1.gen-4 297. #> 5 Env.1 Gen_5 Env.1.Gen_5 301. #> 6 Env_1 GEN_6 Env_1.GEN_6 296.
tidy_strings(df)
#> # A tibble: 6 x 4 #> Env gen Env_GEN y #> <chr> <chr> <chr> <dbl> #> 1 ENV_1 GEN_1 ENV_1_GEN_1 306. #> 2 ENV_1 GEN_2 ENV_1_GEN_2 297. #> 3 ENV_1 GEN_3 ENV_1_GEN_3 290. #> 4 ENV_1 GEN_4 ENV_1_GEN_4 297. #> 5 ENV_1 GEN_5 ENV_1_GEN_5 301. #> 6 ENV_1 GEN_6 ENV_1_GEN_6 296.
# }