all_lower_case()
: Translate all non-numeric strings of a data frame
to lower case.
all_upper_case()
: Translate all non-numeric strings of a data frame
to upper case.
all_title_case()
: Translate all non-numeric strings of a data frame
to title case.
first_upper_case()
: Translate the first letter of a string to upper
case.
extract_number()
: Extract the number(s) of a string.
extract_string()
: Extract all strings, ignoring case.
find_text_in_num()
: Find text characters in a numeric sequence and
return the row index.
has_text_in_num()
: Inspect columns looking for text in numeric
sequence and return a warning if text is found.
remove_space()
: Remove all blank spaces of a string.
remove_strings()
: Remove all strings of a variable.
replace_number()
: Replace numbers with a replacement.
replace_string()
: Replace all strings with a replacement, ignoring
case.
round_cols()
: Round a selected column or a whole data frame to
a given number of decimal places.
tidy_strings()
: Tidy up character strings, non-numeric columns, or
any selected columns in a data frame by putting all words in upper case,
replacing any space, tabulation, or punctuation character with '_', and
putting '_' between lower and upper case. Suppose that
str = c("Env1", "env 1", "env.1") (which by definition should represent a
unique level in plant breeding trials, e.g., environment 1) is subjected to
tidy_strings(str): the result will then be c("ENV_1", "ENV_1", "ENV_1").
See the Examples section for more examples.
all_upper_case(.data, ...)

all_lower_case(.data, ...)

all_title_case(.data, ...)

first_upper_case(.data, ...)

extract_number(.data, ..., pattern = NULL)

extract_string(.data, ..., pattern = NULL)

find_text_in_num(.data, ...)

has_text_in_num(.data)

remove_space(.data, ...)

remove_strings(.data, ...)

replace_number(
  .data,
  ...,
  pattern = NULL,
  replacement = "",
  ignore_case = FALSE
)

replace_string(
  .data,
  ...,
  pattern = NULL,
  replacement = "",
  ignore_case = FALSE
)

round_cols(.data, ..., digits = 2)

tidy_strings(.data, ..., sep = "_")
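.data | A data frame. As shown in the Examples, some functions (e.g., all_lower_case() and tidy_strings()) also accept a character vector. |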
---|---|
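... | The argument depends on the function used. In the Examples it holds the unquoted name(s) of the column(s) to operate on (e.g., EP or GEN); when it is omitted, functions such as round_cols() and remove_strings() are applied to the whole data frame. |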
pattern | A string to be matched. Regular Expression Syntax is also allowed. |
replacement | A string for replacement. |
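ignore_case | If TRUE, case is ignored during pattern matching; if FALSE (the default), pattern matching is case sensitive. |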
digits | The number of decimal places to round to. Defaults to 2. |
sep | A character string to separate the terms. Defaults to "_". |
Tiago Olivoto tiagoolivoto@gmail.com
# \donttest{ library(metan) ################ Rounding numbers ############### # All numeric columns round_cols(data_ge2, digits = 1)#> # A tibble: 156 x 18 #> ENV GEN REP PH EH EP EL ED CL CD CW KW NR #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 1 2.6 1.7 0.7 16.1 52.2 28.1 16.3 25.1 216. 15.6 #> 2 A1 H1 2 2.9 1.8 0.6 14.2 50.3 27.6 14.5 21.4 184. 16 #> 3 A1 H1 3 2.7 1.6 0.6 16 50.7 28.4 16.4 24 208. 17.2 #> 4 A1 H10 1 2.8 1.6 0.6 16.7 54.1 31.7 17.4 26.2 194. 15.6 #> 5 A1 H10 2 2.8 1.7 0.6 14.9 52.7 32 15.5 20.7 176. 17.6 #> 6 A1 H10 3 2.7 1.5 0.6 16.7 52.7 30.4 17.5 26.8 207. 16.8 #> 7 A1 H11 1 2.8 1.5 0.5 17.4 51.7 30.6 18 26.2 217. 16.8 #> 8 A1 H11 2 2.7 1.6 0.6 16.7 47.2 28.7 17.2 24.1 181. 13.6 #> 9 A1 H11 3 2.8 1.7 0.6 15.8 47.9 27.6 16.4 20.5 166. 15.2 #> 10 A1 H12 1 2.7 1.5 0.6 14.9 47.5 28.2 15.5 20.1 161 14.8 #> # ... with 146 more rows, and 5 more variables: NKR <dbl>, CDED <dbl>, #> # PERK <dbl>, TKW <dbl>, NKE <dbl># Round specific columns round_cols(data_ge2, EP, digits = 1)#> # A tibble: 156 x 18 #> ENV GEN REP PH EH EP EL ED CL CD CW KW NR #> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 A1 H1 1 2.61 1.71 0.7 16.1 52.2 28.1 16.3 25.1 217. 15.6 #> 2 A1 H1 2 2.87 1.76 0.6 14.2 50.3 27.6 14.5 21.4 184. 16 #> 3 A1 H1 3 2.68 1.58 0.6 16.0 50.7 28.4 16.4 24.0 208. 17.2 #> 4 A1 H10 1 2.83 1.64 0.6 16.7 54.1 31.7 17.4 26.2 194. 15.6 #> 5 A1 H10 2 2.79 1.71 0.6 14.9 52.7 32.0 15.5 20.7 176. 17.6 #> 6 A1 H10 3 2.72 1.51 0.6 16.7 52.7 30.4 17.5 26.8 207. 16.8 #> 7 A1 H11 1 2.75 1.51 0.5 17.4 51.7 30.6 18.0 26.2 217. 16.8 #> 8 A1 H11 2 2.72 1.56 0.6 16.7 47.2 28.7 17.2 24.1 181. 13.6 #> 9 A1 H11 3 2.77 1.67 0.6 15.8 47.9 27.6 16.4 20.5 166. 15.2 #> 10 A1 H12 1 2.73 1.54 0.6 14.9 47.5 28.2 15.5 20.1 161. 14.8 #> # ... 
with 146 more rows, and 5 more variables: NKR <dbl>, CDED <dbl>, #> # PERK <dbl>, TKW <dbl>, NKE <dbl>########### Extract or replace numbers ########## # Extract numbers extract_number(data_ge, GEN)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <dbl> <fct> <dbl> <dbl> #> 1 E1 1 1 2.17 44.9 #> 2 E1 1 2 2.50 46.9 #> 3 E1 1 3 2.43 47.8 #> 4 E1 2 1 3.21 45.2 #> 5 E1 2 2 2.93 45.3 #> 6 E1 2 3 2.56 45.5 #> 7 E1 3 1 2.77 46.7 #> 8 E1 3 2 3.62 43.2 #> 9 E1 3 3 2.28 47.8 #> 10 E1 4 1 2.36 47.9 #> # ... with 410 more rows# Replace numbers replace_number(data_ge, GEN)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 G 1 2.17 44.9 #> 2 E1 G 2 2.50 46.9 #> 3 E1 G 3 2.43 47.8 #> 4 E1 G 1 3.21 45.2 #> 5 E1 G 2 2.93 45.3 #> 6 E1 G 3 2.56 45.5 #> 7 E1 G 1 2.77 46.7 #> 8 E1 G 2 3.62 43.2 #> 9 E1 G 3 2.28 47.8 #> 10 E1 G 1 2.36 47.9 #> # ... with 410 more rowsreplace_number(data_ge, GEN, pattern = 1, replacement = "_one")#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 G_one 1 2.17 44.9 #> 2 E1 G_one 2 2.50 46.9 #> 3 E1 G_one 3 2.43 47.8 #> 4 E1 G2 1 3.21 45.2 #> 5 E1 G2 2 2.93 45.3 #> 6 E1 G2 3 2.56 45.5 #> 7 E1 G3 1 2.77 46.7 #> 8 E1 G3 2 3.62 43.2 #> 9 E1 G3 3 2.28 47.8 #> 10 E1 G4 1 2.36 47.9 #> # ... with 410 more rows########## Extract, replace or remove strings ########## # Extract strings extract_string(data_ge, GEN)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 G 1 2.17 44.9 #> 2 E1 G 2 2.50 46.9 #> 3 E1 G 3 2.43 47.8 #> 4 E1 G 1 3.21 45.2 #> 5 E1 G 2 2.93 45.3 #> 6 E1 G 3 2.56 45.5 #> 7 E1 G 1 2.77 46.7 #> 8 E1 G 2 3.62 43.2 #> 9 E1 G 3 2.28 47.8 #> 10 E1 G 1 2.36 47.9 #> # ... 
with 410 more rows# Replace strings replace_string(data_ge, GEN)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 1 1 2.17 44.9 #> 2 E1 1 2 2.50 46.9 #> 3 E1 1 3 2.43 47.8 #> 4 E1 2 1 3.21 45.2 #> 5 E1 2 2 2.93 45.3 #> 6 E1 2 3 2.56 45.5 #> 7 E1 3 1 2.77 46.7 #> 8 E1 3 2 3.62 43.2 #> 9 E1 3 3 2.28 47.8 #> 10 E1 4 1 2.36 47.9 #> # ... with 410 more rowsreplace_string(data_ge, GEN, pattern = "G", replacement = "GENOTYPE_")#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <fct> <chr> <fct> <dbl> <dbl> #> 1 E1 GENOTYPE_1 1 2.17 44.9 #> 2 E1 GENOTYPE_1 2 2.50 46.9 #> 3 E1 GENOTYPE_1 3 2.43 47.8 #> 4 E1 GENOTYPE_2 1 3.21 45.2 #> 5 E1 GENOTYPE_2 2 2.93 45.3 #> 6 E1 GENOTYPE_2 3 2.56 45.5 #> 7 E1 GENOTYPE_3 1 2.77 46.7 #> 8 E1 GENOTYPE_3 2 3.62 43.2 #> 9 E1 GENOTYPE_3 3 2.28 47.8 #> 10 E1 GENOTYPE_4 1 2.36 47.9 #> # ... with 410 more rows# Remove strings remove_strings(data_ge)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <dbl> <dbl> <dbl> <dbl> <dbl> #> 1 1 1 1 2.17 44.9 #> 2 1 1 2 2.50 46.9 #> 3 1 1 3 2.43 47.8 #> 4 1 2 1 3.21 45.2 #> 5 1 2 2 2.93 45.3 #> 6 1 2 3 2.56 45.5 #> 7 1 3 1 2.77 46.7 #> 8 1 3 2 3.62 43.2 #> 9 1 3 3 2.28 47.8 #> 10 1 4 1 2.36 47.9 #> # ... with 410 more rowsremove_strings(data_ge, ENV)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <dbl> <fct> <fct> <dbl> <dbl> #> 1 1 G1 1 2.17 44.9 #> 2 1 G1 2 2.50 46.9 #> 3 1 G1 3 2.43 47.8 #> 4 1 G2 1 3.21 45.2 #> 5 1 G2 2 2.93 45.3 #> 6 1 G2 3 2.56 45.5 #> 7 1 G3 1 2.77 46.7 #> 8 1 G3 2 3.62 43.2 #> 9 1 G3 3 2.28 47.8 #> 10 1 G4 1 2.36 47.9 #> # ... with 410 more rows############ Find text in numeric sequences ########### mixed_text <- data.frame(data_ge) mixed_text[2, 4] <- "2..503" mixed_text[3, 4] <- "3.2o75" find_text_in_num(mixed_text, GY)#> [1] 2 3############# upper, lower and title cases ############ gen_text <- c("This is the first string.", "this is the second one") all_lower_case(gen_text)#> [1] "this is the first string." 
"this is the second one"all_upper_case(gen_text)#> [1] "THIS IS THE FIRST STRING." "THIS IS THE SECOND ONE"all_title_case(gen_text)#> [1] "This Is The First String." "This Is The Second One"first_upper_case(gen_text)#> [1] "This is the first string." "This is the second one"# A whole data frame all_lower_case(data_ge)#> # A tibble: 420 x 5 #> ENV GEN REP GY HM #> <chr> <chr> <chr> <dbl> <dbl> #> 1 e1 g1 1 2.17 44.9 #> 2 e1 g1 2 2.50 46.9 #> 3 e1 g1 3 2.43 47.8 #> 4 e1 g2 1 3.21 45.2 #> 5 e1 g2 2 2.93 45.3 #> 6 e1 g2 3 2.56 45.5 #> 7 e1 g3 1 2.77 46.7 #> 8 e1 g3 2 3.62 43.2 #> 9 e1 g3 3 2.28 47.8 #> 10 e1 g4 1 2.36 47.9 #> # ... with 410 more rows############### Tidy up messy text string ############## messy_env <- c("ENV 1", "Env 1", "Env1", "env1", "Env.1", "Env_1") tidy_strings(messy_env)#> [1] "ENV_1" "ENV_1" "ENV_1" "ENV_1" "ENV_1" "ENV_1"messy_gen <- c("GEN1", "gen 2", "Gen.3", "gen-4", "Gen_5", "GEN_6") tidy_strings(messy_gen)#> [1] "GEN_1" "GEN_2" "GEN_3" "GEN_4" "GEN_5" "GEN_6"messy_int <- c("EnvGen", "Env_Gen", "env gen", "Env Gen", "ENV.GEN", "ENV_GEN") tidy_strings(messy_int)#> [1] "ENV_GEN" "ENV_GEN" "ENV_GEN" "ENV_GEN" "ENV_GEN" "ENV_GEN"library(tibble) # Or a whole data frame df <- tibble(Env = messy_env, gen = messy_gen, Env_GEN = interaction(Env, gen), y = rnorm(6, 300, 10)) df#> # A tibble: 6 x 4 #> Env gen Env_GEN y #> <chr> <chr> <fct> <dbl> #> 1 ENV 1 GEN1 ENV 1.GEN1 306. #> 2 Env 1 gen 2 Env 1.gen 2 297. #> 3 Env1 Gen.3 Env1.Gen.3 290. #> 4 env1 gen-4 env1.gen-4 297. #> 5 Env.1 Gen_5 Env.1.Gen_5 301. #> 6 Env_1 GEN_6 Env_1.GEN_6 296.tidy_strings(df)#> # A tibble: 6 x 4 #> Env gen Env_GEN y #> <chr> <chr> <chr> <dbl> #> 1 ENV_1 GEN_1 ENV_1_GEN_1 306. #> 2 ENV_1 GEN_2 ENV_1_GEN_2 297. #> 3 ENV_1 GEN_3 ENV_1_GEN_3 290. #> 4 ENV_1 GEN_4 ENV_1_GEN_4 297. #> 5 ENV_1 GEN_5 ENV_1_GEN_5 301. #> 6 ENV_1 GEN_6 ENV_1_GEN_6 296.# }