## ----globalOpts, include=FALSE------------------------------------------
## Global knitr chunk options for the whole report (this chunk itself is not
## included in the rendered output).
library(knitr)
## NOTE(review): knitr documents `concordance` as a package option (opts_knit)
## for Rnw documents -- confirm that setting it as a chunk option has an effect.
opts_chunk$set(concordance = TRUE,
               fig.align = "center",
               out.width = "1.2\\linewidth", ## default is 1 
               tidy = FALSE,
               ## print R output without a comment prefix
               comment = NA,
               ## all figures are written below this folder
               fig.path = "Rmd_Figures/DataPrep/", 
               # cache.path = "giveMeAName_cacheFolder",
               ## provide a cache path if several Rnws
               # dev = "png",
               fig.pos = "H",
               crop = hook_pdfcrop) ## changes mar defaults...
## NOTE(review): the hook function is passed as the option value, but no
## knit_hooks$set(crop = hook_pdfcrop) is visible in this file; knitr's
## documentation registers the hook that way and then uses crop = TRUE --
## confirm that figures are actually cropped.
##
## Print numbers with 2 significant digits in all following chunks
options(digits = 2)


## ----Checkpoint, message=FALSE------------------------------------------
## (messages are omitted in this chunk)
##
## Pin the package versions to the snapshot of 2022-11-15 for reproducibility.
## NOTE(review): presumably this has to run before the remaining library()
## calls so that they load the snapshot versions -- confirm against the
## checkpoint documentation.
library(checkpoint)
checkpoint(snapshot_date = "2022-11-15") 


## ----loadPackages, message=FALSE----------------------------------------
## (messages are omitted from this chunk)
##
library(dplyr)
library(kableExtra)
library(ggplot2)
library(tidyr)
library(readxl)
library(magrittr)
library(survival)


## ----Settings-----------------------------------------------------------
## Use the black-and-white ggplot2 theme for every figure
theme_set(theme_bw())

## Create the output folder unless it already exists
## (dir.create() is only reached when dir.exists() is FALSE)
invisible(dir.exists("Prepared_data_and_models") ||
            dir.create("Prepared_data_and_models"))


## ----getData------------------------------------------------------------
## Read the sheet "measurements" of the raw Excel file
d.maize <- read_excel(
  path = "../../Original_data/3_cob_weight_filled.xlsx",
  sheet = "measurements"
)


## ----emptyCols----------------------------------------------------------
## Remove empty columns

## Flag, column by column, whether every value is missing
empty.cols <- vapply(X = d.maize,
                     FUN = function(column) all(is.na(column)),
                     FUN.VALUE = logical(1))
## The following columns are empty
names(d.maize)[empty.cols]

## Keep only the columns that hold at least one value
d.maize <- d.maize[, !empty.cols, drop = FALSE]


## ----OverviewHeadStr----------------------------------------------------
## Dimensions, first rows (all columns selected) and structure
dim(d.maize)
head(d.maize)[1:ncol(d.maize)]
str(d.maize)


## ----proto_gg_density---------------------------------------------------
## Plot template: density curve plus rug, without a legend for alpha.
## The x aesthetic is added later, once per variable.
gg.density <- ggplot(d.maize) +
  geom_density() +
  geom_rug(alpha = 0.3) +
  guides(alpha = "none")


## ----"check_pot"--------------------------------------------------------
## class of the raw pot identifier
class(d.maize$pot)
## how many values are missing (TRUE) / present (FALSE)
d.maize$pot %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct pots (missing values are not counted)
n_distinct(d.maize$pot, na.rm = TRUE)
## number of observations per pot (missing values excluded)
table(d.maize$pot, useNA = "no")


## ----createPotsOnTableMatrix, echo=FALSE--------------------------------
## (this chunk is not echoed)
##
## 3 x 6 matrix of pot labels: letters C, B, A from the first to the last
## row, pot numbers 1 to 6 from the first to the last column
M.pots_on_table <- outer(X = LETTERS[3:1], Y = 1:6, FUN = paste0)
M.pots_on_table


## ----"create_pot.fac"---------------------------------------------------
## add a factor version of pot
d.maize <- d.maize %>%
  mutate(pot.fac = as.factor(pot))
## check the class
class(d.maize$pot.fac)
## every pot should map to exactly one factor level
unique(select(d.maize, contains("pot")))
## number of levels in use (missing values are not counted)
n_distinct(d.maize$pot.fac, na.rm = TRUE)
## number of observations in each level of pot.fac
table(d.maize$pot.fac, useNA = "no")


## ----"check_soil"-------------------------------------------------------
## class of the raw soil variable
class(d.maize$soil)
## how many values are missing (TRUE) / present (FALSE)
d.maize$soil %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct soils (missing values are not counted)
n_distinct(d.maize$soil, na.rm = TRUE)
## observations per soil, most frequent first
sort(table(d.maize$soil, useNA = "no"), decreasing = TRUE)


## ----"create_soil.fac"--------------------------------------------------
## add a factor version of soil
d.maize <- d.maize %>%
  mutate(soil.fac = as.factor(soil))
## check the class
class(d.maize$soil.fac)
## every soil should map to exactly one factor level
unique(select(d.maize, contains("soil")))
## number of observations in each level of soil.fac
table(d.maize$soil.fac, useNA = "no")


## ----"check_well"-------------------------------------------------------
## class of the raw well identifier
class(d.maize$well)
## how many values are missing (TRUE) / present (FALSE)
d.maize$well %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct wells (missing values are not counted)
n_distinct(d.maize$well, na.rm = TRUE)
## number of observations per well (missing values excluded)
table(d.maize$well, useNA = "no")


## ----"create_well.fac"--------------------------------------------------
## add a factor version of well
d.maize <- d.maize %>%
  mutate(well.fac = as.factor(well))
## check the class
class(d.maize$well.fac)
## every well should map to exactly one factor level
unique(select(d.maize, contains("well")))
## number of levels in use (missing values are not counted)
n_distinct(d.maize$well.fac, na.rm = TRUE)
## number of observations in each level of well.fac
table(d.maize$well.fac, useNA = "no")


## ----createWellsMatrix, echo=FALSE--------------------------------------
## (this chunk is not echoed)
##
## 3 x 2 matrix of well labels: rows (e, f), (c, d), (a, b) from first to last
M.wells <- matrix(data = letters[c(5, 6, 3, 4, 1, 2)],
                  nrow = 3, ncol = 2,
                  byrow = TRUE)
M.wells


## ----"check_depth"------------------------------------------------------
## class, summary statistics and frequency of each depth value
class(d.maize$depth)
summary(d.maize$depth)
table(d.maize$depth)


## ----"depth_well"-------------------------------------------------------
## depth per well, shown for the two pots A1 and B2
d.maize %>%
  select(pot.fac, well.fac, depth) %>%
  filter(pot.fac == "A1")
d.maize %>%
  select(pot.fac, well.fac, depth) %>%
  filter(pot.fac == "B2")


## ----"check_depth_density", eval=FALSE, echo=FALSE----------------------
## gg.density +
##   aes(x = `depth`, alpha = 0.3)


## ----"check_seed.weight"------------------------------------------------
## class and summary statistics of the raw seed weight
class(d.maize$seed.weight)
summary(d.maize$seed.weight)


## ----"check_seed.weight_density"----------------------------------------
## density and rug of the raw seed weight
gg.density +
  aes(x = seed.weight, alpha = 0.3)


## ----"create_seed.weight.grams"-----------------------------------------
## rescale the raw seed weight by the factor 0.01
d.maize <- d.maize %>%
  mutate(seed.weight.grams = seed.weight * 0.01)
## check class and summary statistics
class(d.maize$seed.weight.grams)
summary(d.maize$seed.weight.grams)
## compare raw and rescaled values for 10 randomly picked rows
## with the following row numbers:
set.seed(1)
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(contains("seed")) %>%
  slice(sel)


## ----"check_fungus"-----------------------------------------------------
## class of the raw fungus variable
class(d.maize$fungus)
## how many values are missing (TRUE) / present (FALSE)
d.maize$fungus %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct values (missing values are not counted)
n_distinct(d.maize$fungus, na.rm = TRUE)
## observations per value, most frequent first
table(d.maize$fungus, useNA = "no") %>%
  sort(decreasing = TRUE)


## ----"create_fungus.fac"------------------------------------------------
## a missing entry is recoded as "no"; the result is stored as a factor
d.maize <- d.maize %>%
  mutate(fungus.fac = as.factor(replace_na(fungus, "no")))
## how many values are missing (TRUE) / present (FALSE) after recoding
d.maize$fungus.fac %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of levels in use (missing values are not counted)
n_distinct(d.maize$fungus.fac, na.rm = TRUE)
## number of observations in each level of fungus.fac
table(d.maize$fungus.fac, useNA = "no")


## ----"check_date.germinated"--------------------------------------------
## class of the raw germination date
class(d.maize$date.germinated)
## how many values are missing (TRUE) / present (FALSE)
d.maize$date.germinated %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct dates (missing values are not counted)
n_distinct(d.maize$date.germinated, na.rm = TRUE)
## number of observations per date (missing values excluded)
table(d.maize$date.germinated, useNA = "no")


## ----"create_date.germinated.asDate"------------------------------------
## add the germination date as an object of class Date
d.maize <- d.maize %>%
  mutate(date.germinated.asDate = as.Date(date.germinated))
## check class
class(d.maize$date.germinated.asDate)
## compare raw and converted dates for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(contains("date.germinated")) %>%
  slice(sel)
## number of distinct dates (missing values are not counted)
n_distinct(d.maize$date.germinated.asDate, na.rm = TRUE)
## number of observations per date (missing values excluded)
table(d.maize$date.germinated.asDate, useNA = "no")


## ----"last_germination_lab"---------------------------------------------
## latest recorded germination date
max(d.maize$date.germinated.asDate, na.rm = TRUE)


## ----"date_planted"-----------------------------------------------------
## the day after the latest recorded germination date
1 + max(d.maize$date.germinated.asDate, na.rm = TRUE)


## ----"check_observations"-----------------------------------------------
## class of the free-text observations
class(d.maize$observations)
## how many values are missing (TRUE) / present (FALSE)
d.maize$observations %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct entries (missing values are not counted)
n_distinct(d.maize$observations, na.rm = TRUE)
## number of observations per entry (missing values excluded)
table(d.maize$observations, useNA = "no")


## ----"create_obs.time"--------------------------------------------------
## "morning" when the observation text mentions it (case-insensitive),
## "night" for every other row, including rows without any text
obs.time <- ifelse(test = grepl("morning", d.maize$observations,
                                ignore.case = TRUE),
                   yes = "morning",
                   no = "night")
d.maize$obs.time <- factor(obs.time, levels = c("morning", "night"))
## class and first values
class(d.maize$obs.time)
head(d.maize$obs.time)
## how many values are missing (TRUE) / present (FALSE)
d.maize$obs.time %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of observations in each level of obs.time
table(d.maize$obs.time, useNA = "no")


## ----"create_broken"----------------------------------------------------
## TRUE when the observation text mentions "broken" (case-insensitive),
## FALSE otherwise, including rows without any text
broken <- grepl("broken", d.maize$observations, ignore.case = TRUE)
d.maize$broken <- broken
## class
class(d.maize$broken)
## how many values are missing (TRUE) / present (FALSE)
d.maize$broken %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of TRUE and FALSE values
table(d.maize$broken, useNA = "no")


## ----"check_height_2022_07_05"------------------------------------------
## class of the raw height column
class(d.maize$height_2022_07_05)
## how many values are missing (TRUE) / present (FALSE)
d.maize$height_2022_07_05 %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct values (missing values are not counted)
n_distinct(d.maize$height_2022_07_05, na.rm = TRUE)
## number of observations per value (missing values excluded)
table(d.maize$height_2022_07_05, useNA = "no")


## ----"create_height_2022_07_05.num", warning=TRUE-----------------------
## add a numeric version of the height (conversion warnings are shown)
d.maize <- d.maize %>%
  mutate(height_2022_07_05.num = as.numeric(height_2022_07_05))
## how many values are missing (TRUE) / present (FALSE) after conversion
d.maize$height_2022_07_05.num %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## compare raw and numeric heights for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(contains("height")) %>%
  slice(sel)
## number of distinct values (missing values are not counted)
n_distinct(d.maize$height_2022_07_05.num, na.rm = TRUE)
## number of observations per value (missing values excluded)
table(d.maize$height_2022_07_05.num, useNA = "no")


## -----------------------------------------------------------------------
## density of the numeric height; %+% swaps in the current d.maize, which
## holds the column created above
(gg.density %+% d.maize) +
  aes(x = height_2022_07_05.num, alpha = 0.3)
## same plot on a log10 x-axis
(gg.density %+% d.maize) +
  aes(x = height_2022_07_05.num, alpha = 0.3) +
  scale_x_log10()


## ----"create_plant.found"-----------------------------------------------
## TRUE when the raw height column holds an entry
d.maize <- d.maize %>%
  mutate(plant.found = !is.na(height_2022_07_05))
## how many raw heights are missing (TRUE) / present (FALSE)
d.maize$height_2022_07_05 %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of TRUE and FALSE values of plant.found
d.maize$plant.found %>% 
  # factor(levels = c(TRUE, FALSE)) %>% 
  table()
## compare raw height and plant.found for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(height_2022_07_05, plant.found) %>%
  slice(sel)


## ----"check_cob_weight"-------------------------------------------------
## class of the raw cob weight
class(d.maize$cob_weight)
## how many values are missing (TRUE) / present (FALSE)
d.maize$cob_weight %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct values (missing values are not counted)
n_distinct(d.maize$cob_weight, na.rm = TRUE)


## ----"create_cob_weight.num"--------------------------------------------
## add a numeric version of the cob weight
d.maize <- d.maize %>%
  mutate(cob_weight.num = as.numeric(cob_weight))
## compare raw and numeric cob weights for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(contains("cob_weight")) %>%
  slice(sel)
## how many values are missing (TRUE) / present (FALSE) after conversion
d.maize$cob_weight.num %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct values (missing values are not counted)
n_distinct(d.maize$cob_weight.num, na.rm = TRUE)
## number of observations per value (missing values excluded)
table(d.maize$cob_weight.num, useNA = "no")


## ----"cob_weight.num_density"-------------------------------------------
## density and rug of the non-missing numeric cob weights
ggplot(data = filter(d.maize, !is.na(cob_weight.num)),
       mapping = aes(x = cob_weight.num)) +
  geom_density() +
  geom_rug(alpha = 0.3) +
  guides(alpha = "none")


## ----"cob_weight.num_density_log"---------------------------------------
## same plot on a log10 x-axis
ggplot(data = filter(d.maize, !is.na(cob_weight.num)),
       mapping = aes(x = cob_weight.num)) +
  geom_density() +
  geom_rug(alpha = 0.3) +
  scale_x_log10() +
  guides(alpha = "none")


## ----"cob_weight.num_density_notworking", eval=FALSE, echo=FALSE--------
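## ## ideally, I would like to call
## gg.density +
##   aes(x = `cob_weight`, alpha = 0.3)
## ## but this does not work.
## ## NOTE(review): probably not because of the NAs -- `cob_weight` has to be
## ## converted with as.numeric() (chunk "create_cob_weight.num"), so it is
## ## presumably not numeric. `gg.density %+% d.maize` combined with
## ## `cob_weight.num`, as done for the height plots, may work; to be confirmed.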


## ----"check_...12"------------------------------------------------------
## class and summary of the column `...12`
class(d.maize$...12)
summary(d.maize$...12)
## how many values are missing (TRUE) / present (FALSE)
d.maize$...12 %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()


## ----"create_germinated.in.lab"-----------------------------------------
## TRUE when a germination date was recorded
d.maize <- d.maize %>%
  mutate(germinated.in.lab = !is.na(date.germinated.asDate))
## class
class(d.maize$germinated.in.lab)
## compare flag and date for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(germinated.in.lab, date.germinated.asDate) %>%
  slice(sel)
## how many values are missing (TRUE) / present (FALSE)
d.maize$germinated.in.lab %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## counts and proportions of TRUE and FALSE
table(d.maize$germinated.in.lab, useNA = "no")
round(prop.table(table(d.maize$germinated.in.lab, useNA = "no")),
      digits = 2)


## ----"create_germinated.in.field"---------------------------------------
## TRUE when no germination date was recorded but a numeric height exists
d.maize <- d.maize %>%
  mutate(germinated.in.field =
           !(germinated.in.lab | is.na(height_2022_07_05.num)))
## class
class(d.maize$germinated.in.field)
## compare the flag with its two inputs for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(germinated.in.field,
         germinated.in.lab,
         height_2022_07_05.num) %>%
  slice(sel)
## how many values are missing (TRUE) / present (FALSE)
d.maize$germinated.in.field %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## counts and proportions of TRUE and FALSE
table(d.maize$germinated.in.field, useNA = "no")
round(prop.table(table(d.maize$germinated.in.field, useNA = "no")),
      digits = 2)


## ----"create_germinated.yes"--------------------------------------------
## TRUE when exactly one of the two germination flags is TRUE
d.maize <- d.maize %>%
  mutate(germinated.yes = xor(germinated.in.field, germinated.in.lab))
## class
class(d.maize$germinated.yes)
## compare the three flags for 10 randomly picked rows
## with the following row numbers:
sel <- sample(seq_len(nrow(d.maize)), 10)
sel
d.maize %>%
  select(germinated.yes,
         germinated.in.field,
         germinated.in.lab) %>%
  slice(sel)
## how many values are missing (TRUE) / present (FALSE)
d.maize$germinated.yes %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct values (missing values are not counted)
n_distinct(d.maize$germinated.yes, na.rm = TRUE)
## counts and proportions of TRUE and FALSE
table(d.maize$germinated.yes, useNA = "no")
round(prop.table(table(d.maize$germinated.yes, useNA = "no")),
      digits = 2)


## ----"create_days.to.germination"---------------------------------------
## number of days between 2022-04-30 and the recorded germination date
d.maize <- d.maize %>%
  mutate(days.to.germination =
           as.numeric(date.germinated.asDate - as.Date("2022-04-30"),
                      units = "days"))
## class
class(d.maize$days.to.germination)
## how many values are missing (TRUE) / present (FALSE)
d.maize$days.to.germination %>% 
  is.na() %>% 
  factor(levels = c(TRUE, FALSE)) %>% 
  table()
## number of distinct values (missing values are not counted)
n_distinct(d.maize$days.to.germination, na.rm = TRUE)
## number of observations per value (missing values excluded)
table(d.maize$days.to.germination, useNA = "no")


## ----days.to.germination_density----------------------------------------
## density and rug of the non-missing days to germination
ggplot(data = filter(d.maize, !is.na(days.to.germination)),
       mapping = aes(x = days.to.germination)) +
  geom_density() +
  geom_rug(alpha = 0.3) +
  guides(alpha = "none")


## -----------------------------------------------------------------------
## Censored version: a missing value is replaced by the largest observed
## number of days
days.to.germination.censored.tmp <- ifelse(
  test = is.na(d.maize$days.to.germination),
  yes = max(d.maize$days.to.germination, na.rm = TRUE),
  no = d.maize$days.to.germination
)
d.maize <- mutate(d.maize,
                  days.to.germination.censored =
                    days.to.germination.censored.tmp)
##
## check
unique(select(d.maize,
              contains("days.to.germination"),
              germinated.in.lab))
## makes sense


## ----create_seed_coordinate_y-------------------------------------------
library(stringr)
## y position of the seed within its pot:
## wells a/b -> 1, wells c/d -> 2, every other well -> 3
d.maize <- d.maize %>%
  mutate(seed_coord_y.tmp = case_when(well %in% c("a", "b") ~ 1,
                                      well %in% c("c", "d") ~ 2,
                                      TRUE ~ 3))
## check
unique(select(d.maize, seed_coord_y.tmp, well))
## makes sense
##
## first capital letter of the pot label
d.maize <- d.maize %>%
  mutate(capital.letter = str_extract(pot, pattern = "[A-Z]"))
##
## check
set.seed(10)
d.maize %>%
  select(pot, capital.letter) %>%
  unique() %>%
  sample_n(10)
## makes sense for these 10 observations


## ----adding_coord_y.tmp-------------------------------------------------
## shift the within-pot position by 3 for letter B and by 6 for letter C;
## any other letter keeps the within-pot position, a missing letter gives NA
d.maize <- d.maize %>%
  mutate(seed_coord_y = case_when(
    is.na(capital.letter) ~ NA_real_,
    capital.letter == "B" ~ seed_coord_y.tmp + 3,
    capital.letter == "C" ~ seed_coord_y.tmp + 6,
    TRUE ~ seed_coord_y.tmp
  ))
##
## check
set.seed(2023)
d.maize %>%
  select(capital.letter, well, seed_coord_y) %>%
  unique() %>%
  sample_n(10)
## makes sense for these 10 observations
## we can now remove the two temporary variables
d.maize <- select(d.maize, -c(capital.letter, seed_coord_y.tmp))
##
## double check
table(d.maize$seed_coord_y, useNA = "ifany")
## makes sense: there are 12 seeds for each of the 9 lines

## ----create_seed_coordinate_x-------------------------------------------
## x position of the seed within its pot:
## wells a/c/e -> 1, every other well -> 2
d.maize <- d.maize %>%
  mutate(seed_coord_x.tmp = case_when(well %in% c("a", "c", "e") ~ 1,
                                      TRUE ~ 2))
## check
unique(select(d.maize, seed_coord_x.tmp, well))
## makes sense
##
## first digit of the pot label (stored as character)
d.maize <- d.maize %>%
  mutate(pot.nb = str_extract(pot, pattern = "\\d"))
##
## check
set.seed(2023)
d.maize %>%
  select(pot, pot.nb) %>%
  unique() %>%
  sample_n(10)
## makes sense for these 10 observations


## ----adding_coord_x.tmp-------------------------------------------------
## shift the within-pot position by 2 per pot number above 1 (pots 2 to 6);
## any other pot number keeps the within-pot position, a missing one gives NA
d.maize <- d.maize %>%
  mutate(seed_coord_x = case_when(
    is.na(pot.nb) ~ NA_real_,
    pot.nb == "2" ~ seed_coord_x.tmp + 2,
    pot.nb == "3" ~ seed_coord_x.tmp + 4,
    pot.nb == "4" ~ seed_coord_x.tmp + 6,
    pot.nb == "5" ~ seed_coord_x.tmp + 8,
    pot.nb == "6" ~ seed_coord_x.tmp + 10,
    TRUE ~ seed_coord_x.tmp
  ))
##
## check
set.seed(2023)
d.maize %>%
  select(pot.nb, well, seed_coord_x) %>%
  unique() %>%
  sample_n(10)
## makes sense for these 10 observations
## we can now remove the two temporary variables
d.maize <- select(d.maize, -c(pot.nb, seed_coord_x.tmp))
##
## double check
table(d.maize$seed_coord_x)
## makes sense: there are 9 seeds, for each of the 12 columns.


## -----------------------------------------------------------------------
## seed positions as a scatter plot
d.maize %>%
  ggplot(mapping = aes(x = seed_coord_x, y = seed_coord_y)) +
  geom_point(size = 3, pch = 19, alpha = 0.3)


## -----------------------------------------------------------------------
## Build position_field_x step by step: every row starts at 0 and is then
## assigned a value from 1 to 5 depending on its pot (and, for pots B6 and C3,
## on its well). Each step is followed by a printed check, so the intermediate
## printouts still show 0 for pots that have not been assigned yet.
d.maize <- d.maize %>% 
  mutate(position_field_x = 0)
##
## value 1: pots A1 to A4
d.maize$position_field_x[d.maize$pot.fac %in% c("A1", "A2", "A3", "A4")] <- 1
##
## check
d.maize %>% 
  select(pot.fac, position_field_x) %>% 
  unique()
## makes sense


## -----------------------------------------------------------------------
## value 2: pots A5, A6, B1 and B2
d.maize$position_field_x[d.maize$pot.fac %in% c("A5", "A6", "B1", "B2")] <- 2
##
## check
d.maize %>% 
  select(pot.fac, position_field_x) %>% 
  unique()
## makes sense


## -----------------------------------------------------------------------
## value 3: pots B3 to B5 plus wells a to d of pot B6
d.maize$position_field_x[d.maize$pot.fac %in% c("B3", "B4", "B5")] <- 3
d.maize$position_field_x[d.maize$pot.fac == "B6" &
                          d.maize$well.fac %in% c("a", "b", "c", "d")] <- 3
##
## check
d.maize %>% 
  select(pot.fac, position_field_x) %>% 
  unique()
d.maize %>% 
  filter(pot.fac == "B6") %>% 
  select(position_field_x, well) %>% 
  unique()
## makes sense


## -----------------------------------------------------------------------
## value 4: pots C1 and C2, wells e and f of pot B6, wells a to e of pot C3
d.maize$position_field_x[d.maize$pot.fac %in% c("C1", "C2")] <- 4
d.maize$position_field_x[d.maize$pot.fac == "B6" & 
                          d.maize$well.fac %in% c("e", "f")] <- 4
d.maize$position_field_x[d.maize$pot.fac == "C3" & 
                          d.maize$well.fac %in% c("a", "b", "c", "d", "e")] <- 4
##
## check
d.maize %>% 
  select(pot.fac, position_field_x) %>% 
  unique()
d.maize %>% 
  filter(pot.fac == "B6") %>% 
  select(position_field_x, well) %>% 
  unique()
d.maize %>% 
  filter(pot.fac == "C3") %>% 
  select(position_field_x, well) %>% 
  unique()
## makes sense


## -----------------------------------------------------------------------
## value 5: pots C4 to C6 plus well f of pot C3
d.maize$position_field_x[d.maize$pot.fac %in% c("C4", "C5", "C6")] <- 5
d.maize$position_field_x[d.maize$pot.fac == "C3" & 
                          d.maize$well.fac %in% c("f")] <- 5
##
## check
d.maize %>% 
  select(pot.fac, position_field_x) %>% 
  unique()
d.maize %>% 
  filter(pot.fac == "C3") %>% 
  select(position_field_x, well) %>% 
  unique()
## makes sense


## ----distance.points----------------------------------------------------
## position_field_x multiplied by 50
## NOTE(review): presumably 50 cm between neighbouring x positions -- confirm
d.maize <- d.maize %>% 
  mutate(position_field_x_cm = position_field_x * 50)
##
## check
set.seed(2023)
d.maize %>% 
  select(starts_with("position_field_x")) %>% 
  sample_n(10)
## makes sense


## -----------------------------------------------------------------------
## Number the plants consecutively (1, 2, ...) within each value of
## position_field_x, in the order obtained after sorting by pot
## (arrange() keeps the original order of the rows within a pot).
##
## The numbering is derived from the data instead of a hard-coded vector of
## run lengths, so it stays correct if the rows of one position_field_x value
## are not contiguous after sorting, and it does not fail if the number of
## rows changes. With the layout assumed by the hard-coded version, the five
## groups hold 24, 24, 22, 19 and 19 rows and the result is identical.
d.maize <- d.maize %>% 
  arrange(pot.fac) %>% 
  group_by(position_field_x) %>% 
  mutate(position_field_y = row_number()) %>% 
  ungroup()
##
## check: every value from 1 to 19 is expected 5 times, 20 to 22 three
## times and 23 to 24 twice
table(d.maize$position_field_y)


## -----------------------------------------------------------------------
## field layout; the plotting symbol is taken from the well label
plot(x = d.maize$position_field_x,
     y = d.maize$position_field_y,
     pch = d.maize$well,
     cex = 0.5)
## makes sense


## ----distance.points_y--------------------------------------------------
## position_field_y multiplied by 25
d.maize <- mutate(d.maize, position_field_y_cm = 25 * position_field_y)
##
## check
set.seed(2023)
sample_n(select(d.maize, starts_with("position_field_y")), 10)
## makes sense


## ----MissingValues, warning=FALSE---------------------------------------
## (warning messages are omitted from this chunk)
##
## Table with the number and the percentage of missing values per column,
## listed in the column order of d.maize
d.maize %>%
  mutate(across(everything(), is.na)) %>%
  tidyr::pivot_longer(cols = everything()) %>%
  ## a factor with levels in order of appearance keeps the column order
  mutate(name = factor(x = name, levels = unique(name))) %>%
  group_by(name) %>%
  ## one row per column; the percentage is formatted with one decimal
  summarize(
    Missings = sum(value),
    `Missing (%)` = format(x = round(x = Missings / n() * 100, digits = 1),
                           nsmall = 1)
  ) %>%
  ungroup() %>%
  kable(caption = "Overview over the number and percentages of missing values",
        label = "tab1_missings",
        booktabs = TRUE,
        longtable = TRUE,
        linesep = c("")) %>%
  kable_styling(latex_options = c("striped", "repeat_header", "hold_position"),
                font_size = 7)


## ----saveData, eval = TRUE----------------------------------------------
## Store the prepared data as an RDS file and as a CSV file written by
## write.csv2()
saveRDS(object = d.maize,
        file = "./Prepared_data_and_models/d.maize_PreparedData.RDS")
write.csv2(d.maize,
           file = "./Prepared_data_and_models/d.maize_PreparedData.csv",
           row.names = FALSE,
           quote = TRUE,
           fileEncoding = "UTF-8")


## ----sessionInfo--------------------------------------------------------
## R version and packages used for this run
sessionInfo()

