## ----globalOpts, include=FALSE------------------------------------------
library(knitr)
## Default options applied to every knitr chunk of this document.
opts_chunk$set(
  ## -- general chunk handling
  concordance = TRUE,
  tidy = FALSE,
  comment = NA,
  ## -- figure layout
  fig.align = "center",
  fig.pos = "H",
  out.width = "1.2\\linewidth", ## default is 1
  crop = hook_pdfcrop, ## changes mar defaults...
  # dev = "png",
  ## -- where figures and cache files are written
  fig.path = "Rmd_files/Rmd_Figures/Modelling/",
  cache.path = "Rmd_files/Cache/Modelling/"
)
##
## Number of significant digits used when printing numbers.
options(digits = 8)


## ----Checkpoint, message=FALSE------------------------------------------
## (messages are omitted in this chunk)
##
# library(checkpoint)
# checkpoint(snapshot_date = "2022-11-15") 


## ----loadPackages, message=FALSE----------------------------------------
## (messages are omitted from this chunk)
##
library(multcomp)
library(dplyr)
library(kableExtra)
library(ggplot2)
library(tibble) ## function rownames_to_column()
library(plgraphics)


## ----Settings-----------------------------------------------------------
## Request English messages. Environment variable names are case-sensitive
## on Unix-alikes, so a lower-case `lang` is not consulted there; LANGUAGE
## is the variable read by R's message translation.
Sys.setenv(LANGUAGE = "en_US")
## Use the black-and-white theme for all ggplot2 figures.
theme_set(new = theme_bw())

## Create the folder only if it does not exist yet; nothing is printed.
invisible(
  dir.exists("Prepared_data_and_models") ||
    dir.create("Prepared_data_and_models")
)


## ----getData------------------------------------------------------------
## Read the prepared data set from its RDS file.
d.maize <- readRDS(file = file.path("Prepared_data_and_models",
                                    "d.maize_PreparedData.RDS"))


## ----OverviewHeadStr----------------------------------------------------
## Quick overview: dimensions, the first rows (restricted to at most the
## first 30 columns) and the structure of the data.
dim(d.maize)
## seq_len() yields an empty selection for a zero-column input, whereas
## 1:0 would count down (1, 0) and fail.
head(d.maize)[seq_len(min(ncol(d.maize), 30))]
str(d.maize)


## ----createWellsMatrix, echo=FALSE--------------------------------------
## (this chunk is not echoed)
##
## Letters a-f filled row by row into a 3 x 2 grid; the row order is then
## reversed so that the row holding "a" and "b" comes last.
M.wells <- matrix(letters[1:6], nrow = 3, ncol = 2, byrow = TRUE)
M.wells <- M.wells[rev(seq_len(nrow(M.wells))), ]
M.wells


## ----createPotsOnTableMatrix, echo=FALSE--------------------------------
## (this chunk is not echoed)
##
## 3 x 6 grid of labels: one row per letter (C, B, A from top to bottom)
## and one column per number 1-6.
M.pots_on_table <- outer(LETTERS[3:1], 1:6, FUN = paste0)
M.pots_on_table


## ----lm.cob_weight.full-------------------------------------------------
## Linear model of cob weight on all candidate predictors: soil, depth,
## pot, well and seed weight.
lm.cob_weight.full <- lm(
  formula = cob_weight.num ~
    soil.fac + depth + pot.fac + well.fac + seed.weight,
  data = d.maize
)
##
## Coefficient table and fit statistics
summary(lm.cob_weight.full)


## -----------------------------------------------------------------------
## Design matrix of the full model; its rank is printed next to its number
## of columns so the two can be compared.
X <- model.matrix(object = lm.cob_weight.full)
robustbase::rankMM(X)
ncol(x = X)


## ----lm.cob_weight------------------------------------------------------
## Linear model of cob weight on pot, depth, well and seed weight
## (no soil term).
lm.cob_weight <- lm(
  formula = cob_weight.num ~ pot.fac + depth + well.fac + seed.weight,
  data = d.maize
)
##
## Coefficient table and fit statistics
summary(lm.cob_weight)


## ----log_model_fitting--------------------------------------------------
## Refit the model with the log of the response and the same right-hand
## side. The argument of update() is called `formula.` (with a trailing
## dot); it is spelled out here instead of relying on partial matching.
lm.cob_weight.log <- update(lm.cob_weight, formula. = log(.) ~ .)
##
## Coefficient table and fit statistics
summary(lm.cob_weight.log)


## ----log_model_checking-------------------------------------------------
## Diagnostic plots of both fits, drawn on a 2 x 2 grid per model
## (original scale first, then log scale).
par(mfrow = c(2, 2))
invisible(lapply(list(lm.cob_weight, lm.cob_weight.log), plot))
## back to one plot per page
par(mfrow = c(1, 1))


## ----residual_plots_testing_normality, warning=FALSE--------------------
## (warnings are omitted from this chunk)
## plregr() display for the model on the original scale. The same seed is
## set before each call so that both displays start from the same random
## number state (presumably plregr() draws random numbers for this
## display -- see the plgraphics documentation).
set.seed(2023)
plregr(lm.cob_weight,
       ## `default = 0` with `qq = 1`: presumably switches the default plots
       ## off and requests only the QQ plot -- TODO confirm
       plotselect = c(default = 0,
                      qq = 1),
       xvar = FALSE)
## Same display for the model with log-transformed response.
set.seed(2023)
plregr(lm.cob_weight.log,
       plotselect = c(default = 0,
                      qq = 1),
       xvar = FALSE)
##
## Attach the residuals of both fits to the observations they belong to and
## keep only the rows that were used for fitting `lm.cob_weight`.
##
## lm() stores the rows it dropped because of missing values in the fit's
## `na.action` record. Aligning through that record keeps residuals and
## rows matched even if a predictor (and not only the response) is missing.
## resid() for lm fits has no `na.action` argument, so passing one had no
## effect.
d.maize.no.na <- local({
  n.rows <- nrow(d.maize)
  ## residuals of `fit`, padded with NA for the rows that lm() dropped
  pad.resid <- function(fit) {
    used <- rep(TRUE, n.rows)
    used[as.integer(na.action(fit))] <- FALSE
    padded <- rep(NA_real_, n.rows)
    padded[used] <- resid(fit)
    padded
  }
  d.maize %>%
    mutate(resid = pad.resid(lm.cob_weight),
           resid.log = pad.resid(lm.cob_weight.log)) %>%
    filter(!is.na(resid))
})

## Density of the residuals of the model on the original scale, with a rug
## marking the individual values
d.maize.no.na %>%
  ggplot(aes(x = resid)) +
  geom_density() +
  geom_rug(alpha = 0.2)

## Same display for the residuals of the log-response model
d.maize.no.na %>%
  ggplot(aes(x = resid.log)) +
  geom_density() +
  geom_rug(alpha = 0.2)


## -----------------------------------------------------------------------
## F-tests for dropping each term of the model in turn
drop1(object = lm.cob_weight, test = "F")


## ----lm.cob_weight_CI---------------------------------------------------
## Confidence intervals of the coefficients: store them, then print them
CI.cob_weight <- confint(lm.cob_weight)
CI.cob_weight


## ----CIcovariates-------------------------------------------------------
## Point estimates as a one-column data frame (stored, then printed)
d.coef.cob_weight <- data.frame(coef.cob_weight = coef(lm.cob_weight))
d.coef.cob_weight
##
d.CI.cob_weight <- as.data.frame(CI.cob_weight)

## Turn the row names of both data frames into a column "rowname" and
## join estimates and interval bounds on it
d.est.cob_weight <- d.coef.cob_weight %>%
  rownames_to_column() %>%
  left_join(rownames_to_column(d.CI.cob_weight), by = "rowname")
##
## Show the joined data frame as a table
kable(d.est.cob_weight,
      caption = "Estimates and 95\\% CI.",
      label = "tab_coef_cob_weight",
      booktabs = TRUE,
      longtable = TRUE,
      linesep = "") %>%
  # landscape() %>%
  kable_styling(font_size = 7,
                latex_options = c("striped", "repeat_header", "hold_position"))


## ----plot_CI------------------------------------------------------------
## Estimates with their confidence intervals, one row per coefficient
## (intercept left out); the grey vertical line marks zero.
d.est.no_intercept <- filter(d.est.cob_weight, rowname != "(Intercept)")
ggplot(d.est.no_intercept,
       mapping = aes(y = rowname, x = coef.cob_weight)) +
  geom_point() +
  geom_errorbar(mapping = aes(xmin = `2.5 %`, xmax = `97.5 %`)) +
  geom_vline(xintercept = 0, color = "darkgrey") +
  labs(x = "Confidence intervals") +
  theme(axis.title.y = element_blank())


## ----residual_plots-----------------------------------------------------
## Standard diagnostic plots of the fitted model
plot(x = lm.cob_weight)


## ----fitted_against_seed.weight-----------------------------------------
## Residuals against seed weight, with a smoother
d.maize.no.na %>%
  ggplot(aes(x = seed.weight, y = resid)) +
  geom_point(alpha = 0.5) +
  geom_smooth()


## ----residuals_against_depth--------------------------------------------
## Residuals against depth, with a smoother
d.maize.no.na %>%
  ggplot(aes(x = depth, y = resid)) +
  geom_point(alpha = 0.5) +
  geom_smooth()


## ----residuals_against_soil---------------------------------------------
## Residuals per soil as violins (coloured by soil) with the median drawn
d.maize.no.na %>%
  ggplot(aes(x = soil.fac, y = resid, colour = soil.fac)) +
  geom_violin(draw_quantiles = 0.5)


## ----residuals_against_well---------------------------------------------
## Residuals per well as violins with the median drawn
d.maize.no.na %>%
  ggplot(aes(x = well.fac, y = resid)) +
  geom_violin(draw_quantiles = 0.5)


## ----nested_variable----------------------------------------------------
## Cross-tabulate pots against soils (counts of rows per combination)
xtabs(~ pot.fac + soil.fac, data = d.maize)


## ----soil.fac_contrast--------------------------------------------------
## General linear hypotheses on the pot coefficients of `lm.cob_weight`:
## each string is one linear function of coefficients tested against zero.
test.soil <- glht(lm.cob_weight, 
                  linfct = c("(Intercept) + pot.facA4 + pot.facC2 + pot.facC3 = 0", ## Bio Garden
                             "pot.facB3 + pot.facB4 + pot.facC4 + pot.facC5 = 0", ## Composana
                             "pot.facA2 + pot.facA3 + pot.facA5 + pot.facA6 = 0", ## Herbs
                             "pot.facB1 + pot.facB2 + pot.facB6 + pot.facC1 = 0"))
par("mar") ## the second value refers to the left margin (to be enlarged) 
## Enlarge the left margin for this plot only: par() returns the previous
## setting, which is restored afterwards so that later base-graphics plots
## are not affected.
old.par <- par(mar = c(5.1, 19, 4.1, 2.1)) 
plot(test.soil)
par(old.par)


## ----citations----------------------------------------------------------
## How to cite R itself and the stats package
citation(package = "base")
citation(package = "stats")


## ----sessionInfo--------------------------------------------------------
## R version, platform and attached packages of this session
sessionInfo(package = NULL)

