
## Start up
#Open project
install.packages("lme4")
library(lme4)
library(lattice)
library(MuMIn)
library(AICcmodavg)
library(lazyeval)
library(ggplot2)
library(devtools)
library(bbmle)
library(lsmeans)
library(Hmisc)
library(digest)
library(lmtest)
library(car)
library(corrgram)
library(arm)
library(zoo)
library(sjPlot)
library(dplyr)
library(stats)
library(sjstats)
library(coefplot)
library(pracma)
library(vegan)
library(dtplyr)
library(PCAmixdata)
library(MASS)
library(ggplot2)
library(aods3)
library(rcompanion)


##############################################################################################################
################################## Loading and exploring the data ############################################
##############################################################################################################


data <- spss.get("C:/Dryad_data repository/Database_Lepus europaeus_26_11_2021.sav", use.value.labels=TRUE, to.data.frame = TRUE)
attach(data)
summary(data)

# select cases (activate row, by removing #)
data <- subset(data, data$Select.placental > 0) # & data$Select.placental < 100) # available samples for placental scars
#data <- subset(data, data$Select.body.condition > 0) # available samples for body condition
#data <- subset(data, data$Select.weight.adrenal.gland > 0) # available samples for weight adrenal gland


## make FGR, Hunter.type, Area and sub.area a factor
data$f.Age.class <- factor(data$Age.class)
data$f.Area <- factor(data$Area)
data$f.Sub.area <- factor(data$Sub.area)
data$f.Sex <- factor(data$Sex)


# transformation, centring & z-score of all continous variables
data$WAG <- (data$Adrenal.combined.gr.new)
data$l.WAG <- (data$Adrenal.combined.gr.new)
data$z.WAG <- ((data$l.WAG) - mean(data$l.WAG, na.rm=TRUE))/I(2*sd(data$l.WAG, na.rm=TRUE))
data$l.BCI <- (data$BCI)
data$z.BCI <- ((data$l.BCI) - mean(data$l.BCI, na.rm=TRUE))/I(2*sd(data$l.BCI, na.rm=TRUE))
data$l.No.pred <- log10(data$No.of.predators+0.00000001)
data$z.No.pred <- ((data$l.No.pred) - mean(data$l.No.pred, na.rm=TRUE))/I(2*sd(data$l.No.pred, na.rm=TRUE))
data$l.Per.shot <- (data$Percentage.shot)
data$z.Per.shot <- ((data$l.Per.shot) - mean(data$l.Per.shot, na.rm=TRUE))/I(2*sd(data$l.Per.shot, na.rm=TRUE))
data$l.Days.start <- (data$Days.since.start)
data$z.Days.start <- ((data$l.Days.start) - mean(data$l.Days.start, na.rm=TRUE))/I(2*sd(data$l.Days.start, na.rm=TRUE))
data$l.FMR.ha.avg <- log10(data$FMR.ha.avg.uncor+0.00000001)
data$z.FMR.ha.avg <- ((data$l.FMR.ha.avg) - mean(data$l.FMR.ha.avg, na.rm=TRUE))/I(2*sd(data$l.FMR.ha.avg, na.rm=TRUE))
data$l.effort <- log10(data$Effort+0.00000001)
data$z.effort <- ((data$l.effort) - mean(data$l.effort, na.rm=TRUE))/I(2*sd(data$l.effort, na.rm=TRUE))


# make continuous variables numeric
data$z.WAG <- as.numeric(data$z.WAG)
data$z.BCI <- as.numeric(data$z.BCI)
data$z.No.pred <- as.numeric(data$z.No.pred)
data$z.Per.shot <- as.numeric(data$z.Per.shot)
data$z.FMR.ha.avg <- as.numeric(data$z.FMR.ha.avg)
data$z.effort <- as.numeric(data$z.effort)


# check distribution
dotchart(data$z.WAG)
dotchart(data$z.BCI)
dotchart(data$z.No.pred)
dotchart(data$z.Per.shot)
dotchart(data$z.Days.start)
dotchart(data$z.FMR.ha.avg)
dotchart(data$z.effort)


#############################################################################################################
######################################## Models Body condition index  #################################################
#############################################################################################################

### model Body condition index versus sFMR

install.packages(farver)
library(farver)
ggplot(data, aes(y=BCI, x=z.No.pred))+geom_point()+geom_smooth(method = "loess")

install.packages(gamm4)
library(gamm4)
gamm4 <- gamm4(BCI ~ s(z.FMR.ha.avg) + s(z.No.pred) + s(z.Per.shot) + s(z.WAG) + f.Sex + f.Age.class + s(z.Days.start, k=5),random=~(1|f.Area) + (1|f.Area:f.Sub.area), data=data) 
summary(gamm4$gam)

# conclusion: all continue variables = linear relationship (edf = 1)

library(lmerTest)

########################################################################################################### BCI ~ FMR
###########################################################################################################

model.BCI.FMR <- lmer(BCI ~ z.FMR.ha.avg + z.Per.shot + f.Sex + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.FMR)
drop1(model.BCI.FMR)
# drop f.sex, AIC = 584.8

model.BCI.FMR.1 <- lmer(BCI ~ z.FMR.ha.avg + z.Per.shot + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.FMR.1)
drop1(model.BCI.FMR.1)
# drop z.Per.shot, AIC = 582.9

model.BCI.FMR.2 <- lmer(BCI ~ z.FMR.ha.avg + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.FMR.2)
drop1(model.BCI.FMR.2)
# final model, AIC = 581.6

model.BCI.end.FMR <- lmer(BCI ~ z.FMR.ha.avg + f.Age.class + z.Days.start + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.end.FMR)
# final model AIC = 581.6
confint(model.BCI.end.FMR)
tab_model(model.BCI.end.FMR)
summary(BCI)

# model validation
data$E1 <- resid(model.BCI.end.FMR, type = "pearson")
data$F1 <- fitted(model.BCI.end.FMR)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(model.BCI.end.FMR,type = "pearson" ), fitted = fitted(model.BCI.end.FMR))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no distinct pattern, graph winds around y=0

output <- data.frame(resid = resid(model.BCI.end.FMR,type = "pearson" ), FMR = data$z.FMR.ha.avg)
ggplot(output, aes(FMR, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend, graph winds around y=0

boxplot(data$E1 ~ data$f.Sex)
# conclusion: variation residuals does not relate to f.Age.class and f.Sex

###### bubblechart for spatial correlation

ggplot(data, aes(x=RD.X, y=RD.Y, size = z.FMR.ha.avg, color=f.Age.class)) +
  geom_point(alpha=0.7)

library(raster)
library(gstat)

spdata <- data.frame(resid = data$E1, x = data$RD.X, y = data$RD.Y)
coordinates(spdata) <- c("x", "y")
bubble(spdata, "resid", col = c("blue", "orange"), main = "Residuals", xlab = "X-coordinates", 
       ylab = "Y-coordinates")
# residuals are not dependend on spatial distribution


install.packages("SpatialEpi")
library(SpatialEpi)


V1 <- variogram(resid(model.BCI.end.FMR) ~1,spdata)
plot(V1, pch = 16, col = 1, cex = 1.5,
     xlab = list(label = "RD", cex = 1.5),
     ylab = list(label = "Semi-variance", cex = 1.5))
# conclusion: variance is quite constant (except point > 1200) and thus does not depend on the distance, so no spatial auto-correlation


plot(cooks.distance(model.BCI.end.FMR), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outlier if Cooks distance > 4/66 = 0.0597


########################################################################################################### BCI ~ NP
###########################################################################################################

### model Body condition index versus number of predators


model.BCI.pred <- lmer(BCI ~ z.No.pred + z.Per.shot + f.Sex + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.pred)
drop1(model.BCI.pred)
# drop f.sex, AIC = 589.1

model.BCI.pred.1 <- lmer(BCI ~ z.No.pred + z.Per.shot + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.pred.1)
drop1(model.BCI.pred.1)
# drop z.Per.shot, AIC = 587.2

model.BCI.pred.2 <- lmer(BCI ~ z.No.pred + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.pred.2)
drop1(model.BCI.pred.2)
# drop z.Days.start, AIC = 586.7

model.BCI.pred.3 <- lmer(BCI ~ z.No.pred + f.Age.class + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.pred.3)
drop1(model.BCI.pred.3)
# final model, AIC = 586.7

model.BCI.end.PN <- lmer(BCI ~ z.No.pred + f.Age.class + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.BCI.end.PN)
# final model AIC = 581.6
confint(model.BCI.end.PN)


# model validation
data$E1 <- resid(model.BCI.end.PN, type = "pearson")
data$F1 <- fitted(model.BCI.end.PN)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(model.BCI.end.PN,type = "pearson" ), fitted = fitted(model.BCI.end.PN))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no strong pattern, graph winds around y=0

output <- data.frame(resid = resid(model.BCI.end.PN,type = "pearson" ), FMR = data$z.No.pred)
ggplot(output, aes(FMR, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend, graph winds around y=0

boxplot(data$E1 ~ data$f.Sex)
# conclusion: variation residuals does not relate to f.Age.class and f.Sex


plot(cooks.distance(model.BCI.end.PN), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outlier if Cooks distance > 4/66 = 0.0597

#############################################################################################################
######################################## Models Weight Afrenal glands  #################################################
#############################################################################################################


install.packages(farver)
library(farver)
ggplot(data, aes(y=WAG, x=z.No.pred))+geom_point()+geom_smooth(method = "loess")

install.packages(gamm4)
library(gamm4)
gamm4 <- gamm4(WAG ~ s(z.FMR.ha.avg) + s(z.No.pred) + s(z.Per.shot) + f.Sex + f.Age.class + s(z.Days.start, k=5),random=~(1|f.Area) + (1|f.Area:f.Sub.area), data=data) 
summary(gamm4$gam)

# conclusion: all continues variables = linear relationship (edf = 1)

#############################################################################################################
#############################################################################################################

### model Weight adrenal glands versus sFMR

model.WAG.FMR <- lmer(WAG ~ z.FMR.ha.avg + z.Per.shot + f.Sex + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.FMR)
drop1(model.WAG.FMR)
# drop z.days.start, AIC = -144.7

model.WAG.FMR.1 <- lmer(WAG ~ z.FMR.ha.avg + z.Per.shot + f.Sex + f.Age.class + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.FMR.1)
drop1(model.WAG.FMR.1)
# drop f.Age.class, AIC = -146.7

model.WAG.FMR.2 <- lmer(WAG ~ z.FMR.ha.avg + z.Per.shot + f.Sex + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.FMR.2)
drop1(model.WAG.FMR.2)
# drop z.Per.shot, AIC = -147.9

model.WAG.FMR.3 <- lmer(WAG ~ z.FMR.ha.avg + f.Sex + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.FMR.3)
drop1(model.WAG.FMR.3)
# final model, AIC = -149.1

model.WAG.end.FMR <- lmer(WAG ~ z.FMR.ha.avg + f.Sex + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.end.FMR)
# final model AIC = 581.6
confint(model.WAG.end.FMR)
tab_model(model.WAG.end.FMR)

# model validation
data$E1 <- resid(model.WAG.end.FMR, type = "pearson")
data$F1 <- fitted(model.WAG.end.FMR)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(model.WAG.end.FMR,type = "pearson" ), fitted = fitted(model.WAG.end.FMR))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: moderate pattern, grafiek winds around y=0, but deviates close to extremes

output <- data.frame(resid = resid(model.WAG.end.FMR,type = "pearson" ), FMR = data$z.FMR.ha.avg)
ggplot(output, aes(FMR, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend, graph winds around y=0

boxplot(data$E1 ~ data$f.Age.class)
# conclusion: variation residuals does not relate to f.Sex, adults in f.age.class have more variation


plot(cooks.distance(model.WAG.end.FMR), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outlier if Cooks distance > 4/66 = 0.0597
# 1 outlier removed with WAG = 0.61, was clearly far from scatterplot between Hare.ID en WAG.
# no other outliers removed

#############################################################################################################
#############################################################################################################

### model Weight adrenal glands versus number of predators

model.WAG.pred <- lmer(WAG ~ z.No.pred + z.Per.shot + f.Sex + f.Age.class + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.pred)
drop1(model.WAG.pred)
# drop f.age.class, AIC = -141.1

model.WAG.pred.1 <- lmer(WAG ~ z.No.pred + z.Per.shot + f.Sex + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.pred.1)
drop1(model.WAG.pred.1)
# drop z.Per.shot, AIC = -142.9

model.WAG.pred.2 <- lmer(WAG ~ z.No.pred + f.Sex + z.Days.start + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.pred.2)
drop1(model.WAG.pred.2)
# drop z.Days.start, AIC = -144.5

model.WAG.pred.3 <- lmer(WAG ~ z.No.pred + f.Sex + 1 + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.pred.3)
drop1(model.WAG.pred.3)
# f.sex not dropped, AIC = -145.4 (AIC increases after removing f.sex to -144.3)
# final model, AIC = -145.3

model.WAG.end.PN <- lmer(WAG ~ z.No.pred + f.Sex + (1|f.Area/f.Sub.area), data=data, REML=FALSE, control = lmerControl(optimizer = "bobyqa")) 
summary(model.WAG.end.PN)
# final model AIC = -145.3
confint(model.WAG.end.PN)
library(sjstats)
r.squaredGLMM(model.WAG.end.PN)
tab_model(model.WAG.end.PN)

# model validation
data$E1 <- resid(model.WAG.end.PN, type = "pearson")
data$F1 <- fitted(model.WAG.end.PN)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(model.WAG.end.PN,type = "pearson" ), fitted = fitted(model.WAG.end.PN))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: moderate pattern, graph winds around y=0, but deviates around extreme on the right

output <- data.frame(resid = resid(model.WAG.end.PN,type = "pearson" ), NP = data$z.No.pred)
ggplot(output, aes(NP, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend, grapg winds around y=0

boxplot(data$E1 ~ data$f.Age.class)
# conclusion: variation in residuals does not relate to f.Sex, a little bit more variation in adults of f.age.class


plot(cooks.distance(model.WAG.end.PN), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outlier if Cooks distance > 4/66 = 0.0597



#############################################################################################################################
####################### Models placental scars ###############################################################################
#############################################################################################################################

#install.packages(farver)
library(farver)
ggplot(data, aes(y=Placental.proportion.19, x=z.No.pred))+geom_point()+geom_smooth(method = "loess")
# FMR, Per.shot, BCI and WAG seem visually reasonably linear, but No.pred is not linear

install.packages(gamm4)
library(gamm4)
data$weights <- 19 # maximum number of placental scars is 19
gamm7 <- gamm4(data$Placental.proportion.19 ~ s(z.FMR.ha.avg, k=5) + s(z.No.pred, k=5) + s(z.Per.shot, k=5) + s(z.BCI, k=5) + s(z.WAG, k=5), random=~ (1|Hare.ID), data = data, weights = data$weights, nAGQ = 1, family = binomial(link= "logit"))
summary(gamm7$gam)
# conclusion: all continues variables = linear relationship (edf = 1), z.WAG has an edf of 1.846


#############################################################################################################
#############################################################################################################

### model placental scars versus sFMR


data$weights <- 19 # maximum number of placental scars is 19

MPL1 <- glm(data$Placental.proportion.19 ~ z.FMR.ha.avg + z.Per.shot + z.BCI + z.WAG, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL1)
drop1(MPL1)
# drop BCI, AIC = 75.683

MPL2 <- glm(data$Placental.proportion.19 ~ z.FMR.ha.avg + z.Per.shot + z.WAG, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL2)
drop1(MPL2)
# drop z.Per.shot, AIC = 73.869 * (n=17 samples, cannot be compared with AIC from MPL3)
# when Z.Per.shot was dropped we included an extra sample, n=18), subset(data, data$Select.placental > 0)

MPL3 <- glm(data$Placental.proportion.19 ~ z.FMR.ha.avg + z.WAG, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL3)
drop1(MPL3)
# drop z.WAG, AIC = 72.065

MPL4 <- glm(data$Placental.proportion.19 ~ z.FMR.ha.avg, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL4)
drop1(MPL4)
# AIC = 71.392
# MPL4 is final model
library(rcompanion)
nagelkerke(MPL4)


MPL.end <- glm(data$Placental.proportion.19 ~ z.FMR.ha.avg, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL.end)

library(r2glmm)
r2beta(MPL.end)


# model validation
data$E1 <- resid(MPL.end, type = "pearson")
data$F1 <- fitted(MPL.end)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(MPL.end,type = "pearson" ), fitted = fitted(MPL.end))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no pattern, graphs winds around y=0

output <- data.frame(resid = resid(MPL.end,type = "pearson" ), FMR = data$z.FMR.ha.avg)
ggplot(output, aes(FMR, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend, graph winds around y=0



plot(cooks.distance(MPL.end), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outliers if Cooks distance > 4/16 = 0.25



#############################################################################################################
#############################################################################################################

### model placental scars versus number of predators   


data$weights <- 19 # maximum number of placental scars is 19

MPL1 <- glm(data$Placental.proportion.19 ~ z.No.pred + z.Per.shot + z.BCI + z.WAG, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL1)
drop1(MPL1)
# drop Z.WAG, AIC = 95.059

MPL2 <- glm(data$Placental.proportion.19 ~ z.No.pred + z.Per.shot + z.BCI, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL2)
drop1(MPL2)
# drop z.BCI, AIC = 93.12

MPL3 <- glm(data$Placental.proportion.19 ~ z.No.pred + z.Per.shot, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL3)
drop1(MPL3)
# drop z.Per.shot, AIC = 97.028 * (n=17 samples, cannot be compared with AIC from MPL4)
# when Z.Per.shot was dropped we included an extra sample, n=18), subset(data, data$Select.placental > 0)

MPL4 <- glm(data$Placental.proportion.19 ~ z.No.pred, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL4)
drop1(MPL4)
# AIC = 96.758
# MPL4 is final model



MPL.end <- glm(data$Placental.proportion.19 ~ z.No.pred, data = data, weights = data$weights, family = binomial(link= "logit"))
summary(MPL.end)





# model validation
data$E1 <- resid(MPL.end, type = "pearson")
data$F1 <- fitted(MPL.end)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(MPL.end,type = "pearson" ), fitted = fitted(MPL.end))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no clear pattern, graph winds around y=0, small dip between 0.50 en 0.55

output <- data.frame(resid = resid(MPL.end,type = "pearson" ), PRE = data$z.No.pred)
ggplot(output, aes(PRE, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend, graph winds around y=0, small dip between 0.0 and 0.5



plot(cooks.distance(MPL.end), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outliers if Cooks distance > 4/18 = 0.22
# case Hare.ID 15 is slightly positioned from scatterplot between Hare.ID en number of placentals scars.
# however, is not a problem, therefore no outlier



#############################################################################################################
##########################   scatterplot placental scars - sFMR
#############################################################################################################


scatterplot(data$z.FMR.ha.avg, data$Nr.Placental.scars, smooth = FALSE,  xlab = "sFMR", ylab = "Number of placental scars")



library(devtools)
devtools::install_github("strengejacke/sjPlot")

#install.packages("sjPlot")
library(sjPlot)
library(ggplot2)
modelxx <- MPL.end
ggplot(MPL.end, aes(data$z.FMR.ha.avg, data$Nr.Placental.scars)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method=NULL)+geom_abline(intercept=0, slope=0,col=2,lty=2)



p <- ggplot(data, aes(FMR.ha.avg.uncor, Placental.proportion.19)) + geom_point(size = 2, color = "black")
p + coord_trans(x="log10") + scale_x_continuous(breaks=seq(0,1100,100))
