

## Start up
#Open project
install.packages("lme4")
library(lme4)
library(lattice)
library(MuMIn)
library(AICcmodavg)
library(lazyeval)
library(ggplot2)
library(devtools)
library(bbmle)
library(lsmeans)
library(Hmisc)
library(digest)
library(lmtest)
library(car)
library(corrgram)
library(arm)
library(zoo)
library(sjPlot)
library(dplyr)
library(stats)
library(sjstats)
library(coefplot)
library(pracma)
library(vegan)
library(dtplyr)
library(PCAmixdata)
library(MASS)
library(ggplot2)
library(aods3)
library(rcompanion)


##############################################################################################################
################################## Loading and exploring the data ############################################
##############################################################################################################

## Read in table and explore data: presence absence data
data <- spss.get("F:/PhD Brown hare/PhD/Article Hare allometry/Article Ecology and Evolution/Manuscript Ecology and Evolution/Dryad_data repository/Database_Validation of hunter estimates_26_11_2021.sav", use.value.labels=TRUE, to.data.frame = TRUE)
attach(data)
summary(data)

# select cases
data <- subset(data, data$Select.species > 0 & data$Select.species < 250)


data$f.area <- factor(data$Area)
data$f.species <- factor(data$Species)


# transformation, centring & z-score of all continous variables
data$l.SDM <- (data$SDM.corrected)
data$z.sdm <- ((data$l.SDM) - mean(data$l.SDM, na.rm=TRUE))/I(2*sd(data$l.SDM, na.rm=TRUE))
data$l.effort <- log10(data$Hunter.effort+1)
data$z.effort <- ((data$l.effort) - mean(data$l.effort, na.rm=TRUE))/I(2*sd(data$l.effort, na.rm=TRUE))
data$l.PRO <- log10(data$Proportion.year.available)
data$z.prop <- ((data$l.PRO) - mean(data$l.PRO, na.rm=TRUE))/I(2*sd(data$l.PRO, na.rm=TRUE))
data$l.area <- log10(data$Area.size+1)
data$z.area <- ((data$l.area) - mean(data$l.area, na.rm=TRUE))/I(2*sd(data$l.area, na.rm=TRUE))
data$l.estimate <- (data$Hunter.estimate)



# make continuous variables numeric
data$z.sdm <- as.numeric(data$z.sdm)
data$z.effort <- as.numeric(data$z.effort)
data$z.prop <- as.numeric(data$z.prop)
data$z.area <- as.numeric(data$z.area)

# check distribution
dotchart(data$z.sdm)
dotchart(data$z.effort) 
dotchart(data$z.prop)
dotchart(data$z.area)
dotchart(data$l.estimate)



#############################################################################################################
######################################## GLM  #################################################
#############################################################################################################

install.packages(farver)
library(farver)
ggplot(data, aes(y=l.estimate, x=z.sdm))+geom_point()+geom_smooth(method = "loess")



install.packages(gamm4)
library(gamm4)
gamm4 <- gamm4(l.estimate ~ s(z.sdm) + s(z.effort, k=5) + s(z.prop, k=5) + s(z.area), random=~(1|f.species), data=data, REML = FALSE) 
summary(gamm4$gam)
# edf z.sdm, z.prop = 1
# edf z.effort = 2.064, but not yet significant
# edf z.area = 2.969
# lmer.REML = 1707

gamm5 <- gamm4(l.estimate ~ s(z.sdm) + s(z.effort, k=5) + s(z.prop, k=5), random=~(1|f.species) + (1|f.area), data=data, REML = FALSE) 
summary(gamm5$gam)
# edf z.sdm, z.prop = 1
# edf z.effort = 2.227, deviates significant from linear
# lmer.REML = 1710.6


gamm6 <- gamm4(l.estimate ~ s(z.sdm) + s(z.prop, k=5), random=~(1|f.species) + (1|f.area), data=data, REML = FALSE) 
summary(gamm6$gam)
# all edf = 1 no splines necessary
# lmer.REML = 1858.6


############################################################################
library(MASS)

NB1<-glm.nb(l.estimate ~ z.sdm + z.prop,
            data = data)
summary(NB1)
Anova(NB1) 

ENB1<-resid(NB1, type="pearson")
N<-nrow(data)
p<-length(coef(NB1))+1
overdispersionNB1<- sum(ENB1^2)/(N-p)
overdispersionNB1

### overdispersion = 1.37


########################################  Hunter estimates     -    Poisson with log link




model.sdm.1 <- glmer(data$l.estimate ~ z.sdm + z.prop + (1|f.area) + (1|f.species), data = data, weights = data$weights, nAGQ = 1, family = poisson(link= "log"), control = glmerControl(optimizer = "bobyqa")) # nAGQ = 1 is Laplace parameter estimation, nAGQ > 1 is Gauss Hermite quadrature parameter estimation (more accurate, but slower)
summary(model.sdm.1)
drop1(model.sdm.1)
# final model, AIC = 1345


MPL.end <- glmer(data$l.estimate ~ z.sdm + z.prop + (1|f.area) + (1|f.species), data = data, weights = data$weights, nAGQ = 1, family = poisson(link= "log"), control = glmerControl(optimizer = "bobyqa")) # nAGQ = 1 is Laplace parameter estimation, nAGQ > 1 is Gauss Hermite quadrature parameter estimation (more accurate, but slower)
summary(MPL.end)


## package r2GLMM to assess goodness of fit!!


# model validation
data$E1 <- resid(MPL.end, type = "pearson")
data$F1 <- fitted(MPL.end)

par(mfrow = c(1,1), cex.lab = 1.5)
plot(x = data$F1 , 
     y = data$E1 ,
     xlab = "Fitted values (with re)",
     ylab = "Pearson residuals")
abline(h = 0, lty = 2)

library(ggplot2)
output <- data.frame(resid = resid(MPL.end,type = "pearson" ), fitted = fitted(MPL.end))
ggplot(output, aes(fitted, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no pattern, graph winds around y=0, and goes down a bit at the end

output <- data.frame(resid = resid(MPL.end,type = "pearson" ), SDM = data$z.sdm)
ggplot(output, aes(SDM, resid)) +
  geom_jitter(position=position_jitter(width=0.001), alpha=.5) +
  stat_smooth(method="loess")+geom_abline(intercept=0, slope=0,col=2,lty=2)
# conclusion: no trend graph winds around y=0


Cook <- cooks.distance(MPL.end)
Cook
plot(cooks.distance(MPL.end), 
     type = "h",
     xlab = "Observation", 
     ylab = "Cook distance",
     cex.lab =  1.5)

# possible outliers if Cooks distance > 4/320 = 0.0125



modelxx <- MPL.end
#confint(modelxx, method = "boot")
summary(modelxx)
gof(modelxx) # GOF = Goodness of Fit for models of Count data, D = Deviance, X2 = Pearson Chi-squared (Chi2/df closer to 1 is better?)
deviance(modelxx)
resid(modelxx)
scatterplot(data$z.sdm, data$l.estimate)
r.squaredLR(modelxx)
plot(resid(modelxx))

# residuals normal?
data$resid <- resid(modelxx)
qqnorm(resid); qqline(resid, col = 2)
data$pred <- predict(modelxx)
scatterplot(I(data$l.estimate+1), data$resid, log = 'x')
