

#check what is in r console from last time
ls()
#remove everything
rm(list=ls())

#check what working directory is set
getwd()
#setting to required one where data is based
setwd("C:/Users/UEA/Documents/Neat data/nature heatwave")

#reading in data 
hardeningsimpleinR1 <- read.csv("heatwavehardening.csv", header = TRUE) # dataset with 2 * 10D blocks combined into a single 20D block 


#### DATA CHECK, CLEAN, DESCRIPTION AND SUMMARY #########################################################################

hardeningsimpleinR1 # produces all whole dataframe - no NAs, data to 2 or 3 d.p., no irregularities/anomalies


### checking for outliers/errors
summary(hardeningsimpleinR1) # produces general (unsplit) range, quantiles, median, count and mean summary stats for each variable


str(hardeningsimpleinR1) # checks the variable types

#'data.frame':	84 obs. of  3 variables:
#     $ Replicate          : int  1 2 3 4 5 6 7 8 9 10 ...
#$ Offspring.count.20D: int  327 182 373 323 374 318 147 127 0 104 ...
#$ Temperature.oC     : Factor w/ 3 levels "30","42","double42": 1 1 1 1 1 1 1 1 1 1 ...

# no converting necassary

is.na(hardeningsimpleinR1) # returns TRUE of x is missing
# nothing missing


levels(hardeningsimpleinR1$Temperature.oC)
#[1] "30"       "42"       "double42"
## if want to change 
# spermcount1$Temperature.oC <- factor(hardeningsimpleinR1$Temperature.oC,levels = c("42","double42",30"))
# levels(spermcount1$Temperature.oC)


#### DATA EXPLORATION ##################################################################################################

############## ! NAT COMMS DESCRIPTIVE STATISTICS #########################


#### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(hardeningsimpleinR1$Offspring.count.20D, hardeningsimpleinR1$Temperature.oC)
#$`30`
#vars  n   mean     sd median trimmed    mad min max range  skew kurtosis    se
#X11  20 227.35 119.66    242  234.81 143.81   0 374   374 -0.31    -1.37 26.76

#$`42`
#vars  n   mean     sd median trimmed    mad min max range skew kurtosis   se
#X11  35 125.94 118.89    103  118.45 152.71   0 351   351 0.31    -1.46 20.1

#$double42
#vars  n  mean   sd median trimmed mad min max range skew kurtosis   se
#X11  29 14.34 27.9      0    9.36   0   0 104   104 1.86     2.31 5.18




##############################################################################################################################################################PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################

### in base
# 30
hist(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"], 
     main = list("Control", cex = 2), xlab = "20 day adult count", ylab ="Frequency", ylim = c(0,20),
     nclass = 10) 
# 42
hist(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"], 
     col = "red", density = 30, angle = 180, border = "red", 
     main = list("Single heatwave", cex = 2), xlab = "20 day adult count", ylab ="Frequency", ylim = c(0,20),
     nclass = 10)  # keep nclass = 10, keep scales default
# double42
hist(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"], 
     col = "red4", density = 30, angle = 180, border = "red4", 
     main = list("Double heatwave", cex = 2), xlab = "20 day adult count", ylab ="Frequency", ylim = c(0,20),
     nclass = 10)  # keep nclass = 10, keep scales default

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC, ylab="20D Adult Count", xlab="Temperature")




########### Normality - Failed in all groups
shapiro.test (hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"]) # W = 0.9212, p-value = 0.1045
ks.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"], pnorm)  # D = 0.95, p-value = 4.441e-16
shapiro.test (hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"]) # W = 0.86992, p-value = 0.0006756
ks.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"], pnorm) # D = 0.71429, p-value = 6.661e-16
shapiro.test (hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"]) # W = 0.59226, p-value = 8.292e-08
ks.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"], pnorm) # D = 0.5, p-value = 1.009e-06


########### Homogeneity of Variances - Failed
bartlett.test(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC) # Bartlett's K-squared = 48.409, df = 2, p-value = 3.078e-11
fligner.test(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC) # Fligner-Killeen:med chi-squared = 31.808, df = 2, p-value = 1.239e-07
#! need library(car)
leveneTest(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC)   #Df F value Pr(>F) 2  31.518 7.507e-11 ***




###################### NEAT PLOT #########################################
library(ggplot2)

temp <- expression(paste('Temperature (',degree,'C)')) #the temperature label with degrees sign # ~ is a space

############## ! FIGURE 2c PLOT #########################

graphharden<-ggplot(hardeningsimpleinR1, aes(x=Temperature.oC, y=Offspring.count.20D, fill= Temperature.oC)) +  #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual
     geom_boxplot(notch=F,  #change to F if want to get rid of notchs
                  outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked)
                  width=0.5,
                  lwd=0.5,
                  fatten=0.5,
                  color="black",
                  position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked)
     stat_summary(fun.y="mean", geom= "point", size=4, position=position_dodge(1), color="black") + 
     scale_fill_manual(values=c("ghostwhite", "tomato", "firebrick3"), # changes the colour of the bars
                       name = temp, #adds in temperature label on the legend
                       breaks = c("30", "42", "double42"), #the order listed in the legend
                       label = c("Control", "1 Heatwave", "2 Heatwaves")) + #how things are labeled in the lgend
     geom_jitter(shape=1, size=1.5, position=position_jitter(0.15)) + #so all the data points are not ontop of each other
     labs (x= "Number of heatwaves male exposed to", y="20 day reproductive output") +  #adding title to the x axis and y axis
     scale_x_discrete(breaks=c("30", "42", "double42"), #the order of the variables on the x axis
                      labels=c("Control", "1 ", "2")) + # the names on the x axis
     coord_cartesian(ylim=c(-7.5, 407.5)) + #set axis limits
     scale_y_continuous(breaks=seq(0, 400, 100), #ticks from 0 to 16000 and show number every 16000
                        expand = c(0, 0)) + #cuts the axis off at 0
     theme_classic() + #the theme of the whole plot 
     theme(            #legend.position="none", #get rid of the hashtag to get rid of legend
          panel.grid.major=element_blank(), #getting rid of majorgridlines
          panel.border=element_blank(),     #getting rid of minorgridlines  
          panel.grid.minor=element_blank(),
          axis.line.x=element_line(color="black", size = 1),
          axis.line.y=element_line(color="black", size = 1),
          axis.text.x=element_text(color="black", size=12),
          axis.text.y=element_text(color="black", size=12),
          axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
          axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
          legend.position="none",
          panel.background=element_blank(),
          plot.background=element_rect(fill="transparent", colour = NA))

setwd("C:/Users/UEA/Desktop")
ggsave("graphharden.png",width=3.75, height=4, dpi=300, bg = "transparent")
setwd("C:/Users/UEA/Documents/Dissertation and phd/d- data for phd/R analysis/main/hardening")




#################################################################################################################################################################### OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 


########## Transformation with just hist and shapiro 
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))
## RIGHT SKEW FIXING
par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird
#raw data not normal; positive/right skew
hist (sqrt(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"]))
hist (sqrt(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"]))
hist (sqrt(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"]))
shapiro.test (sqrt(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"]))
shapiro.test (sqrt(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"]))
shapiro.test (sqrt(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"]))
#sqrt data 30, 42, double42 not normal; still positive/right skew
hist (log10(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"]+0.01))
hist (log10(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"]+0.01))
hist (log10(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"]+0.01))
shapiro.test (log10(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "30"]+0.01))
shapiro.test (log10(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "42"]+0.01))
shapiro.test (log10(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC == "double42"]+0.01))
#log10 still not normal



#### 2+ sample Kurskall Wallis
# As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried

#1) Homogeneity of variance not strict assumpation of Kurskall wallace 
kruskal.test(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC)
# Kruskal-Wallis chi-squared =  30, df = 2, p-value = 7e-08 # there is a significant difference between groups
# Posthoc testing 

#! library(pgirmess) 
kruskalmc(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC, probs = 0.05, cont=NULL) # change to 2 tailed for all to control
#             obs.dif critical.dif difference
#30-42       18.23214     16.36859       TRUE
#30-double42 39.61983     16.97317       TRUE
#42-double42 21.38768     14.66344       TRUE

#! library(PMCMR) 
posthoc.kruskal.nemenyi.test(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC, dist="Chisquare") #"Tukey" for no ties 
#          30      42    
# 42       0.0259  -     
# double42 1.1e-07 0.0019


# dunn.test.control(x=survivalsex$Prop.survivors,g=survivalsex$Treatment, p.adjust="bonferroni")
# dunn.test.control(x=survivalsex$Prop.survivors,g=survivalsex$Treatment)
posthoc.kruskal.dunn.test(x=hardeningsimpleinR1$Offspring.count.20D, g=hardeningsimpleinR1$Temperature.oC, p.adjust.method="bonferroni") # ULTRA CONSERVATIVE
posthoc.kruskal.dunn.test(x=hardeningsimpleinR1$Offspring.count.20D, g=hardeningsimpleinR1$Temperature.oC) # THIS METHOD PREFERRED FOR NON PARAMETRIC UNEQUAL SAMPLE SIZES
#               30      42    
#      42       0.0069  -     
#      double42 4.4e-08 0.0008


#also see Games-Howell test as more robust to violation of assumptions 

#2) Convert data to ranks and submitt to a welch anova and tukey HSD Ruxton (2006)
hardeningsimpleinR1$rank<-rank(hardeningsimpleinR1$Offspring.count.20D) # ranking all data inter-group by ascending count 
oneway.test(hardeningsimpleinR1$rank~ hardeningsimpleinR1$Temperature.oC, var.equal=FALSE)
# F =  40, num df = 2.000, denom df = 50, p-value = 1e-10
TukeyHSD(aov) # only works with aov not oneway, 
#! library(userfriendlyscience)
posthocTGH(y=hardeningsimpleinR1$rank, x=hardeningsimpleinR1$Temperature.oC, method="games-howell") # use games-howell when different sample sizes # tukey for equal
#           n means variances
# 30       20    64       321
# 42       35    46       537
# double42 29    24       164
# 
#               t df       p
# 30:42       3.3 48 5.8e-03
# 30:double42 8.5 32 3.0e-09
# 42:double42 4.7 55 5.9e-05



#Note  for picking out levels within factor
#wilcox.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="30"], hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="42"])
#wilcox.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="30"], hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="double42"])
#wilcox.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="42"], hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="double42"])




#################################################################################################################################################################### NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################


########## ! NAT COMMS MODEL SELECTION ###################

#### USEFUL PACKAGES
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()


library(psych)
describeBy(hardeningsimpleinR1$Offspring.count.20D, hardeningsimpleinR1$Temperature.oC)
# $`30`
# vars    n   mean     sd median trimmed    mad min max range  skew kurtosis    se
# X1    1 20 227.35 119.66    242  234.81 143.81   0 374   374 -0.31    -1.37 26.76
# 
# $`42`
# vars    n   mean     sd median trimmed    mad min max range skew kurtosis   se
# X1    1 35 125.94 118.89    103  118.45 152.71   0 351   351 0.31    -1.46 20.1
# 
# $double42
# vars    n  mean   sd median trimmed mad min max range skew kurtosis   se
# X1    1 29 14.34 27.9      0    9.36   0   0 104   104 1.86     2.31 5.18

#### Poisson family error structures
# As data is very right skewed count, fitting normal distibution does not give normal and homogenity of variance in residuals 

# Creating a global model
globalmodposs<-glm(Offspring.count.20D ~ Temperature.oC, poisson(link = "log"), data=hardeningsimpleinR1)
globalmodpossID<-glm(Offspring.count.20D ~ Temperature.oC, poisson(link = "identity"), data=hardeningsimpleinR1)
globalmodpossRT<-glm(Offspring.count.20D ~ Temperature.oC, poisson(link = "sqrt"), data=hardeningsimpleinR1) 
globalmodnegbin<-glm.nb(Offspring.count.20D ~ Temperature.oC, link = "log", data=hardeningsimpleinR1)


#########################

summary(globalmodposs); summary(globalmodpossID); summary(globalmodpossRT) # No R^2, AIC given
# AIC: 7736.2,AIC: 7736.2,AIC: 7736.2 # link change seem to do little
pseudoR<-(globalmodposs$null.deviance-globalmodposs$deviance) / globalmodposs$null.deviance # (thomas et al., 2015)
pseudoR # 0.44
pseudoR<-(globalmodpossID$null.deviance-globalmodpossID$deviance) / globalmodpossID$null.deviance # (thomas et al., 2015)
pseudoR # 0.44
pseudoR<-(globalmodpossRT$null.deviance-globalmodpossRT$deviance) / globalmodpossRT$null.deviance # (thomas et al., 2015)
pseudoR # 0.44
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

AICc<-(-2*logLik(globalmodposs))+((2*1*(1+1)/(84-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 7730.202
qAICc<-(-2*logLik(globalmodposs)/90.9372)+((2*1*(1+1)/(84-1-1))); qAICc # 85.05418


## Overdispersion check
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
theta<-globalmodposs$deviance/globalmodposs$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 91, massively overdispersed is >1 is overdispersion. VAR.S 15950.49, U 111.56 
#! library(AER) alternative test
var(hardeningsimpleinR1$Offspring.count.20D) #15950
mean(hardeningsimpleinR1$Offspring.count.20D) #112
dispersiontest(globalmodposs) # dispersion  78 

table(hardeningsimpleinR1$Offspring.count.20D); str(hardeningsimpleinR1$Offspring.count.20D)# 25% data 0

############################

theta<-globalmodnegbin$deviance/globalmodnegbin$df.residual; theta 
par(mfrow=c(2,2)); plot(globalmodnegbin);par(mfrow=c(1,1))
pseudoR<-(globalmodnegbin$null.deviance-globalmodnegbin$deviance) / globalmodnegbin$null.deviance # (thomas et al., 2015)
pseudoR # 0.21


library(MASS)
hardeningcountnvebinom<-glm.nb(Offspring.count.20D ~ Temperature.oC, link = "log", data=hardeningsimpleinR1)

hardeningcountspoiss<-glm(Offspring.count.20D ~ Temperature.oC, poisson(link = "log"), data=hardeningsimpleinR1)

summary(hardeningcountnvebinom,cor = F)
par(mfrow=c(2,2)); plot(hardeningcountnvebinom);par(mfrow=c(1,1))

pseudoR<-(hardeningcountnvebinom$null.deviance-hardeningcountnvebinom$deviance) / hardeningcountnvebinom$null.deviance # (thomas et al., 2015)
pseudoR # 0.21

AICc<-(-2*logLik(hardeningcountnvebinom))+((2*1*(1+1)/(84-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 812 # AICc smaller so model more efficient

lrtest(hardeningcountspoiss, hardeningcountnvebinom) # negative binomial better, confirming with assumptions
# Model 1: Offspring.count.20D ~ Temperature.oC
# Model 2: Offspring.count.20D ~ Temperature.oC
# #Df LogLik Df Chisq Pr(>Chisq)    
# 1   3  -3865                        
# 2   4   -406  1  6919     <2e-16 ***

### assumption checks, recommendation of residual dev (contribution of each obs to resid dev) rather than pearson (Thomas et al., 2015)
summary(hardeningcountnvebinom)
summary(hardeningcountspoiss)

# 1) Errors normally distributed? - NOT NECASSARY BUT IMPROVED

# poisson
devresid<-resid(hardeningcountspoiss, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(hardeningcountspoiss);par(mfrow=c(1,1))

# -vebinom
devresid<-resid(hardeningcountnvebinom, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(hardeningcountnvebinom);par(mfrow=c(1,1))
# -ve binom Q-Q points more symmetrical pull balanced, devresid histogram  positive skewe reduced 

# 2) Homogenous/homoscedasticity variance of residuals - NOT NECASSARY BUT NOT IMPROVED

# poisson
devresid<-resid(hardeningcountspoiss, type = "deviance")
plot(devresid ~ hardeningcountspoiss$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~hardeningcounts$Temperature.oC) 
par(mfrow=c(2,2)); plot(hardeningcountspoiss);par(mfrow=c(1,1))

# -vebinom
devresid<-resid(hardeningcountnvebinom, type = "deviance")
plot(devresid ~ hardeningcountnvebinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~hardeningcounts$Temperature.oC)
par(mfrow=c(2,2)); plot(hardeningcountnvebinom);par(mfrow=c(1,1))
#  P1 some wedge, test failed

# 3) Independences of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - #ALL DATA COLLECTED AT ONE TIMEPOINT
#! need library(car)
# durbinWatsonTest(hardeningcountnvebinom) 

# 5) No bias by unduly influential datapoints - YES

# poisson
par(mfrow=c(2,2)); plot(hardeningcountspoiss);par(mfrow=c(1,1))
influence<-influence.measures(hardeningcountspoiss); summary(influence) 

# -vebinom
par(mfrow=c(2,2)); plot(hardeningcountnvebinom);par(mfrow=c(1,1))
influence<-influence.measures(hardeningcountnvebinom); summary(influence) 
# 26 cases in poisson with larger cooks distance fixed with -ve binom

# 6) Independent variables measured without error - BEST OF ABILITY

## Overdispersion re-check
theta<-hardeningcountnvebinom$deviance/hardeningcountnvebinom$df.residual; theta # 1.2
AICc<-(-2*logLik(hardeningcountnvebinom))+((2*1*(1+1)/(59-1-1))); AICc # 688
# Theta now <1.5 so overdispersion accounted for. 


###  MODEL REFINEMENT
hardeningcountnvebinom<-glm.nb(Offspring.count.20D ~ Temperature.oC, link = "log", data=hardeningsimpleinR1)
pseudoR<-(hardeningcountnvebinom$null.deviance-hardeningcountnvebinom$deviance) / hardeningcountnvebinom$null.deviance; pseudoR # 0.21
AICc<-(-2*logLik(hardeningcountnvebinom))+((2*1*(1+1)/(46-1-1))); AICc  # 812

## Null model
nullmod<-glm.nb(Offspring.count.20D ~ 1, link = "log", data=hardeningsimpleinR1) # creating null of just intercept (and random in glmms)
pseudoR<-(nullmod$null.deviance-nullmod$deviance) / nullmod$null.deviance; pseudoR # (thomas et al., 2015) # 5.5e-16
AICc<-(-2*logLik(nullmod))+((2*1*(1+1)/(46-1-1))); AICc  # 13565 

# control=glm.control(maxit=100) doesn't work

### Global model explains offspring significantly more than null model because
# 1) pseudo R is higher and AIC more than 2 lower 

# 2) anova comparison
anova(hardeningcountnvebinom, nullmod, test = "Chi")
anova(nullmod, hardeningcountnvebinom, test = "Chi") #! Use "F" for continuous dependent variables
# Likelihood ratio tests of Negative Binomial Models
# Response: Offspring.count.20D
#            Model   theta Resid. df    2 x log-lik.   Test    df LR stat. Pr(Chi)
# 1              1 7.7e+05        83          -13565                              
# 2 Temperature.oC 3.1e-01        81            -812 1 vs 2     2    12754       0

anova(hardeningcountnvebinom)
#                 Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
# NULL                              83      120.3             
# Temperature.oC  2     25.2        81       95.1  3.4e-06 ***

# 3) liklihood ratio test
library(lmtest)
lrtest(hardeningcountnvebinom,nullmod)
lrtest(nullmod,hardeningcountnvebinom)#produces same result just different order 
# Likelihood ratio test
# 
# Model 1: Offspring.count.20D ~ 1
# Model 2: Offspring.count.20D ~ Temperature.oC
#     Df LogLik Df Chisq Pr(>Chisq)    
# 1   4   -406                        
# 2   2  -6783 -2 12754     <2e-16 ***


########## ! NAT COMMS MODEL SIGNIFICANCE ###################

drop1(hardeningcountnvebinom, test= "Chi")
#                 Df Deviance    AIC    LRT  Pr(>Chi)    
# <none>              95.123 817.63                     
# Temperature.oC  2  120.282 838.79 25.159 3.442e-06 ***


## Therefore complex model kept (although the simplest prefered for NS difference)

globalmodnvebinom # shortened with basic information # redundant


########## ! NAT COMMS MODEL POST HOC ###################

summary(hardeningcountnvebinom)
# 
# Call:
#      glm.nb(formula = Offspring.count.20D ~ Temperature.oC, data = hardeningsimpleinR1, 
#             link = "log", init.theta = 0.3086917859)
# 
# Deviance Residuals: 
#    Min      1Q  Median      3Q     Max  
# -2.019  -1.544  -0.321   0.239   1.613  
# 
# Coefficients:
#                               Estimate Std. Error z value Pr(>|z|)    
#      (Intercept)               5.426      0.403   13.47  < 2e-16 ***
#      Temperature.oC42         -0.591      0.505   -1.17     0.24    
#      Temperature.oCdouble42   -2.763      0.526   -5.26  1.5e-07 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for Negative Binomial(0.31) family taken to be 1)
# 
# Null deviance: 120.282  on 83  degrees of freedom
# Residual deviance:  95.123  on 81  degrees of freedom
# AIC: 819.6
# 
# Number of Fisher Scoring iterations: 1
# 
# 
# Theta:  0.3087 
# Std. Err.:  0.0494 
# 
# 2 x log-likelihood:  -811.6270 

exp(5.426 )# 227
exp(5.426-0.591) # 126
exp(5.426-2.763) # 14


library(multcomp)
summary(glht(hardeningcountnvebinom, mcp(Temperature.oC="Tukey")))
# Simultaneous Tests for General Linear Hypotheses
# 
# Multiple Comparisons of Means: Tukey Contrasts
# 
# 
# Fit: glm.nb(formula = Offspring.count.20D ~ Temperature.oC, data = hardeningsimpleinR1, 
#             link = "log", init.theta = 0.3086917859)
# 
# Linear Hypotheses:
#                           Estimate Std. Error z value Pr(>|z|)    
#      42 - 30 == 0         -0.591      0.505   -1.17     0.47    
#      double42 - 30 == 0   -2.763      0.526   -5.26   <1e-05 ***
#      double42 - 42 == 0   -2.172      0.455   -4.78   <1e-05 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# (Adjusted p values reported -- single-step method)


library(lsmeans)
lsmeans(hardeningcountnvebinom, pairwise~Temperature.oC, adjust="tukey")
# $contrasts
# contrast       estimate        SE df z.ratio p.value
# 30 - 42       0.5906624 0.5049526 NA   1.170  0.4713
# 30 - double42 2.7631012 0.5256460 NA   5.257  <.0001
# 42 - double42 2.1724389 0.4548553 NA   4.776  <.0001





#### HEATWAVE HARDENING SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis
# The relative effect of a single 5d 42oC heatwave and double heatwaves on the 20D reproductive fitness (adult offspring counts) of adult tribolium males

#Response variable (dependent):           20D reproductive fitness; (count +/- positive skew) 

#Global Fixed variables (independent):          
#    Categorical                          temperature (30, 42, double42)  
#    Covariates                           NA
#    Non-linear terms                     NA
#    Interactions                         NA

#Random terms:                            (oviblock)

#---------Misc

# Simple analysis:                       2 sample t-test but not normal/equal var     
# Non-para:                              mann-whitney U
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats
# Large postive skew and heterogeniety of variances. unable to transform

# 1. kurskal wallis
# kruskal.test(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC)
# Chi^2 (2) = 30, p-value = 7e-08
# post-hoc kruskalmc(hardeningsimpleinR1$Offspring.count.20D ~ hardeningsimpleinR1$Temperature.oC, probs = 0.05, cont=NULL)
# 30-42, 42-double42, 30-double42 p < 0.05

# 2. welches anova with ranks
# oneway.test(hardeningsimpleinR1$Offspring.count.20D~ hardeningsimpleinR1$Temperature.oC, var.equal=FALSE)
# F (2,35)= 42.304, p-value = 4.164e-10
# posthocTGH(y=hardeningsimpleinR1$Offspring.count.20D, x=hardeningsimpleinR1$Temperature.oC, method="games-howell")
# 30-42, 42-double42, 30-double42 p < 0.05

#---------GLM Model refinement

# # Overdispersion parameter:                70-90 for poisson (log) 0 inflation, split into 1/0 for 0 inflation and count model for 
#(for binomial/poisson)

## 0 presence model
# Error family (+link function):           bernoulli binomial (logit)

# Model refinement method(s):              AICc comparison, anova(), lmtest()

# Most plausible/final model(s):           hardening0pabinom <- glm(Zeros ~ Temperature.oC,  binomial(link= "logit"), data=hardening0pa))

# AIC(c)                                   88.06074 
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      0.14
# Model significance:                      from analysis of deviance lrtest(nullhardening0pabinom, hardening0pabinom): X^2(2) = 14.3, df = 2,81, p= 8e-04 

# Hypothesis interpretation:  Heatwaves increase the frequency of males with 0 reproductive fitness

## 0-less count model
# Error family (+link function):           poisson (log) theta=53-66, -ve binomial (log) theta=1.2

# Model refinement method(s):              AICc comparison, anova(), lmtest()

# Most plausible/final model(s):           globalmodnvebinom<-glm.nb(Mean.sperm.count ~ Temperature.oC, link = "log", data=spermcount1)

# AIC(c)                                   983 
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      0.33
# Model significance:                      from analysis of deviance anova(nullmod, hardeningcountnvebinom, test = "Chi"): X^2 (2) = 27 ,  df=2, 56,  p= 1.3e-06


#--------Model assumptions
## 0 presence model
# deviance residuals <2 so ok thomas et al., 2015


## 0-less count model
#Residuals normally distributed:               NOT NECASSARY BUT IMPROVED
#    hist of stdzd resids (u=0, sd=1):         symmetrical bell shaped 
#    p1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal  
#    p2-Q-Q plot w/ stdzd resids:              straight line 
#    p3-scale-location w/sqrt resid~fitted:    no pattern
#    s-w/k-s resid tests:                      NS
# -ve binom Q-Q points more symmetrical pull balanced, devresid histogram  positive skewe reduced 

#Homogeneity of variance:                      NOT NECASSARY BUT NOT IMPROVED
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
#    fligner/levenes tests:                    NS
#  P1 some wedge, test failed

#Independence of variables (no-collinearity):  YES AS ONLY 1 VARIABLE
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
#    variance inflation factors (VIFs):        values <3

#No serial auto-correlation                     YES AS ONLY A SINGLE TIMEPOINT
#    durbinwatson test:                         NS
#    Auto-cor function (ACF):                   <threshold
#  test passed

#No bias by unduly influential datapoints:      YES
#    P4-stdzd resids~leverage:                  <cooks threshold eg. >1 or sample-size/4
#    leverage                                   <2p/n
# 26 cases in poisson with larger cooks distance fixed with -ve binom

#Independent variables measured without error:  TO BEST OF ABILITY


#-------Model report
# 0 presence model
#term                   peramter+/-se    test-stat(wald z)   d.f.      P
# 42 - 30 == 0         -1.884  1.096           -1.72               0.188  
# double42 - 30 == 0   -3.013  1.091           -2.76               0.015 *
# double42 - 42 == 0   -1.130  0.536           -2.11               0.082 .

# Simultaneous Tests for General Linear Hypotheses
# Multiple Comparisons of Means: Tukey Contrasts
# Fit: glm(formula = Zeros ~ Temperature.oC, family = binomial(link = "logit"), 
#          data = hardening0pa)


# 0-less count model
#term                  peramter+/-se    test-stat(wald z)   d.f.      P
# 42 - 30 == 0         -0.345  0.276               -1.25           0.42    
# double42 - 30 == 0   -2.086  0.325               -6.42         <1e-04 ***
# double42 - 42 == 0   -1.741  0.306               -5.69         <1e-04 ***

# Simultaneous Tests for General Linear Hypotheses
# Multiple Comparisons of Means: Tukey Contrasts
# Fit: glm.nb(formula = Offspring.count.20D ~ Temperature.oC, data = hardeningcounts, 
#             link = "log", init.theta = 1.205634467)


# Hypothesis interpretation:  Heatwaves decrease the reproductive fitness of males which produce offspring 

# Males do not appear to harden to successive heatwaves but become more vulnerble. Effects significant after two heatwaves


