
#check what is in r console from last time
ls()
#remove everything
rm(list=ls())

#check what working directory is set
getwd()
#setting to required one where data is based
setwd("C:/Users/UEA/Documents/Neat data/nature heatwave")

#reading in data 
corelifestage10D <- read.csv("heatwavelifestagewide.csv", header = TRUE) # 10D data set wide format for analysis
lifestageplot <- read.csv("heatwavelifestagelong.csv", header = TRUE)# 10D data set long format for plotting




str(lifestageplot)
lifestageplot$Temperature.oC <- as.factor(lifestageplot$Temperature.oC)
lifestageplot$Life.stage <- as.factor(lifestageplot$Life.stage)
lifestageplot$Batch <- as.factor(lifestageplot$Batch)
str(lifestageplot)
# 'data.frame':	284 obs. of  7 variables:
# $ Replicate       : int  1 2 3 4 5 6 7 8 9 10 ...                                       # female id
# $ Temperature.oC  : Factor w/ 2 levels "30","42": 1 1 1 1 1 1 1 1 1 1 ...               # male heatwave temperature
# $ Count           : int  96 235 177 198 194 208 143 194 208 68 ...                      # offspring counts at particular stage
# $ Prop.success    : num  0.927 0.881 0.831 0.874 0.845 0.755 0.951 0.928 0.909 0.309 ...# successful development between stages
# $ Life.stage      : Factor w/ 4 levels "1","2","3","4": 1 1 1 1 1 1 1 1 1 1 ...         # grouping of  offspring life stage  being counted
# $ Life.stage.names: Factor w/ 4 levels "adulttotal","egghatch",..: 2 2 2 2 2 2 2 2 2 2 ... # names of life stage is being counted



str(corelifestage10D)
corelifestage10D$Temperature.oC <- as.factor(corelifestage10D$Temperature.oC)
str(corelifestage10D)
# 'data.frame':	71 obs. of  10 variables:
# $ Replicate        : int  1 2 3 4 5 6 7 8 9 10 ...                                           # female id
# $ Temperature.oC   : Factor w/ 2 levels "30","42": 1 1 1 1 1 1 1 1 1 1 ...                   # male heatwave temperature
# $ Egg.sum.10D      : int  96 235 177 198 194 208 143 194 208 68 ...                          # egg #
# $ Hatch.prop       : num  0.927 0.881 0.831 0.874 0.845 0.755 0.951 0.928 0.909 0.309 ...    # proportion of eggs hatching
# $ Larvae.sum       : int  89 207 147 173 164 157 136 180 189 21 ...                          # larvae #
# $ Pupation.prop    : num  0.989 0.99 1 0.942 0.988 1 1 0.994 0.995 1 ...                     # proportion of larvae pupating
# $ Pupae.sum        : int  88 205 147 163 162 157 136 179 188 21 ...                          # pupae #       
# $ Eclosion.prop    : num  1 1 1 1 1 1 1 1 0.995 1 ...                                        # proportion of pupae eclosing
# $ Adult.sum        : int  88 205 147 163 162 157 136 179 187 21 ...                          # adult #
# $ Egg.to.adult.prop: num  0.917 0.872 0.831 0.823 0.835 0.755 0.951 0.923 0.899 0.309 ...    # proportion of eggs reaching adult stage
# 

####### Core data ##############
## Egg count
#core
corelifestage10D

## Hatch success
#core
corelifestage10D$Hatch.fail<-corelifestage10D$Egg.sum - corelifestage10D$Larvae.sum # column of eggs failing to hatch
corelifestage10D
corelifestage10D$Cbindhatch<-cbind(corelifestage10D$Larvae.sum, corelifestage10D$Hatch.fail) # vector for model of success/fail hatch
corelifestage10D$Cbindhatch

noNAcorelifestage10D<-corelifestage10D[complete.cases(corelifestage10D[,6]),] # remove NAs on pupation column where there were no larvae to develop further (note all petris had some larve progressing to adults so #NAs pupation = #NAs eclosion)

## Pupation success
#core
noNAcorelifestage10D$Pupation.fail<-noNAcorelifestage10D$Larvae.sum - noNAcorelifestage10D$Pupae.sum # column of larvae failing to pupate
noNAcorelifestage10D
noNAcorelifestage10D$Cbindpupation<-cbind(noNAcorelifestage10D$Pupae.sum, noNAcorelifestage10D$Pupation.fail) # vector for model of succes/fail pupate
noNAcorelifestage10D$Cbindpupation

## Eclosion success
#core
noNAcorelifestage10D$Eclosion.fail<-noNAcorelifestage10D$Pupae.sum - noNAcorelifestage10D$Adult.sum # column of pupae failing to eclose
noNAcorelifestage10D
noNAcorelifestage10D$Cbindeclosion<-cbind(noNAcorelifestage10D$Adult.sum, noNAcorelifestage10D$Eclosion.fail) # vector for model of succes/fail eclose
noNAcorelifestage10D$Cbindeclosion

## Egg to adult success
#core
corelifestage10D$Egg.adult.fail<-corelifestage10D$Egg.sum - corelifestage10D$Adult.sum # column of eggs failing to become adults
corelifestage10D
corelifestage10D$Cbindeggadult<-cbind(corelifestage10D$Adult.sum, corelifestage10D$Egg.adult.fail) # vector for model of succes/fail egg-adult
corelifestage10D$Cbindeggadult

## Data checks
summary(corelifestage10D) 
summary(noNAcorelifestage10D)
summary(lifestageplot)

str(corelifestage10D) 
str(noNAcorelifestage10D)
str(lifestageplot)

is.na(corelifestage10D) 
is.na(noNAcorelifestage10D) # no NAs
is.na(lifestageplot)

levels(corelifestage10D$Temperature.oC) 
levels(noNAcorelifestage10D$Temperature.oC)
levels(lifestageplot$Temperature.oC)
#[1] "30"       "42"   


####### Total data across multiple sets with no NAs ##############

## Egg count
noNAlifestage10Degg<-lifestage10D[complete.cases(lifestage10D[,3]),] #removing NAs from egg hatch
str(noNAlifestage10Degg)

## Hatch success
noNAlifestage10Dhatch<-lifestage10D
noNAlifestage10Dhatch<-lifestage10D[complete.cases(lifestage10D[,4]),]
noNAlifestage10Dhatch$Larvae.sum.2<-noNAlifestage10Dhatch$Egg.sum * noNAlifestage10Dhatch$Hatch.prop
noNAlifestage10Dhatch$Larvae.sum.2<-round(noNAlifestage10Dhatch$Larvae.sum.2, digits = 0)
noNAlifestage10Dhatch$Hatch.fail<-noNAlifestage10Dhatch$Egg.sum - noNAlifestage10Dhatch$Larvae.sum.2 # column of eggs failing to hatch
noNAlifestage10Dhatch$Cbindhatch<-cbind(noNAlifestage10Dhatch$Larvae.sum.2, noNAlifestage10Dhatch$Hatch.fail) # vector for model of success/fail hatch
noNAlifestage10Dhatch$Cbindhatch
str(noNAlifestage10Dhatch)

## Pupation success
# only core

## Eclosion success
# only have %s without pupae/adult numbers cannot do cbind
noNAlifestage10Dpupae <- lifestage10D[complete.cases(lifestage10D[,8]),]

## Egg to adult success
# only core tracked from egg to adult


## Data checks
summary(lifestage10D) # produces general (unsplit) range, quantiles, median, count and mean summary stats for each variable
summary(corelifestage10D) 
summary(noNAcorelifestage10D)

str(lifestage10D) # checks the variable types
str(corelifestage10D) 
str(noNAcorelifestage10D)

is.na(lifestage10D) # returns TRUE of x is missing
is.na(corelifestage10D) 
is.na(noNAcorelifestage10D) # no NAs

levels(lifestage10D$Temperature.oC) # returns TRUE of x is missing
levels(corelifestage10D$Temperature.oC) 
levels(noNAcorelifestage10D$Temperature.oC)
#[1] "30"       "42"   
## if want to change 
# spermcount1$Temperature.oC <- factor(hardeningsimpleinR1$Temperature.oC,levels = c("42","double42",30"))
# levels(spermcount1$Temperature.oC)



##! Summary 
summary{lifestage10D} # total dataset 

summary(corelifestage10D) # total core dataset n= (39, 32) for analysis of egg counts, hatch success, egg to adult success

summary(noNAcorelifestage10D) # no NA core dataset n= (38, 18) for analysis of pupation and eclosion success

summary(noNAlifestage10Degg) # no NA total dataset n= (59, 76) for analysis of egg counts

summary(noNAlifestage10Dhatch) # no NA total dataset n= (59, 76) for analysis cbind hatch success

summary(noNAlifestage10Dpupae) # no NA total dataset n= (73, 52) for non-parametric eclosion success








############ Boxplot of proportion success hatch/pupation/eclsion/egg-adult ####
################################################################################

library(ggplot2)

names(lifestageplot)

temp <- expression(paste('Temperature (',degree,'C)',sep='')) #the temperature label with degrees sign

########## ! NAT COMMS FIGURE 3 H PLOT #############

graphlifehurdlea<-ggplot(subset(lifestageplot, Life.stage %in% c("1" , "2", "3")), aes(x=Life.stage, y=Prop.success, fill= Temperature.oC)) +  #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual
     geom_boxplot(notch=F,  #change to F if want to get rid of notchs
                  outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked)
                  width=0.5,
                  lwd=0.5,
                  fatten=0.5,
                  color="black",
                  position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked)
     stat_summary(fun.y="mean", geom= "point", size=3, position=position_dodge(0.5), color="black") + 
     stat_summary(fun.y="mean", geom="line", aes(group=factor(Temperature.oC)), position=position_dodge(width=0.5)) +
     scale_fill_manual(values=c("ghostwhite", "tomato"), # changes the colour of the bars
                       name = temp, #adds in temperature label on the legend
                       breaks = c("30","42"), #the order listed in the legend
                       label = c("Control","Heatwave")) + #how things are labeled in the lgend
     scale_colour_manual(values=c("black", "black")) +
     geom_point(position=position_jitterdodge(dodge.width=0.5, jitter.width=0.15), shape=1, size= 1.5) +
     labs (x= "Offspring lifestage transitions", y= "Proportion of offspring surviving") +  #adding title to the x axis and y axis
     scale_x_discrete(breaks=c("1", "2", "3"), #the order of the variables on the x axis
                      labels=c("Hatching", "Pupating", "Eclosing")) + # the names on the x axis
     coord_cartesian(ylim=c(-0.02, 1.02)) + #set axis limits
     scale_y_continuous(breaks=seq(0, 1, 0.2), #ticks from 0 to 16000 and show number every 16000
                        expand = c(0, 0)) + #cuts the axis off at 0
     theme_classic() + #the theme of the whole plot 
     theme(#legend.position="none", #get rid of the hashtag to get rid of legend
          panel.grid.major=element_blank(), #getting rid of majorgridlines
          panel.border=element_blank(),     #getting rid of minorgridlines  
          panel.grid.minor=element_blank(),
          axis.line.x=element_line(color="black", size = 1),
          axis.line.y=element_line(color="black", size = 1),
          axis.text.x=element_text(color="black", size=12),
          axis.text.y=element_text(color="black", size=12),
          axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
          axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
          legend.position="none",
          panel.background=element_blank(),
          plot.background=element_rect(fill="transparent", colour = NA)) 


setwd("C:/Users/UEA/Desktop")
ggsave("graphlifehurdlea.png",width=3.75, height=4, dpi=300, bg = "transparent")
setwd("C:/Users/UEA/Documents/Neat data/nature heatwave")


graphlifehurdleb<-ggplot(subset(lifestageplot, Life.stage %in% c("4")), aes(x=Life.stage, y=Prop.success, fill= Temperature.oC)) +  #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual
     geom_boxplot(notch=F,  #change to F if want to get rid of notchs
                  outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked)
                  width=0.5,
                  lwd=0.5,
                  fatten=0.5,
                  color="black",
                  position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked)
     stat_summary(fun.y="mean", geom= "point", size=3, position=position_dodge(0.5), color="black") + 
     stat_summary(fun.y="mean", geom="line", aes(group=factor(Temperature.oC)), position=position_dodge(width=0.5)) +
     scale_fill_manual(values=c("ghostwhite", "tomato"), # changes the colour of the bars
                       name = temp, #adds in temperature label on the legend
                       breaks = c("30","42"), #the order listed in the legend
                       label = c("Control","Heatwave")) + #how things are labeled in the lgend
     scale_colour_manual(values=c("black", "black")) +
     geom_point(position=position_jitterdodge(dodge.width=0.5, jitter.width=0.15), shape=1, size= 1) +
     labs (x= "Proportion suvriving", y= "20 day offspring production") +  #adding title to the x axis and y axis
     scale_x_discrete(breaks=c("4"), #the order of the variables on the x axis
                      labels=c("Total")) + # the names on the x axis
     coord_cartesian(ylim=c(-0.02, 1.02)) + #set axis limits
     scale_y_continuous(breaks=seq(0, 1, 0.2), #ticks from 0 to 16000 and show number every 16000
                        expand = c(0, 0)) + #cuts the axis off at 0
     theme_classic() + #the theme of the whole plot 
     theme(#legend.position="none", #get rid of the hashtag to get rid of legend
          panel.grid.major=element_blank(), #getting rid of majorgridlines
          panel.border=element_blank(),     #getting rid of minorgridlines  
          panel.grid.minor=element_blank(),
          axis.line.x=element_line(color="black", size = 1),
          axis.line.y=element_line(color="black", size = 1),
          axis.text.x=element_text(color="black", size=12),
          axis.text.y=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank(),
          legend.position="none",
          panel.background=element_blank(),
          plot.background=element_rect(fill="transparent", colour = NA)) 

setwd("C:/Users/UEA/Desktop")
ggsave("graphlifehurdleb.png",width=1.25, height=4, dpi=300, bg = "transparent")
setwd("C:/Users/UEA/Documents/Dissertation and phd/d- data for phd/R analysis/main/recovery")




############################################################################################################################################################## EGG HATCH PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################
names(corelifestage10D)
corelifestage10D$Hatch.prop

summary(corelifestage10D) # total core dataset n= (39, 32) for analysis of egg counts, hatch success, egg to adult success


#### ! NAT COMMS DESCRIPTIVE STATS ###############



#### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(corelifestage10D$Hatch.prop, corelifestage10D$Temperature.oC)
#$`30`
#vars  n   mean sd    median trimmed   mad min max range  skew kurtosis   se
#X1 1 39 0.85 0.18       0.9    0.89 0.06   0   1     1  -3.25    11.23 0.03
 
#$`42`
#vars  n   mean sd    median trimmed   mad min max range skew kurtosis    se
#X1 1 32 0.41 0.43      0.27     0.4   0.4  0 0.98  0.98 0.14    -1.91 0.08


### in base
# 30
hist(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "30"], 
     main = list("Control", cex = 2), xlab = "prop hatch", ylab ="Frequency", ylim = c(0,30),
     nclass = 10) 
# 42
hist(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "42"], 
     col = "red", density = 30, angle = 180, border = "red", 
     main = list("Heatwave", cex = 2), xlab = "prop hatch", ylab ="Frequency", ylim = c(0,30),
     nclass = 10)  # keep nclass = 10, keep scales default

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(corelifestage10D$Hatch.prop ~ corelifestage10D$Temperature.oC, ylab="prop hatch", xlab="Temperature")

########### Normality - Failed in all groups
shapiro.test (corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "30"]) # W = 0.57765, p-value = 2.125e-09
ks.test(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "30"], pnorm)  # D = 0.70675, p-value < 2.2e-16
shapiro.test (corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "42"]) # W = 0.73865, p-value = 3.473e-06
ks.test(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "42"], pnorm) # D = 0.5, p-value = 2.251e-07

########### Homogeneity of Variances - Failed
bartlett.test(corelifestage10D$Hatch.prop ~ corelifestage10D$Temperature.oC) # Bartlett's K-squared = 23.207, df = 1, p-value = 1.455e-06
fligner.test(corelifestage10D$Hatch.prop ~ corelifestage10D$Temperature.oC) # Fligner-Killeen:med chi-squared = 34.351, df = 1, p-value = 4.602e-09
#! need library(car)
leveneTest(corelifestage10D$Hatch.prop ~ corelifestage10D$Temperature.oC)   #Df F value Pr(>F) 1  61.267 4.136e-11 ***

#################################################################################################################################################################### EGG HATCH OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 

########## Transformation with just hist and shapiro 
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))
## PROPORTION FIXING # arcsine sqrt transformation on porportions 
par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird
#raw data not normal; 30 -ve skew leptokurotosis. 42 opposite. both capped at 0 and 1
hist (asin(sqrt(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "30"])))
hist (asin(sqrt(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "42"])))
shapiro.test (asin(sqrt(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "30"])))
shapiro.test (asin(sqrt(corelifestage10D$Hatch.prop[corelifestage10D$Temperature.oC == "42"])))
#not fixed

par(mfrow=c(1,1))

# As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried

#1) Homogeneity of variance not strict assumpation 2 sample Mann whitney U test
wilcox.test(corelifestage10D$Hatch.prop ~ corelifestage10D$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 985.5, p-value = 2.767e-05 # there is a significant difference between groups

#2) Convert data to ranks and submitt to a welch anova and tukey HSD Ruxton (2006)
corelifestage10D$rank<-rank(corelifestage10D$Hatch.prop) # ranking all data inter-group by ascending count 
t.test(corelifestage10D$rank~ corelifestage10D$Temperature.oC, var.equal=FALSE)
# t = 4.7245, df = 59.818, p-value = 1.44e-05




############################################################################################################################################################## LARVAE PUPATION PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################
names(noNAcorelifestage10D)
noNAcorelifestage10D$Pupation.prop
summary(noNAcorelifestage10D) # no NA core dataset n= (38, 18) for analysis of pupation and eclosion success


#### ! NAT COMMS DESCRIPTIVE STATS ###############


#### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(noNAcorelifestage10D$Pupation.prop, noNAcorelifestage10D$Temperature.oC)
#$`30`
#vars  n   mean sd    median trimmed   mad min max range  skew kurtosis  se
#X1 1 38   0.99 0.02   0.99    0.99   0.01 0.91  1  0.09  -2.8     8.75  0

#$`42`
#vars  n   mean sd    median trimmed   mad min max range skew kurtosis   se
#X1 1 18   0.96 0.08   0.99    0.97  0.02 0.67   1  0.33 -2.57    6.03 0.02


### in base
# 30
hist(noNAcorelifestage10D$Pupation.prop[noNAcorelifestage10D$Temperature.oC == "30"], 
     main = list("Control", cex = 2), xlab = "prop pupate", ylab ="Frequency", ylim = c(0,40),
     nclass = 10) 
# 42
hist(noNAcorelifestage10D$Pupation.prop[noNAcorelifestage10D$Temperature.oC == "42"], 
     col = "red", density = 30, angle = 180, border = "red", 
     main = list("Heatwave", cex = 2), xlab = "prop pupate", ylab ="Frequency", ylim = c(0,40),
     nclass = 10)  # keep nclass = 10, keep scales default

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(noNAcorelifestage10D$Pupation.prop ~ noNAcorelifestage10D$Temperature.oC, ylab="prop pupate", xlab="Temperature")

########### Normality - Failed in all groups
shapiro.test (noNAcorelifestage10D$Pupation.prop[noNAcorelifestage10D$Temperature.oC == "30"]) # W = 0.6346, p-value = 1.702e-08
ks.test(noNAcorelifestage10D$Pupation.prop[noNAcorelifestage10D$Temperature.oC == "30"], pnorm)  # D = 0.8178, p-value < 2.2e-16
shapiro.test (noNAcorelifestage10D$Pupation.prop[noNAcorelifestage10D$Temperature.oC == "42"]) # W = 0.54555, p-value = 2.045e-06
ks.test(noNAcorelifestage10D$Pupation.prop[noNAcorelifestage10D$Temperature.oC == "42"], pnorm) # D = 0.74845, p-value = 3.491e-09

########### Homogeneity of Variances - passed in two tests less sensitive to departures from normality (thomas et al., 2015)
bartlett.test(noNAcorelifestage10D$Pupation.prop ~ noNAcorelifestage10D$Temperature.oC) # Bartlett's K-squared = 53.146, df = 1, p-value = 3.096e-13
fligner.test(noNAcorelifestage10D$Pupation.prop ~ noNAcorelifestage10D$Temperature.oC) # Fligner-Killeen:med chi-squared = 4.7126, df = 1, p-value = 0.02994
#! need library(car)
leveneTest(noNAcorelifestage10D$Pupation.prop ~ noNAcorelifestage10D$Temperature.oC)   #Df F value Pr(>F) 1  4.4053 0.04052 *

#################################################################################################################################################################### LARVAE PUPATION OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 

########## Transformation with just hist and shapiro 
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))
## PROPORTION FIXING # arcsine sqrt transformation on porportions 
par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird
#raw data not normal; both -ve skew leptokurotosis. both capped at 0 and 1
hist (asin(sqrt(noNAcorelifestage10D$Pupation.prop[corelifestage10D$Temperature.oC == "30"])))
hist (asin(sqrt(noNAcorelifestage10D$Pupation.prop[corelifestage10D$Temperature.oC == "42"])))
shapiro.test (asin(sqrt(noNAcorelifestage10D$Pupation.prop[corelifestage10D$Temperature.oC == "30"])))
shapiro.test (asin(sqrt(noNAcorelifestage10D$Pupation.prop[corelifestage10D$Temperature.oC == "42"])))
#not fixed

par(mfrow=c(1,1))

# As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried

#1) Homogeneity of variance not strict assumpation 2 sample Mann whitney U test
wilcox.test(noNAcorelifestage10D$Pupation.prop ~ noNAcorelifestage10D$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 429.5, p-value = 0.121 # there is no significant difference between groups

#2) Convert data to ranks and submitt to a welch anova and tukey HSD Ruxton (2006)
noNAcorelifestage10D$rank<-rank(noNAcorelifestage10D$Pupation.prop) # ranking all data inter-group by ascending count 
t.test(noNAcorelifestage10D$rank~ noNAcorelifestage10D$Temperature.oC, var.equal=FALSE)
# t = 1.5118, df = 30.033, p-value = 0.141





############################################################################################################################################################## PUPAL ECLOSION PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################
names(noNAcorelifestage10D)
noNAcorelifestage10D$Pupation.prop
summary(noNAcorelifestage10D) # no NA core dataset n= (38, 18) for analysis of pupation and eclosion success


#### ! NAT COMMS DESCRIPTIVE STATS ###############

#### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(noNAcorelifestage10D$Eclosion.prop, noNAcorelifestage10D$Temperature.oC)
#$`30`
#vars  n   mean sd    median trimmed   mad min max range  skew kurtosis  se
#X1 1 38    1  0      1            1   0  0.99   1  0.01  -2.5     4.42  0

#$`42`
#vars  n   mean sd    median trimmed   mad min max range skew kurtosis   se
#X1 1 18    1    0      1          1   0  0.99   1  0.01 -1.98    2.56   0


### in base
# 30
hist(noNAcorelifestage10D$Eclosion.prop[noNAcorelifestage10D$Temperature.oC == "30"], 
     main = list("Control", cex = 2), xlab = "prop elcose", ylab ="Frequency", ylim = c(0,40),
     nclass = 10) 
# 42
hist(noNAcorelifestage10D$Eclosion.prop[noNAcorelifestage10D$Temperature.oC == "42"], 
     col = "red", density = 30, angle = 180, border = "red", 
     main = list("Heatwave", cex = 2), xlab = "prop elcose", ylab ="Frequency", ylim = c(0,40),
     nclass = 10)  # keep nclass = 10, keep scales default

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(noNAcorelifestage10D$Eclosion.prop ~ noNAcorelifestage10D$Temperature.oC, ylab="prop eclose", xlab="Temperature")

########### Normality - Failed in all groups
shapiro.test (noNAcorelifestage10D$Eclosion.prop[noNAcorelifestage10D$Temperature.oC == "30"]) # W = 0.36021, p-value = 1.082e-11
ks.test(noNAcorelifestage10D$Eclosion.prop[noNAcorelifestage10D$Temperature.oC == "30"], pnorm)  # D = 0.83989, p-value < 2.2e-16
shapiro.test (noNAcorelifestage10D$Eclosion.prop[noNAcorelifestage10D$Temperature.oC == "42"]) # W = 0.48399, p-value = 5.839e-07
ks.test(noNAcorelifestage10D$Eclosion.prop[noNAcorelifestage10D$Temperature.oC == "42"], pnorm) # D = 0.83842, p-value = 2.045e-11

########### Homogeneity of Variances - passed in two tests less sensitive to departures from normality (thomas et al., 2015)
bartlett.test(noNAcorelifestage10D$Eclosion.prop ~ noNAcorelifestage10D$Temperature.oC) # Bartlett's K-squared = 11.066, df = 1, p-value = 0.0008793
fligner.test(noNAcorelifestage10D$Eclosion.prop ~ noNAcorelifestage10D$Temperature.oC) # Fligner-Killeen:med chi-squared = 1.1021, df = 1, p-value = 0.2938
#! need library(car)
leveneTest(noNAcorelifestage10D$Eclosion.prop ~ noNAcorelifestage10D$Temperature.oC)   #Df F value Pr(>F) 1  1.4269 0.2375

#################################################################################################################################################################### PUPAL ECLOSION OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 

########## Transformation with just hist and shapiro 
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))
## PROPORTION FIXING # arcsine sqrt transformation on porportions 
par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird
#raw data not normal; both -ve skew leptokurotosis. both capped at 0 and 1
hist (asin(sqrt(noNAcorelifestage10D$Eclosion.prop[corelifestage10D$Temperature.oC == "30"])))
hist (asin(sqrt(noNAcorelifestage10D$Eclosion.prop[corelifestage10D$Temperature.oC == "42"])))
shapiro.test (asin(sqrt(noNAcorelifestage10D$Eclosion.prop[corelifestage10D$Temperature.oC == "30"])))
shapiro.test (asin(sqrt(noNAcorelifestage10D$Eclosion.prop[corelifestage10D$Temperature.oC == "42"])))
#not fixed

par(mfrow=c(1,1))

# As the data is  not normal but is homogenous in variance in groups 

# 2 sample Mann whitney U test
wilcox.test(noNAcorelifestage10D$Eclosion.prop ~ noNAcorelifestage10D$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 367.5, p-value = 0.445 # there is no difference between groups




############################################################################################################################################################## EGG TO ADULT PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################
names(corelifestage10D)
corelifestage10D$Hatch.prop

summary(corelifestage10D) # total core dataset n= (39, 32) for analysis of egg counts, hatch success, egg to adult success


#### ! NAT COMMS DESCRIPTIVE STATS ###############


#### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(corelifestage10D$Egg.to.adult.prop, corelifestage10D$Temperature.oC)
#$`30`
#vars  n   mean sd    median trimmed   mad min max range  skew kurtosis   se
#X1 1 39   0.84 0.18   0.88    0.88   0.07   0   1     1 -3.21     11.2 0.03

#$`42`
#vars  n   mean sd    median trimmed   mad min max range skew kurtosis    se
#X1 1 32    0.4 0.42   0.26    0.39   0.39   0 0.95  0.95 0.14    -1.91 0.07


### in base
# 30
hist(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "30"], 
     main = list("Control", cex = 2), xlab = "prop hatch", ylab ="Frequency", ylim = c(0,30),
     nclass = 10) 
# 42
hist(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "42"], 
     col = "red", density = 30, angle = 180, border = "red", 
     main = list("Heatwave", cex = 2), xlab = "prop hatch", ylab ="Frequency", ylim = c(0,30),
     nclass = 10)  # keep nclass = 10, keep scales default

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(corelifestage10D$Egg.to.adult.prop ~ corelifestage10D$Temperature.oC, ylab="prop hatch", xlab="Temperature")

########### Normality - Failed in all groups
shapiro.test (corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "30"]) # W = 0.59737, p-value = 3.877e-09
ks.test(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "30"], pnorm)  # D = 0.70675, p-value < 2.2e-16
shapiro.test (corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "42"]) # W = 0.73732, p-value = 3.305e-06
ks.test(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "42"], pnorm) # D = 0.5, p-value = 2.251e-07

########### Homogeneity of Variances - Failed
bartlett.test(corelifestage10D$Egg.to.adult.prop ~ corelifestage10D$Temperature.oC) # Bartlett's K-squared = 22.491, df = 1, p-value = 2.111e-06
fligner.test(corelifestage10D$Egg.to.adult.prop ~ corelifestage10D$Temperature.oC) # Fligner-Killeen:med chi-squared = 34.347, df = 1, p-value = 4.612e-09
#! need library(car)
leveneTest(corelifestage10D$Egg.to.adult.prop ~ corelifestage10D$Temperature.oC)   #Df F value Pr(>F) 1  62.008 3.391e-11 ***

#################################################################################################################################################################### EGG TO ADULT OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 

########## Transformation with just hist and shapiro 
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))
## PROPORTION FIXING # arcsine sqrt transformation on porportions 
par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird
#raw data not normal; 30 -ve skew leptokurotosis. 42 opposite. both capped at 0 and 1
hist (asin(sqrt(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "30"])))
hist (asin(sqrt(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "42"])))
shapiro.test (asin(sqrt(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "30"])))
shapiro.test (asin(sqrt(corelifestage10D$Egg.to.adult.prop[corelifestage10D$Temperature.oC == "42"])))
#not fixed

par(mfrow=c(1,1))

# As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried

#1) Homogeneity of variance not strict assumpation 2 sample Mann whitney U test
wilcox.test(corelifestage10D$Egg.to.adult.prop ~ corelifestage10D$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 1001.5, p-value = 1.201e-05 # there is a significant difference between groups

#2) Convert data to ranks and submitt to a welch anova and tukey HSD Ruxton (2006)
corelifestage10D$rank<-rank(corelifestage10D$Egg.to.adult.prop) # ranking all data inter-group by ascending count 
t.test(corelifestage10D$rank~ corelifestage10D$Temperature.oC, var.equal=FALSE)
# t = 5.0445, df = 62.359, p-value = 4.192e-06





####################################################################################################################################################################EGG HATCH NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################

names(corelifestage10D)
corelifestage10D$Cbindhatch

#### USEFUL PACKAGES
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()

#### ! NAT COMMS MODEL SELECTION ###############


#### Binomial family error structures
# As data is proportion bound at 0 and 1 fitting normal distibution does not give normal and homogenity of variance in residuals 

# Creating a global model
globalmodbinom<-glm(Cbindhatch ~ Temperature.oC, binomial(link = "logit"), data=corelifestage10D)
globalmodbibnomLOG<-glm(Cbindhatch ~ Temperature.oC, binomial(link = "log"), data=corelifestage10D)

summary(globalmodbinom); summary(globalmodbibnomLOG);  # No R^2, AIC given
# AIC: 4729.4, AIC: 4729.4 # link change seem to do little
pseudoR<-(globalmodbinom$null.deviance-globalmodbinom$deviance) / globalmodbinom$null.deviance # (thomas et al., 2015)
pseudoR # 0.1957514
pseudoR<-(globalmodbibnomLOG$null.deviance-globalmodbibnomLOG$deviance) / globalmodbibnomLOG$null.deviance # (thomas et al., 2015)
pseudoR # 0.1957514
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

AICc<-(-2*logLik(globalmodbinom))+((2*1*(1+1)/(71-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 4725.445
qAICc<-(-2*logLik(globalmodbinom)/64.89407)+((2*1*(1+1)/(71-1-1))); qAICc # 72.8749 

## Overdispersion check
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
theta<-globalmodbinom$deviance/globalmodbinom$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 64.89407,  overdispersed is >1.5 is overdispersion. 
hist(corelifestage10D$Cbindhatch) # over <50% cases are 0. recommendation of using quasi binomial 

globalmodquasibin<-glm(Cbindhatch ~ Temperature.oC, quasibinomial(link = "logit"), data=corelifestage10D)
globalmodqausibibinmLOG<-glm(Cbindhatch ~ Temperature.oC, quasibinomial(link = "log"), data=corelifestage10D)
globalmodqausibibinmID<-glm(Cbindhatch ~ Temperature.oC, quasibinomial(link = "identity"), data=corelifestage10D)

summary(globalmodquasibin); summary(globalmodqausibibinmLOG); summary(globalmodqausibibinmID) 
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance # (thomas et al., 2015)
pseudoR # 0.1957514
# all produce the same output and pseudoR

# 1) Errors normally distributed? - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalnvebinom);par(mfrow=c(1,1))
# quasibinomial Q-Q simlar fluctuation,  devresid histogram spread  and KS test similarly failed 

# 2) Homogenous/homoscedasticity variance of residuals - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance")
plot(devresid ~ globalmodbinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~corelifestage10D$Temperature.oC) 
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance")
plot(devresid ~ globalmodquasibin$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~corelifestage10D$Temperature.oC)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
#  P1 some wedge, test failed 

# 3) Independences of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES
#! need library(car)
durbinWatsonTest(globalmodquasibin) # test passed

# 5) No bias by unduly influential datapoints - YES

# binomial
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
influence<-influence.measures(globalmodbinom); summary(influence) 

# quasibinomial
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
influence<-influence.measures(globalmodquasibin); summary(influence) 
# 23 cases in poisson with larger cooks distance fixed with -ve binom

# 6) Independent variables measured without error - BEST OF ABILITY


###  MODEL REFINEMENT
globalmodquasibin<-glm(Cbindhatch ~ Temperature.oC, quasibinomial(link = "logit"), data=corelifestage10D)
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance; pseudoR # 0.1957514

nullmodquasibin<-glm(Cbindhatch ~ 1, quasibinomial(link = "logit"), data=corelifestage10D)
pseudoR<-(nullmodquasibin$null.deviance-nullmodquasibin$deviance) / nullmodquasibin$null.deviance; pseudoR # 1.633565e-16



#### ! NAT COMMS MODEL SIGNIFICANCE ###############

### Global model explains offspring significantly more than null model because
# anova/lrtest(model, nullmodel) and aic functions do not work with quasibinomial 
anova(globalmodquasibin, test = "Chisq") 
# Analysis of Deviance Table
# Model: quasibinomial, link: logit
# Response: Cbindhatch
# Terms added sequentially (first to last)
# 
#                Df Deviance Resid. Df   Resid. Dev  Pr(>Chi)    
# NULL                              70     5567.5              
# Temperature.oC  1   1089.8        69     4477.7   2.348e-05 ***
drop1(globalmodquasibin, test = "Chi")
#                Df Deviance scaled dev.  Pr(>Chi)    
# <none>              4477.7                          
# Temperature.oC  1   5567.5      17.884 2.348e-05 ** 

anova(globalmodquasibin, nullmodquasibin, test = "Chisq") 

library(lmtest)
lrtest(globalmodquasibin, nullmodquasibin)

globalmodquasibin # shortened with basic information # redundant



#### ! NAT COMMS MODEL POST HOC ###############

summary(globalmodquasibin)
# Call:
#      glm(formula = Cbindhatch ~ Temperature.oC, family = quasibinomial(link = "logit"), 
#          data = corelifestage10D)
# 
# Deviance Residuals: 
#      Min       1Q   Median       3Q      Max  
#  -24.616   -7.093    1.212    3.534   13.631  
# 
# Coefficients:
#                        Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)        1.8620     0.2708   6.875 2.24e-09 ***
#      Temperature.oC42  -1.5412     0.3731  -4.131 9.98e-05 ***
# 
# (Dispersion parameter for quasibinomial family taken to be 60.93951)
# 
# Null deviance: 5567.5  on 70  degrees of freedom
# Residual deviance: 4477.7  on 69  degrees of freedom
# AIC: NA
# 
# Number of Fisher Scoring iterations: 5


exp(1.8620)/ (1+ exp(1.8620))
exp(1.8620-1.5412)/ (1+ exp(1.8620-1.5412))




####################################################################################################################################################################LARVAE PUPATION NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################

names(noNAcorelifestage10D)
noNAcorelifestage10D$Cbindpupation

#### USEFUL PACKAGES
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()


#### ! NAT COMMS MODEL SELECTION ###############

#### Binomial family error structures
# As data is proportion bound at 0 and 1 fitting normal distibution does not give normal and homogenity of variance in residuals 

# Creating a global model
globalmodbinom<-glm(Cbindpupation ~ Temperature.oC, binomial(link = "logit"), data=noNAcorelifestage10D)
globalmodbibnomLOG<-glm(Cbindpupation ~ Temperature.oC, binomial(link = "log"), data=noNAcorelifestage10D)

summary(globalmodbinom); summary(globalmodbibnomLOG);  # No R^2, AIC given
# AIC: 330.11, AIC: 330.11 # link change seem to do little
pseudoR<-(globalmodbinom$null.deviance-globalmodbinom$deviance) / globalmodbinom$null.deviance # (thomas et al., 2015)
pseudoR # 0.08003934
pseudoR<-(globalmodbibnomLOG$null.deviance-globalmodbibnomLOG$deviance) / globalmodbibnomLOG$null.deviance # (thomas et al., 2015)
pseudoR # 0.08003934
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

AICc<-(-2*logLik(globalmodbinom))+((2*1*(1+1)/(56-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 326.1868
qAICc<-(-2*logLik(globalmodbinom)/4.132718)+((2*1*(1+1)/(56-1-1))); qAICc # 78.98406 

## Overdispersion check
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
theta<-globalmodbinom$deviance/globalmodbinom$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 4.132718,  overdispersed is >1 is overdispersion. 
hist(noNAcorelifestage10D$Cbindpupation) # over >50% cases are 0. recommendation of using quasi binomial 

globalmodquasibin<-glm(Cbindpupation ~ Temperature.oC, quasibinomial(link = "logit"), data=noNAcorelifestage10D)
globalmodqausibibinmLOG<-glm(Cbindpupation ~ Temperature.oC, quasibinomial(link = "log"), data=noNAcorelifestage10D)
globalmodqausibibinmID<-glm(Cbindpupation ~ Temperature.oC, quasibinomial(link = "identity"), data=noNAcorelifestage10D)

summary(globalmodquasibin); summary(globalmodqausibibinmLOG); summary(globalmodqausibibinmID) 
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance # (thomas et al., 2015)
pseudoR # 0.08003934
# all produce the same output and pseudoR

# 1) Errors normally distributed? - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
# quasibinom Q-Q simlar fluctuation, pull down on tail, devresid histogram negative skew  and KS test similarly failed 

# 2) Homogenous/homoscedasticity variance of residuals - NOT NECASSARY PASSED BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance")
plot(devresid ~ globalmodbinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~noNAcorelifestage10D$Temperature.oC) 
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance")
plot(devresid ~ globalmodquasibin$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~noNAcorelifestage10D$Temperature.oC)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
#  P1 no slope or wedge, test passed 

# 3) Independences of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES
#! need library(car)
durbinWatsonTest(globalmodquasibin) # test passed

# 5) No bias by unduly influential datapoints - YES

# binomial
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
influence<-influence.measures(globalmodbinom); summary(influence) 

# quasibinomial
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
influence<-influence.measures(globalmodquasibin); summary(influence) 
# 6 cases in poisson with larger cooks distance improved to cooks <1

# 6) Independent variables measured without error - BEST OF ABILITY

theta<-globalmodquasibin$deviance/globalmodquasibin$df.residual; theta



###  MODEL REFINEMENT
globalmodquasibin<-glm(Cbindpupation ~ Temperature.oC, quasibinomial(link = "logit"), data=noNAcorelifestage10D)
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance; pseudoR # 0.08003934

nullmodquasibin<-glm(Cbindpupation ~ 1, quasibinomial(link = "logit"), data=noNAcorelifestage10D)
pseudoR<-(nullmodquasibin$null.deviance-nullmodquasibin$deviance) / nullmodquasibin$null.deviance; pseudoR # -1.325177e-16


#### ! NAT COMMS MODEL SIGNIFICANCE ###############

### Global model does not explain offspring significantly more than null model because
# anova/lrtest(model, nullmodel) and aic functions do not work with quasibinomial 
anova(globalmodquasibin, test = "Chisq") 
# Analysis of Deviance Table
# Model: quasibinomial, link: logit
# Response: Cbindpupation
# Terms added sequentially (first to last)
#                 Df Deviance Resid. Df Resid. Dev Pr(>Chi)  
# NULL                              55     242.58           
# Temperature.oC  1   19.416        54     223.17  0.07428 .
drop1(globalmodquasibin, test = "Chi")
# <none>              223.17                       
# Temperature.oC  1   242.58      3.1857  0.07428 .

globalmodquasibin # shortened with basic information # redundant
# Df Deviance scaled dev. Pr(>Chi)  
# <none>              223.17                       
# Temperature.oC  1   242.58      3.1857  0.07428 .


#### ! NAT COMMS MODEL POST HOC ###############

summary(globalmodquasibin)
# Call:
#      glm(formula = Cbindpupation ~ Temperature.oC, family = quasibinomial(link = "logit"), 
#          data = noNAcorelifestage10D)
# 
# Deviance Residuals: 
#      Min       1Q   Median       3Q      Max  
# -7.1796  -0.2427   0.9015   1.7832   2.7186  
# 
# Coefficients:
#                        Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)        4.3858     0.2849  15.392   <2e-16 ***
#      Temperature.oC42  -0.7938     0.4328  -1.834   0.0721 .  
# 
# (Dispersion parameter for quasibinomial family taken to be 6.094751)
# 
# Null deviance: 242.58  on 55  degrees of freedom
# Residual deviance: 223.17  on 54  degrees of freedom
# AIC: NA
# 
# Number of Fisher Scoring iterations: 5


# estimate logit look ok
exp(4.3858)/ (1+ exp(4.3858)) # 0.9877002
exp(4.3858-0.7938)/ (1+ exp(4.3858-0.7938)) # 0.9731951




####################################################################################################################################################################PUPAL ECLOSION NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################

names(noNAcorelifestage10D)
noNAcorelifestage10D$Cbindeclosion

#### USEFUL PACKAGES
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()


#### ! NAT COMMS MODEL SELECTION ###############

#### Binomial family error structures
# As data is proportion bound at 0 and 1 fitting normal distibution does not give normal and homogenity of variance in residuals 

# Creating a global model
globalmodbinom<-glm(Cbindeclosion ~ Temperature.oC, binomial(link = "logit"), data=noNAcorelifestage10D)
globalmodbibnomLOG<-glm(Cbindeclosion ~ Temperature.oC, binomial(link = "log"), data=noNAcorelifestage10D)

summary(globalmodbinom); summary(globalmodbibnomLOG);  # No R^2, AIC given
# AIC: 45.877, AIC: 45.877 # link change seem to do little
pseudoR<-(globalmodbinom$null.deviance-globalmodbinom$deviance) / globalmodbinom$null.deviance # (thomas et al., 2015)
pseudoR # 0.03236302
pseudoR<-(globalmodbibnomLOG$null.deviance-globalmodbibnomLOG$deviance) / globalmodbibnomLOG$null.deviance # (thomas et al., 2015)
pseudoR # 0.03236302
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

AICc<-(-2*logLik(globalmodbinom))+((2*1*(1+1)/(56-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 41.95118
qAICc<-(-2*logLik(globalmodbinom)/0.5171399)+((2*1*(1+1)/(56-1-1))); qAICc # 78.96795 

## Overdispersion check
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
theta<-globalmodbinom$deviance/globalmodbinom$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 0.5171399,  overdispersed is <1 so no overdispersion. 


# 1) Errors normally distributed? - NOT NECASSARY BUT NOT IMPROVED

# globalmodbibnomLOGIT
devresid<-resid(globalmodbinom, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# globalmodbibnomLOG
devresid<-resid(globalmodbibnomLOG, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodbibnomLOG);par(mfrow=c(1,1))
#  Q-Q large drop on left pull down on tail, devresid histogram negative skew  and leptokurtotic and KS test similarly failed 

# 2) Homogenous/homoscedasticity variance of residuals - NOT NECASSARY PASSED BUT NOT IMPROVED

# globalmodbibnomLOGIT
devresid<-resid(globalmodbinom, type = "deviance")
plot(devresid ~ globalmodbinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~noNAcorelifestage10D$Temperature.oC) 
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# globalmodbibnomLOG
devresid<-resid(globalmodbibnomLOG, type = "deviance")
plot(devresid ~ globalmodbibnomLOG$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~noNAcorelifestage10D$Temperature.oC)
par(mfrow=c(2,2)); plot(globalmodbibnomLOG);par(mfrow=c(1,1))
#  P1 some slope , test failed 

# 3) Independences of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES AS COLLECTED AT ONCE
#! need library(car)
durbinWatsonTest(globalmodbinom) # test failed

# 5) No bias by unduly influential datapoints - YES

# globalmodbibnomLOGIT
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
influence<-influence.measures(globalmodbinom); summary(influence) 

# globalmodbibnomLOG
par(mfrow=c(2,2)); plot(globalmodbibnomLOG);par(mfrow=c(1,1))
influence<-influence.measures(globalmodbibnomLOG); summary(influence) 
# 5 cases  but cooks distance <1

# 6) Independent variables measured without error - BEST OF ABILITY


# no difference in log/logit

###  MODEL REFINEMENT
globalmodbinom<-glm(Cbindeclosion ~ Temperature.oC, binomial(link = "logit"), data=noNAcorelifestage10D)
pseudoR<-(globalmodbinom$null.deviance-globalmodbinom$deviance) / globalmodbinom$null.deviance; pseudoR # 0.03236302

nullmodbinom<-glm(Cbindeclosion ~ 1, binomial(link = "logit"), data=noNAcorelifestage10D)
pseudoR<-(nullmodbinom$null.deviance-nullmodbinom$deviance) / nullmodbinom$null.deviance; pseudoR # -1.325177e-16


#### ! NAT COMMS MODEL SIGNIFICANCE ###############


### Global model does not explain offspring significantly more than null model because
lrtest(globalmodbinom, nullmodbinom)
anova(globalmodbinom, nullmodbinom, test= "Chi")
anova(globalmodbinom, test = "Chisq") 
drop1(globalmodbinom, test = "Chi")
# Likelihood ratio test
# 
# Model 1: Cbindeclosion ~ Temperature.oC
# Model 2: Cbindeclosion ~ 1
# #Df  LogLik Df Chisq Pr(>Chisq)
# 1   2 -20.939                    
# 2   1 -21.405 -1 0.934     0.3338

globalmodbinom # shortened with basic information # redundant

#### ! NAT COMMS MODEL POST HOC ###############

summary(globalmodbinom)
# Call:
#      glm(formula = Cbindeclosion ~ Temperature.oC, family = binomial(link = "logit"), 
#          data = noNAcorelifestage10D)
# 
# Deviance Residuals: 
#     Min       1Q   Median       3Q      Max  
# -1.6273   0.3778   0.4602   0.4945   0.7279  
# 
# Coefficients:
#                        Estimate Std. Error z value Pr(>|z|)    
#      (Intercept)        7.3296     0.5002  14.654   <2e-16 ***
#      Temperature.oC42  -0.7601     0.7642  -0.995     0.32    
# 
# (Dispersion parameter for binomial family taken to be 1)
# 
# Null deviance: 28.860  on 55  degrees of freedom
# Residual deviance: 27.926  on 54  degrees of freedom
# AIC: 45.877
# 
# Number of Fisher Scoring iterations: 6

# estimate logit look ok
exp(7.3296)/ (1+ exp(7.3296)) # 0.9877002
exp(7.3296-0.7601)/ (1+ exp(7.3296-0.7601)) # 0.9731951


####################################################################################################################################################################EGG TO ADULT NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################

names(corelifestage10D)
corelifestage10D$Cbindeggadult

#### USEFUL PACKAGES
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()


#### ! NAT COMMS MODEL SELECTION ###############

#### Binomial family error structures
# As data is proportion bound at 0 and 1 fitting normal distibution does not give normal and homogenity of variance in residuals 

describeBy(corelifestage10D$Hatch.prop, corelifestage10D$Temperature.oC)
describeBy(corelifestage10D$Pupation.prop, corelifestage10D$Temperature.oC)
describeBy(corelifestage10D$Eclosion.prop, corelifestage10D$Temperature.oC)
describeBy(corelifestage10D$Egg.to.adult.prop, corelifestage10D$Temperature.oC)

# Creating a global model
globalmodbinom<-glm(Cbindeggadult ~ Temperature.oC, binomial(link = "logit"), data=corelifestage10D)
globalmodbibnomLOG<-glm(Cbindeggadult ~ Temperature.oC, binomial(link = "log"), data=corelifestage10D)

summary(globalmodbinom); summary(globalmodbibnomLOG);  # No R^2, AIC given
# AIC: 4471.1, AIC: 4471.1 # link change seem to do little
pseudoR<-(globalmodbinom$null.deviance-globalmodbinom$deviance) / globalmodbinom$null.deviance # (thomas et al., 2015)
pseudoR # 0.2056754
pseudoR<-(globalmodbibnomLOG$null.deviance-globalmodbibnomLOG$deviance) / globalmodbibnomLOG$null.deviance # (thomas et al., 2015)
pseudoR # 0.2056754
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

AICc<-(-2*logLik(globalmodbinom))+((2*1*(1+1)/(71-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 4467.148
qAICc<-(-2*logLik(globalmodbinom)/61.05243)+((2*1*(1+1)/(71-1-1))); qAICc # 73.22607 

## Overdispersion check
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
theta<-globalmodbinom$deviance/globalmodbinom$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 61.05243,  overdispersed is >1 is overdispersion. 
hist(corelifestage10D$Cbindeggadult) # over <30% cases are 0. recommendation of using quasi binomial 

globalmodquasibin<-glm(Cbindeggadult ~ Temperature.oC, quasibinomial(link = "logit"), data=corelifestage10D)
globalmodqausibibinmLOG<-glm(Cbindeggadult ~ Temperature.oC, quasibinomial(link = "log"), data=corelifestage10D)
globalmodqausibibinmID<-glm(Cbindeggadult ~ Temperature.oC, quasibinomial(link = "identity"), data=corelifestage10D)

summary(globalmodquasibin); summary(globalmodqausibibinmLOG); summary(globalmodqausibibinmID) 
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance # (thomas et al., 2015)
pseudoR # 0.2056754
# all produce the same output and pseudoR

# 1) Errors normally distributed? - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
# quasibinom Q-Q similar fluctuation,  devresid histogram -ve skew  and KS test similarly failed 

# 2) Homogenous/homoscedasticity variance of residuals - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance")
plot(devresid ~ globalmodbinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~corelifestage10D$Temperature.oC) 
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance")
plot(devresid ~ globalmodquasibin$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~corelifestage10D$Temperature.oC)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
#  P1 some wedge, test failed 

# 3) Independences of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES
#! need library(car)
durbinWatsonTest(globalmodquasibin) # test passed

# 5) No bias by unduly influential datapoints - YES

# binomial
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
influence<-influence.measures(globalmodbinom); summary(influence) 

# quasibinomial
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
influence<-influence.measures(globalmodquasibin); summary(influence) 
# ~20 cases in binomial with larger cooks distance fixed with quasibinom

# 6) Independent variables measured without error - BEST OF ABILITY




globalmodquasibin<-glm(Cbindeggadult ~ Temperature.oC, quasibinomial(link = "logit"), data=corelifestage10D)
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance; pseudoR # 0.2056754

nullmodquasibin<-glm(Cbindeggadult ~ 1, quasibinomial(link = "logit"), data=corelifestage10D)
pseudoR<-(nullmodquasibin$null.deviance-nullmodquasibin$deviance) / nullmodquasibin$null.deviance; pseudoR # 0


#### ! NAT COMMS SIGNIFIANCE ###############

### Global model explains offspring significantly more than null model because
# anova/lrtest(model, nullmodel) and aic functions do not work with quasibinomial 
anova(globalmodquasibin, test = "Chisq") 
# Analysis of Deviance Table
# Model: quasibinomial, link: logit
# Response: Cbindhatch
# Terms added sequentially (first to last)
#                 Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                              70     5303.4              
# Temperature.oC  1   1090.8        69     4212.6 1.125e-05 ***
drop1(globalmodquasibin, test = "Chi")
# Df Deviance scaled dev.  Pr(>Chi)    
# <none>              4212.6                          
# Temperature.oC  1   5303.4      19.286 1.125e-05 ***

globalmodquasibin # shortened with basic information # redundant

#### ! NAT COMMS POST HOC ###############

summary(globalmodquasibin)

# Call:
#      glm(formula = Cbindeggadult ~ Temperature.oC, family = quasibinomial(link = "logit"), 
#          data = corelifestage10D)
# 
# Deviance Residuals: 
#      Min        1Q    Median        3Q       Max  
# -24.1197   -6.7330    0.7325    3.4834   12.5560  
# 
# Coefficients:
#                        Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)        1.7689     0.2523   7.011 1.27e-09 ***
#      Temperature.oC42  -1.5148     0.3524  -4.298 5.54e-05 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for quasibinomial family taken to be 56.5575)
# 
# Null deviance: 5303.4  on 70  degrees of freedom
# Residual deviance: 4212.6  on 69  degrees of freedom
# AIC: NA
# 
# Number of Fisher Scoring iterations: 5


# estimate logit look ok
exp(1.7689)/ (1+ exp(1.7689)) # 0.8543208
exp(1.7689-1.5148)/ (1+ exp(1.7689-1.5148)) # 0.5631854






