# Session setup: inspect and clear the workspace, then load the raw data.

# Show which objects are left over from a previous session.
ls()
# Remove every object from the global environment. This does not detach
# packages or reset options(); restart R for a truly clean session.
rm(list = ls())

# Report the current working directory (informational only).
getwd()

# Folder that holds the raw data. The file is read through an explicit path
# rather than by changing the working directory with setwd().
data_dir <- "C:/Users/UEA/Documents/Neat data/nature heatwave"

# Full data set. stringsAsFactors = TRUE keeps text columns such as Sex as
# factors on R >= 4.0 as well, which is what the recorded str()/summary()
# output further down assumes (it was the default before R 4.0).
repfit <- read.csv(
  file.path(data_dir, "reproductivefitnessrange.csv"),
  header = TRUE,
  stringsAsFactors = TRUE
)

#### DATA CHECK, CLEAN, DESCRIPTION AND SUMMARY #########################################################################

repfit # prints the whole data frame - no NAs, data to 2 or 3 d.p., no irregularities/anomalies


### checking for outliers/errors
summary(repfit) # general (unsplit) range, quantiles, median, count and mean summary stats for each variable



str(repfit) # checks the variable types
# Recorded output:
# 'data.frame':	879 obs. of  8 variables:
# $ Female.I.D.No         : int  1 2 3 4 5 6 7 8 9 10 ...
# $ Sex                   : Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 1 ...        # Sex heatwaved
# $ Temperature.oC        : int  30 30 30 30 30 30 30 30 30 30 ...                             # 5 day heatwave temperature
# $ Offspring.count.B1.10D: int  96 95 133 16 78 156 82 153 92 109 ...                         # first 10 day offspring count
# $ Offspring.count.B2.10D: int  123 194 177 0 130 178 89 183 160 144 ...                      # second 10 day offspring count
# $ Offspring.count.20D   : int  219 289 310 16 208 334 171 336 252 253 ...                    # total offspring count
# NOTE(review): the recorded output lists 6 of the 8 columns and 879 rows, while
# later code assumes n = 543 - confirm against the current CSV.

# Missing-value check. A per-column count and a single TRUE/FALSE are readable
# at a glance; is.na(repfit) would print one logical per cell.
colSums(is.na(repfit)) # number of missing values in each column
anyNA(repfit)          # TRUE if any value is missing
# nothing missing

# Keep a numeric copy of temperature (used for correlations and for the models
# with temperature as a continuous predictor) before converting the original
# column to a factor.
repfit$Temperature.oCnum <- repfit$Temperature.oC
repfit$Temperature.oC <- as.factor(repfit$Temperature.oC) # categorical: coding is not proportional to differences

levels(repfit$Temperature.oC)
# "30" "35" "38" "39" "40" "42"




########################### NEATPLOT ###############################################################################

# 20D boxplot

# Needs ggplot2 >= 3.4.0: stat_summary(fun = ) replaced fun.y in 3.3.0 and
# element_line(linewidth = ) replaced size in 3.4.0.
library(ggplot2)

# Plotmath labels: axis title with a degree sign, and one label per temperature.
temp <- expression(bold(paste('Heatwave temperature (',degree,'C)')))
label30oC <- expression(bold(""*30*degree*C))
label35oC <- expression(bold(""*35*degree*C))
label38oC <- expression(bold(""*38*degree*C))
label39oC <- expression(bold(""*39*degree*C))
label40oC <- expression(bold(""*40*degree*C))
label42oC <- expression(bold(""*42*degree*C))


############# ! NATURE COMM FIGURE 1 PLOT ###################

# Box plot of 20-day offspring count per heatwave temperature, one facet per
# sex, with the group mean (filled point) and the raw observations (open
# circles) overlaid. To draw outlines only, map `colour` instead of `fill`
# and swap scale_fill_manual for scale_colour_manual.
graphrepfitrange20dfacet <- ggplot(
  repfit,
  aes(x = Temperature.oC, y = Offspring.count.20D, fill = Temperature.oC)
) +
  geom_boxplot(
    notch = FALSE,       # set to TRUE for notched boxes
    outlier.shape = NA,  # outliers are not drawn separately; raw points are overlaid below
    width = 0.5,
    lwd = 0.5,
    fatten = 0.5,
    color = "black",
    position = position_dodge(0.5)
  ) +
  stat_summary(
    fun = "mean", geom = "point", size = 2,
    position = position_dodge(0.5), color = "black"
  ) +
  # Fill colours are named by temperature level so each colour is tied to its
  # level explicitly. The previous breaks/labels named the Sex levels, which
  # never match a fill that is mapped to temperature.
  scale_fill_manual(
    values = c(
      "30" = "ghostwhite", "35" = "ghostwhite", "38" = "ghostwhite",
      "39" = "ghostwhite", "40" = "tomato", "42" = "tomato"
    ),
    name = temp # legend title (the legend itself is hidden in theme() below)
  ) +
  geom_point(
    position = position_jitterdodge(dodge.width = 0.5, jitter.width = 0.15),
    shape = 1, size = 1
  ) +
  labs(x = temp, y = "Reproductive output") + # x and y axis titles
  scale_x_discrete(
    breaks = c("30", "35", "38", "39", "40", "42"), # levels shown on the x axis
    labels = c("30", "35", "38", "39", "40", "42")  # text shown for each level
  ) +
  coord_cartesian(ylim = c(-10, 510)) + # visible y range
  scale_y_continuous(
    breaks = seq(0, 500, 100), # ticks from 0 to 500 in steps of 100
    expand = c(0, 0),          # no extra padding beyond the limits
    sec.axis = dup_axis()      # duplicate the y axis on the right-hand side
  ) +
  theme_classic() +
  facet_grid(. ~ Sex) +
  theme(
    panel.grid.major = element_blank(), # no major grid lines
    panel.border = element_blank(),     # no panel border
    panel.grid.minor = element_blank(), # no minor grid lines
    axis.line.x = element_line(color = "black", linewidth = 1),
    axis.line.y = element_line(color = "black", linewidth = 1),
    axis.text.x = element_text(color = "black", size = 14),
    axis.text.y = element_text(color = "black", size = 14),
    axis.title.x = element_text(face = "bold", size = 14, color = "black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(face = "bold", size = 14, color = "black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
    strip.background = element_blank(),
    strip.text.x = element_text(size = 14, face = "bold", color = "black"),
    legend.position = "none", # comment this line out to show the legend
    panel.background = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA)
  )


# Save the figure to the desktop. The plot object is passed explicitly so the
# saved image does not depend on whichever plot happened to be created last,
# and the target folder is given as a path rather than via setwd().
ggsave(
  file.path("C:/Users/UEA/Desktop", "graphrepfitrange20dfacet.png"),
  plot = graphrepfitrange20dfacet,
  width = 8, height = 6, dpi = 300, bg = "transparent"
)
# Working directory for the remainder of the analysis.
setwd("C:/Users/UEA/Documents/Dissertation and phd/d- data for phd/R analysis/largepaper/20drepfit")





##############################################################################################################################################################  PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################



############# ! NATURE COMM SUPP TABLE STATS ###################


summary(repfit) # total core dataset
# NOTE(review): an earlier note here gave n = 219 females and 262 males, but the
# group sizes in the recorded table below add up to 250 females and 293 males
# (543 in total) - confirm which is current.

# Descriptive statistics of the 20-day offspring count for every
# Sex x Temperature group. describeBy() comes from the psych package, which
# this script never attaches, so it is called through its namespace.
# Columns: vars, n, mean, sd, median, trimmed, mad, min, max, range, skew, kurtosis, se
psych::describeBy(repfit$Offspring.count.20D, list(repfit$Sex, repfit$Temperature.oC), mat = TRUE)
# item group1 group2 vars  n     mean       sd median   trimmed      mad min max range       skew    kurtosis
# X11     1 Female     30    1 75 210.8800 77.24495  216.0 212.40984  80.0604  16 368   352 -0.1799377 -0.45099283
# X12     2   Male     30    1 79 191.7975 55.70835  188.0 190.30769  53.3736  72 339   267  0.3147822 -0.07760356
# X13     3 Female     35    1 34 254.9412 51.77629  260.0 258.53571  49.6671 116 359   243 -0.6111854  0.11509463
# X14     4   Male     35    1 33 258.3333 56.23537  266.0 260.33333  62.2692 147 355   208 -0.3251280 -0.96893641
# X15     5 Female     38    1 43 259.6744 73.60439  264.0 267.08571  51.8910   0 372   372 -1.7567568  4.55386527
# X16     6   Male     38    1 48 187.1042 67.25127  195.0 189.45000  62.2692  15 311   296 -0.3748840 -0.37159696
# X17     7 Female     39    1 35 227.8000 88.50916  224.0 233.96552  87.4734   9 424   415 -0.5833396  0.47091275
# X18     8   Male     39    1 43 193.7674 71.57309  201.0 201.28571  63.7518   0 316   316 -1.0986155  1.36131966
# X19     9 Female     40    1 35 234.1714 84.80028  276.0 240.62069  69.6822   8 363   355 -0.6858006 -0.45920246
# X110   10   Male     40    1 48 150.8958 71.92823  164.5 155.57500  57.0801   0 268   268 -0.7950745 -0.03240679
# X111   11 Female     42    1 28 210.0000 74.79453  220.0 217.54167  44.4780   0 348   348 -1.1157936  1.58404538
# X112   12   Male     42    1 42 106.7857 92.98663  114.5  99.73529 117.8667   0 316   316  0.3396299 -1.10350277
# se
# X11   8.919479
# X12   6.267679
# X13   8.879561
# X14   9.789321
# X15  11.224564
# X16   9.706885
# X17  14.960778
# X18  10.914793
# X19  14.333863
# X110 10.381945
# X111 14.134838
# X112 14.348149

# Row subsets of the full data set: one data frame per heatwave temperature
# level, then one per sex. All columns are kept.
repfit30 <- repfit[repfit[["Temperature.oC"]] == "30", ]
repfit35 <- repfit[repfit[["Temperature.oC"]] == "35", ]
repfit38 <- repfit[repfit[["Temperature.oC"]] == "38", ]
repfit39 <- repfit[repfit[["Temperature.oC"]] == "39", ]
repfit40 <- repfit[repfit[["Temperature.oC"]] == "40", ]
repfit42 <- repfit[repfit[["Temperature.oC"]] == "42", ]
repfitmale <- repfit[repfit[["Sex"]] == "Male", ]
repfitfemale <- repfit[repfit[["Sex"]] == "Female", ]


################ Within sex correlation across temperature ##############

########### Normality

# Draws two base-graphics histograms for one sex: the 20-day offspring count
# and the numeric heatwave temperature. `sex_data` is the subset for that sex
# and `sex_label` is the plot title. The titles previously read "Control" on
# all four plots, so the male and female panels could not be told apart.
plot_sex_histograms <- function(sex_data, sex_label) {
  # Count
  hist(sex_data$Offspring.count.20D,
       main = list(sex_label, cex = 2), xlab = "20D offspring count", ylab = "Frequency", ylim = c(0, 100),
       nclass = 10)
  # Temp
  hist(sex_data$Temperature.oCnum,
       main = list(sex_label, cex = 2), xlab = "Temperature.oCnum", ylab = "Frequency", ylim = c(0, 100),
       nclass = 10)
  invisible(NULL)
}

#### Male
plot_sex_histograms(repfitmale, "Male")

#### Female
plot_sex_histograms(repfitfemale, "Female")

## In both sexes the count looks roughly normal, but temperature is closer to uniform (due to the sampling design)


# Kolmogorov-Smirnov comparison of `x` with a normal distribution that has the
# sample's own mean and standard deviation. Calling ks.test(x, pnorm) without
# parameters compares the data with the standard normal N(0, 1), which for
# values in the tens or hundreds gives D close to 1 whatever their shape.
# Because the parameters are estimated from the same data the p-value is
# conservative; treat it as a rough companion to the Shapiro-Wilk test.
ks_normal <- function(x) {
  ks.test(x, "pnorm", mean = mean(x), sd = sd(x))
}

# The KS results previously recorded here (D between 0.93 and 1) came from the
# N(0, 1) comparison and no longer apply; re-run to obtain current values.

### Male - both variables failed Shapiro-Wilk
shapiro.test(repfitmale$Offspring.count.20D) # W = 0.96197, p-value = 5.979e-07
ks_normal(repfitmale$Offspring.count.20D)

shapiro.test(repfitmale$Temperature.oCnum) # W = 0.82848, p-value < 2.2e-16
ks_normal(repfitmale$Temperature.oCnum)


### Female - both variables failed Shapiro-Wilk
shapiro.test(repfitfemale$Offspring.count.20D) # W = 0.95901, p-value = 1.526e-06
ks_normal(repfitfemale$Offspring.count.20D)

shapiro.test(repfitfemale$Temperature.oCnum) # W = 0.83686, p-value = 1.623e-15
ks_normal(repfitfemale$Temperature.oCnum)

## Transformations unlikely to work




################ Within temperature differences in sex ####################################


########### Normality

par(mfrow = c(2, 2)) # plot the graphs next to each other in a 2x2 grid

# For each heatwave temperature (30, 35, 38, 39, 40, 42, in that order) draw
# the male histogram and then the female histogram (red, hatched) of the
# 20-day offspring count. nclass = 10 throughout; x scale left at its default.
invisible(lapply(
  list(repfit30, repfit35, repfit38, repfit39, repfit40, repfit42),
  function(temperature_data) {
    offspring <- temperature_data$Offspring.count.20D
    is_male <- temperature_data$Sex == "Male"
    is_female <- temperature_data$Sex == "Female"

    # Male
    hist(
      offspring[is_male],
      main = list("Male", cex = 2),
      xlab = "20D offspring count",
      ylab = "Frequency",
      ylim = c(0, 20),
      nclass = 10
    )

    # Female
    hist(
      offspring[is_female],
      col = "red", density = 30, angle = 180, border = "red",
      main = list("Female", cex = 2),
      xlab = "20D offspring count",
      ylab = "Frequency",
      ylim = c(0, 20),
      nclass = 10
    )
  }
))

## Most of the groups have negative skew and some degree of kurtosis


### Normality of the 20-day offspring count within each temperature x sex group

# Kolmogorov-Smirnov comparison of `x` with a normal distribution that has the
# sample's own mean and standard deviation. ks.test(x, pnorm) without
# parameters tests against N(0, 1), which raw counts can never resemble.
# The p-value is conservative because the parameters are estimated from x.
ks_normal <- function(x) {
  ks.test(x, "pnorm", mean = mean(x), sd = sd(x))
}

# Prints the Shapiro-Wilk and Kolmogorov-Smirnov results for males and then
# females within one temperature subset. A header line is printed first
# because the "data:" line in the test output only shows the local name.
check_group_normality <- function(temperature_data, temperature_label) {
  for (sex_label in c("Male", "Female")) {
    counts <- temperature_data$Offspring.count.20D[temperature_data$Sex == sex_label]
    cat("\n### Heatwave", temperature_label, "-", sex_label, "\n")
    print(shapiro.test(counts))
    print(ks_normal(counts))
  }
  invisible(NULL)
}

invisible(Map(
  check_group_normality,
  list(repfit30, repfit35, repfit38, repfit39, repfit40, repfit42),
  c("30", "35", "38", "39", "40", "42")
))

# Recorded Shapiro-Wilk results:
#   temp  sex     W        p-value
#   30    Male    0.98518  0.494       passed
#   30    Female  0.9909   0.8736      passed
#   35    Male    0.96159  0.2866      passed
#   35    Female  0.96415  0.32        passed
#   38    Male    0.98181  0.6554      passed
#   38    Female  0.82761  1.496e-05   failed
#   39    Male    0.89767  0.001064    failed
#   39    Female  0.94811  0.09911     passed
#   40    Male    0.90853  0.0012      failed
#   40    Female  0.91484  0.01015     failed
#   42    Male    0.90918  0.002742    failed
#   42    Female  0.8945   0.008526    failed
# The KS results recorded previously (D between 0.74 and 1, p < 2.2e-16) came
# from the N(0, 1) comparison and no longer apply; re-run to obtain current values.

## A transformation is unlikely to fix normality in one group without removing it in the other. For consistency,
# and given the lack of trust in the normality at 35, all groups are treated as non-normal.


###### plotting differences
# Base boxplots of the 20-day offspring count, one plot per heatwave
# temperature, with the two sexes side by side. The x axis is Sex (it was
# previously labelled "Temperature"); the temperature is given in the title.
boxplot(Offspring.count.20D ~ Sex, data = repfit30, ylab = "20D offspring count", xlab = "Sex", main = "30")
boxplot(Offspring.count.20D ~ Sex, data = repfit35, ylab = "20D offspring count", xlab = "Sex", main = "35")
boxplot(Offspring.count.20D ~ Sex, data = repfit38, ylab = "20D offspring count", xlab = "Sex", main = "38")
boxplot(Offspring.count.20D ~ Sex, data = repfit39, ylab = "20D offspring count", xlab = "Sex", main = "39")
boxplot(Offspring.count.20D ~ Sex, data = repfit40, ylab = "20D offspring count", xlab = "Sex", main = "40")
boxplot(Offspring.count.20D ~ Sex, data = repfit42, ylab = "20D offspring count", xlab = "Sex", main = "42")

# variances look similar at lower temperatures, but males larger at higher temperatures



########### Homogeneity of Variances - Passed in all groups except 30 and 42
# For each heatwave temperature the variance of the 20-day offspring count is
# compared between the sexes with three tests: Bartlett, Fligner-Killeen and
# Levene. leveneTest() comes from the car package, which this script never
# attaches, so it is called through its namespace.
### Heatwave 30
bartlett.test(repfit30$Offspring.count.20D ~ repfit30$Sex) # Bartlett's K-squared = 7.9656, df = 1, p-value = 0.004768
fligner.test(repfit30$Offspring.count.20D ~ repfit30$Sex) # Fligner-Killeen:med chi-squared 7.4207, df = 1, p-value = 0.006448
car::leveneTest(repfit30$Offspring.count.20D ~ repfit30$Sex)   #Df F value Pr(>F)  1  7.4809 0.006978 **
### Heatwave 35
bartlett.test(repfit35$Offspring.count.20D ~ repfit35$Sex) # Bartlett's K-squared = 0.21834, df = 1, p-value = 0.6403
fligner.test(repfit35$Offspring.count.20D ~ repfit35$Sex) # Fligner-Killeen:med chi-squared = 0.81114, df = 1, p-value = 0.3678
car::leveneTest(repfit35$Offspring.count.20D ~ repfit35$Sex)   #Df F value Pr(>F) 1  0.7927 0.3766
### Heatwave 38
bartlett.test(repfit38$Offspring.count.20D ~ repfit38$Sex) # Bartlett's K-squared = 0.35815, df = 1, p-value = 0.5495
fligner.test(repfit38$Offspring.count.20D ~ repfit38$Sex) # Fligner-Killeen:med chi-squared = 1.404, df = 1, p-value = 0.2361
car::leveneTest(repfit38$Offspring.count.20D ~ repfit38$Sex)   #Df F value Pr(>F) 1  0.2297 0.6329
### Heatwave 39
bartlett.test(repfit39$Offspring.count.20D ~ repfit39$Sex) # Bartlett's K-squared = 1.6853, df = 1, p-value = 0.1942
fligner.test(repfit39$Offspring.count.20D ~ repfit39$Sex) # Fligner-Killeen:med chi-squared = 1.6991, df = 1, p-value = 0.1924
car::leveneTest(repfit39$Offspring.count.20D ~ repfit39$Sex)   #Df F value Pr(>F) 1  1.1588 0.2851
### Heatwave 40
bartlett.test(repfit40$Offspring.count.20D ~ repfit40$Sex) # Bartlett's K-squared = 1.0699, df = 1, p-value = 0.301
fligner.test(repfit40$Offspring.count.20D ~ repfit40$Sex) # Fligner-Killeen:med chi-squared = 0.95458, df = 1, p-value = 0.3286
car::leveneTest(repfit40$Offspring.count.20D ~ repfit40$Sex)   #Df F value Pr(>F) 1  1.6879 0.1976
### Heatwave 42
bartlett.test(repfit42$Offspring.count.20D ~ repfit42$Sex) # Bartlett's K-squared = 1.4646, df = 1, p-value = 0.2262
fligner.test(repfit42$Offspring.count.20D ~ repfit42$Sex) # Fligner-Killeen:med chi-squared = 5.8057, df = 1, p-value = 0.01597
car::leveneTest(repfit42$Offspring.count.20D ~ repfit42$Sex)   #Df F value Pr(>F) 1  6.2658 0.01472 *







#### WITHIN SEX ACROSS TEMP CORRELATION - OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ####
# The data failed the normality checks above, so a rank-based (Spearman)
# correlation between offspring count and numeric temperature is used within
# each sex.
# exact = FALSE: temperature takes only six distinct values, so the data are
# heavily tied and an exact p-value cannot be computed. Requesting the
# asymptotic p-value explicitly avoids the "cannot compute exact p-value with
# ties" warning.

## Male
cor.test(repfitmale$Offspring.count.20D, repfitmale$Temperature.oCnum, method = "spearman", exact = FALSE)
# S = 5500500, p-value = 4.877e-08
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
# rho 
# -0.3120675 


## Female
cor.test(repfitfemale$Offspring.count.20D, repfitfemale$Temperature.oCnum, method = "spearman", exact = FALSE)
# S = 2472200, p-value = 0.4253
# alternative hypothesis: true rho is not equal to 0
# sample estimates:
#      rho 
# 0.05064674 



#Note  for picking out levels within factor
#wilcox.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="30"], hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="42"])
#wilcox.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="30"], hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="double42"])
#wilcox.test(hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="42"], hardeningsimpleinR1$Offspring.count.20D[hardeningsimpleinR1$Temperature.oC=="double42"])

#### WITHIN TEMP ACROSS SEX COMPARISON - OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ####
# Unpaired two-sample Wilcoxon rank-sum test of the 20-day offspring count between the sexes, run separately within each heatwave temperature. Significance below is judged at alpha = 0.05.
### Heatwave 30
wilcox.test(repfit30$Offspring.count.20D ~ repfit30$Sex, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 3484.5, p-value = 0.0594 # no significant difference between groups at alpha = 0.05 (marginal). NOTE(review): a half-integer W implies tied values, in which case exact = TRUE cannot be honoured and a normal approximation is used with a warning - confirm

### Heatwave 35
wilcox.test(repfit35$Offspring.count.20D ~ repfit35$Sex, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 539, p-value = 0.7874 # there is no significant difference between groups

### Heatwave 38
wilcox.test(repfit38$Offspring.count.20D ~ repfit38$Sex, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 1668.5, p-value = 4.276e-07 # there is a significant difference between groups

### Heatwave 39
wilcox.test(repfit39$Offspring.count.20D ~ repfit39$Sex, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 946.5, p-value = 0.05188 # there is no significant difference between groups (marginal)

### Heatwave 40
wilcox.test(repfit40$Offspring.count.20D ~ repfit40$Sex, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 1291.5, p-value = 3.19e-05 # there is a significant difference between groups

### Heatwave 42
wilcox.test(repfit42$Offspring.count.20D ~ repfit42$Sex, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 953, p-value = 1.194e-05 # there is a significant difference between groups

# 2) Convert the data to ranks and run an unequal-variance (Welch) t-test on the ranks (Ruxton 2006).
# Done for the two temperatures where homogeneity of variance failed (30 and 42).
# Only the two-group Welch t-test is run here; no ANOVA or Tukey HSD follows.
### Heatwave 30
repfit30$rank <- rank(repfit30$Offspring.count.20D) # rank all counts across both sexes, ascending; ties get the average rank
t.test(repfit30$rank ~ repfit30$Sex, var.equal = FALSE)
# t = 1.8942, df = 144.32, p-value = 0.0602

### Heatwave 42
repfit42$rank <- rank(repfit42$Offspring.count.20D) # rank all counts across both sexes, ascending; ties get the average rank
t.test(repfit42$rank ~ repfit42$Sex, var.equal = FALSE)
# t = 5.2598, df = 62.865, p-value = 1.845e-06



# Descriptive statistics per Sex x Temperature group, repeated here for
# reference. describeBy() is called through the psych namespace because the
# package is never attached in this script.
psych::describeBy(repfit$Offspring.count.20D, list(repfit$Sex, repfit$Temperature.oC), mat = TRUE)
#      item group1 group2 vars  n     mean       sd median   trimmed      mad min max range       skew    kurtosis        se
# X11     1 Female     30    1 75 210.8800 77.24495  216.0 212.40984  80.0604  16 368   352 -0.1799377 -0.45099283  8.919479
# X12     2   Male     30    1 79 191.7975 55.70835  188.0 190.30769  53.3736  72 339   267  0.3147822 -0.07760356  6.267679
# X13     3 Female     35    1 34 254.9412 51.77629  260.0 258.53571  49.6671 116 359   243 -0.6111854  0.11509463  8.879561
# X14     4   Male     35    1 33 258.3333 56.23537  266.0 260.33333  62.2692 147 355   208 -0.3251280 -0.96893641  9.789321
# X15     5 Female     38    1 43 259.6744 73.60439  264.0 267.08571  51.8910   0 372   372 -1.7567568  4.55386527 11.224564
# X16     6   Male     38    1 48 187.1042 67.25127  195.0 189.45000  62.2692  15 311   296 -0.3748840 -0.37159696  9.706885
# X17     7 Female     39    1 35 227.8000 88.50916  224.0 233.96552  87.4734   9 424   415 -0.5833396  0.47091275 14.960778
# X18     8   Male     39    1 43 193.7674 71.57309  201.0 201.28571  63.7518   0 316   316 -1.0986155  1.36131966 10.914793
# X19     9 Female     40    1 35 234.1714 84.80028  276.0 240.62069  69.6822   8 363   355 -0.6858006 -0.45920246 14.333863
# X110   10   Male     40    1 48 150.8958 71.92823  164.5 155.57500  57.0801   0 268   268 -0.7950745 -0.03240679 10.381945
# X111   11 Female     42    1 28 210.0000 74.79453  220.0 217.54167  44.4780   0 348   348 -1.1157936  1.58404538 14.134838
# X112   12   Male     42    1 42 106.7857 92.98663  114.5  99.73529 117.8667   0 316   316  0.3396299 -1.10350277 14.348149









#### NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE ####



############# ! NATURE COMM MODEL SELECTION ###################



#### Poisson family error structures
# The response is a count; fitting a normal distribution does not give normal, homoscedastic residuals.

### ! Temperature has several levels and can be treated as a factor or as continuous (Field SPSS Book, Asking questions in biology book, Handbook of biological statistics); trying both to see the effect

### Temperature as factor

# Proportion of the null deviance explained by `model` (Thomas et al., 2015).
pseudo_r2 <- function(model) {
  (model$null.deviance - model$deviance) / model$null.deviance
}

# Small-sample corrected AIC, taken from the fitted model itself:
#   AICc  = -2*logLik       + 2*k + 2*k*(k + 1) / (n - k - 1)
#   QAICc = -2*logLik/c_hat + 2*K + 2*K*(K + 1) / (n - K - 1),  K = k + 1
# k is the number of estimated parameters and n the number of observations;
# K counts the dispersion estimate c_hat as one extra parameter. Supply
# `c_hat` to get QAICc. The earlier hand-written version left out the 2*k
# penalty and hard-coded p = 2 and n = 543 although this model estimates 12
# coefficients.
aicc_from_model <- function(model, c_hat = NULL) {
  fit_loglik <- logLik(model)
  n_obs <- nobs(model)
  n_par <- attr(fit_loglik, "df")
  scaled_dev <- -2 * as.numeric(fit_loglik)
  if (!is.null(c_hat)) {
    scaled_dev <- scaled_dev / c_hat
    n_par <- n_par + 1
  }
  scaled_dev + 2 * n_par + 2 * n_par * (n_par + 1) / (n_obs - n_par - 1)
}

# Global model (Temperature x Sex) under three link functions
globalmodposs <- glm(Offspring.count.20D ~ Temperature.oC * Sex, poisson(link = "log"), data = repfit)
globalmodpossID <- glm(Offspring.count.20D ~ Temperature.oC * Sex, poisson(link = "identity"), data = repfit)
globalmodpossRT <- glm(Offspring.count.20D ~ Temperature.oC * Sex, poisson(link = "sqrt"), data = repfit)

#########################

summary(globalmodposs)
summary(globalmodpossID)
summary(globalmodpossRT) # No R^2, AIC given
# AIC: 20666, AIC: 20666, AIC: 20666 # link change seems to do little
pseudoR <- pseudo_r2(globalmodposs)
pseudoR # 0.2055347
pseudoR <- pseudo_r2(globalmodpossID)
pseudoR # 0.2055347
pseudoR <- pseudo_r2(globalmodpossRT)
pseudoR # 0.2055347
# changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian


## Overdispersion check
par(mfrow = c(2, 2)); plot(globalmodposs); par(mfrow = c(1, 1))
# Dispersion parameter (Thomas et al 2015): residual deviance per residual
# degree of freedom. Values > 1 indicate overdispersion.
theta <- globalmodposs$deviance / globalmodposs$df.residual
theta # theta = 35.99749, massively overdispersed

# Information criteria. theta is computed above and passed in, rather than
# typed in as a constant before it exists.
# (Values recorded with the old formula were 22816.91 and 633.8688; re-run to
# obtain current values.)
AICc <- aicc_from_model(globalmodposs); AICc
qAICc <- aicc_from_model(globalmodposs, c_hat = theta); qAICc

# Alternative overdispersion test from the AER package (not attached in this
# script, so called through its namespace). This line previously read
# repfit(globalmodposs), which calls the data frame as if it were a function.
var(repfit$Offspring.count.20D) #6873.245
mean(repfit$Offspring.count.20D) #203.3462
AER::dispersiontest(globalmodposs) # dispersion  27.77385





### Temperature as numeric

# Proportion of the null deviance explained by `model` (Thomas et al., 2015).
pseudo_r2 <- function(model) {
  (model$null.deviance - model$deviance) / model$null.deviance
}

# Small-sample corrected AIC, taken from the fitted model itself:
#   AICc  = -2*logLik       + 2*k + 2*k*(k + 1) / (n - k - 1)
#   QAICc = -2*logLik/c_hat + 2*K + 2*K*(K + 1) / (n - K - 1),  K = k + 1
# k is the number of estimated parameters and n the number of observations;
# K counts the dispersion estimate c_hat as one extra parameter. Supply
# `c_hat` to get QAICc.
aicc_from_model <- function(model, c_hat = NULL) {
  fit_loglik <- logLik(model)
  n_obs <- nobs(model)
  n_par <- attr(fit_loglik, "df")
  scaled_dev <- -2 * as.numeric(fit_loglik)
  if (!is.null(c_hat)) {
    scaled_dev <- scaled_dev / c_hat
    n_par <- n_par + 1
  }
  scaled_dev + 2 * n_par + 2 * n_par * (n_par + 1) / (n_obs - n_par - 1)
}

globalmodposs <- glm(Offspring.count.20D ~ Temperature.oCnum * Sex, poisson(link = "log"), data = repfit)
# The identity-link fit was noted to throw an error. When it does, the object
# is set to NULL so that a model from an earlier section with the same name is
# not summarised by mistake. The pseudo-R2 of 0.2055347 recorded for this
# model below is identical to the factor-temperature value, which is what a
# stale object would produce.
globalmodpossID <- tryCatch(
  glm(Offspring.count.20D ~ Temperature.oCnum * Sex, poisson(link = "identity"), data = repfit),
  error = function(e) {
    message("Poisson identity-link fit failed: ", conditionMessage(e))
    NULL
  }
)
globalmodpossRT <- glm(Offspring.count.20D ~ Temperature.oCnum * Sex, poisson(link = "sqrt"), data = repfit)


#########################

summary(globalmodposs)
if (!is.null(globalmodpossID)) print(summary(globalmodpossID))
summary(globalmodpossRT) # No R^2, AIC given
# Recorded: AIC: 25158,AIC: 22841,AIC: 25100
pseudoR <- pseudo_r2(globalmodposs)
pseudoR # 0.1085657
pseudoR <- if (is.null(globalmodpossID)) NA_real_ else pseudo_r2(globalmodpossID)
pseudoR # recorded 0.2055347 (see note on the identity-link fit above)
pseudoR <- pseudo_r2(globalmodpossRT)
pseudoR # 0.1109537
# the link makes only a small difference to R^2 and AIC here
# poisson explains more variation in data than gaussian

## Overdispersion check
par(mfrow = c(2, 2)); plot(globalmodposs); par(mfrow = c(1, 1))
# Dispersion parameter (Thomas et al 2015): residual deviance per residual
# degree of freedom. Values > 1 indicate overdispersion.
theta <- globalmodposs$deviance / globalmodposs$df.residual
theta # theta = 39.79169, massively overdispersed

# Information criteria, computed from this model and the theta estimated
# above. The earlier version used constants that do not belong to this model
# (theta 90.9372, n = 84, p = 1) and left out the 2*k penalty.
# (Value recorded with the old AICc formula was 25149.96; re-run to obtain
# current values.)
AICc <- aicc_from_model(globalmodposs); AICc
qAICc <- aicc_from_model(globalmodposs, c_hat = theta); qAICc

# Alternative overdispersion test from the AER package (not attached in this
# script, so called through its namespace). The data frame is `repfit`; the
# name `repfitdick` used here previously is not defined anywhere in this script.
var(repfit$Offspring.count.20D) #6873.245
mean(repfit$Offspring.count.20D) #203.3462
AER::dispersiontest(globalmodposs) # dispersion  30.45573


############################

### Temperature as factor
# Compare four error structures for the global Temperature x Sex model:
# negative binomial, Poisson, quasi-Poisson and Gaussian.

library(MASS)
globalmodnegbin <- glm.nb(Offspring.count.20D ~ Temperature.oC * Sex, link = "log", data = repfit, control = glm.control(maxit = 100)) # error not working very well
# control=glm.control(maxit=100) increasing iteration limit doesnt help
globalmodposs <- glm(Offspring.count.20D ~ Temperature.oC * Sex, poisson(link = "log"), data = repfit)
globalmodquasiposs <- glm(Offspring.count.20D ~ Temperature.oC * Sex, quasipoisson(link = "log"), data = repfit)
globalmodgauss <- glm(Offspring.count.20D ~ Temperature.oC * Sex, gaussian(link = "identity"), data = repfit)

# Negative binomial. `theta` here is residual deviance / residual df, not the
# negative binomial shape parameter.
# NOTE(review): the recorded dispersion and pseudo-R2 are almost identical to
# the Poisson values, which may mean glm.nb did not estimate a finite shape
# parameter - check the warnings from the fit.
theta <- globalmodnegbin$deviance / globalmodnegbin$df.residual; theta   # dispersion  35.99749
par(mfrow = c(2, 2)); plot(globalmodnegbin); par(mfrow = c(1, 1)) #37.03054 # cooks outliers, left QQ plot pull
pseudoR <- (globalmodnegbin$null.deviance - globalmodnegbin$deviance) / globalmodnegbin$null.deviance # (thomas et al., 2015)
pseudoR # 0.2055345
summary(globalmodnegbin); AIC(globalmodnegbin) # 22843

# Poisson
theta <- globalmodposs$deviance / globalmodposs$df.residual; theta   # dispersion  35.99749
par(mfrow = c(2, 2)); plot(globalmodposs); par(mfrow = c(1, 1))
pseudoR <- (globalmodposs$null.deviance - globalmodposs$deviance) / globalmodposs$null.deviance # (thomas et al., 2015)
pseudoR # 0.2055347
summary(globalmodposs); AIC(globalmodposs)


## poisson and negative binomial no different, hasnt worked, only a few 0s.
# Count the zero observations. The comparison was `< 0`, which no count can
# satisfy, and it read from `repfitdick`, which is not defined in this script.
sum(repfit$Offspring.count.20D == 0)
table(repfit$Offspring.count.20D) # 20 0s in


# Gaussian
par(mfrow = c(2, 2)); plot(globalmodgauss); par(mfrow = c(1, 1)) # distribution looks quite normal
devresid <- resid(globalmodgauss, type = "deviance"); hist(devresid)
pseudoR <- (globalmodgauss$null.deviance - globalmodgauss$deviance) / globalmodgauss$null.deviance # (thomas et al., 2015)
pseudoR # 0.251578
summary(globalmodgauss) # explains more variation too

# Quasi-Poisson. AIC() returns NA for quasi families because they have no
# likelihood; the call is kept so the output matches earlier runs.
theta <- globalmodquasiposs$deviance / globalmodquasiposs$df.residual; theta   # dispersion  35.99749
par(mfrow = c(2, 2)); plot(globalmodquasiposs); par(mfrow = c(1, 1))
pseudoR <- (globalmodquasiposs$null.deviance - globalmodquasiposs$deviance) / globalmodquasiposs$null.deviance # (thomas et al., 2015)
pseudoR # 0.2055347
summary(globalmodquasiposs); AIC(globalmodquasiposs) # quasi poisson looks more promising with residuals and output p values




### Temperature as numeric
# Same four error structures as for the factor coding, now with temperature
# entered as a continuous predictor (Temperature.oCnum).

globalmodnegbin <- glm.nb(
  Offspring.count.20D ~ Temperature.oCnum*Sex,
  link = "log",
  data = repfit,
  control = glm.control(maxit = 100)
) # error not working very well; raising the iteration limit does not help
globalmodposs <- glm(
  Offspring.count.20D ~ Temperature.oCnum*Sex,
  poisson(link = "log"),
  data = repfit
)
globalmodquasiposs <- glm(
  Offspring.count.20D ~ Temperature.oCnum*Sex,
  quasipoisson(link = "log"),
  data = repfit
)
globalmodgauss <- glm(
  Offspring.count.20D ~ Temperature.oCnum*Sex,
  gaussian(link = "identity"),
  data = repfit
)

## Negative binomial
theta <- deviance(globalmodnegbin) / df.residual(globalmodnegbin)
theta # dispersion  39.79169
par(mfrow = c(2, 2))
plot(globalmodnegbin) #40.28075 # cooks outliers, left QQ plot pull, looks more extreme than factor
par(mfrow = c(1, 1))
pseudoR <- 1 - deviance(globalmodnegbin) / globalmodnegbin$null.deviance
pseudoR <- (globalmodnegbin$null.deviance - deviance(globalmodnegbin)) / globalmodnegbin$null.deviance # (thomas et al., 2015)
pseudoR # 0.1085657 # explains less var than factor
summary(globalmodnegbin)
AIC(globalmodnegbin) # 25160

## Poisson
theta <- deviance(globalmodposs) / df.residual(globalmodposs)
theta # dispersion  39.79169 more overdispersed than factor
par(mfrow = c(2, 2))
plot(globalmodposs)
par(mfrow = c(1, 1))
pseudoR <- (globalmodposs$null.deviance - deviance(globalmodposs)) / globalmodposs$null.deviance # (thomas et al., 2015)
pseudoR # 0.1085657
summary(globalmodposs)
AIC(globalmodposs) # 25157.94


## Gaussian
par(mfrow = c(2, 2))
plot(globalmodgauss) # distribution looks quite normal, bit of funneling in residuals
par(mfrow = c(1, 1))
devresid <- residuals(globalmodgauss, type = "deviance")
hist(devresid)
pseudoR <- (globalmodgauss$null.deviance - deviance(globalmodgauss)) / globalmodgauss$null.deviance # (thomas et al., 2015)
pseudoR # 0.1387511
summary(globalmodgauss) # explains more variation too #6266.5

## Quasi-Poisson
theta <- deviance(globalmodquasiposs) / df.residual(globalmodquasiposs)
theta # dispersion  39.79169
par(mfrow = c(2, 2))
plot(globalmodquasiposs)
par(mfrow = c(1, 1))
pseudoR <- (globalmodquasiposs$null.deviance - deviance(globalmodquasiposs)) / globalmodquasiposs$null.deviance # (thomas et al., 2015)
pseudoR # 0.1085657
summary(globalmodquasiposs)
AIC(globalmodquasiposs) # quasi poisson looks more promising with residuals and output p values








### assumption checks, recommendation of residual dev (contribution of each obs to resid dev) rather than pearson (Thomas et al., 2015)
# Poisson and quasi-Poisson fits of the temperature-by-sex interaction,
# both on repfitdick with temperature as a factor.
globalmodposs <- glm(
  Offspring.count.20D ~ Temperature.oC * Sex.1,
  family = poisson(link = "log"),
  data = repfitdick
)
globalmodquasiposs <- glm(
  Offspring.count.20D ~ Temperature.oC * Sex.1,
  family = quasipoisson(link = "log"),
  data = repfitdick
)
summary(globalmodposs)
summary(globalmodquasiposs)

# 1) Errors normally distributed? - NOT NECESSARY BUT NOT IMPROVED

# poisson: histogram, Shapiro-Wilk / Kolmogorov-Smirnov tests and normal QQ
# plot of the deviance residuals, then the standard 2x2 diagnostic panels
devresid <- residuals(globalmodposs, type = "deviance")
hist(devresid)
shapiro.test(devresid)
ks.test(devresid, pnorm)
qqnorm(devresid, cex = 1.8, pch = 20)
qqline(devresid, lty = 2, lwd = 2)
par(mfrow = c(2, 2))
plot(globalmodposs)
par(mfrow = c(1, 1))

# quasi: same checks on the quasi-Poisson fit
devresid <- residuals(globalmodquasiposs, type = "deviance")
hist(devresid)
shapiro.test(devresid)
ks.test(devresid, pnorm)
qqnorm(devresid, cex = 1.8, pch = 20)
qqline(devresid, lty = 2, lwd = 2)
par(mfrow = c(2, 2))
plot(globalmodquasiposs)
par(mfrow = c(1, 1))
# some negative skew and pull down on left of qq

# 2) Homogeneous variance (homoscedasticity) of residuals - NOT NECESSARY BUT NOT IMPROVED
# Grouping variables are read from each model's own model frame so that they
# always line up row-for-row with that model's residuals (the models are
# fitted to repfitdick, so indexing a different data frame risks a mismatch).

# poisson
devresid <- resid(globalmodposs, type = "deviance")
plot(devresid ~ globalmodposs$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid ~ Temperature.oC, data = model.frame(globalmodposs))
par(mfrow = c(2, 2))
plot(globalmodposs)
par(mfrow = c(1, 1))

# quasi: residuals are plotted against the quasi-Poisson model's own fitted values
devresid <- resid(globalmodquasiposs, type = "deviance")
plot(devresid ~ globalmodquasiposs$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid ~ Temperature.oC, data = model.frame(globalmodquasiposs))
par(mfrow = c(2, 2))
plot(globalmodquasiposs)
par(mfrow = c(1, 1))
#  plot: no trend or wedging; test failed

# 3) Independence of independent variables - NO PATTERN
# Two explanatory variables (temperature and sex): quasi-Poisson deviance
# residuals are plotted against each, using the variables stored in the model frame.
quasiframe <- model.frame(globalmodquasiposs)
plot(devresid ~ Temperature.oC, data = quasiframe)
plot(devresid ~ Sex.1, data = quasiframe)

# Integer coding of sex for the rank correlation with numeric temperature.
# as.factor() is applied first so the coding also works when Sex was read in
# as a character column (as.numeric() on character strings returns NA).
repfit$Sex.1num <- as.numeric(as.factor(repfit$Sex))
plot(repfit$Sex.1num ~ repfit$Temperature.oCnum)
cor.test(repfit$Sex.1num, repfit$Temperature.oCnum, method = "spearman")
# no correlation >0.3/ pattern

# 4) No serial auto-correlation with time/space - #ALL DATA COLLECTED AT ONE TIMEPOINT
#! need library(car)
# durbinWatsonTest(globalmodquasiposs)  # passed

# 5) No bias by unduly influential datapoints - YES

# poisson: diagnostic panels, then the cases flagged by influence.measures()
par(mfrow = c(2, 2))
plot(globalmodposs)
par(mfrow = c(1, 1))
influence <- influence.measures(globalmodposs)
summary(influence)

# quasi: same checks on the quasi-Poisson fit
par(mfrow = c(2, 2))
plot(globalmodquasiposs)
par(mfrow = c(1, 1))
influence <- influence.measures(globalmodquasiposs)
summary(influence)
     # 50+ cases in poisson with larger Cook's distance fixed with quasi-Poisson

# 6) Independent variables measured without error - BEST OF ABILITY

## Overdispersion re-check: residual deviance per residual degree of freedom
theta <- with(globalmodquasiposs, deviance / df.residual)
theta  # 35.99749
#AICc<-(-2*logLik(globalmodquasiposs))+((2*2*(2+1)/(481-2-1))); AICc # 688
# model still overdispersed



############# ! NATURE COMM MODEL REFINEMENT ###################


#! Quasi-Poisson is improving the assumptions of the model, in particular Cook's distance, and it is also producing more reliable significance values; negative binomial is identical to Poisson with the iteration limit reached
# models with factor levels look better because their coefficient estimates are closer, they have less overdispersion and explain a greater proportion of the variation
# there are only 6 group points unevenly spaced on the regression line (Field book)


# Full quasi-Poisson model: temperature (factor) x sex interaction.
# Each pseudoR below is (null deviance - residual deviance) / null deviance.
globalmodquasiposs <- glm(
  Offspring.count.20D ~ Temperature.oC * Sex,
  family = quasipoisson(link = "log"),
  data = repfit
)
pseudoR <- with(globalmodquasiposs, (null.deviance - deviance) / null.deviance)
pseudoR  # 0.2055347

# Minimum adequate potentials
# additive model (no interaction)
globalmodquasiposs2 <- glm(
  Offspring.count.20D ~ Temperature.oC + Sex,
  family = quasipoisson(link = "log"),
  data = repfit
)
pseudoR <- with(globalmodquasiposs2, (null.deviance - deviance) / null.deviance)
pseudoR  # 0.1580346

# temperature only
globalmodquasiposs3 <- glm(
  Offspring.count.20D ~ Temperature.oC,
  family = quasipoisson(link = "log"),
  data = repfit
)
pseudoR <- with(globalmodquasiposs3, (null.deviance - deviance) / null.deviance)
pseudoR  # 0.09597719

# sex only
globalmodquasiposs4 <- glm(
  Offspring.count.20D ~ Sex,
  family = quasipoisson(link = "log"),
  data = repfit
)
pseudoR <- with(globalmodquasiposs4, (null.deviance - deviance) / null.deviance)
pseudoR  # 0.07106971


## Null model
# intercept only (plus random terms in GLMMs)
nullmod <- glm(
  Offspring.count.20D ~ 1,
  family = quasipoisson(link = "log"),
  data = repfit
)
pseudoR <- with(nullmod, (null.deviance - deviance) / null.deviance)  # (Thomas et al., 2015)
pseudoR  # 0



# likelihood ratio test
library(lmtest)

############# ! NATURE COMM MODEL SIGNIFICANCE ###################

# Interaction model against the additive model
anova(globalmodquasiposs, globalmodquasiposs2, test = "Chisq")
# Model 1: Offspring.count.20D ~ Temperature.oC * Sex
# Model 2: Offspring.count.20D ~ Temperature.oC + Sex
# Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
# 1       531      19115                          
# 2       536      20258 -5  -1142.8 1.337e-07 ***
# the model including the interaction explains significantly more variation so is retained

# Sequential analysis of deviance for the full model
anova(globalmodquasiposs, test = "Chisq")
#                           Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
     # NULL                                 542      24060              
     # Temperature.oC      5   2309.2       537      21751 4.475e-16 ***
     # Sex                 1   1493.1       536      20258 4.150e-13 ***
     # Temperature.oC:Sex  5   1142.8       531      19115 1.337e-07 ***

# Single-term deletion from the full model
drop1(globalmodquasiposs, test = "Chisq")
#                     Df Deviance scaled dev.  Pr(>Chi)    
# <none>                    19115                          
# Temperature.oC:Sex  5    20258      40.239 1.337e-07 ***

# Single-term deletions from the additive model
drop1(globalmodquasiposs2, test = "Chisq")
#                 Df Deviance scaled dev.  Pr(>Chi)    
# <none>               20258                          
# Temperature.oC  5    22350      71.693 4.550e-14 ***
# Sex             1    21751      51.160 8.514e-13 ***



# Full model against the intercept-only null model
anova(globalmodquasiposs, nullmod, test = "Chisq")
 # Analysis of Deviance Table
 # 
 # Model 1: Offspring.count.20D ~ Temperature.oC * Sex
 # Model 2: Offspring.count.20D ~ 1
 # Resid. Df Resid. Dev  Df Deviance  Pr(>Chi)    
 # 1       531      19115                           
 # 2       542      24060 -11  -4945.1 < 2.2e-16 ***

 
summary(globalmodquasiposs)
# Call:
#      glm(formula = Offspring.count.20D ~ Temperature.oC * Sex, family = quasipoisson(link = "log"), 
#          data = repfitdick)
# 
# Deviance Residuals: 
#      Min       1Q   Median       3Q      Max  
# -22.789   -2.979    0.374    3.309   16.348  
# 
# Coefficients:
#                            Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)                 5.351289   0.042376 126.280  < 2e-16 ***
#      Temperature.oC35            0.189744   0.071220   2.664  0.00795 ** 
#      Temperature.oC38            0.208139   0.065874   3.160  0.00167 ** 
#      Temperature.oC39            0.077179   0.073198   1.054  0.29219    
#      Temperature.oC40            0.104764   0.072533   1.444  0.14923    
#      Temperature.oC42           -0.004182   0.081400  -0.051  0.95905    
#      Sex.1Male                  -0.094849   0.060582  -1.566  0.11803    
#      Temperature.oC35:Sex.1Male  0.108067   0.101382   1.066  0.28694    
#      Temperature.oC38:Sex.1Male -0.232914   0.096831  -2.405  0.01650 *  
#      Temperature.oC39:Sex.1Male -0.066960   0.103156  -0.649  0.51654    
#      Temperature.oC40:Sex.1Male -0.344614   0.105151  -3.277  0.00112 ** 
#      Temperature.oC42:Sex.1Male -0.581434   0.121790  -4.774 2.34e-06 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for quasipoisson family taken to be 28.40155)
# 
# Null deviance: 24060  on 542  degrees of freedom
# Residual deviance: 19115  on 531  degrees of freedom
# AIC: NA
# 
# Number of Fisher Scoring iterations: 5



########## checking parameter estimates
# Back-transform the log-link coefficients (exp of intercept + effects) and
# compare with the observed group means from describeBy() below.

# female looks ok
exp(5.351289)             # 210.8799  (30)
exp(5.351289 + 0.189744)  # 254.9412  (35)
exp(5.351289 + 0.208139)  # 259.6743  (38)
exp(5.351289 + 0.077179)  # ~227.80   (39)
exp(5.351289 + 0.104764)  # 234.1713  (40)
exp(5.351289 - 0.004182)  # 209.9999  (42)
# male a bit off from 35 within SE
exp(5.351289 - 0.094849)                        # 191.7975  (30)
exp(5.351289 + 0.189744 - 0.094849 + 0.108067)  # 258.3334  (35)
exp(5.351289 + 0.208139 - 0.094849 - 0.232914)  # 187.1041  (38)
exp(5.351289 + 0.077179 - 0.094849 - 0.066960)  # 193.7675  (39)
exp(5.351289 + 0.104764 - 0.094849 - 0.344614)  # 150.8959  (40)
exp(5.351289 - 0.004182 - 0.094849 - 0.581434)  # 106.7857  (42)

# Descriptive statistics per sex x temperature cell, returned as one matrix
describeBy(
  repfit$Offspring.count.20D,
  list(repfit$Sex, repfit$Temperature.oC),
  mat = TRUE
)
#      item group1 group2 vars  n     mean       sd median   trimmed      mad min max range       skew    kurtosis
# X11     1 Female     30    1 75 210.8800 77.24495  216.0 212.40984  80.0604  16 368   352 -0.1799377 -0.45099283
# X12     2   Male     30    1 79 191.7975 55.70835  188.0 190.30769  53.3736  72 339   267  0.3147822 -0.07760356
# X13     3 Female     35    1 34 254.9412 51.77629  260.0 258.53571  49.6671 116 359   243 -0.6111854  0.11509463
# X14     4   Male     35    1 33 258.3333 56.23537  266.0 260.33333  62.2692 147 355   208 -0.3251280 -0.96893641
# X15     5 Female     38    1 43 259.6744 73.60439  264.0 267.08571  51.8910   0 372   372 -1.7567568  4.55386527
# X16     6   Male     38    1 48 187.1042 67.25127  195.0 189.45000  62.2692  15 311   296 -0.3748840 -0.37159696
# X17     7 Female     39    1 35 227.8000 88.50916  224.0 233.96552  87.4734   9 424   415 -0.5833396  0.47091275
# X18     8   Male     39    1 43 193.7674 71.57309  201.0 201.28571  63.7518   0 316   316 -1.0986155  1.36131966
# X19     9 Female     40    1 35 234.1714 84.80028  276.0 240.62069  69.6822   8 363   355 -0.6858006 -0.45920246
# X110   10   Male     40    1 48 150.8958 71.92823  164.5 155.57500  57.0801   0 268   268 -0.7950745 -0.03240679
# X111   11 Female     42    1 28 210.0000 74.79453  220.0 217.54167  44.4780   0 348   348 -1.1157936  1.58404538
# X112   12   Male     42    1 42 106.7857 92.98663  114.5  99.73529 117.8667   0 316   316  0.3396299 -1.10350277
# se
# X11   8.919479
# X12   6.267679
# X13   8.879561
# X14   9.789321
# X15  11.224564
# X16   9.706885
# X17  14.960778
# X18  10.914793
# X19  14.333863
# X110 10.381945
# X111 14.134838
# X112 14.348149

# Mean 20-day offspring count for every sex (rows) x temperature (columns) cell
with(repfit, tapply(Offspring.count.20D, list(Sex, Temperature.oC), mean))
#        30       35       38       39       40       42
# Female 210.8800 254.9412 259.6744 227.8000 234.1714 210.0000
# Male   191.7975 258.3333 187.1042 193.7674 150.8958 106.7857

############# ! NATURE COMM MODEL SIGNIFICANCE ###################

############# Post hoc

#! cannot get working
library(lsmeans)


# All pairwise contrasts between sex x temperature cells, Tukey-adjusted
lsmeans(
  globalmodquasiposs,
  pairwise ~ Sex * Temperature.oC,
  adjust = "tukey"
)
# contrast                  estimate         SE df z.ratio p.value
# Female,30 - Male,30    0.094849288 0.06058210 NA   1.566  0.9214 ,
# Female,30 - Female,35 -0.189743587 0.07122041 NA  -2.664  0.2443  =
# Female,30 - Male,35   -0.202961489 0.07160523 NA  -2.834  0.1655  
# Female,30 - Female,38 -0.208139359 0.06587354 NA  -3.160  0.0692  =
# Female,30 - Male,38    0.119623749 0.07041415 NA   1.699  0.8692 
# Female,30 - Female,39 -0.077178800 0.07319812 NA  -1.054  0.9964  =
# Female,30 - Male,39    0.084630565 0.07214208 NA   1.173  0.9910  
# Female,30 - Female,40 -0.104764197 0.07253305 NA  -1.444  0.9547  =
# Female,30 - Male,40    0.334699498 0.07561078 NA   4.427  0.0006 <---
# Female,30 - Female,42  0.004181721 0.08139991 NA   0.051  1.0000  =
# Female,30 - Male,42    0.680465095 0.09015688 NA   7.548  <.0001 <---
# Male,30 - Female,35   -0.284592875 0.07177075 NA  -3.965  0.0042 <---
# Male,30 - Male,35     -0.297810778 0.07215264 NA  -4.128  0.0022  = <--
# Male,30 - Female,38   -0.302988647 0.06646817 NA  -4.558  0.0003 <---
# Male,30 - Male,38      0.024774460 0.07097075 NA   0.349  1.0000  =
# Male,30 - Female,39   -0.172028088 0.07373371 NA  -2.333  0.4527  
# Male,30 - Male,39     -0.010218724 0.07268545 NA  -0.141  1.0000  =
# Male,30 - Female,40   -0.199613485 0.07307351 NA  -2.732  0.2105
# Male,30 - Male,40      0.239850210 0.07612939 NA   3.151  0.0711  =
# Male,30 - Female,42   -0.090667568 0.08188186 NA  -1.107  0.9944
# Male,30 - Male,42      0.585615807 0.09059226 NA   6.464  <.0001  = <---
# Female,35 - Male,35   -0.013217903 0.08129058 NA  -0.163  1.0000 ,
# Female,35 - Female,38 -0.018395772 0.07629006 NA  -0.241  1.0000
# Female,35 - Male,38    0.309367335 0.08024340 NA   3.855  0.0065 <---
# Female,35 - Female,39  0.112564787 0.08269713 NA   1.361  0.9706
# Female,35 - Male,39    0.274374151 0.08176387 NA   3.356  0.0380
# Female,35 - Female,40  0.084979390 0.08210903 NA   1.035  0.9969
# Female,35 - Male,40    0.524443085 0.08484009 NA   6.182  <.0001 <---
# Female,35 - Female,42  0.193925307 0.09003774 NA   2.154  0.5835
# Female,35 - Male,42    0.870208682 0.09802608 NA   8.877  <.0001 <---
# Male,35 - Female,38   -0.005177869 0.07664943 NA  -0.068  1.0000 
# Male,35 - Male,38      0.322585238 0.08058514 NA   4.003  0.0036
# Male,35 - Female,39    0.125782690 0.08302878 NA   1.515  0.9369
# Male,35 - Male,39      0.287592054 0.08209929 NA   3.503  0.0233
# Male,35 - Female,40    0.098197293 0.08244304 NA   1.191  0.9897
# Male,35 - Male,40      0.537660987 0.08516339 NA   6.313  <.0001 <---
# Male,35 - Female,42    0.207143210 0.09034244 NA   2.293  0.4816
# Male,35 - Male,42      0.883426584 0.09830603 NA   8.986  <.0001 <---
# Female,38 - Male,38    0.327763107 0.07553793 NA   4.339  0.0009 ,
# Female,38 - Female,39  0.130960559 0.07813956 NA   1.676  0.8794
# Female,38 - Male,39    0.292769923 0.07715119 NA   3.795  0.0082
# Female,38 - Female,40  0.103375162 0.07751690 NA   1.334  0.9748
# Female,38 - Male,40    0.542838857 0.08040408 NA   6.751  <.0001 <---
# Female,38 - Female,42  0.212321079 0.08587066 NA   2.473  0.3573
# Female,38 - Male,42    0.888604454 0.09421300 NA   9.432  <.0001 <---
# Male,38 - Female,39   -0.196802548 0.08200379 NA  -2.400  0.4059
# Male,38 - Male,39     -0.034993184 0.08106255 NA  -0.432  1.0000
# Male,38 - Female,40   -0.224387945 0.08141068 NA  -2.756  0.1991
# Male,38 - Male,40      0.215075749 0.08416440 NA   2.555  0.3055
# Male,38 - Female,42   -0.115442028 0.08940135 NA  -1.291  0.9804
# Male,38 - Male,42      0.560841347 0.09744187 NA   5.756  <.0001 <---
# Female,39 - Male,39    0.161809364 0.08349221 NA   1.938  0.7352 ,
# Female,39 - Female,40 -0.027585397 0.08383025 NA  -0.329  1.0000
# Female,39 - Male,40    0.411878298 0.08650699 NA   4.761  0.0001 <---
# Female,39 - Female,42  0.081360520 0.09161012 NA   0.888  0.9992
# Female,39 - Male,42    0.757643895 0.09947226 NA   7.617  <.0001 <---
# Male,39 - Female,40   -0.189394761 0.08290975 NA  -2.284  0.4878
# Male,39 - Male,40      0.250068933 0.08561527 NA   2.921  0.1334
# Male,39 - Female,42   -0.080448844 0.09076854 NA  -0.886  0.9993
# Male,39 - Male,42      0.595834530 0.09869775 NA   6.037  <.0001 <---
# Female,40 - Male,40    0.439463695 0.08594496 NA   5.113  <.0001 , <---
# Female,40 - Female,42  0.108945917 0.09107959 NA   1.196  0.9894 
# Female,40 - Male,42    0.785229292 0.09898388 NA   7.933  <.0001 <---
# Male,40 - Female,42   -0.330517777 0.09354912 NA  -3.533  0.0210
# Male,40 - Male,42      0.345765597 0.10126083 NA   3.415  0.0313
# Female,42 - Male,42    0.676283374 0.10565373 NA   6.401  <.0001 , <---






#### HEATWAVE REPRODUCTIVE FITNESS SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis---------------- 
# The relative effect of a single 5d 42oC heatwave on the 20D reproductive fitness (adult offspring counts) of adult tribolium males and females

#Response variable (dependent):           20D reproductive fitness; (count +/- positive skew) 

#Global Fixed variables (independent):          
#    Categorical                          (temperature 30, 35, 38, 39, 40, 42), sex 
#    Covariates                           (temperature 30, 35, 38, 39, 40, 42)
#    Non-linear terms                     NA
#    Interactions                         temperature and sex

#Random terms:                            

#---------Misc-------------------

# Simple analysis:                       2 sample t-tests within temperature or correlations across but not normal/equal var     
# Non-para:                              Mann-Whitney U and Spearman's 
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats-------------------
# Most data show negative skew and heterogeneity of variances; unable to transform

# cor.test(repfitdickmale$Offspring.count.20D, repfitdickmale$Temperature.oCnum, method = "spearman")
# S = 5500500, p-value = 4.877e-08
# rho -0.31 

# cor.test(repfitdickfemale$Offspring.count.20D, repfitdickfemale$Temperature.oCnum, method = "spearman")
# S = 2472200, p-value = 0.4253
# rho 0.05

#30oC
# W = 3484.5, p-value = 0.0594 # there is no significant difference between groups
# t = 1.8942, df = 144.32, p-value = 0.0602

#35oC
# W = 539, p-value = 0.7874 # there is no significant difference between groups

# 38oC
# W = 1668.5, p-value = 4.276e-07 # there is a significant difference between groups

# 39oC
# W = 946.5, p-value = 0.05188 # there is no significant difference between groups

# 40oC
# W = 1291.5, p-value = 3.19e-05 # there is a significant difference between groups

# 42oC
# W = 953, p-value = 1.194e-05 # there is a significant difference between groups
# t = 5.2598, df = 62.865, p-value = 1.845e-06



#---------GLM Model refinement-------------------

# # Overdispersion parameter:                35.99749-27.77385  for poisson (log), large variance 6873.245 to 203.3462



# Error family (+link function):           quasi-Poisson (log); negative binomial reached the iteration limit

# Model refinement method(s):              AICc comparison, anova(), drop1, residual investigation 

# Most plausible/final model(s):           globalmodquasiposs<-glm(Offspring.count.20D ~ Temperature.oC*Sex.1, quasipoisson(link = "log"), data=repfitdick)


# AIC(c)                                   NA for quasipois 
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      0.2055347 for factor, 0.1085657 for numeric
# Model significance:                      from analysis of deviance anova(Minimum adequate model , null): 
# Factor
#X^2= 4945.1, df= 3, 539 / 11, 531 , p<0.001

# Numeric  
#X^2= 2612.1, df= 3, 539 / 3, 539 , p<0.001


#--------Model assumptions------------------

#Residuals normally distributed:               NOT NECESSARY BUT slightly IMPROVED
#    hist of stdzd resids (u=0, sd=1):         symmetrical bell shaped 
#    p1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal  
#    p2-Q-Q plot w/ stdzd resids:              straight line 
#    p3-scale-location w/sqrt resid~fitted:    no pattern
#    s-w/k-s resid tests:                      NS
# # some negative skew and pull down on left of qq

#Homogeneity of variance:                      NOT NECESSARY BUT NOT slightly IMPROVED
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
#    fligner/levenes tests:                    NS
#  plot no trend or wedging, test failed. better in factor 

#Independence of variables (no-collinearity):  NO PATTERN
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
#    variance inflation factors (VIFs):        values <3
# no correlation >0.3/ pattern

#No serial auto-correlation                     YES AS ONLY A SINGLE TIMEPOINT
#    durbinwatson test:                         NS
#    Auto-cor function (ACF):                   <threshold
#  test passed

#No bias by unduly influential datapoints:      YES
#    P4-stdzd resids~leverage:                  <cooks threshold eg. >1 or sample-size/4
#    leverage                                   <2p/n
# 50+ cases in poisson with larger cooks distance fixed with quasi poisson 

#Independent variables measured without error:  TO BEST OF ABILITY


#-------Model report-----------------------
#term                   parameter+/-se   test-stat(wald z)   d.f.      P


## factor
# anova()/drop1()
#                      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)   
# NULL                                   542      24060              
# Temperature.oC        5   2309.2       537      21751 4.475e-16 ***
# Sex.1                 1   1493.1       536      20258 4.150e-13 ***
# Temperature.oC:Sex.1  5   1142.8       531      19115 1.337e-07 ***

## Coefficients:
#                                  Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)                 5.351289   0.042376 126.280  < 2e-16 ***
#      Temperature.oC35            0.189744   0.071220   2.664  0.00795 ** 
#      Temperature.oC38            0.208139   0.065874   3.160  0.00167 ** 
#      Temperature.oC39            0.077179   0.073198   1.054  0.29219    
#      Temperature.oC40            0.104764   0.072533   1.444  0.14923    
#      Temperature.oC42           -0.004182   0.081400  -0.051  0.95905    
#      Sex.1Male                  -0.094849   0.060582  -1.566  0.11803    
#      Temperature.oC35:Sex.1Male  0.108067   0.101382   1.066  0.28694    
#      Temperature.oC38:Sex.1Male -0.232914   0.096831  -2.405  0.01650 *  
#      Temperature.oC39:Sex.1Male -0.066960   0.103156  -0.649  0.51654    
#      Temperature.oC40:Sex.1Male -0.344614   0.105151  -3.277  0.00112 ** 
#      Temperature.oC42:Sex.1Male -0.581434   0.121790  -4.774 2.34e-06 ***


## numeric
# anova()/drop1()
#                               Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
#      NULL                                      542      24060              
#      Sex.1                    1  1709.92       541      22350 8.306e-14 ***
#      Temperature.oCnum        1   236.23       540      22114  0.005524 ** 
#      Sex.1:Temperature.oCnum  1   665.92       539      21448 3.181e-06 ***


## Coefficients:
#                                 Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)                  5.201972   0.193109  26.938  < 2e-16 ***
#      Sex.1Male                    1.022857   0.273844   3.735 0.000208 ***
#      Temperature.oCnum            0.006632   0.005298   1.252 0.211231    
#      Sex.1Male:Temperature.oCnum -0.035090   0.007532  -4.659 4.01e-06 ***
# Hypothesis interpretation:  


