

# Show any objects still sitting in the workspace from a previous session ...
ls()
# ... and then clear all of them
rm(list = ls())

# Report the current working directory
getwd()
# Point R at the folder that holds the data files read below
setwd("C:/Users/UEA/Documents/Neat data/nature heatwave")

# Load the two data sets (file names are relative to the working directory set above)
# paternal heat, total offspring production with 20D * 13 females
transgenpaternalsum <- read.csv(file = "transgenmalesum.csv", header = TRUE)
# maternal heat, total offspring production with 20D * 13 females
transgenmaternalsum <- read.csv(file = "transgenfemalesum.csv", header = TRUE)


#### DATA CHECK, CLEAN, DESCRIPTION AND SUMMARY #########################################################################

## Data checks
# Structure of the paternal data set (output recorded below)
str(transgenpaternalsum)
# 'data.frame':	90 obs. of  5 variables:
# $ Replicate              : int  1 2 3 4 5 6 7 8 9 10 ...                                male/family number
# $ Paternal.temperature.oC: int  30 30 30 30 30 30 30 30 30 30 ...                       heatwave treatment of father
# $ Offspring.count.sum.20D: int  1071 1676 952 676 1535 1030 177 688 771 741 ...         sum of offspring counts from 13 females provided in series to each male
# $ Count.matings          : int  7 9 4 5 7 4 1 5 4 5 ...                                 number of females producing offspring
# $ Batch                  : int  1 1 1 1 1 1 1 1 1 1 ...                                 random block

# Temperature is read as an integer; treat it as a categorical treatment
transgenpaternalsum$Paternal.temperature.oC <- as.factor(transgenpaternalsum$Paternal.temperature.oC)

# Structure of the maternal data set (output recorded below)
str(transgenmaternalsum)
# 'data.frame':	103 obs. of  4 variables:
# $ Replicate              : int  1 2 3 4 5 6 7 8 9 10 ...
# $ Maternal.temperature.oC: Factor w/ 3 levels "C","FH","FSH": 2 2 2 2 2 2 2 2 2 2 ...   heatwave treatment of mother/sperm
# $ Offspring.count.sum.20D: int  1469 1215 1132 841 1036 791 1336 0 781 730 ...
# $ Count.matings          : int  6 7 6 7 6 5 7 0 7 7 ...

# The recorded str() output shows a factor, but since R 4.0 read.csv() keeps text
# columns as character (stringsAsFactors = FALSE by default). Convert explicitly so
# that levels() below and the factor-based tests later behave the same on any R
# version; as.factor() is a no-op when the column already is a factor.
transgenmaternalsum$Maternal.temperature.oC <- as.factor(transgenmaternalsum$Maternal.temperature.oC)

# Number of missing values per column (is.na() on the whole data frame would print
# one TRUE/FALSE per cell, which is hard to scan)
colSums(is.na(transgenpaternalsum))
colSums(is.na(transgenmaternalsum))
 # no NAs

summary(transgenpaternalsum)
summary(transgenmaternalsum)
 # produces general (unsplit) range, quantiles, median, count and mean summary stats for each variable

levels(transgenpaternalsum$Paternal.temperature.oC) #[1] "30"       "42"
levels(transgenmaternalsum$Maternal.temperature.oC) # "C"   "FH"  "FSH"





###################### NEAT PLOT #########################################


# Quick reminder of the column names and types of both data sets before plotting
names(transgenmaternalsum)
names(transgenpaternalsum)
str(transgenmaternalsum)
str(transgenpaternalsum)


library(ggplot2)

# Plotmath expression used as the legend title: "Temperature (°C)" with a real degree sign
temp <- expression(paste("Temperature (", degree, "C)"))

############### ! FIGURE 4 C ##################


#paternal sum
# Figure 4C: Offspring.count.sum.20D by paternal treatment, drawn as boxplots with the
# group mean (large filled point) and the raw observations (jittered open circles).
# Uses `fun` (ggplot2 >= 3.3.0) and `linewidth` (ggplot2 >= 3.4.0) in place of the
# deprecated `fun.y` and `size`.
graphtranspatsum <- ggplot(transgenpaternalsum,
                           aes(x = Paternal.temperature.oC,
                               y = Offspring.count.sum.20D,
                               fill = Paternal.temperature.oC)) + # for outline-only boxes map colour instead of fill and swap scale_fill_manual for scale_color_manual
     geom_boxplot(notch = FALSE, # set to TRUE to draw notched boxes
                  outlier.shape = NA, # hide boxplot outliers: geom_jitter below already draws every observation
                  width = 0.5,
                  lwd = 0.5,
                  fatten = 0.5,
                  color = "black",
                  position = position_dodge(0.5)) +
     stat_summary(fun = "mean", geom = "point", size = 4, position = position_dodge(1), color = "black") + # group mean
     scale_fill_manual(values = c("ghostwhite", "tomato"), # fill colours of the boxes
                       name = temp, # legend title (temperature label)
                       breaks = c("30", "42"), # order of the legend entries
                       labels = c("Control", "Heatwave")) + # text of the legend entries
     geom_jitter(shape = 1, size = 1.5, position = position_jitter(0.15)) + # jitter so the data points are not on top of each other
     labs(x = expression(bold(atop("Paternal heatwave", paste("treatment")))), y = "Son's reproductive output") + # x and y axis titles
     scale_x_discrete(breaks = c("30", "42"), # order of the groups on the x axis
                      labels = c("Control", "Heatwave")) + # group names shown on the x axis
     coord_cartesian(ylim = c(-50, 2550)) + # visible y range (zooms without dropping data)
     scale_y_continuous(breaks = seq(0, 2500, 500), # ticks from 0 to 2500 in steps of 500
                        expand = c(0, 0)) + # no extra padding beyond the limits above
     theme_classic() + # base theme of the whole plot
     theme(
          panel.grid.major = element_blank(), # no major gridlines
          panel.border = element_blank(),     # no border around the panel
          panel.grid.minor = element_blank(), # no minor gridlines
          axis.line.x = element_line(color = "black", linewidth = 1),
          axis.line.y = element_line(color = "black", linewidth = 1),
          axis.text.x = element_text(color = "black", size = 10),
          axis.text.y = element_text(color = "black", size = 12),
          axis.title.x = element_text(face = "bold", size = 12, color = "black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(face = "bold", size = 12, color = "black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
          legend.position = "none", # comment this line out to show the legend
          panel.background = element_blank(),
          plot.background = element_rect(fill = "transparent", colour = NA))

# Save to the Desktop via a full path and an explicit `plot =`, so the working
# directory is never changed and the file written does not depend on last_plot().
ggsave(file.path("C:/Users/UEA/Desktop", "graphtranspatsum.png"), plot = graphtranspatsum,
       width = 2.5, height = 4, dpi = 300, bg = "transparent")

# firebrick1, darkred
# grey90, grey60 grey30


############### ! FIGURE 4 D ##################

#paternal mate
# Figure 4D: Count.matings by paternal treatment, drawn as boxplots with the group
# mean (large filled point) and the raw observations (jittered open circles).
# Uses `fun` (ggplot2 >= 3.3.0) and `linewidth` (ggplot2 >= 3.4.0) in place of the
# deprecated `fun.y` and `size`.
graphtranspatmate <- ggplot(transgenpaternalsum,
                            aes(x = Paternal.temperature.oC,
                                y = Count.matings,
                                fill = Paternal.temperature.oC)) + # for outline-only boxes map colour instead of fill and swap scale_fill_manual for scale_color_manual
     geom_boxplot(notch = FALSE, # set to TRUE to draw notched boxes
                  outlier.shape = NA, # hide boxplot outliers: geom_jitter below already draws every observation
                  width = 0.5,
                  lwd = 0.5,
                  fatten = 0.5,
                  color = "black",
                  position = position_dodge(0.5)) +
     stat_summary(fun = "mean", geom = "point", size = 4, position = position_dodge(1), color = "black") + # group mean
     scale_fill_manual(values = c("ghostwhite", "tomato"), # fill colours of the boxes
                       name = temp, # legend title (temperature label)
                       breaks = c("30", "42"), # order of the legend entries
                       labels = c("Control", "Heatwave")) + # text of the legend entries
     geom_jitter(shape = 1, size = 1.5, position = position_jitter(0.15)) + # jitter so the data points are not on top of each other
     labs(x = expression(bold(atop("Paternal heatwave", paste("treatment")))), y = "Son's mating frequency") + # x and y axis titles
     scale_x_discrete(breaks = c("30", "42"), # order of the groups on the x axis
                      labels = c("Control", expression(atop("Heatwave", paste(""))))) + # group names; atop() with an empty second line makes "Heatwave" a two-line label
     coord_cartesian(ylim = c(-0.2, 12.2)) + # visible y range (zooms without dropping data)
     scale_y_continuous(breaks = seq(0, 12, 3), # ticks from 0 to 12 in steps of 3
                        expand = c(0, 0)) + # no extra padding beyond the limits above
     theme_classic() + # base theme of the whole plot
     theme(
          panel.grid.major = element_blank(), # no major gridlines
          panel.border = element_blank(),     # no border around the panel
          panel.grid.minor = element_blank(), # no minor gridlines
          axis.line.x = element_line(color = "black", linewidth = 1),
          axis.line.y = element_line(color = "black", linewidth = 1),
          axis.text.x = element_text(color = "black", size = 12),
          axis.text.y = element_text(color = "black", size = 14),
          axis.title.x = element_text(face = "bold", size = 14, color = "black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(face = "bold", size = 12, color = "black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
          legend.position = "none", # comment this line out to show the legend
          panel.background = element_blank(),
          plot.background = element_rect(fill = "transparent", colour = NA))

# Save to the Desktop via a full path and an explicit `plot =`, so the working
# directory is never changed and the file written does not depend on last_plot().
ggsave(file.path("C:/Users/UEA/Desktop", "graphtranspatmate.png"), plot = graphtranspatmate,
       width = 2.5, height = 4, dpi = 300, bg = "transparent")



############### ! FIGURE 5.3 E ##################

#maternal sum
# Offspring.count.sum.20D by maternal treatment (C / FH / FSH), drawn as boxplots with
# the group mean (large filled point) and the raw observations (jittered open circles).
# Uses `fun` (ggplot2 >= 3.3.0) and `linewidth` (ggplot2 >= 3.4.0) in place of the
# deprecated `fun.y` and `size`.
graphtransmatsum <- ggplot(transgenmaternalsum,
                           aes(x = Maternal.temperature.oC,
                               y = Offspring.count.sum.20D,
                               fill = Maternal.temperature.oC)) + # for outline-only boxes map colour instead of fill and swap scale_fill_manual for scale_color_manual
     geom_boxplot(notch = FALSE, # set to TRUE to draw notched boxes
                  outlier.shape = NA, # hide boxplot outliers: geom_jitter below already draws every observation
                  width = 0.5,
                  lwd = 0.5,
                  fatten = 0.5,
                  color = "black",
                  position = position_dodge(0.5)) +
     stat_summary(fun = "mean", geom = "point", size = 4, position = position_dodge(1), color = "black") + # group mean
     scale_fill_manual(values = c("ghostwhite", "tomato", "firebrick3"), # fill colours of the boxes
                       name = temp, # legend title (temperature label)
                       breaks = c("C", "FH", "FSH"), # order of the legend entries
                       labels = c("Control", "Female", expression(atop("Female", paste("and sperm"))))) + # text of the legend entries
     geom_jitter(shape = 1, size = 1.5, position = position_jitter(0.15)) + # jitter so the data points are not on top of each other
     labs(x = expression(bold(atop("Maternal heatwave", paste("treatment")))), y = "Son's reproductive output") + # x and y axis titles
     scale_x_discrete(breaks = c("C", "FH", "FSH"), # order of the groups on the x axis
                      labels = c("Control", "Female", expression(atop("Female", paste("and sperm"))))) + # group names shown on the x axis
     coord_cartesian(ylim = c(-50, 2550)) + # visible y range (zooms without dropping data)
     scale_y_continuous(breaks = seq(0, 2500, 500), # ticks from 0 to 2500 in steps of 500
                        expand = c(0, 0)) + # no extra padding beyond the limits above
     theme_classic() + # base theme of the whole plot
     theme(
          panel.grid.major = element_blank(), # no major gridlines
          panel.border = element_blank(),     # no border around the panel
          panel.grid.minor = element_blank(), # no minor gridlines
          axis.line.x = element_line(color = "black", linewidth = 1),
          axis.line.y = element_line(color = "black", linewidth = 1),
          axis.text.x = element_text(color = "black", size = 12),
          axis.text.y = element_text(color = "black", size = 12),
          axis.title.x = element_text(face = "bold", size = 12, color = "black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(face = "bold", size = 12, color = "black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
          legend.position = "none", # comment this line out to show the legend
          panel.background = element_blank(),
          plot.background = element_rect(fill = "transparent", colour = NA))

# Save to the Desktop via a full path and an explicit `plot =`, so the working
# directory is never changed and the file written does not depend on last_plot().
ggsave(file.path("C:/Users/UEA/Desktop", "graphtransmatsum.png"), plot = graphtransmatsum,
       width = 3.5, height = 4, dpi = 300, bg = "transparent")


############### ! FIGURE 4 F ##################

#maternal mate
# Figure 4F: Count.matings by maternal treatment (C / FH / FSH), drawn as boxplots with
# the group mean (large filled point) and the raw observations (jittered open circles).
# Uses `fun` (ggplot2 >= 3.3.0) and `linewidth` (ggplot2 >= 3.4.0) in place of the
# deprecated `fun.y` and `size`.
graphtransmatmate <- ggplot(transgenmaternalsum,
                            aes(x = Maternal.temperature.oC,
                                y = Count.matings,
                                fill = Maternal.temperature.oC)) + # for outline-only boxes map colour instead of fill and swap scale_fill_manual for scale_color_manual
     geom_boxplot(notch = FALSE, # set to TRUE to draw notched boxes
                  outlier.shape = NA, # hide boxplot outliers: geom_jitter below already draws every observation
                  width = 0.5,
                  lwd = 0.5,
                  fatten = 0.5,
                  color = "black",
                  position = position_dodge(0.5)) +
     stat_summary(fun = "mean", geom = "point", size = 4, position = position_dodge(1), color = "black") + # group mean
     scale_fill_manual(values = c("ghostwhite", "tomato", "firebrick3"), # fill colours of the boxes
                       name = temp, # legend title (temperature label)
                       breaks = c("C", "FH", "FSH"), # order of the legend entries
                       labels = c("Control", "Female", expression(atop("Female", paste("and sperm"))))) + # text of the legend entries
     geom_jitter(shape = 1, size = 1.5, position = position_jitter(0.15)) + # jitter so the data points are not on top of each other
     labs(x = expression(bold(atop("Maternal heatwave", paste("treatment")))), y = "Son's mating frequency") + # x and y axis titles
     scale_x_discrete(breaks = c("C", "FH", "FSH"), # order of the groups on the x axis
                      labels = c("Control", "Female", expression(atop("Female", paste("and sperm"))))) + # group names shown on the x axis
     coord_cartesian(ylim = c(-0.2, 12.2)) + # visible y range (zooms without dropping data)
     scale_y_continuous(breaks = seq(0, 12, 3), # ticks from 0 to 12 in steps of 3
                        expand = c(0, 0)) + # no extra padding beyond the limits above
     theme_classic() + # base theme of the whole plot
     theme(
          panel.grid.major = element_blank(), # no major gridlines
          panel.border = element_blank(),     # no border around the panel
          panel.grid.minor = element_blank(), # no minor gridlines
          axis.line.x = element_line(color = "black", linewidth = 1),
          axis.line.y = element_line(color = "black", linewidth = 1),
          axis.text.x = element_text(color = "black", size = 12),
          axis.text.y = element_text(color = "black", size = 14),
          axis.title.x = element_text(face = "bold", size = 14, color = "black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
          axis.title.y = element_text(face = "bold", size = 12, color = "black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
          legend.position = "none", # comment this line out to show the legend
          panel.background = element_blank(),
          plot.background = element_rect(fill = "transparent", colour = NA))

# Save to the Desktop via a full path and an explicit `plot =`, so the working
# directory is never changed and the file written does not depend on last_plot().
ggsave(file.path("C:/Users/UEA/Desktop", "graphtransmatmate.png"), plot = graphtransmatmate,
       width = 3.5, height = 4.2, dpi = 300, bg = "transparent")




######################## MALE TOTAL TRANSGEN COUNT: PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENEITY OF VARIANCES ########################
names(transgenpaternalsum)

# Offspring totals split by paternal treatment; element names are the treatment
# labels "30" and "42". Reused by the histograms and normality tests below.
pat_offspring_by_temp <- split(transgenpaternalsum$Offspring.count.sum.20D,
                               transgenpaternalsum$Paternal.temperature.oC)

########### ! NAT COMMS DESCRIPTIVE STATS ####################


# describeBy() lives in the psych package; calling it as psych::describeBy() means
# the script no longer depends on library(psych) having been attached by hand.
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
psych::describeBy(transgenpaternalsum$Offspring.count.sum.20D, transgenpaternalsum$Paternal.temperature.oC)
# $`30`
#    vars  n    mean sd     median trimmed mad   min  max range skew kurtosis  se
# X1    1 48 1254.81 434.42 1200.5  1264.9 400.3 177 2582  2405 0.05     0.75 62.7
# 
# $`42`
#    vars  n mean   sd      median trimmed mad   min  max range skew  kurtosis  se
# X1    1 42 960.36 445.37  999.5  984.03 414.39   0 1787  1787 -0.45    -0.38 68.72


# Same summary, split by treatment and batch, returned as one matrix (mat = TRUE)
psych::describeBy(transgenpaternalsum$Offspring.count.sum.20D,
                  list(transgenpaternalsum$Paternal.temperature.oC, transgenpaternalsum$Batch),
                  mat = TRUE)
# item group1 group2 vars  n      mean       sd median   trimmed      mad min  max range       skew   kurtosis        se
# X11    1     30      1    1 18 1107.1111 489.5575 1050.5 1123.3750 546.3381 177 1777  1600 -0.1302218 -1.2193973 115.38982
# X12    2     42      1    1 12  933.5833 475.6331 1062.5  956.9000 452.1930   0 1634  1634 -0.4149534 -1.0370445 137.30344
# X13    3     30      2    1 30 1343.4333 379.0828 1316.0 1326.3750 309.8634 443 2582  2139  0.7479822  2.2310344  69.21073
# X14    4     42      2    1 30  971.0667 440.6908  999.5  995.3333 365.4609   0 1787  1787 -0.4336471 -0.2628504  80.45877

### in base
# NOTE(review): the axis labels below say "10 day"/"10D" although the column is
# named Offspring.count.sum.20D - confirm which is right before publishing these plots.
# 30
hist(pat_offspring_by_temp[["30"]],
     main = list("30", cex = 2), xlab = "10 day adult count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)
# 42
hist(pat_offspring_by_temp[["42"]],
     col = "red", density = 30, angle = 180, border = "red",
     main = list("42", cex = 2), xlab = "10 day adult count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(Offspring.count.sum.20D ~ Paternal.temperature.oC, data = transgenpaternalsum,
        ylab = "10D Adult Count", xlab = "Temperature")

# note: plot() would draw a scatterplot instead if x were left as an integer


########### Normality - Passed with Shapiro-Wilk
shapiro.test(pat_offspring_by_temp[["30"]]) # W = 0.97359, p-value = 0.3473
shapiro.test(pat_offspring_by_temp[["42"]]) # W = 0.96741, p-value = 0.2696

# Kolmogorov-Smirnov against a normal with the sample's own mean and sd.
# The earlier call ks.test(x, pnorm) compared the raw counts with the STANDARD normal
# N(0, 1); that is why it reported D = 1 / D = 0.92857 with p < 2.2e-16 - an artefact
# of the mis-specified null, not evidence of non-normality. Re-run to record new values.
# Caveats: the p-value is only approximate when mean/sd are estimated from the same
# data, and tied counts trigger a warning; treat Shapiro-Wilk as the primary test.
ks.test(pat_offspring_by_temp[["30"]], "pnorm",
        mean = mean(pat_offspring_by_temp[["30"]]), sd = sd(pat_offspring_by_temp[["30"]]))
ks.test(pat_offspring_by_temp[["42"]], "pnorm",
        mean = mean(pat_offspring_by_temp[["42"]]), sd = sd(pat_offspring_by_temp[["42"]]))



########### Homogeneity of Variances - Passed in all tests
bartlett.test(Offspring.count.sum.20D ~ Paternal.temperature.oC, data = transgenpaternalsum) # Bartlett's K-squared = 0.026848, df = 1, p-value = 0.8698
fligner.test(Offspring.count.sum.20D ~ Paternal.temperature.oC, data = transgenpaternalsum) # Fligner-Killeen:med chi-squared = 0.094537, df = 1, p-value = 0.7585
# leveneTest() lives in the car package; namespaced so library(car) need not be attached
car::leveneTest(Offspring.count.sum.20D ~ Paternal.temperature.oC, data = transgenpaternalsum)   #Df F value Pr(>F) 1  0.0628 0.8027


##################################################################################################################################################MALE TOTAL TRANSGEN COUNT OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 


### Transformation not necessary because the data are normal with homogeneous variances
## 2 sample (unpaired, equal-variance) t-test
# `paired = FALSE` has been dropped: unpaired is already the default, and the formula
# method of t.test() in R >= 4.4.0 stops with "cannot use 'paired' in formula method"
# whenever a `paired` argument is supplied. The test performed is unchanged.
t.test(Offspring.count.sum.20D ~ Paternal.temperature.oC, data = transgenpaternalsum, var.equal = TRUE)
# t = 3.1705, df = 88, p-value = 0.002095 # is a difference in total offspring production



######################## MALE TRANSGEN MATE: PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENEITY OF VARIANCES ########################
names(transgenpaternalsum)

# Mating counts split by paternal treatment; element names are the treatment labels
# "30" and "42". Reused by the histograms and normality tests below.
pat_matings_by_temp <- split(transgenpaternalsum$Count.matings,
                             transgenpaternalsum$Paternal.temperature.oC)

########### ! NAT COMMS DESCRIPTIVE STATS ####################

# describeBy() lives in the psych package; calling it as psych::describeBy() means
# the script no longer depends on library(psych) having been attached by hand.
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
psych::describeBy(transgenpaternalsum$Count.matings, transgenpaternalsum$Paternal.temperature.oC)
# $`30`
#    vars  n    mean sd     median trimmed mad   min  max range skew kurtosis  se
#X1   1    48   6.73 2.17      7    6.75 2.97      1  11    10 -0.11    -0.52 0.31
# 
# $`42`
#    vars  n mean   sd      median trimmed mad   min  max range skew  kurtosis  se
# X1    1 42 4.88 2.15          5    5.06 1.48   0     8     8 -0.56    -0.15 0.33

### in base
# Axis labels now name the variable actually plotted (Count.matings); they previously
# read "10 day adult count", copied over from the offspring-count section.
# 30
hist(pat_matings_by_temp[["30"]],
     main = list("30", cex = 2), xlab = "Mating count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)
# 42
hist(pat_matings_by_temp[["42"]],
     col = "red", density = 30, angle = 180, border = "red",
     main = list("42", cex = 2), xlab = "Mating count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(Count.matings ~ Paternal.temperature.oC, data = transgenpaternalsum,
        ylab = "Mating count", xlab = "Temperature")

# note: plot() would draw a scatterplot instead if x were left as an integer


########### Normality - Failed in heat, both groups negative skew and platykurtic
shapiro.test(pat_matings_by_temp[["30"]]) # W = 0.96297, p-value = 0.1331
shapiro.test(pat_matings_by_temp[["42"]]) # W = 0.93021, p-value = 0.01317

# Kolmogorov-Smirnov against a normal with the sample's own mean and sd.
# The earlier call ks.test(x, pnorm) compared the raw counts with the STANDARD normal
# N(0, 1); that is why it reported D = 0.97782 / D = 0.90341 with p < 2.2e-16 - an
# artefact of the mis-specified null. Re-run to record new values.
# Caveats: the p-value is only approximate when mean/sd are estimated from the same
# data, and tied counts trigger a warning; treat Shapiro-Wilk as the primary test.
ks.test(pat_matings_by_temp[["30"]], "pnorm",
        mean = mean(pat_matings_by_temp[["30"]]), sd = sd(pat_matings_by_temp[["30"]]))
ks.test(pat_matings_by_temp[["42"]], "pnorm",
        mean = mean(pat_matings_by_temp[["42"]]), sd = sd(pat_matings_by_temp[["42"]]))

########### Homogeneity of Variances - Similar variance
bartlett.test(Count.matings ~ Paternal.temperature.oC, data = transgenpaternalsum) # Bartlett's K-squared = 0.002307, df = 1, p-value = 0.9617
fligner.test(Count.matings ~ Paternal.temperature.oC, data = transgenpaternalsum) # Fligner-Killeen:med chi-squared = 0.30106, df = 1, p-value = 0.5832
# leveneTest() lives in the car package; namespaced so library(car) need not be attached
car::leveneTest(Count.matings ~ Paternal.temperature.oC, data = transgenpaternalsum)   #Df F value Pr(>F) 1  0.2122 0.6462



################################################################################################################################################## MALE TRANSGEN MATE OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 

########## Transformation with just hist and shapiro
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))

# Mating counts for each paternal treatment (names "30" and "42")
matings_by_temp <- split(transgenpaternalsum$Count.matings,
                         transgenpaternalsum$Paternal.temperature.oC)

## sqrt and log10 are the usual corrections for RIGHT skew; they are tried here for
## completeness even though the describeBy() output recorded earlier shows negative skew.
old_par <- par(mfrow = c(2, 2)) # 2 x 2 grid: one panel per transformation x treatment

# square-root transformation
hist(sqrt(matings_by_temp[["30"]]), main = "sqrt, 30", xlab = "sqrt(Count.matings)")
hist(sqrt(matings_by_temp[["42"]]), main = "sqrt, 42", xlab = "sqrt(Count.matings)")
shapiro.test(sqrt(matings_by_temp[["30"]]))
shapiro.test(sqrt(matings_by_temp[["42"]]))
#sqrt not normal

# log10 transformation (+ 0.01 so that zero counts do not become -Inf)
hist(log10(matings_by_temp[["30"]] + 0.01), main = "log10, 30", xlab = "log10(Count.matings + 0.01)")
hist(log10(matings_by_temp[["42"]] + 0.01), main = "log10, 42", xlab = "log10(Count.matings + 0.01)")
shapiro.test(log10(matings_by_temp[["30"]] + 0.01))
shapiro.test(log10(matings_by_temp[["42"]] + 0.01))
#log10  not normal
# transformations make worse

par(old_par) # restore the previous layout so later plots are not squeezed into the 2 x 2 grid

# As the heat group is not normal but the groups have similar variance, Mann-Whitney U is valid

#1) 2-sample Mann-Whitney U
# `paired = FALSE` has been dropped: unpaired is already the default, and the formula
# method of wilcox.test() in R >= 4.4.0 stops with "cannot use 'paired' in formula
# method" whenever a `paired` argument is supplied. The test performed is unchanged.
# With tied counts R cannot compute an exact p-value and falls back to the normal
# approximation (with a warning) despite exact = TRUE.
wilcox.test(Count.matings ~ Paternal.temperature.oC, data = transgenpaternalsum,
            exact = TRUE, conf.int = TRUE)
# W = 1450, p-value = 0.0003096# is a significant difference in mating



######################## FEMALE TOTAL TRANSGEN COUNT OFFSPRING: PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENEITY OF VARIANCES ########################
names(transgenmaternalsum)
transgenmaternalsum$Maternal.temperature.oC

# Offspring totals split by maternal treatment; element names are the treatment
# codes "C", "FH" and "FSH". Reused by the histograms and normality tests below.
mat_offspring_by_trt <- split(transgenmaternalsum$Offspring.count.sum.20D,
                              transgenmaternalsum$Maternal.temperature.oC)

########### ! NAT COMMS DESCRIPTIVE STATS ####################


# describeBy() lives in the psych package; calling it as psych::describeBy() means
# the script no longer depends on library(psych) having been attached by hand.
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
psych::describeBy(transgenmaternalsum$Offspring.count.sum.20D, transgenmaternalsum$Maternal.temperature.oC)
# $`C`
#    vars  n    mean sd     median trimmed mad   min  max range skew kurtosis  se
#X1    1  27 1113.37 459.29   1214 1126.78 341   0  2066  2066 -0.41    -0.12 88.39
# 
# $`FH`
#    vars  n mean   sd      median trimmed mad   min  max range skew  kurtosis  se
# X1    1 42  877 334.21  936.5  890.85 255.01   0 1495  1495 -0.41     0.11 51.57
# 
# $`FSH`
#    vars  n mean   sd      median trimmed mad   min  max range skew  kurtosis  se
# X1    1 34 632.85 450.53    654  608.96 540.41   0 1723  1723 0.35    -0.62 77.27

### in base
# Titles now show the group actually plotted (they previously read "30"/"42", copied
# from the paternal section).
# NOTE(review): the axis labels say "10 day"/"10D" although the column is named
# Offspring.count.sum.20D - confirm which is right before publishing these plots.
# C
hist(mat_offspring_by_trt[["C"]],
     main = list("C", cex = 2), xlab = "10 day adult count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)
# FH
hist(mat_offspring_by_trt[["FH"]],
     col = "red", density = 30, angle = 180, border = "red",
     main = list("FH", cex = 2), xlab = "10 day adult count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)
# FSH
hist(mat_offspring_by_trt[["FSH"]],
     col = "red", density = 30, angle = 180, border = "red",
     main = list("FSH", cex = 2), xlab = "10 day adult count", ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)

###### plotting differences
# base boxplots of data distribution grouped by maternal treatment
boxplot(Offspring.count.sum.20D ~ Maternal.temperature.oC, data = transgenmaternalsum,
        ylab = "10D Adult Count", xlab = "Temperature")

# note: plot() would draw a scatterplot instead if x were numeric


########### Normality - All passed in Shapiro-Wilk
shapiro.test(mat_offspring_by_trt[["C"]])   # W = 0.97606, p-value = 0.7646
shapiro.test(mat_offspring_by_trt[["FH"]])  # W = 0.97011, p-value = 0.3329
shapiro.test(mat_offspring_by_trt[["FSH"]]) # W = 0.95635, p-value = 0.1899

# Kolmogorov-Smirnov against a normal with the sample's own mean and sd.
# The earlier call ks.test(x, pnorm) compared the raw counts with the STANDARD normal
# N(0, 1); that is why it reported D = 0.96296 / 0.97619 / 0.91176 with p < 2.2e-16 -
# an artefact of the mis-specified null. Re-run to record new values.
# Caveats: the p-value is only approximate when mean/sd are estimated from the same
# data, and tied counts trigger a warning; treat Shapiro-Wilk as the primary test.
ks.test(mat_offspring_by_trt[["C"]], "pnorm",
        mean = mean(mat_offspring_by_trt[["C"]]), sd = sd(mat_offspring_by_trt[["C"]]))
ks.test(mat_offspring_by_trt[["FH"]], "pnorm",
        mean = mean(mat_offspring_by_trt[["FH"]]), sd = sd(mat_offspring_by_trt[["FH"]]))
ks.test(mat_offspring_by_trt[["FSH"]], "pnorm",
        mean = mean(mat_offspring_by_trt[["FSH"]]), sd = sd(mat_offspring_by_trt[["FSH"]]))

########### Homogeneity of Variances - Similar variance
bartlett.test(Offspring.count.sum.20D ~ Maternal.temperature.oC, data = transgenmaternalsum) # Bartlett's K-squared = 4.2917, df = 2, p-value = 0.117
fligner.test(Offspring.count.sum.20D ~ Maternal.temperature.oC, data = transgenmaternalsum) # Fligner-Killeen:med chi-squared = 4.2149, df = 2, p-value = 0.1216
# leveneTest() lives in the car package; namespaced so library(car) need not be attached
car::leveneTest(Offspring.count.sum.20D ~ Maternal.temperature.oC, data = transgenmaternalsum)   #Df F value Pr(>F) 2  2.2016  0.116


##################################################################################################################################################FEMALE TOTAL TRANSGEN COUNT OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC  ##################################### 


########## Shapiro-Wilk shows normality in all groups and variances are homogeneous, so a one-way ANOVA (2+ groups) can be used

# Fit the one-way ANOVA once and reuse the fitted model for the table and the post hoc test.
# Because the formula now uses `data =`, the term in the printed table is labelled
# `Maternal.temperature.oC` rather than `transgenmaternalsum$Maternal.temperature.oC`.
mat_offspring_aov <- aov(Offspring.count.sum.20D ~ Maternal.temperature.oC, data = transgenmaternalsum)

summary(mat_offspring_aov)
#                                             Df   Sum Sq  Mean Sq  F value   Pr(>F)    
#transgenmaternalsum$Maternal.temperature.oC   2  3499427  1749714   10.44 7.64e-05 ***
# Residuals                                   100 16762501 167625                     

# Crawley 2007 and Thomas 2015 recommend TukeyHSD for pairwise comparisons
TukeyHSD(mat_offspring_aov)
#           diff       lwr         upr     p adj
# FH-C   -236.3704 -476.6419    3.901133 0.0548669
# FSH-C  -480.5174 -731.6065 -229.428350 0.0000442
# FSH-FH -244.1471 -468.8594  -19.434710 0.0298341

# posthocTGH() lives in the userfriendlyscience package; namespaced so the call does
# not depend on library(userfriendlyscience) having been attached by hand.
# NOTE(review): userfriendlyscience may no longer be on CRAN - confirm it is installed.
# Games-Howell does not assume equal variances and copes with unequal group sizes;
# Tukey assumes equal variances.
# as.factor() is a no-op for a factor and guards against read.csv() returning the
# treatment column as character under R >= 4.0.
userfriendlyscience::posthocTGH(y = transgenmaternalsum$Offspring.count.sum.20D,
                                x = as.factor(transgenmaternalsum$Maternal.temperature.oC),
                                method = "games-howell")
#     n means variances
# C   27  1113    210944
# FH  42   877    111698
# FSH 34   633    202980
# 
#          t df      p
# C:FH   2.3 44 0.0650
# C:FSH  4.1 55 0.0004
# FH:FSH 2.6 59 0.0290


########################################################################################################################################FEMALE TRANSGEN MATE PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES  ###############################
names(transgenpaternalsum)

names(transgenmaternalsum)
transgenmaternalsum$Maternal.temperature.oC

########### ! NAT COMMS DESCRIPTIVE STATS ####################

### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(transgenmaternalsum$Count.matings, transgenmaternalsum$Maternal.temperature.oC)
# $`C`
#    vars  n    mean sd     median trimmed mad   min  max range skew kurtosis  se
#X1    1   27    6    2.39      6    6.13 1.48   0    10    10 -0.59    -0.17 0.46
# 
# $`FH`
#    vars  n mean   sd      median trimmed mad   min  max range skew  kurtosis  se
# X1    1  42 5.71  2.18      6    5.76    1.48   0   10    10 -0.26     0.15 0.34
# 
# $`FSH`
#    vars  n mean   sd      median trimmed mad   min  max range skew  kurtosis  se
# X1    1  34 4.06  2.52      4       4    2.97   0   9     9   0.21    -0.87 0.43

### in base
# Histograms of Count.matings (number of females producing offspring) for each
# maternal treatment, followed by a grouped boxplot.
# The titles and axis labels now name the variable and the group that is
# actually plotted; the previous "30"/"42" titles and "10 day adult count"
# labels were left over from a different analysis.
# A shared ylim keeps the three histograms on the same frequency scale.
mating_count_label <- "Number of matings producing offspring"

# C
hist(transgenmaternalsum$Count.matings[transgenmaternalsum$Maternal.temperature.oC == "C"],
     main = list("C", cex = 2), xlab = mating_count_label, ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)
# FH
hist(transgenmaternalsum$Count.matings[transgenmaternalsum$Maternal.temperature.oC == "FH"],
     col = "red", density = 30, angle = 180, border = "red",
     main = list("FH", cex = 2), xlab = mating_count_label, ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)
# FSH
hist(transgenmaternalsum$Count.matings[transgenmaternalsum$Maternal.temperature.oC == "FSH"],
     col = "red", density = 30, angle = 180, border = "red",
     main = list("FSH", cex = 2), xlab = mating_count_label, ylab = "Frequency", ylim = c(0, 60),
     nclass = 10)

###### plotting differences
# base boxplot of the mating counts grouped by maternal treatment
boxplot(transgenmaternalsum$Count.matings ~ transgenmaternalsum$Maternal.temperature.oC,
        ylab = mating_count_label, xlab = "Maternal treatment")

# notice plot has automatically produced a scatterplot if x is made as an integar


########### Normality of Count.matings within each maternal treatment
# Shapiro-Wilk results recorded from the earlier run (all p > 0.05):
#   C   W = 0.95447, p-value = 0.2746
#   FH  W = 0.96076, p-value = 0.1572
#   FSH W = 0.95948, p-value = 0.2348
#
# The Kolmogorov-Smirnov test is now run against a normal distribution with the
# group's own mean and sd. The earlier call, ks.test(x, pnorm), compared the raw
# counts with a standard normal N(0, 1), which is why it reported D > 0.8 and
# p < 2.2e-16 for every group regardless of shape.
# Caveats: estimating the parameters from the same data makes the K-S p-value
# conservative, and count data contain ties (ks.test will warn), so Shapiro-Wilk
# remains the primary check. K-S results need re-recording after re-running.
for (treatment_level in c("C", "FH", "FSH")) {
  group_counts <- transgenmaternalsum$Count.matings[
    transgenmaternalsum$Maternal.temperature.oC == treatment_level
  ]
  cat("\nMaternal treatment:", treatment_level, "\n")
  print(shapiro.test(group_counts))
  print(ks.test(group_counts, "pnorm",
                mean = mean(group_counts, na.rm = TRUE),
                sd = sd(group_counts, na.rm = TRUE)))
}

########### Homogeneity of Variances - Similar variance
# Three tests of equal variance of Count.matings across the maternal treatments;
# the recorded p-values are all > 0.05.
bartlett.test(transgenmaternalsum$Count.matings ~ transgenmaternalsum$Maternal.temperature.oC) # Bartlett's K-squared = 0.79432, df = 2, p-value = 0.6722
fligner.test(transgenmaternalsum$Count.matings ~ transgenmaternalsum$Maternal.temperature.oC) # Fligner-Killeen:med chi-squared = 1.5788, df = 2, p-value = 0.4541
# leveneTest() needs the car package to be loaded first:
#! need library(car)
leveneTest(transgenmaternalsum$Count.matings ~ transgenmaternalsum$Maternal.temperature.oC)   # recorded: Df = 2, F value = 0.708, Pr(>F) = 0.4951


################################################################################################################################################## FEMALE TRANSGEN MATE OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 


########## More powerful shapiro shows normality in all groups and variances homogenous so 2+ sample ANOVA can be used

# One-way ANOVA of the mating counts across the maternal treatments.
# The model is fitted once and the same fit feeds both the ANOVA table and Tukey's HSD.
maternal_mating_aov <- aov(
  transgenmaternalsum$Count.matings ~ transgenmaternalsum$Maternal.temperature.oC
)
summary(maternal_mating_aov)
#                                               Df Sum Sq Mean Sq  F value Pr(>F)   
# transgenmaternalsum$Maternal.temperature.oC   2   72.5   36.24    6.56 0.0021 **
#  Residuals                                  100  552.5    5.52                  

# Pairwise comparisons with Tukey's HSD (recommended by Crawley 2007 and Thomas 2015)
TukeyHSD(maternal_mating_aov)
#           diff       lwr        upr     p adj
# FH-C   -0.2857143 -1.665084  1.0936559 0.8749051
# FSH-C  -1.9411765 -3.382649 -0.4997039 0.0051311
# FSH-FH -1.6554622 -2.945509 -0.3654152 0.0080981

# Games-Howell pairwise comparisons; posthocTGH() needs the package named below
#! library(userfriendlyscience)
posthocTGH(
  y = transgenmaternalsum$Count.matings,
  x = transgenmaternalsum$Maternal.temperature.oC,
  method = "games-howell"
) # use games-howell when different sample sizes # tukey for equal
#     n means variances
# C   27   6.0       5.7
# FH  42   5.7       4.7
# FSH 34   4.1       6.4
# 
#         t df      p
# C:FH   0.5 52 0.8706
# C:FSH  3.1 57 0.0089
# FH:FSH 3.0 66 0.0099







#################################################################################################################################################################### MALE TOTAL TRANSGEN COUNT NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################
# Quick look at the paternal data set before modelling: column names, structure,
# and the treatment column itself.
names(transgenpaternalsum)
str(transgenpaternalsum)
transgenpaternalsum$Paternal.temperature.oC

#### USEFUL PACKAGES
# NOTE(review): lrtest() and r.squaredGLMM() are called further down but the packages
# providing them are not loaded in this section - presumably loaded earlier; confirm.
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()


#### Poisson family error structures
# The response is a count with a roughly normal distribution and homogeneous residual variance;
# a zero-bounded discrete distribution is nevertheless theoretically preferable for count data.

# Random term accounts for differences between batches/time/generation.
# NOTE(review): the original note said Batch "has been nested within treatment", but the
# mixed models fitted below use a plain (1|Batch) random intercept - confirm which is intended.


########### ! NAT COMMS MODEL SELECTION ####################

# Checking importance of the random factor by fitting Batch alone as a fixed effect
# in a Poisson GLM (this object is overwritten by the mixed model further down).
# NOTE(review): the str() output recorded at the top of the file shows Batch as an integer;
# unless it has been converted to a factor, this fits a single linear slope for Batch.
globalmodposs<-glm(Offspring.count.sum.20D ~ Batch, poisson(link = "log"), data=transgenpaternalsum)
summary(globalmodposs); AIC(globalmodposs) # 21285
# Proportion of the null deviance explained by Batch (pseudo R^2, Thomas et al., 2015)
pseudoR<-(globalmodposs$null.deviance-globalmodposs$deviance) / globalmodposs$null.deviance; pseudoR # 0.01246878(thomas et al., 2015)
lrtest(globalmodposs)# Batch explains little variation but is significant, so it is kept as a random term in the mixed model

### Creating a global mixed model
# Poisson GLMMs of total offspring on paternal treatment with a random intercept
# for Batch; the three fits differ only in the link function.
globalmodposs <- glmer(Offspring.count.sum.20D ~ Paternal.temperature.oC + (1|Batch),
                       poisson(link = "log"), data = transgenpaternalsum)
globalmodpossID <- glmer(Offspring.count.sum.20D ~ Paternal.temperature.oC + (1|Batch),
                         poisson(link = "identity"), data = transgenpaternalsum)
globalmodpossRT <- glmer(Offspring.count.sum.20D ~ Paternal.temperature.oC + (1|Batch),
                         poisson(link = "sqrt"), data = transgenpaternalsum)

#  (1|Batch) is more biologically correct than  (1|Batch/Paternal.temperature.oC) and (1|Paternal.temperature.oC/Batch)

# Small-sample corrected AIC:
#   AICc = -2*logLik + 2*k + 2*k*(k+1)/(n-k-1)
# k (number of estimated parameters) and n (number of observations) are read
# from the fitted model. The earlier hand-typed version left out the 2*k term
# and fixed k = 1 and n = 90, so it was not an AICc and could not fairly
# compare models with different numbers of parameters.
calc_aicc <- function(model) {
  model_loglik <- logLik(model)
  k <- attr(model_loglik, "df")
  n <- nobs(model)
  as.numeric(-2 * model_loglik) + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}

# Values recorded with the earlier (uncorrected) formula are kept for reference;
# re-record after re-running.
AICc <- calc_aicc(globalmodposs); AICc     # earlier formula gave 19395.86
AICc <- calc_aicc(globalmodpossID); AICc   # earlier formula gave 19460.91
AICc <- calc_aicc(globalmodpossRT); AICc   # earlier formula gave 19426.89
anova(globalmodposs, globalmodpossID); anova(globalmodposs, globalmodpossRT);anova(globalmodpossID, globalmodpossRT)
# no difference in link on AICc but the log-link poisson model has the lowest AIC

# R^2
#! library(MuMIn)
r.squaredGLMM(globalmodposs)
# Marginal R2 (R2m): proportion of variance explained by the fixed effects alone = 0.07096276
# Conditional R2 (R2c): proportion explained by fixed and random effects together = 0.07096276

summary(globalmodposs)
# Back-transform the fitted means from the log scale using the model's own
# coefficients. The earlier hard-coded version used 7.11 for one line and 7.1
# for the other, so the two treatment means were not on the same intercept.
paternal_offspring_coefs <- fixef(globalmodposs)
exp(paternal_offspring_coefs[["(Intercept)"]])  # reference level; earlier exp(7.11) = 1224.148
exp(sum(paternal_offspring_coefs))              # intercept + treatment effect; earlier exp(7.1-0.28) = 915.985




# http://glmm.wikidot.com/faq
# Overdispersion check for a fitted mixed model, based on the sum of squared
# Pearson residuals relative to the residual degrees of freedom.
#
# model: a fitted model object for which VarCorr(), fixef(), model.frame() and
#        residuals(type = "pearson") are available.
# Returns a named numeric vector:
#   chisq - sum of squared Pearson residuals
#   ratio - chisq / rdf (values well above 1 indicate overdispersion)
#   rdf   - residual degrees of freedom (rows in the model frame minus
#           variance parameters and fixed effects)
#   p     - upper-tail chi-squared p-value for chisq on rdf degrees of freedom
overdisp_fun <- function(model) {
     ## number of variance parameters in
     ##   an n-by-n variance-covariance matrix
     vpars <- function(m) {
          nrow(m)*(nrow(m)+1)/2
     }
     ## vapply keeps the result numeric even if VarCorr() returns an empty list
     n_variance_pars <- sum(vapply(VarCorr(model), vpars, numeric(1)))
     model.df <- n_variance_pars+length(fixef(model))
     rdf <- nrow(model.frame(model))-model.df
     rp <- residuals(model,type="pearson")
     Pearson.chisq <- sum(rp^2)
     prat <- Pearson.chisq/rdf
     pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
     c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}



# Overdispersion check on the log-link Poisson mixed model
overdisp_fun(globalmodposs)
# chisq      ratio        rdf          p 
# 15336.8586   176.2857    87.0000     0.0000 
# A ratio of >2 shows excessive overdispersion (Thomas et al., 2015)
# From histograms and the summary of the raw data, true overdispersion is likely (glmmADMB does not work).
# Recorded values: variance 212862.7371, mean 1117.4
mean(transgenpaternalsum$Offspring.count.sum.20D) # 1117.4
var(transgenpaternalsum$Offspring.count.sum.20D) #212862.7
# variance large relative to mean
# Frequency of each distinct offspring total
table(transgenpaternalsum$Offspring.count.sum.20D)

# Checking reasons: no other covariates and samples are already mean of males, no pseudo rep as different males and females used for each datapoint 
hist(transgenpaternalsum$Offspring.count.sum.20D) 
# <10 0s are true 0s of no offspring counts as each petri dish was sifted and any adults would have been collected, no escapees either.




#! The negative binomial fit below reaches its iteration limit even with maximal convergence
#! settings; there is no zero-inflation, the random factor matters so it cannot be removed,
#! the data are strongly overdispersed and the distribution looks fairly normal. A continuous
#! (Gaussian) mixed model is therefore fitted alongside it to compare fits and estimates.
# NOTE: the Gaussian fit is stored as `lm` because later code refers to it by that
# name; be aware that this variable shares its name with stats::lm().
lm<-lmer(Offspring.count.sum.20D ~ Paternal.temperature.oC + (1|Batch), data=transgenpaternalsum)
globalnegbinom <- glmer.nb(Offspring.count.sum.20D ~ Paternal.temperature.oC + (1|Batch), link= "log", data=transgenpaternalsum, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)))

# Residual diagnostics are run model by model so that each plot pairs a model's
# residuals with that same model's fitted values. Previously `sresid` had
# already been overwritten with the negative binomial residuals when it was
# plotted against the Gaussian fitted values, and the second set of
# residual-vs-predictor plots simply repeated the first.

## Gaussian mixed model
sresid <- resid(lm, type = "pearson"); hist(sresid) # normal residuals not strictly necessary, but look quite normal
fits <- fitted(lm); plot(sresid ~ fits)             # checking for heteroscedasticity; some wedging
plot(sresid ~ transgenpaternalsum$Replicate)               # no pattern
plot(sresid ~ transgenpaternalsum$Paternal.temperature.oC) # 42 bit wider
plot(sresid ~ transgenpaternalsum$Batch)                   # no pattern

## Negative binomial mixed model
# `sresid` and `fits` end up holding the negative binomial values, as before.
sresid <- resid(globalnegbinom, type = "pearson"); hist(sresid) # normal residuals not strictly necessary, but look quite normal
fits <- fitted(globalnegbinom); plot(sresid ~ fits)             # checking for heteroscedasticity; some wedging
plot(sresid ~ transgenpaternalsum$Replicate)               # no pattern
plot(sresid ~ transgenpaternalsum$Paternal.temperature.oC) # 42 bit wider
plot(sresid ~ transgenpaternalsum$Batch)                   # no pattern

library(LMERConvenienceFunctions)
mcp.fnc(lm)       # Provides a QQ plot and heteroscedasticity plot
plotLMER.fnc(lm)  # Plots the population mean relationship between fixed and dependent variables

mcp.fnc(globalnegbinom)
plotLMER.fnc(globalnegbinom)

# gaussian seems better residuals seem normal doesn't fail to iterate

########### ! NAT COMMS MODEL SIGNIFICANCE ####################

# `lm` here is the Gaussian mixed model fitted with lmer() above, not stats::lm().
# Single-term deletion test for the paternal treatment effect.
drop1(lm, test= "Chisq")
# Df    AIC    LRT  Pr(Chi)   
# <none>                     1356.8                   
# Paternal.temperature.oC  1 1364.6 9.7375 0.001805 **

########### ! NAT COMMS MODEL POST HOC ####################

# Coefficient table; recorded fixed-effect estimates are shown below.
summary(lm)
# Estimate Std. Error t value
# (Intercept)                1244.00      85.45   14.56
# Paternal.temperature.oC42  -302.18      92.41   -3.27

# Marginal (R2m) and conditional (R2c) R^2 for the mixed model
r.squaredGLMM(lm)
# R2m       R2c 
# 0.1045735 0.1341541

# Pairwise contrast between the two paternal treatments
library(lsmeans)
lsmeans(lm, pairwise~Paternal.temperature.oC, adjust="tukey")
# contrast estimate       SE    df t.ratio p.value
# 30 - 42   302.177 92.40563 89.34    3.27  0.0015





#################################################################################################################################################################### MALE TRANSGEN MATE NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################
# Quick look at the paternal data set before modelling the mating counts
names(transgenpaternalsum)
str(transgenpaternalsum)
transgenpaternalsum$Paternal.temperature.oC

#### USEFUL PACKAGES
# NOTE(review): lrtest() and r.squaredGLMM() are called further down but the packages
# providing them are not loaded in this section - presumably loaded earlier; confirm.
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()

########### ! NAT COMMS MODEL SELECTION ####################


#### Poisson family error structures
# The response is a count with a roughly normal distribution and homogeneous residual variance;
# a zero-bounded discrete distribution is nevertheless theoretically preferable for count data.

# Random term accounts for differences between batches/time/generation.
# NOTE(review): the original note said Batch "has been nested within treatment", but the
# mixed models fitted below use a plain (1|Batch) random intercept - confirm which is intended.

# Checking importance of the random factor by fitting Batch alone as a fixed effect
# in a Poisson GLM of Count.matings (this object is overwritten by the mixed model further down).
# NOTE(review): the str() output recorded at the top of the file shows Batch as an integer;
# unless it has been converted to a factor, this fits a single linear slope for Batch.
globalmodposs<-glm(Count.matings ~ Batch, poisson(link = "log"), data=transgenpaternalsum)
summary(globalmodposs)# 419.46
# Proportion of the null deviance explained by Batch (pseudo R^2, Thomas et al., 2015)
pseudoR<-(globalmodposs$null.deviance-globalmodposs$deviance) / globalmodposs$null.deviance; pseudoR # 8.343517e-05 (thomas et al., 2015)
lrtest(globalmodposs)# Batch explains little variation and is non-significant; it is still kept as a random term in the mixed model

### Creating a global mixed model
# Poisson GLMMs of the mating count on paternal treatment with a random
# intercept for Batch; the three fits differ only in the link function.
globalmodposs <- glmer(Count.matings ~ Paternal.temperature.oC + (1|Batch),
                       poisson(link = "log"), data = transgenpaternalsum)
globalmodpossID <- glmer(Count.matings ~ Paternal.temperature.oC + (1|Batch),
                         poisson(link = "identity"), data = transgenpaternalsum)
globalmodpossRT <- glmer(Count.matings ~ Paternal.temperature.oC + (1|Batch),
                         poisson(link = "sqrt"), data = transgenpaternalsum)

#  (1|Batch) is more biologically correct than  (1|Batch/Paternal.temperature.oC) and (1|Paternal.temperature.oC/Batch)

# Small-sample corrected AIC:
#   AICc = -2*logLik + 2*k + 2*k*(k+1)/(n-k-1)
# k (number of estimated parameters) and n (number of observations) are read
# from the fitted model. The earlier hand-typed version left out the 2*k term
# and fixed k = 1 and n = 90, so it was not an AICc.
calc_aicc <- function(model) {
  model_loglik <- logLik(model)
  k <- attr(model_loglik, "df")
  n <- nobs(model)
  as.numeric(-2 * model_loglik) + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}

# The earlier (uncorrected) formula gave 402.3212 for all three links;
# re-record after re-running.
AICc <- calc_aicc(globalmodposs); AICc
AICc <- calc_aicc(globalmodpossID); AICc
AICc <- calc_aicc(globalmodpossRT); AICc
anova(globalmodposs, globalmodpossID); anova(globalmodposs, globalmodpossRT);anova(globalmodpossID, globalmodpossRT)
# no difference in link on AICc

# R^2
#! library(MuMIn)
r.squaredGLMM(globalmodposs)
# Marginal R2 (R2m): proportion of variance explained by the fixed effects alone = 0.1401275
# Conditional R2 (R2c): proportion explained by fixed and random effects together = 0.1401275



# http://glmm.wikidot.com/faq
# Overdispersion check for a fitted mixed model, based on the sum of squared
# Pearson residuals relative to the residual degrees of freedom.
# (Same definition as the copy earlier in this file; repeated so this section
# can be run on its own.)
#
# model: a fitted model object for which VarCorr(), fixef(), model.frame() and
#        residuals(type = "pearson") are available.
# Returns a named numeric vector:
#   chisq - sum of squared Pearson residuals
#   ratio - chisq / rdf (values well above 1 indicate overdispersion)
#   rdf   - residual degrees of freedom (rows in the model frame minus
#           variance parameters and fixed effects)
#   p     - upper-tail chi-squared p-value for chisq on rdf degrees of freedom
overdisp_fun <- function(model) {
     ## number of variance parameters in
     ##   an n-by-n variance-covariance matrix
     vpars <- function(m) {
          nrow(m)*(nrow(m)+1)/2
     }
     ## vapply keeps the result numeric even if VarCorr() returns an empty list
     n_variance_pars <- sum(vapply(VarCorr(model), vpars, numeric(1)))
     model.df <- n_variance_pars+length(fixef(model))
     rdf <- nrow(model.frame(model))-model.df
     rp <- residuals(model,type="pearson")
     Pearson.chisq <- sum(rp^2)
     prat <- Pearson.chisq/rdf
     pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
     c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}

# Overdispersion check on the log-link Poisson mixed model of Count.matings
overdisp_fun(globalmodposs)
# chisq      ratio        rdf          p 
# 71.9230688  0.8267019 87.0000000  0.8780448 
# A ratio of >2 shows excessive overdispersion (Thomas et al., 2015); the recorded ratio is 0.83, so the Poisson model is fine
# NOTE(review): the original note here ("true overdispersion likely, glmadmb doesnt work") contradicts
# the recorded ratio and looks copied from the offspring-count section - confirm and remove if so.

# Assumption checks
sresid<-resid(globalmodposs, type="pearson"); hist(sresid) # normal residuals not strictly necessary, but look quite normal
fits<-fitted(globalmodposs); plot(sresid~fits) # checking for heteroscedasticity; little trend

# Pearson residuals against each recorded variable
plot(sresid~transgenpaternalsum$Replicate)# no pattern
plot(sresid~transgenpaternalsum$Paternal.temperature.oC) # 30 bit wider
plot(sresid~transgenpaternalsum$Batch) # no pattern

# Refit the treatment model and an intercept-only null model with the bobyqa optimiser and a
# raised evaluation limit; these two fits are compared in the significance tests that follow.
globalmodposs<-glmer(Count.matings ~ Paternal.temperature.oC + (1|Batch), poisson(link = "log"), data=transgenpaternalsum, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)))
nullmodposs<-glmer(Count.matings ~ 1 + (1|Batch), poisson(link = "log"), data=transgenpaternalsum, glmerControl(optimizer="bobyqa", optCtrl = list(maxfun = 100000)))


########### ! NAT COMMS MODEL SIGNIFICANCE ####################


# Three ways of testing the paternal treatment term: likelihood-ratio test of the
# treatment model against the null model, the equivalent anova() comparison,
# and single-term deletion.
lrtest(globalmodposs,nullmodposs)
anova(globalmodposs,nullmodposs, test="Chi")
drop1(globalmodposs, test="Chi")
# nullmodposs: Count.matings ~ 1 + (1 | Batch)
# globalmodposs: Count.matings ~ Paternal.temperature.oC + (1 | Batch)
# #Df LogLik Df Chisq Pr(>Chisq)    
# 1   3   -201                        
# 2   2   -208 -1  13.2    0.00028 ***


########### ! NAT COMMS MODEL POST HOC ####################


# Coefficient table for the treatment model.
# NOTE(review): the recorded output below lists random-effect groups
# "Batch:Paternal.temperature.oC" and "Paternal.temperature.oC", which does not match the
# (1 | Batch) formula printed above it - it appears to come from an earlier nested model; re-record.
summary(globalmodposs)
# Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
# Family: poisson  ( log )
# Formula: Count.matings ~ Paternal.temperature.oC + (1 | Batch)
# Data: transgenpaternalsum
# Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+05))
# 
# AIC      BIC   logLik deviance df.resid 
# 410.3    420.3   -201.1    402.3       86 
# 
# Scaled residuals: 
#      Min       1Q   Median       3Q      Max 
# -2.20929 -0.66659  0.05389  0.50652  1.64639 
# 
# Random effects:
#      Groups                        Name        Variance Std.Dev.
# Batch:Paternal.temperature.oC (Intercept) 0        0       
# Paternal.temperature.oC       (Intercept) 0        0       
# Number of obs: 90, groups:  Batch:Paternal.temperature.oC, 4; Paternal.temperature.oC, 2
# 
# Fixed effects:
#                                Estimate Std. Error z value Pr(>|z|)    
#     (Intercept)                1.90645    0.05564   34.26  < 2e-16 ***
#      Paternal.temperature.oC42 -0.32111    0.08930   -3.60 0.000323 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# Correlation of Fixed Effects:
#      (Intr)
# Ptrnl.t.C42 -0.623

# Back-transform the recorded log-scale estimates to mating counts:
# intercept alone, then intercept plus the Paternal.temperature.oC42 coefficient
exp(1.90645) #6.7
exp(1.90645-0.32111) #4.9


#################################################################################################################################################################### FEMALE TOTAL TRANSGEN COUNT NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################
# Quick look at the maternal data set before modelling the offspring totals
names(transgenmaternalsum)
str(transgenmaternalsum)
transgenmaternalsum$Maternal.temperature.oC

#### USEFUL PACKAGES
# NOTE(review): dispersiontest(), durbinWatsonTest() and lrtest() are used in this section;
# the comments further down name AER and car for the first two - confirm all are loaded.
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()

########### ! NAT COMMS MODEL SELECTION ####################


#### Poisson family error structures
# As data is a count with a relatively normal distribution and homogenity of variance in residuals however, theoretically better to have 0 bounded discrete distribution for count data

# Creating a global model
# Poisson GLMs of total offspring on maternal treatment; the three fits differ
# only in the link function.
globalmodposs <- glm(Offspring.count.sum.20D ~ Maternal.temperature.oC,
                     poisson(link = "log"), data = transgenmaternalsum)
globalmodpossID <- glm(Offspring.count.sum.20D ~ Maternal.temperature.oC,
                       poisson(link = "identity"), data = transgenmaternalsum)
globalmodpossRT <- glm(Offspring.count.sum.20D ~ Maternal.temperature.oC,
                       poisson(link = "sqrt"), data = transgenmaternalsum)

summary(globalmodposs); summary(globalmodpossID); summary(globalmodpossRT) # No R^2, AIC given
# AIC: 25998,AIC: 25998,AIC: 25998 # link change seems to do little
# (expected: with a single categorical predictor each link reproduces the
#  group means, so the fitted values, deviance and AIC coincide)

# Proportion of the null deviance explained (pseudo R^2, Thomas et al., 2015)
pseudoR <- (globalmodposs$null.deviance - globalmodposs$deviance) / globalmodposs$null.deviance
pseudoR # 0.1402246
pseudoR <- (globalmodpossID$null.deviance - globalmodpossID$deviance) / globalmodpossID$null.deviance
pseudoR # 0.1402246
pseudoR <- (globalmodpossRT$null.deviance - globalmodpossRT$deviance) / globalmodpossRT$null.deviance
pseudoR # 0.1402246
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

# Small-sample corrected information criteria:
#   AICc  = -2*logLik       + 2*k + 2*k*(k+1)/(n-k-1)
#   QAICc = -2*logLik/c_hat + 2*K + 2*K*(K+1)/(n-K-1), with K = k + 1 for c_hat
# k and n are read from the fitted model, and c_hat is the deviance-based
# dispersion (residual deviance / residual df, the same quantity called theta
# in the overdispersion check below). The earlier hand-typed version left out
# the 2*k term, fixed k = 1, used n = 102 although the model has 103
# observations, and hard-coded theta before it had been computed.
calc_aicc <- function(model) {
  model_loglik <- logLik(model)
  k <- attr(model_loglik, "df")
  n <- nobs(model)
  as.numeric(-2 * model_loglik) + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}
calc_qaicc <- function(model) {
  model_loglik <- logLik(model)
  c_hat <- model$deviance / model$df.residual
  k <- attr(model_loglik, "df") + 1
  n <- nobs(model)
  as.numeric(-2 * model_loglik) / c_hat + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}

# Values from the earlier (uncorrected) formulas are kept for reference;
# re-record after re-running.
AICc <- calc_aicc(globalmodposs); AICc     # earlier formula gave 25992.47
qAICc <- calc_qaicc(globalmodposs); qAICc  # earlier formula gave 103.3398


## Overdispersion check
# Standard four-panel glm diagnostic plots, then reset the plotting layout
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
# Dispersion parameter (Thomas et al. 2015): residual deviance / residual df, i.e. how much variation is left
# unexplained after fitting the distribution. Values > 1 indicate overdispersion.
# Recorded: theta = 251.6213 (massively overdispersed); sample variance 198646, mean 858
theta<-globalmodposs$deviance/globalmodposs$df.residual; theta
# Alternative test; dispersiontest() needs the AER package:
#! library(AER) alternative test
dispersiontest(globalmodposs) # dispersion 201.2849  
mean(transgenmaternalsum$Offspring.count.sum.20D) #858.3689
var(transgenmaternalsum$Offspring.count.sum.20D) #198646.4


# Checking reasons: no other covariates and samples are sum of males, no pseudo rep as different males and females used for each datapoint 
hist(transgenmaternalsum$Offspring.count.sum.20D) # > 10 cases of 0. 
# True overdispersion likely, theta >20 so negative binomial best  

## -ve binomial cannot converge
library(MASS)
# Negative binomial GLM with the iteration cap set to 25. According to the original note it does not
# converge with this cap or with a larger one, so a quasi-Poisson model is fitted instead (below).
matsumnvebinom<-glm.nb(Offspring.count.sum.20D ~ Maternal.temperature.oC, link = "log", data=transgenmaternalsum, control=glm.control(maxit=25))
# avoiding as output does not seem improved, many outliers
# Diagnostic plots for the negative binomial and the Poisson fits, for comparison
par(mfrow=c(2,2)); plot(matsumnvebinom);par(mfrow=c(1,1))
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))

## quasi-poisson
# Same model formula as the Poisson fit, with a quasipoisson family
globalmodquasi<-glm(Offspring.count.sum.20D ~ Maternal.temperature.oC, quasipoisson(link = "log"), data=transgenmaternalsum) 

par(mfrow=c(2,2)); plot(globalmodquasi);par(mfrow=c(1,1))
summary(globalmodquasi) 

# Proportion of the null deviance explained (pseudo R^2, Thomas et al., 2015)
pseudoR<-(globalmodquasi$null.deviance-globalmodquasi$deviance) / globalmodquasi$null.deviance # (thomas et al., 2015)
pseudoR # 0.1402246 

# Recorded result is NA: no likelihood-based criterion is available for the quasipoisson fit.
# General form noted by the author: qAICc <- (-2*logLik(model1)/Theta) + (2*p*(p+1)/(n-p-1)),
# with p parameters and sample size n.
# NOTE(review): the original trailing remark "AICc smaller so model more efficient" cannot follow from an NA - confirm.
AICc<-(-2*logLik(globalmodquasi))+((2*1*(1+1)/(102-1-1))); AICc

### Assumption checks; deviance residuals (contribution of each observation to the residual deviance)
### are used rather than Pearson residuals, as recommended by Thomas et al. (2015)
summary(globalmodposs)
summary(globalmodquasi)

# 1) Errors normally distributed? - NOT NECESSARY BUT NOT IMPROVED

# poisson
# Histogram, Shapiro-Wilk, Kolmogorov-Smirnov, normal Q-Q plot and the standard glm diagnostic panels
devresid<-resid(globalmodposs, type = "deviance"); hist(devresid)
# NOTE(review): ks.test(devresid, pnorm) compares against a standard normal N(0, 1); the recorded
# deviance residuals span roughly -47 to 36, so this test also rejects on scale alone - confirm intent.
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))

# quasipoisson
# Same checks on the quasi-Poisson fit (devresid is overwritten)
devresid<-resid(globalmodquasi, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodquasi);par(mfrow=c(1,1))
# In both models the Q-Q points pull down on the left and the deviance-residual histogram shows some negative skew
# -both Q-Q points pull down on left, devresid histogram  some negative skew

# 2) Homogeneous variance of residuals (homoscedasticity) - NOT NECESSARY BUT NOT IMPROVED
# For each model: deviance residuals against fitted values, a Fligner-Killeen
# test of equal residual variance across maternal treatments, and the standard
# glm diagnostic panels.

# poisson
# This half now diagnoses the Poisson fit; it previously repeated the
# quasi-Poisson model under the "poisson" heading.
devresid <- resid(globalmodposs, type = "deviance")
plot(devresid ~ globalmodposs$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid ~ transgenmaternalsum$Maternal.temperature.oC)
par(mfrow = c(2, 2)); plot(globalmodposs); par(mfrow = c(1, 1))

# quasipoisson
# `devresid` ends up holding the quasi-Poisson residuals, as before.
devresid <- resid(globalmodquasi, type = "deviance")
plot(devresid ~ globalmodquasi$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid ~ transgenmaternalsum$Maternal.temperature.oC)
par(mfrow = c(2, 2)); plot(globalmodquasi); par(mfrow = c(1, 1))
#  P1 little wedge/slope, test failed
#  P1 little wedge/slope, test failed

# 3) Independence of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES
# durbinWatsonTest() needs the car package:
#! need library(car)
durbinWatsonTest(globalmodquasi) # test passed

# 5) No bias by unduly influential datapoints - YES

# poisson
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
influence<-influence.measures(globalmodposs); summary(influence) 

# quasipoisson (this heading previously read "-vebinom", but the model checked is globalmodquasi)
par(mfrow=c(2,2)); plot(globalmodquasi);par(mfrow=c(1,1))
influence<-influence.measures(globalmodquasi); summary(influence) 
# 40 cases in poisson with larger Cook's distance, reduced to 6 with Cook's distance < 0.1

# 6) Independent variables measured without error - BEST OF ABILITY

## Overdispersion re-check
# Residual deviance / residual df for the quasi-Poisson fit
theta<-globalmodquasi$deviance/globalmodquasi$df.residual; theta #  251.6213
# Theta still large


###  MODEL REFINEMENT
# Treatment model and intercept-only null model, both quasi-Poisson with a log link
globalmodquasi<-glm(Offspring.count.sum.20D ~ Maternal.temperature.oC, quasipoisson(link = "log"), data=transgenmaternalsum) 
nullmodquasi<-glm(Offspring.count.sum.20D ~ 1, quasipoisson(link = "log"), data=transgenmaternalsum)

# Pseudo R^2 of the null model (Thomas et al., 2015); recorded as 0 because its deviance equals the null deviance
pseudoR<-(nullmodquasi$null.deviance-nullmodquasi$deviance) / nullmodquasi$null.deviance; pseudoR # (thomas et al., 2015) # [1] 0

### Global model explains offspring significantly more than null model because

# anova comparison of the treatment model against the null model
anova(globalmodquasi, nullmodquasi, test = "Chi")

# Model 1: Offspring.count.sum.20D ~ Maternal.temperature.oC
# Model 2: Offspring.count.sum.20D ~ 1
# Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
# 1       100      25162                          
# 2       102      29266 -2  -4103.8 5.032e-05 ***
# Sequential analysis of deviance table for the treatment model
anova(globalmodquasi, test = "Chi")
#                         Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                                      102      29266              
# Maternal.temperature.oC  2   4103.8       100      25162 5.032e-05 ***

## likelihood ratio test and AIC not available for the quasipoisson fit

########### ! NAT COMMS MODEL SIGNIFICANCE ####################


# Single-term deletion test for the maternal treatment term
drop1(globalmodquasi, test = "Chi")
# <none>                        25162                          
# Maternal.temperature.oC  2    29266      19.794 5.032e-05 ***
# lrtest(globalmodquasi, nullmodquasi) doesnt work


########### ! NAT COMMS MODEL POST HOC ####################


# Coefficient table for the treatment model; the recorded output follows
summary(globalmodquasi)
# Call:
#      glm(formula = Offspring.count.sum.20D ~ Maternal.temperature.oC, 
#          family = quasipoisson(link = "log"), data = transgenmaternalsum)
# 
# Deviance Residuals: 
#      Min       1Q   Median       3Q      Max  
# -47.188   -8.394    1.871    7.892   35.653  
# 
# Coefficients:
#                                  Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)                 7.01515    0.08305  84.472  < 2e-16 ***
#      Maternal.temperature.oCFH  -0.23864    0.11192  -2.132   0.0354 *  
#      Maternal.temperature.oCFSH -0.56491    0.12858  -4.394 2.78e-05 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for quasipoisson family taken to be 207.3235)
# 
# Null deviance: 29266  on 102  degrees of freedom
# Residual deviance: 25162  on 100  degrees of freedom
# AIC: NA
# 
# Number of Fisher Scoring iterations: 5

# Back-transform the recorded log-scale estimates to offspring counts:
# intercept (C), intercept + FH coefficient, intercept + FSH coefficient
exp(7.01515) # 1113.374
exp(7.01515-0.23864) # 877.0026
exp(7.01515-0.56491) # 632.8542


# All pairwise (Tukey) contrasts between maternal treatments on the link scale
library(multcomp)
summary(glht(globalmodquasi, mcp(Maternal.temperature.oC="Tukey")))
# Multiple Comparisons of Means: Tukey Contrasts
# 
# 
# Fit: glm(formula = Offspring.count.sum.20D ~ Maternal.temperature.oC, 
#          family = quasipoisson(link = "log"), data = transgenmaternalsum)
# 
# Linear Hypotheses:
#                      Estimate Std. Error z value Pr(>|z|)    
#      FH - C == 0    -0.2386     0.1119  -2.132   0.0829 .  
#      FSH - C == 0   -0.5649     0.1286  -4.394   <0.001 ***
#      FSH - FH == 0  -0.3263     0.1235  -2.641   0.0224 *  


# The same pairwise contrasts via lsmeans, Tukey-adjusted
library(lsmeans)
lsmeans(globalmodquasi, pairwise~Maternal.temperature.oC, adjust="tukey")
# contrast  estimate        SE df z.ratio p.value
# C - FH   0.2386401 0.1119167 NA   2.132  0.0834
# C - FSH  0.5649090 0.1285772 NA   4.394  <.0001
# FH - FSH 0.3262689 0.1235472 NA   2.641  0.0225



#################################################################################################################################################################### FEMALE TRANSGEN MATE NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################
# Quick look at the maternal data set before modelling the mating counts
names(transgenmaternalsum)
str(transgenmaternalsum)
transgenmaternalsum$Maternal.temperature.oC

#### USEFUL PACKAGES
# NOTE(review): dispersiontest(), durbinWatsonTest() and lrtest() are used in this section;
# the comments further down name AER and car for the first two - confirm all are loaded.
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()


########### ! NAT COMMS MODEL SELECTION ####################


#### Poisson family error structures
# As data is a count with a relatively normal distribution and homogenity of variance in residuals however, theoretically better to have 0 bounded discrete distribution for count data

# Creating a global model
# Poisson GLMs of the mating count on maternal treatment; the three fits differ
# only in the link function.
globalmodposs <- glm(Count.matings ~ Maternal.temperature.oC,
                     poisson(link = "log"), data = transgenmaternalsum)
globalmodpossID <- glm(Count.matings ~ Maternal.temperature.oC,
                       poisson(link = "identity"), data = transgenmaternalsum)
globalmodpossRT <- glm(Count.matings ~ Maternal.temperature.oC,
                       poisson(link = "sqrt"), data = transgenmaternalsum)

summary(globalmodposs); summary(globalmodpossID); summary(globalmodpossRT) # No R^2, AIC given
# AIC: 481.24,AIC: 481.24,AIC: 481.24 # link change seems to do little
# (expected: with a single categorical predictor each link reproduces the
#  group means, so the fitted values, deviance and AIC coincide)

# Proportion of the null deviance explained (pseudo R^2, Thomas et al., 2015)
pseudoR <- (globalmodposs$null.deviance - globalmodposs$deviance) / globalmodposs$null.deviance
pseudoR # 0.09675123
pseudoR <- (globalmodpossID$null.deviance - globalmodpossID$deviance) / globalmodpossID$null.deviance
pseudoR # 0.09675123
pseudoR <- (globalmodpossRT$null.deviance - globalmodpossRT$deviance) / globalmodpossRT$null.deviance
pseudoR # 0.09675123
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

# Small-sample corrected information criteria:
#   AICc  = -2*logLik       + 2*k + 2*k*(k+1)/(n-k-1)
#   QAICc = -2*logLik/c_hat + 2*K + 2*K*(K+1)/(n-K-1), with K = k + 1 for c_hat
# k and n are read from the fitted model, and c_hat is the deviance-based
# dispersion (residual deviance / residual df, the same quantity called theta
# in the overdispersion check below). The earlier hand-typed version left out
# the 2*k term, fixed k = 1, used n = 102 although the model has 103
# observations, and hard-coded a rounded theta of 1.345.
calc_aicc <- function(model) {
  model_loglik <- logLik(model)
  k <- attr(model_loglik, "df")
  n <- nobs(model)
  as.numeric(-2 * model_loglik) + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}
calc_qaicc <- function(model) {
  model_loglik <- logLik(model)
  c_hat <- model$deviance / model$df.residual
  k <- attr(model_loglik, "df") + 1
  n <- nobs(model)
  as.numeric(-2 * model_loglik) / c_hat + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}

# Values from the earlier (uncorrected) formulas are kept for reference;
# re-record after re-running.
AICc <- calc_aicc(globalmodposs); AICc     # earlier formula gave 475.277
qAICc <- calc_qaicc(globalmodposs); qAICc  # earlier formula gave 353.38

## Overdispersion check
# Standard four-panel glm diagnostic plots, then reset the plotting layout
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
# Dispersion parameter (Thomas et al. 2015): residual deviance / residual df, i.e. how much variation is left
# unexplained after fitting the distribution. Recorded: theta = 1.345705, only a little overdispersed; Poisson OK
theta<-globalmodposs$deviance/globalmodposs$df.residual; theta
# Alternative test; dispersiontest() needs the AER package:
#! library(AER) alternative test
dispersiontest(globalmodposs) # dispersion 1.072105  

# 1) Errors normally distributed? - NOT NECESSARY, SOMEWHAT NORMAL

# poisson
# Histogram, Shapiro-Wilk, Kolmogorov-Smirnov (against a standard normal), normal Q-Q plot
# and the standard glm diagnostic panels for the deviance residuals
devresid<-resid(globalmodposs, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
# Q-Q points pull down on the left, deviance-residual histogram relatively normal and K-S passed 

# 2) Homogeneous variance of residuals (homoscedasticity) - NOT NECESSARY, SOMEWHAT SIMILAR VARIANCE

# poisson
# Deviance residuals against fitted values, plus a Fligner-Killeen test across maternal treatments
devresid<-resid(globalmodposs, type = "deviance")
plot(devresid ~ globalmodposs$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~transgenmaternalsum$Maternal.temperature.oC) 
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
#  P1 no wedge, slight slope, test failed

# 3) Independence of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES
# durbinWatsonTest() needs the car package:
#! need library(car)
durbinWatsonTest(globalmodposs) # test passed

# 5) No bias by unduly influential datapoints - YES

# poisson
par(mfrow=c(2,2)); plot(globalmodposs);par(mfrow=c(1,1))
influence<-influence.measures(globalmodposs); summary(influence) 
# 5 cases flagged, but Cook's distance < 0.05

# 6) Independent variables measured without error - BEST OF ABILITY


###  MODEL REFINEMENT
# Treatment model and intercept-only null model, both Poisson with a log link
globalmodposs <- glm(Count.matings ~ Maternal.temperature.oC, poisson(link = "log"), data = transgenmaternalsum)
nullmodposs <- glm(Count.matings ~ 1, poisson(link = "log"), data = transgenmaternalsum)

# Pseudo R^2 of the null model (Thomas et al., 2015); effectively 0 because its
# deviance equals the null deviance
pseudoR <- (nullmodposs$null.deviance - nullmodposs$deviance) / nullmodposs$null.deviance; pseudoR # -3.815378e-16

# Small-sample corrected AIC:
#   AICc = -2*logLik + 2*k + 2*k*(k+1)/(n-k-1)
# k and n are read from the fitted model. The earlier hand-typed version left
# out the 2*k term and fixed k = 1 and n = 102 for every model, so the null
# model (1 parameter) and the treatment model (3 parameters) were not penalised
# for their different sizes.
calc_aicc <- function(model) {
  model_loglik <- logLik(model)
  k <- attr(model_loglik, "df")
  n <- nobs(model)
  as.numeric(-2 * model_loglik) + 2 * k + (2 * k * (k + 1)) / (n - k - 1)
}
calc_aicc(globalmodposs)              # treatment model, printed for comparison
AICc <- calc_aicc(nullmodposs); AICc  # null model; earlier formula gave 489.6915 - re-record after re-running

### Global model explains the mating count significantly better than the null model because

# 1) AIC Lower, R^2 higher

# 2) anova comparison of the treatment model against the null model
anova(globalmodposs, nullmodposs, test = "Chi")
# Model 1: Count.matings ~ Maternal.temperature.oC
# Model 2: Count.matings ~ 1
#   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
# 1       100     134.57                          
# 2       102     148.99 -2  -14.415 0.0007412 ***

# Sequential analysis of deviance table for the treatment model
anova(globalmodposs, test = "Chi")
#                          Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                                      102     148.99              
# Maternal.temperature.oC  2   14.415       100     134.57 0.0007412 ***

# 3) likelihood ratio test 
lrtest(globalmodposs, nullmodposs)
#   #Df  LogLik Df  Chisq Pr(>Chisq)    
# 1   3 -237.62                         
# 2   1 -244.83 -2 14.415  0.0007412 ***

########### ! NAT COMMS MODEL SIGNIFICANCE ####################


# 4) drop1: single-term deletion test for the maternal treatment term
drop1(globalmodposs, test = "Chi")
#                          Df Deviance    AIC    LRT  Pr(>Chi)    
# <none>                       134.57 481.24                     
# Maternal.temperature.oC  2   148.99 491.65 14.415 0.0007412 ***

########### ! NAT COMMS MODEL POST HOC ####################

# Coefficient table for the treatment model; the recorded output follows
summary(globalmodposs)
# Call:
#      glm(formula = Count.matings ~ Maternal.temperature.oC, family = poisson(link = "log"), 
#          data = transgenmaternalsum)
# 
# Deviance Residuals: 
#      Min       1Q   Median       3Q      Max  
# -3.4641  -0.7584   0.0000   0.5194   2.1099  
# 
# Coefficients:
#                             Estimate Std. Error z value Pr(>|z|)    
# (Intercept)                 1.79176    0.07857  22.805  < 2e-16 ***
# Maternal.temperature.oCFH  -0.04879    0.10168  -0.480  0.63135    
# Maternal.temperature.oCFSH -0.39087    0.11584  -3.374  0.00074 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for poisson family taken to be 1)
# 
# Null deviance: 148.99  on 102  degrees of freedom
# Residual deviance: 134.57  on 100  degrees of freedom
# AIC: 481.24
# 
# Number of Fisher Scoring iterations: 5


# Back-transform the recorded log-scale estimates to mating counts:
# intercept (C), intercept + FH coefficient, intercept + FSH coefficient
exp(1.79176) # 6.000 (the earlier note of 1113.374 belonged to the offspring-count model)
exp(1.79176-0.04879) # 5.71429
exp(1.79176-0.39087) # 4.058811


# All pairwise (Tukey) contrasts between maternal treatments on the link scale
library(multcomp)
summary(glht(globalmodposs, mcp(Maternal.temperature.oC="Tukey")))
# Multiple Comparisons of Means: Tukey Contrasts
# 
# 
# Fit: glm(formula = Count.matings ~ Maternal.temperature.oC, family = poisson(link = "log"), 
#          data = transgenmaternalsum)
# 
# Linear Hypotheses:
#      Estimate Std. Error z value Pr(>|z|)   
#      FH - C == 0   -0.04879    0.10168  -0.480  0.88050   
#      FSH - C == 0  -0.39087    0.11584  -3.374  0.00225 **
#      FSH - FH == 0 -0.34208    0.10683  -3.202  0.00377 **

# The same pairwise contrasts via lsmeans, Tukey-adjusted
library(lsmeans)
lsmeans(globalmodposs, pairwise~Maternal.temperature.oC, adjust="tukey")
# $contrasts
# contrast   estimate        SE df z.ratio p.value
# C - FH   0.04879016 0.1016834 NA   0.480  0.8808
# C - FSH  0.39086631 0.1158413 NA   3.374  0.0021
# FH - FSH 0.34207614 0.1068318 NA   3.202  0.0039










#### MATE TOTAL TRANSGEN COUNT SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis
# The relative effect of a single 5d 42oC male heatwave on the 20D reproductive fitness (adult offspring counts) of their sons stretched with a series of 13 females 

#Response variable (dependent):           20D reproductive fitness * 13 females; (count +/- positive skew) 

#Global Fixed variables (independent):          
#    Categorical                          temperature (control, males heated 42)  
#    Covariates                           NA
#    Non-linear terms                     NA
#    Interactions                         NA

#Random terms:                            Batch (oviblock)

#---------Misc

# Simple analysis:                       Independent t-test    
# Non-para:                              Mann whitney U
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats
# normal shapiro-wilk, variances similar

# t.test(transgenpaternalsum$Offspring.count.sum.20D ~ transgenpaternalsum$Paternal.temperature.oC, var.equal = TRUE, paired = FALSE)
# # t = 3.1705, df = 88, p-value = 0.002095 

# heatwaves lower reproductive fitness

#---------GLM Model refinement

# # Overdispersion parameter:                177.2714 (overdisp func) for poisson, negative binomial cannot reach full iterations

# Error family (+link function):           poisson (log)

# Model refinement method(s):              AICc comparison, anova(), lmtest()

# Most plausible/final model(s):           globalmodposs<-glmer(Offspring.count.sum.20D ~ Paternal.temperature.oC + (1|Paternal.temperature.oC/Batch), poisson(link = "log"), data=transgenpaternalsum)

# AIC(c)                                   19295.1 
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      R2m = 0.0709566  R2c = 0.0709566  (MuMIn)
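# Model significance:                      from analysis of deviance lrtest(model, null): X^2 (4, 86) = 5.0575 p = 4e-04
#! NOTE p = 4e-04 is the Wald z p-value from the model report below; a chi-square of 5.0575 on 1 df would give p ~ 0.025. Re-check against the lrtest() output - TODO confirm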

# Hypothesis interpretation:  Paternal heatwaves transgenerationally reduce sons' reproductive fitness


#--------Model assumptions

#Residuals normally distributed:               SOMEWHAT
#    hist of standardized resids:                  symmetrical bell shaped 
# not necessary for residuals to be normal, but they look quite normal

#Homogeneity of variance:                      SOMEWHAT
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
# homoscedasticity not necessary, small amount of wedging

#Independence of variables (no-collinearity):  YES 
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
# no patterns with other variables

#No serial auto-correlation                     TIME ACCOUNTED FOR IN RANDOM

#No bias by unduly influential datapoints:      SOMEWHAT

#Independent variables measured without error:  TO BEST OF ABILITY

#! NOTE 
# Overdispersion cannot easily be accounted for: there is no quasipoisson family for mixed models, and the negative binomial fit reached its iteration limit. The exp() estimates are relatively OK. A Gaussian ANOVA may be better?


#-------Model report
#term                    parameter+/-se   test-stat(wald z)   d.f.      P
#(Intercept)                7.10648  0.04942                143.81   <2e-16 ***
#Paternal.temperature.oC42 -0.24769  0.06997                 -3.54    4e-04 ***



#### MALE TRANSGEN MATE SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis
# The relative effect of a single 5d 42oC male heatwave on the number of females, in a series of 13, their sons can fertilise 

#Response variable (dependent):           Number of females producing offspring; (count +/- positive skew) 

#Global Fixed variables (independent):          
#    Categorical                          temperature (control, males heated 42)  
#    Covariates                           NA
#    Non-linear terms                     NA
#    Interactions                         NA

#Random terms:                            Batch (oviblock)

#---------Misc

# Simple analysis:                       Independent t-test but not normal   
# Non-para:                              Mann whitney U
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats
# As the heat group is not normal but the groups have similar variance, a Mann-Whitney U test is valid

# 1) 2-sample mann whitney U
# wilcox.test(transgenpaternalsum$Count.matings ~ transgenpaternalsum$Paternal.temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE)
#  W = 1450, p-value = 0.0003096

# Sons of heated males fertilise significantly fewer females


#---------GLM Model refinement

# # Overdispersion parameter:                0.8363148 (overdisp func) for poisson so not overdispersed

# Error family (+link function):           poisson (log)

# Model refinement method(s):              AICc comparison, anova(), lmtest()

# Most plausible/final model(s):           globalmodposs<-glmer(Count.matings ~ Paternal.temperature.oC + (1|Paternal.temperature.oC/Batch), poisson(link = "log"), data=transgenpaternalsum)

# AIC(c)                                   402.3212 
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      R2m = 0.1401275  R2c = 0.1401275  (MuMIn)
# Model significance:                      from analysis of deviance lrtest(model, null): X^2 (4, 86) = 5.6853 p = 0.01711     

# Hypothesis interpretation:  Paternal heatwave transgenerationally reduces the frequency of fertilisations sons can achieve


#--------Model assumptions

#Residuals normally distributed:               SOMEWHAT
#    hist of standardized resids:             symmetrical bell shaped 
# not necessary for residuals to be normal, but they look quite normal

#Homogeneity of variance:                      SOMEWHAT
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
# homoscedasticity not necessary, little trend

#Independence of variables (no-collinearity):  YES 
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
# no patterns with other variables

#No serial auto-correlation                     TIME ACCOUNTED FOR IN RANDOM

#No bias by unduly influential datapoints:      SOMEWHAT

#Independent variables measured without error:  TO BEST OF ABILITY


#-------Model report
#term                    parameter+/-se   test-stat(wald z)   d.f.      P
# (Intercept)                1.90645 0.05564   34.26                < 2e-16 ***
# Paternal.temperature.oC42 -0.32111 0.08930   -3.60                0.000323 ***




#### FEMALE TOTAL TRANSGEN COUNT SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis
# The relative effect of a single 5d 42oC female or stored sperm heatwaves on the 20D reproductive fitness (adult offspring counts) of their sons stretched with a series of 13 females 

#Response variable (dependent):           20D reproductive fitness * 13 females; (count +/- positive skew) 

#Global Fixed variables (independent):          
#    Categorical                          temperature (control, females heated 42, stored sperm 42)  
#    Covariates                           NA
#    Non-linear terms                     NA
#    Interactions                         NA

#Random terms:                            (oviblock)

#---------Misc

# Simple analysis:                       ANOVA      
# Non-para:                              Kruskal-Wallis
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats
# More powerful shapiro shows normality in all groups and variances homogenous so 2+ sample ANOVA can be used

#summary(aov(transgenmaternalsum$Offspring.count.sum.20D~ transgenmaternalsum$Maternal.temperature.oC))
# #                                             Df   Sum Sq  Mean Sq  F value   Pr(>F)    
# #transgenmaternalsum$Maternal.temperature.oC   2  3499427  1749714   10.44 7.64e-05 ***
# # Residuals                                   100 16762501 167625                     

# TukeyHSD(aov(transgenmaternalsum$Offspring.count.sum.20D~ transgenmaternalsum$Maternal.temperature.oC))
# #           diff       lwr         upr     p adj
# # FH-C   -236.3704 -476.6419    3.901133 0.0548669
# # FSH-C  -480.5174 -731.6065 -229.428350 0.0000442
# # FSH-FH -244.1471 -468.8594  -19.434710 0.0298341

# Heating females does not significantly reduce the reproductive fitness of offspring (FH-C p = 0.055, marginal); however, heating stored sperm does

#---------GLM Model refinement

# # Overdispersion parameter:              201-251 for poisson (log) true overdispersion large variation

# Error family (+link function):           quasipoisson (log)
#! Note negative binomial models had iterative limit reached

# Model refinement method(s):              AICc comparison, anova(), lmtest()

# Most plausible/final model(s):           glm(formula = Offspring.count.sum.20D ~ Maternal.temperature.oC, family = quasipoisson(link = "log"), data = transgenmaternalsum)

# AIC(c)                                   NA for quasipoiss 
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      0.1402246
# Model significance:                      from analysis of deviance anova(nullmod,globalmodel): X^2 (2, 100) = -4103.8 p= 5.032e-05 

# Hypothesis interpretation:  There is a transgenerational reduction in sons' reproductive fitness in heatwave treatments

#--------Model assumptions

#Residuals normally distributed:               NOT NECESSARY BUT NOT IMPROVED
#    hist of stdzd resids (u=0, sd=1):         symmetrical bell shaped 
#    p1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal  
#    p2-Q-Q plot w/ stdzd resids:              straight line 
#    p3-scale-location w/sqrt resid~fitted:    no pattern
#    s-w/k-s resid tests:                      NS
# -both Q-Q points pull down on left, devresid histogram  some negative skew

#Homogeneity of variance:                      NOT NECESSARY BUT NOT IMPROVED
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
#    fligner/levenes tests:                    NS
#  P1 little wedge/slope, test failed

#Independence of variables (no-collinearity):  1 FACTOR SO YES
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
#    variance inflation factors (VIFs):        values <3
# Only one independent variable

#No serial auto-correlation                     YES
#    durbinwatson test:                         NS
#    Auto-cor function (ACF):                   <threshold
# test passed

#No bias by unduly influential datapoints:      YES
#    P4-stdzd resids~leverage:                  <cooks threshold eg. >1 or sample-size/4
#    leverage                                   <2p/n
# 40 cases in poisson with larger cooks distance reduced to 6 with cooks <0.1

#Independent variables measured without error:  TO BEST OF ABILITY

#! Note model is still largely overdispersed

#-------Model report
# Multiple Comparisons of Means: Tukey Contrasts
#term                    parameter+/-se   test-stat(wald z)   d.f.      P
#      FH - C == 0    -0.2386     0.1119  -2.132                     0.0829 .  
#      FSH - C == 0   -0.5649     0.1286  -4.394                     <0.001 ***
#      FSH - FH == 0  -0.3263     0.1235  -2.641                      0.0224 *  

# Hypothesis interpretation:  Maternal heatwaves do not transgenerationally decrease the reproductive fitness of sons; however, sons produced from heatwaved stored sperm do have decreased reproductive fitness





#### FEMALE  TRANSGEN MATE SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis
# The relative effect of a single 5d 42oC female or stored sperm heatwaves on the number of females, in a series of 13, their sons can fertilise 


#Response variable (dependent):           Number of females producing offspring; (count +/- positive skew) 

#Global Fixed variables (independent):          
#    Categorical                          temperature (control, females heated 42, stored sperm 42)  
#    Covariates                           NA
#    Non-linear terms                     NA
#    Interactions                         NA

#Random terms:                            (oviblock)

#---------Misc

# Simple analysis:                       ANOVA      
# Non-para:                              Kruskal-Wallis
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats

# More powerful shapiro shows normality in all groups and variances homogenous so 2+ sample ANOVA can be used

# summary(aov(transgenmaternalsum$Count.matings~ transgenmaternalsum$Maternal.temperature.oC)) 
#                                               Df Sum Sq Mean Sq  F value Pr(>F)   
# transgenmaternalsum$Maternal.temperature.oC   2   72.5   36.24    6.56 0.0021 **
#  Residuals                                  100  552.5    5.52                  

# Crawley 2007 and thomas 2015 recommends TukeyHSD for pairwise comparisons
# TukeyHSD(aov(transgenmaternalsum$Count.matings~ transgenmaternalsum$Maternal.temperature.oC))
#           diff       lwr        upr     p adj
# FH-C   -0.2857143 -1.665084  1.0936559 0.8749051
# FSH-C  -1.9411765 -3.382649 -0.4997039 0.0051311
# FSH-FH -1.6554622 -2.945509 -0.3654152 0.0080981

# Heating females does not reduce the number of females fertilised by sons; however, heating stored sperm does


#---------GLM Model refinement

# # Overdispersion parameter:                1.07-1.35 for poisson (log) 

# Error family (+link function):           poisson (log)

# Model refinement method(s):              AICc comparison, anova(), lmtest()

# Most plausible/final model(s):           glm(formula = Count.matings ~ Maternal.temperature.oC, family = poisson(link = "log"), data = transgenmaternalsum)

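# AIC(c)                                   489.6915 
#! NOTE summary() of this same model prints AIC = 481.24; the small-sample correction alone should not add ~8 units, so check which model 489.6915 came from - TODO confirm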
# Model R^2/Adj R^2:                       NA for poiss, quasi-p and -ve binom
# pseudoR^2/deviance:                      0.09675123
# Model significance:                      from analysis of deviance lrtest(model, nullmodel): X^2 (2,100) = 14.415    p = 0.0007412 

# Hypothesis interpretation:  Heatwave treatments transgenerationally reduce the frequency of fertilisations sons can achieve


#--------Model assumptions

#Residuals normally distributed:               NOT NECESSARY SOMEWHAT NORMAL
#    hist of stdzd resids (u=0, sd=1):         symmetrical bell shaped 
#    p1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal  
#    p2-Q-Q plot w/ stdzd resids:              straight line 
#    p3-scale-location w/sqrt resid~fitted:    no pattern
#    s-w/k-s resid tests:                      NS
# Q-Q points pull down on left, devresid histogram relatively normal and KS passed 

#Homogeneity of variance:                      NOT NECESSARY SOMEWHAT SIMILAR VARIANCE
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
#    fligner/levenes tests:                    NS
#  P1 no wedge, slight slope, test failed

#Independence of variables (no-collinearity):  1 FACTOR SO YES
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
#    variance inflation factors (VIFs):        values <3
# Only one independent variable

#No serial auto-correlation                     YES 
#    durbinwatson test:                         NS
#    Auto-cor function (ACF):                   <threshold
# test passed

#No bias by unduly influential datapoints:      YES
#    P4-stdzd resids~leverage:                  <cooks threshold eg. >1 or sample-size/4
#    leverage                                   <2p/n
# 5 cases but cooks <0.05

#Independent variables measured without error:  TO BEST OF ABILITY


#-------Model report
# summary(glht(globalmodposs, mcp(Maternal.temperature.oC="Tukey")))
#term                    parameter+/-se   test-stat(wald z)   d.f.      P
#      FH - C == 0   -0.04879    0.10168      -0.480                  0.88050   
#      FSH - C == 0  -0.39087    0.11584      -3.374                  0.00225 **
#      FSH - FH == 0 -0.34208    0.10683      -3.202                  0.00377 **


# Hypothesis interpretation:  Maternal heatwaves do not transgenerationally decrease the frequency of fertilisation sons achieve; however, sons produced from heatwaved stored sperm do have a decreased frequency

