

#check what is in r console from last time
ls()
#remove everything
rm(list=ls())

#check what working directory is set
getwd()
#setting to required one where data is based
setwd("C:/Users/UEA/Documents/Neat data/nature heatwave")

#reading in data 
competition7D <- read.csv("heatwavecompetition.csv", header = TRUE) # 7 day data set 



#### DATA CHECK, CLEAN, DESCRIPTION AND SUMMARY #########################################################################

competition7D # produces all whole dataframe - no NAs, data to 2 or 3 d.p., no irregularities/anomalies

#data checks
str(competition7D)
# 'data.frame':	116 obs. of  5 variables:
#      $ Replicate     : int  1 2 3 4 5 6 7 8 9 10 ...
# $ Temperature.oC: int  30 30 30 30 30 30 30 30 30 30 ...
# $ THL.count.7D  : int  80 76 59 63 65 70 111 112 85 77 ...
# $ Rdhd.count.7D : int  2 42 67 29 4 43 22 1 33 16 ...
# $ Focal.male.P2 : num  0.98 0.64 0.47 0.68 0.94 0.62 0.83 0.99 0.72 0.83 ...

summary(competition7D)
is.na(competition7D)  # no nas


competition7D$Cbindpaternity<-cbind(competition7D$THL.count, competition7D$Rdhd.count) # vector for model of success/fail hatch
competition7D$Temperature.oC <- as.factor(competition7D$Temperature.oC) 


str(competition7D)


###################### NEAT PLOTS #########################################
names(competition7D)


library(ggplot2)


temp <- expression(paste('Temperature (',degree,'C)')) #the temperature label with degrees sign # ~ is a space
label30oC <- expression(""*30~degree*C)
label40oC <- expression(""*40~degree*C)




############# ! NAT COMM PLOT 2b #########################


graphcomp<-ggplot(subset(competition7D, Temperature.oC %in% c("30" , "42")), aes(x=Temperature.oC, y=Focal.male.P2, fill= Temperature.oC)) +  #change fill to colour is just lines and change 'scale_fill_manual' below to scale_color_manual
     geom_boxplot(notch=F,  #change to F if want to get rid of notchs
                  outlier.shape= NA, #shape of the outlier (hashtag out if dont want outliers marked)
                  width=0.5,
                  lwd=0.5,
                  fatten=0.5,
                  color="black",
                  position=position_dodge(0.5)) + #size of the outlier (hashtag out if dont want outliers marked)
     stat_summary(fun.y="mean", geom= "point", size=4, position=position_dodge(1), color="black") + 
     scale_fill_manual(values=c("ghostwhite", "tomato"), # changes the colour of the bars
                       name = temp, #adds in temperature label on the legend
                       breaks = c("30",  "42"), #the order listed in the legend
                       label = c("30",  "42")) + #how things are labeled in the lgend
     geom_jitter(shape=1, size=1.5, position=position_jitter(0.15)) + #so all the data points are not ontop of each other
     labs(x= expression(bold(atop("Male treatment", paste("")))), y= expression(bold(atop("Proportion of offspring sired", paste("treatment male"))))) +  #adding title to the x axis and y axis
     scale_x_discrete(breaks=c("30",  "42"), #the order of the variables on the x axis
                      labels=c("Control",  "Heatwave")) + # the names on the x axis
     coord_cartesian(ylim=c(-0.02, 1.02)) + #set axis limits
     scale_y_continuous(breaks=seq(0, 1, 0.2), #ticks from 0 to 16000 and show number every 16000
                        expand = c(0, 0)) + #cuts the axis off at 0
     theme_classic() + #the theme of the whole plot 
     theme(
           #legend.position="none", #get rid of the hashtag to get rid of legend
           panel.grid.major=element_blank(), #getting rid of majorgridlines
           panel.border=element_blank(),     #getting rid of minorgridlines  
           panel.grid.minor=element_blank(),
           axis.line.x=element_line(color="black", size = 1),
           axis.line.y=element_line(color="black", size = 1),
           axis.text.x=element_text(color="black", size=12),
           axis.text.y=element_text(color="black", size=12),
           axis.title.x=element_text(face = "bold", size=12, color="black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
           axis.title.y=element_text(face = "bold", size=12, color="black", margin = margin(t = 0, r = 10, b = 0, l = 0)),
           legend.position="none",
           panel.background=element_blank(),
           plot.background=element_rect(fill="transparent", colour = NA))

setwd("C:/Users/UEA/Desktop")
ggsave("graphcomp.png",width=2.8, height=4, dpi=300, bg = "transparent")
setwd("C:/Users/UEA/Documents/Dissertation and phd/d- data for phd/R analysis/main/competition")




############################################################################################################################################################## PLOTTING RAW DATA DISTRIBUTION AND TESTING NORMALITY AND HOMOGENIETY OF VARIANCES ###############################



#### ! library(psych)
#gives you vars  n, mean, sd,  median,  trimmed, mad, min, max, range, skew, kurtosis, se
describeBy(competition7D$Focal.male.P2, competition7D$Temperature.oC)
# $`30`
# vars     n mean  sd median trimmed  mad min max range skew kurtosis   se
# X1    1 65 0.77 0.2   0.81    0.79 0.19   0   1     1 -1.3     2.05 0.02
# 
# $`42`
# vars     n mean   sd median trimmed mad min max range skew kurtosis   se
# X1    1 51 0.33 0.26    0.3    0.31 0.3   0   1     1 0.51    -0.77 0.04



### in base
# 30
hist(competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"], 
     main = list("Control", cex = 2), xlab = "paternity share", ylab ="Frequency", ylim = c(0,20),
     nclass = 10) 

# 42
hist(competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"], 
     col = "red", density = 30, angle = 180, border = "red", 
     main = list("42", cex = 2), xlab = "paternity share", ylab ="Frequency", ylim = c(0,20),
     nclass = 10)  # keep nclass = 10, keep scales default
# 30 right skew, 42 left

###### plotting differences
# base boxplots of data distribution grouped by temperature
boxplot(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC, ylab="10 day egg count", xlab="Temperature")
#heats more variance

# notice plot has automatically produced a scatterplot
# if the mean.sperm.count is made as an integar

########### Normality - Failed in all groups
shapiro.test (competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"]) # W = 0.89006, p-value = 3.029e-05
ks.test(competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"], pnorm)  # D = 0.61928, p-value < 2.2e-16
shapiro.test (competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"]) # W = 0.92884, p-value = 0.004494
ks.test(competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"], pnorm) # D = 0.5, p-value = 1.685e-11

########### Homogeneity of Variances - Failed
bartlett.test(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC) # Bartlett's K-squared = 4.1242, df = 1, p-value = 0.04227
fligner.test(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC) # FFligner-Killeen:med chi-squared = 10.033, df = 1, p-value = 0.001538

#! need library(car)
leveneTest(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC)   #Df F value Pr(>F) 1  8.8302 0.003615 **







####################################################################################################################################################################  OLD METHOD: USE NORMAL > TRY AND TRANSFORM TO NORMAL > NON PARAMETRIC ##################################### 

########## Transformation with just hist and shapiro 
# comparing plots and tests before and after
# two other methods: by(df$response, df$treatment, shapiro.test) # with(df, tapply(response, treatment, shapiro.test))
## RIGHT SKEW FIXING
par(mfrow=c(2,2)) #plotting the graphs next to get other in a 4x4 gird
#raw data not normal; 30  -ve skew, 42 platykurotosis
hist (sqrt(competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"]))

hist (sqrt(competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"]))
shapiro.test (sqrt(competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"]))

shapiro.test (sqrt(competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"]))
#sqrt data 30 leptokurtotic,   42 -ve skew
hist (log10(competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"]+0.01))

hist (log10(competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"]+0.01))
shapiro.test (log10(competition7D$Focal.male.P2[competition7D$Temperature.oC == "30"]+0.01))

shapiro.test (log10(competition7D$Focal.male.P2[competition7D$Temperature.oC == "42"]+0.01))
par(mfrow=c(1,1))


# As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried



# data from above are not normal cannot be transformed
# varaiances also not equal
bartlett.test(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC) # Bartlett's K-squared = 4.1242, df = 1, p-value = 0.04227
fligner.test(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC) # FFligner-Killeen:med chi-squared = 10.033, df = 1, p-value = 0.001538
#! need library(car)
leveneTest(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC)   #Df F value Pr(>F) 1  8.8302 0.003615 **


# method 1 non parametric
wilcox.test(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC, exact = TRUE, conf.int = TRUE, paired = FALSE)
# W = 2982.5, p-value = 1.713e-13

# method 2 welch t-test on ranks
competition7D$rank<-rank(competition7D$Focal.male.P2)
t.test(competition7D$rank ~ competition7D$Temperature.oC, var.equal=FALSE, exact = TRUE, conf.int = TRUE, paired = FALSE)
# t = 10.167, df = 109.75, p-value < 2.2e-16




####################################################################################################################################################################NEW METHOD: USE GLM WITH NON-GAUSSIAN ERROR STRUCTURE######################################################



################## ! NAT COMMS DESCIPTIVE STATISTICS ####################

names(competition7D)


#### USEFUL PACKAGES
library(car); library(MASS); library (lme4); library (nlme) 

library(glmmADMB)#  glmmADMB()

describeBy(competition7D$Focal.male.P2, competition7D$Temperature.oC)
# $`30`
# vars  n mean  sd median trimmed  mad min max range skew kurtosis   se
# X1    1 65 0.77 0.2   0.81    0.79 0.19   0   1     1 -1.3     2.05 0.02
# 
# $`42`
# vars  n mean   sd median trimmed mad min max range skew kurtosis   se
# X1    1 51 0.33 0.26    0.3    0.31 0.3   0   1     1 0.51    -0.77 0.04




################## ! NAT COMM MODEL SELECTION ####################




#### Binomial family error structures
# As data is proportion bound at 0 and 1 fitting normal distibution does not give normal and homogenity of variance in residuals 

#! note only proportions without total so I cannot use cbind; raw data gained from Matt, can now use cbind

# Creating a global model
globalmodbinom<-glm(Cbindpaternity ~ Temperature.oC, binomial(link = "logit"), data=competition7D)
globalmodbibnomLOG<-glm(Cbindpaternity ~ Temperature.oC, binomial(link = "log"), data=competition7D)


summary(globalmodbinom); summary(globalmodbibnomLOG);  # No R^2, AIC given
# AIC: 3814.9, AIC: 3814.9 # link change seem to do little
pseudoR<-(globalmodbinom$null.deviance-globalmodbinom$deviance) / globalmodbinom$null.deviance # (thomas et al., 2015)
pseudoR # 0.4414073
pseudoR<-(globalmodbibnomLOG$null.deviance-globalmodbibnomLOG$deviance) / globalmodbibnomLOG$null.deviance # (thomas et al., 2015)
pseudoR # 0.4414073
# seems changing the link does nothing to R^2 or AIC
# poisson explains more variation in data than gaussian

AICc<-(-2*logLik(globalmodbinom))+((2*1*(1+1)/(115-1-1))); AICc # qAICc<-((-2*logLik(model1)/Theta)+((2*p*(p+1)/(n-p-1))); qAICc # AIC correcting for perameters(p) and sample size (n) # 3810.894
qAICc<-(-2*logLik(globalmodbinom)/29.33022)+((2*1*(1+1)/(115-1-1))); qAICc # 129.9648

## Overdispersion check
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
theta<-globalmodbinom$deviance/globalmodbinom$df.residual; theta #dispersion perameter (thomas et al 2015) how much variation left unexplained after fitting distribution # theta = 29.33022,  overdispersed is >1.5 is overdispersion.
hist(competition7D$Cbindpaternity) # looks like many 0s. recommendation of using quasi binomial

globalmodquasibin<-glm(Cbindpaternity ~ Temperature.oC, quasibinomial(link = "logit"), data=competition7D)
globalmodqausibibinmLOG<-glm(Cbindpaternity ~ Temperature.oC, quasibinomial(link = "log"), data=competition7D)
globalmodqausibibinmID<-glm(Cbindpaternity ~ Temperature.oC, quasibinomial(link = "identity"), data=competition7D)

summary(globalmodquasibin); summary(globalmodqausibibinmLOG); summary(globalmodqausibibinmID)
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance # (thomas et al., 2015)
pseudoR # 0.4414073
pseudoR<-(globalmodqausibibinmLOG$null.deviance-globalmodqausibibinmLOG$deviance) / globalmodqausibibinmLOG$null.deviance # (thomas et al., 2015)
pseudoR # 0.4414073
pseudoR<-(globalmodqausibibinmID$null.deviance-globalmodqausibibinmID$deviance) / globalmodqausibibinmID$null.deviance # (thomas et al., 2015)
pseudoR # 0.4414073
# all produce the same output and pseudoR



# 1) Errors normally distributed? - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance"); hist(devresid)
shapiro.test(devresid);ks.test(devresid, pnorm)
qqnorm(devresid,cex=1.8,pch=20); qqline(devresid,lty=2,lwd=2)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
# quasibinomial Q-Q simlar fluctuation,  devresid histogram spread  and KS test similarly failed

# 2) Homogenous/homoscedasticity variance of residuals - NOT NECASSARY BUT NOT IMPROVED

# binomial
devresid<-resid(globalmodbinom, type = "deviance")
plot(devresid ~ globalmodbinom$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~competition7D$Temperature.oC)
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))

# quasibinomial
devresid<-resid(globalmodquasibin, type = "deviance")
plot(devresid ~ globalmodquasibin$fitted.values, pch = 20, cex = 1, cex.lab = 1.5)
fligner.test(devresid~competition7D$Temperature.oC)
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
#  P1 some slope no wedge, test failed

# 3) Independences of independent variables - 1 FACTOR SO YES
# Only one independent variable

# 4) No serial auto-correlation with time/space - YES
#! need library(car)
durbinWatsonTest(globalmodquasibin) # test passed

# 5) No bias by unduly influential datapoints - YES

# binomial
par(mfrow=c(2,2)); plot(globalmodbinom);par(mfrow=c(1,1))
influence<-influence.measures(globalmodbinom); summary(influence)

# quasibinomial
par(mfrow=c(2,2)); plot(globalmodquasibin);par(mfrow=c(1,1))
influence<-influence.measures(globalmodquasibin); summary(influence)
# 9 cases in poisson with larger cooks distance fixed quasi poisson

# 6) Independent variables measured without error - BEST OF ABILITY









###  MODEL REFINEMENT
globalmodquasibin<-glm(Cbindpaternity ~ Temperature.oC, quasibinomial(link = "logit"), data=competition7D)
pseudoR<-(globalmodquasibin$null.deviance-globalmodquasibin$deviance) / globalmodquasibin$null.deviance; pseudoR # 0.4414073

nullmodquasibin<-glm(Cbindpaternity ~ 1, quasibinomial(link = "logit"), data=competition7D)
pseudoR<-(nullmodquasibin$null.deviance-nullmodquasibin$deviance) / nullmodquasibin$null.deviance; pseudoR # 0

### Global model explains offspring significantly more than null model because
# anova/lrtest(model, nullmodel) and aic functions do not work with quasibinomial
anova(globalmodquasibin, test = "Chisq")
# Analysis of Deviance Table
# Model: quasibinomial, link: logit
# Response: Cbindpaternity
# Terms added sequentially (first to last)
#                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                             115     5985.8              
# Temperature.oC  1   2642.2       114     3343.6 < 2.2e-16 ***



################## ! NAT COMMS MODEL SIGNIFICANCE ####################


drop1(globalmodquasibin, test = "Chi")
# Single term deletions
# Model:
#      Cbindpaternity ~ Temperature.oC
#                Df  Deviance  scaled dev.  Pr(>Chi)    
# <none>              3343.6                          
# Temperature.oC  1   5985.8      97.329 < 2.2e-16 ***


globalmodquasibin # shortened with basic information # redundant


################## ! THESIS MODEL POST HOC ####################


summary(globalmodquasibin)
# Call:
#      glm(formula = Cbindpaternity ~ Temperature.oC, family = quasibinomial(link = "logit"), 
#          data = competition7Dno40)
# 
# Deviance Residuals: 
#      Min        1Q    Median        3Q       Max  
# -18.1728   -4.0122    0.4477    4.1504   15.2860  
# 
# Coefficients:
#                         Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)        1.2143     0.1487   8.166 4.81e-13 ***
#      Temperature.oC42  -1.9935     0.2140  -9.313 1.11e-15 ***
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for quasibinomial family taken to be 27.14704)
# 
# Null deviance: 5985.8  on 115  degrees of freedom
# Residual deviance: 3343.6  on 114  degrees of freedom
# AIC: NA
# 
# Number of Fisher Scoring iterations: 4

# estimate logit look ok
exp(1.2143)/ (1+ exp(1.2143)) #0.77
exp(1.2143-1.9935)/ (1+ exp(1.2143-1.9935)) # 0.3144923





####################  HEATWAVE COMPETITION SUMMARY ######################################################################################
#Barnard et al., 2007 and Thomas 2015 as references
#---------Hypothesis---------------- 
# The relative effect of 5d 42oC male heatwave on the share of paternity over 7D oviposition he can attain by pairing with a female previously mated by a control female.  

#Response variable (dependent):           offspring sired by focal male;  (not cbind proportion)

#Global Fixed variables (independent):          
#    Categorical                          temperature (30, (40), 42)  
#    Covariates                           NA
#    Non-linear terms                     NA
#    Interactions                         NA

#Random terms:                            (date) (oviblock)

#---------Misc-------------------

# Simple analysis:                       anova but not normal/equal var     
# Non-para:                              kurskall wallis
# Plot:                                  Notched Box / Univariate scatter  

#------Model report Simple stats-------------------
# As the data is both not normal and not homogenous in variance in groups there is debate over the best method so both tried

# 1)kruskal.test(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC)
# Kruskal-Wallis chi-squared =  58.849, df = 2, p-value = 1.664e-13
# Kruskalmc(competition7D$Focal.male.P2 ~ competition7D$Temperature.oC, probs = 0.05, cont=NULL)
#             obs.dif critical.dif difference
# 30-40 13.40819     21.63750      FALSE
# 30-42 65.26561     21.00721       TRUE
# 40-42 51.85742     22.83511       TRUE

#2)oneway.test(competition7D$rank~ competition7D$Temperature.oC, var.equal=FALSE)
# F = 54.459, num df = 2.000, denom df = 97.644, p-value < 2.2e-16
#posthocTGH(y=competition7D$rank, x=competition7D$Temperature.oC, method="games-howell") 
#     t   df       p
# 30:40  1.7  83 2.1e-01
# 30:42 10.2 110 8.3e-14
# 40:42  6.4  83 2.2e-08


##---------GLM Model refinement-------------------

# Error family (+link function):           cbind binomial (logit)

# Model refinement method(s):              AICc comparison, anova(), drop1()

# Most plausible/final model(s):           bindpaternity ~ Temperature.oC, family = quasibinomial(link = "logit"), 
#          data = competition7Dno40

# AIC(c)                                NA for quasibinomial (3810.894 for binomial)
# Model R^2/Adj R^2:                    NA
# pseudoR^2/deviance:                   0.4414073
# Model significance:                   from analysis of deviance lrtest(nullhardening0pabinom, hardening0pabinom):  
# X^2 (1, 115/ 1, 114) = 2642.2 p <0.001


# Hypothesis interpretation:  
# 42oC heatwaves reduce the ability of males to gain paternity in sperm competition with other males

#                Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
# NULL                             115     5985.8              
# Temperature.oC  1   2642.2       114     3343.6 < 2.2e-16 ***


#-------Model report-----------------------

#term                    peramter+/-se    test-stat(wald z)   d.f.      P
# (Intercept)            1.2143     0.1487   8.166                   4.81e-13 ***
# Temperature.oC42      -1.9935     0.2140  -9.313                   1.11e-15 ***

#--------Model assumptions------------------

#Residuals normally distributed:               NOT NECASSARY BUT NOT IMPROVED
#    hist of stdzd resids (u=0, sd=1):         symmetrical bell shaped 
#    p1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal  
#    p2-Q-Q plot w/ stdzd resids:              straight line 
#    p3-scale-location w/sqrt resid~fitted:    no pattern
#    s-w/k-s resid tests:                      NS
# quasibinomial Q-Q simlar fluctuation,  devresid histogram spread  and KS test similarly failed

#Homogeneity of variance:                      NOT NECASSARY BUT NOT IMPROVED
#    P1-resids(ab difs obs-mod)~fitted:        no pattern/horizontal
#    sresid~ind vars:                          no pattern
#    fligner/levenes tests:                    NS
#  P1 some slope no wedge, test failed

#Independence of variables (no-collinearity):  1 FACTOR SO YES
#    pairwise scatterplots and correlations:   r<0.3
#    sresid~ind vars:                          no collinearity
#    variance inflation factors (VIFs):        values <3
# Only one independent variable

#No serial auto-correlation                     YES
#    durbinwatson test:                         NS
#    Auto-cor function (ACF):                   <threshold


#No bias by unduly influential datapoints:      YES IN QUASI BINOMIAL
#    P4-stdzd resids~leverage:                  <cooks threshold eg. >1 or sample-size/4
#    leverage                                   <2p/n


#Independent variables measured without error:  TO BEST OF ABILITY
