
# Session setup and data import ----

# Show what is currently in the workspace (left over from any previous session).
ls()
# NOTE: the workspace is deliberately no longer wiped with rm(list = ls()).
# That call deletes the user's objects but leaves loaded packages and options
# untouched, so it does not give a clean session; restart R if one is needed.

# Report the current working directory, then switch to the folder holding the data.
getwd()
# NOTE(review): hard-coded, machine-specific path; adjust when running elsewhere.
setwd("C:/Users/UEA/Documents/Neat data/nature heatwave")

# Read the data.
# stringsAsFactors = TRUE keeps composite.treatment a factor on R >= 4.0.0, where
# read.csv() otherwise returns character columns. The str() output recorded further
# down shows a 2-level factor, and the later plot(sresid ~ composite.treatment)
# calls need a factor on the right-hand side.
gfpheatsimple <- read.csv("heatwavegfpdistribution.csv", header = TRUE,
                          stringsAsFactors = TRUE)



######## SIMPLE GFP FOR PAPER: INSPECT THE IMPORTED DATA AND ADD A RESCALED COLUMN ##############

# Column names of the imported data frame.
names(gfpheatsimple)

# Structure of the data frame; the output recorded when this was last run was:
str(gfpheatsimple)
# 'data.frame':	46 obs. of  3 variables:
# $ Replicate                     : int  1 2 3 4 5 6 7 8 9 10 ... # male ID
# $ composite.treatment           : Factor w/ 2 levels "CS24","HS24": 1 1 1 1 1 1 1 1 1 1 ... # male heatwave status. freezing female tract at 24 hrs after mating
# $ x10.total.tract.mean.intensity: num  3073 6803 2229 1496 2502 ... # mean fluorescence value indicating sperm density in the tract

# Add the mean intensity divided by 1000 as a new column (used as the plot's y variable).
gfpheatsimple[["x10.total.tract.mean.intensity.x10.to.3"]] <-
  gfpheatsimple[["x10.total.tract.mean.intensity"]] / 1000

###### PLOTTING DATA ###########

library(ggplot2)

# Legend title "Temperature (degree C)" as a plotmath expression.
temp <- expression(paste('Temperature (',degree,'C)',sep=''))

#! PAPER PLOT
# Boxplot of mean tract intensity (in thousands) per treatment, with the group mean
# as a filled point and the raw observations as open, horizontally jittered points.
# Changes from the earlier version of this block:
#   * stat_summary(fun.y = ) -> fun =       (fun.y is deprecated since ggplot2 3.3.0)
#   * element_line(size = )  -> linewidth = (size is deprecated for lines since ggplot2 3.4.0,
#                                            so this block now needs ggplot2 >= 3.4.0)
#   * notch = F -> FALSE; label = -> labels = (no reliance on partial argument matching)
#   * position_jitter(height = 0) so points are only moved sideways, never in y
#   * "fluoresence" in the y-axis title corrected to "fluorescence"
simplemeanintensity <- ggplot(
  gfpheatsimple,
  aes(x = composite.treatment,
      y = x10.total.tract.mean.intensity.x10.to.3,
      fill = composite.treatment) # change fill to colour for outline-only boxes, and scale_fill_manual below to scale_color_manual
) +
  geom_boxplot(notch = FALSE,       # TRUE to draw notches
               outlier.shape = NA,  # outliers are not drawn by the boxplot; every observation is drawn by geom_point below
               width = 0.75,        # 0.5 for publication
               lwd = 1,             # 0.5 for publication
               fatten = 1,          # 0.5 for publication
               color = "black",
               position = position_dodge(0.5)) +
  stat_summary(fun = "mean", geom = "point", size = 4, # size = 1.5 for publication
               position = position_dodge(1), color = "black") +
  scale_fill_manual(values = c("ghostwhite", "firebrick1"), # box fill colours #! tomato for publication
                    name = temp,                            # legend title
                    breaks = c("CS24", "HS24"),             # order listed in the legend
                    labels = c("Control", "Heatwave")) +    # legend labels
  scale_colour_manual(values = c("black", "black")) +
  geom_point(shape = 1, size = 1.5,
             position = position_jitter(width = 0.25, height = 0)) + # 0.2 jitter for publication
  labs(x = "",
       y = expression(bold(atop("Mean sperm fluorescence ", paste(bold("in"~female~tract~x~10^{3})))))) + # axis titles
  scale_x_discrete(breaks = c("CS24", "HS24"),          # order of the groups on the x axis
                   labels = c("Control", "Heatwave")) + # names shown on the x axis
  coord_cartesian(ylim = c(-0.2, 10.2)) +               # visible y range (zooms; does not drop data)
  scale_y_continuous(breaks = seq(0, 10, 2),            # ticks at 0, 2, ..., 10 (units of 10^3)
                     expand = c(0, 0)) +                # no extra padding beyond the limits
  theme_classic() +
  theme(
    #legend.position="none", #get rid of the hashtag to get rid of legend
    panel.grid.major = element_blank(), # no major gridlines
    panel.border = element_blank(),     # no panel border
    panel.grid.minor = element_blank(), # no minor gridlines
    axis.line.x = element_line(color = "black", linewidth = 1.5), # 1 for publication
    axis.line.y = element_line(color = "black", linewidth = 1.5), # 1 for publication
    axis.text.x = element_text(face = "bold", color = "black", size = 16), # size 12 for publication
    axis.text.y = element_text(face = "bold", color = "black", size = 20), # size 12 for publication
    axis.title.x = element_text(face = "bold", size = 16, color = "black", margin = margin(t = 10, r = 0, b = 0, l = 0)),
    axis.title.y = element_text(face = "bold", size = 16, color = "black", margin = margin(t = 0, r = 7, b = 0, l = 0)),
    legend.position = "none",
    panel.background = element_blank(),
    plot.background = element_rect(fill = "transparent", colour = NA))


# Save the paper plot to the Desktop as a transparent PNG.
# `path` replaces the setwd() round trip, so the working directory is never changed
# (and cannot be left pointing at the Desktop if saving fails). The plot object is
# passed explicitly rather than relying on ggplot2's last_plot().
ggsave("simplemeanintensity.png", plot = simplemeanintensity,
       path = "C:/Users/UEA/Desktop",
       width = 4, height = 6, dpi = 300, bg = "transparent") # 2.5 2.5 for publication




#### SIMPLE GFP TOTAL DATA CHECK, CLEAN, DESCRIPTION AND SUMMARY #########################################################################
####### average intensity
names(gfpheatsimple)

######### ! NAT COMMS DESCRIPTIVE STATS ###########

# Per-treatment descriptive statistics of mean tract intensity.
# describeBy() comes from the psych package, which this script never attaches;
# the call is namespace-qualified so it works without a prior library(psych).
psych::describeBy(gfpheatsimple$x10.total.tract.mean.intensity, gfpheatsimple$composite.treatment)
# Output recorded when this was last run:
# $CS24
# vars  n    mean      sd  median trimmed     mad    min     max   range skew kurtosis     se
# X1    1 22 2703.14 1768.35 2365.46  2517.9 1416.85 801.48 6802.67 6001.19  0.8    -0.61 377.01
# 
# $HS24
# vars  n   mean     sd median trimmed     mad   min     max   range skew kurtosis     se
# X1    1 24 905.56 902.35 807.23  801.48 1047.98 35.37 3085.86 3050.49 0.89    -0.32 184.19

########### average intensity Normality  - Failed in all groups
# Mean tract intensity per treatment, extracted once and reused for every check below.
control_intensity <- gfpheatsimple$x10.total.tract.mean.intensity[gfpheatsimple$composite.treatment == "CS24"]
heatwave_intensity <- gfpheatsimple$x10.total.tract.mean.intensity[gfpheatsimple$composite.treatment == "HS24"]

# Control (CS24)
hist(control_intensity,
     main = list("Control", cex = 2), xlab = "tract.presence", ylab = "Frequency", ylim = c(0, 30),
     nclass = 3)
# Heatwave (HS24)
hist(heatwave_intensity,
     col = "red", density = 30, angle = 180, border = "red",
     main = list("42", cex = 2), xlab = "tract.presence", ylab = "Frequency", ylim = c(0, 30),
     nclass = 3)
# NOTE(review): the title "42" and x label "tract.presence" look carried over from
# another analysis - confirm they are the intended labels for mean intensity.
# not normal

# Shapiro-Wilk per group (p-values as recorded when last run).
shapiro.test(control_intensity)  # W = 0.88395, p-value = 0.01439
shapiro.test(heatwave_intensity) # W = 0.85849, p-value = 0.003155

# Kolmogorov-Smirnov per group against a normal with the group's own mean and sd.
# The earlier calls, ks.test(x, pnorm), compared the raw intensities (group means in
# the hundreds to thousands) with a standard normal N(0, 1); the recorded results
# (D = 1, p = 3.331e-16 and D = 1, p < 2.2e-16) were an artefact of that scale
# mismatch, not evidence about shape. Re-run to obtain the corrected statistics.
# Because mean and sd are estimated from the same sample, the K-S p-value is only
# approximate (conservative); treat Shapiro-Wilk as the primary test.
ks.test(control_intensity, "pnorm", mean = mean(control_intensity), sd = sd(control_intensity))
ks.test(heatwave_intensity, "pnorm", mean = mean(heatwave_intensity), sd = sd(heatwave_intensity))


########### average intensity Homogeneity of Variances - failed (all three recorded p-values < 0.01)
# Visual check of spread per treatment.
boxplot(gfpheatsimple$x10.total.tract.mean.intensity ~ gfpheatsimple$composite.treatment, ylab="tract.presence", xlab="Temperature")

# Formal tests of equal variance between treatments (results as recorded when last run).
bartlett.test(gfpheatsimple$x10.total.tract.mean.intensity ~ gfpheatsimple$composite.treatment) #  Bartlett's K-squared = 9.2328, df = 1, p-value = 0.002377
fligner.test(gfpheatsimple$x10.total.tract.mean.intensity ~ gfpheatsimple$composite.treatment) # Fligner-Killeen:med chi-squared = 6.7119, df = 1, p-value = 0.009577

# leveneTest() comes from the car package, which this script never attaches;
# the call is namespace-qualified so it works without a prior library(car).
car::leveneTest(gfpheatsimple$x10.total.tract.mean.intensity ~ gfpheatsimple$composite.treatment)   #Df F value Pr(>F) 1  7.4078 0.00927 **


##### SIMPLE GFP TOTAL NON-PARA RANKS TESTING


##### ave intensity
# non-normal, unequal variances

# Two-sample Wilcoxon rank-sum (Mann-Whitney) test between treatments.
# `paired = FALSE` has been dropped: it is the default, and since R 4.4.0 passing
# `paired` to the formula method of wilcox.test()/t.test() is an error
# ("cannot use 'paired' in formula method").
wilcox.test(gfpheatsimple$x10.total.tract.mean.intensity ~ gfpheatsimple$composite.treatment,
            exact = TRUE, conf.int = TRUE)
# W = 444, p-value = 3.267e-05

# Welch t-test on ranks: rank every observation across both groups (ascending),
# then compare mean rank between treatments without assuming equal variances.
gfpheatsimple$rank <- rank(gfpheatsimple$x10.total.tract.mean.intensity)
t.test(gfpheatsimple$rank ~ gfpheatsimple$composite.treatment, var.equal = FALSE)
# t = 4.8682, df = 43.999, p-value = 1.488e-05

###### SIMPLE GFP TOTAL NEW METHOD GLM ERROR STRUCTURE ###############


######### ! NAT COMMS MODEL SELECTION ###########


#### Gaussian error
# The response is a right-skewed continuous variable; per the checks below, a normal
# error structure does not give normal, homogeneous residuals.


#### Creating a global model: gaussian family, identity link
globalmodgauss <- glm(x10.total.tract.mean.intensity ~ composite.treatment,
                      family = gaussian(link = "identity"), data = gfpheatsimple)

summary(globalmodgauss) # No R^2, AIC 799.97

# Small-sample corrected AIC: AICc = AIC + 2k(k + 1) / (n - k - 1), where k is the
# number of estimated parameters reported by logLik() and n the number of observations.
# The earlier expression, -2*logLik + 2*1*(1+1)/(46-1-1), left out the 2k penalty and
# hard-coded p = 1 and n = 46; the "794." recorded for it is therefore not an AICc.
# With the recorded AIC of 799.97 the corrected value should be slightly larger - re-run.
n_obs <- nobs(globalmodgauss)
n_par <- attr(logLik(globalmodgauss), "df")
AICc <- AIC(globalmodgauss) + (2 * n_par * (n_par + 1)) / (n_obs - n_par - 1); AICc

# Proportion of null deviance explained (pseudo R^2; thomas et al., 2015).
pseudoR <- (globalmodgauss$null.deviance - globalmodgauss$deviance) / globalmodgauss$null.deviance
pseudoR # 0.3053027
# Squared correlation between observed and fitted values, for comparison.
R2 <- cor(gfpheatsimple$x10.total.tract.mean.intensity, predict(globalmodgauss))^2; R2 # 0.3053027

# 1) Errors normally distributed? - Somewhat
# Residuals centred and scaled to unit sd, so they can be tested against N(0, 1).
# (With the identity link the stored working residuals equal the response residuals.)
sresid <- (globalmodgauss$residuals - mean(globalmodgauss$residuals)) / sd(globalmodgauss$residuals); hist(sresid)
shapiro.test(sresid); ks.test(sresid, pnorm)
par(mfrow = c(2, 2)); plot(globalmodgauss); par(mfrow = c(1, 1))
# P2 Q-Q points pull up on right side, sresid histogram some positive skew, one normality test passed

# 2) Homogenous/homoscedasticity variance of residuals - NO
par(mfrow = c(2, 2)); plot(globalmodgauss); par(mfrow = c(1, 1)); plot(sresid ~ globalmodgauss$fitted.values, pch = 20, cex = 2, cex.lab = 1.5)
plot(sresid ~ gfpheatsimple$composite.treatment)
# leveneTest() is from car (never attached in this script), hence the car:: prefix.
fligner.test(sresid ~ gfpheatsimple$composite.treatment); car::leveneTest(sresid ~ gfpheatsimple$composite.treatment)

#  P1 Resids~Fitted and P3 SQRT(Resid~Fitted) show some wedging and slope. tests failed

# 3) Independence of independent variables - YES
# only 1 variable

# 4) No serial auto-correlation with time/space - YES
# durbinWatsonTest() is from car (never attached in this script), hence the car:: prefix.
car::durbinWatsonTest(globalmodgauss) # Test passed

# 5) No bias by unduly influential datapoints - NO
influence <- influence.measures(globalmodgauss); summary(influence)
par(mfrow = c(2, 2)); plot(globalmodgauss); par(mfrow = c(1, 1))
# 2 Cook's distances 0.1-0.2; confirmed on P4 standardised resids~leverage,
# still below the Cook's distance threshold of 1 (Fox, 1991).
# NOTE(review): the original note also cited "samplesize (49)/4", but the data has
# 46 rows and the usual rule of thumb is 4/n - confirm which threshold was intended.

# 6) Independent variables measured without error - BEST OF ABILITY




#### Creating a global model gaussian log - NOT IMPROVED
globalmodgausslog <- glm(x10.total.tract.mean.intensity ~ composite.treatment,
                         family = gaussian(link = "log"), data = gfpheatsimple)

summary(globalmodgausslog) # No R^2, 799.97

# Proportion of null deviance explained (pseudo R^2; thomas et al., 2015).
pseudoR <- (globalmodgausslog$null.deviance - globalmodgausslog$deviance) / globalmodgausslog$null.deviance
pseudoR # 0.3053027
# The recorded AIC and pseudo R^2 are the same as for the identity-link model.

# Residuals used for every diagnostic below.
# The earlier version used globalmodgausslog$residuals, which for a glm are the IWLS
# *working* residuals; under a log link those are (y - mu) / mu, i.e. rescaled by each
# group's fitted mean, which can hide unequal variances between groups. Deviance
# residuals are used instead. The verdicts noted below were recorded with the working
# residuals and must be re-checked after re-running.
dev_resid <- residuals(globalmodgausslog, type = "deviance")

# 1) Errors normally distributed? - recorded as BETTER, decreased positive skew (re-check)
sresid <- (dev_resid - mean(dev_resid)) / sd(dev_resid); hist(sresid)
shapiro.test(sresid); ks.test(sresid, pnorm)
par(mfrow = c(2, 2)); plot(globalmodgausslog); par(mfrow = c(1, 1))
# Recorded: P2 Q-Q points pull up on left side, sresid histogram some negative skew, normality tests p<0.01.


# 2) Homogenous/homoscedasticity variance of residuals - recorded as YES, better (re-check)
par(mfrow = c(2, 2)); plot(globalmodgausslog); par(mfrow = c(1, 1)); plot(sresid ~ globalmodgausslog$fitted.values, pch = 20, cex = 2, cex.lab = 1.5)
plot(sresid ~ gfpheatsimple$composite.treatment)
# leveneTest() is from car (never attached in this script), hence the car:: prefix.
fligner.test(sresid ~ gfpheatsimple$composite.treatment); car::leveneTest(sresid ~ gfpheatsimple$composite.treatment)
# Recorded: P1 Resids~Fitted and P3 SQRT(Resid~Fitted) show some wedging and slope. tests passed

# 3) Independence of independent variables - YES
# only 1 variable

# 4) No serial auto-correlation with time/space - YES
# durbinWatsonTest() is from car (never attached in this script), hence the car:: prefix.
car::durbinWatsonTest(globalmodgausslog) # Test passed

# 5) No bias by unduly influential datapoints - YES
influence <- influence.measures(globalmodgausslog); summary(influence)
par(mfrow = c(2, 2)); plot(globalmodgausslog); par(mfrow = c(1, 1))
# 2 Cook's distances 0.1-0.2; confirmed on P4 standardised resids~leverage,
# still below the Cook's distance threshold of 1 (Fox, 1991).
# NOTE(review): the original note also cited "samplesize (49)/4", but the data has
# 46 rows and the usual rule of thumb is 4/n - confirm which threshold was intended.

# 6) Independent variables measured without error - BEST OF ABILITY



#### Creating a global model gamma
globalmodgamma <- glm(x10.total.tract.mean.intensity ~ composite.treatment,
                      family = Gamma(link = "inverse"), data = gfpheatsimple)

summary(globalmodgamma) # No R^2, 772.

# Proportion of null deviance explained (pseudo R^2; thomas et al., 2015).
pseudoR <- (globalmodgamma$null.deviance - globalmodgamma$deviance) / globalmodgamma$null.deviance
pseudoR # 0.2212256

# Residuals used for every diagnostic below.
# The earlier version used globalmodgamma$residuals, which for a glm are the IWLS
# *working* residuals (on the inverse-link scale here), not residuals on the scale of
# the data. Deviance residuals are used instead. The verdicts noted below were
# recorded with the working residuals and must be re-checked after re-running.
dev_resid <- residuals(globalmodgamma, type = "deviance")

# 1) Errors normally distributed? - recorded as SIMILAR, decreased positive skew (re-check)
sresid <- (dev_resid - mean(dev_resid)) / sd(dev_resid); hist(sresid)
shapiro.test(sresid); ks.test(sresid, pnorm)
par(mfrow = c(2, 2)); plot(globalmodgamma); par(mfrow = c(1, 1))
# Recorded: P2 Q-Q points less pull, sresid histogram some negative skew, normality tests p<0.01.


# 2) Homogenous/homoscedasticity variance of residuals - recorded as WORSE (re-check)
par(mfrow = c(2, 2)); plot(globalmodgamma); par(mfrow = c(1, 1)); plot(sresid ~ globalmodgamma$fitted.values, pch = 20, cex = 2, cex.lab = 1.5)
plot(sresid ~ gfpheatsimple$composite.treatment)
# leveneTest() is from car (never attached in this script), hence the car:: prefix.
fligner.test(sresid ~ gfpheatsimple$composite.treatment); car::leveneTest(sresid ~ gfpheatsimple$composite.treatment)
# Recorded: P1 Resids~Fitted and P3 SQRT(Resid~Fitted) show big wedging and slope. tests badly failed

# 3) Independence of independent variables - YES
# only 1 variable

# 4) No serial auto-correlation with time/space - NO
# durbinWatsonTest() is from car (never attached in this script), hence the car:: prefix.
car::durbinWatsonTest(globalmodgamma) # Test failed

# 5) No bias by unduly influential datapoints - BETTER
influence <- influence.measures(globalmodgamma); summary(influence)
par(mfrow = c(2, 2)); plot(globalmodgamma); par(mfrow = c(1, 1))
# none

# 6) Independent variables measured without error - BEST OF ABILITY




###  MODEL REFINEMENT
# List the components stored in the fitted model object.
names(globalmodgausslog)

#! Changing the link did little and the gamma fit looked worse; the gaussian
#! log-link model is the one carried forward below.

## Global model (refitted here so this section can be run on its own)
globalmodgausslog <- glm(
  formula = x10.total.tract.mean.intensity ~ composite.treatment,
  family = gaussian(link = "log"),
  data = gfpheatsimple
)

## model refinement

######### ! NAT COMMS MODEL SIGNIFICANCE ###########

# Single-term deletion with an F test: significance of the treatment term.
drop1(globalmodgausslog, test = "F")
# Output recorded when this was last run:
# Single term deletions
# 
# Model:
#      x10.total.tract.mean.intensity ~ composite.treatment
#                      Df  Deviance    AIC F value    Pr(>F)    
# <none>                  84395871 799.97                      
# composite.treatment  1 121485825 814.73  19.337 6.847e-05 ***


# Proportion of null deviance explained (pseudo R^2; thomas et al., 2015).
pseudoR <- with(globalmodgausslog, (null.deviance - deviance) / null.deviance)
pseudoR # 0.3053027

######### ! NAT COMMS MODEL POST HOC ###########

summary(globalmodgausslog)
# Output recorded when this was last run:
# Call:
#      glm(formula = x10.total.tract.mean.intensity ~ composite.treatment, 
#          family = gaussian(link = "log"), data = gfpheatsimple)
# 
# Deviance Residuals: 
#      Min       1Q   Median       3Q      Max  
# -1901.7   -866.0   -140.1    354.1   4099.5  
# 
# Coefficients:
#                               Estimate Std. Error t value Pr(>|t|)    
#      (Intercept)               7.9022     0.1092  72.342  < 2e-16 ***
#      composite.treatmentHS24  -1.0936     0.3307  -3.307  0.00189 ** 
#      ---
#      Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 
# (Dispersion parameter for gaussian family taken to be 1918088)
# 
# Null deviance: 121485825  on 45  degrees of freedom
# Residual deviance:  84395871  on 44  degrees of freedom
# AIC: 799.97
# 
# Number of Fisher Scoring iterations: 6

# Back-transform the log-link coefficients to the response scale.
# The values are taken from the fitted model rather than typed in by hand
# (previously exp(7.9022) and exp(7.9022-1.0936)), so they stay in step with the
# data and carry no rounding from the printed summary.
model_coefs <- coef(globalmodgausslog)
control_mean <- exp(model_coefs[["(Intercept)"]])  # 2703.
heatwave_mean <- exp(model_coefs[["(Intercept)"]] + model_coefs[["composite.treatmentHS24"]])  # 905.
control_mean
heatwave_mean

# Cross-check against the raw per-treatment means.
# describeBy() is from psych (never attached in this script), hence the psych:: prefix.
psych::describeBy(gfpheatsimple$x10.total.tract.mean.intensity, gfpheatsimple$composite.treatment)

# Proportional reduction in mean intensity under heatwave relative to control.
1 - (heatwave_mean / control_mean) # 66% reduction heat

