## LOAD PACKAGES

library(readxl)
library(ggplot2)
library(tidyr)
library(dplyr)
library(lme4)
library(lmerTest)
library(bestNormalize)
library(corrplot)
library(ggfortify) 
library(ggbiplot)
library(olsrr)
library(rptR)
library(patchwork)
library(devtools)
library(bannerCommenter)
library(car)



#######################################################
#### Part 1 -- Repeatability  #########################
#######################################################

## NOTE: FISH WITH DAMAGED BRAIN REMOVED FROM EXCEL FILE: F21. 

############ Outliers/normal? ###############
FBrain<-read_excel("/brainsize_and_matechoice_data.xlsx",sheet=1)

FBrain$Fish_ID<-as.factor(FBrain$Fish_ID)
FBrain$Sex<-as.factor(FBrain$Sex)

##Log all continuous data
FBrain$log_SL_T<-log(FBrain$SL_T)
FBrain$log_BrA<-log(FBrain$BrA)
FBrain$log_BrW<-log(FBrain$BrW)
FBrain$log_Sum<-log(FBrain$Sum)
FBrain$log_TelVol<-log(FBrain$TelVol)
FBrain$log_OTVol<-log(FBrain$OTVol)
FBrain$log_CerVol<-log(FBrain$CerVol)
FBrain$log_DMVol<-log(FBrain$DMVol)
FBrain$log_OBVol<-log(FBrain$OBVol)
FBrain$log_HypVol<-log(FBrain$HypVol)
FBrain$log_DMVol<-log(FBrain$DMVol)
FBrain$log_BrainWt<-log(FBrain$BrainWt)
FBrain$log_OT_Area<-log(FBrain$OT_Area)
FBrain$log_Tel_Area<-log(FBrain$Tel_Area)

##normally distributed?
hist(FBrain$log_BrA) 
hist(FBrain$log_BrainWt) 
hist(FBrain$log_Sum) 

#outliers

plot(BrA~SL_T, data=FBrain)
##One outlier - ~ 6.9mm2 brain area. Remove from analysis. ID = F10.
ß
FBraino <- FBrain[-c(10),]


############ Fixation/ wash time effect? #################

Fixed1 <- lm(log_Sum ~ Fixing_Time + log_SL_T, data=FBraino)
car::Anova(Fixed1) 
##No effect of fixation time on brain volume controlling for log SL
Fixed2 <- lm(log_Sum ~ Wash_2 + log_SL_T, data=FBraino)
car::Anova(Fixed2)
##No effect of washing 2 duration on brain volume controlling for log SL 

Fixed3 <- lm(log_BrainWt ~ Fixing_Time + log_SL_T, data=FBraino)
car::Anova(Fixed3)
##No effect of fixation time on brain weight controlling for log SL

Fixed4 <- lm(log_BrainWt ~ Wash_2 + log_SL_T, data=FBraino)
car::Anova(Fixed4)
##No effect of washing 2 duration on brain weight controlling for log SL 


############ Repeatability values for external + internal measurements ###############

##Note: This section uses normalised values. Each value has an average of 0 and SD of 1
##Outlier removed in excel file*****

BR_measurement2<-read_excel("/brainsize_and_matechoice_data.xlsx",sheet=2)

BR_measurement2$Fish_ID<-as.factor(BR_measurement2$Fish_ID)
BR_measurement2$Type<-as.factor(BR_measurement2$Type)
str(BR_measurement2)

##Area and Volume
Br_VAf<- select(filter(BR_measurement2, Type=="st_Sum"| Type=="st_BrA"), c("Fish_ID", "Type", "Measure"))
rep1 <- rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = Br_VAf, datatype = "Gaussian", 
            nboot = 100, npermut = 0)
rep1


##Area and Mass
Br_AMf<- select(filter(BR_measurement2, Type=="st_BrA"| Type=="st_BrainWt"), c("Fish_ID", "Type", "Measure"))
rep2<-rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = Br_AMf, datatype = "Gaussian", 
          nboot = 100, npermut = 0)
rep2


##Width and Volume
Br_WVf<- select(filter(BR_measurement2, Type=="st_Sum"| Type=="st_BrW"), c("Fish_ID", "Type", "Measure"))
rep3 <- rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = Br_WVf, datatype = "Gaussian", 
            nboot = 100, npermut = 0)
rep3


##Width and Mass
Br_WMf<- select(filter(BR_measurement2, Type=="st_BrainWt"| Type=="st_BrW"), c("Fish_ID", "Type", "Measure"))
rep4 <- rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = Br_WMf, datatype = "Gaussian", 
            nboot = 100, npermut = 0)
rep4



##Optic tectum area + optic tectum volume 
Br_OTf<- select(filter(BR_measurement2, Type=="st_OTVol"| Type=="st_OT_Area"), c("Fish_ID", "Type", "Measure"))
rep7 <- rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = Br_OTf, datatype = "Gaussian", 
             nboot = 100, npermut = 0)
rep7


##Telencephalon area and telencephalon volume
Br_Telf<- select(filter(BR_measurement2, Type=="st_TelVol"| Type=="st_Tel_Area"), c("Fish_ID", "Type", "Measure"))
rep8<-rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = Br_Telf, datatype = "Gaussian", 
           nboot = 100, npermut = 0)
rep8



########## Repeatability between observers ##############

BR_rpt<-read_excel("/brainsize_and_matechoice_data.xlsx",sheet=3)
str(BR_rpt)
BR_rpt$Fish_ID<-as.factor(BR_rpt$Fish_ID)
BR_rpt$Type<-as.factor(BR_rpt$Type)
BR_rpt$ID<-as.factor(BR_rpt$ID)


### AREA ###
arearep<- filter(BR_rpt, Type=="Area")

rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = arearep, datatype = "Gaussian", 
    nboot = 100, npermut = 0)



### WIDTH #### 

widthrep<- filter(BR_rpt, Type=="Width")

rpt(Measure ~ (1 | Fish_ID), grname = "Fish_ID", data = widthrep, datatype = "Gaussian", 
    nboot = 100, npermut = 0)





#######################################################
###### Part 2 -- Mate Choice  #########################
#######################################################

## Load packages

library(effects)
library(sjPlot)
library(stargazer)

#Load data
mate<-read_excel("/brainsize_and_matechoice_data.xlsx", sheet4)
str(mate)

mate$F_brain_size <- factor(mate$F_brain_size)
mate$M_diff<- factor(mate$M_diff)
mate$Batch<- factor(mate$Batch)
mate$M_size_diff<-as.numeric(mate$M_size_diff)
mate$F_trial_no<- factor(mate$F_trial_no)
mate$Order_num<- factor(mate$Order_num)
mate$F_switches<-as.integer(mate$F_switches)
mate$Time_high_red<-as.integer(mate$Time_high_red)
mate$Time_low_red<-as.integer(mate$Time_low_red)
mate$Time_most<-as.integer(mate$Time_most)
mate$Time_least<-as.integer(mate$Time_least)


############ Outliers? Remove

boxplot(mate$M_red_diff)$out
outliers <- boxplot(mate$M_red_diff, plot=FALSE)$out
print(outliers)

mate[which(mate$M_red_diff %in% outliers),]
mate2 <- mate[-which(mate$M_red_diff %in% outliers),]

boxplot(mate2$M_red_diff)

######## Do females consistently choose between males?

glmer1<-glmer(cbind(Time_most, Time_least) ~ 1 + (1|Observation_ID), 
              data = mate2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer1)

#### SIDE: create over_disp function 

overdisp_fun <- function(model) {
  rdf <- df.residual(model)
  rp <- residuals(model,type="pearson")
  Pearson.chisq <- sum(rp^2)
  prat <- Pearson.chisq/rdf
  pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
  c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
}



########## Does the order in which males are presented affect SOP scores?

glmer1<-glmer(cbind(Time_high_red, Time_low_red) ~ Order_num + (1|Observation_ID), 
              data = mate2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer1) 
overdisp_fun(glmer1)

######## Do females consistently choose between males?
glmer2<-glmer(cbind(Time_most, Time_least) ~ 1 + (1|Observation_ID), 
                data = mate2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer2)
overdisp_fun(glmer2)


######## What affects SOP scores?

## Total Brain Area (Categorical)

glmer3<-glmer(cbind(Time_high_red, Time_low_red) ~ F_brain_size*M_diff + (1|Female_ID) + (1|Observation_ID), 
              data = mate2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer3) 
overdisp_fun(glmer3)


## Does preference deviate from the null = 0.5? Post-hoc test.

#First, group only based on male difference

matebigdiff2 <- filter(mate2, M_diff=="Large")
matesmalldiff2 <- filter(mate2, M_diff=="Small")

##Large difference in red
glmer4<-glmer(cbind(Time_high_red, Time_low_red) ~ 1  + (1|Observation_ID), 
              data = matebigdiff2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer4)
overdisp_fun(glmer4)

#Small difference in red
glmer5<-glmer(cbind(Time_high_red, Time_low_red) ~ 1 + (1|Observation_ID), 
              data = matesmalldiff2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer5)
overdisp_fun(glmer5)

## Examining the effects of continuous variation 

##Brain Area (Continuous)

glmer6<-glmer(cbind(Time_high_red, Time_low_red) ~ F_brain_area + M_red_diff + F_size + (1|Observation_ID), 
              data = mate2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer6) 
overdisp_fun(glmer6)

##Telencephalon Area (Continuous)

glmer7<-glmer(cbind(Time_high_red, Time_low_red) ~ Tel_Area + (F_brain_area-Tel_Area) + M_red_diff + (1|Observation_ID), 
              data = mate2, family = binomial(logit), glmerControl(optimizer = ('bobyqa')))
summary(glmer7)
overdisp_fun(glmer7)


########### What affects responsiveness scores?

# Brain Area (Categorical)
model1<- lmer((F_responsiveness) ~ F_brain_size + M_diff + (1|Female_ID), data=mate2)
summary(model1)
overdisp_fun(model1)


########### What affects the number of switches between males?

## Right skewed and has some zeros, so log+1 transform
hist(mate2$F_switches)
mate2$log_F_switches <- log(1 + mate2$F_switches)

## Brain Area (Categorical)
model2 <- lmer(log_F_switches ~ F_brain_size + M_diff + (1|Female_ID) + (1|M_pair), data=mate2)
summary(model2)
overdisp_fun(model1)

## Does continuous variation in brain size/telencephalon size/male difference affect responsiveness scores?

#Brain Area (Continuous)
model3<- lmer((F_responsiveness) ~  F_brain_area + M_red_diff + F_size  + (1|Female_ID), data=mate2)
summary(model3)
overdisp_fun(model1)

#Telencephalon Area (Continuous)

model4<- lmer(F_responsiveness ~ Tel_Area + (F_brain_area - Tel_Area) + M_red_diff + (1|Female_ID), data=mate2)
summary(model4)
overdisp_fun(model1)

## Brain Area (Continuous)
model5 <- lmer(log_F_switches ~ F_brain_area + M_red_diff + F_size + (1|Female_ID) + (1|M_pair), data=mate2)
summary(model5)
overdisp_fun(model1)

## Telencephalon Area  (Continuous)

model6<- lmer(log_F_switches ~ Tel_Area + (F_brain_area - Tel_Area) + M_red_diff + (1|Female_ID) + (1|M_pair), data=mate2)
summary(model6)
overdisp_fun(model6)


## Plot Figure 3

mate3 <-
  mate2 %>%
  #group by make and female size class
  group_by(M_diff,
           F_brain_size) %>%
  #remove NAs for calcualtions
  drop_na(SOP_high_red) %>%
  summarise(
    #calcualte standard error based and standard deviaiton
    sd = sd(SOP_high_red)/sqrt(sum(!is.na(SOP_high_red))),
    #calculate mean
    mean = mean(SOP_high_red),
    upper.CI = qt(0.975 , df = (nrow(.)-1))*sd(SOP_high_red)/sqrt(nrow(.)),
    lower.CI = qt(0.025 , df = (nrow(.)-1))*sd(SOP_high_red)/sqrt(nrow(.))) %>%
  #calcualte upper and lower standard error
  mutate(max95 = mean + sd,
         min95 = mean - sd,
         upper.CI = mean + upper.CI,
         lower.CI = mean + lower.CI) %>%
  #add the same matrics for all for different male sizes
  bind_rows(.,
            mate2 %>%
              #only group by male size
              group_by(M_diff) %>%
              #remove NAs for calcualtions
              drop_na(SOP_high_red) %>%
              summarise(
                #calcualte standard error based and standard deviaiton
                sd = sd(SOP_high_red)/sqrt(sum(!is.na(SOP_high_red))),
                #calculate mean
                mean = mean(SOP_high_red),
                upper.CI = qt(0.975 , df = (nrow(.)-1))*sd(SOP_high_red)/sqrt(nrow(.)),
                lower.CI = qt(0.025 , df = (nrow(.)-1))*sd(SOP_high_red)/sqrt(nrow(.))) %>%
              #calcualte upper and lower standard error
              mutate(
                #This just adds an 'all' grouping variable for female size
                F_brain_size = rep("All", 2),
                max95 = mean + sd,
                min95 = mean - sd,
                upper.CI = mean + upper.CI,
                lower.CI = mean + lower.CI))

d<-ggplot(data = mate3 %>%
            #keep only vals fro "All"
            filter(F_brain_size == "All")) +
  geom_errorbar(aes(x = F_brain_size,
                    y = mean,
                    ymin = lower.CI,
                    ymax = upper.CI,
                    #can specify colour if you want
  ),
  width = 0.4) +
  geom_pointrange(aes(x = F_brain_size,
                      y = mean,
                      ymin = lower.CI,
                      ymax = upper.CI,
                      #can specify colour if you want
  )) +
  facet_grid(cols = vars(M_diff)) +
  theme_bw() +
  geom_hline(yintercept=c(0.5), linetype="dotted")

d + labs(x = NULL, y = "Average SOP Score for the High Red Male", title = "Male Difference in Red") + theme(legend.position = "none",
                                                                                                            panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.title = element_text(size = 12, hjust = 0.5)) + annotate ("text", size = 7, x = 1, y = 0.47, label = "*") + annotate ("text", size = 7, x = 1, y = 0.585, label = "*")

### Plot Figure 4


mod<-lm(F_brain_area~F_size, data=mate2)
mate2$residuals <- residuals(mod)


ggplot(data=mate2, aes(x = residuals, y= SOP_high_red)) +
  geom_point() +
  theme_classic() + xlab("Residual Brain Area") + ylab("SOP Score for the High Red Male")



#######################################################
######## Supplementary Data - Part 1 #################
#######################################################



######### Correlation data #########

##Change variable names

FBraino$Standard_Length<-log(FBraino$SL_T)
FBraino$Brain_Area<-log(FBraino$BrA)
FBraino$Brain_Length<-log(FBraino$BrL)
FBraino$Brain_Width<-log(FBraino$BrW)
FBraino$Brain_Volume<-log(FBraino$Sum)
FBraino$Telencephalon_Volume<-log(FBraino$TelVol)
FBraino$Optic_Tectum_Volume<-log(FBraino$OTVol)
FBraino$Cerebellum_Volume<-log(FBraino$CerVol)
FBraino$Dorsal_Medulla_Volume<-log(FBraino$DMVol)
FBraino$Olfactory_Bulb_Volume<-log(FBraino$OBVol)
FBraino$Hypothalamus_Volume<-log(FBraino$HypVol)
FBraino$Dorsal_Medulla_Volume<-log(FBraino$DMVol)
FBraino$Brain_Weight<-log(FBraino$BrainWt)
FBraino$Optic_Tectum_Area<-log(FBraino$OT_Area)
FBraino$Telencephalon_Area<-log(FBraino$Tel_Area)

##Select only columns necessary
str(FBraino)
FBrainSelecto <- select(FBraino,Standard_Length:Telencephalon_Area)

##Plot with insignificant values left blank

Fo <- cor(FBrainSelecto,use = "pairwise.complete.obs")
cor.mtest <- function(mat, conf.level = 0.95) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- lowCI.mat <- uppCI.mat <- matrix(NA, n, n)
  diag(p.mat) <- 0
  diag(lowCI.mat) <- diag(uppCI.mat) <- 1
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], conf.level = conf.level)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
      lowCI.mat[i, j] <- lowCI.mat[j, i] <- tmp$conf.int[1]
      uppCI.mat[i, j] <- uppCI.mat[j, i] <- tmp$conf.int[2]
    }
  }
  return(list(p.mat, lowCI.mat, uppCI.mat))
}
res1 <- cor.mtest(FBrainSelecto, 0.95)
res2 <- cor.mtest(FBrainSelecto, 0.99)
## specialized the insignificant value according to the significant level
corrplot(Fo, p.mat = res1[[1]], sig.level = 0.05, insig="blank", tl.col = "black")






