# Load packages -----------------------------------------------------------

library(tidyverse)
library(car)
library(lme4)
library(MuMIn)
library(DHARMa)
library(sjPlot)


# Read in the data --------------------------------------------------------

# Sample/field information, TLR4 data and MHC data. The three tables are
# joined on their shared `Sample` column further down.
Field.data <- read_csv(file = "Pipit_sample_information.csv")
TLR4       <- read_csv(file = "Pipit_TLR4.csv")
MHC        <- read_csv(file = "Pipit_MHC.csv")

# Correct and tidy the field data -----------------------------------------

# Birds were classified as juvenile ("J"; EURING age code 3, born this
# calendar year) or adult ("A"; EURING age codes 4-6, born before this
# calendar year), based on feather moult pattern (Cramp 1988). Records that
# carry an age code are converted to "J"/"A" here, and any record whose age is
# still missing afterwards is treated as adult.
# NOTE(review): the stated exception for samples collected in 2009 (age codes
# 3 and 5 both classified as juvenile) is not applied by this block, which maps
# code 5 to "A" in every year - confirm it is already reflected in the input.
age_code <- as.character(Field.data$Age_code)
Field.data$Age[age_code %in% "3"] <- "J"
Field.data$Age[age_code %in% c("4", "5", "6")] <- "A"
Field.data$Age[is.na(Field.data$Age)] <- "A"

# Pox status: uncertain records ("Y?" or "?") all showed signs of pox
# lesions/scars/deformities, so they are scored as infected together with "Y".
# Pox and malaria are then coded as binary variables (1 = "Y", 0 = "N").
Field.data$Pox[Field.data$Pox %in% c("?", "Y?", "Y")] <- "1"
Field.data$Pox[Field.data$Pox %in% "N"] <- "0"
Field.data$Malaria[Field.data$Malaria %in% "Y"] <- "1"
Field.data$Malaria[Field.data$Malaria %in% "N"] <- "0"

# Inspect the recoded columns, then convert them from character to numeric
str(Field.data$Pox)
Field.data$Pox <- as.numeric(Field.data$Pox)

str(Field.data$Malaria)
Field.data$Malaria <- as.numeric(Field.data$Malaria)

# Code year as a factor (it is used as a random intercept in the models)
str(Field.data$Year)
Field.data$Year <- as.factor(Field.data$Year)

# Tidy TLR4 data and code for different TLR4 variants ----------------------------------------

# Remove individuals with missing TLR4 data. Each haplotype column is tested
# on its own: combining the counts with `|` before calling is.na() would keep
# a row with a missing count whenever another haplotype count is non-zero,
# because `NA | TRUE` evaluates to TRUE in R.
TLR4 <- filter(
  TLR4,
  !is.na(TLR4_Prot_1),
  !is.na(TLR4_Prot_2),
  !is.na(TLR4_Prot_3),
  !is.na(TLR4_Prot_4)
)

# Code for the presence (1) / absence (0) of each protein haplotype (TLR4_PX):
# an individual carrying one or two copies of a haplotype is scored as 1.
# The columns are kept numeric. Assigning the string "1" into a copy-number
# column would turn that column into character only when a homozygote exists,
# so its type would depend on the data.
TLR4$TLR4_P1 <- as.numeric(TLR4$TLR4_Prot_1 > 0)
TLR4$TLR4_P2 <- as.numeric(TLR4$TLR4_Prot_2 > 0)
TLR4$TLR4_P3 <- as.numeric(TLR4$TLR4_Prot_3 > 0)
TLR4$TLR4_P4 <- as.numeric(TLR4$TLR4_Prot_4 > 0)

# Combine protein haplotype data to code for different protein genotypes

# Copy-number columns of the four protein haplotypes, in haplotype order
Haplotype_columns <- select(TLR4, TLR4_Prot_1, TLR4_Prot_2, TLR4_Prot_3, TLR4_Prot_4)

# TRUE where an individual carries at least one copy of a haplotype
haplotype_present <- as.matrix(Haplotype_columns) > 0

# For every individual, list the column numbers (1-4) of the haplotypes it
# carries, separated by commas (e.g. "1,3"). seq_len() keeps this safe when
# the table has no rows, and vapply() guarantees a character result.
Haplotype_columns$TLR4_genotype <- vapply(
  seq_len(nrow(haplotype_present)),
  function(i) paste(which(haplotype_present[i, ]), collapse = ","),
  character(1)
)

# Correct for homozygous genotypes: a single haplotype number means two copies
# of that haplotype, so "1" becomes "1,1", "2" becomes "2,2", and so on.
is_homozygous <- Haplotype_columns$TLR4_genotype %in% c("1", "2", "3", "4")
Haplotype_columns$TLR4_genotype[is_homozygous] <- paste(
  Haplotype_columns$TLR4_genotype[is_homozygous],
  Haplotype_columns$TLR4_genotype[is_homozygous],
  sep = ","
)

# Add haplotype genotype
Haplotype_columns$Sample <- TLR4$Sample

Genotypes <- select(Haplotype_columns, Sample, TLR4_genotype) ## sample ID plus genotype only

# Haplotype_columns was built from TLR4 row by row, so the genotypes can be
# attached by position. This avoids the row duplication a join on `Sample`
# would cause if a sample ID ever appeared more than once.
TLR4$TLR4_genotype <- Genotypes$TLR4_genotype

# Code for protein heterozygosity (TLR4_het): encoded as "1" for
# heterozygotes and "0" for homozygotes. Every genotype that is not one of the
# four homozygous codes is scored as heterozygous; individuals without a
# genotype stay NA.
TLR4$TLR4_het <- "1"
TLR4$TLR4_het[TLR4$TLR4_genotype %in% c("1,1", "2,2", "3,3", "4,4")] <- "0"
TLR4$TLR4_het[is.na(TLR4$TLR4_genotype)] <- NA


# Tidy and code MHC data --------------------------------------------------

# Remove MHC alleles with low amplicon efficiency (ANBE3 and ANBE31)
MHC[["ANBE3"]] <- NULL
MHC[["ANBE31"]] <- NULL

# Add a column for the quadratic of allele number (counted without the two
# alleles removed above)
MHC[["MHC_Nalleles_squared"]] <- MHC[["Nalleles_without_3_31"]]^2


# Combine the three datasets ----------------------------------------------

# Attach the TLR4 and MHC data to the field records by sample ID, then keep
# only the birds flagged "Y" in the MHC or TLR4 column.
# NOTE(review): inside filter() `MHC` and `TLR4` are expected to resolve to
# columns of the joined table (presumably Y/N flags from the field data), not
# to the data frames of the same name - confirm against the input file.
Candidate.data <- Field.data %>%
  left_join(TLR4, by = "Sample") %>%
  left_join(MHC, by = "Sample") %>%
  filter(MHC == "Y" | TLR4 == "Y")


# Finding the minimal model (Table 1) ------------------------------------------------------------------

# Keep only the variables needed for the non-genetic models and exclude the
# individuals with missing sex (two birds).
Non.genetic <- Candidate.data %>%
  select(Sample, Island, Year, Age, Sex, Pox, Malaria) %>%
  filter(!is.na(Sex))

# Build the model with all variables first and then sequentially remove all
# non-significant predictors. Every model is a binomial GLMM of pox status
# with year as a random intercept.

# Full main-effects model
NGM1 <- glmer(
  Pox ~ (Age + Sex + Malaria + Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
vif(NGM1)
summary(NGM1)

# Main effects plus the malaria-by-island interaction
NGM2 <- glmer(
  Pox ~ (Age + Sex) + (Malaria * Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
vif(NGM2)
summary(NGM2)

# Main effects plus the age-by-island interaction
NGM3 <- glmer(
  Pox ~ (Sex + Malaria) + (Age * Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
vif(NGM3)
summary(NGM3)

# Age removed
NGM4 <- glmer(
  Pox ~ (Sex + Malaria + Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
vif(NGM4)
summary(NGM4)

# Age and sex removed: the minimal model
MinimalModel <- glmer(
  Pox ~ (Malaria + Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
vif(MinimalModel)
summary(MinimalModel)
r.squaredGLMM(MinimalModel)

# Checking model fit (random factors and residuals)

# Fixed-effect estimates and random-effect (year) estimates
plot_model(MinimalModel)
plot_model(MinimalModel, type = "re")

# DHARMa simulated residuals with default settings
res <- simulateResiduals(MinimalModel)
plot(res, rank = TRUE)

# Formal tests on 1000 simulations, drawn in a 2 x 2 panel. The previous
# graphics settings are saved and restored afterwards instead of being forced
# back to a single panel.
old_par <- par(mfrow = c(2, 2))
simulationOutput <- simulateResiduals(fittedModel = MinimalModel, n = 1000)
testDispersion(simulationOutput = simulationOutput, alternative = "less")
testDispersion(simulationOutput = simulationOutput, alternative = "greater")
testUniformity(simulationOutput = simulationOutput)
testOutliers(simulationOutput, alternative = "two.sided")
par(old_par)

# Add removed variables one at a time to the minimal model to get estimates

# Minimal model + sex (same specification as NGM4 above)
NGM6 <- glmer(
  Pox ~ (Sex + Malaria + Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
summary(NGM6)

# Minimal model + age
NGM7 <- glmer(
  Pox ~ (Age + Malaria + Island) + (1 | Year),
  data = Non.genetic,
  family = binomial
)
summary(NGM7)


# Plotting data for minimal model (Fig 2) --------------------------------

# change malaria to a factor (used as the grouping/fill variable in the plot)
str(Non.genetic$Malaria)
Non.genetic$Malaria <- as.factor(Non.genetic$Malaria)

# calculate prevalence

# Number of birds per pox status, island and malaria status
pox_status_counts_malaria <- count(Non.genetic, Pox, Island, Malaria)

# Pox-infected birds only (Pox is numeric, so compare with the number 1)
infected_counts_malaria <- filter(pox_status_counts_malaria, Pox == 1)

# Total number of birds per island and malaria status
number_of_individuals_malaria <- count(Non.genetic, Island, Malaria)

# n.x = birds sampled in the group, n.y = pox-infected birds in the group
pox_infections_malaria <- left_join(
  number_of_individuals_malaria,
  infected_counts_malaria,
  by = c("Island", "Malaria")
)

# A group without any infected bird has no row in the infected counts, so the
# join leaves n.y missing. Such a group has zero infected birds, not an
# unknown number, so its prevalence should be 0 % rather than NA.
pox_infections_malaria$n.y[is.na(pox_infections_malaria$n.y)] <- 0L

# Prevalence as a percentage of the birds sampled in each group
pox_infections_malaria$Pox_prevalence_malaria <-
  (pox_infections_malaria$n.y / pox_infections_malaria$n.x) * 100

# Plot data for minimal model: pox prevalence (%) per island as dodged bars,
# one bar per malaria infection status, with the number of birds sampled in
# each group (n.x) printed above its bar.
# NOTE(review): the y axis is fixed to 0-50 %; with ggplot2's default
# out-of-bounds handling a bar above 50 % would be dropped - confirm no group
# exceeds this.
# NOTE(review): ggplot2 >= 3.5.0 deprecates a numeric `legend.position` in
# favour of `legend.position = "inside"` plus `legend.position.inside`; left
# unchanged here to stay compatible with older ggplot2 versions.
Malaria_Pox_Prevalence <- ggplot(
  pox_infections_malaria,
  aes(x = Island, y = Pox_prevalence_malaria, fill = Malaria)
) +
  geom_bar(
    aes(color = Malaria, fill = Malaria),
    stat = "identity",
    position = position_dodge(0.8),
    width = 0.7
  ) +
  geom_text(
    aes(x = Island, y = Pox_prevalence_malaria, label = n.x, group = Malaria),
    position = position_dodge(width = 0.8),
    vjust = -0.3,
    size = 3,
    color = "black"
  ) +
  labs(y = "Pox prevalence (%)", x = "") +
  theme_classic() +
  theme(
    axis.text = element_text(size = 9),
    axis.title.x = element_text(size = 9),
    axis.title.y = element_text(size = 9, vjust = 4),
    legend.text = element_text(size = 7),
    legend.title = element_text(size = 7.2),
    plot.margin = unit(c(0.5, 0.3, 0.1, 0.5), "cm"),
    legend.position = c(0.9, 0.9)
  ) +
  # `limits` is spelled out in full rather than relying on partial argument
  # matching of `limit`
  scale_y_continuous(expand = c(0, 0), limits = c(0, 50)) +
  scale_x_discrete(labels = c("PS" = "Porto Santo", "TF" = "Tenerife")) +
  scale_color_manual(
    name = "Malaria infection status",
    values = c("#BEBEBE", "#595959"),
    labels = c("Not infected", "Infected")
  ) +
  scale_fill_manual(
    name = "Malaria infection status",
    values = c("#BEBEBE", "#595959"),
    labels = c("Not infected", "Infected")
  )
Malaria_Pox_Prevalence

# Save Figure 2 as a 169 x 100 mm PDF. A single ggsave() call is enough: a
# separate pdf()/print()/dev.off() export to the same file name would be
# overwritten by this call straight away.
ggsave(
  "Pox_Figure2.pdf",
  plot = Malaria_Pox_Prevalence,
  device = "pdf",
  height = 100,
  width = 169,
  units = "mm",
  dpi = 500
)


# TLR4 variation in relation to pox infection status (Table 2) ----------------------

# Remove individuals with missing TLR4 data. Each haplotype column is tested
# on its own: combining the counts with `|` before calling is.na() would keep
# a bird with a missing count whenever another haplotype count is non-zero,
# because `NA | TRUE` evaluates to TRUE in R.
TLR4.models <- filter(
  Candidate.data,
  !is.na(TLR4_Prot_1),
  !is.na(TLR4_Prot_2),
  !is.na(TLR4_Prot_3),
  !is.na(TLR4_Prot_4)
)

# Subset data for each island (PS = Porto Santo, TF = Tenerife)
PS.TLR4.models <- filter(TLR4.models, Island == "PS")
TF.TLR4.models <- filter(TLR4.models, Island == "TF")

# Calculate variant/genotype frequencies

# The frequency of TLR4 protein haplotypes on each island: the number of
# copies of each haplotype divided by the number of gene copies sampled.
# The denominator is taken from the data (two copies per genotyped bird)
# instead of being typed in, so it cannot drift out of step with the filters
# applied upstream.
haplotype_count_columns <- c("TLR4_Prot_1", "TLR4_Prot_2", "TLR4_Prot_3", "TLR4_Prot_4")

Haplotypes <- data.frame(
  Island = rep(c("PS", "TF"), each = 4),
  Haplo = rep(c("TLR4_P1", "TLR4_P2", "TLR4_P3", "TLR4_P4"), times = 2),
  Haplo_count = unname(c(
    colSums(PS.TLR4.models[haplotype_count_columns]),
    colSums(TF.TLR4.models[haplotype_count_columns])
  )),
  # `Individuals` holds the number of gene copies: 2 x birds per island
  Individuals = rep(
    2 * c(nrow(PS.TLR4.models), nrow(TF.TLR4.models)),
    each = 4
  )
)
Haplotypes$Haplo_frequency <- Haplotypes$Haplo_count / Haplotypes$Individuals

# The frequency of heterozygous or homozygous genotypes among each island:
# birds per heterozygosity class and island, divided by the birds sampled on
# that island.

TLR4.heterozygosity.count <- count(TLR4.models, TLR4_het, Island)
individuals.sampled <- count(TLR4.models, Island)
Heterozygosity <- TLR4.heterozygosity.count %>%
  left_join(individuals.sampled, by = "Island") %>%
  rename(Heterozygosity_count = n.x, Individuals_sampled = n.y) %>%
  mutate(Genotype_proportion = Heterozygosity_count / Individuals_sampled)

# The frequency of each TLR4 protein genotype among each island, calculated
# the same way.

TLR4.genotype.count <- count(TLR4.models, TLR4_genotype, Island)
individuals.sampled <- count(TLR4.models, Island)
Genotypes <- TLR4.genotype.count %>%
  left_join(individuals.sampled, by = "Island") %>%
  rename(Genotype_count = n.x, Individuals_sampled = n.y) %>%
  mutate(Genotype_frequency = Genotype_count / Individuals_sampled)

# Subset data for TLR4 models: keep the sample ID, random effect (Year),
# response (Pox), malaria status and the TLR4 predictors for each island.
PS.TLR4.models <- PS.TLR4.models %>%
  select(
    Sample, Year, Pox, Malaria,
    TLR4_P1, TLR4_P2, TLR4_P3, TLR4_P4,
    TLR4_het, TLR4_genotype
  )
TF.TLR4.models <- TF.TLR4.models %>%
  select(
    Sample, Year, Pox, Malaria,
    TLR4_P1, TLR4_P2, TLR4_P3, TLR4_P4,
    TLR4_het, TLR4_genotype
  )

# TLR4 protein haplotype (presence/absence)
# Binomial GLMMs of pox status with malaria as a covariate and year as a
# random intercept, fitted separately per island.

# PS: haplotypes P1, P2 and P3 as predictors
TLR4.PS.haplotype <- glmer(
  Pox ~ (Malaria + TLR4_P1 + TLR4_P2 + TLR4_P3) + (1 | Year),
  data = PS.TLR4.models,
  family = binomial
)
summary(TLR4.PS.haplotype)
vif(TLR4.PS.haplotype)

# TF: haplotypes P1 and P2 as predictors
TLR4.TF.haplotype <- glmer(
  Pox ~ (Malaria + TLR4_P1 + TLR4_P2) + (1 | Year),
  data = TF.TLR4.models,
  family = binomial
)
summary(TLR4.TF.haplotype)
vif(TLR4.TF.haplotype)

# TLR4 protein heterozygosity (heterozygote/homozygote)

# PS
TLR4.PS.heterozygosity <- glmer(
  Pox ~ (Malaria + TLR4_het) + (1 | Year),
  data = PS.TLR4.models,
  family = binomial
)
summary(TLR4.PS.heterozygosity)
vif(TLR4.PS.heterozygosity)

# TF
TLR4.TF.heterozygosity <- glmer(
  Pox ~ (Malaria + TLR4_het) + (1 | Year),
  data = TF.TLR4.models,
  family = binomial
)
summary(TLR4.TF.heterozygosity)
vif(TLR4.TF.heterozygosity)

# TLR4 protein haplotype genotype

# PS
# Separate PS dataset with rare genotypes removed (less than 0.05 frequency):
# birds with genotype 1,4 / 2,2 / 2,4 / 4,4, or with no genotype, are dropped.
PS.TLR4.geno <- filter(
  PS.TLR4.models,
  !is.na(TLR4_genotype),
  !TLR4_genotype %in% c("1,4", "2,2", "2,4", "4,4")
)

TLR4.PS.genotype <- glmer(
  Pox ~ (Malaria + TLR4_genotype) + (1 | Year),
  data = PS.TLR4.geno,
  family = binomial
)
summary(TLR4.PS.genotype)
vif(TLR4.PS.genotype)

# TF (all genotypes retained)
TLR4.TF.genotype <- glmer(
  Pox ~ (Malaria + TLR4_genotype) + (1 | Year),
  data = TF.TLR4.models,
  family = binomial
)
summary(TLR4.TF.genotype)
vif(TLR4.TF.genotype)


# Variation at MHC class I exon 3 in relation to pox infection status (Table 3) -----------------------

# Subset data for MHC models: birds flagged "Y" in the MHC column, keeping the
# response, malaria status, the allele presence/absence columns and the two
# allele-number variables.
# NOTE(review): the models below are named "TF" but no island filter is
# applied here - presumably MHC data exist for Tenerife birds only; confirm.
MHC.models <- Candidate.data %>%
  filter(MHC == "Y") %>%
  select(
    Sample, Pox, Malaria,
    ANBE10, ANBE2, ANBE8, ANBE4, ANBE43, ANBE1, ANBE44, ANBE45, ANBE7,
    ANBE13, ANBE9, ANBE46, ANBE16, ANBE47, ANBE11, ANBE28, ANBE6, ANBE48,
    ANBE49, ANBE38,
    Nalleles_without_3_31, MHC_Nalleles_squared
  )

# MHC class 1 exon 3 diversity (number of alleles) and optimality (number of
# alleles squared)
MHC.TF.diversity <- glm(
  Pox ~ (Malaria + Nalleles_without_3_31 + MHC_Nalleles_squared),
  data = MHC.models,
  family = binomial
)
summary(MHC.TF.diversity)
vif(MHC.TF.diversity)

# MHC class 1 exon 3 alleles (presence/absence), 5% frequency threshold
# (without 16 and 49 (none with pox))

MHC.TF.alleles <- glm(
  Pox ~ (Malaria + ANBE10 + ANBE8 + ANBE4 + ANBE43 + ANBE1 + ANBE44 + ANBE45 +
    ANBE9 + ANBE46 + ANBE47 + ANBE11 + ANBE6 + ANBE38),
  data = MHC.models,
  family = binomial
)
summary(MHC.TF.alleles)
vif(MHC.TF.alleles)


# Models repeated without malaria (Supplementary tables S5 & 6) -----------------------------------------

# TLR4 protein haplotype (presence/absence)
# Same TLR4 models as for Table 2, refitted without the malaria covariate.

# PS
TLR4.PS.haplotype.v2 <- glmer(
  Pox ~ (TLR4_P1 + TLR4_P2 + TLR4_P3) + (1 | Year),
  data = PS.TLR4.models,
  family = binomial
)
summary(TLR4.PS.haplotype.v2)
vif(TLR4.PS.haplotype.v2)

# TF
TLR4.TF.haplotype.v2 <- glmer(
  Pox ~ (TLR4_P1 + TLR4_P2) + (1 | Year),
  data = TF.TLR4.models,
  family = binomial
)
summary(TLR4.TF.haplotype.v2)
vif(TLR4.TF.haplotype.v2)

# TLR4 protein heterozygosity (heterozygote/homozygote)

# PS
TLR4.PS.heterozygosity.v2 <- glmer(
  Pox ~ (TLR4_het) + (1 | Year),
  data = PS.TLR4.models,
  family = binomial
)
summary(TLR4.PS.heterozygosity.v2)

# TF
TLR4.TF.heterozygosity.v2 <- glmer(
  Pox ~ (TLR4_het) + (1 | Year),
  data = TF.TLR4.models,
  family = binomial
)
summary(TLR4.TF.heterozygosity.v2)

# TLR4 protein haplotype genotype

# PS (dataset with rare genotypes removed)
TLR4.PS.genotype.v2 <- glmer(
  Pox ~ (TLR4_genotype) + (1 | Year),
  data = PS.TLR4.geno,
  family = binomial
)
summary(TLR4.PS.genotype.v2)

# TF
TLR4.TF.genotype.v2 <- glmer(
  Pox ~ (TLR4_genotype) + (1 | Year),
  data = TF.TLR4.models,
  family = binomial
)
summary(TLR4.TF.genotype.v2)


# MHC class 1 exon 3 diversity (number of alleles) and optimality (number of
# alleles squared), refitted without the malaria covariate
MHC.TF.diversity.v2 <- glm(
  Pox ~ (Nalleles_without_3_31 + MHC_Nalleles_squared),
  data = MHC.models,
  family = binomial
)
summary(MHC.TF.diversity.v2)
vif(MHC.TF.diversity.v2)

# MHC class 1 exon 3 alleles (presence/absence), 5% frequency threshold
# (without 16 and 49 (none with pox)), refitted without the malaria covariate

MHC.TF.alleles.v2 <- glm(
  Pox ~ (ANBE10 + ANBE8 + ANBE4 + ANBE43 + ANBE1 + ANBE44 + ANBE45 + ANBE9 +
    ANBE46 + ANBE47 + ANBE11 + ANBE6 + ANBE38),
  data = MHC.models,
  family = binomial
)
summary(MHC.TF.alleles.v2)
vif(MHC.TF.alleles.v2)

