####################
### INSTRUCTIONS ###
####################
# 1- download Kriebel et al. (2020) Supplementary Material from 
#    https://datadryad.org/stash/dataset/doi:10.5061/dryad.q573n5tg5
# 2- Extract the files 
# 3- Put scripts 1-8 and the extracted files in the same folder
# 4- Scripts are provided in parts and they must be run sequentially, 
# WARNING: Some scripts create large objects to be used in subsequent steps.

#######################
# 1. DATA PREPARATION #
#######################

# install latest version of corHMM from github
library(devtools)
install_github("thej022214/corHMM")

library(corHMM)
library(phytools)

# Read MCC tree from Kriebel et al. 
all_salvia_tree <- read.tree("Salvia_flower_evol_May2020/Phylogeny/Beast_Yule_MCC_newick.tre")

# Load pollinator and clade data from Kriebel et al.
all_salvia <- read.csv("Salvia_flower_evol_May2020/Data/groups.csv", header=T)
row.names(all_salvia) <- all_salvia$species

# Erase species not in tree
all_salvia <- all_salvia[all_salvia_tree$tip.label, ]

# Recode pollinator syndromes 
# 1 code for bee pollination
# 2 code for bird pollination
# 1&2 indicates ambiguity
all_salvia$pollinator2 <- gsub("bee&bird", "intermediate", all_salvia$pollinator)
all_salvia$pollinator2 <- gsub("sister", "bee", all_salvia$pollinator)
all_salvia$pollinator2<-as.factor(all_salvia$pollinator2)
all_salvia$pollinator2<-factor(all_salvia$pollinator2,labels = c("1","2","1&2"))

# Final base
b0 <- all_salvia[,c("species","pollinator2")]


#####################################################
# 2. ANCESTRAL STATE RECONSTRUCTION ON the MCC tree #
#####################################################

####################
# 2.1 Build models
####################

# model names code for:
# SMM. Simple Markov model
# HMM. Hidden Markov model
# ARD. All rates different
# SYM. Symmetric rates

# 2.1.1 SMM_ARD (i.e. as in Kriebel et al. 2019, 2020)
# WARNING! choose n.cores according to the available processor cores 
SMM_ARD <- corHMM(all_salvia_tree, b0, model ="ARD", n.cores = 7, 
                  node.states = "marginal", rate.cat = 1, 
                  root.p = "yang",  get.tip.states = TRUE)
SMM_ARD #summary
SMM_ARD$states[263,] # probabilities at Calosphace origin

# plot ASR to visually compare it with results from Kriebel et al. (2019, 2020)
tree <- SMM_ARD$phy
data <- SMM_ARD$data
model <- SMM_ARD$solution
model[is.na(model)] <- 0
diag(model) <- -rowSums(model)

plotRECON(tree, likelihoods = SMM_ARD$states, cex = 0.25, 
          piecolors = c("blue", "red"), show.tip.label = T) # plot AER
colores <- as.factor(b0$pollinator2)
colores <- factor(colores, labels = c("blue", "red", "purple"))
names(colores) <- b0$species; colores <- as.character(colores)
tiplabels(pch=19, cex=0.4, col = colores)

# 2.1.2 SMM_SYM
SMM_SYM <- corHMM(all_salvia_tree, b0, model ="SYM", n.cores = 7,
                  node.states = "marginal", rate.cat = 1, 
                  root.p = "yang",  get.tip.states = TRUE)
SMM_SYM # summary
SMM_SYM$states[263,]  # probabilities at Calosphace origin

# 2.1.3 HMM_ARD_ARD_CTARD
# rate transitions ARD ARD
# rate category transitions ARD
HMM_ARD_ARD_CTARD <- corHMM(all_salvia_tree, b0, model ="ARD", n.cores = 7,
                            node.states = "marginal", rate.cat = 2, 
                            root.p = "yang", get.tip.states = TRUE)
HMM_ARD_ARD_CTARD #summary
HMM_ARD_ARD_CTARD$states[263, ]  # probabilities at calosphace origin

# 2.1.4 HMM_ARD_SYM_CTARD
# rate transitions ARD SYM
# rate category transitions ARD 
RateCat1 <- getStateMat4Dat(b0, model = "ARD", dual = T)$rate.mat # R1
RateCat2 <- getStateMat4Dat(b0)$rate.mat # R2
RateCat2 <- equateStateMatPars(RateCat2, c(1:4))
RateClassMat <- getRateCatMat(2) 
StateMats <- list(RateCat1, RateCat2)
FM_ard_sym_ctard <- getFullMat(StateMats, RateClassMat)
HMM_ARD_SYM_CTARD <- corHMM(phy = all_salvia_tree, data = b0, rate.cat = 2, n.cores = 7, 
                            rate.mat = FM_ard_sym_ctard, node.states = "marginal", 
                            root.p = "yang", get.tip.states = TRUE)
HMM_ARD_SYM_CTARD # summary
HMM_ARD_SYM_CTARD$states[263, ]  # probabilities at Calosphace origin

# 2.1.5 HMM_SYM_SYM_CTARD
# rate transitions SYM SYM
# rate category transitions ARD 
RateCat1 <-getStateMat4Dat(b0)$rate.mat
RateCat1 <-equateStateMatPars(RateCat1, c(1,2)) 
RateCat2 <-getStateMat4Dat(b0)$rate.mat
RateCat2 <-equateStateMatPars(RateCat2, c(1,2))
StateMats <-list(RateCat1, RateCat2)
RateClassMat <-getRateCatMat(2)              
FM_sym_sym_ctard <-getFullMat(StateMats, RateClassMat)
HMM_SYM_SYM_CTARD <- corHMM(phy = all_salvia_tree, data = b0, rate.cat = 2, n.cores = 7,
                            rate.mat = FM_sym_sym_ctard, node.states = "marginal", 
                            root.p = "yang", get.tip.states = TRUE)
HMM_SYM_SYM_CTARD # summary
HMM_SYM_SYM_CTARD$states[263, ]  # probabilities at Calosphace origin

# 2.1.6 HMM_ARD_ARD_CTSYM
# rate transitions ARD ARD
# rate category transitions SYM
RateCat1 <-getStateMat4Dat(b0)$rate.mat
RateCat2 <-getStateMat4Dat(b0)$rate.mat
StateMats <-list(RateCat1, RateCat2)
RateClassMat <-getRateCatMat(2) 
RateClassMat <- equateStateMatPars(RateClassMat, c(1,2))
FM_ard_ard_ctsym <-getFullMat(StateMats, RateClassMat)
HMM_ARD_ARD_CTSYM <- corHMM(all_salvia_tree, b0, rate.mat = FM_ard_ard_ctsym,
                            node.states = "marginal", rate.cat = 2, n.cores = 7,
                            root.p = "yang", get.tip.states = TRUE)
HMM_ARD_ARD_CTSYM  # summary
HMM_ARD_ARD_CTSYM$states[263, ]  # probabilities at Calosphace origin

# 2.1.7 HMM_ARD_SYM_CTSYM
# rate transitions ARD SYM
# rate category transitions SYM
RateCat1 <-getStateMat4Dat(b0)$rate.mat
RateCat2 <-getStateMat4Dat(b0)$rate.mat
RateCat2 <- equateStateMatPars(RateCat2, c(1,2))
StateMats <-list(RateCat1, RateCat2)
RateClassMat <-getRateCatMat(2) 
RateClassMat <- equateStateMatPars(RateClassMat, c(1,2))
FM_ard_sym_ctsym <-getFullMat(StateMats, RateClassMat)
HMM_ARD_SYM_CTSYM <- corHMM(all_salvia_tree, b0, rate.mat = FM_ard_sym_ctsym,
                            node.states = "marginal", rate.cat = 2, n.cores = 7, 
                            root.p = "yang", get.tip.states = TRUE)
HMM_ARD_SYM_CTSYM  # summary 
HMM_ARD_SYM_CTSYM$states[263, ]  # probabilities at calosphace origin

# 2.1.8 HMM_SYM_SYM_CTSYM
# rate transitions SYM SYM
# rate category transitions SYM
RateCat1 <- getStateMat4Dat(b0)$rate.mat
RateCat1 <- equateStateMatPars(RateCat1, c(1,2))  
RateCat2 <- getStateMat4Dat(b0)$rate.mat
RateCat2 <- equateStateMatPars(RateCat2, c(1,2))  
StateMats <- list(RateCat1, RateCat2)
RateClassMat <- getRateCatMat(2) 
RateClassMat <- equateStateMatPars(RateClassMat, c(1,2))
FM_sym_sym_ctsym <- getFullMat(StateMats, RateClassMat)
HMM_SYM_SYM_CTSYM <- corHMM(all_salvia_tree, b0, rate.mat = FM_sym_sym_ctsym,
                            node.states = "marginal", rate.cat = 2, n.cores = 7,
                            root.p = "yang", get.tip.states = TRUE)
HMM_SYM_SYM_CTSYM  # summary 
HMM_SYM_SYM_CTSYM$states[263, ]  # probabilities at Calosphace origin

#----------------------------------------------------
save(FM_ard_sym_ctard, FM_sym_sym_ctard, 
     FM_ard_ard_ctsym, FM_ard_sym_ctsym, 
     FM_sym_sym_ctsym, file = "start_matrices.RData")
#----------------------------------------------------

#####################################
# 2.2 Model comparison (MCC tree)
#####################################

obj<- list(SMM_ARD, SMM_SYM, 
           HMM_ARD_ARD_CTARD, HMM_ARD_SYM_CTARD, 
           HMM_SYM_SYM_CTARD, HMM_ARD_ARD_CTSYM,
           HMM_ARD_SYM_CTSYM, HMM_SYM_SYM_CTSYM)

names(obj) <- c("SMM_ARD", "SMM_SYM", 
                "HMM_ARD_ARD_CTARD", "HMM_ARD_SYM_CTARD", 
                "HMM_SYM_SYM_CTARD", "HMM_ARD_ARD_CTSYM",
                "HMM_ARD_SYM_CTSYM", "HMM_SYM_SYM_CTSYM")

# Obtain AIC weights for model averaging
# This part of the routine was modified from Boyko & Beaulieu 2021
# https://doi.org/10.1111/2041-210X.13534 

AICcs <- unlist(lapply(obj, function(x) x$AICc))
AICwt <- exp(-0.5 * AICcs - min(AICcs))/sum(exp(-0.5 * AICcs - min(AICcs)))
res <- matrix(0, dim(obj[[1]]$states)[1], dim(obj[[1]]$states)[2])
for(i in 1:length(obj)){
  States <- colnames(obj[[i]]$solution)
  if(length(grep("R2", States)) == 0){
    ASR_i <- obj[[i]]$states[,grep("R1", States)]
  }else{
    ASR_i <- obj[[i]]$states[,grep("R1", States)] + obj[[i]]$states[,grep("R2", States)]
  }
  res <- res + (ASR_i * AICwt[i])
}
colnames(res) <- c("bee", "bird")

# Extract the probability of bird pollination at 
# the origin of Calosphace (node 263)

extr_bee <- function(x){
  bee <- ifelse(length(x$states[263, ]) == 2, 
                x$states[263, 1],
                sum(x$states[263, c(1,3)]))
  bee
}
bee_prob <- sapply(obj, extr_bee)

extr_bird <- function(x){
  bird <- ifelse(length(x$states[263, ]) == 2, 
                x$states[263, 2],
                sum(x$states[263, c(2,4)]))
  bird
}
bird_prob <- sapply(obj, extr_bird)

# extrac results 
table1 <- data.frame(model = names(obj), 
                     AIC_c = round(AICcs, 5),
                     weights = round(AICwt, 5),
                     bee = round(sapply(obj, extr_bee), 5),
                     bird = round(sapply(obj, extr_bird), 5))

# model averaged probs for the MRCA of Calosphace
average_bee <- sum(table1$bee*table1$weights); average_bee
average_bird <- sum(table1$bird*table1$weights); average_bird


###############################################################
# 2.3 plot ancestral state reconstruction from model averaging
###############################################################

# phyloch package has to be installed from github
library(devtools)
install_github("fmichonneau/phyloch")

library(phyloch)
# load("results_MCCtree.RData")
data("strat2012")

# This plot correspond to figure 1
#--------------------------------------------------------------------------------
svg(file = "averaged_recons.svg", width = 7, height = 10.5)
plot(all_salvia_tree, show.tip.label = F, edge.color = "grey40",
     edge.width = 0.8)
add.timeframe(all_salvia_tree, age = c(20.3:24.7), col = rgb(0.8, 0.3, 0.8, 0.4),
              border = NA) # add origin of modern hummingbirds in S. America
add.timeframe(all_salvia_tree, age = c(12.51:15.54), col = rgb(0.5, 1, 0.8, 0.4),
              border = NA) # add N. America recolonization by hummingbirds
par(fg="transparent")
nodelabels(pie = res, piecol = c("blue", "red"), 
           cex = 0.35)
par(fg = "black")
axisGeo(GTS = strat2012, unit = "epoch", cex = 0.8, ages = F, 
        col = c("#FFE7AA", "#806215", "#AA8B39", "#D4B76A"))
axisPhylo(cex.axis = 0.75)
colores <- as.factor(b0$pollinator2)
colores <- factor(colores, labels = c("blue", "red", "purple"))
names(colores) <- b0$species; colores <- as.character(colores)
tiplabels(pch=20, cex=0.1, col = colores)
dev.off()
# (further details were added in inkscape)
#--------------------------------------------------------------------------------



