#########################################################################
# 3 MODEL AND PHYLOGENETIC UNCERTAINTY IN ANCESTRAL STATE RECONSTRUCTION 
#########################################################################

library(ape)
library(corHMM)
library(beepr)
library(ggplot2)
library(patchwork)

load("start_matrices.RData")

# Read MCC and 10 posterior trees
tree_mcc_0 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/Beast_Yule_MCC_newick.tre")
tree_cluster_1 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_1.tre")
tree_cluster_2 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_2.tre")
tree_cluster_3 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_3.tre")
tree_cluster_4 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_4.tre")
tree_cluster_5 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_5.tre")
tree_cluster_6 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_6.tre")
tree_cluster_7 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_7.tre")
tree_cluster_8 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_8.tre")
tree_cluster_9 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_9.tre")
tree_cluster_10 <- read.tree("Salvia_flower_evol_May2020/Phylogeny/tree_cluster_10.tre")
tree_list <- list(tree_mcc_0, tree_cluster_1,tree_cluster_2,tree_cluster_3,tree_cluster_4,tree_cluster_5,
                  tree_cluster_6,tree_cluster_7,tree_cluster_8,tree_cluster_9,tree_cluster_10)

# Load pollinator and clade data
all_salvia <- read.csv("Salvia_flower_evol_May2020/Data/groups.csv", header=T)
row.names(all_salvia) <- all_salvia$species
all_salvia <- all_salvia[tree_cluster_1$tip.label, ]
all_salvia$pollinator2 <- gsub("bee&bird", "intermediate", all_salvia$pollinator)
all_salvia$pollinator2 <- gsub("sister", "bee", all_salvia$pollinator)
all_salvia$pollinator2<-as.factor(all_salvia$pollinator2)
all_salvia$pollinator2<-factor(all_salvia$pollinator2,labels = c("1","2","1&2"))
b0 <- all_salvia[,c("species","pollinator2")]

##########################################################
# 3.1 Build models in the MCC tree and 10 posterior trees
##########################################################

#----------------------------------
# WARNING: this could take a while
#----------------------------------

# 3.1.1 SMM_ARD
unc_smm_ard <- vector(mode = "list", 11)
for(i in 1:11){
  unc_smm_ard[[i]] <- corHMM(tree_list[[i]], b0, model ="ARD", n.cores = 7, 
                             node.states = "marginal", rate.cat = 1, 
                             root.p = "yang",  get.tip.states = TRUE)
}
beep(5)

# 3.1.2 SMM_SYM
unc_smm_sym <- vector(mode = "list", 11)
for(i in 1:11){
  unc_smm_sym[[i]] <- corHMM(tree_list[[i]], b0, model ="SYM", n.cores = 7, 
                             node.states = "marginal", rate.cat = 1, 
                             root.p = "yang",  get.tip.states = TRUE)
}
beep(5)

# 3.1.3 HMM_ARD_ARD_CTARD
unc_hmm_ard_ard_ctard <- vector(mode = "list", 11)
for(i in 1:11){
  unc_hmm_ard_ard_ctard[[i]] <- corHMM(tree_list[[i]], b0, model ="ARD", n.cores = 7, 
                                       node.states = "marginal", rate.cat = 2, 
                                       root.p = "yang",  get.tip.states = TRUE)
}
beep(5)

# 3.1.4 HMM_ARD_SYM_CTARD
unc_hmm_ard_sym_ctard <- vector(mode = "list", 11)
for(i in 1:11){
  unc_hmm_ard_sym_ctard[[i]] <- corHMM(tree_list[[i]], data = b0, rate.cat = 2, n.cores = 7, 
                                       rate.mat = FM_ard_sym_ctard, node.states = "marginal", 
                                       root.p = "yang", get.tip.states = TRUE)
}
beep(5)

# 3.1.5 HMM_SYM_SYM_CTARD
unc_hmm_sym_sym_ctard <- vector(mode = "list", 11)
for(i in 1:11){
  unc_hmm_sym_sym_ctard[[i]] <- corHMM(tree_list[[i]], data = b0, rate.cat = 2, n.cores = 7, 
                                       rate.mat = FM_sym_sym_ctard, node.states = "marginal", 
                                       root.p = "yang", get.tip.states = TRUE)
}
beep(5)

# 3.1.6 HMM_ARD_ARD_CTSYM
unc_hmm_ard_ard_ctsym <- vector(mode = "list", 11)
for(i in 1:11){
  unc_hmm_ard_ard_ctsym[[i]] <- corHMM(tree_list[[i]], data = b0, rate.cat = 2, n.cores = 7, 
                                       rate.mat = FM_ard_ard_ctsym, node.states = "marginal", 
                                       root.p = "yang", get.tip.states = TRUE)
}
beep(5)

# 3.1.7 HMM_ARD_SYM_CTSYM
unc_hmm_ard_sym_ctsym <- vector(mode = "list", 11)
for(i in 1:11){
  unc_hmm_ard_sym_ctsym[[i]] <- corHMM(tree_list[[i]], data = b0, rate.cat = 2, n.cores = 7, 
                                       rate.mat = FM_ard_sym_ctsym, node.states = "marginal", 
                                       root.p = "yang", get.tip.states = TRUE)
}
beep(5)

# 3.1.8 HMM_SYM_SYM_CTSYM
unc_hmm_sym_sym_ctsym <- vector(mode = "list", 11)
for(i in 1:11){
  unc_hmm_sym_sym_ctsym[[i]] <- corHMM(tree_list[[i]], data = b0, rate.cat = 2, n.cores = 7, 
                                       rate.mat = FM_sym_sym_ctsym, node.states = "marginal", 
                                       root.p = "yang", get.tip.states = TRUE)
}
beep(5)

##########################
# 3.2 Extracting results
##########################

# functions
extr_bee <- function(x){
  bee <- ifelse(length(x$states[263, ]) == 2, 
                x$states[263, 1],
                sum(x$states[263, c(1,3)]))
  bee
}

extr_bird <- function(x){
  bird <- ifelse(length(x$states[263, ]) == 2, 
                 x$states[263, 2],
                 sum(x$states[263, c(2,4)]))
  bird
}

fn_extract <- function(x){
  aicc <- numeric(length(x))
  for(i in 1:length(x)){
    aicc[i] <- x[[i]]$AICc
  }
  states.bee <- numeric(length(x))
  for(i in 1:length(x)){
    states.bee[i] <- extr_bee(x[[i]])
  }
  states.bird <- numeric(length(x))
  for(i in 1:length(x)){
    states.bird[i] <- extr_bird(x[[i]])
  }
  res <- cbind(aicc, states.bee, states.bird)
  return(res)
}

res_unc <- rbind(fn_extract(unc_smm_ard),
                 fn_extract(unc_smm_sym),
                 fn_extract(unc_hmm_ard_ard_ctard),
                 fn_extract(unc_hmm_ard_sym_ctard),
                 fn_extract(unc_hmm_sym_sym_ctard),
                 fn_extract(unc_hmm_ard_ard_ctsym),
                 fn_extract(unc_hmm_ard_sym_ctsym),
                 fn_extract(unc_hmm_sym_sym_ctsym))
res_unc <- as.data.frame(res_unc)
res_unc$model <- as.factor(c(rep("SMM_ARD", 11),
                             rep("SMM_SYM", 11),
                             rep("HMM_ARD_ARD_CTARD", 11),
                             rep("HMM_ARD_SYM_CTARD", 11),
                             rep("HMM_SYM_SYM_CTARD", 11),
                             rep("HMM_ARD_ARD_CTSYM", 11),
                             rep("HMM_ARD_SYM_CTSYM", 11),
                             rep("HMM_SYM_SYM_CTSYM", 11)))
res_unc$model <- factor(res_unc$model, levels = c("SMM_ARD", "SMM_SYM", "HMM_ARD_ARD_CTARD",
                                                  "HMM_ARD_SYM_CTARD", "HMM_SYM_SYM_CTARD",
                                                  "HMM_ARD_ARD_CTSYM", "HMM_ARD_SYM_CTSYM",
                                                  "HMM_SYM_SYM_CTSYM"))
res_unc$tree <- rep(1:11, 8)

extract_AICWT <- function(AICc){exp(-0.5 * AICc - min(AICc))/sum(exp(-0.5 * AICc - min(AICc)))}
treen <- list(11)
for(i in 1:11) treen[[i]] <-extract_AICWT(subset(res_unc, res_unc$tree==i)$aicc)
modn <- list(8)
for(i in 1:8){
  d <- cbind(rep(1:8, 11), unlist(treen))
  modn[[i]] <- subset(d[,2], d[,1] == i)
}
rm(treen)
res_unc$aiccwt <- unlist(modn)

A <- seq(from = 1, to = 88, by = 11)
B <- c(A, A+1, A+2, A+3, A+4, A+5, A+6, A+7, A+8, A+9, A+10)

# Export Supplementary Table 1
write.csv(res_unc[B,], file = "Supp_table1.csv")


#-------------------------------------
# save results and models
save(res_unc, tree_list,
     unc_smm_ard, 
     unc_smm_sym, 
     unc_hmm_ard_ard_ctard, 
     unc_hmm_ard_sym_ctard, 
     unc_hmm_sym_sym_ctard, 
     unc_hmm_ard_ard_ctsym, 
     unc_hmm_ard_sym_ctsym, 
     unc_hmm_sym_sym_ctsym,
     file = "results_uncertain.RData")
#-------------------------------------

# plot uncertainty on AICc, AICc weights and 
# marginal probability of bird pollination for the most recent 
# common ancestor of Calosphace (FIGURE 1)

p0 <- ggplot(res_unc, aes(aicc, model)) + geom_point(shape = 1, size = 3, alpha = 0.7) + 
  geom_point(data = res_unc[res_unc$tree == 1, ], aes(aicc, model), size = 5, shape = 18) +
  theme_linedraw(base_size = 9) + xlab("AICc")

p1 <- ggplot(res_unc, aes(aiccwt, model)) + geom_point(shape = 1, size = 3, alpha = 0.7) + 
  geom_point(data = res_unc[res_unc$tree == 1, ], aes(aiccwt, model), size = 5, shape = 18) +
  theme_linedraw(base_size = 9) +  theme(axis.text.y=element_blank(), axis.title.y=element_blank()) +
  xlab("AICc weights")

p2 <- ggplot(res_unc, aes(states.bird, model)) + geom_point(shape = 1, size = 3, alpha = 0.7) + 
  geom_point(data = res_unc[res_unc$tree == 1, ], aes(states.bird, model), size = 5, shape = 18) +
  theme_linedraw(base_size = 9) + theme(axis.text.y=element_blank(), axis.title.y=element_blank()) +
  xlab("marginal probability of bird pollination\nfor Calosphace MRCA")

#----------------------------------------------------
svg(filename = "figure1.svg", width = 9, height = 4)
p0 + p1 + p2 
dev.off()
#-----------------------------------------------------

