#Author: Francisco Pereira Lobo (franciscolobo@gmail.com) - 2024
#Description: Compares scaffold and donor trees to highlight topological
#differences.

#Configuring environment
library(ape)
library(phytools)
library(RRphylo)
library(ggtree)

# Cleaning: remove every object that ls() lists in the current environment so
# the script starts from an empty workspace. Packages attached above with
# library() are not affected, because rm() only removes objects.
rm(list=ls())

#functions
# Return the label of one or more nodes of a tree, given their node numbers.
#
# Node numbers index the tip labels first and the internal node labels after
# them, so tips are 1..length(tree$tip.label) and internal node k (counting
# from 1) is length(tree$tip.label) + k.
#
# Args:
#   element: node number, or a vector of node numbers.
#   tree:    object with a `tip.label` vector and, optionally, a `node.label`
#            vector (e.g. a tree read with read.tree).
#
# Returns:
#   The matching label(s), one per entry of `element`. NA is returned for an
#   internal node when the tree carries no node labels, and for numbers beyond
#   the last node.
select.tip.or.node <- function(element, tree) {
  # Tip labels followed by node labels mirror the node numbering, so a single
  # lookup serves tips and internal nodes alike and is safe for vector input.
  all_labels <- c(tree$tip.label, tree$node.label)
  all_labels[element]
}

# Distance between a species and the most recent common ancestor (MRCA) it
# shares with a second species.
#
# Args:
#   tree:     tree passed to MRCA() to locate the common ancestor.
#   d_matrix: node-by-node distance matrix indexed by node number (the script
#             builds it with dist.nodes()).
#   n_matrix: edge table with columns `child` (node number) and `chi.name`
#             (label of that node).
#   spp1:     label of the species whose distance to the MRCA is returned.
#   spp2:     label of the second species; only used to locate the MRCA.
#
# Returns:
#   d_matrix[node of spp1, MRCA node]. Note the distance is measured from spp1
#   only, so for trees that are not ultrametric swapping spp1 and spp2 can
#   change the result.
#
# Raises:
#   An error if spp1 is not listed in n_matrix$chi.name.
return.divergence.time <- function(tree, d_matrix, n_matrix, spp1, spp2) {
  if (!all(spp1 %in% n_matrix$chi.name)) {
    stop("Species not found in the edge table (column chi.name): ",
         paste(setdiff(spp1, n_matrix$chi.name), collapse = ", "),
         call. = FALSE)
  }
  mrca <- MRCA(tree, c(spp1, spp2))
  # %in% never yields NA, so unlabeled (NA) rows of the edge table cannot leak
  # NA entries into the row index the way an == comparison would.
  spp_node_id <- n_matrix$child[n_matrix$chi.name %in% spp1]
  d_matrix[spp_node_id, mrca]
}

# Working directory for the relative paths used in the rest of the script.
# This path should point to the directory produced when you uncompressed the
# file reproducibility.tgz; adjust it to your local copy before running.
setwd("~/projects/evolution_of_complexity/docs/biorx_evolution_of_complexity/version1/dataFiles/reproductibility_raw_data/reproducibility/")


###

# Producing a scaffold Eukarya tree containing major groups also found in our
# dataset

# Reading the input (soon-to-be scaffold) tree
input_tree <- read.tree("data/trees/raw/PBAYES_CATGTR_136o_10genes_AmoOut_ugam_CIs.nwk.tre")

# Tips of the input tree to keep, based on our species list
spp_include_scaff <- c(
  "Op_Drosoph", "Op_Homo_sa", "Op_Mus_mus", "Ar_Arabido", "Op_Schizos",
  "Ar_Vitis_v", "Am_Dictyos", "Op_Amphime", "Ar_Volvox_", "Op_Capsasp",
  "Op_Neurosp", "Op_Nematos", "Op_Ciona_i", "Op_Ustilag", "Op_Gallus_",
  "Ar_Ostreoc", "St_Cafeter"
)

# Fail early, naming the offending labels, if a requested species is not a tip
# of the input tree
if (!all(spp_include_scaff %in% input_tree$tip.label)) {
  stop("Species not found in the input scaffold tree: ",
       paste(setdiff(spp_include_scaff, input_tree$tip.label), collapse = ", "),
       call. = FALSE)
}

# Generating a scaffold tree with the taxa of interest: every tip that is not
# in the list above is dropped
scaffold_tree <- drop.tip(input_tree,
                          setdiff(input_tree$tip.label, spp_include_scaff))

# Adding inner node labels "t1", "t2", ... (one per internal node)
scaffold_tree$node.label <- paste0("t", seq_len(scaffold_tree$Nnode))

# Reading the TToL tree
input_tree2 <- read.tree("data/trees/raw/TToL_spp_list.nwk")

# Tips of the TToL tree to keep, based on our species list
spp_include_donor <- c(
  "Neurospora_crassa", "Schizosaccharomyces_pombe", "Ustilago_maydis",
  "Amphimedon_queenslandica", "Ciona_intestinalis", "Drosophila_melanogaster",
  "Gallus_gallus", "Homo_sapiens", "Mus_musculus", "Nematostella_vectensis",
  "Arabidopsis_thaliana", "Vitis_vinifera", "Ostreococcus_lucimarinus",
  "Volvox_carteri", "Dictyostelium_discoideum", "Capsaspora_owczarzaki",
  "Phytophthora_sojae"
)

# Fail early, naming the offending labels, if a requested species is not a tip
# of the TToL tree
if (!all(spp_include_donor %in% input_tree2$tip.label)) {
  stop("Species not found in the TToL tree: ",
       paste(setdiff(spp_include_donor, input_tree2$tip.label), collapse = ", "),
       call. = FALSE)
}

# Generating the TToL tree with the taxa of interest for comparison: every tip
# that is not in the list above is dropped
TToL_tree <- drop.tip(input_tree2,
                      setdiff(input_tree2$tip.label, spp_include_donor))

# Mapping names across trees. The table is read without a header, so its
# columns are V1, V2, V3: V1 is matched against the TToL tip labels, V3 against
# the scaffold tip labels, and V2 supplies the label both trees get.
# stringsAsFactors is set explicitly so the labels are character vectors on
# every R version.
names <- read.table("data/metadata/TToLIDs2ShortNames2Scaffold.txt", sep = "\t",
                    header = FALSE, stringsAsFactors = FALSE)

# Fail early if a tip has no entry in the mapping table; otherwise its label
# would silently become NA and break the comparisons further down.
if (anyNA(match(scaffold_tree$tip.label, names$V3))) {
  stop("Scaffold tip labels missing from column 3 of the name mapping table: ",
       paste(setdiff(scaffold_tree$tip.label, names$V3), collapse = ", "),
       call. = FALSE)
}
if (anyNA(match(TToL_tree$tip.label, names$V1))) {
  stop("TToL tip labels missing from column 1 of the name mapping table: ",
       paste(setdiff(TToL_tree$tip.label, names$V1), collapse = ", "),
       call. = FALSE)
}

scaffold_tree$tip.label <- names$V2[match(scaffold_tree$tip.label, names$V3)]

TToL_tree$tip.label <- names$V2[match(TToL_tree$tip.label, names$V1)]

# Two-column association matrix pairing every (renamed) TToL tip label with
# itself.
# NOTE(review): this assumes the scaffold tree now carries exactly the same set
# of labels as the TToL tree - confirm against the mapping table.
names_matrix <- cbind(TToL_tree$tip.label, TToL_tree$tip.label)

# Plotting the trees together: build the co-phylogeny object linking the tips
# of the scaffold and TToL trees through names_matrix
cophylo <- cophylo(scaffold_tree, TToL_tree, assoc = names_matrix)

# Make sure the output directory exists before opening the PDF device
dir.create("results/figures", recursive = TRUE, showWarnings = FALSE)

pdf("results/figures/Supplementary_Figure_1_D_cophylo_plot.pdf", width = 8, height = 8)
# The device is closed in `finally` so it is not left open (and the file left
# unfinished) if plotting fails
tryCatch(
  plot(cophylo,
       link.type = "curved",
       link.lwd = 4,
       link.lty = "solid",
       link.col = make.transparent("red", 0.25)),
  finally = dev.off()
)

# Computing divergence times for the two trees

# Node-to-node distance matrices, indexed by node number
dTToL <- dist.nodes(TToL_tree)
dScaff <- dist.nodes(scaffold_tree)

# Edge tables: one row per edge of the tree, holding the parent and child node
# numbers (columns 1 and 2 of the tree's edge matrix) next to their labels.
# vapply() guarantees the label columns are character vectors, whatever the
# number of edges.
edge_table_TToL <- data.frame(
  "parent" = TToL_tree$edge[, 1],
  "par.name" = vapply(TToL_tree$edge[, 1],
                      select.tip.or.node,
                      character(1),
                      tree = TToL_tree),
  "child" = TToL_tree$edge[, 2],
  "chi.name" = vapply(TToL_tree$edge[, 2],
                      select.tip.or.node,
                      character(1),
                      tree = TToL_tree)
)

edge_table_scaff <- data.frame(
  "parent" = scaffold_tree$edge[, 1],
  "par.name" = vapply(scaffold_tree$edge[, 1],
                      select.tip.or.node,
                      character(1),
                      tree = scaffold_tree),
  "child" = scaffold_tree$edge[, 2],
  "chi.name" = vapply(scaffold_tree$edge[, 2],
                      select.tip.or.node,
                      character(1),
                      tree = scaffold_tree)
)

# Alternative species vectors (full names and short names), currently disabled:
#spp <- c("Neurospora_crassa", "Schizosaccharomyces_pombe", "Ustilago_maydis", "Amphimedon_queenslandica", "Ciona_intestinalis", "Drosophila_melanogaster", "Gallus_gallus", "Homo_sapiens", "Mus_musculus", "Nematostella_vectensis", "Arabidopsis_thaliana", "Vitis_vinifera", "Ostreococcus_lucimarinus", "Volvox_carteri", "Dictyostelium_discoideum", "Capsaspora_owczarzaki")

#spp <- c("N_cra", "S_pom", "U_may", "A_que", "C_int", "D_mel", "G_gal", "H_sap", "M_mus", "N_vec", "A_tha", "V_vin", "O_luc", "V_car", "D_dis", "C_owc")

# Species to compare: the tip labels of the scaffold tree
spp <- scaffold_tree$tip.label

# Pre-allocated results table with one row per ordered pair of distinct
# species, i.e. length(spp) * (length(spp) - 1) rows, all cells starting as NA
divergence_time_data <- data.frame(
  matrix(nrow = length(spp) * (length(spp) - 1), ncol = 8)
)
colnames(divergence_time_data) <- c(
  "Spp1", "Spp2", "DistTToL", "DistScaff",
  "Ratio", "Average", "TToLMRCA", "ScaffMRCA"
)

# Index of the next row to fill
i <- 1

for (spp_1 in spp) {
  for (spp_2 in spp) {
    # A species is never compared with itself
    if (spp_1 != spp_2) {
      # MRCA node of the pair in each tree
      mrca_scaff <- MRCA(scaffold_tree, c(spp_1, spp_2))
      mrca_TToL <- MRCA(TToL_tree, c(spp_1, spp_2))

      # Distance from spp_1 to that MRCA in each tree, and their ratio
      div_time_scaff <- return.divergence.time(
        tree = scaffold_tree,
        d_matrix = dScaff,
        n_matrix = edge_table_scaff,
        spp1 = spp_1,
        spp2 = spp_2
      )
      div_time_TToL <- return.divergence.time(
        tree = TToL_tree,
        d_matrix = dTToL,
        n_matrix = edge_table_TToL,
        spp1 = spp_1,
        spp2 = spp_2
      )
      ratio <- div_time_scaff / div_time_TToL

      # Filling row i of the results table
      divergence_time_data[i, "Spp1"] <- spp_1
      divergence_time_data[i, "Spp2"] <- spp_2
      divergence_time_data[i, "DistTToL"] <- div_time_TToL
      divergence_time_data[i, "DistScaff"] <- div_time_scaff
      divergence_time_data[i, "Ratio"] <- ratio
      divergence_time_data[i, "Average"] <- (div_time_TToL + div_time_scaff) / 2
      divergence_time_data[i, "TToLMRCA"] <- mrca_TToL
      divergence_time_data[i, "ScaffMRCA"] <- mrca_scaff

      i <- i + 1
    }
  }
}

# Saving the whole workspace (every object created above, not only
# divergence_time_data). The output directory is created first so the results
# are not lost at the very end of the run when it does not exist yet.
dir.create("results/RData", recursive = TRUE, showWarnings = FALSE)
save.image(file = "results/RData/divergence_time_data.RData")
