#Author: Francisco Pereira Lobo (franciscolobo@gmail.com) - 2024
#Description: Plots heatmaps of IPR and GO terms associated with NCT.

#loading
library(RColorBrewer)
library(gplots)
library(extrafont)
library(ape)
library(heatmaply)
library(dendextend)
library(ComplexHeatmap)
library(preprocessCore)
loadfonts()

#cleaning: removes every object from the global workspace before the run
rm(list = ls())

#this path should point to the directory produced when you uncompressed file
#reproducibility.tgz.
#Set the environment variable REPRODUCIBILITY_DIR to use another location
#without editing this script; when it is unset or empty the default below
#is used.
reproducibility_dir <- Sys.getenv("REPRODUCIBILITY_DIR", unset = "")
if (!nzchar(reproducibility_dir)) {
  reproducibility_dir <- "~/projects/evolution_of_complexity/docs/biorx_evolution_of_complexity/version1/dataFiles/reproductibility_raw_data/reproducibility/"
}
#every path used below is relative to this directory
setwd(reproducibility_dir)

#load CALANGO output objects (the script expects these files to provide the
#objects gene2GO and homologous2IPR used below)
load(file = "results/RData/gene2GO.RData")
load(file = "results/RData/homologous2IPR.RData")

#genome metadata (tab-separated table with a header line)
genome_metadata <- read.table("data/metadata/metadata_spp.txt", sep = "\t", header = TRUE)

#Merging metazoans & plants: the parenthetical sub-group tags are removed
#one after the other from the popular group name, so genomes that only
#differ by such a tag end up with the same label
subgroup_tags <- c(
  "\\(Vertebrata\\)",
  "\\(Invertebrata\\)",
  "\\(Angiosperm\\)",
  "\\(Chlorophyta\\)"
)

genome_metadata$Popular_group_name_small <- as.factor(
  Reduce(
    function(labels, tag) gsub(tag, "", labels),
    subgroup_tags,
    init = genome_metadata$Popular_group_name
  )
)

#lookup table: genome ID -> merged group and short display name
names <- data.frame(
  genomeID = genome_metadata$GenomeID,
  group = genome_metadata$Popular_group_name_small,
  shortName = genome_metadata$short_name
)

#IDs flagged as significant in the IPR analysis
ids <- as.vector(homologous2IPR$sig_IDs)

#normalizing raw counts: columns of the significant IDs divided by
#homologous2IPR$denominator
norm <- homologous2IPR$y[ids] / homologous2IPR$denominator

#rownames and colnames
#rows: genome IDs are replaced by the short names from the lookup table
rownames(norm) <- names$shortName[match(rownames(norm), names$genomeID)]

#columns: "<ID> - <annotation>"
colnames_df <- paste0(
  homologous2IPR$sig_IDs,
  " - ",
  homologous2IPR$annotation.contrasts[homologous2IPR$sig_IDs]
)
colnames(norm) <- colnames_df

tree <- homologous2IPR$tree

#getting tree to change plot order
#tip labels are translated from genome IDs to short names; as.character()
#guarantees that the labels index matrices by name (a factor would index by
#its integer codes)
tree$tip.label <- as.character(
  names$shortName[match(tree$tip.label, names$genomeID)]
)
tree2 <- as.dendrogram(as.hclust.phylo(tree))
#NOTE(review): clade_order is not referenced again in this script
clade_order <- order.dendrogram(tree2)

#normalize() is applied to the whole table. The table is passed as-is
#(instead of norm[, 1:ncol(norm)]) so that a single significant ID does not
#collapse it to an unnamed vector and lose the genome row names.
mat <- as.matrix(normalize(norm))

#rows reordered to follow the tree tips; drop = FALSE keeps a matrix even
#when there is only one column
ord.mat.IPR <- mat[tree$tip.label, , drop = FALSE]

#51-step white-to-blue ramp used to fill the IPR and GO heatmaps
my_palette <- colorRampPalette(c("white", "blue"))(n = 51)


#color by group

#one Set1 color per group level
group_levels <- levels(as.factor(names$group))
jColors <- data.frame(
  LABEL = group_levels,
  COLOR = I(RColorBrewer::brewer.pal(length(group_levels), name = "Set1"))
)
col_letters <- setNames(jColors$COLOR, jColors$LABEL)

#I like my protists orange, thank you :-)
col_letters["Protist"] <- "#FF7F00"

#group label of every species, in the row order of the IPR matrix
species2color <- list()
species2color$species <- rownames(ord.mat.IPR)
species2color$group <- names$group[match(species2color$species, names$shortName)]
species2color$group <- jColors$LABEL[match(species2color$group, jColors$LABEL)]

#named vector species -> group label, plotted as the group annotation strip
le <- setNames(species2color$group, species2color$species)



#IDs flagged as significant in the GO analysis
ids <- as.vector(gene2GO$sig_IDs)

#colnames: "<ID> - <annotation>"
colnames_df <- paste0(
  gene2GO$sig_IDs,
  " - ",
  gene2GO$annotation.contrasts[gene2GO$sig_IDs]
)

#normalizing raw counts: columns of the significant IDs divided by
#gene2GO$denominator
norm <- gene2GO$y[ids] / gene2GO$denominator

#rows: genome IDs are replaced by the short names from the lookup table
rownames(norm) <- names$shortName[match(rownames(norm), names$genomeID)]
colnames(norm) <- colnames_df

#normalize() is applied to the whole table. The table is passed as-is
#(instead of norm[, 1:ncol(norm)]) so that a single significant ID does not
#collapse it to an unnamed vector and lose the genome row names.
mat <- as.matrix(normalize(norm))

#rows reordered to follow the tree tips; as.character() forces indexing by
#name and drop = FALSE keeps a matrix even when there is only one column
ord.mat.GO <- mat[as.character(tree$tip.label), , drop = FALSE]


#average-linkage clustering of the GO columns on euclidean distances
#NOTE(review): distance2 and cluster2_final are not referenced by the
#heatmaps below, which cluster their rows with complete linkage
distance2 <- dist(as.matrix(t(ord.mat.GO)), method = "euclidean")

cluster2_final <- set(
  as.dendrogram(hclust(distance2, method = c("average"))),
  "branches_lwd", 2
)

tree_final <- set(tree2, "branches_lwd", 2)

#heatmap using phylogeny to cluster genomes
#matrices are transposed: annotation terms in rows, genomes in columns; the
#columns of all three heatmaps are arranged by the same tree
ht1 <- Heatmap(
  t(as.matrix(ord.mat.IPR)),
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "complete",
  cluster_columns = as.hclust(tree_final),
  name = "IPR",
  col = my_palette,
  row_title = "IPR",
  row_dend_width = unit(4, "cm"),
  show_row_names = FALSE,
  row_dend_gp = gpar(lwd = 2),
  show_heatmap_legend = FALSE
)

ht2 <- Heatmap(
  t(as.matrix(ord.mat.GO)),
  clustering_distance_rows = "euclidean",
  clustering_method_rows = "complete",
  cluster_columns = as.hclust(tree_final),
  name = "GO",
  col = my_palette,
  row_title = "GO",
  row_dend_width = unit(4, "cm"),
  show_row_names = FALSE,
  row_dend_gp = gpar(lwd = 2),
  column_names_gp = gpar(fontfamily = "mono", fontsize = 14),
  show_heatmap_legend = FALSE
)

#single-row strip with the group of every genome, colored by col_letters
ht3 <- Heatmap(
  rbind(groups = le),
  cluster_columns = as.hclust(tree_final),
  name = "Group",
  col = col_letters,
  column_dend_height = unit(4, "cm"),
  column_dend_gp = gpar(lwd = 2),
  height = 8,
  show_heatmap_legend = FALSE
)

#vertical stack: group strip on top, then IPR, then GO
ht_list <- ht3 %v% ht1 %v% ht2

figure_path <- "results/figures/Figure_3A_heatmap_IPR_GO.pdf"
#make sure the output directory exists before opening the device
dir.create(dirname(figure_path), recursive = TRUE, showWarnings = FALSE)

cairo_pdf(figure_path, width = 12, height = 35)

#the device is closed even if drawing fails, so no half-written PDF device
#stays open in the session; invisible() prevents the returned heatmap list
#from being auto-printed (and redrawn) after the device is closed
invisible(tryCatch(
  draw(ht_list, padding = unit(c(2, 2, 2, 2), "mm"), heatmap_legend_side = "top"),
  finally = dev.off()
))