#Author: Francisco Pereira Lobo (franciscolobo@gmail.com) - 2024
#Description: compares how different values of NCT for H. sapiens affect
#the sets of IPR/GO terms found to be associated with NCT.

library(ggplot2)
library(ggVennDiagram)
library(cowplot)

#removes every object returned by ls() so the script starts from an empty
#workspace; packages attached by the library() calls above are not affected
rm(list=ls())

#this path should point to the directory created when you uncompressed the
#file reproducibility.tgz
#every relative path used below (results/RData/..., results/figures/...) is
#resolved against this directory
setwd("~/projects/evolution_of_complexity/docs/biorx_evolution_of_complexity/version1/dataFiles/reproductibility_raw_data/reproducibility/")

#loading CALANGO output objects (GO-annotation)
#the code below expects these files to restore the objects gene2GO,
#gene2GO_less_H_sapiens and gene2GO_original_NCT, and reads the sig_IDs
#element of each one
load("results/RData/gene2GO.RData")
load("results/RData/gene2GO_less_H_sapiens.RData")
load("results/RData/gene2GO_original_NCT.RData")

#every GO term listed in sig_IDs by at least one of the three experiments
unique_GOs <- unique(c(
  gene2GO$sig_IDs,
  gene2GO_less_H_sapiens$sig_IDs,
  gene2GO_original_NCT$sig_IDs
))

#computing presence/absence of each GO term for each experiment
#%in% is vectorized, so each column is computed in one pass over unique_GOs;
#the previous nested loop repeated the same three membership tests once per
#row for every term without changing the result
df_GO <- data.frame(
  original_NCT = unique_GOs %in% gene2GO_original_NCT$sig_IDs,
  Pan_as_proxy = unique_GOs %in% gene2GO$sig_IDs,
  less_H_sapiens = unique_GOs %in% gene2GO_less_H_sapiens$sig_IDs
)

#row names are set in a separate step because data.frame(row.names = ...)
#interprets a single string as the name of a column, which would fail when
#only one term is present
rownames(df_GO) <- unique_GOs

#loading CALANGO output objects (IPR-annotation)
#the code below expects these files to restore the objects homologous2IPR,
#homologous2IPR_less_H_sapiens and homologous2IPR_original_NCT, and reads the
#sig_IDs element of each one
load("results/RData/homologous2IPR.RData")
load("results/RData/homologous2IPR_less_H_sapiens.RData")
load("results/RData/homologous2IPR_original_NCT.RData")

#every IPR term listed in sig_IDs by at least one of the three experiments
unique_IPRs <- unique(c(
  homologous2IPR$sig_IDs,
  homologous2IPR_less_H_sapiens$sig_IDs,
  homologous2IPR_original_NCT$sig_IDs
))

#computing presence/absence of each IPR term for each experiment
#%in% is vectorized, so each column is computed in one pass over unique_IPRs;
#the previous nested loop repeated the same three membership tests once per
#row for every term without changing the result
df_IPR <- data.frame(
  original_NCT = unique_IPRs %in% homologous2IPR_original_NCT$sig_IDs,
  Pan_as_proxy = unique_IPRs %in% homologous2IPR$sig_IDs,
  less_H_sapiens = unique_IPRs %in% homologous2IPR_less_H_sapiens$sig_IDs
)

#row names are set in a separate step because data.frame(row.names = ...)
#interprets a single string as the name of a column, which would fail when
#only one term is present
rownames(df_IPR) <- unique_IPRs

#named lists of significant IDs, one element per experiment, used as input
#for the Venn diagrams (GO terms first, then IPR/homology terms)
GO <- list(
  "Pan_as_proxy" = gene2GO$sig_IDs,
  "less_H_sapiens" = gene2GO_less_H_sapiens$sig_IDs,
  "original_NCT" = gene2GO_original_NCT$sig_IDs
)
homology <- list(
  "Pan_as_proxy" = homologous2IPR$sig_IDs,
  "less_H_sapiens" = homologous2IPR_less_H_sapiens$sig_IDs,
  "original_NCT" = homologous2IPR_original_NCT$sig_IDs
)

#one Venn diagram per annotation type; both are kept as objects so they can
#be combined into a single figure afterwards
GO_venn <- ggVennDiagram(GO)
homology_venn <- ggVennDiagram(homology)

#plotting: writes both Venn diagrams to a single PDF, stacked vertically
#(panel A = IPR/homology terms, panel B = GO terms)
#"height" is spelled out in full; the previous "heigh" only worked through
#partial argument matching
cairo_pdf(
  "results/figures/Supplementary_Figure_2_Compare_NCT.pdf",
  width = 12,
  height = 16
)
#explicit print(): the grid is only drawn on the open device when it is
#printed, and top-level values are not auto-printed when the script is run
#through source() with its default arguments
print(
  cowplot::plot_grid(
    homology_venn,
    GO_venn,
    ncol = 1,
    align = "v",
    labels = c("A", "B")
  )
)
dev.off()
