#Author: Francisco Pereira Lobo (franciscolobo@gmail.com) - 2024
#Description: Plots scatterplots and violin plots for IPRs associated with
#NCT (supplementary figures)


library(ggplot2)
library(dplyr)
library(cowplot)
library(grDevices)

# Start from an empty workspace (removes every object currently defined)
rm(list = ls())

# Working directory: this path should point to the directory produced when
# you uncompressed file zenodo_reproductibility.tgz
setwd("~/projects/evolution_of_complexity/docs/biorx_evolution_of_complexity/version1/dataFiles/reproductibility_raw_data/reproducibility/")

# Load the CALANGO output object and refer to it as `output` from here on
load("results/RData/homologous2IPR.RData")
output <- homologous2IPR

# Choose the set of IPR identifiers to plot: keep exactly ONE `fnames`
# assignment uncommented. Remember to also switch the matching pdf() line
# at the end of this file so that the output file name follows the choice.

# shared with protozoans (Supp Figure 3)
#fnames <- c("IPR001429", "IPR018487", "IPR022166")

# metazoan (Supp Figures 4 & 5)
#fnames <- c("IPR001050", "IPR009254", "IPR002946", "IPR013847", "IPR015395", "IPR016376", "IPR015471")
#fnames <- c("IPR006711", "IPR013301", "IPR020408", "IPR040180", "IPR002546", "IPR028127", "IPR018890", "IPR031847", "IPR025740")

# vertebrata (Supp Figure 6)
fnames <- c(
  "IPR030714", "IPR000532", "IPR041381", "IPR034879",
  "IPR002278", "IPR034277", "IPR042999", "IPR001065",
  "IPR001003", "IPR028104", "IPR027882", "IPR022582"
)

# Left-to-right order of the groups on the x axis of the violin plots
plot_order <- c(
  "Protist",
  "Fungi",
  "Plantae(Chlorophyta)",
  "Plantae(Angiosperm)",
  "Metazoa(Invertebrata)",
  "Metazoa(Vertebrata)"
)

# Per-genome metadata: tab-separated text file with a header row
genome_metadata <- read.table(
  "data/metadata/metadata_spp.txt",
  sep    = "\t",
  header = TRUE
)
genome_metadata$Popular_group_name_small <-
  as.factor(genome_metadata$Popular_group_name)

# Derive the colour group by removing the parenthesised sub-group from the
# group name, one pattern at a time
major_group <- genome_metadata$Popular_group_name
for (pattern in c("\\(Vertebrata\\)", "\\(Invertebrata\\)",
                  "\\(Angiosperm\\)", "\\(Chlorophyta\\)")) {
  major_group <- gsub(pattern, "", major_group)
}
genome_metadata$Color_group <- as.factor(major_group)

# Genome ID, colour group and short name of every genome
df_color <- data.frame(
  genomeID  = genome_metadata$GenomeID,
  group     = genome_metadata$Color_group,
  shortName = genome_metadata$short_name
)


# Colours for the major groups: one 'Set1' colour per level of df_color$group

group_labels  <- levels(as.factor(df_color$group))
group_palette <- RColorBrewer::brewer.pal(length(group_labels), name = 'Set1')
jColors <- data.frame(
  LABEL = group_labels,
  COLOR = I(group_palette)
)
# Protists are overridden to orange (author's preference)
jColors$COLOR[jColors$LABEL == "Protist"] <- "#FF7F00"

# Pre-allocated container: one slot per entry of `fnames`
myplots <- vector('list', length(fnames))

# Shared y-axis text sizes, appended to every panel
My_Theme <- theme(
  axis.title.y = element_text(size = 20),
  axis.text.y  = element_text(size = 18)
)

# ---- Build one figure row per annotation term --------------------------------
# For every identifier in `fnames`, assemble a titled row with three panels and
# store it in `myplots`:
#   1. scatterplot of the phylogenetically independent contrasts (ape::pic) of
#      the first column of output$x against those of the term's relative
#      frequency, with the regression line through the origin;
#   2. violin plot of the raw term counts per group (log10 y axis);
#   3. violin plot of the relative term frequencies per group (log10 y axis).

# Contrasts of the predictor do not depend on the term: compute them once.
predictor        <- output$x[, 1]
names(predictor) <- rownames(output$x)
contrast_x       <- ape::pic(x = predictor, phy = output$tree)

# Major group of every genome (parenthesised sub-group removed) and the hex
# colour assigned to it in jColors. as.character() drops the AsIs class so the
# column is a plain character vector of colour codes.
major_group <- output$groups
for (pattern in c("\\(Vertebrata\\)", "\\(Invertebrata\\)",
                  "\\(Angiosperm\\)", "\\(Chlorophyta\\)")) {
  major_group <- gsub(pattern, "", major_group)
}
group_fill <- as.character(jColors$COLOR[match(major_group, jColors$LABEL)])

# Violin plot of column `y_var` of `data` per group, with a crossbar at the
# median of each group.
#   data     data frame with columns `group`, `color` and `y_var`
#   y_var    name (string) of the column drawn on the y axis
#   y_label  y-axis title
#   y_labels `labels` argument forwarded to scale_y_continuous()
build_violin_panel <- function(data, y_var, y_label,
                               y_labels = ggplot2::waiver()) {
  ggplot2::ggplot(
    data,
    ggplot2::aes(x = factor(group, levels = plot_order), y = .data[[y_var]])
  ) +
    ggplot2::geom_violin(ggplot2::aes(fill = color),
                         scale = "width", alpha = 0.8) +
    # `color` already holds hex codes: use them literally instead of letting
    # a default discrete fill scale assign its own palette
    ggplot2::scale_fill_identity() +
    ggplot2::scale_y_continuous(trans = "log10", labels = y_labels) +
    ggplot2::stat_summary(fun = median, fun.min = median, fun.max = median,
                          geom = "crossbar", width = 0.2, colour = "black") +
    ggplot2::labs(x = "Group", y = y_label) +
    ggplot2::theme_light() +
    ggplot2::theme(axis.text.x  = ggplot2::element_blank(),
                   axis.title.x = ggplot2::element_blank()) +
    My_Theme
}

# Titled three-panel row for annotation term `fname`.
#   fname         column name of output$y (annotation term identifier)
#   panel_letter  letter shown in front of the identifier in the row title
# Returns the cowplot grid holding the title and the three panels.
build_term_row <- function(fname, panel_letter) {
  term_count     <- output$y[, fname]
  # relative frequency: raw count divided by output$denominator
  term_frequency <- term_count / output$denominator

  plot_title <- paste0("    ", panel_letter, "  ", fname, " - ",
                       output$annotation.cor[fname])

  # --- panel 1: contrasts scatterplot ---
  response        <- term_frequency
  names(response) <- rownames(output$x)
  contrast_y      <- ape::pic(x = response, phy = output$tree)

  contrasts <- data.frame(contrast_x, contrast_y)
  # "+ 0" removes the intercept: the regression line goes through the origin
  model <- stats::lm(contrast_y ~ contrast_x + 0, data = contrasts)

  cor_val <- round(
    stats::cor(contrasts$contrast_x, contrasts$contrast_y, method = "pearson"),
    digits = 2
  )
  q_val <- formatC(output$contrasts.corrected[fname], format = "e", digits = 2)

  # Correlation and q-value are written in the top-left corner of the panel
  stats_grob <- grid::grobTree(
    grid::textGrob(
      paste0("cor = ", cor_val, "\n", "q-val = ", q_val),
      x = 0.05, y = 0.90, hjust = 0,
      gp = grid::gpar(col = "black", fontsize = 14, fontface = "italic")
    )
  )

  p1 <- ggplot2::ggplot(
    data    = contrasts,
    mapping = ggplot2::aes(x = contrast_x, y = contrast_y)
  ) +
    ggplot2::geom_abline(slope     = unname(stats::coef(model)[1]),
                         intercept = 0,
                         color     = "#3366FFFF",
                         linewidth = 3) +
    ggplot2::geom_point(size = 3) +
    ggplot2::theme_light() +
    ggplot2::theme(axis.text.x  = ggplot2::element_text(size = 18),
                   axis.title.x = ggplot2::element_text(size = 20)) +
    ggplot2::scale_y_continuous(
      labels = function(x) format(x, scientific = TRUE)
    ) +
    ggplot2::labs(x = "PIC - NCT",
                  y = "PIC - annotation\nterm frequency") +
    ggplot2::annotation_custom(stats_grob) +
    My_Theme

  # --- panels 2 and 3: per-group violin plots ---
  violin_df <- data.frame(
    cell_type   = output$x[, 1],
    short       = output$short.name,
    group       = output$groups,
    group_color = major_group,
    count       = term_count,
    frequency   = term_frequency,
    color       = group_fill
  )

  p2 <- build_violin_panel(violin_df, "count", "Annotation\nterm count")
  p3 <- build_violin_panel(
    violin_df, "frequency", "Annotation\nterm frequency",
    y_labels = function(x) format(x, scientific = TRUE)
  )

  # --- assemble: title on top, the three panels side by side below ---
  panel_row <- cowplot::plot_grid(
    p1 + ggplot2::theme(legend.position = "none"),
    p2 + ggplot2::theme(legend.position = "none"),
    p3 + ggplot2::theme(legend.position = "none"),
    align = "vh",
    hjust = 0,
    nrow  = 1
  )

  title <- cowplot::ggdraw() +
    cowplot::draw_label(
      plot_title,
      size     = 20,
      fontface = "bold",
      x        = 0,
      hjust    = 0
    ) +
    ggplot2::theme(
      plot.margin = ggplot2::margin(0, 0, 0, 7)
    )

  cowplot::plot_grid(
    title, panel_row,
    ncol        = 1,
    rel_heights = c(0.1, 1)
  )
}

# One row per term, lettered A, B, C, ... in the order given by `fnames`
myplots <- lapply(seq_along(fnames), function(i) {
  build_term_row(fnames[[i]], LETTERS[i])
})


# Write all rows, stacked in a single column, to the PDF of the chosen figure.
# Keep exactly ONE pdf() call uncommented: the one matching the `fnames` set
# selected near the top of this file. The page height grows with the number
# of rows (5 inches per entry of `fnames`).
#pdf(file = "results/figures/Supplementary_Figure_3.pdf", width = 16.5, height = length(fnames)*5)
#pdf(file = "results/figures/Supplementary_Figure_4.pdf", width = 16.5, height = length(fnames)*5)
#pdf(file = "results/figures/Supplementary_Figure_5.pdf", width = 16.5, height = length(fnames)*5)
pdf(file = "results/figures/Supplementary_Figure_6.pdf", width = 16.5, height = length(fnames) * 5)
# print() explicitly: the plot object is only drawn on the device when it is
# printed, and top-level values are not auto-printed when this file is run
# through source()
print(plot_grid(plotlist = myplots, ncol = 1))
dev.off()
