#!/usr/bin/env Rscript

# One-line description shown together with the usage text
hlp <- " Correlations between selected genera and compounds in selected metabolic pathway (KEGG). "

# Command line: the first positional argument is the pathway; any further
# arguments are ignored
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  # Nothing supplied: print help and usage via message(), exit with status 1
  message(hlp)
  message("Wrong input")
  message("Usage: figure_msps_pathway_multi pathway")
  quit(save = "no", status = 1)
}
target <- args[1]

# Genera whose correlation tables are combined, and the p-value cutoff
# applied to the combined table
genus <- c("Roseburia", "Eubacterium", "Bifidobacterium", "Ruminococcus", "Coprococcus")
alpha <- 0.05

# Input and output locations, relative to the working directory
in.data <- "./data/data.Robj"
in.kgml <- "./output/pathways/kgml/"
out.dir <- file.path("./output/pathways/")
img.dir <- file.path(out.dir, "img")
# Create the image directory (and parents) if missing; stay quiet if it exists
dir.create(img.dir, recursive = TRUE, showWarnings = FALSE)

# Restore the saved objects and pick up `obj` from the global environment;
# `mm` is the metadata table of its Metabolites assay
load(in.data)
obj <- .GlobalEnv$obj
mm <- obj$assays$Metabolites$metadata

# Pathway graph of the requested target, stored as "<target>.graph.csv"
in.ko <- file.path(in.kgml, sprintf("%s.graph.csv", target))
kf <- read.csv(in.ko, stringsAsFactors = FALSE)

# Compound lookup table: stack the (id, name) pairs of substrates and
# products, keep the first occurrence of every id and index rows by id
cps <- rbind(
  cbind(kf$SubstrateID, kf$Substrate),
  cbind(kf$ProductID, kf$Product)
)
colnames(cps) <- c("CompoundID", "Compound")
cps <- data.frame(cps, stringsAsFactors = FALSE)
cps <- cps[!duplicated(cps$CompoundID), ]
rownames(cps) <- cps$CompoundID


# Libs
require(ggplot2)
require(ggpubr)
require(pheatmap)
require(reshape2)
require(plyr)

# Read the per-genus correlation table of this pathway for every genus,
# reporting each file as it is read, then stack the tables in genus order
data <- Reduce(
  rbind,
  lapply(genus, function(g) {
    fname <- file.path(out.dir, sprintf("correlations_%s_%s.csv", g, target))
    tbl <- read.csv(fname, stringsAsFactors = FALSE)
    message(sprintf("Read %s", fname))
    tbl
  }),
  data.frame()
)

# Filter rows
# Keep the rows whose label contains "Combined" and the MSP 112 core rows.
# Both receive a display label (Label2) carrying the prevalence in percent.
dfc <- data[grep("Combined", data$Label), ]
# The percent sign belongs inside the parentheses, matching the MSP label below.
dfc$Label2 <- sprintf("%s (%.2f %%)", dfc$Genus, 100 * dfc$Prevalence)
# NOTE(review): "." in this pattern is a regex wildcard, so any character is
# accepted at that position -- confirm whether a literal dot was intended.
dfr <- data[grep("msp_112.core", data$Label), ]
dfr$Label2 <- sprintf("MSP 112 (%.2f %%)", 100 * dfr$Prevalence)
df <- rbind(dfc, dfr)

# which() discards rows with a missing p-value; plain logical indexing would
# keep them as all-NA rows.
df <- df[which(df$Pvalue < alpha), ]
if (nrow(df) == 0) {
  warning("No correlations pass the p-value cutoff", call. = FALSE)
}

# Rank per genus, used to order the labels in the figures
ranks <- 1:5
names(ranks) <- c("Roseburia", "Bifidobacterium", "Eubacterium", "Coprococcus", "Ruminococcus")
df$Rank <- ranks[df$Genus]
df$Pearson <- df$Correlation

# Annotation ids of each metabolite, looked up in the metabolite metadata
df$KEGG <- mm[df$Mb, ]$Top.annotation.ids

# Display names: "<ionMz> <compound>". Rows whose annotation ids do not occur
# in the pathway keep an empty name. character(n) also works for zero rows,
# where assigning a scalar "" would fail.
df$Rename <- character(nrow(df))
df$FullName <- character(nrow(df))
for (i in seq_len(nrow(df))) {
  # Annotation ids are ";"-separated; strip blanks before matching
  ids <- gsub(" ", "", unlist(strsplit(df$KEGG[i], ";")))
  matches <- intersect(ids, row.names(cps))
  m <- df$Mb[i]
  if (length(matches) > 0) {
    mz <- mm[m, ]$ionMz
    compounds <- cps[matches, "Compound"]
    # Short name uses the first matching compound, full name lists all of them
    df[i, "Rename"] <- sprintf("%.3f %s", mz, compounds[1])
    df[i, "FullName"] <- sprintf("%.3f %s", mz, paste0(compounds, collapse = " /\n"))
  }
}

# Figures
# Plots are built as objects and handed to ggsave() explicitly. Bare ggplot
# expressions at top level are auto-printed under Rscript (opening the default
# device, typically a stray Rplots.pdf) and make ggsave() depend on the
# implicit last_plot().
# NOTE(review): the output names do not contain `target`, so runs for
# different pathways write to the same files -- confirm this is intended.

# Layers shared by both heatmaps: empty axis titles, cyan-to-firebrick fill,
# small text, rotated x labels and a compact legend on top
tile.style <- list(
  xlab(""),
  ylab(""),
  scale_fill_gradient(low = "cyan", high = "firebrick", na.value = NA),
  theme(
    text = element_text(size = 7),
    axis.text.x = element_text(angle = 90, vjust = .5, hjust = 1),
    legend.margin = margin(1, 1, 1, 1),
    legend.key.size = unit(1., "line"),
    legend.position = "top"
  )
)

# Horizontal layout: short compound names on x, genus labels on y
fname <- file.path(out.dir, "cor_combined_genera.pdf")
p.hor <- ggplot(data = df) +
  geom_tile(mapping = aes(x = Rename, y = reorder(Label2, -Rank), fill = Pearson)) +
  tile.style
ggsave(fname, plot = p.hor, width = 2.8, height = 2.7)
message(sprintf("Written %s", fname))

# Vertical layout: genus labels on x, full compound names on y
fname <- file.path(out.dir, "cor_combined_genera_ver.pdf")
p.ver <- ggplot(data = df) +
  geom_tile(mapping = aes(x = reorder(Label2, -Rank), y = FullName, fill = Pearson)) +
  tile.style
ggsave(fname, plot = p.ver, width = 4.1, height = 4.5)
message(sprintf("Written %s", fname))

