#!/usr/bin/env Rscript

# One-line description printed together with the usage text on bad input
hlp = " Do significant species have a larger effect on metabolism? "

# Read pipeline arguments.
# Both positional arguments are required: `genus` selects the MSPs and
# `target` selects the pathway column, so a single argument would leave
# `target` as NA and fail much later with an unrelated error.
args = commandArgs(trailingOnly = TRUE)
if(length(args) < 2){
  message(hlp)
  message("Wrong input")
  message("Usage: figure_msps_pathway genus pathway")
  q(save = "no", status = 1)
}
genus = args[1]   # compared against sm$genus further down
target = args[2]  # column name looked up in the pathway table

# Paths (relative to the working directory the script is started from)
in.data = "./data/data.Robj"
out.dir = file.path("./output/pathways/")
img.dir = file.path(out.dir, "img")
# recursive = TRUE also creates out.dir; an existing directory is not an error
dir.create(img.dir, recursive = TRUE, showWarnings = FALSE)

# Parameters
alpha = 0.05  # P-value threshold below which a correlation is called significant

# Libs
require(ggplot2)
require(ggpubr)
require(pheatmap)
require(reshape2)
require(plyr)

# Load data object.
# load() restores the saved objects by name; the rest of the script expects
# one of them to be called `obj`, so fail early with a clear message if not.
load(in.data)
obj = .GlobalEnv$obj
if(is.null(obj)){
  stop(sprintf("No object named 'obj' found after loading %s", in.data),
       call. = FALSE)
}

# Keep data sets: metabolite and MSP tables (rows are matched by row name
# below and reported as samples) plus their metadata tables.
M = obj$assays$Metabolites$data
mm = obj$assays$Metabolites$metadata
S = obj$assays$MSPCore$data
sm = obj$assays$MSPCore$metadata
keep = intersect(row.names(M), row.names(S))
if(length(keep) == 0){
  stop("Metabolite and MSP tables share no samples", call. = FALSE)
}
# drop = FALSE keeps the two-dimensional shape even for a single shared sample
M = M[keep, , drop = FALSE]
S = S[keep, , drop = FALSE]
message(sprintf("Keeping %d shared samples", length(keep)))

# Filter by pathway: keep the compounds whose entry in the `target` column
# of the pathway table is positive.
P = obj$assays$Metabolites$pathway
if(!(target %in% colnames(P))){
  stop(sprintf("Unknown pathway: %s", target), call. = FALSE)
}
mbx = names(which(P[,target] > 0))
if(length(mbx) == 0){
  stop(sprintf("No compounds found for %s", target), call. = FALSE)
}
# drop = FALSE: a pathway with a single compound must not collapse the
# tables to plain vectors, which would break the column/row lookups below.
M = M[, mbx, drop = FALSE]
mm = mm[mbx, , drop = FALSE]
message(sprintf("Keeping %d compounds for %s", length(mbx), target))

# Format data for plot: centre/scale every compound column, then reshape to
# a long table with one row per (sample, compound) pair.
M = scale(M)
dm = setNames(melt(M), c("Sample", "Mb", "Value"))
# melt() may return the identifiers as factors; store them as plain strings
dm[c("Sample", "Mb")] = lapply(dm[c("Sample", "Mb")], as.character)
# Display name of each compound, looked up in the metabolite metadata
dm[["Name"]] = mm[dm[["Mb"]], "Top.annotation.name"]

# Melt the abundances of all MSPs annotated with the requested genus into a
# long table with one row per (sample, MSP) pair.
msps = row.names(sm)[which(sm$genus == genus)]
if(length(msps) == 0){
  stop(sprintf("No MSPs found for genus %s", genus), call. = FALSE)
}
# drop = FALSE keeps a one-column matrix when the genus has a single MSP;
# otherwise melt() and colMeans() would receive a bare vector and fail.
ds = melt(S[, msps, drop = FALSE])
prev = colMeans(S[, msps, drop = FALSE] > 0)  # fraction of samples with abundance > 0
colnames(ds) = c("Sample", "MSP", "Abundance")
ds$Sample = as.character(ds$Sample)
ds$MSP = as.character(ds$MSP)
# Fix both levels explicitly. Relabelling the levels afterwards would map
# TRUE to "-" whenever every abundance is positive, because the factor would
# then only have the single level "TRUE".
ds$Detected = factor(ds$Abundance > 0, levels = c(FALSE, TRUE),
                     labels = c("-", "+"))
ds$Prevalence = prev[ds$MSP]
ds$Species = sm[ds$MSP, "species"]
ds$Label = sprintf("%s (%s) %.2f %%", ds$MSP, ds$Species, 100 * ds$Prevalence)

# Merge metabolite values and MSP abundances on the shared sample identifier
data = merge(dm, ds, by = "Sample")

# Compute correlation: one cor.test() per (compound, MSP) pair.
dcor = expand.grid(Mb=mbx, MSP=msps, stringsAsFactors = FALSE)
dcor$Genus = genus
dcor$Correlation = NA_real_
dcor$Pvalue = NA_real_
dcor$Padj = NA_real_
dcor$Effect = factor("Neutral", levels = c("Negative", "Neutral", "Positive"))
# seq_len() yields an empty loop for an empty grid (1:0 would iterate twice)
for(i in seq_len(nrow(dcor))){
  m = dcor[i, "Mb"]
  s = dcor[i, "MSP"]
  ct = cor.test(M[,m], S[,s])
  dcor[i, "Correlation"] = ct$estimate
  dcor[i, "Pvalue"] = ct$p.value
}
# Benjamini-Hochberg adjustment over all tested pairs. The column used to be
# written out as a constant 0. Effect calls below still use the raw P-value,
# as before.
dcor$Padj = p.adjust(dcor$Pvalue, method = "BH")
# which() drops NA entries (e.g. from a constant column), which would
# otherwise abort the assignment; such pairs stay "Neutral".
inxs.pos = which((dcor$Correlation > 0) & dcor$Pvalue < alpha)
inxs.neg = which((dcor$Correlation < 0) & dcor$Pvalue < alpha)
dcor[inxs.pos, "Effect"] = "Positive"
dcor[inxs.neg, "Effect"] = "Negative"
dcor$Prevalence = prev[dcor$MSP]
dcor$Species = sm[dcor$MSP, "species"]
dcor$Label = sprintf("%s (%s) %.2f %%", dcor$MSP, dcor$Species, 100 * dcor$Prevalence)
dcor$Name = mm[dcor$Mb, "Top.annotation.name"]

# Aggregate by maximum correlation: for every compound keep the row with the
# largest absolute correlation over all MSPs of the genus.
# pr = fraction of samples whose summed abundance over these MSPs is positive
# (drop = FALSE keeps rowSums() working for a single MSP).
pr = mean(rowSums(S[, msps, drop = FALSE]) > 0)
# na.last = FALSE places NA correlations first in the ascending order, so
# rev() moves them to the end and they are only kept for a compound that has
# no valid correlation at all.
agg = dcor[rev(order(abs(dcor$Correlation), na.last = FALSE)),]
agg = agg[!duplicated(agg$Mb),]
agg$Prevalence = pr
agg$Label = sprintf("Combined (%.2f %%)", 100 * pr)
# Per-MSP rows followed by the "Combined" rows
dcor2 = rbind.fill(dcor, agg)

# Box plot: scaled compound values split by MSP detection status, one facet
# per MSP ordered by decreasing prevalence, with t-test significance marks.
# The plot is built explicitly and handed to ggsave() instead of relying on
# qplot() (deprecated in recent ggplot2) and the implicit last_plot(); this
# also avoids the stray default-device output caused by top-level printing.
fname = file.path(img.dir, sprintf("box_%s_%s.pdf", genus, target))
p.box = ggplot(data, aes(x = Name, y = Value, fill = Detected)) +
  geom_boxplot() +
  stat_compare_means(method = "t.test", aes(label = ..p.signif..),
                     symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
                                        symbols = c("****", "***", "**", "*", ""))) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) +
  coord_flip() + facet_wrap(~reorder(Label, -Prevalence)) + xlab("") + ylab("")
ggsave(fname, plot = p.box, width = 16, height = 9.5)
message(sprintf("Written %s", fname))

# Correlation bar chart: one bar per compound, coloured by effect direction,
# one facet per MSP plus the "Combined" facet, ordered by decreasing prevalence.
# The plot object is passed to ggsave() explicitly instead of relying on
# qplot() (deprecated in recent ggplot2) and the implicit last_plot().
fname = file.path(img.dir, sprintf("cor_%s_%s.pdf", genus, target))
p.cor = ggplot(dcor2, aes(x = Name, y = Correlation, fill = Effect)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust=0.5)) +
  coord_flip() + facet_wrap(~reorder(Label, -Prevalence)) + xlab("") + ylab("") +
  # drop = FALSE keeps all three effect levels in the legend even if unused
  scale_fill_manual(values = c("#0bb6d9", "gray", "firebrick"), drop=FALSE)
ggsave(fname, plot = p.cor, width = 16, height = 9.5)
message(sprintf("Written %s", fname))

# Write the merged per-sample table (metabolite values joined with MSP
# abundances). Strings are quoted because compound names and labels are free
# text; an unquoted comma inside them would shift the columns of that row.
fname = file.path(out.dir, sprintf("results_%s_%s.csv", genus, target))
write.csv(data, fname, quote = TRUE, row.names = FALSE)
message(sprintf("Written %s", fname))

# Write the correlation table (per-MSP rows plus the "Combined" rows)
fname = file.path(out.dir, sprintf("correlations_%s_%s.csv", genus, target))
write.csv(dcor2, fname, quote = TRUE, row.names = FALSE)
message(sprintf("Written %s", fname))


