#!/usr/bin/env Rscript
require(ggplot2)
require(reshape2)
require(webchem)
require(openxlsx)
require(KEGGREST)

# Paths
# out.dir receives the figures; the in.* files are read by the steps below:
#   in.data - tab-separated measurement table (first column = row names)
#   in.meta - tab-separated metadata with a `taxonomy` column
#   in.mbx  - spreadsheet of measured compounds (PubChem_CID, Compound)
#   in.pth  - pathway graph CSV (ProductID, SubstrateID)
out.dir = "./output/metabolomics/invitro/"
in.data = file.path(".", "output", "metabolomics", "invitro", "in_vitro_data.txt")
in.meta = file.path(".", "output", "metabolomics", "invitro", "in_vitro_metadata.txt")
in.mbx = file.path(".", "output", "metabolomics", "invitro", "tables", "media-1.xlsx")
in.pth = file.path(".", "output", "pathways", "kgml", "map00360.graph.csv")

# Read compounds from pathway
# The ProductID and SubstrateID columns of the pathway graph are prefixed with
# "cpd:" and looked up in the KEGG -> PubChem conversion table (KEGGREST); the
# resulting PubChem substance IDs are then resolved to compound IDs (webchem).
pth = read.csv(in.pth, stringsAsFactors = FALSE)
comps = unique(c(pth$ProductID, pth$SubstrateID))
# Drop missing IDs so that no bogus "cpd:NA" entry is looked up or counted
comps = sprintf("cpd:%s", comps[!is.na(comps)])
kegg2pubchem = keggConv("pubchem", "cpd")
# Compounds absent from the conversion table index to NA; gsub() keeps them NA
sid = gsub("pubchem:", "", kegg2pubchem[comps])
res = get_cid(sid, from="sid", domain = "substance")
# Keep only resolved CIDs: an NA left in `cid` would be counted as a found ID
# and would match measured compounds whose own PubChem CID is missing
cid = res$cid[!is.na(res$cid)]
message(sprintf("Found %d/%d Pubchem IDS", length(unique(cid)), length(comps)))

# Sync measured compounds
# Restrict the measured-compound table to rows whose PubChem CID is one of the
# pathway CIDs collected in `cid`.
mm = openxlsx::read.xlsx(in.mbx)
mm.cid = mm$PubChem_CID
# as.character() on a double renders round values in scientific notation
# (100000 -> "1e+05"), which would never match; print numeric IDs in full.
mm.cid = if (is.numeric(mm.cid)) sprintf("%.0f", mm.cid) else as.character(mm.cid)
# %in% treats NA as equal to NA, so rows without a CID are excluded explicitly
keep = !is.na(mm$PubChem_CID) & mm.cid %in% cid
mm = mm[keep,]
message(sprintf("Keep %d/%d compounds in selected pathway", sum(keep), length(keep)))

# Find taxonomy
# Load the measurement table (first column used as row names) and the metadata
# table, give both the same synthetic row names ("exp1", "exp2", ...), then keep
# only the rows whose taxonomy matches "Roseburia".
M = read.csv(in.data,  sep = "\t", header = TRUE, row.names = 1, check.names = FALSE)
ms = read.csv(in.meta,  sep = "\t", header = TRUE, check.names = FALSE)
# Rows of the two tables are paired purely by position, so they must line up
if (nrow(M) != nrow(ms)) {
  stop(sprintf("Data has %d rows but metadata has %d rows", nrow(M), nrow(ms)))
}
# seq_len() stays empty for an empty table, whereas 1:0 would yield c(1, 0)
row.names(ms) = row.names(M) = sprintf("exp%d", seq_len(nrow(ms)))
keep = row.names(ms)[grep("Roseburia", ms$taxonomy)]
# drop = FALSE keeps both objects as data frames even with a single column
ms = ms[keep, , drop = FALSE]
M = M[keep, , drop = FALSE]

# Find metabolites
# Disabled alternative: select columns by name pattern instead of by pathway.
# queries = c("tyrosine", "phenyl", "hippur", "cinnam")
# metabolites = unlist(lapply(queries, function(q) colnames(M)[grep(q, colnames(M), ignore.case = T)]))
# Keep the measured columns whose name appears among the pathway compounds
metabolites = intersect(mm$Compound, colnames(M))
# drop = FALSE: with exactly one match M would otherwise collapse to an unnamed
# vector and the metabolite / experiment labels would be lost when melting
M = M[, metabolites, drop = FALSE]
message(sprintf("Measured %d metabolites in pathway", length(metabolites)))

# Melt
# Long format: one (Row, Metabolite, Value) record per cell of M
dm = melt(as.matrix(M), varnames = c("Row", "Metabolite"), value.name = "Value")
dm[["Row"]] = as.character(dm[["Row"]])
# Only cells that hold a measurement are passed on to the plot
has.value = !is.na(dm[["Value"]])
dff = dm[has.value, ]


# Plot
# One point per measurement; metabolites on the y axis are ordered by their
# mean value (the default summary used by reorder()), with dashed guides at
# x = -1 and x = 1. The title is the taxonomy of the first retained experiment.
# Note: xlim(-4, 4) makes ggplot2 drop points outside that range (with a warning).
# The plot is stored in a variable instead of being left as a bare expression:
# under Rscript a bare ggplot is auto-printed to the default device, which
# leaves a stray Rplots.pdf in the working directory.
plt = ggplot(dff, aes(y=reorder(Metabolite, Value), x=Value)) + xlab("Log2 fold change") + ylab("") + 
  geom_vline(xintercept = -1, linetype="dashed") + geom_vline(xintercept = 1, linetype="dashed") + 
  theme_light() + geom_point(size=2, alpha=0.3) + ggtitle(ms[1,]$taxonomy) +
  theme(text=element_text(size=7)) + xlim(-4, 4)
# Write the same figure as PDF and PNG, passing the plot explicitly rather than
# relying on ggsave()'s implicit last_plot()
for (ext in c("pdf", "png")) {
  fname = file.path(out.dir, sprintf("Roseburia_inulinivorans_mbx.%s", ext))
  ggsave(fname, plot = plt, width = 3.5, height = 1.5)
  message(sprintf("Written %s", fname))
}




