# Pathway:  https://www.genome.jp/pathway/map00650+C00246
# butyrate kinase [EC:2.7.2.7]
# K00929
# acetate CoA/acetoacetate CoA-transferase alpha subunit [EC:2.8.3.8 2.8.3.9]
# K01034 K01035 K19709

require(ggplot2)
require(ggrepel)

# Parameters
# Threshold on the adjusted p-value; the volcano plot draws a dashed
# horizontal line at -log10(alpha).
alpha <- 0.25

# Butyrate enzymes
# One row per KEGG orthology (KO) identifier that is searched for in the gene
# metadata. The first three KOs share the same EC number and name; the last
# one is butyrate kinase.
df <- data.frame(
  KO = c("K01034", "K01035", "K19709", "K00929"),
  EC = rep(c("2.8.3.8", "2.7.2.7"), times = c(3, 1)),
  name = rep(
    c("acetate CoA/acetoacetate CoA-transferase alpha subunit",
      "butyrate kinase"),
    times = c(3, 1)
  )
)


#!/usr/bin/env Rscript

# NOTE(review): `hlp` is not referenced anywhere else in the visible script,
# and its text (cytokine linear models) does not describe the butyrate-enzyme
# search performed below -- it looks inherited from another script; confirm
# before relying on it.
hlp <- " Linear models for individual cytokine responses from selected species. 
  Correct signal for covariates (age, gender) and correlate them with the microbiome 
  density.
"

# Input paths
in.data <- "./data/data.Robj"
in_results <- "./output/response/trained/results.csv"

# Output paths
# out.dir is defined exactly once. A second definition that used to follow
# was built from a variable named `target`, which is never assigned in this
# script, so the run aborted with "object 'target' not found".
# NOTE(review): assumes the scfas/<mode> directory is the intended output
# location -- confirm.
mode <- "trained"
out.dir <- file.path("./output/response/scfas/", mode)
img.dir <- file.path(out.dir, "img")
# recursive = TRUE also creates out.dir, the parent of img.dir.
dir.create(img.dir, recursive = TRUE, showWarnings = FALSE)

# Load data object
# load() restores whatever objects were saved in in.data; `obj` is then picked
# up from the global environment.
load(in.data)
obj <- .GlobalEnv$obj

# Search butyrate enzymes in gene metadata
# For every KO in `df`, keep the metadata rows whose KEGG_KOs field matches
# that KO and tag them with the KO that matched (column Query).
gene_meta <- obj$assays$MSPGenes$metadata
per_ko <- lapply(seq_len(nrow(df)), function(k) {
  ko <- df[k, "KO"]
  matched <- gene_meta[grep(ko, gene_meta$KEGG_KOs), ]
  if (nrow(matched) == 0) {
    return(NULL)
  }
  matched$Query <- ko
  matched
})
# Stack the non-empty matches in KO order, starting from an empty data frame
# so `hits` is still a data frame when nothing matched.
hits <- Reduce(rbind, Filter(Negate(is.null), per_ko), data.frame())
message(sprintf("Found %d genes", nrow(hits)))

# Select enzyme encoders
# MSPs that carry at least one matched gene; the ".core" suffix is appended
# to every MSP identifier.
map <- obj$assays$MSPGenes$mapping
carries_hit <- map$Feature %in% as.character(hits$Feature)
msps <- sprintf("%s.core", unique(map[carries_hit, "MSP"]))

# Load trained immunity results
results <- read.csv(in_results, stringsAsFactors = FALSE)
rf <- results

# Add gene info
# Legend label for encoder features; built from the first hit only
# (its eggNOG annotation and KEGG_KOs string).
gene <- sprintf("%s (%s)", hits$`eggNOG annot`[1], hits$KEGG_KOs[1])

# Rows of the results table whose Feature is one of the selected MSPs.
is_encoder <- rf$Feature %in% msps

# Assign the labels explicitly instead of renaming the levels of
# factor(<logical>) by position: when every row is an encoder that factor has
# the single level "TRUE", which positional renaming would turn into "N/A".
butyrate <- rep("N/A", nrow(rf))
butyrate[is_encoder] <- gene
rf$Butyrate <- factor(butyrate, levels = c("N/A", gene))

# Text labels shown on the plot: empty for everything except encoders.
# NOTE(review): this reads column "label" (lower case) while the table export
# further down selects "Label" -- confirm both columns really exist.
rf$LabelTmp <- ""
rf$LabelTmp[is_encoder] <- rf[is_encoder, "label"]

# Volcano
# Effect size against -log10(adjusted p-value) for every feature. Butyrate
# enzyme encoders are drawn in black on top of the grey background points and
# labelled; the dashed line marks the alpha threshold.
fname <- file.path(out.dir, "results_volcano_butyrate.pdf")
rfp <- rf[rf$Butyrate != "N/A", ]
# ggplot() + geom_point() replaces qplot(), which is deprecated in
# ggplot2 >= 3.4.0.
p <- ggplot(
  rf,
  aes(x = Estimate, y = -log10(Padj), colour = Butyrate, shape = Prevalence)
) +
  geom_point() +
  geom_hline(yintercept = -log10(alpha), linetype = "dashed", colour = "gray") +
  ylab("-Log10 (FDR)") +
  xlab("Effect size") +
  # Second layer redraws the encoders so they are not hidden by grey points.
  geom_point(data = rfp) +
  # Labels are mapped from the plot data (x/y inherited) rather than passed
  # as raw vectors, so they pass through the same scales and limits as the
  # points they annotate.
  geom_text_repel(
    aes(label = LabelTmp),
    show.legend = FALSE, force = 5, size = 2
  ) +
  # Two shapes / two colours: assumes Prevalence and Butyrate each have at
  # most two levels.
  scale_shape_manual(values = c(1, 16)) +
  ylim(c(-0.2, 4.2)) +
  scale_colour_manual(values = c("#DDDDDD", "black")) +
  theme(
    legend.margin = margin(1, 1, 1, 1),
    text = element_text(size = 7),
    panel.background = element_blank(),
    panel.border = element_rect(fill = "transparent", colour = "gray")
  )
ggsave(fname, width = 5.8, height = 3.2, plot = p)
message(sprintf("Written %s", fname))


# Save table
# Mapping rows of the matched genes, with a "<MSP>.core" key that lines up
# with the Feature column of the results table.
encoder_map <- map[map$Feature %in% as.character(hits$Feature), ]
encoder_map$Key <- sprintf("%s.core", encoder_map$MSP)

# Attach the model results (by MSP key), then the gene annotation (by gene).
with_results <- merge(encoder_map, rf, by.x = "Key", by.y = "Feature")
with_annotation <- merge(with_results, hits, by = "Feature")

# Keep the reported columns and give them presentation headers.
keep_cols <- c("Label", "Feature", "KEGG_KOs", "eggNOG annot",
               "seed_ortholog_evalue")
mff <- with_annotation[, keep_cols]
colnames(mff) <- c("MSP", "Feature", "KEGG Kos", "eggNOG annotation",
                   "Seed ortholog e-value")

fname <- file.path(out.dir, "butyrate_genes.csv")
write.csv(mff, fname, row.names = FALSE)
message(sprintf("Written %s", fname))



