#!/usr/bin/env Rscript

# Help text shown when the script is called without the required argument.
hlp <- " Linear models for individual cytokine responses from selected species. 
  Correct signal for covariates (age, gender) and correlate them with the microbiome 
  density.
"

# Read pipeline arguments.
# The first trailing argument names the response assay to analyse; it is used
# both as a key into the data object and as the output sub-directory name.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1L || is.na(args[1]) || !nzchar(args[1])) {
  # Fail early: an NA target would otherwise create an "NA" output directory
  # and only crash later with an unrelated-looking error.
  stop("Usage: Rscript <script> <target>\n", hlp, call. = FALSE)
}
target <- args[1]

# Input object and per-target output locations
in.data <- "./data/data.Robj"
out.dir <- file.path("./output/response/", target)
img.dir <- file.path(out.dir, "img")

# Make sure the output tree exists; already existing directories are reused
# without a warning.
invisible(lapply(
  c(out.dir, img.dir),
  dir.create,
  recursive = TRUE,
  showWarnings = FALSE
))

# Analysis parameters
assay <- "MSPCore"       # assay whose features are correlated with the response
alpha <- 0.05            # p-value cut-off used for the filtered results table
frm <- y ~ Age + Gender  # covariate model fitted to each response

# Libs
require(pheatmap)
require(ggplot2)

# Load data object.
# `load` restores the saved variables into the calling (global) environment;
# the file is expected to provide a list-like object named `obj`.
load(in.data)
if (!exists("obj", envir = .GlobalEnv, inherits = FALSE)) {
  stop(sprintf("File %s does not provide an object named 'obj'", in.data),
       call. = FALSE)
}
obj <- .GlobalEnv$obj

# Response assay selected on the command line
target.assay <- obj$assays[[target]]
if (is.null(target.assay)) {
  stop(sprintf("Assay '%s' not found in %s", target, in.data), call. = FALSE)
}
C <- target.assay$data       # responses; rows are matched to `meta` by row name
cm <- target.assay$metadata  # presumably one row per column of C -- TODO confirm
meta <- obj$metadata

# Remove evening vaccinations.
# `which` discards samples with a missing vaccination time; a plain logical
# index would turn each of them into an all-NA row and inflate the count.
meta <- meta[which(meta$Vacc_time == "Morning"), , drop = FALSE]
message(sprintf("Keeping %d morning vaccinations", nrow(meta)))

# Filter cytokines: keep samples present in both tables, in the same order.
# drop = FALSE keeps C two-dimensional even when it has a single column.
keep <- intersect(row.names(C), row.names(meta))
meta <- meta[keep, , drop = FALSE]
C <- C[keep, , drop = FALSE]
message(sprintf("Keeping %d samples with cytokines", nrow(meta)))

# Fill in the results for each cytokine.
# For every column of C: regress the response on the covariates in `frm`,
# then rank-correlate the residuals with every feature of `assay`.

# Assay of interest. It is the same for every cytokine, so it is looked up once;
# this also guarantees `xm` exists when C has no columns.
X.all <- obj$assays[[assay]]$data
xm <- obj$assays[[assay]]$metadata
if (is.null(X.all)) {
  stop(sprintf("Assay '%s' not found in the data object", assay), call. = FALSE)
}

# One single-row data frame per (cytokine, feature) pair, bound together once
# at the end instead of growing `results` with rbind on every iteration.
rows <- vector("list", ncol(C) * ncol(X.all))
k <- 0L

for (i in seq_len(ncol(C))) {

  # Fit a residual model.
  # Only samples with a measured response AND complete covariates are kept:
  # lm() silently drops incomplete rows, which would make the residual vector
  # shorter than `m` and break the assignment below.
  m <- meta
  m$y <- C[, i]
  model.vars <- intersect(all.vars(frm), colnames(m))
  m <- m[complete.cases(m[, model.vars, drop = FALSE]), , drop = FALSE]
  model <- lm(frm, m)
  m$yr <- residuals(model)
  r2 <- cor(fitted(model), m$y)^2

  # Spearman correlation with assay of interest: align samples by row name.
  # drop = FALSE keeps X two-dimensional for a single-feature assay.
  keep <- intersect(row.names(m), row.names(X.all))
  X <- X.all[keep, , drop = FALSE]
  m <- m[keep, , drop = FALSE]
  message(sprintf("Keeping %d samples intersecting with %s", nrow(m), assay))

  # Compute correlation for each feature in the assay
  for (j in seq_len(ncol(X))) {
    ct <- cor.test(m$yr, X[, j], method = "spearman")
    prev <- mean(X[, j] > 0)  # fraction of samples with a positive value
    k <- k + 1L
    # Target/Feature labels assume row i of `cm` describes column i of C and
    # row j of `xm` describes column j of X -- TODO confirm against the data.
    # padj is a placeholder here; it is overwritten once all tests are known.
    rows[[k]] <- data.frame(Target = row.names(cm)[i],
                            Feature = row.names(xm)[j],
                            Prevalence = prev, r2 = r2,
                            coef = ct$estimate, pvalue = ct$p.value,
                            padj = 1, stringsAsFactors = FALSE)
  }
}

# Same table (including row names) as repeated rbind would have produced
results <- if (k > 0L) do.call(rbind, rows[seq_len(k)]) else data.frame()

# Annotate each test with the metadata of its feature, then of its target
# (metadata rows are looked up by row name).
feature.meta <- xm[as.character(results$Feature), ]
results[, colnames(xm)] <- feature.meta
target.meta <- cm[as.character(results$Target), ]
results[, colnames(cm)] <- target.meta

# Multiple-testing correction over all tests ("holm" is p.adjust's default),
# followed by sorting from the smallest to the largest raw p-value.
results$padj <- p.adjust(results$pvalue, method = "holm")
by.pvalue <- order(results$pvalue)
results <- results[by.pvalue, ]

# Write a results table as CSV and log how many rows, distinct features and
# distinct targets it holds.
save_table <- function(tab, path) {
  write.csv(tab, path)
  message(sprintf("Written %d rows, %d features, %d targets to %s", 
                  nrow(tab), length(unique(tab$Feature)),
                  length(unique(tab$Target)), path))
}

# Full table
fname <- file.path(out.dir, "results.csv")
save_table(results, fname)

# Filtered table: tests below the cut-off that also have a known target.
# NOTE(review): the cut-off is applied to the raw p-value, not to padj.
rf <- results[which(results$pvalue < alpha), ]
rf <- rf[!is.na(rf$Target), ]
fname <- file.path(out.dir, "results_filtered.csv")
save_table(rf, fname)
