###############################################################################
# This script does pathway enrichment analysis for PCA and ICA models.
#
# Usage:
#     Rscript PCA_ICA_pathway_enrichment.R KEGG_file data_file HW_cutoff
#
#     KEGG_file: file path to 'pseudomonas_KEGG_terms.txt'
#     data_file: file path to the gene expression compendium, used to extract
#                the gene identifiers
#     HW_cutoff: number of standard deviations from the mean to be counted as
#                high-weight
###############################################################################

source("../netsize_evaluation/pathway_enrichment.R")

###### load commandArgs

# Parse the trailing command-line arguments once and validate them up front,
# so that a missing or malformed argument fails here with a clear message
# instead of propagating NA values into the analysis.
cmd_args <- commandArgs(trailingOnly = TRUE)
if (length(cmd_args) < 3) {
  stop("expected 3 arguments: KEGG_file data_file HW_cutoff", call. = FALSE)
}

KEGG_file <- cmd_args[1]
data_file <- cmd_args[2]
# as.numeric() only warns on unparsable input and returns NA; turn that into
# a hard error that names the offending value
HW_cutoff <- suppressWarnings(as.numeric(cmd_args[3]))
if (is.na(HW_cutoff)) {
  stop("HW_cutoff must be numeric, got: ", cmd_args[3], call. = FALSE)
}

###### load constants

# significance cutoff handed to one.pathway.analysis()
sig_cutoff <- 0.05

# PCA model: weight matrix input and the two output files
# (presumably significant-only and all pathways, going by the file names)
PCA_weightFile <- "./PCA_weight_matrix.txt"
PCA_outfile1 <- "PCA_sigPathway.txt"
PCA_outfile2 <- "PCA_allPathway.txt"

# ICA models: every file in ICA_modelFolder whose name starts with
# "ICA_weight_matrix_". list.files() takes a regular expression, not a glob:
# the former "ICA_weight_matrix_*" meant "zero or more underscores" and was
# unanchored, so it also matched unrelated names containing that text.
ICA_modelFolder <- "./ICA_models"
ICA_weightPattern <- "^ICA_weight_matrix_"
ICA_weightFiles <- list.files(ICA_modelFolder, pattern = ICA_weightPattern)
ICA_outfolder <- "./ICA_pathways"

# only create the output folder when it is missing, so that re-running the
# script does not emit an "already exists" warning
if (!dir.exists(ICA_outfolder)) {
  dir.create(ICA_outfolder)
}

###### read in data

# number of tab-separated fields on the first line counted by count.fields()
col_n <- count.fields(data_file, sep = "\t")[1]
# read only the first column of the data file (the gene IDs): a colClasses
# entry of "NULL" makes read.table skip that column
geneID <- read.table(data_file, sep = "\t", header = TRUE,
                     colClasses = c("character", rep("NULL", col_n - 1)))
# KEGG terms, with the first column used as row names; TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned
KEGG <- read.table(KEGG_file, sep = "\t", header = FALSE, row.names = 1,
                   stringsAsFactors = FALSE)


###### pathway analysis for PCA

# announce the model being processed, then run the enrichment analysis on the
# PCA weight matrix, writing to the two PCA output files
print(paste("processing PCA model:", PCA_weightFile))
one.pathway.analysis(
  PCA_weightFile, geneID, KEGG,
  PCA_outfile1, PCA_outfile2,
  HW_cutoff, sig_cutoff,
  component = TRUE, output_all = TRUE
)

###### pathway analysis for ICA

# list.files() returns an empty vector when the model folder is missing or
# holds no matching weight matrices; report that instead of silently doing
# nothing
if (length(ICA_weightFiles) == 0) {
  warning("no ICA weight matrices found in ", ICA_modelFolder, call. = FALSE)
}

for (weightFile in ICA_weightFiles) {
  # the last "_"-separated piece of the file name becomes the suffix of the
  # output file names; fixed = TRUE treats "_" as a literal separator
  modelN <- tail(unlist(strsplit(weightFile, "_", fixed = TRUE)), 1)
  outfile1 <- file.path(ICA_outfolder, paste0("ICA_sigPathway_", modelN))
  outfile2 <- file.path(ICA_outfolder, paste0("ICA_allPathway_", modelN))
  ICA_model <- file.path(ICA_modelFolder, weightFile)
  print(paste("processing ICA model:", ICA_model))
  one.pathway.analysis(ICA_model, geneID, KEGG, outfile1, outfile2, HW_cutoff,
                       sig_cutoff, component = TRUE, output_all = TRUE)
}