################################################################################
# This script plots HW activity heatmaps for each dataset in the compendium
# with all nodes and for each node with all experiments.
#
# Usage:
#     Run inside node_interpretation folder.
#
#     Rscript make_HW_activity_heatmaps.R networkFile dataFile dataset_list
#     out_folder1 out_folder2 HW_cutoff
#
#     networkFile: file path to the network file of an ADAGE/eADAGE model
#     dataFile: file path to the training expression compendium
#     dataset_list: a file that stores the name of each individual dataset
#                   and the names of the samples that belong to that dataset
#     out_folder1: output folder for HW activity heatmaps with all nodes
#                  for each dataset
#     out_folder2: output folder for HW activity heatmaps of each node with
#                  all experiments
#     HW_cutoff: number of standard deviations from the mean to be counted as
#                high-weight
################################################################################

pacman::p_load("readr")
source("HWactivity.R")

############ Parse command-line arguments

# Query the trailing arguments once and fail early with a usage hint, rather
# than letting a missing argument turn into NA and surface later as an
# unrelated error.
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 6) {
  stop("Expected 6 arguments: networkFile dataFile dataset_list out_folder1 ",
       "out_folder2 HW_cutoff", call. = FALSE)
}

networkFile <- cli_args[1]   # path to the ADAGE/eADAGE network file
dataFile <- cli_args[2]      # path to the training expression compendium
dataset_list <- cli_args[3]  # path to the dataset/sample list (loaded below)
out_folder1 <- cli_args[4]   # output folder for the per-dataset heatmaps
out_folder2 <- cli_args[5]   # output folder for the per-node heatmaps

# Number of standard deviations used as the high-weight cutoff.
sd_cutoff <- as.numeric(cli_args[6])
if (is.na(sd_cutoff)) {
  stop("HW_cutoff must be numeric, got: ", cli_args[6], call. = FALSE)
}

# Create the output folders (including missing parents) only when they do not
# exist yet, so re-running the script does not warn about existing folders.
for (out_folder in c(out_folder1, out_folder2)) {
  if (!dir.exists(out_folder)) {
    dir.create(out_folder, recursive = TRUE)
  }
}

############ Load in data

# Expression compendium: tab-delimited with a header row.
data <- read_delim(dataFile, col_names = TRUE, delim = "\t")
data <- data[, -1]  # remove gene name column
gene_num <- nrow(data)

# Weight matrix: skip the first two lines of the network file, then read
# exactly one row per gene (n_max = gene_num); any lines after that are not
# read. The file has no column header.
weight <- read_delim(networkFile, delim = "\t", col_names = FALSE,
                     n_max = gene_num, skip = 2)
weight <- data.matrix(weight)
net_size <- ncol(weight)  # one column per node in the network

# From here on dataset_list holds the parsed table instead of the file path:
# column 1 is the dataset name, column 2 is the ";"-separated sample names.
dataset_list <- read.table(dataset_list, header = FALSE, sep = "\t",
                           stringsAsFactors = FALSE)

############# Plot a HW activity heatmap for each dataset

# Calculate HW activity per gene. The result is indexed below by sample name
# on its rows and by node/signature on its columns.
HWactivity_perGene <- cal_HWactivity(data, weight, net_size, sd_cutoff)

# Loop over all datasets; seq_len() yields an empty loop for an empty table,
# whereas 1:nrow() would iterate over c(1, 0).
for (dataset_idx in seq_len(nrow(dataset_list))) {
  dataset <- dataset_list[dataset_idx, 1]
  # get the sample names (stored as one ";"-separated string per dataset)
  sampleList <- unlist(strsplit(dataset_list[dataset_idx, 2], ";"))

  # only consider datasets that are in the train set
  # NOTE(review): only the first sample is checked; a dataset whose first
  # sample is in the compendium but whose later samples are not will still
  # reach the row lookup below -- confirm this cannot happen in practice.
  if (sampleList[1] %in% colnames(data)) {

    # extract HW activity values of this dataset, selecting rows by sample name
    this_activity <- HWactivity_perGene[sampleList, ]
    outputFile <- file.path(out_folder1, paste0(dataset, ".pdf"))

    # plot HW activity heatmap for this dataset
    plot.HWactivity.per.exp(this_activity, dataset, outputFile)
  }
}


############## Plot a HW activity heatmap for each node across all experiments

# Build a sample -> experiment lookup table with one row per sample listed in
# dataset_list. Every row of dataset_list is used, starting from the first:
# the table is read without a header, so skipping row 1 would leave the first
# dataset's samples without an experiment label.
# The per-dataset pieces are built with lapply() and bound once, instead of
# growing a data frame with rbind() inside a loop.
sample_experiment_parts <- lapply(seq_len(nrow(dataset_list)), function(row) {
  dataset <- dataset_list[row, 1]
  # get the sample names
  sampleList <- unlist(strsplit(dataset_list[row, 2], ";"))
  # the experiment label is "<dataset>_<sample>"
  data.frame(sample = sampleList,
             experiment = paste(dataset, sampleList, sep = "_"),
             stringsAsFactors = FALSE)
})
sample_experiment <- do.call(rbind, sample_experiment_parts)

# Relabel each row of the activity table with its experiment label. match()
# uses the first occurrence of a sample name; samples that do not appear in
# dataset_list get NA as their row name.
rownames(HWactivity_perGene) <- sample_experiment$experiment[
  match(rownames(HWactivity_perGene), sample_experiment$sample)]

# Loop over each signature (one column of HWactivity_perGene per signature);
# seq_len() yields an empty loop if there are no columns.
for (i in seq_len(ncol(HWactivity_perGene))) {
  # get the activity values of this signature across all datasets
  this_activity <- HWactivity_perGene[, i]
  signature_name <- colnames(HWactivity_perGene)[i]
  # grid of values from this signature's minimum to its maximum activity in
  # steps of 0.001, handed to the plotting function as key_range
  key_range <- seq(min(this_activity), max(this_activity), by = 0.001)
  outputFile <- file.path(out_folder2, paste0(signature_name, ".pdf"))
  # plot HW activity heatmap for this signature
  plot.HWactivity.per.signature(this_activity, signature_name, key_range,
                                outputFile)
}
