################################################################################
# This script plots a HW activity heatmap for a test dataset using all nodes.
# It also plots a HW activity heatmap for each node using the same color
# key used when plotting one-node heatmaps across the compendium.
#
# Usage:
#     Run inside node_interpretation folder.
#
#     Rscript make_HW_activity_heatmaps_testset.R networkFile dataFile dataname
#     activity_normed out_folder1 out_folder2 selected_samples HW_cutoff
#
#     networkFile: file path to the network file of an ADAGE/eADAGE model
#     dataFile: file path to the test dataset
#     dataname: the name of the testset that will be used in each heatmap
#     activity_normed: normalized activity values for each node of all
#                      samples in the compendium
#     out_folder1: output folder for the HW activity heatmap with all nodes
#     out_folder2: output folder for HW activity heatmaps of each node with
#                  all samples in the test dataset
#     selected_samples: if not "all", then only plot the selected samples,
#                       given as column indices separated by ", " (e.g.
#                       "1, 3, 5"), counted after the gene name column is
#                       removed
#     HW_cutoff: number of standard deviations from the mean to be counted as
#                high-weight
################################################################################

pacman::p_load("gplots", "readr")
source("HWactivity.R")

# Parse the positional command-line arguments once. Expected order:
#   1 networkFile, 2 dataFile, 3 dataname, 4 activity_normed,
#   5 out_folder1, 6 out_folder2, 7 selected_samples, 8 HW cutoff
cli_args <- commandArgs(trailingOnly = TRUE)

# The first seven arguments are all dereferenced unconditionally further down,
# so fail early with a usage hint instead of a cryptic NA error later on.
if (length(cli_args) < 7) {
  stop("Expected arguments: networkFile dataFile dataname activity_normed ",
       "out_folder1 out_folder2 selected_samples HW_cutoff", call. = FALSE)
}

networkFile <- cli_args[1]
dataFile <- cli_args[2]
dataname <- cli_args[3]
activity_normed <- cli_args[4]  # file path here; replaced by the table later
out_folder1 <- cli_args[5]
out_folder2 <- cli_args[6]
selected_samples <- cli_args[7]
# as.numeric() yields NA when the cutoff is absent or not a number; the value
# is passed on unchanged.
sd_cutoff <- as.numeric(cli_args[8])

# Create the output folders (including missing parent folders) unless they
# already exist, so that re-running the script does not emit
# "already exists" warnings while genuine creation failures are still reported.
for (out_dir in c(out_folder1, out_folder2)) {
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
}

############ Load in data


# Read the tab-delimited test dataset (first row is the header) and drop the
# first column, which holds the gene names.
data <- read_delim(dataFile, col_names = TRUE, delim = "\t")
data <- data[, -1]  # remove gene name column
gene_num <- nrow(data)

# Read the weight matrix from the network file: skip its first two lines and
# read at most one row per gene in the dataset.
weight <- read_delim(networkFile, delim = "\t", col_names = FALSE,
                     n_max = gene_num, skip = 2)
weight <- data.matrix(weight)
net_size <- ncol(weight)

# Replace the file path held in activity_normed with the table it points to
# (header row present, first column used as row names).
activity_normed <- read.table(activity_normed, header = TRUE, row.names = 1,
                              sep = "\t")

# preprocess the data to only contain the required samples
if (selected_samples != "all") {
  # Split on commas and trim whitespace so that both "1, 3, 5" and "1,3,5"
  # are accepted.
  sample_tokens <- trimws(unlist(strsplit(selected_samples, ",")))
  sample_tokens <- sample_tokens[nzchar(sample_tokens)]
  if (length(sample_tokens) == 0) {
    stop("selected_samples must be \"all\" or a comma-separated list of ",
         "samples", call. = FALSE)
  }

  # Tokens that all parse as integers are column indices (the original
  # behavior); otherwise they are used as sample (column) names.
  sample_idx <- suppressWarnings(as.integer(sample_tokens))
  if (anyNA(sample_idx)) {
    selected_samples <- sample_tokens
  } else {
    selected_samples <- sample_idx
  }
  data <- data[, selected_samples]
}

############## Plot a HW activity heatmap for the test set

# Destination of the all-node heatmap: <out_folder1>/<dataname>.pdf
outputFile <- file.path(
  out_folder1,
  paste0(dataname, ".pdf")
)

# Compute the HW activity of the test set from the expression data, the
# weight matrix, the network size and the high-weight cutoff
# (cal_HWactivity is presumably provided by the sourced HWactivity.R).
HWactivity <- cal_HWactivity(
  data,
  weight,
  net_size,
  sd_cutoff
)

# Plot the HW activity heatmap of this dataset, passing the output path
plot.HWactivity.per.exp(
  HWactivity,
  dataname,
  outputFile
)


############## Plot HW activity heatmaps for each node in the testset

# The color key of signature i is taken from column i of activity_normed, so
# activity_normed must provide a column for every column of HWactivity. Check
# this up front rather than failing halfway through the plots.
signature_num <- ncol(HWactivity)
if (ncol(activity_normed) < signature_num) {
  stop("activity_normed has ", ncol(activity_normed), " columns but ",
       "HWactivity has ", signature_num, call. = FALSE)
}

# loop over each hidden node; seq_len() makes this a no-op when HWactivity has
# no columns, whereas 1:0 would iterate over c(1, 0)
for (i in seq_len(signature_num)) {
  # get the raw activity values of this hidden node
  HWactivity.sig <- HWactivity[, i]
  signature_name <- colnames(HWactivity)[i]

  # color key spanning the min..max of this column in activity_normed, in
  # steps of 0.001
  key_range <- seq(min(activity_normed[, i]), max(activity_normed[, i]),
                   by = 0.001)

  # output path: <out_folder2>/<signature_name>_<dataname>.pdf
  outputFile <- file.path(out_folder2, paste0(signature_name, "_", dataname,
                                             ".pdf"))

  # plot HW activity heatmap for this signature
  plot.HWactivity.per.signature(HWactivity.sig, signature_name, key_range,
                                outputFile)
}
