################################################################################
# This script calculates each feature's HW activity per gene for all samples
# in the compendium and (optionally) plots each node's HW activity distribution.
#
# Usage:
#     Run inside node_interpretation folder.
#
#     Rscript HWG_activity.R networkFile sd_cutoff dataFile output_file
#     activity_plot activity_plot_fd
#
#     networkFile: file path to the network file of an ADAGE model
#     sd_cutoff: the standard deviation cutoff that defines HW genes
#     dataFile: file path to the training expression compendium
#     output_file: file name of the output file that stores HW activity per
#                  sample per feature
#     activity_plot: whether to plot the node activity distribution, either T
#                    or F
#     activity_plot_fd: the output folder for the node activity distribution
#                       plots (only read when activity_plot is T)
################################################################################

pacman::p_load("readr")
source("HWactivity.R")

############ load in arguments

# Positional command-line arguments, in the order given in the usage notes:
#   1 networkFile, 2 sd_cutoff, 3 dataFile, 4 output_file, 5 activity_plot,
#   6 activity_plot_fd (only needed when activity_plot is TRUE).
commArgs <- commandArgs(trailingOnly = TRUE)
if (length(commArgs) < 5) {
  stop("Expected at least 5 arguments: networkFile sd_cutoff dataFile ",
       "output_file activity_plot [activity_plot_fd]", call. = FALSE)
}

networkFile <- commArgs[1]

# Fail early with a clear message instead of carrying an NA cutoff downstream.
sd_cutoff <- suppressWarnings(as.numeric(commArgs[2]))
if (is.na(sd_cutoff)) {
  stop("sd_cutoff must be numeric, got: ", commArgs[2], call. = FALSE)
}

dataFile <- commArgs[3]
output_file <- commArgs[4]

# as.logical() yields NA for anything it cannot interpret, and `if (NA)` would
# abort with an unhelpful error, so validate explicitly.
activity_plot <- as.logical(commArgs[5])
if (is.na(activity_plot)) {
  stop("activity_plot must be T or F, got: ", commArgs[5], call. = FALSE)
}

# Always define activity_plot_fd: it is passed to cal_HWactivity() later on
# regardless of whether plotting was requested.
activity_plot_fd <- NULL
if (activity_plot) {
  if (length(commArgs) < 6) {
    stop("activity_plot_fd is required when activity_plot is T",
         call. = FALSE)
  }
  activity_plot_fd <- commArgs[6]
  # Re-running with an existing folder is fine; also create missing parents.
  if (!dir.exists(activity_plot_fd)) {
    dir.create(activity_plot_fd, recursive = TRUE)
  }
}

############ load in data

# Expression compendium: tab-delimited with a header row. The first column
# holds gene names and is dropped, leaving one row per gene.
data <- read_delim(dataFile, col_names = TRUE, delim = "\t")
data <- data[, -1]  # remove gene name column
gene_num <- nrow(data)

# Network file: skip its first two lines, then read at most gene_num
# tab-delimited rows (no header) and convert them to a numeric matrix.
weight <- read_delim(networkFile, delim = "\t", col_names = FALSE,
                     n_max = gene_num, skip = 2)
weight <- data.matrix(weight)
net_size <- ncol(weight)  # number of columns in the weight matrix

########### calculate HW activity per gene

# cal_HWactivity() is not defined in this file (presumably it comes from the
# sourced HWactivity.R -- confirm there for its exact contract).
HWactivity_perGene <- cal_HWactivity(data, weight, net_size, sd_cutoff,
                                     activity_plot, activity_plot_fd)

# Write a tab-separated table with row names; col.names = NA adds a blank
# header cell above the row-name column so the header lines up with the data.
write.table(HWactivity_perGene, output_file, row.names = TRUE, col.names = NA,
            sep = "\t", quote = FALSE)
