###############################################################################
# This script builds one PCA model and ICA_num ICA models using the input data.
#
# Usage:
#     Rscript PCA_ICA.R data_file component_num ICA_num
#
#     data_file: file path to the gene expression compendium
#     component_num: number of components in PCA/ICA
#     ICA_num: number of ICA models to build
###############################################################################

pacman::p_load("fastICA")

########## load command arguments
# Read the trailing command-line arguments once. Positions follow the usage
# in the file header: data_file, component_num, ICA_num.
cmd_args <- commandArgs(trailingOnly = TRUE)
data_file <- cmd_args[1]
# Convert only the argument being read. Coercing the whole argument vector
# (including the file path) to numeric raises a spurious
# "NAs introduced by coercion" warning on every run.
component_num <- as.numeric(cmd_args[2])
ICA_num <- as.numeric(cmd_args[3])

########## load constants
# summary_file stores the output of this script, it will be overwritten in
# each run.
summary_file <- "./PCA_ICA_output.txt"

########## read in data
# Fail early with a usage hint when no data file was supplied, instead of
# letting read.table() raise a less helpful error.
if (is.na(data_file)) {
  stop("Usage: Rscript PCA_ICA.R data_file component_num ICA_num",
       call. = FALSE)
}

# Tab-separated file with a header row; the first column supplies the row
# names. check.names = FALSE keeps the column headers exactly as written.
data <- read.table(data_file, header = TRUE, sep = "\t", row.names = 1,
                   check.names = FALSE)
# Row names of the input table (gene IDs, going by the variable name).
gene_id <- rownames(data)
# Transpose so that the input file's columns become the rows of X.
X <- data.matrix(t(data))

####### PCA
# Fit PCA on X with the prcomp() defaults; the rotation matrix holds the
# weights (loadings), one column per principal component.
pca_result <- prcomp(X)
pca_weight <- pca_result$rotation

# Give a clear error instead of an obscure "subscript out of bounds" when the
# requested number of components is not available.
if (is.na(component_num) || component_num < 1 ||
    component_num > ncol(pca_weight)) {
  stop("component_num must be between 1 and ", ncol(pca_weight),
       call. = FALSE)
}

# Write the "PCA" title line followed by the weight table through a single
# connection. This produces the same file as write() + write.table(append =
# TRUE) but avoids the "appending column names to file" warning.
# drop = FALSE keeps a one-column matrix (and its "PC1" header) when
# component_num is 1.
pca_con <- file("./PCA_weight_matrix.txt", open = "w")
writeLines("PCA", pca_con)
write.table(pca_weight[, seq_len(component_num), drop = FALSE], pca_con,
            quote = FALSE, row.names = FALSE, sep = "\t")
close(pca_con)

# accumulated variance
# Cumulative fraction of total variance explained by the leading PCs.
pc_variance <- pca_result$sdev^2
accu_stev <- cumsum(pc_variance) / sum(pc_variance)
write(paste("First", component_num, "PCs explains",
            round(accu_stev[component_num], 2), "variance."), summary_file)

####### ICA
# Create the output directory. showWarnings = FALSE silences the warning
# raised when the directory already exists from a previous run; a genuine
# failure is caught by the explicit check below.
ica_dir <- "./ICA_models"
dir.create(ica_dir, showWarnings = FALSE)
if (!dir.exists(ica_dir)) {
  stop("could not create output directory: ", ica_dir, call. = FALSE)
}

# seq_len() yields an empty sequence when ICA_num is 0, whereas 1:ICA_num
# would iterate over c(1, 0).
for (i in seq_len(ICA_num)) {
  print(paste("calculating ICA model", i))
  ica_result <- fastICA(X, component_num)
  # Combine the pre-whitening matrix K with the un-mixing matrix W into one
  # weight matrix with one column per independent component.
  ica_weight <- ica_result$K %*% ica_result$W

  # Write the "ICA" title line and the weight table through one connection;
  # same file content as write() + write.table(append = TRUE) without the
  # "appending column names to file" warning.
  ica_file <- file.path(ica_dir, paste0("ICA_weight_matrix_", i, ".txt"))
  ica_con <- file(ica_file, open = "w")
  writeLines("ICA", ica_con)
  write.table(ica_weight, ica_con, quote = FALSE, row.names = FALSE,
              sep = "\t")
  close(ica_con)
}
