
# This script computes gene modules using the Antler clustering software.
# We identify module characteristics by functional enrichment with gprofiler2
# and isolate modules with terms relating to cell development and maturation.



## ----receive input from nextflow----------------------------------------------
# Get parameters passed on the command line:
#   args[1] - root directory of the pipeline (bin/ and output/ are read from it)
#   args[2] - .Rdata file that is loaded further down to obtain `integrated`
args <- commandArgs(TRUE)
if (length(args) < 2) {
  stop("Expected two arguments: <pipeline path> <integrated .Rdata file>",
       call. = FALSE)
}
path <- args[1]
file <- args[2]
print(path)
set.seed(123)
# The timepoint label is cut out of the file name by fixed character offsets
# (the first 11 and the last 17 characters are dropped). Work on the base name
# so that a leading directory in `file` cannot shift those offsets.
file_name <- basename(file)
timepoints <- substr(file_name, 12, nchar(file_name) - 17)

#Load standard package list
source(paste0(path,'/bin/auxiliary/DirectoryChecker.R'))
library(devtools)

# Antler is only available on GitHub at the time of writing and cannot be
# installed through conda, so it is installed on demand when missing.
# requireNamespace() only tests for availability (no attach, no warning);
# the package is attached by the library(Antler) call that follows.
if (!requireNamespace("Antler", quietly = TRUE)) {
  # Keep warnings raised during installation from being promoted to errors
  Sys.setenv(R_REMOTES_NO_ERRORS_FROM_WARNINGS = "true")
  devtools::install_github("juliendelile/Antler", dependencies = FALSE)
}

library(Antler)
library(Hmisc)
library(readr)
library(Seurat)
library(gprofiler2)
library(reshape2)
source(paste0(path,'/bin/auxiliary/SetPlottingParameters.R'))

# Directory that receives all Antler results
AntlerPath <- paste0(path, "/output/scrnaseq/", timepoints, "/GeneMods")
if (!dir.exists(AntlerPath)) {
  # recursive = TRUE also creates any missing parent directories; without it
  # dir.create() only warns and returns FALSE when a parent is absent.
  created <- dir.create(AntlerPath, recursive = TRUE)
  if (!created) {
    stop("Could not create Antler output directory: ", AntlerPath,
         call. = FALSE)
  }
}



# Load integrated data; the .Rdata file must provide an object named `integrated`
x <- load(file)
if (!"integrated" %in% x) {
  stop("File '", file, "' does not contain an object named 'integrated'",
       call. = FALSE)
}

# Drop cells whose major cluster label marks them as unusable for module detection
integrated_clean <- subset(
  integrated,
  cells = WhichCells(
    integrated,
    expression = major_cluster_label %nin% c('FailedClassification', 'Stressed', 'Meis2')
  )
)
antlerInput <- integrated_clean

# Antler requires certain metadata entries (timepoint, replicate_id, treatment)
# Build a sample-id -> numeric time lookup. The ids are converted to character
# so that parse_number() and the name-based lookup below behave the same
# whether orig.ident is stored as character or as a factor.
sample_ids <- sort(unique(as.character(antlerInput$orig.ident)))
times <- parse_number(sample_ids)
# Use day 20 as transition from E to P notation. Make sure this fits for the
# species you are using.
embryonic <- grep('^E', sample_ids)
times[embryonic] <- times[embryonic] - 20
names(times) <- sample_ids

# Look up each cell's time by sample name. Indexing the unnamed vector with
# orig.ident returned NA for character ids and depended on factor level codes
# otherwise. unname() keeps the sample ids from being mistaken for cell names.
antlerInput$timepoint <- unname(times[as.character(antlerInput$orig.ident)])
antlerInput$replicate_id <- 1
antlerInput$treatment <- 'None'

# #for antler to work, have to write files as .csv in correct format, in specified path
# write.csv(as.matrix(antlerInput@assays[['RNA']]@counts),file=paste0(path,'/assayData.csv'),col.names = T)
# write.csv(as.matrix(antlerInput@meta.data),file=paste0(path,'/phenoData.csv'),col.names = T)


# Begin Antler processing.
# Create the Antler object; its results are written below AntlerPath.
antler <- Antler$new(output_folder=AntlerPath)

# Hand Antler the RNA count matrix (converted to a dense matrix) together with
# the per-cell metadata, which includes the timepoint / replicate_id /
# treatment columns added above.
antler$load_dataset(assayData = as.matrix(antlerInput@assays[['RNA']]@counts), phenoData = AnnotatedDataFrame( as.data.frame(antlerInput@meta.data)) )


# NOTE(review): presumably drops cells with fewer than 700 detected genes and
# genes detected in fewer than 10 cells -- confirm against the Antler docs.
antler$remove_outliers(
  min_genes = 700,
  min_cells = 10)

# NOTE(review): 'CPM' presumably selects counts-per-million scaling -- confirm.
antler$normalize('CPM')

# Identify gene modules and store them under the name "unbiasedGMs"
antler$gene_modules$identify(
  name                  = "unbiasedGMs",
  corr_t                = 0.3,  # the Spearman correlation threshold
  corr_min              = 3,    # min. number of genes a gene must correlate with
  mod_min_cell          = 10,   # min. number of cells expressing the module
  mod_consistency_thres = 0.3,  # ratio of expressed genes among "positive" cells
  process_plots         = TRUE, # plot optimal module number heuristics
  num_cores             = 10,   # number of cores to use
  display=FALSE
)

# Bare top-level expression: its value (the identified modules) is auto-printed
# to the log when the script is run with Rscript.
antler$gene_modules$get("unbiasedGMs")


# Retrieve the identified modules once and label them "Mod 1", "Mod 2", ...
modList <- antler$gene_modules$get("unbiasedGMs")
if (length(modList) == 0) {
  # Fail with a clear message instead of a cryptic names()/melt() error
  stop("Antler did not identify any gene modules; nothing to export.",
       call. = FALSE)
}
names(modList) <- paste("Mod", as.character(seq_along(modList)))

# Long-format table with one row per gene/module pair.
# NOTE: from here on `file` holds this table, no longer the input file name;
# the name is kept because later steps of the script read it.
file <- melt(modList)
colnames(file) <- c('Gene.Symbol', 'Module')

# Save to pipeline directory for later reference
write.csv(file, file = paste0(AntlerPath, '/Antler_gene_modules.csv'))
# Pass to working directory
write.csv(file, file = 'Antler_gene_modules.csv')


# Do functional enrichment as well, for completeness.
# For every module with at least 10 genes, query g:Profiler twice: once for the
# full result table (written to one CSV per module) and once for a short link
# to the same query (collected into gprofiler_links.csv).
linkRows <- vector("list", length(modList))
for (mod in seq_along(modList)) {

  g <- modList[[mod]]

  # if there are enough genes, try gprofiler
  if (length(g) >= 10) {
    warning("Attempting to generate functional enrichment using gprofileR. Sometimes this fails due to bad connection / problems with the gprofiler online service, and may simply need to be rerun.")
    res <- gost(g, organism = 'mmusculus', as_short_link = FALSE)$result
    # Convert every column to character so the table can be written as CSV
    res <- data.frame(lapply(res, as.character), stringsAsFactors = FALSE)

    write.csv(res, file = paste0(AntlerPath, '/Module_', as.character(mod), '_Functional_Enrichment.csv'))
    linkRows[[mod]] <- c(paste0('Mod', mod), gost(g, organism = 'mmusculus', as_short_link = TRUE))
  }
}

# Modules that were skipped left NULL entries, which rbind() ignores; the result
# is NULL when no module was large enough, matching the initial empty value.
linkList <- do.call(rbind, linkRows)
write.csv(linkList, file = paste0(AntlerPath, '/gprofiler_links.csv'))

# Store the module list both in the working directory and in the pipeline
# output directory
save(modList, file = 'Gene_Modules.Rdata')
save(modList, file = paste0(AntlerPath, '/Gene_Modules.Rdata'))

# Add modules to gene meta data in seurat object:
# one entry per feature of the RNA assay, NA for genes that are in no module.
modvals <- rep(NA_character_, dim(integrated[['RNA']])[1])
names(modvals) <- rownames(integrated[['RNA']])
# Convert explicitly to character: if melt() produced factor columns, indexing
# or assigning with them would silently use the integer level codes.
moduleGenes <- as.character(file[, 1])
moduleLabels <- as.character(file[, 2])
# Only assign genes that exist in the assay so modvals keeps its length
inAssay <- moduleGenes %in% names(modvals)
modvals[moduleGenes[inAssay]] <- moduleLabels[inAssay]
integrated[['RNA']] <- AddMetaData(integrated[['RNA']], modvals, 'Module')

# Score every module on the filtered object, then copy the scores over to the
# full object as metadata columns ModuleScore_1, ModuleScore_2, ...
integrated_clean <- AddModuleScore(integrated_clean, modList, name = 'ModuleScore_', assay = 'RNA')

for (i in seq_along(modList)) {
  scoreName <- paste0('ModuleScore_', i)
  integrated[[scoreName]] <- integrated_clean[[scoreName]]
}

# Write the annotated object back to the pipeline output directory.
# NOTE(review): `dend` is not created in this script; it is presumably one of
# the objects restored by load() from the input file -- confirm.
save(integrated, dend, file = paste0(path, '/output/scrnaseq/', timepoints, '/integrated_', timepoints, '.Rdata'))





