
#Integrate:
#This script takes all the processed samples and merges them together. We check the name of each file for tags indicating age, separated by '_'.
#So 'E16' means embryonic day 16, and should be sorted earlier than 'P1', meaning postnatal day 1. If you have your own files,
# tag them on the end with numbers to indicate integration order.


## ----receive input from nextflow----------------------------------------------

#Get parameters
#Expected arguments: <pipeline path> <number of samples> <sample file 1> ... <sample file n>
args <- commandArgs(TRUE)
if(length(args) < 2){
    stop('expected arguments: <pipeline path> <number of samples> <sample files...>', call. = FALSE)
}
path<-args[1]
print(paste0(args[2],' files passed to integration'))
nsamples<-as.numeric(args[2])
print(nsamples)

#Fail early with a readable message instead of propagating NA into the file list
if(is.na(nsamples) || nsamples < 1 || nsamples != round(nsamples)){
    stop('number of samples must be a positive whole number, got: ', args[2], call. = FALSE)
}
if(length(args) < 2 + nsamples){
    stop(nsamples, ' sample files announced but only ', length(args) - 2, ' supplied', call. = FALSE)
}


#Get objects
#mixedsort orders embedded numbers numerically, so the file names define the integration order
samples<-args[3:(2+nsamples)]
samples<-gtools::mixedsort(samples)

#Extract the age tag by dropping the first 9 and the last 6 characters of the file name
#(e.g. 'processedP56.Rdata' -> 'P56'). basename() keeps this correct when a sample is passed with a directory prefix.
sample_files<-basename(samples)
timepoints<-substr(sample_files,10,nchar(sample_files)-6)
namestring<-paste(timepoints,collapse="_")




## ----libraries----------------------------

#Load standard package list
source(paste0(path,'/bin/auxiliary/DirectoryChecker.R'))
source(paste0(path,'/bin/auxiliary/PackageLoader.R'))
source(paste0(path,'/bin/auxiliary/SetPlottingParameters.R'))

#Use all but one core for parallel work, but never request fewer than one worker:
#detectCores() can return NA or 1, and a worker count of NA or 0 makes plan() fail
ncor <- detectCores()
print(paste(ncor,'cores detected'))
nworkers <- if(is.na(ncor)) 1 else max(1, ncor - 1)
#NOTE(review): the "multiprocess" strategy is deprecated in recent releases of the future package;
#confirm the installed version still accepts it, or switch to "multisession"/"multicore"
plan("multiprocess", workers = nworkers)
#Allow globals of up to 15000 * 1024^2 bytes (15000 MiB) to be exported to workers
options(future.globals.maxSize = 15000 * 1024^2)



#Check if directory exists and make it if it doesn't
#Created recursively so a missing parent (output/scrnaseq) is not a problem, and checked here
#so that an unwritable location aborts the run before the expensive integration rather than at save()
integration_outdir<-paste0(path,'/output/scrnaseq/',namestring)
if(!dir.exists(integration_outdir)){
    if(!dir.create(integration_outdir, recursive = TRUE)){
        stop('could not create output directory: ', integration_outdir, call. = FALSE)
    }
}


#---- files -------------------------------------------------

print(samples)

#Read every processed sample into objectList, one list element per file, in the order of `samples`.
#Each file is loaded into its own scratch environment so that objects stored in the .Rdata
#cannot overwrite variables of this script (path, samples, nsamples, ...).
objectList<-vector("list", nsamples)
for(i in seq_len(nsamples)){
    sample_env<-new.env()
    loaded<-load(samples[i], envir = sample_env)
    if(length(loaded) == 0){
        stop('no objects found in ', samples[i], call. = FALSE)
    }
    if(length(loaded) > 1){
        #Only the first stored object is integrated; make that visible instead of failing in get()
        warning(samples[i], ' contains ', length(loaded), ' objects, using the first: ', loaded[1], call. = FALSE)
    }
    objectList[[i]]<-get(loaded[1], envir = sample_env)
    rm(sample_env, loaded)
}


#---- integrate  -------------------------------------------------

#Report how many objects go into anchor finding, then compute the anchors
print(paste0("length of object list is  ", length(objectList)))
#CCA-based anchors over dims 1:30 using 3000 anchor features
i.anchors <- FindIntegrationAnchors(
    object.list = objectList,
    reduction = 'cca',
    dims = 1:30,
    scale = TRUE,
    anchor.features = 3000,
    k.anchor = 5,
    k.filter = 100,
    k.score = 15
)


#Generate sample tree, enforce reverse chronological order
#Each row is one pairwise integration step in hclust merge-matrix format: a negative entry is the
#index of a sample in objectList, a positive entry refers to the result of that earlier row.
#The last two samples are merged first, then every earlier sample is added to the running result,
#e.g. for 4 samples: (-4,-3), (1,-2), (2,-1).
if(nsamples < 2){
    stop('at least two samples are required to build the integration sample tree', call. = FALSE)
}
sample_tree<-matrix(nrow=(nsamples-1),ncol=2)
sample_tree[1,]<-c(-nsamples,-(nsamples-1))
#seq_len() gives an empty sequence for nsamples == 2, whereas 2:(nsamples-1) would count down
#(2, 1) and index past the end of the one-row matrix
for(i in seq_len(nsamples-2)+1){
    sample_tree[i,1]<-i-1
    sample_tree[i,2]<-sample_tree[i-1,2]+1
}


#Merge the samples in the order given by sample_tree, using the anchors found above
integrated <- IntegrateData(
    anchorset = i.anchors,
    sample.tree = sample_tree,
    normalization.method = 'LogNormalize',
    dims = 1:15,
    k.weight = 100
)


#---- process -------------------------------------------------
#Downstream processing of the merged object; each step overwrites `integrated` and feeds the next.
#Everything up to clustering runs with the assay named 'integrated' as default.
DefaultAssay(integrated)<-'integrated'
integrated<-ScaleData(integrated)
#NOTE(review): variable features are computed after scaling and on the 'integrated' assay - confirm this order is intended
integrated<-FindVariableFeatures(integrated)
integrated<-RunPCA(integrated)
#The plot is not assigned or saved here; presumably it relies on top-level auto-printing
#to the active graphics device - confirm where it ends up in a non-interactive run
ElbowPlot(integrated)
#Both embeddings and the neighbour graph use the same dims 1:15
integrated<-RunTSNE(integrated,dims=1:15)
integrated<-RunUMAP(integrated,dims=1:15)
integrated<-FindNeighbors(integrated,dims=1:15)
#Clustering at resolution 0.6; presumably this is what fills the 'seurat_clusters' metadata read further down
integrated<-FindClusters(integrated,resolution = 0.6)

#Switch the default assay back to 'RNA'; presumably so that the marker-expression queries that follow read the RNA assay - confirm
DefaultAssay(integrated)<-'RNA'
#FeaturePlot(integrated,features=c('SST','CHODL','DACH1'))


#---- putative identities by markers -------------------------------------------------
#Label clusters with putative identity based on marker expression, this is not a final classification
#A cluster receives a group label when more than half of its cells express at least one of the group's markers.
#Labels are written in the order LRP -> Martinotti -> Meis2, so a cluster that qualifies for several
#groups keeps the last one written. Every cluster claimed by none of the three is called Non-Martinotti.
integrated[['putative_identity']]<-NA

#Return the names of the clusters in which more than min_fraction of the cells are listed in `cells`.
#cluster_df: one-column data frame of per-cell cluster assignments, with cell names as row names
#cells:      character vector of cell names expressing the marker(s)
enrichedClusters<-function(cluster_df, cells, min_fraction = 0.5){
    marker_fraction<-table(cluster_df[cells,]) / table(cluster_df)
    names(which(marker_fraction > min_fraction))
}

#Cluster assignment of every cell, looked up once and compared by label
cluster_df<-integrated[['seurat_clusters']]
cell_clusters<-as.character(cluster_df[,1])


#Find LRP group
NosCells<-WhichCells(integrated,expression= NOS1>0 | CHODL>0)
clustersLRP<-enrichedClusters(cluster_df,NosCells)
integrated[['putative_identity']][which(cell_clusters %in% clustersLRP),]<-'LRP'

#Find Martinotti group
RelnCells<-WhichCells(integrated,expression= RELN>0 |NPAS1>0 )
clustersRELN<-enrichedClusters(cluster_df,RelnCells)
integrated[['putative_identity']][which(cell_clusters %in% clustersRELN),]<-'Martinotti'

#Find Meis2 group
Meis2Cells<-WhichCells(integrated,expression=MEIS2>0 )
clustersMEIS2<-enrichedClusters(cluster_df,Meis2Cells)
integrated[['putative_identity']][which(cell_clusters %in% clustersMEIS2),]<-'Meis2'


#Find NonMartinotti group
#Defined purely by exclusion: all clusters not claimed above. The labels are compared directly,
#rather than converting positions to labels with "index - 1", so this does not depend on the
#clusters being numbered 0..k-1 in level order. The previous NFIB/TAC1/CORT/CALB1 marker query
#was never used for the assignment and has been dropped.
clustersNM<-setdiff(unique(cell_clusters), c(clustersLRP,clustersRELN,clustersMEIS2))
integrated[['putative_identity']][which(cell_clusters %in% clustersNM),]<-'Non-Martinotti'

print('integration complete')




#---- save   --------------------------------------------------

#Both copies share the same file name; only the directory differs
archive_dir <- paste0(path, '/output/scrnaseq/', namestring)
rdata_name <- paste0('integrated_', namestring, '.Rdata')

#Archive copy inside the pipeline output directory, kept for later reference
save(integrated, i.anchors, file = paste0(archive_dir, '/', rdata_name), compress = TRUE)
#Copy in the current working directory
save(integrated, i.anchors, file = rdata_name, compress = TRUE)




#Do we want to create an object with adult integrated too? Nah, leave this out. This ended up not being very useful
# if(useAdult==1){
#     
#     print('integrated,with adult reference data')
#     
#     x<-load(paste0(path,'/output/scrnaseq/processedP56.Rdata'))
#     name<- substr(x,1,nchar(x)-6)
#     assign(name,get(x[1]))
#     objectList<-c(objectList,get(x[1]))
#     rm(x)
#     samples<-c(samples, 'processedP56.Rdata')
#                
#     
#     print(paste0("length of object list is  ", length(objectList)))
#     i.anchors <- FindIntegrationAnchors(object.list = objectList, dims = 1:30,reduction='cca',scale=T,k.anchor=5,k.filter=100,k.score=15,anchor.features=3000)
#     
#     #Default settings + extra features
#     # i.anchors.default <- FindIntegrationAnchors(object.list = c(processedE16,processedP1M37,processedP5), dims = 1:30,reduction='cca',scale=T,k.anchor=5,k.filter=200,k.score=30)
#     nsamples<-nsamples+useAdult
#     #Generate sample tree
#     sample_tree<-matrix(nrow=(nsamples-1),ncol=2)
#     sample_tree[1,]<-c(-nsamples,-(nsamples-1))
#     for(i in 2:(nsamples-1)){
#         sample_tree[i,1]<-i-1
#         sample_tree[i,2]<-sample_tree[i-1,2]+1
#     }
#     
#     
#     integrated <- IntegrateData(anchorset = i.anchors, dims = 1:15,normalization.method ='LogNormalize',k.weight=100,sample.tree=sample_tree)
#     
#     
#     ## -----------------------------------------------------
#     DefaultAssay(integrated)<-'integrated'
#     integrated<-ScaleData(integrated)
#     integrated<-FindVariableFeatures(integrated)
#     integrated<-RunPCA(integrated)
#     ElbowPlot(integrated)
#     
#      integrated<-RunTSNE(integrated,dims=1:15)
#      integrated<-RunUMAP(integrated,dims=1:15)
#     # integrated<-FindNeighbors(integrated,dims=1:15)
#     # #Cluster twice, use general clustering for type assignment and granulated clustering for detecting suspicious groups (they can be quite small)
#     # integrated<-FindClusters(integrated,resolution = 0.6)
#     
#     DefaultAssay(integrated)<-'RNA'
#     #FeaturePlot(integrated,features=c('SST','CHODL','DACH1'))
#     
#     timepoints<-substr(samples,10,nchar(samples)-6)
#     namestring<-paste(timepoints,collapse="_")
#     
#     if(!dir.exists(paste0(path,'/output/scrnaseq/',namestring))){
#         dir.create(paste0(path,'/output/scrnaseq/',namestring))
#     }
#     
#     integratedAdultRef<-integrated
#     #Save to pipeline directory for later reference
#     save(integratedAdultRef,i.anchors,file=paste0(path,'/output/scrnaseq/',namestring,'/integrated_',namestring,'.Rdata'),compress = TRUE)
#        
#                
# }
# 
# 
# 
# 
