

#This script re-annotates spatial data using differentially expressed (DE) gene
#labels. Annotation is limited by the small number of genes in the dataset,
#and not all labels are fully representative, specific markers.

## ----receive input from nextflow----------------------------------------------

#Get parameters
args <- commandArgs(TRUE)
path<-args[1]
file<-args[2]
set.seed(123)



## ----libraries----------------------------

#Load standard package list
source(paste0(path,'/bin/auxiliary/DirectoryChecker.R'))
source(paste0(path,'/bin/auxiliary/PackageLoader.R'))
source(paste0(path,'/bin/auxiliary/SetPlottingParameters.R'))



#Annotate cell identity by DEgenes
source(paste0(path,'/bin/auxiliary/DEScoreClassify.R'))





#load files
integrated <- load(file)



#Annotate cluster by DE gene names
Idents(integrated)<-'seurat_clusters'
DefaultAssay(integrated) <-'RNA'
DEGenes<-FindAllMarkers(integrated,logfc.threshold = log(1.5),base=exp(1),only.pos = TRUE)

#Restrict min pct diff
#DEGenes<-DEGenes[which(DEGenes$pct.1-DEGenes$pct.2>0.2),]

integrated$reannotation<-NA
integrated$putative_identity<-NA

#Quantify proportion of markers that appear in DE genes. 
for(cluster in unique(integrated[['seurat_clusters']][,1])){
  
  sub<-DEGenes[DEGenes$cluster==cluster,]
  scores<-vector('list',length=3)
  names(scores)<- c('Martinotti','Non-Martinotti','LRP')
  
  #Compute DEscore for each type
  for(type in c('Martinotti','Non-Martinotti','LRP')){
    mark<-typeMarkers[[type]]
    m<- sub[na.omit(match(mark,sub$gene)),]
    spec<-m$pct.1-m$pct.2
    names(spec)<-m$gene
    DEscore<- -log10(m$p_val_adj)
    DEscore<-unlist(mapply(DEscore,FUN=function(x) min(x,20)))*spec
    scores[[type]]<-c(sum(DEscore),m$gene[grep(max(m$avg_logFC),m$avg_logFC)])
    
  }
  
  vals<-as.numeric(unlist(lapply(scores,FUN=function(x)x[[1]])))
  names(vals)<-names(scores)
  if(length(grep(max(vals),vals))==1){
    
    
    #Find identity
    clust<-str_to_title(scores[[grep(max(vals),vals)]][2])
    
    #has that identity already been found in another cluster?
    if(length(grep(clust,integrated$reannotation))>1){
      #get all the matching labels, seek maximum number on the end
      nams<-integrated$reannotation[grep(clust,integrated$reannotation)]
      nums<-lapply(nams,FUN=function(x) substring(x,nchar(x)))
      m<-max(as.numeric(nums))
      m<-m+1
      integrated$reannotation[which(integrated$seurat_clusters==cluster)]<-paste0('Sst_',clust,'_',as.character(m))
      
    }
    else{
      integrated$reannotation[which(integrated$seurat_clusters==cluster)]<-paste0('Sst_',clust,'_1')
    }
    
    #figure out how confident the identity is
    
    #label general cell type
    if(grep(max(vals),vals)>40){
      integrated$putative_identity[which(integrated$seurat_clusters==cluster)]<-  names(vals)[grep(max(vals),vals)]
    }
    
    
  }
  
}



