

#This script uses a filtered version of the Patch-seq dataset from the following publication. We compare these data to our own and use them for classification.

# Nathan W. Gouwens, Staci A. Sorensen, Fahimeh Baftizadeh, Agata Budzillo, Brian R. Lee, Tim Jarsky, Lauren Alfiler, 
#Katherine Baker, Eliza Barkan, Kyla Berry, Darren Bertagnolli, Kris Bickley, Jasmine Bomben, Thomas Braun, Krissy Brouner,
#Tamara Casper, Kirsten Crichton, Tanya L. Daigle, Rachel Dalley, Rebecca A. de Frates, Nick Dee, Tsega Desta, Samuel Dingman Lee, 
#Nadezhda Dotson, Tom Egdorf, Lauren Ellingwood, Rachel Enstrom, Luke Esposito, Colin Farrell, David Feng, Olivia Fong, Rohan Gala, Clare Gamlin,
#Amanda Gary, Alexandra Glandon, Jeff Goldy, Melissa Gorham, Lucas Graybuck, Hong Gu, Kristen Hadley, Michael J. Hawrylycz, Alex M. Henry, 
#DiJon Hill, Madie Hupp, Sara Kebede, Tae Kyung Kim, Lisa Kim, Matthew Kroll, Changkyu Lee, Katherine E. Link, Matthew Mallory, Rusty Mann,
#Michelle Maxwell, Medea McGraw, Delissa McMillen, Alice Mukora, Lindsay Ng, Lydia Ng, Kiet Ngo, Philip R. Nicovich, Aaron Oldre, Daniel Park,
#Hanchuan Peng, Osnat Penn, Thanh Pham, Alice Pom, Zoran Popović, Lydia Potekhina, Ramkumar Rajanbabu, Shea Ransford, David Reid, Christine Rimorin, 
#Miranda Robertson, Kara Ronellenfitch, Augustin Ruiz, David Sandman, Kimberly Smith, Josef Sulc, Susan M. Sunkin, Aaron Szafer, Michael Tieu, 
#Amy Torkelson, Jessica Trinh, Herman Tung, Wayne Wakeman, Katelyn Ward, Grace Williams, Zhi Zhou, Jonathan T. Ting, Anton Arkhipov, Uygar Sümbül,
#Ed S. Lein, Christof Koch, Zizhen Yao, Bosiljka Tasic, Jim Berg, Gabe J. Murphy, Hongkui Zeng,
# Integrated Morphoelectric and Transcriptomic Classification of Cortical GABAergic Cells,
# Cell,
# Volume 183, Issue 4,
# 2020,
# Pages 935-953.e19,
# ISSN 0092-8674,
# https://doi.org/10.1016/j.cell.2020.09.057.
# (https://www.sciencedirect.com/science/article/pii/S009286742031254X)


#The full version of their dataset is available at: https://portal.brain-map.org/explore/classes/multimodal-characterization


# ---- Command-line arguments ------------------------------
# The first trailing command-line argument is the project root directory.
# Every input and output location used below is built relative to it.
args <- commandArgs(TRUE)
if (length(args) < 1 || is.na(args[1]) || !nzchar(args[1])) {
  # Fail early: without this check `path` would be NA and the first source()
  # call would die with a confusing "NA/bin/..." file error.
  stop('Missing argument: the project root directory must be passed as the first command-line argument.',
       call. = FALSE)
}
path<-args[1]


#---- Load Libraries -------------------------------------
# Project helper scripts. Their contents are not visible here; presumably they
# check the directory layout and attach the packages used below (Seurat,
# stringr, dplyr, ...) -- confirm against the helper files.
source(paste0(path,'/bin/auxiliary/DirectoryChecker.R'))
source(paste0(path,'/bin/auxiliary/PackageLoader.R'))

# ---- Load Data --------------------------------------

# Expression matrix: the first CSV column supplies the row names.
# Presumably features are in rows and samples in columns, as
# CreateSeuratObject() expects further down -- confirm against the file.
exprFile <- paste0(path,'/input/external_reference/P56AllenPatchseq_matrix.csv')
mat <- read.csv(exprFile, row.names = 1)

# Per-sample metadata. Rows are keyed by the transcriptomics sample id with
# every '-' replaced by '.'; presumably this mirrors the name mangling that
# read.csv() applied to the matrix column names.
metaFile <- paste0(path,'/input/external_reference/P56AllenPatchseq_metadata.csv')
meta <- read.csv(metaFile)
sampleIds <- gsub('-', '.', meta$transcriptomics_sample_id, fixed = TRUE)
rownames(meta) <- sampleIds

# Load the alignment between MET types and morphological cell types.
# `celltypes` is a lookup vector: values are type labels, names are the
# predominant MET type of each row in the alignment table.
METclusterTypes <- read.csv(paste0(path,'/input/supplementary/LimLab_MET_alignment_01-04-21.csv'))
celltypes <- setNames(METclusterTypes$type.label,
                      METclusterTypes$predominant.MET.type)


# Header-less table: the first column becomes the row names and is then
# dropped. `drop = FALSE` keeps the result a data frame even if only one
# column remains.
METcelltypes <- read.csv(paste0(path,'/input/supplementary/MET_celltype.csv'), header = FALSE)
rownames(METcelltypes) <- METcelltypes[, 1]
METcelltypes <- METcelltypes[, -1, drop = FALSE]

# Rows whose (first remaining) value is 'U' are blanked out to NA.
unknownRows <- which(METcelltypes[, 1] == 'U')
METcelltypes[unknownRows, ] <- NA

# Load the reference gene list used for filtering. The object names stored in
# this file are not visible here; presumably it provides `BMout`, which is
# used later without being defined anywhere else in this script.
load(paste0(path,'/input/supplementary/Ref_Genes.Rdata'))

# Load the canonical (literature) type markers. They are later combined with
# DE genes into joint marker sets. Column 1 holds the gene symbol (upper-cased
# here); column 2 holds the description that the patterns below are matched
# against.
canonmarkers <- read.csv(paste0(path,'/input/supplementary/CanonicalMarkers.csv'))
canonmarkers[, 1] <- toupper(canonmarkers[, 1])

# One regular expression per marker set. '^Martinotti' is anchored so that it
# does not also pick up the 'Non-Martinotti' rows.
markerPatterns <- c('Martinotti'     = '^Martinotti',
                    'Non-Martinotti' = 'Non-Martinotti',
                    'LRP'            = 'projecting',
                    'Stressed'       = 'Stress')
markers <- lapply(markerPatterns, FUN = function(typePattern) {
  canonmarkers[grep(typePattern, canonmarkers[, 2]), 1]
})




# #load developmental data here...
# #clean cells to include only groups of interest
# integrated_clean<-subset(integrated,cells=WhichCells(integrated,expression=major_cluster_label %nin% c('Failure','Stressed','Meis2')))
# 
# 
# #load the alignment of morphology and transcriptomics
# cluster_meta<-read.csv('/Users/josephinefisher/Analysis/SST_Interneurons_Lineage/MET-Type Smartseq Comparison/Jo_MET_alignment_01-04-21.csv')
# METclusterTypes<-cluster_meta$type.label
# names(METclusterTypes)<-cluster_meta$t.type
# 
# rownames(mat)<-toupper(mat[,1])
# colnames(mat) %in% meta$transcriptomics_sample_id
# 
# mat<-mat[,-1]
# meta$transcriptomics_sample_id<-str_replace_all(meta$transcriptomics_sample_id,'-','.')
# rownames(meta)<-meta$transcriptomics_sample_id

# MET type labels stored as the literal string 'NULL' are converted to real
# missing values before the metadata is attached to the object.
meta$MET.type.Label[which(meta$MET.type.Label=='NULL')]   <- NA

# ---- Create object ------------------------------------
# min.cells: keep features detected in at least 10 cells.
# min.features: keep cells with at least 700 detected features.
MET<-CreateSeuratObject(mat,meta.data = meta, min.cells = 10, min.features = 700)
#save(MET,file=paste0(path,'/input/METdebug1.Rdata'))

# Work with plain character labels, and repeat the 'NULL' -> NA clean-up on
# the object's copy of the column.
MET$MET.type.Label <- as.character(MET$MET.type.Label)
MET$MET.type.Label[MET$MET.type.Label %in% 'NULL'] <- NA

# Map each cell's MET type onto its cell type. The lookup uses match() rather
# than character row indexing: `df[labels, ]` partially matches row names, so
# a label missing from the table could silently inherit the value of a longer
# row name that starts with it. With match(), unlisted and NA labels both
# give NA.
typeRow <- match(MET$MET.type.Label, rownames(METcelltypes))
MET$cell_type <- METcelltypes[typeRow, 1]

# MET$major_cluster_label<-celltypes[as.character(MET$cluster_label)]
# MET$major_cluster_label[which(MET$major_cluster_label=='U')]<-NA

# MET$MET_label<-METclusterTypes$predominant.MET.type[match(MET$cluster_label, METclusterTypes$t.type)]
# MET$MET_label[which(MET$MET_label=='U')]<-NA


# Percentage of each cell's counts that comes from features whose name starts
# with "MT-". The pattern is passed directly instead of pre-computed integer
# indices.
MET[['percent.mt']] <- PercentageFeatureSet(MET, pattern = "^MT-")


#Filter Genes

# Remove predicted gene models ("GM" followed only by digits, e.g. GM12345).
# The pattern is anchored on the digits so that annotated genes which merely
# start with "GM" (GMFB, GMPS, GMNN, GM2A, ...) are kept; a bare '^GM' would
# discard them as well.
PseudoGenes <- grep('^GM[0-9]+$', rownames(MET), value = TRUE)

# Remove Y-chromosome genes.
# NOTE(review): this compares BMout$mgi_symbol with the object's feature names
# as-is; the marker symbols in this script are upper-cased, so confirm that
# BMout$mgi_symbol uses the same case, otherwise nothing matches here.
Yset<-BMout$mgi_symbol[BMout$chromosome_name=='Y']
Ygenes<-rownames(MET)[rownames(MET)%in%Yset]

# Remove mitochondrial genes (same caveat about symbol case as above).
MTset<-BMout$mgi_symbol[BMout$chromosome_name=='MT']
MTgenes<-rownames(MET)[rownames(MET)%in%MTset]


# Keep every feature that is in none of the three exclusion sets.
dropgenes <- c(PseudoGenes, Ygenes, MTgenes)
keepgenes <- rownames(MET)[!(rownames(MET) %in% dropgenes)]
MET<-subset(MET,features=keepgenes)


#---- Process ----------------------------------------------
# Standard Seurat workflow on the filtered object. Each step overwrites MET
# with its result, so the order of the calls matters.
MET<-NormalizeData(MET)
MET<-FindVariableFeatures(MET)
# Scale every remaining feature, not only the variable ones.
MET<-ScaleData(MET, features=rownames(MET))
MET<-RunPCA(MET)
# Diagnostic plot of the first 30 principal components. It is not assigned,
# so it is only shown or printed to the active graphics device.
ElbowPlot(MET, ndims=30)
# The UMAP uses the first 20 principal components.
MET<-RunUMAP(MET, dims=1:20)
# No dims or resolution are passed here, so neighbour graph construction and
# clustering run with the package defaults.
# NOTE(review): confirm that the default dims of FindNeighbors are intended,
# given that RunUMAP is given dims=1:20.
MET<-FindNeighbors(MET)
MET<-FindClusters(MET)

#DimPlot(MET, group.by='cluster_label')+theme(legend.position = 'none')
#DimPlot(MET, group.by='major_cluster_label')+theme(legend.position = 'none')
# DimPlot(MET, group.by='MET_label')+theme(legend.position = 'none')
# DimPlot(MET, group.by='transcriptomics_batch')
# FeaturePlot(MET, c('CHODL','RELN','NFIB','TAC1','ETV1','THSD7A'), ncol=2, order=T) & scale_color_gradientn(colors=FeatureCol)


# type<-METclusterTypes$type.label
# names(type)<-METclusterTypes$predominant.MET.type
# 
# sub <- subset(MET, cells=names(which(!is.na(MET$MET_label))))
# 
# sub$celltype<- type[sub$MET_label]
# 
# 
# Idents(sub)<-'celltype'
# Restrict to the cells that have a cell type assigned and use that type as
# the identity class for differential expression.
typedCells <- which(!is.na(MET$cell_type))
sub <- subset(MET, cells = typedCells)
Idents(sub) <- 'cell_type'

# Positive markers only, natural-log fold change of at least log(1.5).
METmarkers <- FindAllMarkers(sub, logfc.threshold = log(1.5), base = exp(1), only.pos = TRUE)


# Top 10 genes per cluster ranked by avg_logFC (top_n keeps ties).
topMET <- top_n(group_by(METmarkers, cluster), 10, avg_logFC)

# Drop from the literature sets any gene that also appears among the top DE
# genes; those genes re-enter later through topMET.
markers <- lapply(markers, FUN = function(geneSet) {
  geneSet[!(geneSet %in% topMET$gene)]
})

# Clean duplicate entries: a marker that appears for two different groups is
# not reliable enough, so defer to the entry with the highest DE score
# (-log10 adjusted p-value) and drop the weaker ones.
dupgenes <- unique(topMET$gene[duplicated(topMET$gene)])

removeRows <- c()

for (gene in dupgenes) {
  # Exact symbol comparison. A regex/substring search (grep) would also hit
  # genes that merely contain this symbol, e.g. SST matching SSTR1.
  ind <- which(topMET$gene == gene)

  if (length(ind) > 1) {

    vals <- -log10(topMET$p_val_adj[ind])
    # Rows scoring strictly below the best one are dropped. Entries tied for
    # the best score (e.g. several adjusted p-values of 0) are all kept.
    removeRows <- c(removeRows, ind[which(vals < max(vals))])
  }

}


# Only subset when there is something to remove. Negative indexing with an
# empty index vector (`x[-integer(0), ]`) selects zero rows, which would empty
# the table whenever every duplicate is tied.
if (length(removeRows) > 0) {
  topMET <- topMET[-unique(removeRows), ]
}



# Collate markers from the literature with the MET-derived DE genes, one cell
# type at a time. The 'Stressed' set is left as loaded; it is not extended here.
for (typeName in c('Martinotti', 'Non-Martinotti', 'LRP')) {
  derivedGenes <- topMET$gene[which(topMET$cluster == typeName)]
  markers[[typeName]] <- unique(c(markers[[typeName]], derivedGenes))
}



# ---- Get DE genes ------------------------------------------------
# Idents(MET)<-'major_cluster_label'
# METmarkers<-FindAllMarkers(MET, only.pos=T, logfc.threshold = log(1.5), base=exp(1))
# METmarkers<-METmarkers[which(METmarkers$p_val_adj<0.01),]
# 
# topMET<-METmarkers %>% group_by(cluster) %>% top_n(10, avg_logFC)
# 
# 
# markers<-lapply(markers, FUN = function(x){
#   x[which(x %nin% topMET$gene)]
# })
# 
# #clean duplicate entries - markers that appear for two different groups aren't reliable enough. defer to highest de score
# dupgenes<-topMET$gene[duplicated(topMET$gene)]
# 
# for(gene in dupgenes){
#   ind<-grep(gene,topMET$gene)
#   
#   if(length(ind)>1){
#     
#     vals<--log10(topMET$p_val_adj[ind])
#     ind<-ind[which(vals<max(vals))]
#     
#     topMET<-topMET[-ind,]
#   }
# 
# }
# 
# 
# 
# #collate together markers form literature and MET DE genes.
# markers[['Martinotti']]<-unique(c(unlist(markers[['Martinotti']]), topMET$gene[which(topMET$cluster=='Martinotti')]))
# markers[['Non-Martinotti']]<-unique(c(markers[['Non-Martinotti']], topMET$gene[which(topMET$cluster=='Non-Martinotti')]))
# markers[['LRP']]<-unique(c(markers[['LRP']], topMET$gene[which(topMET$cluster=='LRP')]))


# Hand the results on to the next pipeline step. The file name is relative,
# so it is written to the current working directory.
#save(MET,file='processedMETdata.Rdata')
typeMarkers <- markers
save(list = c('typeMarkers', 'MET'), file = 'typeMarkers.Rdata')

# Keep a reference copy (object, full marker table and marker sets) next to
# the external reference input data.
referenceFile <- paste0(path,'/input/external_reference/processedAllenP56Patchseq.Rdata')
save(list = c('MET', 'METmarkers', 'typeMarkers'), file = referenceFile)

# Flatten the marker sets into a two-column table: V1 = gene, V2 = name of the
# set it belongs to. The column names V1/V2 match what converting the unnamed
# two-column matrices produced before, so the CSV layout is unchanged.
# lengths() gives the size of each set. The earlier `length(typeMarkers[i])`
# was always 1 and only worked through recycling, and an empty set broke the
# row-binding.
typeMarkersDF <- data.frame(
  V1 = as.character(unlist(typeMarkers, use.names = FALSE)),
  V2 = rep(names(typeMarkers), lengths(typeMarkers)),
  stringsAsFactors = FALSE
)

# Provenance of each gene: literature list, MET-derived top DE genes, or both.
# Genes found in neither source stay NA.
isCanonical <- typeMarkersDF[, 1] %in% canonmarkers[, 1]
isMETderived <- typeMarkersDF[, 1] %in% topMET$gene

category <- rep(NA_character_, nrow(typeMarkersDF))
category[isCanonical] <- 'canonical'
category[isMETderived] <- 'MET-derived'
category[isCanonical & isMETderived] <- 'canonical and MET-derived'

typeMarkersDF <- cbind( typeMarkersDF, category)

write.csv(typeMarkersDF, file=paste0(path,'/input/supplementary/typeMarkers.csv'))

# Build the marker definitions for a Garnett-style marker file: for every MET
# type a '>name' line followed by an 'expressed: gene1, gene2, ...' line.
# NOTE(review): `markerFile` is only assembled here; nothing in the visible
# code writes it to disk -- confirm whether a writeLines() call is missing.
Idents(MET) <-'MET.type.Label'
mark <- FindAllMarkers(MET, only.pos = TRUE, logfc.threshold=log(1.5), base=exp(1))

# Top 20 genes per MET type ranked by avg_logFC (top_n keeps ties).
topmark <- top_n(group_by(mark, cluster), 20, avg_logFC)

# Natural (mixed alphanumeric) ordering of the type names. as.character()
# makes explicit the factor-to-character coercion a for loop would apply.
groupNames <- as.character(mixedsort(unique(topmark$cluster)))

markerFile <- unlist(lapply(groupNames, FUN = function(group) {
  genes <- topmark$gene[which(topmark$cluster == group)]
  # paste0() has no `sep` argument, so only `collapse` is needed to join the
  # genes into one comma-separated string.
  geneLine <- paste('expressed:', paste(genes, collapse = ', '))
  c(paste0('>', group), geneLine)
}))




# Idents(MET)<-'cluster_label'
# METmark<-FindAllMarkers(MET, logfc.threshold = log(1.5), base=exp(1), only.pos=T,min.diff.pct = 0.3)
# METmark<-METmark[which(mark$p_val_adj<0.01),]
# topmark<-METmark %>% group_by(cluster) %>% top_n(5,avg_logFC)
# 
# #extract markers from names...
# nameGenes<-unlist(lapply(cluster_meta$t.type[order(cluster_meta$type.label)], FUN=function(x){
#   s<-unlist(str_split(x,' '))
#   return(s[2:length(s)])
# }))
# nameGenes<-toupper(nameGenes)
# nameGenes<-nameGenes[nameGenes!='']
# 
# 
# save(MET,METmark,file='processedMETdata.Rdata')


# 
# avg<-AverageExpression(MET,return.seurat = T)
# Input<-avg@assays[['RNA']]@scale.data[topmark$gene[order(match(topmark$cluster, cluster_meta$t.type[order(cluster_meta$type.label)]))],cluster_meta$t.type[order(cluster_meta$type.label)]]
# Input<-avg@assays[['RNA']]@scale.data[nameGenes,cluster_meta$t.type[order(cluster_meta$type.label)]]
# 
# pairs.breaks <- seq(-3, 3, length.out=1001);
# gplots::heatmap.2(Input,
#                   margins=c(12,12),
#                   breaks = pairs.breaks,
#                   keysize=1,
#                   key.xlab="zscore",
#                   key.title="NULL",
#                   trace = "none",
#                   density.info = "none",
#                   col = HeatCol,
#                   #breaks = breaks,
#                   offsetRow=0.1,
#                   offsetCol=-90,
#                   cexRow = 0.8,
#                   cexCol = 0.8,
#                   Colv = F ,
#                   Rowv = F,
#                   adjCol=c(0,0.5),
#                   srtCol=45)
# 
# 
# 
# 
# 
# 
# #Compare to developmental data
# 
# DefaultAssay(integrated_clean)<-'RNA'
# integrated_clean<-FindVariableFeatures(integrated_clean)
# Idents(integrated_clean)<-'cluster_label'
# intmark<-FindAllMarkers(integrated_clean, only.pos=T, logfc.threshold = log(1.2), base=exp(1))
# 
# integrated_clean[['comparison_cluster_label']]<-integrated_clean$cluster_label
# MET[['comparison_cluster_label']]<-MET$cluster_label
# integrated_clean[['experiment']]<-'Developmental'
# MET[['experiment']]<-'Allen_PatchSeq'
# topmark<-METmark %>% group_by(cluster) %>% top_n(50,avg_logFC)
# var_genes = intersect(MET@assays[['RNA']]@var.features,integrated_clean@assays[['integrated']]@var.features)
# var_genes<-unique(METmarkers$gene)
# var_genes<-intersect(METmarkers$gene,intmark$gene)
# 
# joint<-merge(MET,integrated_clean)
# joint<-subset(joint,slot='RNA')
# DefaultAssay(joint)<-'RNA'
# jointSCE<-as.SingleCellExperiment(joint)
# 
# celltype_NV = MetaNeighborUS(var_genes = var_genes,
#                              dat = jointSCE,
#                              study_id = jointSCE$experiment,
#                              cell_type = jointSCE$comparison_cluster_label,
#                              fast_version = TRUE)
# 
# HeatCol<- colorRampPalette(colors = c("#1b75bb", "black","#faaf40"))(1000)
# pairs.breaks <- seq(-1.5, 1.5, length.out=1001);
# p<-gplots::heatmap.2(celltype_NV,
#                      margins=c(12,12),
#                      keysize=1,
#                      key.xlab="AUROC",
#                      key.title="NULL",
#                      trace = "none",
#                      density.info = "none",
#                      col = HeatCol,
#                      #breaks = breaks,
#                      offsetRow=0.1,
#                      offsetCol=0.1,
#                      cexRow = 0.8,
#                      cexCol = 0.8,
#                      Colv = T, )
# 
# threshMatrix<-as.matrix(celltype_NV>0.8)
# threshMatrix<-1*threshMatrix
# 
# 
# gplots::heatmap.2(threshMatrix,
#                   margins=c(15,15),
#                   keysize=1,
#                   key.xlab="AUROC > 0.8",
#                   key.title="NULL",
#                   trace = "none",
#                   density.info = "none",
#                   col = c("#1b75bb", "#faaf40"),
#                   breaks = c(0,0.8,1),
#                   offsetRow=0.1,
#                   offsetCol=0.1,
#                   cexRow = 1,
#                   cexCol = 1.0,
#                   Colv = p[["colDendrogram"]],
#                   Rowv = p[["rowDendrogram"]],
#                   sepcolor='black',
#                   sepwidth=c(0.01, 0.01),
#                   colsep=1:ncol(threshMatrix),
#                   rowsep=1:nrow(threshMatrix),
#                   srtCol=45,
#                   adjCol=c(1,1))
# 
# 

# #Try projecting labels?
# var_features<-intersect(integrated_clean[['integrated']]@var.features, MET[['RNA']]@var.features)
# anch<-FindTransferAnchors(reference = MET, query = integrated_clean)
# pred<-TransferData(anchorset = anch, refdata = MET$cluster_label, dims = 1:30, n.trees=500)
# integrated_clean[['METtype']]<-NA
# integrated_clean[['METtype']]<-pred$predicted.id
# #P5[['METtype']][which(pred$prediction.score.max>0.5),]<-pred$predicted.id[which(pred$prediction.score.max>0.5)]
# integrated_clean$pred_score<-pred$prediction.score.max
# integrated_clean$morphology_type<-celltypes[integrated_clean$METtype]
# DimPlot(integrated_clean,group.by='METtype', label=T)
# DimPlot(integrated_clean,group.by='morphology_type', label=T)
# 
# 
# # # #Try projecting labels?
# var_features<-intersect(integrated_clean[['integrated']]@var.features, MET[['RNA']]@var.features)
# P5<-subset(integrated_clean,cells=WhichCells(integrated_clean,expression = orig.ident=='P5'))
# DefaultAssay(P5)<-'RNA'
# Idents(P5)<-'cluster_label'
# P5mark<-FindAllMarkers(P5, logfc.threshold = log(1.5), only.pos = T, base=exp(1))
# var_features<-intersect(P5mark$gene, METmarkers$gene)
# var_features<-unlist(markers)
# anch<-FindTransferAnchors(reference = MET, query = P5, features = unique(c(var_features, METmarkers$gene)))
# #anch<-FindTransferAnchors(reference = MET, query = P5)
# pred<-TransferData(anchorset = anch, refdata = MET$cluster_label, dims = 1:30,n.trees=500)
# P5[['METtype']]<-NA
# P5[['METtype']]<-pred$predicted.id
# #P5[['METtype']][which(pred$prediction.score.max>0.5),]<-pred$predicted.id[which(pred$prediction.score.max>0.5)]
# P5$pred_score<-pred$prediction.score.max
# P5$morphology_type<-celltypes[P5$METtype]
# DimPlot(P5,group.by='METtype', label=T, reduction='umap')
# FeaturePlot(P5, 'pred_score', reduction='umap' )
# DimPlot(P5,group.by='morphology_type', label=T, reduction='umap')
# 
# 
# i.anch<-FindIntegrationAnchors(list(P5,MET))
# int<-IntegrateData(i.anch)
#river plot of label matchup
# library(riverplot)
# 
# #get nodes
# ID<-c(sort(unique(P5$cluster_label)), sort(unique(as.character(MET$cluster_label))))
# x<-c(rep(1,length(unique(P5$cluster_label))) , rep(2,length(unique(MET$cluster_label))))
# labels<-ID
# nodes<-as.data.frame(cbind(ID,x))
# nodes$x<-as.numeric(nodes$x)
# 
# #create edges from one group to another
# edges<-data.frame()
# for(group in sort(unique(P5$cluster_label))){
#   sub<-subset(P5,cells=WhichCells(P5, expression=cluster_label==group))
#   props<-table(sub$METtype) / sum(table(sub$METtype))
#   N1<-rep(group,length(table(sub$METtype)))
#   N2<-names(table(sub$METtype))
#   Value<-props
# 
#   chunk<-as.data.frame(cbind(N1,N2,Value))
#   chunk$Value<-as.numeric(chunk$Value)
#   edges<-rbind(edges,chunk)
# 
# }
# 
# #generate cols
# cols<-hue_pal()(length(nodes$ID))
# river<-makeRiver(nodes = nodes, edges = edges, node_labels = nodes$ID )
# plot(river, plot_area=c(1,1))
# 
# 
# #try random forest classification
# library(randomForest)
# library(caTools)
# 
# 
# Idents(MET)<-'cluster_label'
# mark<-FindAllMarkers(MET, logfc.threshold = log(1.5), only.pos=T, base=exp(1))
# mark<-mark[which(mark$p_val_adj<0.01),]
# 
# dat<-t(as.matrix(MET@assays[['RNA']]@data[unique(METmarkers$gene[METmarkers$gene%in% rownames(P5)]),]))
# dat<-t(as.matrix(MET@assays[['RNA']]@data[unique(mark$gene[mark$gene%in% rownames(P5)]),]))
# sample <- sample.split(dat, SplitRatio = .75)
# train <- sample(nrow(dat), 0.7*nrow(dat), replace = FALSE)
# TrainSet <- dat[train,]
# ValidSet <- dat[-train,]
# summary(TrainSet)
# summary(ValidSet)
# 
# model1 <- randomForest(x = as.matrix(TrainSet), y = as.factor(MET$cluster_label[rownames(TrainSet)]) , ntree=1000, importance=TRUE, sampsize = table(as.factor(MET$cluster_label[rownames(TrainSet)]) ))
# 
# #filter by importance
# imp<-data.frame(model1$importance)
# topg<-rownames(imp)[order(imp$MeanDecreaseAccuracy, decreasing = T)[1:100]]
# 
# 
# #predict on na values
# dat2<-t(as.matrix(P5@assays[['RNA']]@data))
# pred<- predict(model1, dat2, type = "class")
# P5$rfPred<-pred


#try scalign?


# 
# 
# anch<-FindTransferAnchors(reference = MET, query = integrated_clean)
# pred<-TransferData(anchorset = anch, refdata = MET$cluster_label, 
#                    dims = 1:30)
# integrated_clean$METtype<-NA
# integrated_clean$METtype<-pred$predicted.id
# integrated_clean$pred_score<-pred$prediction.score.max
# integrated_clean$morphology_type<-METclusterTypes[integrated_clean$METtype]
# 
# 
# pdf(file='MET_Developmental_Label_Transfer.pdf', height=10, width=10)
# DimPlot(P5, group.by = 'METtype', label=T, reduction='umap', repel=T)+ggtitle('Projected MET types onto P5')
# DimPlot(P5, group.by = 'cluster_label', label=T, reduction='umap', repel=T)+ggtitle('Cluster Labels: P5')
# FeaturePlot(P5,'pred_score', reduction = 'umap', pt.size=1) & scale_color_viridis_c() & ggtitle('Prediction Score (i.e. Confidence)')
# 
# 
# n<-unique(P5$METtype)
# n<-str_replace(n,'Sst','')
# n<-trimws(n)
# n<-str_split(n,' ')
# markers<-unlist(lapply(n,FUN=function(x) toupper(x[[1]]) ))
# markers<-c('RELN','NPAS1','NR2F2',markers)
# GeneList<-split(markers,ceiling(seq_along(markers)/4))
# 
# for(i in 1:length(GeneList)){
#   genes<-GeneList[[i]]
#   
#   plotList<-list()
#   p<-FeaturePlot(P5, genes, reduction='umap', ncol=2, order=T) & scale_color_gradientn(colors=FeatureCol)
#   
#   print(p)
#   
# }
# 
# dev.off()
# 
# 
# FeaturePlot(P5, c('RELN','NR2F2','NPAS1','ETV1'), reduction='umap') & scale_color_gradientn(colors=FeatureCol)
# FeaturePlot(P5, c('RELN','NR2F2','RXFP1','NTS'), reduction='umap') & scale_color_gradientn(colors=FeatureCol)
# 
# 
# 
# 


# #Try  cellassign
# library(scran)
# library(SingleCellExperiment)
# library(cellassign)
# 
# int<-as.SingleCellExperiment(integrated_clean,assay='RNA')
# s<-computeSumFactors(int)
# 
# Idents(MET)<-'cluster_label'
# METmarkers<-FindAllMarkers(MET, only.pos=T, logfc.threshold = log(1.5), base=exp(1))
# METmarkers<-METmarkers[which(METmarkers$p_val_adj<0.01),]
# 
# #clean duplicate entries - markers that appear for two different groups aren't reliable enough. defer to highest de score
# filtMarkers<-METmarkers
# dupgenes<-filtMarkers$gene[duplicated(filtMarkers$gene)]
# 
# for(gene in dupgenes){
#  
#   ind<-grep(gene,filtMarkers$gene)
#   
#   if(length(ind)>1){
#     vals<- -log10(filtMarkers$p_val_adj[ind])
#     ind<-ind[which(vals < max(vals))]
#     filtMarkers<-filtMarkers[-ind,]
#   }
#  
# }
# 
# mat<-data.frame(Gene=filtMarkers$gene,ID=filtMarkers$cluster)
# temp <- table(mat)
# temp[temp > 1] <- 1
# binMat<-temp
# 
# binMat<-binMat[which(rownames(binMat) %in% rownames(int)),]
# 
# m<-as.matrix(int[rownames(binMat),])
# fit <- cellassign(exprs_obj = int[rownames(binMat),], 
#                   marker_gene_info = binMat)


#TRY OUT MET LABEL GENES

#load type markers, we use these to compile together with DE genes for joint marker sets
# canonmarkers<-read.csv(paste0(path,'/input/supplementary/CanonicalMarkers.csv'))
# markers<-list()
# markers[['Martinotti']]<-toupper(canonmarkers[grep('^Martinotti',canonmarkers[,2]),1])
# markers[['Non-Martinotti']]<-toupper(canonmarkers[grep('Non-Martinotti',canonmarkers[,2]),1])
# markers[['LRP']]<-toupper(canonmarkers[grep('projecting',canonmarkers[,2]),1])
# markers[['Stressed']]<-toupper(canonmarkers[grep('Stress',canonmarkers[,2]),1])
# 
# 
# 
# 
# 
# type<-METclusterTypes$type.label
# names(type)<-METclusterTypes$predominant.MET.type
# 
# sub <- subset(MET, cells=names(which(!is.na(MET$MET_label))))
# 
# sub$celltype<- type[sub$MET_label]
# 
# 
# Idents(sub)<-'celltype'
# METmarkers<-FindAllMarkers(sub, logfc.threshold = log(2), base=exp(1), only.pos=T)
# 
# 
# topMET<-METmarkers %>% group_by(cluster) %>% top_n(10, avg_logFC)
# 
# 
# markers<-lapply(markers, FUN = function(x){
#   x[which(x %nin% topMET$gene)]
# })
# 
# #clean duplicate entries - markers that appear for two different groups aren't reliable enough. defer to highest de score
# dupgenes<-topMET$gene[duplicated(topMET$gene)]
# 
# for(gene in dupgenes){
#   ind<-grep(gene,topMET$gene)
#   
#   if(length(ind)>1){
#     
#     vals<--log10(topMET$p_val_adj[ind])
#     ind<-ind[which(vals<max(vals))]
#     
#     topMET<-topMET[-ind,]
#   }
#   
# }
# 
# 
# 
# #collate together markers form literature and MET DE genes.
# markers[['Martinotti']]<-unique(c(unlist(markers[['Martinotti']]), topMET$gene[which(topMET$cluster=='Martinotti')]))
# markers[['Non-Martinotti']]<-unique(c(markers[['Non-Martinotti']], topMET$gene[which(topMET$cluster=='Non-Martinotti')]))
# markers[['LRP']]<-unique(c(markers[['LRP']], topMET$gene[which(topMET$cluster=='LRP')]))
# 
# 
# out <- DEScoreClassify(integratedSubset, markers, 'cluster_label')




