
#Function to iterate through data and partition into cell types
RecursiveClustering <- function(seuratObject,masterDendrogram=NULL,masterClassification=NULL,indexRecord=NULL,clusterCounter=1,minSize=50,depth=1,giveUp=FALSE,isIntegrated=TRUE, DEscore.thresh=60,logFC.thresh=log(2)){
  stop<-FALSE
 
  
  if(isIntegrated){
    DefaultAssay(seuratObject)<-"integrated"
    assay='integrated'
  }else{
    DefaultAssay(seuratObject)<-"RNA"
    assay='RNA'
  }

 
  
  
  clustergenes<-seuratObject@assays[[assay]]@data@Dimnames[[1]]
 
  if(!isIntegrated){
    #We have the rna assay with too many genes, only use the highly variable ones
    seuratObject<-FindVariableFeatures(seuratObject,verbose = F,nfeatures = 2000,assay=assay)
    clustergenes<-seuratObject@assays[["RNA"]]@var.features
    seuratObject<-ScaleData(seuratObject,assay=assay,features = clustergenes)
  }else if(!is.null(masterDendrogram)){
    #Rescale the data
    seuratObject<-FindVariableFeatures(seuratObject,verbose = F,nfeatures = 1000,assay=assay)
    seuratObject<-ScaleData(seuratObject,assay=assay)
    clustergenes<-unique(clustergenes[!is.na(match(clustergenes,rownames(seuratObject@assays[[assay]]@scale.data)))])
    
  }
  
  print(assay)
  hInput<-seuratObject@assays[[assay]]@scale.data
  hInput<-hInput[match(clustergenes,rownames(hInput)),]
  
  
  #Cluster (Hierarchical)
  dis <- as.dist(1 - cor(hInput))
  clu <- hclust(dis, method = "average")
  
  #Compute silhouette from trying different tree cuts
  scores<-ComputeSilhouette(dis,c(2,10))
  numGroups<-names(scores[which(scores==max(scores))])
  #numGroups<-2
  #Find number of clusters
  # out<-fviz_nbclust(print.summary=F,x=as.matrix(dis), FUNcluster=clusterCells, method = "silhouette", k.max = 5) + theme_minimal() + ggtitle("The Silhouette Plot")
  #numGroups<-max(grep(max(out[["data"]][["y"]]),out[["data"]][["y"]]),2)
  
  cl <- cutree(clu, k=numGroups,order_clusters_as_data
               =TRUE)
  #cl<-cl[order.dendrogram(as.dendrogram(clu))]
  
  updatedDend<-as.dendrogram(clu,leaflab='none')
  
  #Reset dendrogram entries to reference the original data object, if this isn't the first dendrogram
  if(!is.null(masterDendrogram)){
    updatedDend<-dendrapply(X=updatedDend, FUN= function(n) CorrectDendEntries(n,seuratObject,masterClassification ))
  }
  
  #Add to object and find distinctive genes
  seuratObject[['hier.clust']]<-cl
  
  print(paste0(as.character(length(unique(cl))),' clusters found at depth ',as.character(depth)))
  
  #Find DE genes
  Idents(seuratObject)<-'hier.clust'
  DEMarkers<-FindAllMarkers(seuratObject,assay='RNA',logfc.threshold=logFC.thresh,only.pos=T)
  #Keep only if adjust p <0.01
  DEMarkers<-DEMarkers[which(DEMarkers$p_val_adj<=0.01),]
  
  #Check stop conditions. If any clusters meet conditions, merge their labels and don't update their dendrogram entry. 
  clustersToCarryForward<-sort(as.numeric(as.character(unique(cl))))
  
  #Have Louvain and heirarchical failed to sufficiently agree?
  # if(adjustedRandIndex(seuratObject[['RF_class_from_louvain']],seuratObject[['RF_class_from_hclust']])<0.5){
  #   #We have reached a terminal node. Return. 
  #   output <- list("cluster_Labels" = foo, "Grafted_Dendrogram" = bar)
  #   return(output)
  # }
  
  #Is a cluster too small?
  FailedClusters<-vector()
  FailedClusters<-as.numeric(names(table(seuratObject[['hier.clust']]))[which(table(seuratObject[['hier.clust']])<minSize)])
  
  #Does a cluster fail to have enough strongly DE genes?
  for(cluster in unique(cl)){
    markers<-DEMarkers[which(DEMarkers$cluster==cluster),]
    if(length(markers)==0){
      FailedClusters<-c(FailedClusters,cluster)
    }
    else{
      DEscore<- log10(markers$p_val_adj)*(-1)
      #adjust down contributions to 20
      DEscore[which(DEscore>20)]<-20
      DEscore<-sum(DEscore)
      print(paste0("cluster ",as.character(cluster)," has DEscore: ",as.character(DEscore)))
      thresh<-DEscore.thresh
      if(DEscore<thresh && cluster %nin% FailedClusters){
        FailedClusters<-c(FailedClusters,cluster)
      }
    }
  }
  print(paste0(as.character(length(FailedClusters)),' clusters failed criteria'))
  
  
  
  failedIndList <-list()
  i<-1
  if(length(FailedClusters)!=0){
    clustersToCarryForward<-clustersToCarryForward[-FailedClusters]
  }
  
  if(length(FailedClusters)==length(unique(cl))){
    
    #All clusters failed! Make no changes to dendrogram and label all as same cluster
    print('All clusters failed!')
    cellNames<-rownames(seuratObject[['hier.clust']])
    dim(masterClassification)
    masterClassification[cellNames,]<-clusterCounter
    
    print(paste0('clusterCounter: ',as.character(clusterCounter)))
    clusterCounter<-clusterCounter+1
    output<-list(masterDendrogram,masterClassification,clusterCounter)
    names(output)<-c('Grafted Dendrogram','Cluster Labels','Total Clusters')
    stop<-TRUE
    
    return(output)
    
  }else if(length(FailedClusters)>0 ){
    print('regrouping failed clusters')
    #Set failed clusters to same label as nearest neighbour cluster
    for(cluster in FailedClusters){
      # cophDist<-as.matrix(cophenetic(dendCut$upper))
      # closest<-which(cophDist[cluster,] == min(cophDist[cluster,cophDist[cluster,] > 0]))
      # closest<-closest[1]
      
      #Find our clusters in the dendrogram
      clusterLocations<-list()
      for(clust in unique(seuratObject[['hier.clust']][,1])){
        clustCells<-rownames(seuratObject[['hier.clust']])[which(seuratObject[['hier.clust']][,1]==clust)]
        #Find indices that lead to this cluster
        newdend<-dendrapply(updatedDend, function(n) CharacterDendname(n))
        newdend<-dendrapply(newdend, function(n) LabelNode(n,cellSet=clustCells,clust))
        locations<-listnames.get(list.obj=newdend, do.basename=TRUE, do.name.chain=TRUE)
        ind<-locations$index[grep(paste0('Cluster',"$"),locations$name)]
        ind<-unlist(ind)
        ind<-strsplit(ind,split='-')
        names(ind)<-cluster
        clusterLocations[[as.character(clust)]]<-ind
      }
      
      #Closest is the nearest non-failed cluster in hierarchy
      diff<-abs(clustersToCarryForward-as.numeric(cluster))
      closest<-clustersToCarryForward[which(diff==min(diff))][1]
      
      #try to choose closest by which has most similar tree structure
      failedInd<-unlist(clusterLocations[as.character(cluster)])
      failedIndList[[i]]<-failedInd
      i<-i+1
      potentialParents<-clustersToCarryForward
      potentialParentInds<-clusterLocations[as.character(clustersToCarryForward)]
      for(i in 1:length(failedInd)){
        comp<-lapply(potentialParentInds,function(x) sum(failedInd[i]==unlist(x)[i]))
        potentialParentInds<-potentialParentInds[which(comp==1)]
        if(length(potentialParentInds)==1){
          closest<-names(potentialParentInds)
        }
      }
      
      
      #Set as parent of the failed cluster
      parentInd<-unlist(clusterLocations[as.character(closest)])
      
      replacement<-merge(FetchSubDend(updatedDend,indices = failedInd),FetchSubDend(updatedDend,indices = parentInd))
      
      updatedDend<-ChangeEntry(dendrogram=updatedDend,indices = parentInd,newentry = replacement,mode="update")
      updatedDend<-ChangeEntry(dendrogram=updatedDend, indices=failedInd,mode='clear')
      updatedDend<-CollapseSingles(updatedDend,indices = failedInd)
      
      #Adjust cluster labels to match the new groups
      seuratObject[['hier.clust']][seuratObject[['hier.clust']][,1]==cluster,]<-closest[1]
      
      print('new clusters are')
      print(table(seuratObject[['hier.clust']]))
    }
  } 
  
  #Check if resulting clusters still pass DE thresholding
  #Find DE genes
  Idents(seuratObject)<-'hier.clust'
  DEMarkers<-FindAllMarkers(seuratObject,assay='RNA',logfc.threshold=logFC.thresh,only.pos=T)
  #Keep only if adjust p <0.01
  DEMarkers<-DEMarkers[which(DEMarkers$p_val_adj<=0.01),]
  
  FailedClusters<-vector()
  #Does a cluster fail to have enough strongly DE genes?
  for(cluster in unique(cl)){
    markers<-DEMarkers[which(DEMarkers$cluster==cluster),]
    if(length(markers)==0){
      FailedClusters<-c(FailedClusters,cluster)
    }
    else{
      DEscore<- log10(markers$p_val_adj)*(-1)
      #adjust down contributions to 20
      DEscore[which(DEscore>20)]<-20
      DEscore<-sum(DEscore)
      print(paste0("cluster ",as.character(cluster)," has DEscore: ",as.character(DEscore)))
      thresh<-DEscore.thresh
      if(DEscore<thresh && cluster %nin% FailedClusters){
        FailedClusters<-c(FailedClusters,cluster)
      }
    }
  }
  
  
  print('check if back to what we started with')
  print(dim(seuratObject)[2])
  print(table(seuratObject[['hier.clust']]))
  #if dendrogram is null, this is our initial starting dendrogram, need to define setup files
  if(is.null(masterDendrogram)){
    masterDendrogram<-updatedDend
    masterClassification<-seuratObject[['hier.clust']]
    rownames(masterClassification)<-rownames(seuratObject[['hier.clust']])
    masterClassification[1:dim(masterClassification)[1],1]<-NA
  }else if(length(FailedClusters)>0)
  {
    print('returned to previous clustering')
    clustersToCarryForward<-NA
    
    cellNames<-rownames(seuratObject[['hier.clust']])
    masterClassification[cellNames,]<-clusterCounter
    
    print(paste0('clusterCounter: ',as.character(clusterCounter)))
    clusterCounter<-clusterCounter+1
    stop<-TRUE
    output<-list(masterDendrogram,masterClassification,clusterCounter)
    names(output)<-c('Grafted Dendrogram','Cluster Labels','Total Clusters')
    return(output)
    
    
  } else if(sum(table(seuratObject[['hier.clust']])==dim(seuratObject)[2])>0)
  {
    #If the clusters we have match the cluster we started with, then don't update
    print('returned to previous clustering')
    clustersToCarryForward<-NA
    
    cellNames<-rownames(seuratObject[['hier.clust']])
    masterClassification[cellNames,]<-clusterCounter
    
    print(paste0('clusterCounter: ',as.character(clusterCounter)))
    clusterCounter<-clusterCounter+1
    stop<-TRUE
    output<-list(masterDendrogram,masterClassification,clusterCounter)
    names(output)<-c('Grafted Dendrogram','Cluster Labels','Total Clusters')
    return(output)
    
    
  }else{
    #Update the dendrogram
    
    masterDendrogram<-ChangeEntry(dendrogram=masterDendrogram,indices = indexRecord,newentry=updatedDend,mode='update',nestDepth=0)
    print('dendrogram updated')
  }
  
  print(paste0('Attempting subdivision on ',as.character(length(clustersToCarryForward)),' clusters'))
  
  
  
  if(length(clustersToCarryForward)>0 && !stop){
    #We have more clusters to attempt subdivision. Only attempt to divide those with size>100, i.e. those that can create at least 2 subclusters of permissable size
    #Cluster structure may have changed, find them again
    #save(masterDendrogram,masterClassification, depth,seuratObject,FailedClusters,file=paste0(path,'/output/',timepoints,'/test_grafted_dendrogram.Rdata'))
    
    clusterLocations<-list()
    for(clust in unique(seuratObject[['hier.clust']][,1])){
      clustCells<-rownames(seuratObject[['hier.clust']])[which(seuratObject[['hier.clust']][,1]==clust)]
      #Find indices that lead to this cluster
      newdend<-dendrapply(updatedDend, function(n) CharacterDendname(n))
      newdend<-dendrapply(newdend, function(n) LabelNode(n,cellSet=clustCells,clust))
      locations<-listnames.get(list.obj=newdend, do.basename=TRUE, do.name.chain=TRUE)
      ind<-locations$index[grep(paste0('Cluster',"$"),locations$name)]
      ind<-unlist(ind)
      ind<-strsplit(ind,split='-')
      names(ind)<-clust
      clusterLocations[[as.character(clust)]]<-ind
    }
    
    #Loop through each viable cluster and run again
    setname<-paste0('ClustersToCarryForward_',as.character(depth))
    assign(setname,clustersToCarryForward)
    
    for(i in 1:length(clustersToCarryForward)){
      clusterToSubdivide<-clustersToCarryForward[i]
      if(table(seuratObject[['hier.clust']])[as.character(clusterToSubdivide )]>=(2*minSize)){
        print('making new seurat object')   
        cellSet<-rownames(seuratObject[['hier.clust']])[which(seuratObject[['hier.clust']][,1]==clusterToSubdivide)]
        newSeuratObject<-subset(seuratObject,cells=cellSet)
        
        clusterInd<-as.numeric(unlist(clusterLocations[as.character(clusterToSubdivide )])) 
        print("new seurat object, size:")
        print(dim(newSeuratObject)[2])
        print("classification currently is")
        print(table(masterClassification))
        newIndexRecord<-c(indexRecord,clusterInd)
        depth<-depth+1
        
        out<-RecursiveClustering(seuratObject=newSeuratObject,
                                 masterDendrogram=masterDendrogram,
                                 masterClassification=masterClassification,
                                 indexRecord=newIndexRecord,
                                 clusterCounter=clusterCounter,
                                 depth=depth,minSize=minSize, 
                                 isIntegrated = isIntegrated,
                                 DEscore.thresh = DEscore.thresh,
                                 logFC.thresh = logFC.thresh)
        depth<-depth-1
        
        masterDendrogram<-out$'Grafted Dendrogram'
        masterClassification<-out$'Cluster Labels'
        clusterCounter<-out$'Total Clusters'
        
      }else if(table(seuratObject[['hier.clust']])[as.character(clusterToSubdivide)]<2*minSize){
        #We only attempt to subdivide those which can produce at least 2 groups over size threshold
        
        
        cellNames<-rownames(seuratObject[['hier.clust']])[which(seuratObject[['hier.clust']]==clusterToSubdivide)]
        masterClassification[cellNames,]<-clusterCounter
        print('saving labels of too-small clusters')
        
        print(paste0('clusterCounter: ',as.character(clusterCounter)))
        clusterCounter<-clusterCounter+1
        
        #masterDendrogram<-ChangeEntry(dendrogram=masterDendrogram,indices = indexRecord,newentry=updatedDend,mode='update',nestDepth=0)
        #print('dendrogram updated')
      }
    }
  }
  
  
  
  #After loop completes we should have finished all classification
  #Return the master dendrogram
  output<-list(masterDendrogram,masterClassification,clusterCounter)
  names(output)<-c('Grafted Dendrogram','Cluster Labels','Total Clusters')
  return(output)
  
  
}
