library(CMDdemux)
library(Seurat)
library(scran)
library(scater)
library(deMULTIplex2)
library(demuxmix)
library(cellhashR)
library(DropletUtils)
library(stringr)
library(tidyverse)
library(clValid)

# Run the companion script bench.R (presumably defines the benchmarking
# helpers called below -- verify). The path has to be a quoted string:
# a bare ~/bench.R does not parse.
source("~/bench.R")

# EMBRYO MULTI-Seq LMO
# Restore the saved objects; the rest of this section uses emlmo.hash.count
# and emlmo.gex.count (presumably one object per file -- verify).
load("~/emlmo.hash.count.rdata")
load("~/emlmo.gex.count.rdata")

# 1. CMDdemux
# CLR-normalise the hashing counts, cluster them, then derive per-cluster
# distances and non-core cells from that clustering.
emlmo.clr.norm <- LocalCLRNorm(emlmo.hash.count)
emlmo.kmed.cl <- KmedCluster(emlmo.clr.norm, optional = TRUE, extra_cluster = 1)
emlmo.cl.dist <- EuclideanClusterDist(emlmo.clr.norm, emlmo.kmed.cl)
# 13 hand-set cut-offs (presumably one per cluster -- confirm against the
# DefineNonCore documentation).
emlmo.noncore <- DefineNonCore(
  emlmo.cl.dist, emlmo.kmed.cl,
  c(0.9, 0.83, 0.65, 0.87, 0.91, 0.94, 0.8, 0.89, 0.84, 0.53, 0.9, 0.87, 0.88),
  optional = TRUE, clr.norm = emlmo.clr.norm
)
emlmo.cluster.assign <- LabelClusterHTO(emlmo.clr.norm, emlmo.kmed.cl, emlmo.noncore, "medoids")
emlmo.md.mat <- CalculateMD(emlmo.clr.norm, emlmo.noncore, emlmo.kmed.cl, emlmo.cluster.assign)
emlmo.outlier.assign <- AssignOutlierDrop(emlmo.md.mat, md_cut_q = 0.85)
# kmed.cl must be the clustering built above; the script never creates an
# object called emlmo.kmed.cl2, so referring to it stops the run.
emlmo.cmddemux.assign <- CMDdemuxClass(
  emlmo.md.mat, emlmo.hash.count, emlmo.outlier.assign,
  use_gex_data = TRUE, emlmo.gex.count,
  optional = TRUE, kmed.cl = emlmo.kmed.cl,
  num_modes = 10, cut_no = 12, clr.norm = emlmo.clr.norm,
  unlabel_cl_cut = 0.5, unlabel_raw_cut = 4.6, unlabel_clr_cut = -0.5
)
# Results table: one row per barcode, one column per demultiplexing method.
emlmo.demux.result <- data.frame("CMDdemux" = emlmo.cmddemux.assign$demux_global_class)
rownames(emlmo.demux.result) <- rownames(emlmo.cmddemux.assign)

# 2. HTODemux (Seurat)
# The hashing counts are used both for the default assay and for the "HTO"
# assay that is CLR-normalised and passed to HTODemux().
emlmo.obj <- CreateSeuratObject(counts = emlmo.hash.count)
emlmo.obj[["HTO"]] <- CreateAssayObject(counts = emlmo.hash.count)
emlmo.obj <- emlmo.obj %>%
  NormalizeData(assay = "HTO", normalization.method = "CLR") %>%
  HTODemux(assay = "HTO", positive.quantile = 0.99)
# Align the calls to the row order of the results table by barcode name.
emlmo.demux.result[["HTODemux"]] <- emlmo.obj$hash.ID[rownames(emlmo.demux.result)]

# 3. GMM-Demux
# Prepare input data: GMM-Demux is run outside R on the transposed count
# matrix written here.
emlmo.gmm.input <- t(emlmo.hash.count)
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(emlmo.gmm.input, "~/emlmo.gmm.input.csv", quote = FALSE)
# Command: GMM-demux -c ~/emlmo.gmm.input.csv MULTI_2,MULTI_3,MULTI_4,MULTI_5,MULTI_6,MULTI_7,MULTI_8,MULTI_9,MULTI_10,MULTI_11,MULTI_12,MULTI_13 -x MULTI_2,MULTI_3,MULTI_4,MULTI_5,MULTI_6,MULTI_7,MULTI_8,MULTI_9,MULTI_10,MULTI_11,MULTI_12,MULTI_13 -f .
# Read back the files produced by the command above and convert them to
# per-barcode calls.
emlmo.gmm.output <- read.csv("~/emlmo_GMM_full.csv")
emlmo.gmm.config <- read.table("~/emlmo_GMM_full.config", header = FALSE, sep = ",")
emlmo.gmm.demux <- GMM_demux_class(emlmo.gmm.output, emlmo.gmm.config, emlmo.hash.count)
emlmo.demux.result$`GMM-Demux` <- emlmo.gmm.demux

# 4. deMULTIplex2
# Runs on the transposed count matrix prepared for GMM-Demux, with a fixed seed.
emlmo.demultiplex2.output <- demultiplexTags(
  emlmo.gmm.input,
  plot.diagnostics = FALSE,
  seed = 2024
)
emlmo.demultiplex2.assign <- emlmo.demultiplex2.output %>% deMULTIplex2_class()
emlmo.demux.result[["deMULTIplex2"]] <- emlmo.demultiplex2.assign

# 5. demuxEM
# No demuxEM results are used in this script; this section only writes the
# input files and records the command line for reference.
emlmo.hash.write <- as.data.frame(emlmo.hash.count) %>% rownames_to_column("Antibody")
# NOTE(review): this replaces the tag names in `Antibody` with row indices --
# confirm that is the format demuxEM expects.
emlmo.hash.write$Antibody <- seq_len(nrow(emlmo.hash.write))
write.csv(emlmo.hash.write, "~/emlmo.hash.write.csv", quote = FALSE)
write10xCounts("~/emlmo.gex.h5", emlmo.gex.count, version = "3")
# demuxEM -p 8 --random-state 2024 ~/emlmo.gex.h5 ~/emlmo.hash.write.csv emlmo_demuxEM

# 6. demuxmix
# Per-barcode count of features with a non-zero GEX count, supplied as `rna`.
emlmo.gex.genes <- colSums(emlmo.gex.count > 0)
emlmo.demuxmix.model <- demuxmix(
  hto = as.matrix(emlmo.hash.count),
  rna = emlmo.gex.genes
)
emlmo.demuxmix.labels <- emlmo.demuxmix.model %>% dmmClassify()
emlmo.demuxmix.assign <- emlmo.demuxmix.labels %>% demuxmix_class()
# Align the calls to the row order of the results table by barcode name.
emlmo.demux.result[["demuxmix"]] <- emlmo.demuxmix.assign[rownames(emlmo.demux.result)]

# 7. hashedDrops
emlmo.hasheddrops.output <- emlmo.hash.count %>% hashedDrops()
emlmo.hasheddrops.assign <- hashedDrops_class(
  emlmo.hasheddrops.output,
  emlmo.hash.count
)
# Align the calls to the row order of the results table by barcode name.
emlmo.demux.result[["hashedDrops"]] <- emlmo.hasheddrops.assign[rownames(emlmo.demux.result)]

# 8. BFF (cellhashR), raw and cluster variants in one call
emlmo.bff.output <- GenerateCellHashingCalls(
  barcodeMatrix = emlmo.hash.count,
  methods = c("bff_raw", "bff_cluster")
)
# Copied positionally; assumes the output rows follow the results-table
# order -- TODO confirm.
emlmo.demux.result[["BFF_raw"]] <- emlmo.bff.output$bff_raw
emlmo.demux.result[["BFF_cluster"]] <- emlmo.bff.output$bff_cluster

# Dimensional reduction for visualization
# t-SNE, then UMAP, both computed from the "clr" assay.
emlmo.sce <- SingleCellExperiment(
  assays = list(hto = emlmo.hash.count, clr = emlmo.clr.norm)
)
emlmo.sce <- emlmo.sce %>%
  runTSNE(exprs_values = "clr") %>%
  runUMAP(exprs_values = "clr")

# Proportion of doublets
emlmo.demux.result2 <- DemuxSingletClass(emlmo.demux.result, emlmo.hash.count)
# NOTE(review): the CMO section calls AssignPropPlot() for this same step --
# confirm AssignProp() is the intended helper here.
emlmo.doublet.prop <- AssignProp(emlmo.demux.result2, "doublet")

# Library size (natural log of the per-barcode column sums; the hashing
# counts get a pseudo-count of 1, the GEX counts do not)
emlmo.hto.lib <- log(1 + colSums(emlmo.hash.count))
emlmo.gex.lib <- log(colSums(emlmo.gex.count))

# Library size ratios of doublets vs. singlets and singlets vs. negatives
# across different methods
emlmo.lib.ratio <- LibRatio(emlmo.gex.lib, emlmo.demux.result)

# Summary of library size ratio
emlmo.ratio.sum <- LibRatioSum(emlmo.gex.lib, emlmo.demux.result)

# CH index, extracted from the benchmark metrics computed without ground truth
emlmo.bench.metrics <- BenchMetricsNoGT(emlmo.hash.count, emlmo.demux.result)
emlmo.ch.index <- emlmo.bench.metrics$`CH index`

# Check extra MULTI 4 singlets
# "Common" barcodes are called MULTI_4 by CMDdemux and by every method in
# common.methods; "extra" barcodes are called MULTI_4 by CMDdemux but not by
# all of those methods.
common.methods <- c("deMULTIplex2", "hashedDrops", "BFF_cluster")
emlmo.cmd.multi4 <- which(emlmo.demux.result$CMDdemux == "MULTI_4")
emlmo.common.multi4 <- Reduce(
  function(kept, method) {
    intersect(kept, which(emlmo.demux.result[[method]] == "MULTI_4"))
  },
  common.methods,
  emlmo.cmd.multi4
)
emlmo.common.multi4 <- rownames(emlmo.demux.result)[emlmo.common.multi4]
emlmo.extra.multi4 <- setdiff(
  rownames(emlmo.demux.result)[emlmo.cmd.multi4],
  emlmo.common.multi4
)
# Draw up to 30 of the extra barcodes for visual inspection. Capping the size
# and using seq_along() avoids the error that sample(1:length(x), 30) raises
# when fewer than 30 (or zero) barcodes qualify; with 30 or more candidates
# the seeded draw is the same as before.
set.seed(2025)
emlmo.check.barcodes <- emlmo.extra.multi4[
  sample(
    seq_along(emlmo.extra.multi4),
    min(30, length(emlmo.extra.multi4)),
    replace = FALSE
  )
]
CheckAssign2DPlot(emlmo.hash.count, emlmo.gex.count, emlmo.md.mat, emlmo.cmddemux.assign, emlmo.check.barcodes)


# EMBRYO MULTI-Seq CMO
# Restore the saved objects; the rest of this section uses emcmo.hash.count
# and emcmo.gex.count (presumably one object per file -- verify).
load("~/emcmo.hash.count.rdata")
load("~/emcmo.gex.count.rdata")

# 1. CMDdemux
emcmo.clr.norm <- LocalCLRNorm(emcmo.hash.count)
emcmo.kmed.cl <- KmedCluster(emcmo.clr.norm)
emcmo.cl.dist <- EuclideanClusterDist(emcmo.clr.norm, emcmo.kmed.cl)
# 13 hand-set cut-offs (presumably one per cluster -- confirm against the
# DefineNonCore documentation).
emcmo.noncore <- DefineNonCore(
  emcmo.cl.dist,
  emcmo.kmed.cl,
  c(0.9, 0.81, 0.81, 0.79, 0.23, 0.72, 0.95, 0.84, 0.83, 0.9, 0.8, 0.91, 0.79),
  optional = TRUE,
  clr.norm = emcmo.clr.norm
)
emcmo.cluster.assign <- LabelClusterHTO(
  emcmo.clr.norm, emcmo.kmed.cl, emcmo.noncore, "expression"
)
emcmo.md.mat <- CalculateMD(
  emcmo.clr.norm, emcmo.noncore, emcmo.kmed.cl, emcmo.cluster.assign
)
emcmo.outlier.assign <- AssignOutlierDrop(emcmo.md.mat, md_cut_q = 0.51)
# Arguments are passed positionally; TRUE / 20 / 26 presumably correspond to
# use_gex_data / num_modes / cut_no as named in the LMO call -- TODO confirm
# against the CMDdemuxClass signature.
emcmo.cmddemux.assign <- CMDdemuxClass(
  emcmo.md.mat, emcmo.hash.count, emcmo.outlier.assign,
  TRUE, emcmo.gex.count, 20, 26
)
# Results table: one row per barcode, one column per demultiplexing method.
emcmo.demux.result <- data.frame(
  "CMDdemux" = emcmo.cmddemux.assign$demux_global_class,
  row.names = rownames(emcmo.cmddemux.assign)
)

# 2. HTODemux (Seurat)
# The hashing counts are used both for the default assay and for the "HTO"
# assay that is CLR-normalised and passed to HTODemux().
emcmo.obj <- CreateSeuratObject(counts = emcmo.hash.count)
emcmo.obj[["HTO"]] <- CreateAssayObject(counts = emcmo.hash.count)
emcmo.obj <- emcmo.obj %>%
  NormalizeData(assay = "HTO", normalization.method = "CLR") %>%
  HTODemux(assay = "HTO", positive.quantile = 0.99)
# Align the calls to the row order of the results table by barcode name.
emcmo.demux.result[["HTODemux"]] <- emcmo.obj$hash.ID[rownames(emcmo.demux.result)]

# 3. GMM-Demux
# Prepare input data: GMM-Demux is run outside R on the transposed count
# matrix written here.
emcmo.gmm.input <- t(emcmo.hash.count)
# FALSE rather than F: F is an ordinary variable that can be reassigned.
write.csv(emcmo.gmm.input, "~/emcmo.gmm.input.csv", quote = FALSE)
# Command: GMM-demux -c ~/emcmo.gmm.input.csv Nxt_451,Nxt_452,Nxt_453,Nxt_455,Nxt_456,Nxt_457,Nxt_458,Nxt_459,Nxt_460,Nxt_462,Nxt_463,Nxt_465 -x Nxt_451,Nxt_452,Nxt_453,Nxt_455,Nxt_456,Nxt_457,Nxt_458,Nxt_459,Nxt_460,Nxt_462,Nxt_463,Nxt_465 -f .
# Read back the files produced by the command above and convert them to
# per-barcode calls.
emcmo.gmm.output <- read.csv("~/emcmo_GMM_full.csv")
emcmo.gmm.config <- read.table("~/emcmo_GMM_full.config", header = FALSE, sep = ",")
emcmo.gmm.demux <- GMM_demux_class(emcmo.gmm.output, emcmo.gmm.config, emcmo.hash.count)
emcmo.demux.result$`GMM-Demux` <- emcmo.gmm.demux

# 4. deMULTIplex2
emcmo.demultiplex2.output <- demultiplexTags(
  emcmo.gmm.input,
  plot.diagnostics = FALSE,
  seed = 2024
)
emcmo.demultiplex2.assign <- emcmo.demultiplex2.output %>% deMULTIplex2_class()
# deMULTIplex2 cannot classify 36 cells. We label them as "Uncertain":
# start from an all-"Uncertain" vector keyed by barcode, then overwrite the
# entries for which deMULTIplex2 returned a call.
emcmo.demultiplex2.assign2 <- setNames(
  rep("Uncertain", ncol(emcmo.hash.count)),
  colnames(emcmo.hash.count)
)
emcmo.demultiplex2.assign2[names(emcmo.demultiplex2.assign)] <- emcmo.demultiplex2.assign
emcmo.demux.result[["deMULTIplex2"]] <- emcmo.demultiplex2.assign2

# 5. demuxEM
# No demuxEM results are used in this script; this section only writes the
# input files and records the command line for reference.
emcmo.hash.write <- as.data.frame(emcmo.hash.count) %>% rownames_to_column("Antibody")
# NOTE(review): this replaces the tag names in `Antibody` with row indices --
# confirm that is the format demuxEM expects.
emcmo.hash.write$Antibody <- seq_len(nrow(emcmo.hash.write))
write.csv(emcmo.hash.write, "~/emcmo.hash.write.csv", quote = FALSE)
write10xCounts("~/emcmo.gex.h5", emcmo.gex.count, version = "3")
# demuxEM -p 8 --random-state 2024 ~/emcmo.gex.h5 ~/emcmo.hash.write.csv emcmo_demuxEM

# 6. demuxmix
# Per-barcode count of features with a non-zero GEX count, supplied as `rna`.
emcmo.gex.genes <- colSums(emcmo.gex.count > 0)
emcmo.demuxmix.model <- demuxmix(
  hto = as.matrix(emcmo.hash.count),
  rna = emcmo.gex.genes
)
emcmo.demuxmix.labels <- emcmo.demuxmix.model %>% dmmClassify()
emcmo.demuxmix.assign <- emcmo.demuxmix.labels %>% demuxmix_class()
# Align the calls to the row order of the results table by barcode name.
emcmo.demux.result[["demuxmix"]] <- emcmo.demuxmix.assign[rownames(emcmo.demux.result)]

# 7. hashedDrops
emcmo.hasheddrops.output <- emcmo.hash.count %>% hashedDrops()
emcmo.hasheddrops.assign <- hashedDrops_class(
  emcmo.hasheddrops.output,
  emcmo.hash.count
)
# Align the calls to the row order of the results table by barcode name.
emcmo.demux.result[["hashedDrops"]] <- emcmo.hasheddrops.assign[rownames(emcmo.demux.result)]

# 8. BFF (cellhashR), raw and cluster variants in one call
emcmo.bff.output <- GenerateCellHashingCalls(
  barcodeMatrix = emcmo.hash.count,
  methods = c("bff_raw", "bff_cluster")
)
# Copied positionally; assumes the output rows follow the results-table
# order -- TODO confirm.
emcmo.demux.result[["BFF_raw"]] <- emcmo.bff.output$bff_raw
emcmo.demux.result[["BFF_cluster"]] <- emcmo.bff.output$bff_cluster

# Dimensional reduction for visualization
# t-SNE, then UMAP, both computed from the "clr" assay.
emcmo.sce <- SingleCellExperiment(
  assays = list(hto = emcmo.hash.count, clr = emcmo.clr.norm)
)
emcmo.sce <- emcmo.sce %>%
  runTSNE(exprs_values = "clr") %>%
  runUMAP(exprs_values = "clr")

# Proportion of doublets
emcmo.demux.result2 <- DemuxSingletClass(emcmo.demux.result, emcmo.hash.count)
emcmo.doublet.prop <- AssignPropPlot(emcmo.demux.result2, "doublet")

# Proportion of negatives (same helper, different class label)
emcmo.negative.prop <- AssignPropPlot(emcmo.demux.result2, "negative")

# Library size (natural log of the per-barcode GEX column sums, no pseudo-count)
emcmo.gex.lib <- log(colSums(emcmo.gex.count))

# Library size ratios of doublets vs. singlets and singlets vs. negatives
# across different methods
emcmo.lib.ratio <- LibRatio(emcmo.gex.lib, emcmo.demux.result)

# Summary of library size ratio
emcmo.ratio.sum <- LibRatioSum(emcmo.gex.lib, emcmo.demux.result)

# Check randomly selected singlets, negatives and doublets in the contaminated cluster
# Barcodes in k-medoids cluster 5 and their CMDdemux global class.
contaminate.barcodes <- names(emcmo.kmed.cl$clustering)[which(emcmo.kmed.cl$clustering %in% 5)]
emcmo.contaminate.assign <- emcmo.cmddemux.assign[contaminate.barcodes, "demux_global_class"]
# Draw up to 30 barcodes per class. The draws stay in the order
# negative -> doublet -> singlet after a single set.seed(), so with at least
# 30 candidates per class the selection matches the previous fixed-size draw.
# Capping the size and using seq_along() avoids the error that
# sample(1:length(x), 30) raises when a class has fewer than 30 (or zero)
# barcodes.
set.seed(2025)
emcmo.contaminate.ng <- contaminate.barcodes[which(emcmo.contaminate.assign == "Negative")]
emcmo.check.ng <- emcmo.contaminate.ng[
  sample(
    seq_along(emcmo.contaminate.ng),
    min(30, length(emcmo.contaminate.ng)),
    replace = FALSE
  )
]
emcmo.contaminate.db <- contaminate.barcodes[which(emcmo.contaminate.assign == "Doublet")]
emcmo.check.db <- emcmo.contaminate.db[
  sample(
    seq_along(emcmo.contaminate.db),
    min(30, length(emcmo.contaminate.db)),
    replace = FALSE
  )
]
# Everything that is neither "Doublet" nor "Negative" is treated as a singlet.
emcmo.contaminate.sg <- contaminate.barcodes[which(!emcmo.contaminate.assign %in% c("Doublet", "Negative"))]
emcmo.check.sg <- emcmo.contaminate.sg[
  sample(
    seq_along(emcmo.contaminate.sg),
    min(30, length(emcmo.contaminate.sg)),
    replace = FALSE
  )
]
emcmo.check.barcodes <- list("Singlet" = emcmo.check.sg, "Doublet" = emcmo.check.db, "Negative" = emcmo.check.ng)
# Called one by one at top level (not in a loop) so that any returned plot
# object is still auto-printed when the script is run interactively.
CheckAssign2DPlot(emcmo.hash.count, emcmo.gex.count, emcmo.md.mat, emcmo.cmddemux.assign, emcmo.check.barcodes$Singlet)
CheckAssign2DPlot(emcmo.hash.count, emcmo.gex.count, emcmo.md.mat, emcmo.cmddemux.assign, emcmo.check.barcodes$Doublet)
CheckAssign2DPlot(emcmo.hash.count, emcmo.gex.count, emcmo.md.mat, emcmo.cmddemux.assign, emcmo.check.barcodes$Negative)
