library(CMDdemux)
library(Seurat)
library(scran)
library(scater)
library(deMULTIplex2)
library(demuxmix)
library(cellhashR)
library(DropletUtils)
library(stringr)
library(tidyverse)
library(clValid)

# Load the benchmarking helpers and the input data.
# The path must be a quoted string: an unquoted ~/bench.R is a parse error
# that prevents the whole script from running.
# NOTE(review): bench.R presumably defines the project helpers used below
# (e.g. GMM_demux_class, deMULTIplex2_class, demuxEM_class), which are not
# defined in this file -- confirm.
source("~/bench.R")
# load() restores the saved object(s) under their original names;
# presumably this provides pbmc.hash.count -- verify against the saved file.
load("~/pbmc.hash.count.rdata")
# Presumably the gene-expression UMI count matrix (going by the file name).
pbmc.gex.count <- readRDS("~/pbmc_umi_mtx.rds")

# 1. CMDdemux
# Step-by-step CMDdemux workflow on the hashing counts. Each call consumes
# the outputs of the earlier steps, so the order must be kept.
pbmc.clr.norm <- LocalCLRNorm(pbmc.hash.count)
pbmc.kmed.cl <- KmedCluster(pbmc.clr.norm, optional = TRUE)
pbmc.cl.dist <- EuclideanClusterDist(pbmc.clr.norm, pbmc.kmed.cl)
# The numeric vector holds nine hand-picked cut-offs
# (presumably one per cluster -- TODO confirm against the CMDdemux docs).
pbmc.noncore <- DefineNonCore(
  pbmc.cl.dist,
  pbmc.kmed.cl,
  c(0.7, 0.7, 0.8, 0.73, 0.65, 0.9, 0.72, 0.67, 0.67),
  optional = TRUE,
  clr.norm = pbmc.clr.norm
)
pbmc.cluster.assign <- LabelClusterHTO(
  pbmc.clr.norm,
  pbmc.kmed.cl,
  pbmc.noncore,
  "medoids"
)
pbmc.md.mat <- CalculateMD(
  pbmc.clr.norm,
  pbmc.noncore,
  pbmc.kmed.cl,
  pbmc.cluster.assign
)
pbmc.outlier.assign <- AssignOutlierDrop(pbmc.md.mat, md_cut_q = 0.72)
# Arguments are passed positionally, exactly as in the package call order.
pbmc.cmddemux.assign <- CMDdemuxClass(
  pbmc.md.mat,
  pbmc.hash.count,
  pbmc.outlier.assign,
  TRUE,
  pbmc.gex.count,
  3,
  2,
  NULL,
  TRUE,
  pbmc.kmed.cl,
  pbmc.cluster.assign
)
# Start the shared results table: one row per cell in the CMDdemux output,
# first column holding the CMDdemux global class.
pbmc.demux.result <- data.frame(
  "CMDdemux" = pbmc.cmddemux.assign$demux_global_class,
  row.names = rownames(pbmc.cmddemux.assign)
)

# 2. HTODemux
# Build a Seurat object, attach the hashing counts as an "HTO" assay and
# classify cells with Seurat's HTODemux.
# NOTE(review): pbmc.count and pbmc.hto are not assigned anywhere in this
# script -- presumably they come from bench.R or the loaded .rdata file;
# confirm they exist and share the same cell barcodes.
pbmc.hash <- CreateSeuratObject(counts = pbmc.count)
# Per-cell percentage of counts from features matching "^MT-", stored in
# the metadata column "percent.mt".
pbmc.hash <- PercentageFeatureSet(pbmc.hash, pattern = "^MT-", col.name = "percent.mt")
# SCTransform normalization with percent.mt regressed out.
pbmc.hash <- SCTransform(pbmc.hash, vars.to.regress = "percent.mt", verbose = FALSE)
pbmc.hash <- FindVariableFeatures(pbmc.hash, selection.method = "vst", nfeatures = 3000)
# Add the hashing counts as a separate assay and CLR-normalize that assay.
pbmc.hash[["HTO"]] <- CreateAssayObject(counts = pbmc.hto)
pbmc.hash <- NormalizeData(pbmc.hash, assay = "HTO", normalization.method = "CLR")
pbmc.hash <- HTODemux(pbmc.hash, assay = "HTO", positive.quantile = 0.99)
# Look up hash.ID by the row names (cell barcodes) of the results table;
# barcodes missing from pbmc.hash yield NA.
pbmc.demux.result$HTODemux <- pbmc.hash$hash.ID[rownames(pbmc.demux.result)]

# 3. GMM-Demux
# GMM-Demux is run outside R: write its input CSV, run the command shown
# below by hand, then read its output files back in.
# Prepare input data: the transposed hashing counts as a data frame.
pbmc.gmm.input <- t(pbmc.hash.count)
pbmc.gmm.input <- as.data.frame(pbmc.gmm.input)
# write.csv() always writes the header itself and ignores `col.names` (with
# a warning), so that argument is dropped; the file written is unchanged.
write.csv(
  pbmc.gmm.input,
  "~/pbmc.gmm.input.csv",
  quote = TRUE,
  row.names = TRUE
)
# Command: GMM-demux -c ~/pbmc.gmm.input.csv HTO-A,HTO-B,HTO-C,HTO-D,HTO-E,HTO-F,HTO-G,HTO-H -x HTO-A,HTO-B,HTO-C,HTO-D,HTO-E,HTO-F,HTO-G,HTO-H -f .
# Read the GMM-Demux result table and its config file (comma-separated, no
# header row).
pbmc.gmm.output <- read.csv("~/GMM_full.csv")
pbmc.gmm.config <- read.table("~/GMM_full.config", header = FALSE, sep = ",")
# GMM_demux_class is a project helper not defined in this file.
pbmc.gmm.demux <- GMM_demux_class(pbmc.gmm.output, pbmc.gmm.config, pbmc.hash.count)
# NOTE(review): assigned positionally, not matched by barcode -- assumes the
# helper returns one value per row of pbmc.demux.result in the same order;
# confirm.
pbmc.demux.result$`GMM-Demux` <- pbmc.gmm.demux

# 4. deMULTIplex2
# Reuses pbmc.gmm.input (the transposed hashing counts, stored as a data
# frame in the GMM-Demux section) as the tag matrix; the seed is fixed at
# 2024.
pbmc.demultiplex2.output <- demultiplexTags(pbmc.gmm.input, plot.diagnostics = FALSE, seed = 2024)
# deMULTIplex2_class is a project helper not defined in this file.
pbmc.demultiplex2.assign <- deMULTIplex2_class(pbmc.demultiplex2.output)
# NOTE(review): assigned positionally, not matched by barcode -- assumes the
# helper returns values in the row order of pbmc.demux.result; confirm.
pbmc.demux.result$deMULTIplex2 <- pbmc.demultiplex2.assign

# 5. demuxEM
# demuxEM is run outside R: write its two input files, run the command shown
# below by hand, then read its output tables back in.
# Move the row names of the hashing counts into a leading "Antibody" column.
pbmc.hash.write <- as.data.frame(pbmc.hash.count) %>%
  rownames_to_column("Antibody")
# NOTE(review): row.names keeps its default (TRUE), so the CSV also gets a
# leading column of row numbers before "Antibody" -- confirm that this is the
# layout demuxEM expects.
write.csv(pbmc.hash.write, "~/pbmc.hash.write.csv", quote = FALSE)
write10xCounts("~/pbmc.gex.h5", pbmc.gex.count, version = "3")
# demuxEM -p 8 --random-state 2024 ~/pbmc.gex.h5 ~/pbmc.hash.write.csv pbmc_demuxEM
pbmc.demuxEM.out1 <- read.table(file = "~/pbmc.demuxEM.demux.txt", header = TRUE)
pbmc.demuxEM.out2 <- read.table(file = "~/pbmc.demuxEM.assign.txt", sep = "\t", header = TRUE)
# demuxEM_class is a project helper not defined in this file.
pbmc.demuxEM.assign <- demuxEM_class(pbmc.demuxEM.out1, pbmc.demuxEM.out2, pbmc.hash.count, TRUE)
# Match calls to the results table by cell barcode; cells with no demuxEM
# call come back as NA and are relabelled "Uncertain".
pbmc.demux.result$demuxEM <- pbmc.demuxEM.assign[rownames(pbmc.demux.result)]
pbmc.demux.result$demuxEM[is.na(pbmc.demux.result$demuxEM)] <- "Uncertain"

# 6. demuxmix
# The gene expression matrix cannot be used here because its number of
# columns differs from that of the HTO matrix, so the "naive" model (HTO
# counts only) is fitted instead. The RNA-based variant is kept below,
# commented out, for reference.
#pbmc.gex.genes <- colSums(pbmc.gex.count > 0)
#pbmc.demuxmix.model <- demuxmix(hto = pbmc.hash.count, rna = pbmc.gex.count)
pbmc.demuxmix.model <- demuxmix(hto = as.matrix(pbmc.hash.count), model = "naive")
pbmc.demuxmix.labels <- dmmClassify(pbmc.demuxmix.model)
# demuxmix_class is a project helper not defined in this file.
pbmc.demuxmix.assign <- demuxmix_class(pbmc.demuxmix.labels)
# Match calls to the results table by cell barcode (row names).
pbmc.demux.result$demuxmix <- pbmc.demuxmix.assign[rownames(pbmc.demux.result)]

# 7. hashedDrops
# Run DropletUtils' hashedDrops on the hashing counts with default settings.
pbmc.hasheddrops.output <- hashedDrops(pbmc.hash.count)
# hashedDrops_class is a project helper not defined in this file.
pbmc.hasheddrops.assign <- hashedDrops_class(pbmc.hasheddrops.output, pbmc.hash.count)
# Match calls to the results table by cell barcode (row names).
pbmc.demux.result$hashedDrops <- pbmc.hasheddrops.assign[rownames(pbmc.demux.result)]

# 8. BFF
# Run both BFF variants in a single cellhashR call, then add each variant's
# calls to the results table, matched to its rows by cell barcode.
pbmc.bff.output <- GenerateCellHashingCalls(
  barcodeMatrix = pbmc.hash.count,
  methods = c("bff_raw", "bff_cluster")
)
# Name each call vector by cell barcode so it can be indexed by row name.
pbmc.bff.raw <- setNames(
  pbmc.bff.output$bff_raw,
  pbmc.bff.output$cellbarcode
)
pbmc.demux.result$BFF_raw <- pbmc.bff.raw[rownames(pbmc.demux.result)]
pbmc.bff.cluster <- setNames(
  pbmc.bff.output$bff_cluster,
  pbmc.bff.output$cellbarcode
)
pbmc.demux.result$BFF_cluster <- pbmc.bff.cluster[rownames(pbmc.demux.result)]

# Dimensional reduction for visualization
# Hold the raw hashing counts ("hto") and the CLR-normalized values ("clr")
# as assays of one SingleCellExperiment; both embeddings use "clr".
pbmc.sce <- SingleCellExperiment(
  assays = list(hto = pbmc.hash.count, clr = pbmc.clr.norm)
)
# UMAP and t-SNE are stochastic. Fix the RNG seed (2024, the value used for
# the other seeded steps in this script) so the embeddings are reproducible
# between runs.
set.seed(2024)
pbmc.sce <- runUMAP(pbmc.sce, exprs_values = "clr")
pbmc.sce <- runTSNE(pbmc.sce, exprs_values = "clr")

# Library size
# Natural log of the per-column (presumably per-cell) total counts.
# NOTE(review): the HTO totals get a +1 pseudocount but the gene expression
# totals do not, so a column with zero expression counts yields -Inf --
# confirm this asymmetry is intended.
pbmc.hto.lib <- log(colSums(pbmc.hash.count)+1)
pbmc.gex.lib <- log(colSums(pbmc.gex.count))

# Proportion of doublets
# DemuxSingletClass and AssignPropPlot are project helpers not defined in
# this file. The converted table pbmc.demux.result2 is reused for both the
# doublet and the negative summaries below.
pbmc.demux.result2 <- DemuxSingletClass(pbmc.demux.result, pbmc.hash.count)
pbmc.doublet.prop <- AssignPropPlot(pbmc.demux.result2, "doublet")

# Proportion of negatives
pbmc.negative.prop <- AssignPropPlot(pbmc.demux.result2, "negative")

# Benchmarking metrics: silhouette, DB index, Dunn index, CH index
# BenchMetricsNoGT is a project helper not defined in this file; it is given
# the hashing counts and the original results table (pbmc.demux.result, not
# pbmc.demux.result2).
pbmc.bench.metrics <- BenchMetricsNoGT(pbmc.hash.count, pbmc.demux.result)
