# Arrange GMM-Demux output
GMM_demux_class <- function(output.df, config.df, hash.count){
  gmm.demux <- output.df$Cluster_id
  config.df[,2] <- gsub(" ", "", config.df[,2])
  cl.class <- config.df[,2]
  names(cl.class) <- as.character(config.df[,1])
  for(i in unique(gmm.demux)){
    if(cl.class[as.character(i)] == "negative"){
      gmm.demux[which(gmm.demux %in% i)] <- "Negative"
    }else if(cl.class[as.character(i)] %in% rownames(hash.count)){
      gmm.demux[which(gmm.demux %in% i)] <- cl.class[as.character(i)]
    }else{
      gmm.demux[which(gmm.demux %in% i)] <- "Doublet"
    }
  }
  names(gmm.demux) <- output.df$X
  return(gmm.demux)
}

# Arrange deMULTIplex2 output
deMULTIplex2_class <- function(output){
  output.assign <- output$final_assign
  output.assign[which(output.assign %in% "negative")] <- "Negative"
  output.assign[which(output.assign %in% "multiplet")] <- "Doublet"
  return(output.assign)
}

# Arrange demuxEM output
demuxEM_class <- function(out1.df, out2.df, hash.count = NULL, label_by_no = FALSE){
  demuxEM.assign <- out1.df$demux_type
  names(demuxEM.assign) <- out1.df$barcodekey
  demuxEM.assign[which(demuxEM.assign %in% "unknown")] <- "Negative"
  demuxEM.assign[which(demuxEM.assign %in% "doublet")] <- "Doublet"
  singlet.idx <- which(out1.df$demux_type %in% "singlet")
  if(is.null(hash.count) | isFALSE(label_by_no)){
    demuxEM.assign[singlet.idx] <- out2.df$assignment[singlet.idx]
  }else{
    demuxEM.assign[singlet.idx] <- rownames(hash.count)[as.numeric(out2.df$assignment[singlet.idx])]
  }
  return(demuxEM.assign)
}

# Arrange demuxmix output
demuxmix_class <- function(out.df){
  demuxmix.assign <- out.df$HTO
  demuxmix.assign[which(demuxmix.assign %in% "uncertain")] <- "Uncertain"
  demuxmix.assign[which(demuxmix.assign %in% "negative")] <- "Negative"
  demuxmix.assign[which(out.df$Type %in% "multiplet")] <- "Doublet"
  names(demuxmix.assign) <- rownames(out.df)
  return(demuxmix.assign)
}

# Arrange hashedDrops output
hashedDrops_class <- function(output.df, hash.count){
  hasheddrops.assign <- rownames(hash.count)[output.df$Best]
  hasheddrops.assign[!output.df$Confident] <- "Uncertain"
  hasheddrops.assign[which(output.df$Doublet)] <- "Doublet"
  names(hasheddrops.assign) <- rownames(output.df)
  return(hasheddrops.assign)
}

# Convert demultiplexing results to singlets
DemuxSingletClass <- function(demux.result, hash.count){
  for(i in 1:ncol(demux.result)){
    demux.result[,i] <- as.character(demux.result[,i])
    demux.result[,i][which(demux.result[,i] %in% rownames(hash.count))] <- "Singlet"
  }
  return(demux.result)
}

# Concordance of each method with the ground truth
ConfusionMat <- function(drop.assign, ground.truth.method, class.levels){
  plot.df <- NULL
  for(i in colnames(drop.assign)[which(!colnames(drop.assign) %in% ground.truth.method)]){ 
    ground.levels <- class.levels[which(class.levels %in% unique(drop.assign[,ground.truth.method]))]
    cmp.levels <- class.levels[which(class.levels %in% unique(drop.assign[,i]))]
    mat <- matrix(0, nrow = length(ground.levels), ncol = length(cmp.levels))
    rownames(mat) <- ground.levels
    colnames(mat) <- cmp.levels
    for(j in ground.levels){
      for(k in cmp.levels){
        count <- length(intersect(which(drop.assign[,ground.truth.method] %in% j), which(drop.assign[,i] %in% k)))
        mat[j,k] <- count/nrow(drop.assign)*100
        data.df <- melt(mat, varnames = c('r', 'c'), value.name = 'prop')
        data.df$prop <- round(data.df$prop)
        data.df$method <- i
        plot.df <- rbind(plot.df, data.df)
      }
    }
  }
  return(plot.df)
}

# Overall concordance
ConcordanceDF <- function(drop.assign, ground.truth.method){
  plot.df <- data.frame("value" = integer(), "method" = character())
  for(i in colnames(drop.assign)[which(!colnames(drop.assign) %in% ground.truth.method)]){
    data.df <- drop.assign[,c(ground.truth.method, i)]
    concordant.calls <- length(which(data.df[,1] == data.df[,2]))
    concordant.percent <- round(concordant.calls/nrow(drop.assign)*100)
    plot.df[nrow(plot.df)+1,] <- c(concordant.percent, i)
  }
  return(plot.df)
}

# Precision recall F-score MCC (sample-specific)
GTStats <- function(drop.assign, ground.truth.method, class.levels){
  plot.df <- NULL
  for(i in colnames(drop.assign)[which(!colnames(drop.assign) %in% ground.truth.method)]){
    data.df <- drop.assign[,c(ground.truth.method, i)]
    for(j in class.levels){
      tp <- length(which(data.df[,1] == j & data.df[,2] == j))
      fn <- length(which(data.df[,1] == j & data.df[,2] != j))
      tn <- length(which(data.df[,1] != j & data.df[,2] != j))
      fp <- length(which(data.df[,1] != j & data.df[,2] == j))
      precision <- tp/(tp+fp)
      recall <- tp/(tp+fn)
      fscore <- 2*precision*recall/(precision+recall)
      tp <- as.numeric(tp)
      fn <- as.numeric(fn)
      tn <- as.numeric(tn)
      fp <- as.numeric(fp)
      mcc <- (tn*tp-fn*fp)/sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
      data.df2 <- data.frame("value" = c(precision, recall, fscore, mcc), "score" = c("Precision", "Recall", "F-score", "MCC"), "class" = j, "method" = i)
      plot.df <- rbind(plot.df, data.df2)
    }
  }
  return(plot.df)
}

# Precision recall F-score barplot (overall)
GTStatsAvg <- function(drop.assign, ground.truth.method, class.levels, mode){
  plot.df <- NULL
  for(i in colnames(drop.assign)[which(!colnames(drop.assign) %in% ground.truth.method)]){
    data.df <- drop.assign[,c(ground.truth.method, i)]
    precision <- c()
    recall <- c()
    mcc <- c()
    if(mode == "macro"){
      for(j in class.levels){
        tp <- length(which(data.df[,1] == j & data.df[,2] == j))
        fn <- length(which(data.df[,1] == j & data.df[,2] != j))
        tn <- length(which(data.df[,1] != j & data.df[,2] != j))
        fp <- length(which(data.df[,1] != j & data.df[,2] == j))
        precision <- c(precision, tp/(tp+fp))
        recall <- c(recall, tp/(tp+fn))
        tp <- as.numeric(tp)
        fn <- as.numeric(fn)
        tn <- as.numeric(tn)
        fp <- as.numeric(fp)
        mcc <- c(mcc, (tn*tp-fn*fp)/sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)))
      }
      precision <- sum(precision[!is.na(precision)])/length(precision[!is.na(precision)])
      recall <- sum(recall[!is.na(recall)])/length(recall[!is.na(recall)])
      fscore <- 2*precision*recall/(precision+recall)
      mcc <- sum(mcc[!is.na(mcc)])/length(mcc[!is.na(mcc)])
    }else if(mode == "micro"){
      tp <- 0
      fn <- 0
      tn <- 0
      fp <- 0
      for(j in class.levels){
        tp <- tp + length(which(data.df[,1] == j & data.df[,2] == j))
        fn <- fn + length(which(data.df[,1] == j & data.df[,2] != j))
        tn <- tn + length(which(data.df[,1] != j & data.df[,2] != j))
        fp <- fp + length(which(data.df[,1] != j & data.df[,2] == j))
        precision <- c(precision, tp/(tp+fp))
        recall <- c(recall, tp/(tp+fn))
        tp <- as.numeric(tp)
        fn <- as.numeric(fn)
        tn <- as.numeric(tn)
        fp <- as.numeric(fp)
        mcc <- c(mcc, (tn*tp-fn*fp)/sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn)))
      }
      precision <- tp/(tp+fp)
      recall <- tp/(tp+fn) 
      fscore <- 2*precision*recall/(precision+recall)
      tp <- as.numeric(tp)
      fn <- as.numeric(fn)
      tn <- as.numeric(tn)
      fp <- as.numeric(fp)
      mcc <- (tn*tp-fn*fp)/sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))
    }
    data.df2 <- data.frame("value" = c(precision, recall, fscore, mcc), "score" = c("Precision", "Recall", "F-score", "MCC"), "method" = i)
    plot.df <- rbind(plot.df, data.df2)
  }
  return(plot.df)
}

# Benchmarking metrics without ground truth: Silhouette score, DB index, Dunn index, CH index
BenchMetricsNoGT <- function(hash.count, drop.assign){
  hash.log <- log(as.matrix(hash.count)+1)
  bench.list <- list("Silhouette score" = NULL, "DB index" = c(), "Dunn index" = c(), "CH index" = c())
  for(i in 1:ncol(drop.assign)){
    sil.out <- approxSilhouette(t(hash.log), drop.assign[,i])
    sil.width <- sil.out$width
    bench.list$`Silhouette score` <- cbind(bench.list$`Silhouette score`, sil.width)
    colnames(bench.list$`Silhouette score`)[i] <- colnames(drop.assign)[i]
    sub.hash.log <- hash.log[,which(drop.assign[,i] %in% rownames(hash.count))]
    sub.drop.assign <- drop.assign[,i][which(drop.assign[,i] %in% rownames(hash.count))]
    cl.idx <- match(sub.drop.assign, unique(sub.drop.assign))
    eu.dist <- dist(t(sub.hash.log), method="euclidean")
    db.index <- index.DB(as.data.frame(t(sub.hash.log)), cl.idx, eu.dist, centrotypes="medoids")$DB
    bench.list$`DB index` <- c(bench.list$`DB index`, db.index)
    names(bench.list$`DB index`)[i] <- colnames(drop.assign)[i]
    dunn.index <- dunn(eu.dist, cl.idx)
    bench.list$`Dunn index` <- c(bench.list$`Dunn index`, dunn.index)
    names(bench.list$`Dunn index`)[i] <- colnames(drop.assign)[i]
    ch.index <- index.G1(as.data.frame(t(sub.hash.log)), cl.idx, eu.dist, centrotypes="medoids")
    bench.list$`CH index` <- c(bench.list$`CH index`, ch.index)
    names(bench.list$`CH index`)[i] <- colnames(drop.assign)[i]
  }
  return(bench.list)
}

# Library size ratios of doublets vs. singlets and singlets vs. negatives across different methods
LibRatio <- function(lib, drop.assign){
  plot.df <- NULL
  for(i in 1:ncol(drop.assign)){
    data.df <- data.frame("lib" = lib, "class" = drop.assign[,i])
    data.df2 <- data.df %>% 
      group_by(class) %>% 
      summarise("m_lib" = median(lib))
    
    if(length(which(!data.df2$class %in% c("Doublet", "Negative", "Uncertain"))) > 0){
      if("Doublet" %in% data.df2$class & ("Negative" %in% data.df2$class | "Uncertain" %in% data.df2$class)){
        d.lib <- data.df2$m_lib[which(data.df2$class == "Doublet")]
        n.lib <- data.df2$m_lib[which(data.df2$class == "Negative" | data.df2$class == "Uncertain")]
        ds_ratio <- d.lib/data.df2$m_lib[which(!data.df2$class %in% c("Doublet", "Negative", "Uncertain"))]/d.lib
        ns_ratio <- data.df2$m_lib[which(!data.df2$class %in% c("Doublet", "Negative", "Uncertain"))]/n.lib
      }else{
        ds_ratio <- NA
        ns_ratio <- NA
      }
    }else{
      ds_ratio <- NA
      ns_ratio <- NA
    }
    if(!is.na(ds_ratio[1])){
      data.df <- data.frame("r" = c(ds_ratio, ns_ratio), "ratio" = c(rep("Doublet/Singlet",length(ds_ratio)), rep("Singlet/Negative",length(ns_ratio))), "method" = colnames(drop.assign)[i])
      plot.df <- rbind(plot.df, data.df) 
    }
  }
  return(plot.df)
}

# Summary of library size ratio
LibRatioSum <- function(lib, drop.assign){
  plot.df <- NULL
  for(i in 1:ncol(drop.assign)){
    data.df <- data.frame("lib" = lib, "class" = drop.assign[,i])
    data.df2 <- data.df %>% 
      group_by(class) %>% 
      summarise("m_lib" = median(lib))
    if(length(which(!data.df2$class %in% c("Doublet", "Negative", "Uncertain"))) > 0){
      s.lib <- data.df2$m_lib[which(!data.df2$class %in% c("Doublet", "Negative", "Uncertain"))]
      s.lib.max <- max(s.lib)
      s.lib.min <- min(s.lib)
      if("Doublet" %in% data.df2$class & ("Negative" %in% data.df2$class | "Uncertain" %in% data.df2$class)){
        ds_ratio <- data.df2$m_lib[which(data.df2$class == "Doublet")]/s.lib.max
        if("Negative"%in% data.df2$class){
          ns_ratio <- s.lib.min/data.df2$m_lib[which(data.df2$class == "Negative")]
        }else{
          ns_ratio <- s.lib.min/data.df2$m_lib[which(data.df2$class == "Uncertain")]
        }
      }else{
        ds_ratio <- NA
        ns_ratio <- NA
      }
    }else{
      ds_ratio <- NA
      ns_ratio <- NA
    }
    if(!is.na(ds_ratio)){
      data.df <- data.frame("dr" = ds_ratio, "nr" = ns_ratio, "method" = colnames(drop.assign)[i])
      plot.df <- rbind(plot.df, data.df) 
    }
  }
  return(plot.df)
}

# Proption of doublets or negatives
AssignProp <- function(drop.assign, mode){
  plot.df <- NULL
  for(i in 1:ncol(drop.assign)){
    assign.num <- table(drop.assign[,i])
    data.df <- data.frame("class" = names(assign.num), "value" = as.vector(assign.num), "method" = colnames(drop.assign)[i])
    plot.df <- rbind(plot.df, data.df)
  }
  if(mode == "doublet"){
    plot.df <- plot.df[which(plot.df$class %in% "Doublet"),]
    plot.df$value <- plot.df$value/nrow(drop.assign)*100
    plot.df$class <- "Doublet"
  }else if(mode == "negative"){
    plot.df <- plot.df[which(plot.df$class %in% c("Negative", "Uncertain")),]
    plot.df <- plot.df %>% 
      group_by(method) %>%
      summarise(value = sum(value))
    plot.df$value <- plot.df$value/nrow(drop.assign)*100
    plot.df$class <- "Negative"
  }
  return(plot.df)
}
