Contents

Data Processing

Session Preparation

Load libraries:

start_time <- Sys.time()

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(BarMixer)
quiet_library(ggplot2)
quiet_library(cowplot)

Declaring start

stm("Starting HTO count processing")
stm(paste0("Using BarMixer v", installed.packages()["BarMixer","Version"]))

Argument parsing

in_file <- params$in_file
in_samples  <- params$in_samples
in_well <- params$in_well
if(is.null(params$in_min_cutoff)) {
  in_min_cutoff <- "auto"
} else {
  in_min_cutoff <- as.numeric(params$in_min_cutoff)
}
if(is.null(params$in_eel)) {
  in_eel <- TRUE
} else {
  in_eel <- as.logical(params$in_eel)
}
out_dir <- params$out_dir

out_mat <- file.path(out_dir, paste0(in_well, "_hto_count_matrix.csv.gz"))
out_tbl <- file.path(out_dir, paste0(in_well, "_hto_category_table.csv.gz"))
out_json <- file.path(out_dir, paste0(in_well, "_hto_processing_metrics.json"))

stm(paste0("IN  HTO Tag_Counts.csv : ", in_file))
stm(paste0("IN  SampleSheet.csv    : ", in_samples))
stm(paste0("IN  Well               : ", in_well))
stm(paste0("IN  Min. Cutoff        : ", in_min_cutoff))
stm(paste0("IN  Expect equal load  : ", in_eel))
stm(paste0("OUT Count matrix       : ", out_mat))
stm(paste0("OUT Category table     : ", out_tbl))
stm(paste0("OUT JSON metrics       : ", out_json))
print(c(
  paste0("IN  Tag_Counts.csv   : ", in_file),
  paste0("IN  HTO sample sheet : ", in_samples),
  paste0("IN  Well             : ", in_well),
  paste0("IN  Min. Cutoff      : ", in_min_cutoff),
  paste0("IN  Expect equal load: ", in_eel),
  paste0("OUT Count matrix     : ", out_mat),
  paste0("OUT Category table   : ", out_tbl),
  paste0("OUT JSON metrics     : ", out_json)
))
## [1] "IN  Tag_Counts.csv   : /mnt/barware-manuscript/BarWare-demo/demo_output/W4-hashed-24k/hto_counts/W4-hashed-24k-HTO_Tag_Counts.csv"            
## [2] "IN  HTO sample sheet : /mnt/barware-manuscript/BarWare-demo/demo_sample_sheet.csv"                                                            
## [3] "IN  Well             : W4-hashed-24k"                                                                                                         
## [4] "IN  Min. Cutoff      : 10"                                                                                                                    
## [5] "IN  Expect equal load: TRUE"                                                                                                                  
## [6] "OUT Count matrix     : /mnt/barware-manuscript/BarWare-demo/demo_output/W4-hashed-24k/hto_processed/W4-hashed-24k_hto_count_matrix.csv.gz"    
## [7] "OUT Category table   : /mnt/barware-manuscript/BarWare-demo/demo_output/W4-hashed-24k/hto_processed/W4-hashed-24k_hto_category_table.csv.gz"  
## [8] "OUT JSON metrics     : /mnt/barware-manuscript/BarWare-demo/demo_output/W4-hashed-24k/hto_processed/W4-hashed-24k_hto_processing_metrics.json"

Return to Contents

Load Input Datasets

Check Input Files

if(!file.exists(in_file)) {
  stm(paste0("ERROR: Cannot find IN Tag_Counts.csv:", in_file))
  stop()
} else {
  stm(paste0("Found Tag Counts file:", in_file))
}

if(!file.exists(in_samples)) {
  stm(paste0("ERROR: Cannot find IN SampleSheet.csv:", in_samples))
  stop()
} else {
  stm(paste0("Found SampleSheet.csv:", in_samples))
}

Sample Sheet

Read input table/matrix

in_hto_table <- fread(in_file)

Return to Contents

Process HTO Counts

Convert input data to matrix

stm("Formatting HTO matrix and filtering for valid barcodes")

keep_cols <- colnames(in_hto_table)[colnames(in_hto_table) %in% in_hto_key$hash_name]
in_hto_mat <- t(as.matrix(in_hto_table[, ..keep_cols]))
in_hto_mat <- as(in_hto_mat, "dgCMatrix")
colnames(in_hto_mat) <- in_hto_table$cell_barcode
rownames(in_hto_mat) <- in_hto_key$hash_tag[match(rownames(in_hto_mat), in_hto_key$hash_name)]

in_hto_mat <- add_missing_hto_rows(in_hto_mat,
                                   valid_htos = in_hto_key$hash_tag)
in_hto_mat <- in_hto_mat[,colSums(in_hto_mat) > 1]

Call positive and negative HTOs

stm("Binarizing HTO counts to call positive and negative hashes")
if(in_min_cutoff == "auto") {
  binary_results <- binarize_hash_matrix(in_hto_mat,
                                         valid_htos = in_hto_key$hash_tag,
                                         use_median_cut = TRUE,
                                         expect_equal_loading = in_eel)
} else {
  binary_results <- binarize_hash_matrix(in_hto_mat,
                                         valid_htos = in_hto_key$hash_tag,
                                         use_median_cut = FALSE,
                                         min_cut = as.numeric(in_min_cutoff),
                                         expect_equal_loading = in_eel)
}

Convert hashes to category calls

stm("Converting binary results to categories")
category_results <- categorize_binary_hash_matrix(binary_results$bmat)

Return to Contents

Write Outputs

Write count matrix

stm(paste0("Writing count matrix to ", out_mat))
out_hto_mat <- as(in_hto_mat, "matrix")
out_hto_mat <- as.data.table(out_hto_mat)
out_hto_mat <- cbind(data.table(hto_barcode = rownames(in_hto_mat)),
                     out_hto_mat)
fwrite(out_hto_mat,
       file = out_mat)

Write category table

stm(paste0("Writing category table to ", out_tbl))

hto_barcodes_to_sample_ids <- function(x,
                                       bc_key) {
  x <- as.character(x)
  res <- character()
  for(i in seq_along(x)) {
    bcs <- unlist(strsplit(x[i], ";")[[1]])
    res[i] <- paste(bc_key$sample_id[match(bcs, bc_key$hash_tag)],
                    collapse = ";")
  }
  res
}

hash_category_table <- category_results$hash_category_table
hash_category_table$sample_id <- hto_barcodes_to_sample_ids(hash_category_table$hto_barcode,
                                                            in_hto_key)

rownames(hash_category_table) <- NULL

fwrite(hash_category_table,
       file = out_tbl)

Return to Contents

QC Tables and Plots

qc_list <- list(report_type = "hto_processing",
                report_datetime = as.character(start_time),
                report_uuid = ids::uuid(use_time = TRUE),
                package = "HTOparser",
                package_version = sessionInfo()$otherPkgs$HTOparser$Version,
                well_id = in_well)

Hash Count Summary

Return to Contents

Generate plots

stm("Generating plots for report")
plot_list <- lapply(
  1:nrow(in_hto_key),
  function(x) {
    barcode <- in_hto_key$hash_tag[x]
    sample_id <- in_hto_key$sample_id[x]
    vals <- in_hto_mat[barcode,]
    
    cutoff <- bsummary$cutoff[bsummary$hto_barcode == barcode]
    
    n_vals_below_min_cut <- sum(vals <= 5)
    vals_above_min_cut <- vals[vals > 5]
    
    if(length(vals_above_min_cut) > 0) {
      
      val_df <- data.frame(vals = log10(vals_above_min_cut))
      cut_df <- data.frame(cutoff = log10(cutoff))
      
      ggplot() +
        geom_histogram(data = val_df,
                       aes(x = vals),
                       fill = "darkgreen",
                       binwidth = 0.05) +
        geom_vline(data = cut_df,
                   aes(xintercept = cutoff),
                   size = 1,
                   color = "dodgerblue") +
        scale_x_continuous("log10(HTO Count)", 
                           limits = c(0,5)) +
        scale_fill_identity() +
        scale_color_identity() +
        ggtitle(paste(sample_id, "|", barcode)) +
        theme_bw(base_size = 8)
    } else {
      text_df <- data.frame(x = 2.5,
                            y = 0.5,
                            label = "Hash not detected")
      
      ggplot() +
        geom_text(data = text_df,
                  aes(x = x,
                      y = y,
                      label = label)) +
        scale_x_continuous("log10(HTO Count)", 
                           limits = c(0,5)) +
        scale_y_continuous("count",
                           limits = c(0,1)) +
        ggtitle(paste(sample_id, "|", barcode)) +
        theme_bw(base_size = 8)
    }
  }
)

Hash Count Distribution Plots

suppressWarnings(
  plot_grid(plotlist = plot_list,
            nrow = ceiling(length(plot_list) / 2),
            ncol = 2)
)

Return to Contents

Hash Category Summary

Return to Contents

Sample/HTO Singlet Counts

singlet_summary <- make_singlet_summary(category_results$hash_category_table,
                                        valid_htos = in_hto_key$hash_tag)
singlet_summary$sample_id <- in_hto_key$sample_id[match(singlet_summary$hto_barcode, in_hto_key$hash_tag)]
singlet_summary <- setcolorder(singlet_summary,
                               c("sample_id",
                                 "hto_barcode",
                                 "n_singlets",
                                 "frac_singlets"))

n_no_sample_id <- sum(is.na(singlet_summary$sample_id))
if(n_no_sample_id > 0) {
  singlet_summary$sample_id[is.na(singlet_summary$sample_id)] <- paste0("NA_",1:n_no_sample_id)
}

for(sample_id in singlet_summary$sample_id) {
  qc_list$sample_hto_stats[[sample_id]]$n_singlets <- singlet_summary$n_singlets[singlet_summary$sample_id == sample_id]
}

qc_table(singlet_summary)

Return to Contents

Sample/HTO Singlet Counts Plot

singlet_plot_data <- singlet_summary
singlet_plot_data$x <- 1:nrow(singlet_summary)

ggplot() +
  geom_rect(data = singlet_plot_data,
            aes(xmin = x - 0.4, xmax = x + 0.4,
                ymin = 0, ymax = n_singlets)) +
  geom_text(data = singlet_plot_data,
            aes(x = x,
                y = n_singlets * 1.02,
                label = n_singlets),
            vjust = 0,
            size = 3) +
  ggtitle("Singlet Counts") +
  scale_x_continuous("",
                     breaks = singlet_plot_data$x,
                     labels = paste0(singlet_plot_data$hto_barcode,
                                     "\n",
                                     singlet_plot_data$sample_id)) +
  scale_y_continuous("N Singlets") +
  theme_bw(base_size = 8) +
  theme(axis.text.x = element_text(angle = 90,
                                   hjust = 1,
                                   vjust = 0.3))

Return to Contents

Write QC JSON

stm(paste0("Writing JSON to ", out_json))
qc_list_json <- jsonlite::toJSON(qc_list,
                                 auto_unbox = TRUE,
                                 pretty = TRUE)
writeLines(qc_list_json,
           out_json)

Return to Contents

Session Information

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
## 
## Matrix products: default
## BLAS:   /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] cowplot_1.0.0     ggplot2_3.3.0     BarMixer_1.0.1    rhdf5_2.28.1     
## [5] Matrix_1.2-18     data.table_1.13.0 optparse_1.6.6   
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.1.0   xfun_0.12          ks_1.13.1          purrr_0.3.4       
##  [5] lattice_0.20-40    colorspace_1.4-1   vctrs_0.3.4        generics_0.0.2    
##  [9] htmltools_0.5.1.1  getopt_1.20.3      yaml_2.2.1         pracma_2.2.9      
## [13] rlang_0.4.8        pillar_1.4.6       later_1.0.0        glue_1.4.2        
## [17] withr_2.1.2        uuid_0.1-4         lifecycle_0.2.0    rootSolve_1.8.2.1 
## [21] stringr_1.4.0      munsell_0.5.0      gtable_0.3.0       htmlwidgets_1.5.1 
## [25] mvtnorm_1.1-0      evaluate_0.14      labeling_0.3       knitr_1.28        
## [29] fastmap_1.0.1      httpuv_1.5.2       crosstalk_1.0.0    Rcpp_1.0.3        
## [33] KernSmooth_2.23-16 xtable_1.8-4       diptest_0.76-0     scales_1.1.0      
## [37] promises_1.1.0     DT_0.12            jsonlite_1.6.1     farver_2.0.3      
## [41] mime_0.9           ids_1.0.1          multimode_1.5      digest_0.6.25     
## [45] stringi_1.4.6      dplyr_1.0.2        shiny_1.4.0        grid_3.6.3        
## [49] tools_3.6.3        magrittr_1.5       tibble_3.0.4       crayon_1.3.4      
## [53] pkgconfig_2.0.3    ellipsis_0.3.1     assertthat_0.2.1   rmarkdown_2.1     
## [57] Rhdf5lib_1.6.3     R6_2.4.1           mclust_5.4.7       compiler_3.6.3

Total time elapsed

end_time <- Sys.time()
diff_time <- end_time - start_time
time_message <- paste0("Elapsed Time: ", 
                       round(diff_time, 3),
                       " ", units(diff_time))
print(time_message)
## [1] "Elapsed Time: 31.198 secs"
stm(time_message)
stm("HTO count processing complete.")

Return to Contents