start_time <- Sys.time()
quiet_library <- function(...) {
suppressPackageStartupMessages(library(...))
}
quiet_library(BarMixer)
quiet_library(ggplot2)
quiet_library(cowplot)
Declaring start
stm("Starting HTO count processing")
stm(paste0("Using BarMixer v", installed.packages()["BarMixer","Version"]))
in_file <- params$in_file
in_samples <- params$in_samples
in_well <- params$in_well
if(is.null(params$in_min_cutoff)) {
in_min_cutoff <- "auto"
} else {
in_min_cutoff <- as.numeric(params$in_min_cutoff)
}
if(is.null(params$in_eel)) {
in_eel <- TRUE
} else {
in_eel <- as.logical(params$in_eel)
}
out_dir <- params$out_dir
out_mat <- file.path(out_dir, paste0(in_well, "_hto_count_matrix.csv.gz"))
out_tbl <- file.path(out_dir, paste0(in_well, "_hto_category_table.csv.gz"))
out_json <- file.path(out_dir, paste0(in_well, "_hto_processing_metrics.json"))
stm(paste0("IN HTO Tag_Counts.csv : ", in_file))
stm(paste0("IN SampleSheet.csv : ", in_samples))
stm(paste0("IN Well : ", in_well))
stm(paste0("IN Min. Cutoff : ", in_min_cutoff))
stm(paste0("IN Expect equal load : ", in_eel))
stm(paste0("OUT Count matrix : ", out_mat))
stm(paste0("OUT Category table : ", out_tbl))
stm(paste0("OUT JSON metrics : ", out_json))
print(c(
paste0("IN Tag_Counts.csv : ", in_file),
paste0("IN HTO sample sheet : ", in_samples),
paste0("IN Well : ", in_well),
paste0("IN Min. Cutoff : ", in_min_cutoff),
paste0("IN Expect equal load: ", in_eel),
paste0("OUT Count matrix : ", out_mat),
paste0("OUT Category table : ", out_tbl),
paste0("OUT JSON metrics : ", out_json)
))
## [1] "IN Tag_Counts.csv : /mnt/barware-manuscript/BarWare-demo/demo_output/W3-hashed-16k/hto_counts/W3-hashed-16k-HTO_Tag_Counts.csv"
## [2] "IN HTO sample sheet : /mnt/barware-manuscript/BarWare-demo/demo_sample_sheet.csv"
## [3] "IN Well : W3-hashed-16k"
## [4] "IN Min. Cutoff : 10"
## [5] "IN Expect equal load: TRUE"
## [6] "OUT Count matrix : /mnt/barware-manuscript/BarWare-demo/demo_output/W3-hashed-16k/hto_processed/W3-hashed-16k_hto_count_matrix.csv.gz"
## [7] "OUT Category table : /mnt/barware-manuscript/BarWare-demo/demo_output/W3-hashed-16k/hto_processed/W3-hashed-16k_hto_category_table.csv.gz"
## [8] "OUT JSON metrics : /mnt/barware-manuscript/BarWare-demo/demo_output/W3-hashed-16k/hto_processed/W3-hashed-16k_hto_processing_metrics.json"
if(!file.exists(in_file)) {
stm(paste0("ERROR: Cannot find IN Tag_Counts.csv:", in_file))
stop()
} else {
stm(paste0("Found Tag Counts file:", in_file))
}
if(!file.exists(in_samples)) {
stm(paste0("ERROR: Cannot find IN SampleSheet.csv:", in_samples))
stop()
} else {
stm(paste0("Found SampleSheet.csv:", in_samples))
}
stm("Formatting HTO matrix and filtering for valid barcodes")
keep_cols <- colnames(in_hto_table)[colnames(in_hto_table) %in% in_hto_key$hash_name]
in_hto_mat <- t(as.matrix(in_hto_table[, ..keep_cols]))
in_hto_mat <- as(in_hto_mat, "dgCMatrix")
colnames(in_hto_mat) <- in_hto_table$cell_barcode
rownames(in_hto_mat) <- in_hto_key$hash_tag[match(rownames(in_hto_mat), in_hto_key$hash_name)]
in_hto_mat <- add_missing_hto_rows(in_hto_mat,
valid_htos = in_hto_key$hash_tag)
in_hto_mat <- in_hto_mat[,colSums(in_hto_mat) > 1]
stm("Binarizing HTO counts to call positive and negative hashes")
if(in_min_cutoff == "auto") {
binary_results <- binarize_hash_matrix(in_hto_mat,
valid_htos = in_hto_key$hash_tag,
use_median_cut = TRUE,
expect_equal_loading = in_eel)
} else {
binary_results <- binarize_hash_matrix(in_hto_mat,
valid_htos = in_hto_key$hash_tag,
use_median_cut = FALSE,
min_cut = as.numeric(in_min_cutoff),
expect_equal_loading = in_eel)
}
stm("Converting binary results to categories")
category_results <- categorize_binary_hash_matrix(binary_results$bmat)
stm(paste0("Writing count matrix to ", out_mat))
out_hto_mat <- as(in_hto_mat, "matrix")
out_hto_mat <- as.data.table(out_hto_mat)
out_hto_mat <- cbind(data.table(hto_barcode = rownames(in_hto_mat)),
out_hto_mat)
fwrite(out_hto_mat,
file = out_mat)
stm(paste0("Writing category table to ", out_tbl))
hto_barcodes_to_sample_ids <- function(x,
bc_key) {
x <- as.character(x)
res <- character()
for(i in seq_along(x)) {
bcs <- unlist(strsplit(x[i], ";")[[1]])
res[i] <- paste(bc_key$sample_id[match(bcs, bc_key$hash_tag)],
collapse = ";")
}
res
}
hash_category_table <- category_results$hash_category_table
hash_category_table$sample_id <- hto_barcodes_to_sample_ids(hash_category_table$hto_barcode,
in_hto_key)
rownames(hash_category_table) <- NULL
fwrite(hash_category_table,
file = out_tbl)
qc_list <- list(report_type = "hto_processing",
report_datetime = as.character(start_time),
report_uuid = ids::uuid(use_time = TRUE),
package = "HTOparser",
package_version = sessionInfo()$otherPkgs$HTOparser$Version,
well_id = in_well)
stm("Generating plots for report")
plot_list <- lapply(
1:nrow(in_hto_key),
function(x) {
barcode <- in_hto_key$hash_tag[x]
sample_id <- in_hto_key$sample_id[x]
vals <- in_hto_mat[barcode,]
cutoff <- bsummary$cutoff[bsummary$hto_barcode == barcode]
n_vals_below_min_cut <- sum(vals <= 5)
vals_above_min_cut <- vals[vals > 5]
if(length(vals_above_min_cut) > 0) {
val_df <- data.frame(vals = log10(vals_above_min_cut))
cut_df <- data.frame(cutoff = log10(cutoff))
ggplot() +
geom_histogram(data = val_df,
aes(x = vals),
fill = "darkgreen",
binwidth = 0.05) +
geom_vline(data = cut_df,
aes(xintercept = cutoff),
size = 1,
color = "dodgerblue") +
scale_x_continuous("log10(HTO Count)",
limits = c(0,5)) +
scale_fill_identity() +
scale_color_identity() +
ggtitle(paste(sample_id, "|", barcode)) +
theme_bw(base_size = 8)
} else {
text_df <- data.frame(x = 2.5,
y = 0.5,
label = "Hash not detected")
ggplot() +
geom_text(data = text_df,
aes(x = x,
y = y,
label = label)) +
scale_x_continuous("log10(HTO Count)",
limits = c(0,5)) +
scale_y_continuous("count",
limits = c(0,1)) +
ggtitle(paste(sample_id, "|", barcode)) +
theme_bw(base_size = 8)
}
}
)
suppressWarnings(
plot_grid(plotlist = plot_list,
nrow = ceiling(length(plot_list) / 2),
ncol = 2)
)
singlet_summary <- make_singlet_summary(category_results$hash_category_table,
valid_htos = in_hto_key$hash_tag)
singlet_summary$sample_id <- in_hto_key$sample_id[match(singlet_summary$hto_barcode, in_hto_key$hash_tag)]
singlet_summary <- setcolorder(singlet_summary,
c("sample_id",
"hto_barcode",
"n_singlets",
"frac_singlets"))
n_no_sample_id <- sum(is.na(singlet_summary$sample_id))
if(n_no_sample_id > 0) {
singlet_summary$sample_id[is.na(singlet_summary$sample_id)] <- paste0("NA_",1:n_no_sample_id)
}
for(sample_id in singlet_summary$sample_id) {
qc_list$sample_hto_stats[[sample_id]]$n_singlets <- singlet_summary$n_singlets[singlet_summary$sample_id == sample_id]
}
qc_table(singlet_summary)
singlet_plot_data <- singlet_summary
singlet_plot_data$x <- 1:nrow(singlet_summary)
ggplot() +
geom_rect(data = singlet_plot_data,
aes(xmin = x - 0.4, xmax = x + 0.4,
ymin = 0, ymax = n_singlets)) +
geom_text(data = singlet_plot_data,
aes(x = x,
y = n_singlets * 1.02,
label = n_singlets),
vjust = 0,
size = 3) +
ggtitle("Singlet Counts") +
scale_x_continuous("",
breaks = singlet_plot_data$x,
labels = paste0(singlet_plot_data$hto_barcode,
"\n",
singlet_plot_data$sample_id)) +
scale_y_continuous("N Singlets") +
theme_bw(base_size = 8) +
theme(axis.text.x = element_text(angle = 90,
hjust = 1,
vjust = 0.3))
stm(paste0("Writing JSON to ", out_json))
qc_list_json <- jsonlite::toJSON(qc_list,
auto_unbox = TRUE,
pretty = TRUE)
writeLines(qc_list_json,
out_json)
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Debian GNU/Linux 9 (stretch)
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.7.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.7.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cowplot_1.0.0 ggplot2_3.3.0 BarMixer_1.0.1 rhdf5_2.28.1
## [5] Matrix_1.2-18 data.table_1.13.0 optparse_1.6.6
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.1.0 xfun_0.12 ks_1.13.1 purrr_0.3.4
## [5] lattice_0.20-40 colorspace_1.4-1 vctrs_0.3.4 generics_0.0.2
## [9] htmltools_0.5.1.1 getopt_1.20.3 yaml_2.2.1 pracma_2.2.9
## [13] rlang_0.4.8 pillar_1.4.6 later_1.0.0 glue_1.4.2
## [17] withr_2.1.2 uuid_0.1-4 lifecycle_0.2.0 rootSolve_1.8.2.1
## [21] stringr_1.4.0 munsell_0.5.0 gtable_0.3.0 htmlwidgets_1.5.1
## [25] mvtnorm_1.1-0 evaluate_0.14 labeling_0.3 knitr_1.28
## [29] fastmap_1.0.1 httpuv_1.5.2 crosstalk_1.0.0 Rcpp_1.0.3
## [33] KernSmooth_2.23-16 xtable_1.8-4 diptest_0.76-0 scales_1.1.0
## [37] promises_1.1.0 DT_0.12 jsonlite_1.6.1 farver_2.0.3
## [41] mime_0.9 ids_1.0.1 multimode_1.5 digest_0.6.25
## [45] stringi_1.4.6 dplyr_1.0.2 shiny_1.4.0 grid_3.6.3
## [49] tools_3.6.3 magrittr_1.5 tibble_3.0.4 crayon_1.3.4
## [53] pkgconfig_2.0.3 ellipsis_0.3.1 assertthat_0.2.1 rmarkdown_2.1
## [57] Rhdf5lib_1.6.3 R6_2.4.1 mclust_5.4.7 compiler_3.6.3
Total time elapsed
end_time <- Sys.time()
diff_time <- end_time - start_time
time_message <- paste0("Elapsed Time: ",
round(diff_time, 3),
" ", units(diff_time))
print(time_message)
## [1] "Elapsed Time: 19.288 secs"
stm(time_message)
stm("HTO count processing complete.")