This script reads the summary metrics of the Space Ranger output for the spatial transcriptomics data generated with the 10x Genomics Visium technology on a set of colorectal cancer (CRC) samples.
We first load the libraries and set the paths to the raw data.
library(ggplot2)
library(vctrs)
library(patchwork, lib.loc = "/apps/rocs/2020.08/cascadelake/software/R/4.1.2-foss-2020a/lib64/R/library")
library(Seurat)
library(tidyverse)
library(kableExtra)
library(RColorBrewer)
source(file = "WrapperFunction/SeuratWrappers.R")
# Inputs read from `params` (presumably the R Markdown report parameters).
data_directory <- params$data_directory
analysis_name <- params$analysis_name

# Suffix appended to every sample folder to reach its count output folder.
counts_directory <- "/outs"
# Folder that is searched for the per-sample "Count_" directories.
data_directory_counts <- paste0(data_directory, "Fastq_Merged")

# Create the output folder of this analysis; warnings (for instance when the
# folder already exists) are suppressed.
output_directory <- paste0(data_directory, analysis_name)
dir.create(output_directory, showWarnings = FALSE)
# setwd(paste0(data_directory, analysis_name))
We get the folders containing the spatial counts for the different samples.
# Sample folders are the immediate sub-directories of `data_directory_counts`
# whose own name contains "Count_". The directory is listed once and the filter
# is applied to the folder name only, so `datasets` and `sample_names` are
# guaranteed to have the same length and order. Filtering the full paths
# instead would select every folder as soon as a parent directory contains
# "Count_", silently misaligning the two vectors.
count_dirs <- list.dirs(
  path = data_directory_counts,
  recursive = FALSE,
  full.names = TRUE
)
count_dirs <- count_dirs[str_detect(basename(count_dirs), "Count_")]

# Path of each sample's output folder (sample folder + `counts_directory`).
# `recycle0 = TRUE` keeps the result empty when no sample folder is found;
# plain `paste0(character(0), "/outs")` would return the bogus path "/outs".
datasets <- paste0(count_dirs, counts_directory, recycle0 = TRUE)

# Sample names: the folder name with its first "Count_" removed.
sample_names <- str_replace(basename(count_dirs), "Count_", "")
We read the metrics summary for all the samples.
# Collect the per-sample QC metrics into one table. `get.global.QCmetrics` is a
# project helper; NOTE(review): presumably it reads "<dataset>/metrics_summary.csv"
# for every entry of `datasets` - confirm against its definition.
QC_global_df <- get.global.QCmetrics(
  datasets,
  sample_names,
  metrics_name = "/metrics_summary.csv"
)

# Display the metrics as a styled table inside a full-width scroll box.
qc_table <- kable_styling(kbl(QC_global_df))
scroll_box(qc_table, width = "100%")
| Sample ID | Number of Spots Under Tissue | Number of Reads | Mean Reads per Spot | Mean Reads Under Tissue per Spot | Fraction of Spots Under Tissue | Median Genes per Spot | Median UMI Counts per Spot | Valid Barcodes | Valid UMIs | Sequencing Saturation | Q30 Bases in Barcode | Q30 Bases in RNA Read | Q30 Bases in UMI | Reads Mapped to Genome | Reads Mapped Confidently to Genome | Reads Mapped Confidently to Intergenic Regions | Reads Mapped Confidently to Intronic Regions | Reads Mapped Confidently to Exonic Regions | Reads Mapped Confidently to Transcriptome | Reads Mapped Antisense to Gene | Fraction Reads in Spots Under Tissue | Total Genes Detected | samples_name |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Count_SN048_A121573_Rep1 | 2203 | 238571549 | 108293.94 | 60463.56 | 0.4413061 | 4264.0 | 11920.0 | 0.9575566 | 0.9973647 | 0.7276006 | 0.9371116 | 0.9099210 | 0.9377244 | 0.8557846 | 0.8250528 | 0.0292398 | 0.0235099 | 0.7723031 | 0.7529259 | 0.0079163 | 0.5889230 | 22977 | SN048_A121573_Rep1 |
| Count_SN048_A121573_Rep2 | 2385 | 218249797 | 91509.35 | 47076.25 | 0.4777644 | 3809.0 | 9460.0 | 0.9563191 | 0.9961037 | 0.6590206 | 0.9321118 | 0.9047080 | 0.9321161 | 0.7663808 | 0.7347149 | 0.0262561 | 0.0224679 | 0.6859909 | 0.6676575 | 0.0075557 | 0.5522567 | 22833 | SN048_A121573_Rep2 |
| Count_SN048_A416371_Rep1 | 2317 | 164220396 | 70876.30 | 38981.42 | 0.4641426 | 4116.0 | 9924.0 | 0.9485526 | 0.9924445 | 0.4967044 | 0.9285334 | 0.8982389 | 0.9288378 | 0.6709256 | 0.6413772 | 0.0202746 | 0.0217622 | 0.5993404 | 0.5804310 | 0.0053696 | 0.6113097 | 22192 | SN048_A416371_Rep1 |
| Count_SN048_A416371_Rep2 | 1803 | 135118970 | 74941.19 | 44431.33 | 0.3611779 | 4588.0 | 12685.0 | 0.9543406 | 0.9931894 | 0.4628681 | 0.9342732 | 0.9107446 | 0.9359586 | 0.6702179 | 0.6409008 | 0.0209851 | 0.0215138 | 0.5984019 | 0.5792172 | 0.0058035 | 0.6957941 | 22148 | SN048_A416371_Rep2 |
| Count_SN123_A551763_Rep1 | 691 | 84959596 | 122951.66 | 42749.08 | 0.1384215 | 4643.0 | 11756.0 | 0.9721363 | 0.9985743 | 0.5594079 | 0.9549964 | 0.9387691 | 0.9531112 | 0.7303343 | 0.6757109 | 0.0282179 | 0.0367193 | 0.6107737 | 0.5929185 | 0.0075300 | 0.4015557 | 20150 | SN123_A551763_Rep1 |
| Count_SN123_A595688_Rep1 | 1192 | 123561252 | 103658.77 | 55240.88 | 0.2387821 | 4388.0 | 12977.0 | 0.9692355 | 0.9975122 | 0.5869393 | 0.9548186 | 0.9302510 | 0.9537855 | 0.7504864 | 0.7180723 | 0.0290811 | 0.0222581 | 0.6667331 | 0.6473198 | 0.0088081 | 0.5747350 | 22687 | SN123_A595688_Rep1 |
| Count_SN123_A798015_Rep1 | 1685 | 195534214 | 116044.04 | 60988.81 | 0.3375401 | 2343.0 | 5522.0 | 0.9722068 | 0.9987664 | 0.7485725 | 0.9531217 | 0.9368612 | 0.9512725 | 0.6472138 | 0.5728710 | 0.0276017 | 0.0377490 | 0.5075203 | 0.4945108 | 0.0049185 | 0.5681802 | 22121 | SN123_A798015_Rep1 |
| SN123_A938797_Rep1 | 2128 | 113886618 | 53518.15 | 32244.26 | 0.4262821 | 3084.5 | 6598.0 | 0.9708266 | 0.9977994 | 0.5233447 | 0.9603618 | 0.9374743 | 0.9586598 | 0.6293995 | 0.5852414 | 0.0240003 | 0.0453451 | 0.5158960 | 0.4984374 | 0.0080705 | 0.6855957 | 22572 | SN123_A938797_Rep1_X |
| Count_SN124_A551763_Rep2 | 1219 | 471863476 | 387090.63 | 71290.96 | 0.2441907 | 1233.0 | 1828.0 | 0.9718199 | 0.9994796 | 0.9514726 | 0.9583634 | 0.9453707 | 0.9572916 | 0.7790948 | 0.7216550 | 0.0348214 | 0.0503531 | 0.6364805 | 0.6231167 | 0.0035308 | 0.2091223 | 18072 | SN124_A551763_Rep2 |
| Count_SN124_A595688_Rep2 | 387 | 72073556 | 186236.58 | 38019.12 | 0.0775240 | 4407.0 | 12319.0 | 0.9737471 | 0.9982102 | 0.4650953 | 0.9547990 | 0.9396376 | 0.9529286 | 0.5719089 | 0.5043402 | 0.0434788 | 0.0412854 | 0.4195760 | 0.4071708 | 0.0042920 | 0.3270215 | 19788 | SN124_A595688_Rep2 |
| Count_SN124_A798015_Rep21 | 1656 | 137192998 | 82846.01 | 41384.02 | 0.3317308 | 2691.5 | 7395.0 | 0.9727572 | 0.9980660 | 0.6111810 | 0.9540471 | 0.9382082 | 0.9529220 | 0.7247318 | 0.6560772 | 0.0274734 | 0.0360448 | 0.5925590 | 0.5787086 | 0.0048207 | 0.5444075 | 22523 | SN124_A798015_Rep2 |
| Count_SN124_A938797_Rep2 | 1691 | 168083674 | 99398.98 | 58377.63 | 0.3387420 | 5457.0 | 18113.0 | 0.9710085 | 0.9981355 | 0.5525068 | 0.9573377 | 0.9357632 | 0.9556994 | 0.8007195 | 0.7543746 | 0.0264059 | 0.0405447 | 0.6874239 | 0.6679099 | 0.0083701 | 0.6499086 | 23445 | SN124_A938797_Rep2 |
| Count_SN84_A120838_Rep1 | 328 | 54756730 | 166941.25 | 38074.14 | 0.0657051 | 3958.0 | 10228.5 | 0.9716287 | 0.9965244 | 0.5255282 | 0.9575546 | 0.9375129 | 0.9562052 | 0.7295707 | 0.6911641 | 0.0297442 | 0.0361460 | 0.6252738 | 0.6113695 | 0.0039466 | 0.2680933 | 18651 | SN84_A120838_Rep1 |
| Count_SN84_A120838_Rep2 | 1048 | 110841367 | 105764.66 | 35332.70 | 0.2099359 | 3348.0 | 7218.0 | 0.9717985 | 0.9976415 | 0.6533942 | 0.9557841 | 0.9343218 | 0.9544974 | 0.7509154 | 0.7027843 | 0.0264602 | 0.0269533 | 0.6493708 | 0.6343222 | 0.0047185 | 0.3488974 | 20269 | SN84_A120838_Rep2 |
# One call to the project helper `get.barplot.qc` per QC metric of interest;
# `column_to_plot` is the name of the `QC_global_df` column to display.
# NOTE(review): presumably each call draws one bar per sample - confirm against
# the helper's definition.

# Tissue coverage and gene complexity.
get.barplot.qc(QC_global_df, column_to_plot = "Number of Spots Under Tissue")
get.barplot.qc(QC_global_df, column_to_plot = "Median Genes per Spot")
# Sequencing depth and mapping rates.
get.barplot.qc(QC_global_df, column_to_plot = "Number of Reads")
get.barplot.qc(QC_global_df, column_to_plot = "Reads Mapped Confidently to Transcriptome")
get.barplot.qc(QC_global_df, column_to_plot = "Reads Mapped Confidently to Genome")
# Fractions of spots and of reads that fall under the tissue.
get.barplot.qc(QC_global_df, column_to_plot = "Fraction of Spots Under Tissue")
get.barplot.qc(QC_global_df, column_to_plot = "Fraction Reads in Spots Under Tissue")
# UMI counts per spot.
get.barplot.qc(QC_global_df, column_to_plot = "Median UMI Counts per Spot")
# Lookup table translating the patient code embedded in the sample names into
# the patient label used in the figures. Written as code = label pairs so each
# mapping can be read on a single line.
patient_labels <- c(
  A120838 = "S4_Col_Sig",
  A121573 = "S5_Rec",
  A416371 = "S3_Col_R",
  A551763 = "S1_Cec",
  A595688 = "S2_Col_R",
  A798015 = "S7_Rec/Sig",
  A938797 = "S6_Rec"
)
# `unname()` prevents the vector names from being used as row names.
df_patient_ID_transform <- data.frame(
  patient = names(patient_labels),
  patient_ID = unname(patient_labels)
)
# Remove the "_X" suffix carried by one sample name so that it can be split
# like the other names, which read <slide>_<patient>_<replicate>.
QC_global_df$samples_name <- stringr::str_replace(
  QC_global_df$samples_name,
  pattern = "SN123_A938797_Rep1_X",
  replacement = "SN123_A938797_Rep1"
)

# Split the sample name into its parts:
#   replicate - what remains after removing everything up to the last "_"
#   patient   - text between the first and the second "_"
# Columns are referenced through the data mask rather than through `.$`.
QC_global_df <- QC_global_df %>%
  dplyr::mutate(
    replicate = str_remove(samples_name, pattern = ".*_"),
    patient = stringr::str_match(samples_name, "_\\s*(.*?)\\s*_")[, 2]
  )

# The inner join drops every sample whose patient code has no entry in the
# lookup table; report those samples instead of losing them silently.
unmapped_patients <-
  setdiff(QC_global_df$patient, df_patient_ID_transform$patient)
if (length(unmapped_patients) > 0) {
  warning(
    "Samples dropped because their patient code has no patient_ID: ",
    paste(unmapped_patients, collapse = ", "),
    call. = FALSE
  )
}

# Explicit join key: avoids the "Joining, by = ..." message and keeps the join
# on `patient` even if both tables ever share another column name.
QC_global_df <-
  inner_join(QC_global_df, df_patient_ID_transform, by = "patient")

# slide: text before the first "_" of the sample name.
QC_global_df$slide <- str_extract(QC_global_df$samples_name, "[^_]+")
# Plot label combining the patient label and the replicate.
QC_global_df$patient_ID_rep <-
  paste0(QC_global_df$patient_ID, "_", QC_global_df$replicate)
# Appearance shared by the two bar plots: light theme, legend on top, x labels
# rotated by 90 degrees, bold axis title on the y axis.
qc_bar_theme <- theme_light() +
  theme(
    legend.position = "top",
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 12, hjust = 1, angle = 0),
    axis.text.x = element_text(angle = 90, hjust = 0, size = 12, face = "bold"),
    axis.text.y = element_text(angle = 0, hjust = 1, size = 12),
    axis.title.y = element_text(size = 16, face = "bold")
  )

# Median number of genes per spot, one bar per patient/replicate, coloured by
# slide.
p1 <- ggplot(
  QC_global_df,
  aes(x = patient_ID_rep, y = `Median Genes per Spot`, fill = slide)
) +
  geom_col(position = "dodge") +
  qc_bar_theme +
  labs(x = "", y = "Median Genes per Spot") +
  scale_fill_brewer(palette = "Dark2")

# Median number of UMI counts per spot, same layout as `p1`.
p2 <- ggplot(
  QC_global_df,
  aes(x = patient_ID_rep, y = `Median UMI Counts per Spot`, fill = slide)
) +
  geom_col(position = "dodge") +
  qc_bar_theme +
  labs(x = "", y = "Median UMI Counts per Spot") +
  scale_fill_brewer(palette = "Dark2")

# Combine both panels (the `+` on two plots is provided by patchwork).
p1 + p2
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora 33 (Container Image)
##
## Matrix products: default
## BLAS/LAPACK: /apps/rocs/2020.08/cascadelake/software/OpenBLAS/0.3.9-GCC-9.3.0/lib/libopenblas_skylakexp-r0.3.9.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 kableExtra_1.3.4 forcats_0.5.1
## [4] stringr_1.4.0 dplyr_1.0.9 purrr_0.3.4
## [7] readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [10] tidyverse_1.3.1 sp_1.5-0 SeuratObject_4.1.0
## [13] Seurat_4.1.0 patchwork_1.1.1 vctrs_0.4.1
## [16] ggplot2_3.3.6 BiocManager_1.30.18
##
## loaded via a namespace (and not attached):
## [1] readxl_1.4.0 backports_1.4.1 systemfonts_1.0.4
## [4] plyr_1.8.7 igraph_1.3.2 lazyeval_0.2.2
## [7] splines_4.1.2 listenv_0.8.0 scattermore_0.8
## [10] digest_0.6.29 htmltools_0.5.2 fansi_1.0.3
## [13] magrittr_2.0.3 tensor_1.5 cluster_2.1.3
## [16] ROCR_1.0-11 tzdb_0.3.0 globals_0.15.0
## [19] modelr_0.1.8 matrixStats_0.62.0 vroom_1.5.7
## [22] svglite_2.1.0 spatstat.sparse_2.1-1 colorspace_2.1-0
## [25] rvest_1.0.2 ggrepel_0.9.1 haven_2.5.0
## [28] xfun_0.31 crayon_1.5.1 jsonlite_1.8.0
## [31] progressr_0.10.1 spatstat.data_2.2-0 survival_3.3-1
## [34] zoo_1.8-10 glue_1.6.2 polyclip_1.10-0
## [37] gtable_0.3.0 webshot_0.5.3 leiden_0.4.2
## [40] future.apply_1.9.0 abind_1.4-7 scales_1.2.0
## [43] DBI_1.1.3 spatstat.random_2.2-0 miniUI_0.1.1.1
## [46] Rcpp_1.0.8.3 viridisLite_0.4.0 xtable_1.8-6
## [49] reticulate_1.25 spatstat.core_2.4-4 bit_4.0.4
## [52] htmlwidgets_1.5.4 httr_1.4.3 ellipsis_0.3.2
## [55] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [58] sass_0.4.1 uwot_0.1.11 dbplyr_2.2.0
## [61] deldir_1.0-6 utf8_1.2.2 labeling_0.4.2
## [64] tidyselect_1.1.2 rlang_1.0.2 reshape2_1.4.4
## [67] later_1.3.0 munsell_0.5.0 cellranger_1.1.0
## [70] tools_4.1.2 cli_3.3.0 generics_0.1.2
## [73] broom_0.8.0 ggridges_0.5.3 evaluate_0.15
## [76] fastmap_1.1.0 yaml_2.3.5 goftest_1.2-3
## [79] bit64_4.0.5 knitr_1.39 fs_1.5.2
## [82] fitdistrplus_1.1-8 RANN_2.6.1 pbapply_1.5-0
## [85] future_1.26.1 nlme_3.1-158 mime_0.12
## [88] xml2_1.3.3 compiler_4.1.2 rstudioapi_0.13
## [91] plotly_4.10.0 png_0.1-7 spatstat.utils_2.3-1
## [94] reprex_2.0.1 bslib_0.3.1 stringi_1.7.6
## [97] highr_0.9 rgeos_0.5-10 lattice_0.20-45
## [100] Matrix_1.4-2 pillar_1.7.0 lifecycle_1.0.1
## [103] spatstat.geom_2.4-0 lmtest_0.9-40 jquerylib_0.1.4
## [106] RcppAnnoy_0.0.19 data.table_1.14.2 cowplot_1.1.1
## [109] irlba_2.3.5 httpuv_1.6.5 R6_2.5.1
## [112] promises_1.2.0.1 KernSmooth_2.23-20 gridExtra_2.3
## [115] parallelly_1.32.0 codetools_0.2-18 MASS_7.3-57
## [118] assertthat_0.2.1 withr_2.5.0 sctransform_0.3.3
## [121] mgcv_1.8-40 parallel_4.1.2 hms_1.1.1
## [124] grid_4.1.2 rpart_4.1.16 rmarkdown_2.14
## [127] Rtsne_0.16 shiny_1.7.1 lubridate_1.8.0