1 Introduction

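This script reads the metrics summaries produced by spaceRanger for the Spatial Transcriptomics data generated with 10x Visium technology on a set of colorectal cancer (CRC) samples, displays them as a table, and plots the main quality-control metrics for each sample.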

2 Getting Ready

We first load the libraries and set the paths to the raw data.

library(ggplot2)
library(vctrs)
library(patchwork, lib.loc = "/apps/rocs/2020.08/cascadelake/software/R/4.1.2-foss-2020a/lib64/R/library")
library(Seurat)
library(tidyverse)
library(kableExtra)
library(RColorBrewer)
source(file = "WrapperFunction/SeuratWrappers.R")

# Input and output locations are taken from the document parameters (`params`).
data_directory <- params$data_directory
analysis_name <- params$analysis_name

# Suffix appended to every per-sample "Count_" folder to reach the files that
# are read further down (the metrics summary).
counts_directory <- "/outs"

# Make sure the analysis folder exists; an already existing folder is not
# reported because warnings are switched off.
dir.create(paste0(data_directory, analysis_name), showWarnings = FALSE)

# setwd(paste0(data_directory, analysis_name))

# Folder that holds one "Count_<sample>" sub-folder per sample.
data_directory_counts <- paste0(data_directory, "Fastq_Merged")

We get the folders containing the spatial counts for the different samples.

# List the per-sample folders ONCE and derive both vectors from that single
# listing, so `datasets[i]` and `sample_names[i]` always describe the same
# sample. The "Count_" filter is applied to the folder name only: filtering the
# full path would keep every sub-folder as soon as a parent directory happens to
# contain "Count_", and the two vectors would no longer line up.
count_folders <-
  list.dirs(path = data_directory_counts, recursive = FALSE, full.names = FALSE) %>%
  str_subset("Count_")

# Full path to the counts sub-directory of every sample.
datasets <-
  file.path(data_directory_counts, count_folders) %>%
  paste0(counts_directory)

# Sample name = folder name without its "Count_" tag.
sample_names <-
  count_folders %>%
  str_replace("Count_", "")

We read the metrics summary for all the samples.

# Gather the metrics summary of every sample into one table.
# NOTE(review): get.global.QCmetrics is not defined in this file - presumably it
# comes from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
QC_global_df <- get.global.QCmetrics(
  datasets,
  sample_names,
  metrics_name = "/metrics_summary.csv"
)

# Show the table inside a scroll box spanning the full page width.
QC_global_df %>%
  kbl() %>%
  kable_styling() %>%
  scroll_box(width = "100%")
Sample ID Number of Spots Under Tissue Number of Reads Mean Reads per Spot Mean Reads Under Tissue per Spot Fraction of Spots Under Tissue Median Genes per Spot Median UMI Counts per Spot Valid Barcodes Valid UMIs Sequencing Saturation Q30 Bases in Barcode Q30 Bases in RNA Read Q30 Bases in UMI Reads Mapped to Genome Reads Mapped Confidently to Genome Reads Mapped Confidently to Intergenic Regions Reads Mapped Confidently to Intronic Regions Reads Mapped Confidently to Exonic Regions Reads Mapped Confidently to Transcriptome Reads Mapped Antisense to Gene Fraction Reads in Spots Under Tissue Total Genes Detected samples_name
Count_SN048_A121573_Rep1 2203 238571549 108293.94 60463.56 0.4413061 4264.0 11920.0 0.9575566 0.9973647 0.7276006 0.9371116 0.9099210 0.9377244 0.8557846 0.8250528 0.0292398 0.0235099 0.7723031 0.7529259 0.0079163 0.5889230 22977 SN048_A121573_Rep1
Count_SN048_A121573_Rep2 2385 218249797 91509.35 47076.25 0.4777644 3809.0 9460.0 0.9563191 0.9961037 0.6590206 0.9321118 0.9047080 0.9321161 0.7663808 0.7347149 0.0262561 0.0224679 0.6859909 0.6676575 0.0075557 0.5522567 22833 SN048_A121573_Rep2
Count_SN048_A416371_Rep1 2317 164220396 70876.30 38981.42 0.4641426 4116.0 9924.0 0.9485526 0.9924445 0.4967044 0.9285334 0.8982389 0.9288378 0.6709256 0.6413772 0.0202746 0.0217622 0.5993404 0.5804310 0.0053696 0.6113097 22192 SN048_A416371_Rep1
Count_SN048_A416371_Rep2 1803 135118970 74941.19 44431.33 0.3611779 4588.0 12685.0 0.9543406 0.9931894 0.4628681 0.9342732 0.9107446 0.9359586 0.6702179 0.6409008 0.0209851 0.0215138 0.5984019 0.5792172 0.0058035 0.6957941 22148 SN048_A416371_Rep2
Count_SN123_A551763_Rep1 691 84959596 122951.66 42749.08 0.1384215 4643.0 11756.0 0.9721363 0.9985743 0.5594079 0.9549964 0.9387691 0.9531112 0.7303343 0.6757109 0.0282179 0.0367193 0.6107737 0.5929185 0.0075300 0.4015557 20150 SN123_A551763_Rep1
Count_SN123_A595688_Rep1 1192 123561252 103658.77 55240.88 0.2387821 4388.0 12977.0 0.9692355 0.9975122 0.5869393 0.9548186 0.9302510 0.9537855 0.7504864 0.7180723 0.0290811 0.0222581 0.6667331 0.6473198 0.0088081 0.5747350 22687 SN123_A595688_Rep1
Count_SN123_A798015_Rep1 1685 195534214 116044.04 60988.81 0.3375401 2343.0 5522.0 0.9722068 0.9987664 0.7485725 0.9531217 0.9368612 0.9512725 0.6472138 0.5728710 0.0276017 0.0377490 0.5075203 0.4945108 0.0049185 0.5681802 22121 SN123_A798015_Rep1
SN123_A938797_Rep1 2128 113886618 53518.15 32244.26 0.4262821 3084.5 6598.0 0.9708266 0.9977994 0.5233447 0.9603618 0.9374743 0.9586598 0.6293995 0.5852414 0.0240003 0.0453451 0.5158960 0.4984374 0.0080705 0.6855957 22572 SN123_A938797_Rep1_X
Count_SN124_A551763_Rep2 1219 471863476 387090.63 71290.96 0.2441907 1233.0 1828.0 0.9718199 0.9994796 0.9514726 0.9583634 0.9453707 0.9572916 0.7790948 0.7216550 0.0348214 0.0503531 0.6364805 0.6231167 0.0035308 0.2091223 18072 SN124_A551763_Rep2
Count_SN124_A595688_Rep2 387 72073556 186236.58 38019.12 0.0775240 4407.0 12319.0 0.9737471 0.9982102 0.4650953 0.9547990 0.9396376 0.9529286 0.5719089 0.5043402 0.0434788 0.0412854 0.4195760 0.4071708 0.0042920 0.3270215 19788 SN124_A595688_Rep2
Count_SN124_A798015_Rep21 1656 137192998 82846.01 41384.02 0.3317308 2691.5 7395.0 0.9727572 0.9980660 0.6111810 0.9540471 0.9382082 0.9529220 0.7247318 0.6560772 0.0274734 0.0360448 0.5925590 0.5787086 0.0048207 0.5444075 22523 SN124_A798015_Rep2
Count_SN124_A938797_Rep2 1691 168083674 99398.98 58377.63 0.3387420 5457.0 18113.0 0.9710085 0.9981355 0.5525068 0.9573377 0.9357632 0.9556994 0.8007195 0.7543746 0.0264059 0.0405447 0.6874239 0.6679099 0.0083701 0.6499086 23445 SN124_A938797_Rep2
Count_SN84_A120838_Rep1 328 54756730 166941.25 38074.14 0.0657051 3958.0 10228.5 0.9716287 0.9965244 0.5255282 0.9575546 0.9375129 0.9562052 0.7295707 0.6911641 0.0297442 0.0361460 0.6252738 0.6113695 0.0039466 0.2680933 18651 SN84_A120838_Rep1
Count_SN84_A120838_Rep2 1048 110841367 105764.66 35332.70 0.2099359 3348.0 7218.0 0.9717985 0.9976415 0.6533942 0.9557841 0.9343218 0.9544974 0.7509154 0.7027843 0.0264602 0.0269533 0.6493708 0.6343222 0.0047185 0.3488974 20269 SN84_A120838_Rep2

2.1 Number of Spots Under Tissue

# Plot the "Number of Spots Under Tissue" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Number of Spots Under Tissue")

2.2 Median Genes per Spot

# Plot the "Median Genes per Spot" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Median Genes per Spot")

2.3 Number of Reads

# Plot the "Number of Reads" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Number of Reads")

2.4 Reads Mapped Confidently to Transcriptome

# Plot the "Reads Mapped Confidently to Transcriptome" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Reads Mapped Confidently to Transcriptome")

2.5 Reads Mapped Confidently to Genome

# Plot the "Reads Mapped Confidently to Genome" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Reads Mapped Confidently to Genome")

2.6 Fraction of Spots Under Tissue

# Plot the "Fraction of Spots Under Tissue" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Fraction of Spots Under Tissue")

2.7 Fraction Reads in Spots Under Tissue

# Plot the "Fraction Reads in Spots Under Tissue" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Fraction Reads in Spots Under Tissue")

2.8 Median UMI Counts per Spot

# Plot the "Median UMI Counts per Spot" column of QC_global_df.
# NOTE(review): get.barplot.qc is not defined in this file - presumably a bar
# plot helper from the sourced WrapperFunction/SeuratWrappers.R; confirm there.
get.barplot.qc(QC_global_df, column_to_plot = "Median UMI Counts per Spot")

2.9 Improved Median Genes and UMI counts per Spot

# Lookup table translating the patient code embedded in each sample name into
# the patient label used on the plots.
df_patient_ID_transform <- data.frame(
  patient = c("A120838", "A121573", "A416371", "A551763", "A595688", "A798015",
              "A938797"),
  patient_ID = c("S4_Col_Sig", "S5_Rec", "S3_Col_R", "S1_Cec", "S2_Col_R",
                 "S7_Rec/Sig", "S6_Rec"))

# Sample names are parsed as <slide>_<patient>_<replicate>.
QC_global_df <- QC_global_df %>%
  # A `patient_ID` column left over from a previous run of this chunk would
  # clash with the one added by the join below, so it is dropped first.
  dplyr::select(-dplyr::any_of("patient_ID")) %>%
  dplyr::mutate(
    # One folder carries a trailing "_X"; strip it so the name parses like the
    # others.
    samples_name = str_replace(samples_name,
                               pattern = "SN123_A938797_Rep1_X",
                               replacement = "SN123_A938797_Rep1"),
    # Everything after the last underscore.
    replicate = str_remove(samples_name, pattern = ".*_"),
    # Text between the first and the second underscore.
    patient = str_match(samples_name, "_\\s*(.*?)\\s*_")[, 2]
  )

# The inner join below removes every sample whose patient code is missing from
# the lookup table; report those samples instead of losing them silently.
unmatched_patients <-
  setdiff(QC_global_df$patient, df_patient_ID_transform$patient)
if (length(unmatched_patients) > 0) {
  warning(
    "Samples dropped because their patient code has no entry in ",
    "df_patient_ID_transform: ",
    paste(unmatched_patients, collapse = ", "),
    call. = FALSE
  )
}

QC_global_df <- QC_global_df %>%
  # Explicit key: no reliance on the implicit natural join (and no join message
  # in the rendered report).
  inner_join(df_patient_ID_transform, by = "patient") %>%
  dplyr::mutate(
    # Everything before the first underscore.
    slide = str_extract(samples_name, "[^_]+"),
    patient_ID_rep = paste0(patient_ID, "_", replicate)
  )

# Bar plot of one QC metric per patient/replicate, coloured by slide.
#   qc_df  : data frame with the columns `slide`, `patient_ID_rep` and `metric`.
#   metric : name (string) of the column to draw; also used as y-axis title.
# Returns the ggplot object.
plot_qc_metric_by_slide <- function(qc_df, metric) {
  ggplot(qc_df, aes(fill = slide, y = .data[[metric]], x = patient_ID_rep)) +
    geom_col(position = "dodge") +
    theme_light() +
    theme(legend.position = "top",
          legend.title = element_text(size = 14, face = "bold"),
          legend.text = element_text(size = 12, hjust = 1, angle = 0),
          axis.text.x = element_text(angle = 90, hjust = 0, size = 12, face = "bold"),
          axis.text.y = element_text(angle = 0, hjust = 1, size = 12),
          axis.title.y = element_text(size = 16, face = "bold")) +
    xlab("") +
    ylab(metric) +
    scale_fill_brewer(palette = "Dark2")
}

p1 <- plot_qc_metric_by_slide(QC_global_df, "Median Genes per Spot")
p2 <- plot_qc_metric_by_slide(QC_global_df, "Median UMI Counts per Spot")

# Side-by-side layout (patchwork).
p1 + p2

3 Conclusion

4 Session Info Details

## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora 33 (Container Image)
## 
## Matrix products: default
## BLAS/LAPACK: /apps/rocs/2020.08/cascadelake/software/OpenBLAS/0.3.9-GCC-9.3.0/lib/libopenblas_skylakexp-r0.3.9.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] RColorBrewer_1.1-3  kableExtra_1.3.4    forcats_0.5.1      
##  [4] stringr_1.4.0       dplyr_1.0.9         purrr_0.3.4        
##  [7] readr_2.1.2         tidyr_1.2.0         tibble_3.1.7       
## [10] tidyverse_1.3.1     sp_1.5-0            SeuratObject_4.1.0 
## [13] Seurat_4.1.0        patchwork_1.1.1     vctrs_0.4.1        
## [16] ggplot2_3.3.6       BiocManager_1.30.18
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0          backports_1.4.1       systemfonts_1.0.4    
##   [4] plyr_1.8.7            igraph_1.3.2          lazyeval_0.2.2       
##   [7] splines_4.1.2         listenv_0.8.0         scattermore_0.8      
##  [10] digest_0.6.29         htmltools_0.5.2       fansi_1.0.3          
##  [13] magrittr_2.0.3        tensor_1.5            cluster_2.1.3        
##  [16] ROCR_1.0-11           tzdb_0.3.0            globals_0.15.0       
##  [19] modelr_0.1.8          matrixStats_0.62.0    vroom_1.5.7          
##  [22] svglite_2.1.0         spatstat.sparse_2.1-1 colorspace_2.1-0     
##  [25] rvest_1.0.2           ggrepel_0.9.1         haven_2.5.0          
##  [28] xfun_0.31             crayon_1.5.1          jsonlite_1.8.0       
##  [31] progressr_0.10.1      spatstat.data_2.2-0   survival_3.3-1       
##  [34] zoo_1.8-10            glue_1.6.2            polyclip_1.10-0      
##  [37] gtable_0.3.0          webshot_0.5.3         leiden_0.4.2         
##  [40] future.apply_1.9.0    abind_1.4-7           scales_1.2.0         
##  [43] DBI_1.1.3             spatstat.random_2.2-0 miniUI_0.1.1.1       
##  [46] Rcpp_1.0.8.3          viridisLite_0.4.0     xtable_1.8-6         
##  [49] reticulate_1.25       spatstat.core_2.4-4   bit_4.0.4            
##  [52] htmlwidgets_1.5.4     httr_1.4.3            ellipsis_0.3.2       
##  [55] ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3      
##  [58] sass_0.4.1            uwot_0.1.11           dbplyr_2.2.0         
##  [61] deldir_1.0-6          utf8_1.2.2            labeling_0.4.2       
##  [64] tidyselect_1.1.2      rlang_1.0.2           reshape2_1.4.4       
##  [67] later_1.3.0           munsell_0.5.0         cellranger_1.1.0     
##  [70] tools_4.1.2           cli_3.3.0             generics_0.1.2       
##  [73] broom_0.8.0           ggridges_0.5.3        evaluate_0.15        
##  [76] fastmap_1.1.0         yaml_2.3.5            goftest_1.2-3        
##  [79] bit64_4.0.5           knitr_1.39            fs_1.5.2             
##  [82] fitdistrplus_1.1-8    RANN_2.6.1            pbapply_1.5-0        
##  [85] future_1.26.1         nlme_3.1-158          mime_0.12            
##  [88] xml2_1.3.3            compiler_4.1.2        rstudioapi_0.13      
##  [91] plotly_4.10.0         png_0.1-7             spatstat.utils_2.3-1 
##  [94] reprex_2.0.1          bslib_0.3.1           stringi_1.7.6        
##  [97] highr_0.9             rgeos_0.5-10          lattice_0.20-45      
## [100] Matrix_1.4-2          pillar_1.7.0          lifecycle_1.0.1      
## [103] spatstat.geom_2.4-0   lmtest_0.9-40         jquerylib_0.1.4      
## [106] RcppAnnoy_0.0.19      data.table_1.14.2     cowplot_1.1.1        
## [109] irlba_2.3.5           httpuv_1.6.5          R6_2.5.1             
## [112] promises_1.2.0.1      KernSmooth_2.23-20    gridExtra_2.3        
## [115] parallelly_1.32.0     codetools_0.2-18      MASS_7.3-57          
## [118] assertthat_0.2.1      withr_2.5.0           sctransform_0.3.3    
## [121] mgcv_1.8-40           parallel_4.1.2        hms_1.1.1            
## [124] grid_4.1.2            rpart_4.1.16          rmarkdown_2.14       
## [127] Rtsne_0.16            shiny_1.7.1           lubridate_1.8.0