In this script, we generate pseudo-bulk of the external set of 10x VISIUM ST CRC samples extracted from the following publication:
Wu, Y. et al. Spatiotemporal Immune Landscape of Colorectal Cancer Liver Metastasis at Single-Cell Level. Cancer Discov. 12, 134–153 (2022).
and we use https://github.com/Lothelab/CMScaller to assign them to the different CMS.
library(ggplot2)
library(vctrs)
library(patchwork, lib.loc = "/apps/rocs/2020.08/cascadelake/software/R/4.1.2-foss-2020a/lib64/R/library")
library(Seurat)
library(scater)
library(dplyr)
library(readr)
library(stringr)
library(RColorBrewer)
library(cowplot)
library(Biobase)
library(CMScaller)
source(file = "../WrapperFunction/SeuratWrappers.R")
source(file = "../WrapperFunction/PseudoBulkUtils.R")
data_directory <- params$data_directory
analysis_name <- params$analysis_name
input_folder <- "ST_LiverMetastases_pub/IntermediaryFiles/"
input_names_1 <- "SeuratList_Clusters_Res05.rds"
files_to_read_1 <- paste0(data_directory, analysis_name,input_folder,input_names_1)
I read the seurat object
seurat_objects <- readRDS(files_to_read_1)
pseudobulk_profiles_all <- lapply(seurat_objects, get_sample_pseudo)
gex_all <- do.call(cbind, lapply(pseudobulk_profiles_all, function(x){
assay(x)}))
## We change the names for the plotting:
colnames(gex_all) <-
c("ST-colon1_Unt", "ST-colon2_Unt",
"ST-colon3_Tre", "ST-colon4_Tre",
"ST-liver1_Unt", "ST-liver2_Unt",
"ST-liver3_Tre", "ST-liver4_Tre")
We call CMScaller to see the results on the whole section of the samples.
gex_all_entrez <- replaceGeneId(gex_all, id.in = "symbol", id.out = "entrez")
gex_all_entrez <- gex_all_entrez[!(str_detect(rownames(gex_all_entrez), "^NA.")),]
res_all <- CMScaller(gex_all_entrez, RNAseq=TRUE, doPlot = FALSE)
##
## CMS1 CMS2 CMS3 CMS4 <NA>
## 1 3 1 2 1
res_df_all <- res_all %>% as.data.frame() %>% tibble::rownames_to_column(var = "sample") %>%
dplyr::select(sample, prediction)
res_df_all$sample <-
factor(res_df_all$sample ,
levels = sort(unique(res_df_all$sample), decreasing = TRUE))
res_df_all %>% ggplot(aes(prediction, sample, color = prediction)) +
geom_point(size = 8) +
scale_color_brewer(palette = "Paired") +
theme( # remove the vertical grid lines
axis.title = element_text(size=14, face = "bold", family="Arial"),
axis.text.x = element_text(size=10, face = "bold", family="Arial", angle = 90),
axis.text.y = element_text(size=10, face = "bold", family="Arial")) +
labs(x="Predicted CMS", y ="Sample") +
theme(legend.position ="none")
cam <- CMSgsa(emat=gex_all_entrez, class=res_df_all$prediction,RNAseq=TRUE)
res_df_all %>% dplyr::mutate(type = "CMS") %>%
ggplot(aes(type, sample , fill= prediction)) +
geom_tile(size =1.25, color ="black", width = 0.95) +
theme_bw() +
theme(axis.title = element_blank(),
axis.ticks = element_blank(), panel.border = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_text(size=12, face = "bold", family="Arial"),
legend.position ="right", legend.text = element_text(size=12, face="bold", family="Arial"),
legend.title = element_text(size=12, face="bold", family="Arial")) +
scale_fill_brewer(palette = "Set1") +
guides(fill = guide_legend(title = "Predicted CMS", override.aes = list(size = 10)))
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora 33 (Container Image)
##
## Matrix products: default
## BLAS/LAPACK: /apps/rocs/2020.08/cascadelake/software/OpenBLAS/0.3.9-GCC-9.3.0/lib/libopenblas_skylakexp-r0.3.9.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] CMScaller_2.0.1 cowplot_1.1.1
## [3] RColorBrewer_1.1-3 stringr_1.4.0
## [5] readr_2.1.2 dplyr_1.0.9
## [7] scater_1.22.0 scuttle_1.4.0
## [9] SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
## [11] Biobase_2.54.0 GenomicRanges_1.46.1
## [13] GenomeInfoDb_1.30.1 IRanges_2.28.0
## [15] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [17] MatrixGenerics_1.6.0 matrixStats_0.62.0
## [19] sp_1.5-0 SeuratObject_4.1.0
## [21] Seurat_4.1.0 patchwork_1.1.1
## [23] vctrs_0.4.1 ggplot2_3.3.6
## [25] BiocManager_1.30.18
##
## loaded via a namespace (and not attached):
## [1] plyr_1.8.7 igraph_1.3.2
## [3] lazyeval_0.2.2 splines_4.1.2
## [5] BiocParallel_1.28.3 listenv_0.8.0
## [7] scattermore_0.8 digest_0.6.29
## [9] htmltools_0.5.2 viridis_0.6.2
## [11] fansi_1.0.3 magrittr_2.0.3
## [13] ScaledMatrix_1.2.0 tensor_1.5
## [15] cluster_2.1.3 ROCR_1.0-11
## [17] limma_3.50.3 tzdb_0.3.0
## [19] globals_0.15.0 spatstat.sparse_2.1-1
## [21] colorspace_2.1-0 ggrepel_0.9.1
## [23] xfun_0.31 crayon_1.5.1
## [25] RCurl_1.98-1.7 jsonlite_1.8.0
## [27] progressr_0.10.1 spatstat.data_2.2-0
## [29] survival_3.3-1 zoo_1.8-10
## [31] glue_1.6.2 polyclip_1.10-0
## [33] gtable_0.3.0 zlibbioc_1.40.0
## [35] XVector_0.34.0 leiden_0.4.2
## [37] DelayedArray_0.20.0 BiocSingular_1.10.0
## [39] future.apply_1.9.0 abind_1.4-7
## [41] scales_1.2.0 DBI_1.1.3
## [43] spatstat.random_2.2-0 miniUI_0.1.1.1
## [45] Rcpp_1.0.8.3 viridisLite_0.4.0
## [47] xtable_1.8-6 reticulate_1.25
## [49] spatstat.core_2.4-4 rsvd_1.0.5
## [51] htmlwidgets_1.5.4 httr_1.4.3
## [53] ellipsis_0.3.2 ica_1.0-2
## [55] farver_2.1.0 pkgconfig_2.0.3
## [57] sass_0.4.1 uwot_0.1.11
## [59] deldir_1.0-6 utf8_1.2.2
## [61] tidyselect_1.1.2 rlang_1.0.2
## [63] reshape2_1.4.4 later_1.3.0
## [65] munsell_0.5.0 tools_4.1.2
## [67] cli_3.3.0 generics_0.1.2
## [69] ggridges_0.5.3 evaluate_0.15
## [71] fastmap_1.1.0 yaml_2.3.5
## [73] goftest_1.2-3 knitr_1.39
## [75] fitdistrplus_1.1-8 purrr_0.3.4
## [77] RANN_2.6.1 sparseMatrixStats_1.6.0
## [79] pbapply_1.5-0 future_1.26.1
## [81] nlme_3.1-158 mime_0.12
## [83] compiler_4.1.2 rstudioapi_0.13
## [85] beeswarm_0.4.0 plotly_4.10.0
## [87] png_0.1-7 spatstat.utils_2.3-1
## [89] tibble_3.1.7 bslib_0.3.1
## [91] stringi_1.7.6 highr_0.9
## [93] rgeos_0.5-10 lattice_0.20-45
## [95] Matrix_1.4-2 pillar_1.7.0
## [97] lifecycle_1.0.1 spatstat.geom_2.4-0
## [99] lmtest_0.9-40 jquerylib_0.1.4
## [101] BiocNeighbors_1.12.0 RcppAnnoy_0.0.19
## [103] data.table_1.14.2 bitops_1.0-7
## [105] irlba_2.3.5 httpuv_1.6.5
## [107] R6_2.5.1 promises_1.2.0.1
## [109] KernSmooth_2.23-20 gridExtra_2.3
## [111] vipor_0.4.5 parallelly_1.32.0
## [113] codetools_0.2-18 MASS_7.3-57
## [115] assertthat_0.2.1 withr_2.5.0
## [117] sctransform_0.3.3 GenomeInfoDbData_1.2.7
## [119] hms_1.1.1 mgcv_1.8-40
## [121] parallel_4.1.2 beachmat_2.10.0
## [123] grid_4.1.2 rpart_4.1.16
## [125] tidyr_1.2.0 DelayedMatrixStats_1.16.0
## [127] rmarkdown_2.14 Rtsne_0.16
## [129] shiny_1.7.1 ggbeeswarm_0.6.0