library(ggplot2)
library(vctrs)
library(patchwork, lib.loc = "/apps/rocs/2020.08/cascadelake/software/R/4.1.2-foss-2020a/lib64/R/library")
library(Seurat)
library(dplyr)
library(readr)
library(stringr)
library(purrr)
library(cowplot)
library(ggpubr)
library(kableExtra)
data_directory <- params$data_directory
analysis_name <- params$analysis_name
input_folder <- "IntermediaryFiles/"
input_names_1 <- "SeuratList_Clusters_Res05.rds"
input_names_2 <- "Patho_Annotations/"
files_to_read_1 <- paste0(data_directory, analysis_name,input_folder,input_names_1)
files_to_read_2 <- paste0(data_directory, analysis_name,input_folder,input_names_2)
df_patient_ID_transform <- data.frame(
patient = c("A120838","A121573","A416371","A551763","A595688","A798015",
"A938797"),
patient_ID = c("S4_Col_Sig","S5_Rec","S3_Col_R","S1_Cec","S2_Col_R",
"S7_Rec/Sig","S6_Rec"))
## We consider all the samples but the one with low QC results
samples_to_consider <- c("SN048_A121573_Rep1","SN048_A121573_Rep2",
"SN048_A416371_Rep1", "SN048_A416371_Rep2", "SN123_A551763_Rep1",
"SN123_A595688_Rep1", "SN123_A798015_Rep1", "SN123_A938797_Rep1_X",
"SN124_A595688_Rep2", "SN124_A798015_Rep2",
"SN124_A938797_Rep2", "SN84_A120838_Rep1", "SN84_A120838_Rep2" )
We include pathologist annotations into Seurat objects
seurat_objects <- readRDS(files_to_read_1)
seurat_objects <- seurat_objects[samples_to_consider]
seurat_objects_patho <-
lapply(seurat_objects, function(x){
current_sample <- unique(x@meta.data$orig.ident)
patho_anno_current_sample <-
read_csv(file = paste0(files_to_read_2, "Pathologist_Annotations_",
current_sample, ".csv"))
colnames(patho_anno_current_sample) <- c("Barcode", "Pathologist_Annotations")
x@meta.data <- x@meta.data %>%
tibble::rownames_to_column("spot_id") %>%
dplyr::left_join(patho_anno_current_sample, by = c("spot_id" = "Barcode")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"tumor&stroma IC med to high", "tumor&stroma_IC med to high")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"epitehlium&submucosa", "epithelium&submucosa")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggregate submucosa", "IC aggregate_submucosa")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggregregate_submucosa", "IC aggregate_submucosa")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggreates_stroma or muscularis", "IC aggregate_stroma or muscularis")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggragate_stroma or muscularis", "IC aggregate_stroma or muscularis")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggreates_stroma or muscularis", "IC aggregate_stroma or muscularis")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggregate_muscularis or stroma", "IC aggregate_stroma or muscularis")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"stroma desmoplastic_IC low", "stroma_desmoplastic_IC low")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"stroma desmoplastic_IC med to high", "stroma_desmoplastic_IC med to high")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"stroma_fibroblastic_IC high", "stroma_fibroblastic_IC_high")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"stroma_fibroblastic_IC med", "stroma_fibroblastic_IC_med")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"stroma_fibroblastic_IC_med", "stroma_fibroblastic_IC med to high")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"stroma_fibroblastic_IC_high", "stroma_fibroblastic_IC med to high")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"submucosa", "lamina propria")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"epithelium&submucosa", "epithelium&lam propria")) %>%
dplyr::mutate(Pathologist_Annotations = str_replace(Pathologist_Annotations,
"IC aggregate_submucosa", "IC aggregate_lam propria")) %>%
tibble::column_to_rownames("spot_id")
saveRDS(object = x@meta.data %>%
tibble::rownames_to_column("spot_id"),
file = paste0(files_to_read_2, "Pathologist_Annotations_",
current_sample, ".rds"))
Idents(x) <- 'Pathologist_Annotations'
return(x)
})
We visualize the global number of spots with the different annotations
all_annotations <-
unlist(lapply(seurat_objects_patho, function(x){x@meta.data$Pathologist_Annotations}))
table(all_annotations)
## all_annotations
## connective tissue_1_edema connective tissue_2_fibroblastic_IC low
## 454 806
## connective tissue_3_fibroblastic_IC med connective tissue_4_muscularis_IC low
## 763 195
## connective tissue_6_hemosiderin? epithelium&lam propria
## 119 231
## epithelium&lamina propria exclude
## 1876 1185
## glandular tissue IC aggreagate_connective tissue
## 61 66
## IC aggregate connective tissue IC aggregate_lamina propria
## 43 130
## IC aggregate_stroma or muscularis lamina propria
## 42 674
## muscularis_IC med to high non neo epithelium
## 87 885
## squamous epithelium stroma_desmoplastic_IC low
## 19 222
## stroma_desmoplastic_IC med to high stroma_fibroblastic_IC low
## 165 102
## stroma_fibroblastic_IC med to high tumor
## 4682 3329
## tumor&stroma tumor&stroma_IC low
## 645 71
## tumor&stroma_IC med to high
## 2586
list_all_annotations <-
lapply(seurat_objects_patho, function(x){unique(x@meta.data$Pathologist_Annotations)})
table(unlist(list_all_annotations))
##
## connective tissue_1_edema connective tissue_2_fibroblastic_IC low
## 2 2
## connective tissue_3_fibroblastic_IC med connective tissue_4_muscularis_IC low
## 2 2
## connective tissue_6_hemosiderin? epithelium&lam propria
## 2 2
## epithelium&lamina propria exclude
## 6 11
## glandular tissue IC aggreagate_connective tissue
## 2 1
## IC aggregate connective tissue IC aggregate_lamina propria
## 1 6
## IC aggregate_stroma or muscularis lamina propria
## 5 7
## muscularis_IC med to high non neo epithelium
## 4 8
## squamous epithelium stroma_desmoplastic_IC low
## 2 2
## stroma_desmoplastic_IC med to high stroma_fibroblastic_IC low
## 2 2
## stroma_fibroblastic_IC med to high tumor
## 11 11
## tumor&stroma tumor&stroma_IC low
## 1 2
## tumor&stroma_IC med to high
## 10
selected_annotations <- names(which(table(unlist(list_all_annotations)) > 2))
selected_annotations <- selected_annotations[selected_annotations != "exclude"]
seurat_objects_patho_merge <-
merge(x= seurat_objects_patho[[1]],y=seurat_objects_patho[-1])
Then, we read cell2location results, to explore the cell type content/abundance of this tumor annotated spots
filename_results_C2L_Korean <- paste0(data_directory, analysis_name, "Cell2Location/results/LocationModelLinearDependentWMultiExperiment_14experiments_36clusters_20654locations_4188genes/W_cell_density_q05.csv")
results_C2L_Korean <- read_csv(filename_results_C2L_Korean) %>%
dplyr::mutate(spot_id = str_remove(.$spot_id, pattern = "Count_"))
colnames(results_C2L_Korean) <-
str_replace(colnames(results_C2L_Korean),
pattern = "q05_spot_factors", replacement = "")
results_C2L_Korean <- results_C2L_Korean %>%
dplyr::mutate(sample = str_extract(.$spot_id, pattern =".*Rep[1-2]")) %>%
dplyr::mutate(spot_id = str_remove(.$spot_id, pattern =".*Rep[1-2]_")) %>%
dplyr::mutate(spot_id = str_remove(spot_id, pattern = "^X_")) %>%
tidyr::pivot_longer(!c(spot_id,sample), names_to = "Cell_subtype", values_to = "cell_density_q05")
### To obtain the proportions at the major cell type level, we read the metadata
## from the lee Paper and assing the different cell subtypes to its broader category.
## We define the different cell types:
metadata_Lee_paper <-
read_tsv(file = "../Cell2Location/inputs/scRNAseq-ref/raw/metadata.tsv")
## Global results
epithelial_cells <- metadata_Lee_paper %>%
dplyr::filter(Cell_type=="Epithelial cells")
tumor_epithelial_cells <- epithelial_cells %>%
dplyr::filter(Cell_subtype %in% c("CMS1","CMS2","CMS3","CMS4")) %>%
dplyr::select(Cell_type,Cell_subtype ) %>% distinct()
tumor_epithelial_cells$Cell_type <- "Tumor Cells"
normal_epithelial_cells <- epithelial_cells %>%
dplyr::filter(!(Cell_subtype %in% c("CMS1","CMS2","CMS3","CMS4"))) %>%
dplyr::select(Cell_type,Cell_subtype ) %>% dplyr::distinct()
stromal_cells <- metadata_Lee_paper %>%
dplyr::filter(Cell_type=="Stromal cells") %>%
dplyr::select(Cell_type,Cell_subtype ) %>% dplyr::distinct()
myeloid_cells <- metadata_Lee_paper %>%
dplyr::filter(Cell_type=="Myeloids") %>%
dplyr::select(Cell_type,Cell_subtype ) %>% distinct()
T_cells <- metadata_Lee_paper %>%
dplyr::filter(Cell_type=="T cells") %>%
dplyr::select(Cell_type,Cell_subtype ) %>% distinct()
B_cells <- metadata_Lee_paper %>%
dplyr::filter(Cell_type=="B cells") %>%
dplyr::select(Cell_type,Cell_subtype ) %>% distinct()
Mast_cells <- metadata_Lee_paper %>%
dplyr::filter(Cell_type=="Mast cells") %>%
dplyr::select(Cell_type,Cell_subtype ) %>% distinct()
MainCellTypes_labels <- rbind(tumor_epithelial_cells,
normal_epithelial_cells,
stromal_cells,
myeloid_cells,
T_cells, B_cells, Mast_cells) %>%
dplyr::filter(Cell_subtype != "Unknown")
results_C2L_Korean_MajorCells <- results_C2L_Korean %>%
dplyr::inner_join(MainCellTypes_labels, by = "Cell_subtype")
spots_annotations <- seurat_objects_patho_merge@meta.data %>%
dplyr::select(orig.ident, Pathologist_Annotations) %>%
tibble::rownames_to_column(var = "spot_id") %>%
dplyr::mutate(spot_id = str_remove(spot_id, "_[0-9]+")) %>%
dplyr::mutate(orig.ident = str_replace(orig.ident, "SN123_A938797_Rep1_X", "SN123_A938797_Rep1")) %>%
dplyr::filter(!is.na(Pathologist_Annotations)) %>%
dplyr::filter(Pathologist_Annotations %in% selected_annotations)
df_C2L_Proportions_annotatedspots <- results_C2L_Korean_MajorCells %>%
dplyr::inner_join(spots_annotations, by = c("spot_id" = "spot_id", "sample" = "orig.ident")) %>%
dplyr::select(spot_id, sample, cell_density_q05, Cell_type, Pathologist_Annotations) %>%
dplyr::mutate(spot_sample_id = paste0(sample, "_", spot_id)) %>%
dplyr::group_by(Cell_type, Pathologist_Annotations) %>%
summarise(Total = sum(cell_density_q05))
# dplyr::mutate(Proportions_perSpot = cell_density_q05/sum(cell_density_q05)) %>%
# dplyr::ungroup()
df_patient_ID_transform <- data.frame(
patient = c("A120838","A121573","A416371","A551763","A595688","A798015",
"A938797"),
patient_ID = c("S4_Col_Sig","S5_Rec","S3_Col_R","S1_Cec","S2_Col_R",
"S7_Rec/Sig","S6_Rec"))
df_C2L_Proportions_annotatedspots$Pathologist_Annotations <-
factor(df_C2L_Proportions_annotatedspots$Pathologist_Annotations ,
levels = rev(c("non neo epithelium",
"lamina propria",
"epithelium&lamina propria",
"muscularis_IC med to high",
"tumor",
"tumor&stroma_IC med to high",
"stroma_fibroblastic_IC med to high",
"IC aggregate_lamina propria",
"IC aggregate_stroma or muscularis")))
p1 <- ggplot(df_C2L_Proportions_annotatedspots,aes(x = Pathologist_Annotations , y= Total, fill = Cell_type)) +
geom_bar(stat="identity", position = "fill", width = 0.75, col ="Black") +
coord_flip() +
theme_minimal() +
scale_fill_brewer(palette = "Set1") +
theme( # remove the vertical grid lines
panel.grid.major = element_blank(),
axis.title = element_text(size=14, face = "bold", family="Arial"),
axis.text = element_text(size=12, face = "bold", family="Arial"),
plot.title = element_text(face="bold", size=18, hjust = 0.5)) +
labs(y="Major Cell \n\ Type Proportions", x ="Sample") +
theme(legend.position ="bottom", legend.title = element_blank(),
axis.title.y = element_blank(),
legend.text = element_text(size=12, face="bold", family="Arial")) +
scale_y_continuous(position = "right")
df_C2L_Proportions_annotatedspots_proportions <-
df_C2L_Proportions_annotatedspots %>%
dplyr::group_by(Pathologist_Annotations) %>%
dplyr::mutate(Proportions = Total / sum(Total)) %>%
dplyr::ungroup()
df_C2L_Proportions_annotatedspots_proportions %>%
dplyr::select(Pathologist_Annotations, Cell_type, Proportions) %>%
dplyr::arrange(desc(Pathologist_Annotations), desc(Proportions)) %>%
kbl() %>% kable_styling()
| Pathologist_Annotations | Cell_type | Proportions |
|---|---|---|
| non neo epithelium | Epithelial cells | 0.8959178 |
| non neo epithelium | Tumor Cells | 0.0483514 |
| non neo epithelium | T cells | 0.0211536 |
| non neo epithelium | B cells | 0.0159131 |
| non neo epithelium | Stromal cells | 0.0098541 |
| non neo epithelium | Myeloids | 0.0061617 |
| non neo epithelium | Mast cells | 0.0026483 |
| lamina propria | Epithelial cells | 0.7035515 |
| lamina propria | B cells | 0.0775026 |
| lamina propria | Tumor Cells | 0.0749892 |
| lamina propria | T cells | 0.0542044 |
| lamina propria | Stromal cells | 0.0540047 |
| lamina propria | Myeloids | 0.0293740 |
| lamina propria | Mast cells | 0.0063736 |
| epithelium&lamina propria | Epithelial cells | 0.8447941 |
| epithelium&lamina propria | Tumor Cells | 0.0647482 |
| epithelium&lamina propria | B cells | 0.0315031 |
| epithelium&lamina propria | T cells | 0.0244199 |
| epithelium&lamina propria | Stromal cells | 0.0226263 |
| epithelium&lamina propria | Myeloids | 0.0081363 |
| epithelium&lamina propria | Mast cells | 0.0037721 |
| muscularis_IC med to high | Stromal cells | 0.5837082 |
| muscularis_IC med to high | Epithelial cells | 0.1460257 |
| muscularis_IC med to high | T cells | 0.1254904 |
| muscularis_IC med to high | B cells | 0.0701334 |
| muscularis_IC med to high | Myeloids | 0.0310070 |
| muscularis_IC med to high | Tumor Cells | 0.0229361 |
| muscularis_IC med to high | Mast cells | 0.0206992 |
| tumor | Tumor Cells | 0.3553592 |
| tumor | T cells | 0.2565217 |
| tumor | B cells | 0.2480366 |
| tumor | Epithelial cells | 0.0621975 |
| tumor | Myeloids | 0.0468194 |
| tumor | Stromal cells | 0.0299568 |
| tumor | Mast cells | 0.0011088 |
| tumor&stroma_IC med to high | Tumor Cells | 0.3328156 |
| tumor&stroma_IC med to high | T cells | 0.2334725 |
| tumor&stroma_IC med to high | B cells | 0.2010243 |
| tumor&stroma_IC med to high | Stromal cells | 0.1030106 |
| tumor&stroma_IC med to high | Epithelial cells | 0.0682988 |
| tumor&stroma_IC med to high | Myeloids | 0.0564004 |
| tumor&stroma_IC med to high | Mast cells | 0.0049778 |
| stroma_fibroblastic_IC med to high | Stromal cells | 0.2893739 |
| stroma_fibroblastic_IC med to high | T cells | 0.2480404 |
| stroma_fibroblastic_IC med to high | Tumor Cells | 0.1723412 |
| stroma_fibroblastic_IC med to high | B cells | 0.1470584 |
| stroma_fibroblastic_IC med to high | Myeloids | 0.0854114 |
| stroma_fibroblastic_IC med to high | Epithelial cells | 0.0367632 |
| stroma_fibroblastic_IC med to high | Mast cells | 0.0210116 |
| IC aggregate_lamina propria | B cells | 0.4356801 |
| IC aggregate_lamina propria | T cells | 0.3911308 |
| IC aggregate_lamina propria | Epithelial cells | 0.0645397 |
| IC aggregate_lamina propria | Stromal cells | 0.0412531 |
| IC aggregate_lamina propria | Tumor Cells | 0.0319510 |
| IC aggregate_lamina propria | Myeloids | 0.0308461 |
| IC aggregate_lamina propria | Mast cells | 0.0045992 |
| IC aggregate_stroma or muscularis | B cells | 0.3477493 |
| IC aggregate_stroma or muscularis | T cells | 0.3346706 |
| IC aggregate_stroma or muscularis | Stromal cells | 0.1644068 |
| IC aggregate_stroma or muscularis | Myeloids | 0.0594964 |
| IC aggregate_stroma or muscularis | Tumor Cells | 0.0500007 |
| IC aggregate_stroma or muscularis | Epithelial cells | 0.0292950 |
| IC aggregate_stroma or muscularis | Mast cells | 0.0143811 |
p1
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Fedora 33 (Container Image)
##
## Matrix products: default
## BLAS/LAPACK: /apps/rocs/2020.08/cascadelake/software/OpenBLAS/0.3.9-GCC-9.3.0/lib/libopenblas_skylakexp-r0.3.9.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 ggpubr_0.4.0 cowplot_1.1.1
## [4] purrr_0.3.4 stringr_1.4.0 readr_2.1.2
## [7] dplyr_1.0.9 sp_1.5-0 SeuratObject_4.1.0
## [10] Seurat_4.1.0 patchwork_1.1.1 vctrs_0.4.1
## [13] ggplot2_3.3.6 BiocManager_1.30.18
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.1 systemfonts_1.0.4 plyr_1.8.7
## [4] igraph_1.3.2 lazyeval_0.2.2 splines_4.1.2
## [7] listenv_0.8.0 scattermore_0.8 digest_0.6.29
## [10] htmltools_0.5.2 fansi_1.0.3 magrittr_2.0.3
## [13] tensor_1.5 cluster_2.1.3 ROCR_1.0-11
## [16] tzdb_0.3.0 globals_0.15.0 matrixStats_0.62.0
## [19] vroom_1.5.7 svglite_2.1.0 spatstat.sparse_2.1-1
## [22] colorspace_2.1-0 rvest_1.0.2 ggrepel_0.9.1
## [25] xfun_0.31 crayon_1.5.1 jsonlite_1.8.0
## [28] progressr_0.10.1 spatstat.data_2.2-0 survival_3.3-1
## [31] zoo_1.8-10 glue_1.6.2 polyclip_1.10-0
## [34] gtable_0.3.0 webshot_0.5.3 leiden_0.4.2
## [37] car_3.1-0 future.apply_1.9.0 abind_1.4-7
## [40] scales_1.2.0 DBI_1.1.3 rstatix_0.7.0
## [43] spatstat.random_2.2-0 miniUI_0.1.1.1 Rcpp_1.0.8.3
## [46] viridisLite_0.4.0 xtable_1.8-6 reticulate_1.25
## [49] spatstat.core_2.4-4 bit_4.0.4 htmlwidgets_1.5.4
## [52] httr_1.4.3 RColorBrewer_1.1-3 ellipsis_0.3.2
## [55] ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3
## [58] sass_0.4.1 uwot_0.1.11 deldir_1.0-6
## [61] utf8_1.2.2 labeling_0.4.2 tidyselect_1.1.2
## [64] rlang_1.0.2 reshape2_1.4.4 later_1.3.0
## [67] munsell_0.5.0 tools_4.1.2 cli_3.3.0
## [70] generics_0.1.2 broom_0.8.0 ggridges_0.5.3
## [73] evaluate_0.15 fastmap_1.1.0 yaml_2.3.5
## [76] goftest_1.2-3 knitr_1.39 bit64_4.0.5
## [79] fitdistrplus_1.1-8 RANN_2.6.1 pbapply_1.5-0
## [82] future_1.26.1 nlme_3.1-158 mime_0.12
## [85] xml2_1.3.3 compiler_4.1.2 rstudioapi_0.13
## [88] plotly_4.10.0 png_0.1-7 ggsignif_0.6.3
## [91] spatstat.utils_2.3-1 tibble_3.1.7 bslib_0.3.1
## [94] stringi_1.7.6 highr_0.9 rgeos_0.5-10
## [97] lattice_0.20-45 Matrix_1.4-2 pillar_1.7.0
## [100] lifecycle_1.0.1 spatstat.geom_2.4-0 lmtest_0.9-40
## [103] jquerylib_0.1.4 RcppAnnoy_0.0.19 data.table_1.14.2
## [106] irlba_2.3.5 httpuv_1.6.5 R6_2.5.1
## [109] promises_1.2.0.1 KernSmooth_2.23-20 gridExtra_2.3
## [112] parallelly_1.32.0 codetools_0.2-18 MASS_7.3-57
## [115] assertthat_0.2.1 withr_2.5.0 sctransform_0.3.3
## [118] mgcv_1.8-40 parallel_4.1.2 hms_1.1.1
## [121] grid_4.1.2 rpart_4.1.16 tidyr_1.2.0
## [124] rmarkdown_2.14 carData_3.0-5 Rtsne_0.16
## [127] shiny_1.7.1