project_id <- "Nowakowski" # determines the name of the cache folder
doc_id <- "01" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out <- paste0("output/", doc_id); dir.create(out, recursive = TRUE)
figout <- paste0("figures/", doc_id, "/")
cache <- paste0("~/tmp/", project_id, "/", doc_id, "/")
Here I’m taking the data for the Nowakowski et al scRNAseq dataset of human fetal telencephalon and producing a Seurat object with the data thy’ve provided.
library(tidyverse)
library(cytobox)
library(glue)
library(data.table)
library(Seurat)
source("../../../sjessa/from_hydra/misc/sjlib.R")
We have metadata for all cells for which we have TPM.
data_tpm <- data.table::fread("../data_from_paper/exprMatrix.tsv.gz",
data.table = FALSE) %>%
tibble::column_to_rownames(var = "gene")
data_raw <- data.table::fread("../data_from_paper/geneMatrix.tsv", data.table = FALSE) %>%
tibble::column_to_rownames(var = "geneId")
meta <- data.table::fread("../data_from_paper/meta.tsv", data.table = FALSE) %>%
mutate(WGCNAcluster = ifelse(WGCNAcluster == "", "None", WGCNAcluster))
rownames(meta) <- meta$`_id`
meta <- meta[colnames(data_tpm), ]
tsne <- data.table::fread("../data_from_paper/tSNE_on_WGCNA.coords.tsv",
data.table = FALSE)
rownames(tsne) <- tsne$V1
colnames(tsne) <- c("Cell", "tSNE_1", "tSNE_2")
clusters <- read_csv("input/nowakowski2017_tableS4_cluster_labels_withcolours.csv")
## Parsed with column specification:
## cols(
## `Cluster Name` = col_character(),
## `Cluster Number (Fig 1B)` = col_double(),
## `Cluster Interpretation` = col_character(),
## Colour = col_character()
## )
clusters[48, ]$`Cluster Name` <- "None"
clusters[48, ]$`Cluster Interpretation` <- "No cluster provided"
clusters[48, ]$Colour <- "gray90"
Check our dimensions:
dim(data_tpm)
## [1] 56864 4261
dim(data_raw)
## [1] 32418 7137
dim(meta)
## [1] 4261 7
dim(tsne)
## [1] 4261 3
The data is provided as TPM so we will log-transform:
data_tpm_log <- log2(as.matrix(data_tpm) + 1)
# Populate object
nowa <- CreateSeuratObject(raw.data = data_raw[, meta$`_id`],
meta.data = meta,
project = "Nowakowski human fetal telencephalon")
# Add TPM as the normalized data
nowa@data <- as(data_tpm_log, "dgCMatrix")
# Add tSNE
nowa <- SetDimReduction(
object = nowa,
reduction.type = "tsne",
slot = "cell.embeddings",
new.data = tsne[, c("tSNE_1", "tSNE_2")] %>%
as.matrix())
nowa <- SetAllIdent(nowa, "WGCNAcluster")
# Add full cell type name to metadata
nowa@meta.data$Cell_type <- plyr::mapvalues(nowa@meta.data$WGCNAcluster,
from = clusters$`Cluster Name`,
to = clusters$`Cluster Interpretation`)
head(nowa@meta.data)
Check it works… nice!
tsne(nowa)
feature(nowa, "TTYH1", label = FALSE, legend = TRUE)
Set a palette – I chose some meaningful colours manually, here, but there are not as many colours as clusters:
nowa@misc$colours <- clusters %>%
select(`Cluster Name`, Colour) %>%
dfToNamedVector()
tsne(nowa, colours = getClusterColours(nowa), label = FALSE, legend = TRUE)
tsne(nowa, colours = nowa@misc$colours, label = FALSE, legend = TRUE)
save(nowa, file = glue("{out}/nowakowski_seurat.Rda"))
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /var/chroots/hydrars-centos-7/usr/lib64/R/lib/libRblas.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=C
## [4] LC_COLLATE=C LC_MONETARY=C LC_MESSAGES=C
## [7] LC_PAPER=C LC_NAME=C LC_ADDRESS=C
## [10] LC_TELEPHONE=C LC_MEASUREMENT=C LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] bindrcpp_0.2.2 Seurat_2.3.4 Matrix_1.2-14
## [4] cowplot_0.9.4 data.table_1.12.0 glue_1.3.0
## [7] cytobox_0.6.1 forcats_0.3.0 stringr_1.3.1
## [10] dplyr_0.7.7 purrr_0.2.5 readr_1.3.1
## [13] tidyr_0.8.2 tibble_1.4.2 ggplot2_3.1.0
## [16] tidyverse_1.2.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.2.0 snow_0.4-3 backports_1.1.3
## [4] Hmisc_4.2-0 plyr_1.8.4 igraph_1.2.2
## [7] lazyeval_0.2.1 splines_3.5.0 digest_0.6.16
## [10] foreach_1.4.4 htmltools_0.3.6 viridis_0.5.1
## [13] lars_1.2 gdata_2.18.0 magrittr_1.5
## [16] checkmate_1.9.1 cluster_2.0.7-1 mixtools_1.1.0
## [19] ROCR_1.0-7 modelr_0.1.3 R.utils_2.7.0
## [22] colorspace_1.4-0 rvest_0.3.2 haven_2.0.0
## [25] xfun_0.4 crayon_1.3.4 jsonlite_1.6
## [28] bindr_0.1.1 survival_2.41-3 zoo_1.8-4
## [31] iterators_1.0.10 ape_5.2 gtable_0.2.0
## [34] kernlab_0.9-27 prabclus_2.2-7 DEoptimR_1.0-8
## [37] scales_1.0.0 mvtnorm_1.0-10 bibtex_0.4.2
## [40] Rcpp_1.0.0 metap_1.1 dtw_1.20-1
## [43] viridisLite_0.3.0 htmlTable_1.13.1 reticulate_1.10
## [46] foreign_0.8-70 bit_1.1-14 proxy_0.4-22
## [49] mclust_5.4.2 SDMTools_1.1-221 Formula_1.2-3
## [52] stats4_3.5.0 tsne_0.1-3 htmlwidgets_1.3
## [55] httr_1.4.0 gplots_3.0.1.1 RColorBrewer_1.1-2
## [58] fpc_2.1-11.1 acepack_1.4.1 modeltools_0.2-22
## [61] ica_1.0-2 pkgconfig_2.0.2 R.methodsS3_1.7.1
## [64] flexmix_2.3-14 nnet_7.3-12 tidyselect_0.2.5
## [67] labeling_0.3 rlang_0.4.0 reshape2_1.4.3
## [70] munsell_0.5.0 cellranger_1.1.0 tools_3.5.0
## [73] cli_1.0.1 generics_0.0.2 broom_0.5.1
## [76] ggridges_0.5.1 evaluate_0.12 yaml_2.2.0
## [79] npsurv_0.4-0 knitr_1.21 bit64_0.9-7
## [82] fitdistrplus_1.0-14 robustbase_0.93-2 caTools_1.17.1.1
## [85] randomForest_4.6-14 RANN_2.6 pbapply_1.4-0
## [88] nlme_3.1-137 R.oo_1.22.0 xml2_1.2.0
## [91] hdf5r_1.0.0 compiler_3.5.0 rstudioapi_0.9.0
## [94] png_0.1-7 lsei_1.2-0 stringi_1.2.4
## [97] lattice_0.20-35 trimcluster_0.1-2.1 pillar_1.3.1
## [100] Rdpack_0.10-1 lmtest_0.9-36 bitops_1.0-6
## [103] irlba_2.3.3 gbRd_0.4-11 R6_2.3.0
## [106] latticeExtra_0.6-28 KernSmooth_2.23-15 gridExtra_2.3
## [109] codetools_0.2-15 MASS_7.3-49 gtools_3.8.1
## [112] assertthat_0.2.0 withr_2.1.2 diptest_0.75-7
## [115] parallel_3.5.0 doSNOW_1.0.16 hms_0.4.2
## [118] grid_3.5.0 rpart_4.1-13 class_7.3-14
## [121] rmarkdown_1.11 segmented_0.5-3.0 Rtsne_0.15
## [124] lubridate_1.7.4 base64enc_0.1-3