project_id <- "Nowakowski" # determines the name of the cache folder
doc_id     <- "01" # determines name of the subfolder of `figures` where pngs/pdfs are saved
out        <- paste0("output/", doc_id); dir.create(out, recursive = TRUE)
figout     <- paste0("figures/", doc_id, "/")
cache      <- paste0("~/tmp/", project_id, "/", doc_id, "/")

1 Overview

Here I’m taking the data for the Nowakowski et al scRNAseq dataset of human fetal telencephalon and producing a Seurat object with the data thy’ve provided.

2 Set up

library(tidyverse)
library(cytobox)
library(glue)
library(data.table)
library(Seurat)

source("../../../sjessa/from_hydra/misc/sjlib.R")

3 Load data

We have metadata for all cells for which we have TPM.

data_tpm <- data.table::fread("../data_from_paper/exprMatrix.tsv.gz",
                              data.table = FALSE) %>% 
  tibble::column_to_rownames(var = "gene")

data_raw <- data.table::fread("../data_from_paper/geneMatrix.tsv", data.table = FALSE) %>% 
  tibble::column_to_rownames(var = "geneId")

meta <- data.table::fread("../data_from_paper/meta.tsv", data.table = FALSE) %>% 
  mutate(WGCNAcluster = ifelse(WGCNAcluster == "", "None", WGCNAcluster))

rownames(meta) <- meta$`_id`
meta <- meta[colnames(data_tpm), ]

tsne <- data.table::fread("../data_from_paper/tSNE_on_WGCNA.coords.tsv",
                          data.table = FALSE)
rownames(tsne) <- tsne$V1
colnames(tsne) <- c("Cell", "tSNE_1", "tSNE_2")

clusters <- read_csv("input/nowakowski2017_tableS4_cluster_labels_withcolours.csv")
## Parsed with column specification:
## cols(
##   `Cluster Name` = col_character(),
##   `Cluster Number (Fig 1B)` = col_double(),
##   `Cluster Interpretation` = col_character(),
##   Colour = col_character()
## )
clusters[48, ]$`Cluster Name` <- "None"
clusters[48, ]$`Cluster Interpretation` <- "No cluster provided"
clusters[48, ]$Colour <- "gray90"

Check our dimensions:

dim(data_tpm)
## [1] 56864  4261
dim(data_raw)
## [1] 32418  7137
dim(meta)
## [1] 4261    7
dim(tsne)
## [1] 4261    3

The data is provided as TPM so we will log-transform:

data_tpm_log <- log2(as.matrix(data_tpm) + 1)

4 Make Seurat object

# Populate object
nowa <- CreateSeuratObject(raw.data = data_raw[, meta$`_id`],
                           meta.data = meta,
                           project = "Nowakowski human fetal telencephalon")

# Add TPM as the normalized data
nowa@data <- as(data_tpm_log, "dgCMatrix")

# Add tSNE
nowa <- SetDimReduction(
  object = nowa,
  reduction.type = "tsne",
  slot = "cell.embeddings",
  new.data = tsne[, c("tSNE_1", "tSNE_2")] %>%
    as.matrix())

nowa <- SetAllIdent(nowa, "WGCNAcluster")

# Add full cell type name to metadata
nowa@meta.data$Cell_type <- plyr::mapvalues(nowa@meta.data$WGCNAcluster,
                                            from = clusters$`Cluster Name`,
                                            to = clusters$`Cluster Interpretation`)

head(nowa@meta.data)

Check it works… nice!

tsne(nowa)

feature(nowa, "TTYH1", label = FALSE, legend = TRUE)

Set a palette – I chose some meaningful colours manually, here, but there are not as many colours as clusters:

nowa@misc$colours <- clusters %>%
  select(`Cluster Name`, Colour) %>%
  dfToNamedVector()

tsne(nowa, colours = getClusterColours(nowa), label = FALSE, legend = TRUE)

tsne(nowa, colours = nowa@misc$colours, label = FALSE, legend = TRUE)

5 Save

save(nowa, file = glue("{out}/nowakowski_seurat.Rda"))

6 Session info

sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
## 
## Matrix products: default
## BLAS/LAPACK: /var/chroots/hydrars-centos-7/usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C         LC_TIME=C           
##  [4] LC_COLLATE=C         LC_MONETARY=C        LC_MESSAGES=C       
##  [7] LC_PAPER=C           LC_NAME=C            LC_ADDRESS=C        
## [10] LC_TELEPHONE=C       LC_MEASUREMENT=C     LC_IDENTIFICATION=C 
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] bindrcpp_0.2.2    Seurat_2.3.4      Matrix_1.2-14    
##  [4] cowplot_0.9.4     data.table_1.12.0 glue_1.3.0       
##  [7] cytobox_0.6.1     forcats_0.3.0     stringr_1.3.1    
## [10] dplyr_0.7.7       purrr_0.2.5       readr_1.3.1      
## [13] tidyr_0.8.2       tibble_1.4.2      ggplot2_3.1.0    
## [16] tidyverse_1.2.1  
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.2.0        snow_0.4-3          backports_1.1.3    
##   [4] Hmisc_4.2-0         plyr_1.8.4          igraph_1.2.2       
##   [7] lazyeval_0.2.1      splines_3.5.0       digest_0.6.16      
##  [10] foreach_1.4.4       htmltools_0.3.6     viridis_0.5.1      
##  [13] lars_1.2            gdata_2.18.0        magrittr_1.5       
##  [16] checkmate_1.9.1     cluster_2.0.7-1     mixtools_1.1.0     
##  [19] ROCR_1.0-7          modelr_0.1.3        R.utils_2.7.0      
##  [22] colorspace_1.4-0    rvest_0.3.2         haven_2.0.0        
##  [25] xfun_0.4            crayon_1.3.4        jsonlite_1.6       
##  [28] bindr_0.1.1         survival_2.41-3     zoo_1.8-4          
##  [31] iterators_1.0.10    ape_5.2             gtable_0.2.0       
##  [34] kernlab_0.9-27      prabclus_2.2-7      DEoptimR_1.0-8     
##  [37] scales_1.0.0        mvtnorm_1.0-10      bibtex_0.4.2       
##  [40] Rcpp_1.0.0          metap_1.1           dtw_1.20-1         
##  [43] viridisLite_0.3.0   htmlTable_1.13.1    reticulate_1.10    
##  [46] foreign_0.8-70      bit_1.1-14          proxy_0.4-22       
##  [49] mclust_5.4.2        SDMTools_1.1-221    Formula_1.2-3      
##  [52] stats4_3.5.0        tsne_0.1-3          htmlwidgets_1.3    
##  [55] httr_1.4.0          gplots_3.0.1.1      RColorBrewer_1.1-2 
##  [58] fpc_2.1-11.1        acepack_1.4.1       modeltools_0.2-22  
##  [61] ica_1.0-2           pkgconfig_2.0.2     R.methodsS3_1.7.1  
##  [64] flexmix_2.3-14      nnet_7.3-12         tidyselect_0.2.5   
##  [67] labeling_0.3        rlang_0.4.0         reshape2_1.4.3     
##  [70] munsell_0.5.0       cellranger_1.1.0    tools_3.5.0        
##  [73] cli_1.0.1           generics_0.0.2      broom_0.5.1        
##  [76] ggridges_0.5.1      evaluate_0.12       yaml_2.2.0         
##  [79] npsurv_0.4-0        knitr_1.21          bit64_0.9-7        
##  [82] fitdistrplus_1.0-14 robustbase_0.93-2   caTools_1.17.1.1   
##  [85] randomForest_4.6-14 RANN_2.6            pbapply_1.4-0      
##  [88] nlme_3.1-137        R.oo_1.22.0         xml2_1.2.0         
##  [91] hdf5r_1.0.0         compiler_3.5.0      rstudioapi_0.9.0   
##  [94] png_0.1-7           lsei_1.2-0          stringi_1.2.4      
##  [97] lattice_0.20-35     trimcluster_0.1-2.1 pillar_1.3.1       
## [100] Rdpack_0.10-1       lmtest_0.9-36       bitops_1.0-6       
## [103] irlba_2.3.3         gbRd_0.4-11         R6_2.3.0           
## [106] latticeExtra_0.6-28 KernSmooth_2.23-15  gridExtra_2.3      
## [109] codetools_0.2-15    MASS_7.3-49         gtools_3.8.1       
## [112] assertthat_0.2.0    withr_2.1.2         diptest_0.75-7     
## [115] parallel_3.5.0      doSNOW_1.0.16       hms_0.4.2          
## [118] grid_3.5.0          rpart_4.1-13        class_7.3-14       
## [121] rmarkdown_1.11      segmented_0.5-3.0   Rtsne_0.15         
## [124] lubridate_1.7.4     base64enc_0.1-3