###INSTALL Packages### install.packages("remotes") remotes::install_version("Seurat", version = "3.1.4") # or version "3.1.5" install.packages("BiocManager") BiocManager::install("biomaRt") install.packages("Matrix") #####START PIPELINE ALL CELLS PoA Moffit et. all.##### library(Matrix) library(Seurat) library(biomaRt) ###UNZIP THE FILES#### #upload all the files (genes, barcodes, matrix) PoA_files_DIR = "/code_folder/PoA_files/" barcode.path <- paste0(PoA_files_DIR, "GSE113576_barcodes.tsv.gz") features.path <- paste0(PoA_files_DIR, "GSE113576_genes.tsv.gz") matrix.path <- paste0(PoA_files_DIR, "GSE113576_matrix.mtx.gz") mat <- readMM(file = matrix.path) feature.names = read.delim(features.path, header = FALSE, stringsAsFactors = FALSE) barcode.names = read.delim(barcode.path, header = FALSE, stringsAsFactors = FALSE) colnames(mat) = barcode.names$V1 rownames(mat) = feature.names$V1 #replace the Ensemble id with gene name v1 <- mat@Dimnames[[1]] V2 <- feature.names$V1 V3 <- feature.names$V2 mat@Dimnames[[1]] <- V3 #start Seurat pipeline on all cells of the Preoptic Area pre_OA <- CreateSeuratObject(mat, min.cells = 2, min.features = 200) pre_OA <- PercentageFeatureSet(pre_OA, pattern = "^mt-", col.name = "mt.percent") pre_OA <- subset(pre_OA, subset = nFeature_RNA > 200 & nFeature_RNA < 6000 & mt.percent < 10) #check for duplicates in the original matrix. length(pre_OA@assays[["RNA"]]@counts@Dimnames[[1]]) length(unique(pre_OA@assays[["RNA"]]@counts@Dimnames[[1]])) #NOTE:regression for cell cycle genes option 2 s.genes <- cc.genes$s.genes g2m.genes <- cc.genes$g2m.genes convertHumanGeneList <- function(x){ require("biomaRt") human = useMart("ensembl", dataset = "hsapiens_gene_ensembl"); class(human) mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl"); class(mouse) genesV2 = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T) humanx <- unique(genesV2[, 2]) # Print the first 6 genes found to the screen print(head(humanx)) return(humanx) } s.genes <- convertHumanGeneList(cc.genes.updated.2019$s.genes) g2m.genes <- convertHumanGeneList(cc.genes.updated.2019$g2m.genes) pre_OA<-NormalizeData(pre_OA, verbose = T) pre_OA <- CellCycleScoring(pre_OA, s.features = s.genes, g2m.features = g2m.genes, set.ident = FALSE) pre_OA$CC.Difference <- pre_OA$S.Score - pre_OA$G2M.Score pre_OA <- SCTransform(pre_OA, assay = "RNA", new.assay.name = "SCT", vars.to.regress = c("mt.percent", "CC.Difference"), verbose = T, theta_regularization = "log_theta", bin_size = 256) pre_OA <- RunPCA(pre_OA, npcs = 50, verbose = T) pre_OA <- RunUMAP(pre_OA, dims = 1:50, verbose = T) pre_OA <- RunTSNE(pre_OA, dims = 1:50,verbose=T) pre_OA <- FindNeighbors(pre_OA, dims = 1:50, verbose = T) pre_OA <- FindClusters(pre_OA, verbose = T, resolution = 0.6) TSNEPlot(pre_OA, label=TRUE, pt.size=0.5) UMAPPlot(pre_OA, label=TRUE, pt.size=0.5) PCAPlot(pre_OA, label=TRUE, pt.size=0.5) pre_OA_tree<-BuildClusterTree(pre_OA) PlotClusterTree(pre_OA_tree) pre_OA_mark<-FindAllMarkers(pre_OA, only.pos = TRUE, assay = "RNA", slot = "data", test.use = "wilcox") ###UNZIP THE FILES### #####START PIPELINE only VLPO neurons PoA Moffit et. all.##### VLPO_files_DIR = "/code_folder/VLPO_files/" barcode.path <- paste0(VLPO_files_DIR, "VLPO_barcodes.tsv") features.path <- paste0(VLPO_files_DIR, "VLPO_genes.tsv") matrix.path <- paste0(VLPO_files_DIR, "VLPO_matrix.mtx") mat <- readMM(file = matrix.path) feature.names = read.delim(features.path, header = FALSE, stringsAsFactors = FALSE) barcode.names = read.delim(barcode.path, header = FALSE, stringsAsFactors = FALSE) colnames(mat) = barcode.names$V1 rownames(mat) = feature.names$V1 #start Seurat pipeline on neurons of the VLPO VLPO_all <- CreateSeuratObject(mat) VLPO_all <- PercentageFeatureSet(VLPO_all, pattern = "^mt-", col.name = "mt.percent") VLPO_all <- SCTransform(VLPO_all, assay = "RNA", new.assay.name = "SCT", vars.to.regress = "mt.percent", verbose = T, theta_regularization = "log_theta", bin_size = 256) VLPO_all <- RunPCA(VLPO_all, npcs = 50, verbose = T) VLPO_all <- RunUMAP(VLPO_all, dims = 1:50, verbose = T) VLPO_all <- RunTSNE(VLPO_all, dims = 1:50, verbose = T) VLPO_all <- FindNeighbors(VLPO_all, dims = 1:50, verbose = T) VLPO_all <- FindClusters(VLPO_all, resolution = 1.2, verbose = T) TSNEPlot(VLPO_all, label=TRUE, pt.size=0.8) UMAPPlot(VLPO_all, label=TRUE, pt.size=0.8) PCAPlot(VLPO_all, label=TRUE, pt.size=0.8) VLPO_all_tree <- BuildClusterTree(VLPO_all) PlotClusterTree(VLPO_all_tree) VLPO_all_mark <- FindAllMarkers(VLPO_all, only.pos = TRUE, assay = "RNA", slot = "data", test.use = "wilcox") ##Plots for VLP0_all VLPO_all<-RenameIdents(VLPO_all, "0"="n00_Pdyn/Meis2", "1"="n01_Sox6/Satb1", "2"="n02_Fbxw13/Nr4a2", "3"="n03_Id2/Moxd1", "4"="n04_Tac2/Slc17a6", "5"="n05_Rorb/Gal", "6"="n06_C1ql1/Samd3", "7"="n07_Prok2/Cxcl14", "8"="n08_Nr4a1/Arl4d", "9"="n09_Inf2/Crym", "10"="n10_Ctxn3/Pmaip1", "11"="n11_Tcf7l2/Shox2", "12"="n12_Crh/Sema3a", "13"="n13_Fign/Prox1", "14"="n14_Th/Cck", "15"="n15_Nr2e1/Cyp26a1", "16"="n16_Otp/Sim1", "17"="n17_Sostdc1/Fam19a4", "18"="n18_Creb3l1/Eya1", "19"="n19_Igf1/Pxdc1") DoHeatmap(VLPO_all, features = c("Ppp1r1b", "Meis2", "Pdyn", "Sox6", "Satb1", "2610028E06Rik", "Nr4a2", "Cmbl", "Fbxw13", "Id2", "Moxd1", "Nfib", "Slc17a6", "Tac2" , "Nkx2-1", "Gal", "Hmx2", "Rorb", "C1ql1", "Gng8", "Samd3", "Prok2", "Pcp4l1", "Cxcl14", "Arc", "Nr4a1", "Arl4d", "Crym", "Inf2", "Syndig1l", "Pmaip1", "Ctxn3", "Hmx3", "Tcf7l2", "Shox2", "Gbx2", "Crh", "Sema3a", "Pde11a", "Fign", "Prox1", "Qrfpr", "Cck", "Th", "Bmp3", "Nr2e1", "Cyp26a1", "Dkkl1", "Otp", "Sim1", "Ebf3", "Fam19a4", "Nr4a2", "Sostdc1", "Creb3l1", "Eya1", "Eomes", "Igf1", "Pxdc1", "Vip"), cells = NULL, group.by = "seurat_clusters", group.bar = TRUE, group.colors = NULL, disp.min = -2.5, disp.max = NULL, label = TRUE, size = 4, hjust = 0, angle = 45, raster = TRUE, draw.lines = FALSE, lines.width = NULL, group.bar.height = 0.02, combine = TRUE) DotPlot(VLPO_all, features = c("Pdyn", "Sox6", "Fbxw13", "Id2", "Tac2" , "Rorb", "C1ql1", "Prok2", "Nr4a1", "Inf2", "Ctxn3", "Tcf7l2", "Crh", "Fign", "Th", "Nr2e1", "Otp", "Sostdc1", "Creb3l1", "Igf1"))+ RotatedAxis() #VLPO all GABA neurons Idents(VLPO_all)<-"seurat_clusters" GABA0 <- subset(VLPO_all, idents = 0) GABA1 <- subset(VLPO_all, idents = 1) GABA2 <- subset(VLPO_all, idents = 2) GABA3 <- subset(VLPO_all, idents = 3) GABA7 <- subset(VLPO_all, idents = 7) GABA8 <- subset(VLPO_all, idents = 8) GABA9 <- subset(VLPO_all, idents = 9) GABA12 <- subset(VLPO_all, idents = 12) GABA13 <- subset(VLPO_all, idents = 13) GABA15 <- subset(VLPO_all, idents = 15) GABA17 <- subset(VLPO_all, idents = 17) GABA19 <- subset(VLPO_all, idents = 19) GABA0 <- AddMetaData(GABA0, metadata = c("GABA","GABA0"), col.name = c("o.ident", "sample.ident")) GABA1 <- AddMetaData(GABA1, metadata = c("GABA","GABA1"), col.name = c("o.ident", "sample.ident")) GABA2 <- AddMetaData(GABA2, metadata = c("GABA","GABA2"), col.name = c("o.ident", "sample.ident")) GABA3 <- AddMetaData(GABA3, metadata = c("GABA","GABA3"), col.name = c("o.ident", "sample.ident")) GABA7 <- AddMetaData(GABA7, metadata = c("GABA","GABA7"), col.name = c("o.ident", "sample.ident")) GABA8 <- AddMetaData(GABA8, metadata = c("GABA","GABA8"), col.name = c("o.ident", "sample.ident")) GABA9 <- AddMetaData(GABA9, metadata = c("GABA","GABA9"), col.name = c("o.ident", "sample.ident")) GABA12 <- AddMetaData(GABA12, metadata = c("GABA","GABA12"), col.name = c("o.ident", "sample.ident")) GABA13 <- AddMetaData(GABA13, metadata = c("GABA","GABA13"), col.name = c("o.ident", "sample.ident")) GABA15 <- AddMetaData(GABA15, metadata = c("GABA","GABA15"), col.name = c("o.ident", "sample.ident")) GABA17 <- AddMetaData(GABA17, metadata = c("GABA","GABA17"), col.name = c("o.ident", "sample.ident")) GABA19 <- AddMetaData(GABA19, metadata = c("GABA","GABA19"), col.name = c("o.ident", "sample.ident")) GABA<-merge(x=GABA0, y=c(GABA1, GABA2, GABA3, GABA7, GABA8, GABA9, GABA12, GABA13, GABA15, GABA17, GABA19)) #VLPO all GABA-Gal neurons GGal5 <- subset(VLPO_all, idents = 5) GGal10 <- subset(VLPO_all, idents = 10) GGal14 <- subset(VLPO_all, idents = 14) GGal5 <- AddMetaData(GGal5, metadata = c("GGal","GGal5"), col.name = c("o.ident", "sample.ident")) GGal10 <- AddMetaData(GGal10, metadata = c("GGal","GGal10"), col.name = c("o.ident", "sample.ident")) GGal14 <- AddMetaData(GGal14, metadata = c("GGal","GGal14"), col.name = c("o.ident", "sample.ident")) GGal<-merge(x=GGal5, y=c(GGal10, GGal14)) res<-merge(x=GABA, y=GGal) Idents(res)<-"o.ident" DotPlot(res, features = "Hcrtr2") GABA_vs_GABAGal_diff_expr<-FindMarkers(res, ident.1 = "GABA", ident.2 = "GGal", test.use = "wilcox", only.pos = FALSE, assay = "RNA", slot = "data") #genes ordered for FDR (15 upregulated / 15 downregulated) genes <- c("Gal", "Hmx2", "Rorb", "Hmx3", "2010005H15Rik", "Calcr", "Amigo2", "C1ql3", "Rgs10", "Chgb", "Asb4", "Scn9a","Gnas", "Bub3", "Nell1", "Celf2", "Pde1a", "Grm5", "Snca", "Ncdn", "Snap25", "Atp2b1", "Bcl11b", "Arpp21", "Lmo4", "Gria2", "Ppp3ca", "Camkv", "Ndrg4", "Car11") DoHeatmap(res, features = genes, cells = NULL, group.bar = TRUE, group.colors = NULL, disp.min = -2.5, disp.max = NULL, label = FALSE, size = 4, hjust = 0, angle = 45, raster = TRUE, draw.lines = FALSE,lines.width = NULL, group.bar.height = 0.02, combine = TRUE, group.by = "sample.ident") DoHeatmap(res, features = c("Gal","Slc32a1", "Gad1", "Gad2", "Slc17a6", "Hcrtr1", "Hcrtr2"), cells = NULL, group.bar = TRUE, slot = "counts", # or "counts" group.colors = NULL, disp.min = -3, disp.max = NULL, label = TRUE, size = 4, hjust = 0, raster = TRUE, draw.lines = FALSE, lines.width = NULL, group.bar.height = 0.02, combine = TRUE, group.by = "sample.ident") GABA <- merge(x=GABA0, y=c(GABA2, GABA3, GABA7, GABA8, GABA9, GABA12, GABA13, GABA15, GABA17, GABA19)) set.seed(639245) n.cells <- 515 GABA <- subset(GABA, cells = sample(x = GABA@assays[["RNA"]]@counts@Dimnames[[2]], size = n.cells)) GABA1 <- AddMetaData(GABA1, metadata =c("test1"), col.name = c("sample.ident")) GABA <- AddMetaData(GABA, metadata =c("GABA", "test2"), col.name = c("o.ident", "sample.ident")) GGal <- AddMetaData(GGal, metadata =c("GGal", "test3"), col.name = c("o.ident", "sample.ident")) gaba_gaba1 <- merge(x=GABA1, y=GABA) gaba_ggal <- merge(x=GABA, y=GGal) gaba1_ggal <- merge(x=GABA1, y=GGal) GGG <- merge(x=GABA, y=c(GGal, GABA1)) Idents(gaba_gaba1)<-"sample.ident" Idents(gaba_ggal)<-"sample.ident" Idents(gaba1_ggal)<-"sample.ident" Idents(GGG)<-"sample.ident" GABA_1_vs_all_GABAGal <- FindMarkers(gaba1_ggal, ident.1 = "test1", ident.2 = "test3", assay = "RNA", slot = "data", only.pos = FALSE, test.use = "wilcox") GABA_1_vs_all_GABA_except_1 <- FindMarkers(gaba_gaba1, ident.1 = "test1", ident.2 = "test2", assay = "RNA", slot = "data", only.pos = FALSE, test.use = "wilcox") allGABA_except1_vs_allGABAGal <- FindMarkers(gaba_ggal, ident.1 = "test2", ident.2 = "test3", assay = "RNA", slot = "data", only.pos = FALSE, test.use = "wilcox") DoHeatmap(GGG, features = c("Gal", "Hcrtr2"), cells = NULL, group.bar = TRUE, group.colors = NULL, disp.min = -2, disp.max = NULL, label = TRUE, size = 4, hjust = 0, raster = TRUE, draw.lines = FALSE, lines.width = NULL, group.bar.height = 0.02, combine = TRUE, group.by = "sample.ident") VlnPlot(GGG, features = c("Gal", "Hcrtr2"), group.by = "sample.ident", pt.size = 0)