Using single-cell chromatin accessibility sequencing to characterize CD4+ T cells from murine tissues - downstream analysis
Kathrin Luise Braband, Annekathrin Ludt, Sara Salome Helbich, Malte Simon, Niklas Beumer, Benedikt Brors, Federico Marini and Michael Delacher
# Set seeds and initial parameters for ArchR
set.seed(1)
# Number of cores to use (adapt to your machine, but has to be > 1 otherwise plotTSSenrichment does not work properly)
addArchRThreads(threads = 8)
## Setting default number of Parallel threads to 8.
# Reference genome (adapt to your data, use the same reference that was used for the alignment)
addArchRGenome("mm10")
## Setting default genome to Mm10.
# Load ArchRProject
proj = loadArchRProject(
path = "ArchRProject",
force = TRUE,
showLogo = FALSE
)
# Get an overview of the project
proj
##
## ___ .______ ______ __ __ .______
## / \ | _ \ / || | | | | _ \
## / ^ \ | |_) | | ,----'| |__| | | |_) |
## / /_\ \ | / | | | __ | | /
## / _____ \ | |\ \\___ | `----.| | | | | |\ \\___.
## /__/ \__\ | _| `._____| \______||__| |__| | _| `._____|
##
## class: ArchRProject
## outputDirectory: /omics/groups/OE0436/internal/Regensburg/Mainz/03_archr/scripts/methods/paper/ArchRProject
## samples(5): scATAC_4 scATAC_1 scATAC_9 scATAC_8 scATAC_5
## sampleColData names(1): ArrowFiles
## cellColData names(34): Sample TSSEnrichment ... Treg_trajectory_skin
## Treg_trajectory_colon
## numberOfCells(1): 14435
## medianTSS(1): 20.872
## medianFrags(1): 29176
# Color by clusters
p_clusters = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "Clusters",
embedding = "UMAP",
size = 0.5
)
# Color by samples
p_samples = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "Sample",
embedding = "UMAP",
size = 0.5
)
plot_grid(p_clusters, p_samples, align = "h", ncol = 2)
# Color by doublet enrichment score
p_doubscore = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "DoubletScore",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
p_doubenr=plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "DoubletEnrichment",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
# Color by number of fragments per cell
p_nfrags = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "nFrags",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
# Plot
plot_grid(p_doubscore, p_doubenr, p_nfrags, align = "h", ncol = 3, scale = 1.1)
# Overlay gene scores on UMAP embedding, use MAGIC smoothing
# Get marker features
markersGS = getMarkerFeatures(
ArchRProj = proj,
useMatrix = "GeneScoreMatrix",
groupBy = "Clusters",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon"
)
# Define which genes to plot
markerGenes = c("Cd4", "Il2ra", "Foxp3", "Batf", "Ccr8", "Ikzf2", "Rorc", "Tbx21", "Ifng", "Gata3", "Il2", "Sell")
# Add impute weights
proj = addImputeWeights(proj)
# Plot
magic_genes = plotEmbedding(
ArchRProj = proj,
colorBy = "GeneScoreMatrix",
name = markerGenes,
embedding = "UMAP",
plotAs = "points",
imputeWeights = getImputeWeights(proj),
size = 0.5
)
# Plot all genes in cowplot
magic_genes_cow = lapply(magic_genes, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 4), magic_genes_cow))
# Save as PDF
plotPDF(magic_genes_cow,
name = "UMAP_genescores.pdf",
ArchRProj = proj,
addDOC = FALSE,
width = 5,
height = 5
)
p_Cd4 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Cd4",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Cd4
p_Il2ra = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Il2ra",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Il2ra
p_Foxp3 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Foxp3",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Foxp3
p_Batf = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Batf",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Batf
p_Ccr8 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Ccr8",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Ccr8
p_Ikzf2 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Ikzf2",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Ikzf2
p_Rorc = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Rorc",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Rorc
p_Tbx21 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Tbx21",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Tbx21
p_Ifng = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Ifng",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Ifng
p_Gata3 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Gata3",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Gata3
# Look at tissue Tregs and their precursor stages in the spleen
markerGenes_treg = c("Foxp3", "Batf", "Nfil3", "Klrg1", "Gata3", "Il1rl1", "Id3", "Id2", "Ikzf2")
# Plot
magic_genes_treg = plotEmbedding(
ArchRProj = proj,
colorBy = "GeneScoreMatrix",
name = markerGenes_treg,
embedding = "UMAP",
plotAs = "points",
imputeWeights = getImputeWeights(proj),
size = 0.5
)
# Plot all genes in cowplot
magic_genes_cow_treg = lapply(magic_genes_treg, function(x){
x + guides(color = FALSE, fill = FALSE) +
theme_ArchR(baseSize = 6.5) +
theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
theme(
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
})
do.call(cowplot::plot_grid, c(list(ncol = 3), magic_genes_cow_treg))
# To further characterize Treg cells in our dataset, we look at various Treg
# markers
p_Foxp3 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Foxp3",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Foxp3
p_Batf = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Batf",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Batf
p_Nfil3 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Nfil3",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Nfil3
p_Klrg1 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Klrg1",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Klrg1
p_Gata3 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Gata3",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Gata3
p_Il1rl1 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Il1rl1",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Il1rl1
p_Id3 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Id3",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Id3
p_Id2 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Id2",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Id2
p_Ikzf2 = plotGroups(
ArchRProj = proj,
groupBy = "Clusters",
colorBy = "GeneScoreMatrix",
name = "Ikzf2",
plotAs = "violin",
alpha = 0.4,
addBoxPlot = TRUE
)
p_Ikzf2
# Define clusters for comparisons/ project subsetting (to be performed later)
tisTreg_cluster = c("C16", "C8", "C9", "C6") #C16 spleen, C8 fat, C9 skin, C6 colon
tTreg_cluster = c("C19", "C18") #C19 early precursor, C18 late precursor
pTreg_cluster = "C7"
# Also scATAC_8 is from spleen, however this sample includes only CD25+ cells
# whereas all other samples are CD4+
spleen = "scATAC_1"
skin = "scATAC_9"
colon = "scATAC_4"
fat = "scATAC_5"
tissue = c(skin, colon, fat)
# Add cluster names to AchRProject
clusters = as.data.frame(proj@cellColData$Clusters)
colnames(clusters) = "V1"
clusters = clusters %>% mutate(across("V1", str_replace, "^C16$", "C16 tisTreg spleen"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C8$", "C8 tisTreg fat"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C9$", "C9 tisTreg skin"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C6$", "C6 tisTreg colon"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C7$", "C7 pTreg colon"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C19$", "C19 early precursor spleen"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C18$", "C18 late precursor spleen"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C1$", "C1 naive CD4+ colon"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C3$", "C3 naive CD4+ spleen/fat"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C2$", "C2 naive CD4+ spleen"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C4$", "C4 naive CD4+ spleen"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C5$", "C5 naive CD4+ spleen"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C14$", "C14 Th1 spleen/fat"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C15$", "C15 Th1 colon"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C11$", "C11 Th1 skin"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C12$", "C12 Th17 skin"))
clusters = clusters %>% mutate(across("V1", str_replace, "^C10$", "C10 Th2 colon"))
clusters_vector = as.vector(clusters$V1)
proj@cellColData$Clusters2 = clusters_vector
# We have a look at the number of cells in the individual clusters
cM_doubfilter = confusionMatrix(paste0(proj$Clusters2), paste0(proj$Sample))
cM_doubfilter
## 21 x 5 sparse Matrix of class "dgCMatrix"
## scATAC_1 scATAC_4 scATAC_5 scATAC_8 scATAC_9
## C4 naive CD4+ spleen 482 . . . .
## C5 naive CD4+ spleen 282 3 8 4 1
## C2 naive CD4+ spleen 2707 . 53 . .
## C18 late precursor spleen 8 . . 291 .
## C16 tisTreg spleen 32 4 19 475 8
## C14 Th1 spleen/fat 174 40 548 2 13
## C19 early precursor spleen 170 9 45 1191 3
## C8 tisTreg fat 10 4 118 30 6
## C11 Th1 skin 1 1 6 . 892
## C3 naive CD4+ spleen/fat 244 43 228 . 10
## C12 Th17 skin 1 7 15 . 472
## C13 . 461 6 . 1
## C6 tisTreg colon . 777 22 . 8
## C1 naive CD4+ colon . 1276 20 . .
## C7 pTreg colon . 513 6 . .
## C15 Th1 colon . 1377 3 . 2
## C20 . 23 7 . 30
## C21 . 188 13 . 22
## C10 Th2 colon . 104 30 . 2
## C17 . 30 5 . 1
## C9 tisTreg skin . 4 1 3 840
# The key parameter here is groupBy, which defines the groups for which
# pseudo-bulk replicates should be made
proj = addGroupCoverages(
ArchRProj = proj,
groupBy = "Clusters2",
minCells = 40,
maxCells = 500,
minReplicates = 2,
maxReplicates = 5,
sampleRatio = 0.8,
force = TRUE
)
# You can use the following function to search the path to Macs2
# However, sometimes this might not work and you have to manually add the path
# like it is shown in the second line.
pathToMacs2 = findMacs2()
# If you manually add the path, you have to change this line!
pathToMacs2 = "../../../../Macs2/bin/macs2"
proj = addReproduciblePeakSet(
ArchRProj = proj,
groupBy = "Clusters2",
pathToMacs2 = pathToMacs2
)
# Add peak matrix to ArchRProject
proj = addPeakMatrix(proj)
# Confirm that peak matrix has been added
getAvailableMatrices(proj)
# Check for enrichment of Treg signatures in Clusters
# signatures from PMID:33789089
signature_names <- c("early_progenitor_tisTreg","late_progenitor_tisTreg","tisTreg_core","tisTreg_skin","tisTreg_VAT","tisTreg_colon")
granges_list <- list()
n <- 1
for (x in signature_names){
filename = paste("data/signatures/",x,".csv", sep = "")
sig_csv = read.csv(filename, sep = "-", header = FALSE)
sig_csv = data.frame(seqnames=sig_csv[,1], start=as.character(sig_csv[,2]), end=as.character(sig_csv[,3]))
granges_list[[n]] = makeGRangesFromDataFrame(sig_csv, seqnames.field="seqnames", start.field="start", end.field="end")
n <- n+1
}
early_progenitor_tisTreg_GR = granges_list[[1]]
late_progenitor_tisTreg_GR = granges_list[[2]]
tisTreg_core_GR = granges_list[[3]]
tisTreg_skin_GR = granges_list[[4]]
tisTreg_VAT_GR = granges_list[[5]]
tisTreg_colon_GR = granges_list[[6]]
# Signature z-scores can be calculated via the 'addDeviationsMatrix' function
# of the chromVAR package. This wrapper function accepts the ArchRProject, the
# GRanges objects with my signatures as a list, and the name for the signature
# set as a string. The output is the ArchRProject with the z-scores and
# deviation-scores added as additional columns to cellColData (one column for
# each signature).
archr_add_peak_signatures = function(project, signature_list, signature_name){
#signature_list: list of granges
#signature_name: name string for the set of signatures
add_df_to_cellcoldata = function(pro, pheno_df, force=FALSE){
stopifnot(identical(rownames(pro@cellColData), rownames(pheno_df)))
cnames = colnames(pheno_df)
for(i in 1:ncol(pheno_df)){
pro = addCellColData(ArchRProj = pro, data=pheno_df[, i], name = cnames[i],
cells = rownames(pro@cellColData), force = force)
}
return(pro)
}
if(length(signature_list)<2){
stop('Currently, only works if at least two signatures are provided')
}
for(i in seq_along(signature_list)){
names(signature_list[[i]]) = NULL
}
project <- addPeakAnnotations(ArchRProj = project,
regions = signature_list,
name = signature_name,
force=T)
method_use = "chromVAR" #does only work with fixed width peaks
if(any(sapply(signature_list, function(x) length(unique(width(x)))) > 1)){
method_use = 'ArchR'
}
project <- addBgdPeaks(project, force = T, method=method_use)
project <- addDeviationsMatrix(
ArchRProj = project,
peakAnnotation = signature_name,
binarize=TRUE,
bgdPeaks = getBgdPeaks(project, method = method_use),
force = TRUE
)
dr_df = as.data.frame(project@cellColData)
sig_se = getMatrixFromProject(project, paste0(signature_name, 'Matrix'))
z_score_mat = t(assays(sig_se)[['z']])
z_score_mat = z_score_mat[match(rownames(dr_df),rownames(z_score_mat)), ]
colnames(z_score_mat) = paste0('z_', colnames(z_score_mat))
stopifnot(identical(rownames(z_score_mat), rownames(dr_df)))
dev_score_mat = t(assays(sig_se)[['deviations']])
dev_score_mat = dev_score_mat[match(rownames(dr_df),rownames(dev_score_mat)), ]
colnames(dev_score_mat) = paste0('dev_', colnames(dev_score_mat))
stopifnot(identical(rownames(dev_score_mat), rownames(dr_df)))
project = add_df_to_cellcoldata(project, z_score_mat, force=T)
project = add_df_to_cellcoldata(project, dev_score_mat, force=T)
return(project)
}
signature_list = list(early_progenitor_tisTreg_sig = early_progenitor_tisTreg_GR,
late_progenitor_tisTreg_sig = late_progenitor_tisTreg_GR,
tisTreg_core_sig = tisTreg_core_GR,
tisTreg_skin_sig = tisTreg_skin_GR,
tisTreg_VAT_sig = tisTreg_VAT_GR,
tisTreg_colon_sig = tisTreg_colon_GR
)
proj <- archr_add_peak_signatures(proj, signature_list, "signatures")
# Overlay signatures on UMAP
p_early_progenitor_tisTreg_sig = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "z_early_progenitor_tisTreg_sig",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
p_late_progenitor_tisTreg_sig = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "z_late_progenitor_tisTreg_sig",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
p_tisTreg_core_sig = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "z_tisTreg_core_sig",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
p_tisTreg_skin_sig = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "z_tisTreg_skin_sig",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
p_tisTreg_VAT_sig = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "z_tisTreg_VAT_sig",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
p_tisTreg_colon_sig = plotEmbedding(
ArchRProj = proj,
colorBy = "cellColData",
name = "z_tisTreg_colon_sig",
embedding = "UMAP",
plotAs = "points",
size = 0.5
)
# Plot
ggAlignPlots(p_early_progenitor_tisTreg_sig, p_late_progenitor_tisTreg_sig, p_tisTreg_core_sig, type = "h")
ggAlignPlots(p_tisTreg_skin_sig, p_tisTreg_VAT_sig, p_tisTreg_colon_sig, type = "h")
# Identify marker peaks (differential peaks) between clusters (cell types):
markersPeaks = getMarkerFeatures(
ArchRProj = proj,
useMatrix = "PeakMatrix",
groupBy = "Clusters2",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon"
)
# Retrieve particular slices of this SummarizedExperiment that we are interested
# in: The default behavior of this function is to return a list of DataFrame
# objects, one for each cell group
markerList = getMarkers(markersPeaks, cutOff = "FDR <= 0.01 & Log2FC >= 1")
# Instead of a list of DataFrame objects, we can use getMarkers() to return a
# GRangesList object by setting returnGR = TRUE
markerList2 = getMarkers(markersPeaks, cutOff = "FDR <= 0.01 & Log2FC >= 1", returnGR = TRUE)
# Plot marker peaks as heatmap
heatmapPeaks = plotMarkerHeatmap(
seMarker = markersPeaks,
cutOff = "FDR <= 0.001 & Log2FC >= 3",
transpose = FALSE,
)
# Plot this heatmap using draw()
heatmap_peaks = ComplexHeatmap::draw(heatmapPeaks,
heatmap_legend_side = "right"
)
# Look for motifs that are enriched in peaks that are up or down in various cell
# types
# We must first add these motif annotations to our ArchRProject; this
# effectively creates a binary matrix where the presence of a motif in each peak
# is indicated numerically
proj = addMotifAnnotations(ArchRProj = proj, motifSet = "cisbp", name = "Motif")
# We perform motif enrichment on our marker peaks
# In this dataset we used more strict thresholds for the FDR and Log2FC
# because of the size of the data; otherwise plots could not be generated.
enrichMotifs = peakAnnoEnrichment(
seMarker = markersPeaks,
ArchRProj = proj,
peakAnnotation = "Motif",
cutOff = "FDR <= 0.001 & Log2FC >= 2"
)
# Plot these motif enrichments across all cell groups
heatmapEM = plotEnrichHeatmap(enrichMotifs,
n = 10,
transpose = TRUE
)
# Visualize
row_order = c("C1 naive CD4+ colon","C3 naive CD4+ spleen/fat","C2 naive CD4+ spleen","C4 naive CD4+ spleen","C5 naive CD4+ spleen","C19 early precursor spleen","C18 late precursor spleen","C16 tisTreg spleen","C8 tisTreg fat","C9 tisTreg skin","C6 tisTreg colon","C7 pTreg colon","C14 Th1 spleen/fat","C15 Th1 colon","C11 Th1 skin","C12 Th17 skin","C10 Th2 colon","C13","C17","C20","C21")
heatmapEM2 = ComplexHeatmap::draw(heatmapEM,
heatmap_legend_side = "bot",
annotation_legend_side = "bot",
row_order = row_order
)
ChromVAR is designed for predicting enrichment of TF activity on a per-cell basis from sparse chromatin accessibility data
# Add a set of background peaks; sample peaks based on similarity in GC-content
# and nFrags across all samples using the Mahalanobis distance
proj = addBgdPeaks(proj, force = T)
# Compute per-cell deviations across all of our motif annotations
proj = addDeviationsMatrix(
ArchRProj = proj,
peakAnnotation = "Motif",
force = TRUE
)
# Access deviations
# If we want this function to return a ggplot object, we set plot = TRUE
# otherwise, this function would return the DataFrame object.
plotVarDev = getVarDeviations(proj, name = "MotifMatrix", plot = TRUE)
## DataFrame with 6 rows and 6 columns
## seqnames idx name combinedVars combinedMeans rank
## <Rle> <array> <array> <numeric> <numeric> <integer>
## f104 z 104 Fos_104 117.3228 -0.008371192 1
## f843 z 843 Smarcc1_843 117.0174 -0.000772193 2
## f108 z 108 Bach1_108 89.5822 0.003612684 3
## f119 z 119 Bach2_119 71.1386 0.039212150 4
## f98 z 98 Fosb_98 69.8579 -0.008232795 5
## f135 z 135 Jund_135 62.7090 0.083707388 6
# plot variable deviations:
plotVarDev
# If we want to extract a subset of motifs for downstream analysis,
# we can do this using the getFeatures() function.
motifs = c("Batf")
markerMotifs = getFeatures(proj, select = paste(motifs, collapse="|"), useMatrix = "MotifMatrix")
markerMotifs
## [1] "z:Batf_790" "z:Batf3_789" "deviations:Batf_790"
## [4] "deviations:Batf3_789"
# Exclude Batf3
markerMotifs = grep("z:", markerMotifs, value = TRUE)
markerMotifs = markerMotifs[markerMotifs %ni% "z:Batf3_789"]
markerMotifs
## [1] "z:Batf_790"
# We supply the impute weights that we calculated previously during our gene
# score analyses (impute weights allow us to smooth the signal across nearby
# cells, helpful in the context of our sparse scATAC-seq data).
# First, re-run addImputeWeights (subset cells from the original imputation)
proj = addImputeWeights(proj)
# Then, plot distribution of chromVAR deviation scores
p_zscore = plotGroups(ArchRProj = proj,
groupBy = "Clusters",
colorBy = "MotifMatrix",
name = markerMotifs,
imputeWeights = getImputeWeights(proj)
)
p_zscore
# Instead of looking at the distributions of these z-scores, we can overlay the
# z-scores on our UMAP embedding as we’ve done previously for gene scores.
# Overlay z-scores
p_zscore_UMAP_Batf = plotEmbedding(
ArchRProj = proj,
colorBy = "MotifMatrix",
name = "z:Batf_790",
embedding = "UMAP",
plotAs = "points",
imputeWeights = getImputeWeights(proj),
size = 0.5
)
p_gene_exp_UMAP_Batf = plotEmbedding(
ArchRProj = proj,
colorBy = "GeneScoreMatrix",
name = "Batf",
embedding = "UMAP",
plotAs = "points",
imputeWeights = getImputeWeights(proj),
size = 0.5
)
plot_grid(p_zscore_UMAP_Batf, p_gene_exp_UMAP_Batf, align = "h", ncol = 2)
# Obtain the positions of the relevant motifs
motifPositions = getPositions(proj, name = "Motif")
motifPositions
## GRangesList object of length 884:
## $Tcfap2a_1
## GRanges object with 13813 ranges and 1 metadata column:
## seqnames ranges strand | score
## <Rle> <IRanges> <Rle> | <numeric>
## [1] chr1 4808001-4808010 - | 7.48774
## [2] chr1 4857513-4857522 + | 7.02247
## [3] chr1 6214851-6214860 + | 6.96612
## [4] chr1 7395839-7395848 + | 7.11519
## [5] chr1 7395840-7395849 - | 7.56411
## ... ... ... ... . ...
## [13809] chrX 167203920-167203929 + | 7.26500
## [13810] chrX 167209093-167209102 - | 7.27392
## [13811] chrX 167212044-167212053 + | 7.77924
## [13812] chrX 168673410-168673419 - | 7.46782
## [13813] chrX 169319935-169319944 + | 7.70292
## -------
## seqinfo: 20 sequences from an unspecified genome; no seqlengths
##
## ...
## <883 more elements>
# This creates a GRangesList object where each TF motif is represented by a
# separate GRanges object. We can subset this GRangesList to a few TF motifs
# that we are interested in.
motifs_fp = c("Tbx21", "Gata3", "Rorc", "Rora", "Foxp3", "Batf", "Ikzf2")
markerMotifs_fp = unlist(lapply(motifs_fp, function(x) grep(x, names(motifPositions), value = TRUE)))
markerMotifs_fp
## [1] "Tbx21_762" "Gata3_384" "Rorc_678" "Rora_682" "Foxp3_311" "Batf3_789"
## [7] "Batf_790"
# Exclude Batf3
markerMotifs_fp = markerMotifs_fp[markerMotifs_fp %ni% "Batf3_789"]
markerMotifs_fp
## [1] "Tbx21_762" "Gata3_384" "Rorc_678" "Rora_682" "Foxp3_311" "Batf_790"
# To accurately profile TF footprints, a large number of reads is required.
# Therefore we will use the pseudobulk data stored as group coverages calculated above.
# Compute footprints for the subset of marker motifs defined above
seFoot = getFootprints(
ArchRProj = proj,
positions = motifPositions[markerMotifs_fp],
groupBy = "Clusters2"
)
# Subset proj (with all matrices added, see "methods.Rmd") to only Treg cells
proj_treg = proj[(proj@cellColData@listData[["Clusters"]] == c(tisTreg_cluster,tTreg_cluster)), ]
# Load ArchRProject
proj_treg = loadArchRProject(
path = "ArchRProject_Treg",
force = TRUE,
showLogo = FALSE
)
# Add UMAP embedding
proj_treg = addUMAP(
ArchRProj = proj_treg,
reducedDims = "IterativeLSI",
name = "UMAP",
nNeighbors = 30,
minDist = 0.5,
metric = "cosine",
force = TRUE
)
# The key parameter here is groupBy, which defines the groups for which
# pseudo-bulk replicates should be made.
proj_treg = addGroupCoverages(
ArchRProj = proj_treg,
groupBy = "Clusters2",
minCells = 40,
maxCells = 500, #standard
minReplicates = 2, #standard
maxReplicates = 5, #standard
sampleRatio = 0.8,
force = TRUE
)
# Compute footprints for the subset of marker motifs defined above
seFoot = getFootprints(
ArchRProj = proj_treg,
positions = motifPositions[markerMotifs_fp],
groupBy = "Clusters2"
)
plotFootprints(
seFoot = seFoot,
ArchRProj = proj,
normMethod = "Subtract",
plotName = "Footprints-Subtract-Bias",
addDOC = FALSE,
smoothWindow = 5
)
# Plots are saved in the output directory of the ArchRProject by default
# (ggplot object would be extremely large).
plotFootprints(
seFoot = seFoot,
ArchRProj = proj_treg,
normMethod = "Subtract",
plotName = "Footprints-Subtract-Bias_Treg",
addDOC = FALSE,
smoothWindow = 5
)
plotFootprints(
seFoot = seFoot,
ArchRProj = proj,
normMethod = "Divide",
plotName = "Footprints-Divide-Bias",
addDOC = FALSE,
smoothWindow = 5
)
plotFootprints(
seFoot = seFoot,
ArchRProj = proj_treg,
normMethod = "Divide",
plotName = "Footprints-Divide-Bias_Treg",
addDOC = FALSE,
smoothWindow = 5
)
plotFootprints(
seFoot = seFoot,
ArchRProj = proj,
normMethod = "None",
plotName = "Footprints-No-Normalization",
addDOC = FALSE,
smoothWindow = 5
)
plotFootprints(
seFoot = seFoot,
ArchRProj = proj_treg,
normMethod = "None",
plotName = "Footprints-No-Normalization_Treg",
addDOC = FALSE,
smoothWindow = 5
)
# Co-accessibility is a correlation in accessibility between two peaks
# across many single cells. Calculate co-accessibility
proj = addCoAccessibility(
ArchRProj = proj,
reducedDims = "IterativeLSI"
)
# Retrieve co-accessibility information via the getCoAccessibility() function
# Increase resolution to plow fewer co-accessibility interactions
# if returnLoops = FALSE, then the function will return a dataframe instead of
# a loop track
cA = getCoAccessibility(
ArchRProj = proj,
corCutOff = 0.5,
resolution = 1000,
returnLoops = TRUE
)
# Plot browser tracks of co-accessibility for our marker genes
markerGenes = c("Cd4", "Il2ra", "Foxp3", "Batf", "Ccr8", "Ikzf2", "Rorc", "Tbx21", "Ifng", "Gata3", "Il2", "Sell")
p_cA = plotBrowserTrack(
ArchRProj = proj,
groupBy = "Clusters",
geneSymbol = markerGenes,
upstream = 50000,
downstream = 50000,
loops = getCoAccessibility(proj)
)
## GRanges object with 12 ranges and 2 metadata columns:
## seqnames ranges strand | gene_id symbol
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chr6 124864693-124888209 - | 12504 Cd4
## [2] chr2 11642792-11693194 + | 16184 Il2ra
## [3] chrX 7579676-7595243 + | 20371 Foxp3
## [4] chr12 85686720-85709087 + | 53314 Batf
## [5] chr9 120092133-120094906 + | 12776 Ccr8
## ... ... ... ... . ... ...
## [8] chr11 97098007-97115331 - | 57765 Tbx21
## [9] chr10 118441046-118445894 + | 15978 Ifng
## [10] chr2 9857078-9878600 - | 14462 Gata3
## [11] chr3 37120713-37125954 - | 16183 Il2
## [12] chr1 164062076-164080785 + | 20343 Sell
## -------
## seqinfo: 21 sequences from mm10 genome
# To plot the browser track select a specific marker gene using the $ accessor
grid::grid.newpage()
grid::grid.draw(p_cA$Batf)
# Create user-defined trajectory backbone from early via late progenitors to
# tissue Treg cells in the respective non-lymphoid tissue
Treg_trajectory_VAT = c("C19", "C18", "C16", "C8")
Treg_trajectory_skin = c("C19", "C18", "C16", "C9")
Treg_trajectory_colon = c("C19", "C18", "C16", "C6")
# Create the trajectory
# This creates a new column in cellColData that stores the pseudo-time value for
# each cell in the trajectory
proj = addTrajectory(
ArchRProj = proj,
name = "Treg_trajectory_VAT",
groupBy = "Clusters",
trajectory = Treg_trajectory_VAT,
embedding = "UMAP",
force = TRUE
)
proj = addTrajectory(
ArchRProj = proj,
name = "Treg_trajectory_skin",
groupBy = "Clusters",
trajectory = Treg_trajectory_skin,
embedding = "UMAP",
force = TRUE
)
proj = addTrajectory(
ArchRProj = proj,
name = "Treg_trajectory_colon",
groupBy = "Clusters",
trajectory = Treg_trajectory_colon,
embedding = "UMAP",
force = TRUE
)
# Exclude cells with NA values because these are not part of the trajectory
proj$Treg_trajectory_VAT[!is.na(proj$Treg_trajectory_VAT)]
proj$Treg_trajectory_skin[!is.na(proj$Treg_trajectory_skin)]
proj$Treg_trajectory_colon[!is.na(proj$Treg_trajectory_colon)]
# Save ArchRProject
saveArchRProject(proj,
outputDirectory= "ArchRProject",
overwrite = TRUE,
load = FALSE
)
# Plot trajectory
# This overlays the pseudo-time values on our UMAP embedding and displays an
# arrow approximating the trajectory path from the spline-fit; cells that are
# not part of the trajectory are coloured in gray.
Treg_trajectory_VAT_p = plotTrajectory(proj,
trajectory = "Treg_trajectory_VAT",
colorBy = "cellColData",
name = "Treg_trajectory_VAT",
plotAs = "points"
)
Treg_trajectory_skin_p = plotTrajectory(proj,
trajectory = "Treg_trajectory_skin",
colorBy = "cellColData",
name = "Treg_trajectory_skin",
plotAs = "points"
)
Treg_trajectory_colon_p = plotTrajectory(proj,
trajectory = "Treg_trajectory_colon",
colorBy = "cellColData",
name = "Treg_trajectory_colon",
plotAs = "points"
)
ggAlignPlots(Treg_trajectory_VAT_p[[1]], Treg_trajectory_skin_p[[1]], Treg_trajectory_colon_p[[1]], type = "h")
# Overlay other features on the trajectory within our UMAP embedding
# (display specific features only within the cells that are relevant to our
# trajectory)
# Overlay genescores on Treg trajectory
# Batf
Treg_traj_VAT_p_Batf = plotTrajectory(proj,
trajectory = "Treg_trajectory_VAT",
colorBy = "GeneScoreMatrix",
name = "Batf",
continuousSet = "horizonExtra",
plotAs = "points"
)
Treg_traj_skin_p_Batf = plotTrajectory(proj,
trajectory = "Treg_trajectory_skin",
colorBy = "GeneScoreMatrix",
name = "Batf",
continuousSet = "horizonExtra",
plotAs = "points"
)
Treg_traj_colon_p_Batf = plotTrajectory(proj,
trajectory = "Treg_trajectory_colon",
colorBy = "GeneScoreMatrix",
name = "Batf",
continuousSet = "horizonExtra",
plotAs = "points"
)
# Overlay motif scores on Treg trajectory
# Batf
Treg_traj_VAT_p_Batf_motif = plotTrajectory(proj,
trajectory = "Treg_trajectory_VAT",
colorBy = "MotifMatrix",
name = "z:Batf_790",
continuousSet = "solarExtra",
plotAs = "points"
)
Treg_traj_skin_p_Batf_motif = plotTrajectory(proj,
trajectory = "Treg_trajectory_skin",
colorBy = "MotifMatrix",
name = "z:Batf_790",
continuousSet = "solarExtra",
plotAs = "points"
)
Treg_traj_colon_p_Batf_motif = plotTrajectory(proj,
trajectory = "Treg_trajectory_colon",
colorBy = "MotifMatrix",
name = "z:Batf_790",
continuousSet = "solarExtra",
plotAs = "points"
)
# Plot
ggAlignPlots(Treg_trajectory_VAT_p[[1]], Treg_traj_VAT_p_Batf[[2]], type = "h", sizes = c(1,2))
ggAlignPlots(Treg_trajectory_skin_p[[1]], Treg_traj_skin_p_Batf[[2]], type = "h", sizes = c(1,2))
ggAlignPlots(Treg_trajectory_colon_p[[1]], Treg_traj_colon_p_Batf[[2]], type = "h", sizes = c(1,2))
ggAlignPlots(Treg_trajectory_VAT_p[[1]], Treg_traj_VAT_p_Batf_motif[[2]], type = "h", sizes = c(1,2))
ggAlignPlots(Treg_trajectory_skin_p[[1]], Treg_traj_skin_p_Batf_motif[[2]], type = "h", sizes = c(1,2))
ggAlignPlots(Treg_trajectory_colon_p[[1]], Treg_traj_colon_p_Batf_motif[[2]], type = "h", sizes = c(1,2))