library(tidyverse)
library(ggplot2)
library(ggrepel)
library(reshape2)
library(factoextra)
library(ComplexHeatmap)
library(RColorBrewer)
library(DescTools)
library(viridis)
library(matrixStats)
library(viridis)
source("./visualization.functions.R")
Here is a demonstration of how to use the 87 LSGI processed tumor ST data. We include demos to visualize the spatial gradients, finding the proximity between gradients, and annotate them with functional gene sets (GO, KEGG, etc.).
The processed data are all related our manuscript, “Interpretable Spatial Gradient Analysis for Spatial Transcriptomics Data”.
For more general tutorial of LSGI, please visit the GitHub page.
infos <- readRDS("./UKF243_T_ST.out.info.rds") # LSGI output
# plot multiple factors
plot.factors.gradient.ind(info = infos, r_squared_thresh = 0.6,
minimum.nmf = 20) # plot gradient (had to appear in at least 20 grids)
plot.factors.gradient.ind(info = infos, r_squared_thresh = 0.6,
sel.factors = c("NMF_5", "NMF_3")) # plot selected gradients
# plot individual factor together with the NMF loadings
plot.factor.gradient.ind(info = infos, fctr = c("NMF_3"), r_squared_thresh = 0.6)
dist.mat <- avg.dist.calc(info = infos, minimum.nmf = 20) # calculate average distance between NMF gradients
plt.dist.heat(dist.mat) # plot distance heatmap
# this can be done in the same way of NMF factor annotation
# there are different ways of doing this analysis, here we
# use hypergeometric test with top 50 genes in each NMF
# (top loadings) here we only use hallmark gene sets as a
# brief example
# the top genes are stored in the LSGI outputs
print(str(infos$nmf.info$top.genes))
#> List of 10
#> $ NMF_1 : chr [1:50] "GAPDH" "SEC61G" "MIF" "UQCR11" ...
#> $ NMF_2 : chr [1:50] "NCAN" "MALAT1" "DBI" "FABP7" ...
#> $ NMF_3 : chr [1:50] "GFAP" "GJA1" "CLU" "AQP1" ...
#> $ NMF_4 : chr [1:50] "NDRG1" "VEGFA" "IGFBP5" "EPAS1" ...
#> $ NMF_5 : chr [1:50] "FTH1" "FTL" "GAPDH" "EEF1A1" ...
#> $ NMF_6 : chr [1:50] "CHI3L1" "TAGLN" "IGFBP7" "SOD2" ...
#> $ NMF_7 : chr [1:50] "HSPA5" "ERO1A" "DNAJB9" "GBE1" ...
#> $ NMF_8 : chr [1:50] "SNAP25" "NRGN" "OLFM1" "CAMK2N1" ...
#> $ NMF_9 : chr [1:50] "GFAP" "EEF1A1" "GAPDH" "CLU" ...
#> $ NMF_10: chr [1:50] "SPP1" "APOC1" "CD74" "FTL" ...
#> NULL
# obtain gene sets
library(msigdbr)
library(hypeR)
mdb_h <- msigdbr(species = "Homo sapiens", category = "H")
gene.set.list <- list()
for (gene.set.name in unique(mdb_h$gs_name)) {
gene.set.list[[gene.set.name]] <- mdb_h[mdb_h$gs_name %in%
gene.set.name, ]$gene_symbol
}
# run hypeR test
mhyp <- hypeR(signature = infos$nmf.info$top.genes, genesets = gene.set.list,
test = "hypergeometric", background = rownames(infos[["nmf.info"]][["feature.loadings"]]))
hyper.data <- mhyp$data
hyper.res.list <- list()
for (nmf.name in names(hyper.data)) {
res <- as.data.frame(hyper.data[[nmf.name]]$data)
hyper.res.list[[nmf.name]] <- res
}
print(head(hyper.res.list[[1]])) # here we output part of the NMF_1 annotation result
#> label
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION HALLMARK_OXIDATIVE_PHOSPHORYLATION
#> HALLMARK_HYPOXIA HALLMARK_HYPOXIA
#> HALLMARK_MTORC1_SIGNALING HALLMARK_MTORC1_SIGNALING
#> HALLMARK_ADIPOGENESIS HALLMARK_ADIPOGENESIS
#> HALLMARK_GLYCOLYSIS HALLMARK_GLYCOLYSIS
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY
#> pval fdr signature geneset
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION 7.5e-15 3.8e-13 50 193
#> HALLMARK_HYPOXIA 8.4e-04 8.8e-03 50 195
#> HALLMARK_MTORC1_SIGNALING 8.4e-04 8.8e-03 50 195
#> HALLMARK_ADIPOGENESIS 8.5e-04 8.8e-03 50 196
#> HALLMARK_GLYCOLYSIS 8.8e-04 8.8e-03 50 198
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY 5.1e-03 4.2e-02 50 49
#> overlap background
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION 12 23109
#> HALLMARK_HYPOXIA 4 23109
#> HALLMARK_MTORC1_SIGNALING 4 23109
#> HALLMARK_ADIPOGENESIS 4 23109
#> HALLMARK_GLYCOLYSIS 4 23109
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY 2 23109
#> hits
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION ATP5F1E,ATP5MF,COX6A1,COX6B1,COX6C,NDUFA3,NDUFA4,NDUFA7,NDUFB2,NDUFB4,UQCR11,UQCRQ
#> HALLMARK_HYPOXIA ENO1,GAPDH,MIF,TPI1
#> HALLMARK_MTORC1_SIGNALING ENO1,GAPDH,PPIA,TPI1
#> HALLMARK_ADIPOGENESIS COX6A1,DDT,UQCR11,UQCRQ
#> HALLMARK_GLYCOLYSIS ENO1,MIF,PPIA,TPI1
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY FTL,NDUFB4
# Visualize annotation results
ggplot(hyper.res.list[[1]][1:5, ], aes(x = reorder(label, -log10(fdr)),
y = overlap/signature, fill = -log10(fdr))) + geom_bar(stat = "identity",
show.legend = T) + xlab("Gene Set") + ylab("Gene Ratio") +
viridis::scale_fill_viridis() + theme_classic() + coord_flip() +
theme(axis.text.x = element_text(color = "black", size = 12,
angle = 45, hjust = 1), axis.text.y = element_text(color = "black",
size = 8, angle = 0), panel.border = element_rect(colour = "black",
fill = NA, size = 1))