LSGI annotation and visualization demo

Introduction

Here is a demonstration of how to use the 87 LSGI processed tumor ST data. We include demos to visualize the spatial gradients, finding the proximity between gradients, and annotate them with functional gene sets (GO, KEGG, etc.).

The processed data are all related our manuscript, “Interpretable Spatial Gradient Analysis for Spatial Transcriptomics Data”.

For more general tutorial of LSGI, please visit the GitHub page.

Read data and plot the gradients


infos <- readRDS("./UKF243_T_ST.out.info.rds")  # LSGI output
# plot multiple factors
plot.factors.gradient.ind(info = infos, r_squared_thresh = 0.6,
    minimum.nmf = 20)  # plot gradient (had to appear in at least 20 grids)

plot.factors.gradient.ind(info = infos, r_squared_thresh = 0.6,
    sel.factors = c("NMF_5", "NMF_3"))  # plot selected gradients

Plot single gradient with NMF loadings

# plot individual factor together with the NMF loadings
plot.factor.gradient.ind(info = infos, fctr = c("NMF_3"), r_squared_thresh = 0.6)

Distance analysis

dist.mat <- avg.dist.calc(info = infos, minimum.nmf = 20)  # calculate average distance between NMF gradients
plt.dist.heat(dist.mat)  # plot distance heatmap

Functional annotation

# this can be done in the same way of NMF factor annotation
# there are different ways of doing this analysis, here we
# use hypergeometric test with top 50 genes in each NMF
# (top loadings) here we only use hallmark gene sets as a
# brief example

# the top genes are stored in the LSGI outputs
print(str(infos$nmf.info$top.genes))
#> List of 10
#>  $ NMF_1 : chr [1:50] "GAPDH" "SEC61G" "MIF" "UQCR11" ...
#>  $ NMF_2 : chr [1:50] "NCAN" "MALAT1" "DBI" "FABP7" ...
#>  $ NMF_3 : chr [1:50] "GFAP" "GJA1" "CLU" "AQP1" ...
#>  $ NMF_4 : chr [1:50] "NDRG1" "VEGFA" "IGFBP5" "EPAS1" ...
#>  $ NMF_5 : chr [1:50] "FTH1" "FTL" "GAPDH" "EEF1A1" ...
#>  $ NMF_6 : chr [1:50] "CHI3L1" "TAGLN" "IGFBP7" "SOD2" ...
#>  $ NMF_7 : chr [1:50] "HSPA5" "ERO1A" "DNAJB9" "GBE1" ...
#>  $ NMF_8 : chr [1:50] "SNAP25" "NRGN" "OLFM1" "CAMK2N1" ...
#>  $ NMF_9 : chr [1:50] "GFAP" "EEF1A1" "GAPDH" "CLU" ...
#>  $ NMF_10: chr [1:50] "SPP1" "APOC1" "CD74" "FTL" ...
#> NULL

# obtain gene sets
library(msigdbr)
library(hypeR)

mdb_h <- msigdbr(species = "Homo sapiens", category = "H")

gene.set.list <- list()
for (gene.set.name in unique(mdb_h$gs_name)) {
    gene.set.list[[gene.set.name]] <- mdb_h[mdb_h$gs_name %in%
        gene.set.name, ]$gene_symbol
}

# run hypeR test
mhyp <- hypeR(signature = infos$nmf.info$top.genes, genesets = gene.set.list,
    test = "hypergeometric", background = rownames(infos[["nmf.info"]][["feature.loadings"]]))
hyper.data <- mhyp$data
hyper.res.list <- list()
for (nmf.name in names(hyper.data)) {
    res <- as.data.frame(hyper.data[[nmf.name]]$data)
    hyper.res.list[[nmf.name]] <- res
}

print(head(hyper.res.list[[1]]))  # here we output part of the NMF_1 annotation result
#>                                                                             label
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION             HALLMARK_OXIDATIVE_PHOSPHORYLATION
#> HALLMARK_HYPOXIA                                                 HALLMARK_HYPOXIA
#> HALLMARK_MTORC1_SIGNALING                               HALLMARK_MTORC1_SIGNALING
#> HALLMARK_ADIPOGENESIS                                       HALLMARK_ADIPOGENESIS
#> HALLMARK_GLYCOLYSIS                                           HALLMARK_GLYCOLYSIS
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY
#>                                             pval     fdr signature geneset
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION       7.5e-15 3.8e-13        50     193
#> HALLMARK_HYPOXIA                         8.4e-04 8.8e-03        50     195
#> HALLMARK_MTORC1_SIGNALING                8.4e-04 8.8e-03        50     195
#> HALLMARK_ADIPOGENESIS                    8.5e-04 8.8e-03        50     196
#> HALLMARK_GLYCOLYSIS                      8.8e-04 8.8e-03        50     198
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY 5.1e-03 4.2e-02        50      49
#>                                          overlap background
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION            12      23109
#> HALLMARK_HYPOXIA                               4      23109
#> HALLMARK_MTORC1_SIGNALING                      4      23109
#> HALLMARK_ADIPOGENESIS                          4      23109
#> HALLMARK_GLYCOLYSIS                            4      23109
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY       2      23109
#>                                                                                                                        hits
#> HALLMARK_OXIDATIVE_PHOSPHORYLATION       ATP5F1E,ATP5MF,COX6A1,COX6B1,COX6C,NDUFA3,NDUFA4,NDUFA7,NDUFB2,NDUFB4,UQCR11,UQCRQ
#> HALLMARK_HYPOXIA                                                                                        ENO1,GAPDH,MIF,TPI1
#> HALLMARK_MTORC1_SIGNALING                                                                              ENO1,GAPDH,PPIA,TPI1
#> HALLMARK_ADIPOGENESIS                                                                               COX6A1,DDT,UQCR11,UQCRQ
#> HALLMARK_GLYCOLYSIS                                                                                      ENO1,MIF,PPIA,TPI1
#> HALLMARK_REACTIVE_OXYGEN_SPECIES_PATHWAY                                                                         FTL,NDUFB4


# Visualize annotation results
ggplot(hyper.res.list[[1]][1:5, ], aes(x = reorder(label, -log10(fdr)),
    y = overlap/signature, fill = -log10(fdr))) + geom_bar(stat = "identity",
    show.legend = T) + xlab("Gene Set") + ylab("Gene Ratio") +
    viridis::scale_fill_viridis() + theme_classic() + coord_flip() +
    theme(axis.text.x = element_text(color = "black", size = 12,
        angle = 45, hjust = 1), axis.text.y = element_text(color = "black",
        size = 8, angle = 0), panel.border = element_rect(colour = "black",
        fill = NA, size = 1))