# Reference mapping using Azimuth (Seurat) - R ## Install all needed packages following the instructions detailed here: https://satijalab.github.io/azimuth/index.html ## To see some examples how to use the tool and visualize the results, please visit: https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html ## Loading libraries library(Seurat) library(Azimuth) library(patchwork) source('path/to/modified_azimuth.R') # file provided in the zenodo GBmap repository ## Mapping your_gbm_data <- RunAzimuth(your_gbm_data, # check https://satijalab.github.io/azimuth/reference/RunAzimuth.html for required input types reference = 'path/to/azimuth_core_GBmap.rds', # RDS file provided in the zenodo GBmap repository annotation.levels = c('annotation_level_3', 'annotation_level_4') # you can modify which annotation level you would like to include ) ## Visualization p1 <- DimPlot(your_gbm_data, group.by = 'predicted.annotation_level_3', reduction = 'ref.umap', label = TRUE, label.size = 3) + NoLegend() p2 <- DimPlot(your_gbm_data, group.by = 'predicted.annotation_level_4', reduction = 'ref.umap', label = TRUE, label.size = 3) + NoLegend() p1 + p2 # Transfer learning usingn SCANVI (scArches) - Python ## Install scArches following the instructions detailed here: https://scarches.readthedocs.io/en/latest/installation.html ## Load packages import scanpy as sc import torch import scarches as sca import matplotlib.pyplot as plt import pandas as pd import numpy as np ## Load the core GBmap model and sett parameters ref_path = 'path/to/unzipped/scarches_SCANVI_core_GBmap.zip' source_adata = sc.read('path/to/scarches_core_GBmap.h5ad') vae = sca.models.SCANVI.load(ref_path, source_adata) # unzip scarches-SCANVI_GBmap.zip cell_type_key = 'CellID' surgery_epochs = 500 early_stopping_kwargs_surgery = { # Suggested values. For more info, please consult: https://scarches.readthedocs.io/en/latest/about.html "early_stopping_metric": "elbo", "save_best_state_metric": "elbo", "on": "full_dataset", "patience": 10, "threshold": 0.001, "reduce_lr_on_plateau": True, "lr_patience": 8, "lr_factor": 0.1, } ## Load and prepare your data target_adata = sc.read('path/to/your/data.h5ad) # read your data using scanpy (load AnnData object) target_adata.layers["counts"] = target_adata.X.copy() # target_adata.X must be where your raw counts are stored target_adata.obs[cell_type_key] = vae.unlabeled_category_ ## Perform surgery on GBmap core reference and train on query dataset without cell type labels model = sca.models.SCANVI.load_query_data( target_adata, ref_path, freeze_dropout = True, inplace_subset_query_vars = True # your adata must have the same gene length (5000) and names as the GBmap core (source_adata) # `inplace_subset_query_vars = True` will subset and reorder the vars to tarin the model properly # In case you want to subet and tidy up your AnnData object beforehand, you can use the file genes_for_mapping.csv uploaded in this repository # and the function `subset_and_pad_adata` developed by Lisa Sikkema and Theis team # You can find it here: https://github.com/LungCellAtlas/HLCA_reproducibility/blob/main/scripts/preprocessing.py ) model._unlabeled_indices = np.arange(target_adata.n_obs) model._labeled_indices = [] print("Labelled Indices: ", len(model._labeled_indices)) print("Unlabelled Indices: ", len(model._unlabeled_indices)) model.train( n_epochs_semisupervised=surgery_epochs, train_base_model=False, semisupervised_trainer_kwargs=dict(metrics_to_monitor=["accuracy", "elbo"], weight_decay=0, early_stopping_kwargs=early_stopping_kwargs_surgery ), frequency=1 ) query_latent = sc.AnnData(model.get_latent_representation()) query_latent.obs["add_any_metadata_of_interest"] = target_adata.obs["add_any_metadata_of_interest"].tolist() query_latent.obs['predictions'] = model.predict() # get predictions of cell anotation from the model ## Visualization sc.pp.neighbors(query_latent) sc.tl.leiden(query_latent) sc.tl.umap(query_latent) sc.pl.umap(query_latent, color=['predictions'], frameon=False, wspace=0.6, ) ## Get latent representation of core GBmap + query dataset and compute new UMAP adata_full = source_adata.concatenate(target_adata, batch_key="ref_query") full_latent = sc.AnnData(model.get_latent_representation(adata=adata_full)) ## Visualization sc.pp.neighbors(full_latent) sc.tl.leiden(full_latent) sc.tl.umap(full_latent) sc.pl.umap(full_latent, color=['any_metadata_of_interest'], frameon=False, wspace=0.6, ) ## More information and examples on how to use scArches can be found in https://scarches.readthedocs.io/en/latest/ # Exploration of GB interactome with CellChat - R ## For vignettes on how to visualize cell-cell communication networks stored in the CellChat object (cellchat_GBmap.rds) ## please check: https://github.com/sqjin/CellChat