# Reference mapping using Azimuth (Seurat) - R

## Install all needed packages following the instructions detailed here: https://satijalab.github.io/azimuth/index.html
## To see some examples how to use the tool and visualize the results, please visit: https://satijalab.github.io/azimuth/articles/run_azimuth_tutorial.html 

## Loading libraries
library(Seurat)
library(Azimuth)
library(patchwork)
source('path/to/modified_azimuth.R') # file provided in the zenodo GBmap repository

## Mapping
# Map the query object onto the GBmap core reference.
# Check https://satijalab.github.io/azimuth/reference/RunAzimuth.html for required input types.
your_gbm_data <- RunAzimuth(
  your_gbm_data,
  reference = "path/to/azimuth_core_GBmap.rds", # RDS file provided in the zenodo GBmap repository
  # you can modify which annotation levels you would like to include
  annotation.levels = c("annotation_level_3", "annotation_level_4")
)

## Visualization
# One labelled panel per transferred annotation level, drawn on the "ref.umap" reduction.
p1 <- DimPlot(
  your_gbm_data,
  reduction = "ref.umap",
  group.by = "predicted.annotation_level_3",
  label = TRUE,
  label.size = 3
) + NoLegend()
p2 <- DimPlot(
  your_gbm_data,
  reduction = "ref.umap",
  group.by = "predicted.annotation_level_4",
  label = TRUE,
  label.size = 3
) + NoLegend()
# patchwork places both panels side by side
p1 + p2


# Transfer learning using SCANVI (scArches) - Python

## Install scArches following the instructions detailed here: https://scarches.readthedocs.io/en/latest/installation.html

## Load packages
import scanpy as sc
import torch
import scarches as sca
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

## Load the core GBmap model and set parameters
# Path to the saved SCANVI reference model (unzip scarches-SCANVI_GBmap.zip first)
ref_path = 'path/to/unzipped/scarches_SCANVI_core_GBmap.zip'
# AnnData object of the core GBmap, passed to the model loader below
source_adata = sc.read('path/to/scarches_core_GBmap.h5ad')

# Restore the reference model from ref_path together with the core AnnData
vae = sca.models.SCANVI.load(ref_path, source_adata)

# obs column used for the cell type labels
cell_type_key = 'CellID'

# Maximum number of epochs for the surgery (query training) step
surgery_epochs = 500

# Suggested values. For more info, please consult: https://scarches.readthedocs.io/en/latest/about.html
early_stopping_kwargs_surgery = dict(
    early_stopping_metric="elbo",
    save_best_state_metric="elbo",
    on="full_dataset",
    patience=10,
    threshold=0.001,
    reduce_lr_on_plateau=True,
    lr_patience=8,
    lr_factor=0.1,
)

## Load and prepare your data
# Read your data using scanpy (loads an AnnData object)
target_adata = sc.read('path/to/your/data.h5ad')
# Keep a copy of the counts in the "counts" layer;
# target_adata.X must be where your raw counts are stored
target_adata.layers["counts"] = target_adata.X.copy()
# The query carries no cell type labels: fill the label column with the
# reference model's unlabeled category
target_adata.obs[cell_type_key] = vae.unlabeled_category_

## Perform surgery on GBmap core reference and train on query dataset without cell type labels
# Your adata must have the same gene length (5000) and names as the GBmap core (source_adata).
# `inplace_subset_query_vars = True` will subset and reorder the vars to train the model properly.
# In case you want to subset and tidy up your AnnData object beforehand, you can use the file
# genes_for_mapping.csv uploaded in this repository and the function `subset_and_pad_adata`
# developed by Lisa Sikkema and Theis team. You can find it here:
# https://github.com/LungCellAtlas/HLCA_reproducibility/blob/main/scripts/preprocessing.py
model = sca.models.SCANVI.load_query_data(
    target_adata,
    ref_path,
    freeze_dropout=True,
    inplace_subset_query_vars=True,
)

# Register every query cell as unlabelled and none as labelled
model._unlabeled_indices = np.arange(target_adata.n_obs)
model._labeled_indices = []
print("Labelled Indices: ", len(model._labeled_indices))
print("Unlabelled Indices: ", len(model._unlabeled_indices))

# Options forwarded to the semi-supervised trainer
surgery_trainer_kwargs = {
    "metrics_to_monitor": ["accuracy", "elbo"],
    "weight_decay": 0,
    "early_stopping_kwargs": early_stopping_kwargs_surgery,
}

# Train on the query data only; the base model is not retrained
model.train(
    n_epochs_semisupervised=surgery_epochs,
    train_base_model=False,
    semisupervised_trainer_kwargs=surgery_trainer_kwargs,
    frequency=1,
)

# Wrap the latent representation of the query cells in its own AnnData object
query_embedding = model.get_latent_representation()
query_latent = sc.AnnData(query_embedding)
# Carry over any metadata column of interest from the query dataset
query_latent.obs["add_any_metadata_of_interest"] = target_adata.obs["add_any_metadata_of_interest"].tolist()
# Get predictions of cell annotation from the model
query_latent.obs['predictions'] = model.predict()

## Visualization
# Neighbour graph, Leiden clustering and UMAP computed on the latent space
sc.pp.neighbors(query_latent)
sc.tl.leiden(query_latent)
sc.tl.umap(query_latent)

sc.pl.umap(query_latent, color=['predictions'], frameon=False, wspace=0.6)

## Get latent representation of core GBmap + query dataset and compute new UMAP

# Stack the core GBmap and the query cells; `batch_key="ref_query"` adds an obs column
# telling reference and query cells apart
# NOTE(review): AnnData.concatenate is deprecated in recent anndata releases in favour of
# anndata.concat - confirm against the anndata version you have installed
adata_full = source_adata.concatenate(target_adata, batch_key="ref_query")
full_latent = sc.AnnData(model.get_latent_representation(adata=adata_full))

# full_latent is built from the latent matrix only, so it starts without any obs metadata.
# Copy over every column you want to colour the UMAP by (e.g. "ref_query" can be copied the same way).
# NOTE(review): the column must exist in adata_full.obs; depending on the anndata version,
# only obs columns shared by source_adata and target_adata may survive the concatenation - confirm
full_latent.obs["any_metadata_of_interest"] = adata_full.obs["any_metadata_of_interest"].tolist()

## Visualization
sc.pp.neighbors(full_latent)
sc.tl.leiden(full_latent)
sc.tl.umap(full_latent)

sc.pl.umap(full_latent,
           color=['any_metadata_of_interest'],
           frameon=False,
           wspace=0.6,
           )

## More information and examples on how to use scArches can be found in https://scarches.readthedocs.io/en/latest/


# Exploration of GB interactome with CellChat - R

## For vignettes on how to visualize cell-cell communication networks stored in the CellChat object (cellchat_GBmap.rds)
## please check: https://github.com/sqjin/CellChat