In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import scipy

In [None]:
# Logging level for scanpy: errors (0), warnings (1), info (2), hints (3).
sc.settings.verbosity = 3
# Figure defaults used by every plot below: 300 dpi on a white background.
sc.settings.set_figure_params(dpi=300, facecolor="white")
# Print the package versions so the run can be reproduced.
sc.logging.print_versions()

In [None]:
results_file = 'RA_results.txt'  # the file that will store the analysis results

In [None]:
adata = sc.read_csv(filename='input_data/RA_scanpy_counts.csv')

In [None]:
adata

In [None]:
sc.pl.highest_expr_genes(adata, n_top=20, )

In [None]:
sc.pp.normalize_total(adata, target_sum=1e4)

In [None]:
sc.pp.log1p(adata)

In [None]:
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

In [None]:
sc.pl.highly_variable_genes(adata)

In [None]:
adata = adata[:, adata.var.highly_variable]

In [None]:
sc.pp.scale(adata, max_value=10)

In [None]:
sc.tl.pca(adata, svd_solver='arpack')

In [None]:
adata.var_names

In [None]:
sc.pl.pca_variance_ratio(adata, log=False)

In [None]:
adata.write(results_file)

In [None]:
adata

In [None]:
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=20)

In [None]:
sc.tl.umap(adata)

In [None]:
sc.pl.umap(adata, color = "SDC1")

In [None]:
sc.tl.leiden(adata)

In [None]:
sc.pl.umap(adata, color=['leiden'])

In [None]:
adata.write(results_file)

In [None]:
adata

In [None]:
# Pull the PCA projection out of the AnnData object and dump it as a
# comma-separated matrix for the downstream SI computation.
pca_data = adata.obsm["X_pca"]
np.savetxt(fname="scanpy_RA_pca.csv", X=pca_data, delimiter=",")

In [None]:
anno = pd.read_csv('input_data/RA_scanpy_metadata.csv')

In [None]:
anno

In [None]:
adata.obs = anno

In [None]:
scanpy_umap = sc.pl.umap(adata, color=['Type'])


In [None]:
results_file = "RA_results.txt"

In [None]:
adata.write(results_file)

In [None]:
# Data integration using scanorama
import scanorama


In [None]:
adata_list = []
unique_plates = anno['Plate'].unique()

In [None]:
for plate in unique_plates:
    print(plate)
    adata_list.append(sc.read_csv(filename='input_data/scanpy_RA_{}_counts.csv'.format(plate)))

In [None]:
# Integration.
integrated = scanorama.integrate_scanpy(adata_list)


In [None]:
# Batch correction.
corrected = scanorama.correct_scanpy(adata_list)


In [None]:
corrected

In [None]:
# Integration and batch correction.
integrated, corrected = scanorama.correct_scanpy(adata_list, return_dimred=True)

In [None]:
integrated[1].shape

In [None]:
# Write one embedding CSV per plate. Iterate over the plates that were
# actually loaded rather than a hard-coded count of 25, and refuse to pair
# plates with embeddings if the two lists are out of step.
if len(integrated) != len(unique_plates):
    raise ValueError(
        "Expected one embedding per plate, got {} embeddings for {} plates".format(
            len(integrated), len(unique_plates)
        )
    )
for plate, dimred in zip(unique_plates, integrated):
    np.savetxt('scanorama_RA_{}_dimred.csv'.format(plate), dimred, delimiter = ",")