# Parameters
FILE = "SCENIC_SCope_output.loom"
# import dependencies
import os
import numpy as np
import pandas as pd
import scanpy as sc
import loompy as lp
import json
import base64
import zlib
from pyscenic.export import add_scenic_metadata
from pyscenic.cli.utils import load_signatures
import matplotlib as mpl
import matplotlib.pyplot as plt
from scanpy.plotting._tools.scatterplots import plot_scatter
import seaborn as sns
# plot settings
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)
# used for plotting DRs:
def colorMap( x, palette='bright' ):
import natsort
from collections import OrderedDict
#
n=len(set(x))
cpalette = sns.color_palette(palette,n_colors=n )
cdict = dict( zip( list(set(x)), cpalette ))
cmap = [ cdict[i] for i in x ]
cdict = OrderedDict( natsort.natsorted(cdict.items()) )
return cmap,cdict
def drplot( dr, colorlab, ax, palette='bright', title=None, **kwargs ):
cmap,cdict = colorMap( colorlab, palette )
for lab,col in cdict.items():
ix = colorlab.loc[colorlab==lab].index
ax.scatter( dr['X'][ix], dr['Y'][ix], c=[col]*len(ix), alpha=0.7, label=lab, edgecolors='none')
if( title is not None ):
ax.set_title(title, fontsize='x-large');
#
ax.set_xticks([])
ax.set_yticks([])
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
# used for heatmap
def palplot(pal, names, colors=None, size=1):
n = len(pal)
f, ax = plt.subplots(1, 1, figsize=(n * size, size))
ax.imshow(np.arange(n).reshape(1, n),
cmap=mpl.colors.ListedColormap(list(pal)),
interpolation="nearest", aspect="auto")
ax.set_xticks(np.arange(n) - .5)
ax.set_yticks([-.5, .5])
ax.set_xticklabels([])
ax.set_yticklabels([])
colors = n * ['k'] if colors is None else colors
for idx, (name, color) in enumerate(zip(names, colors)):
ax.text(0.0+idx, 0.0, name, color=color, horizontalalignment='center', verticalalignment='center')
return f
# regulon size barplot
def plotRegulonSize( rlen, title ):
x=np.arange(rlen.shape[0])
fig,ax = plt.subplots(figsize=(5,5), dpi=150)
ax.barh( y=x, width=rlen)
ax.set_yticks(x)#, rlen.index) #, rotation=0)
ax.set_yticklabels(rlen.index, minor=False)
ax.set_xlabel("# of target genes")
ax.set_ylabel("Transcription factor")
ax.set_ylim((-1,x.max()+1))
ax.set_title(title)
for i, v in enumerate(rlen):
ax.text(v + 0, i-0.1, str(v), color='blue', fontweight='bold',fontsize=10)
plt.rc('ytick', labelsize=10)
plt.tight_layout()
plt.show()
# scenic output
lf = lp.connect( FILE, mode='r', validate=False )
meta = json.loads(zlib.decompress(base64.b64decode( lf.attrs.MetaData )))
#exprMat = pd.DataFrame( lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID).T
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
# create a dictionary of regulons:
regulons = {}
for i,r in pd.DataFrame(lf.ra.Regulons,index=lf.ra.Gene).iteritems():
regulons[i] = list(r[r==1].index.values)
# capture embeddings:
dr = [
pd.DataFrame( lf.ca.Embedding, index=lf.ca.CellID )
]
dr_names = [
meta['embeddings'][0]['name'].replace(" ","_")
]
# add other embeddings
drx = pd.DataFrame( lf.ca.Embeddings_X, index=lf.ca.CellID )
dry = pd.DataFrame( lf.ca.Embeddings_Y, index=lf.ca.CellID )
for i in range( len(drx.columns) ):
dr.append( pd.concat( [ drx.iloc[:,i], dry.iloc[:,i] ], sort=False, axis=1, join='outer' ))
dr_names.append( meta['embeddings'][i+1]['name'].replace(" ","_").replace('/','-') )
# rename columns:
for i,x in enumerate( dr ):
x.columns = ['X','Y']
lf.close()
scanpy.AnnData object¶This can be done directly from the integrated loom file, with a few modifications to allow for SCENIC- and SCope-specific loom attributes:
adata = sc.read( FILE, validate=False)
# drop the embeddings and extra attributes from the obs object
adata.obs.drop( ['Embedding','Embeddings_X','Embeddings_Y','RegulonsAUC'], axis=1, inplace=True )
# add the embeddings into the adata.obsm object
for i,x in enumerate( dr ):
adata.obsm[ 'X_'+dr_names[i] ] = x.as_matrix()
sc.utils.sanitize_anndata( adata )
print(
f"There are {len(regulons)} total regulons")
rlen = pd.Series( [len(x) for x in regulons.values()], index=regulons.keys() ).sort_values(ascending=True)
plotRegulonSize(rlen.tail(20),"20 largest regulons")
plotRegulonSize(rlen.head(20),"20 smallest regulons")
plt.rcParams.update({'font.size':10})
fig, (ax1,ax2) = plt.subplots(1,2, figsize=(10,5), dpi=100 )
drplot( dr[0], colorlab=adata.obs['louvain'], ax=ax1, palette='bright', s=2, title='Highly variable genes - UMAP' )
drplot( dr[4], colorlab=adata.obs['louvain'], ax=ax2, palette='bright', s=2, title='SCENIC AUC - UMAP' )
ax2.legend(loc='right', bbox_to_anchor=(1.15, 0.5), ncol=1, markerscale=2, fontsize='x-large', frameon=False, title="Louvain\nclusters")
plt.tight_layout()
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_rss
import matplotlib.pyplot as plt
from adjustText import adjust_text
import seaborn as sns
rss = regulon_specificity_scores( auc_mtx, adata.obs['louvain'] )
rss
cats = sorted(list(set(adata.obs['louvain'])))
fig = plt.figure(figsize=(15, 8))
for c,num in zip(cats, range(1,len(cats)+1)):
x=rss.T[c]
ax = fig.add_subplot(2,5,num)
plot_rss(rss, c, top_n=5, max_n=None, ax=ax)
ax.set_ylim( x.min()-(x.max()-x.min())*0.05 , x.max()+(x.max()-x.min())*0.05 )
for t in ax.texts:
t.set_fontsize(12)
ax.set_ylabel('')
ax.set_xlabel('')
adjust_text(ax.texts, autoalign='xy', ha='right', arrowprops=dict(arrowstyle='-',color='lightgrey'), precision=0.001 )
fig.text(0.5, 0.0, 'Regulon', ha='center', va='center', size='x-large')
fig.text(0.00, 0.5, 'Regulon specificity score (RSS)', ha='center', va='center', rotation='vertical', size='x-large')
plt.tight_layout()
plt.rcParams.update({
'figure.autolayout': True,
'figure.titlesize': 'large' ,
#'axes.labelsize': 'x-large',
#'axes.titlesize':'x-large',
'xtick.labelsize':'large',
'ytick.labelsize':'large'
})
plt.show()
topreg = []
for i,c in enumerate(cats):
topreg.extend(
list(rss.T[c].sort_values(ascending=False)[:5].index)
)
topreg = list(set(topreg))
auc_mtx_Z = pd.DataFrame( index=auc_mtx.index )
for col in list(auc_mtx.columns):
auc_mtx_Z[ col ] = ( auc_mtx[col] - auc_mtx[col].mean()) / auc_mtx[col].std(ddof=0)
#auc_mtx_Z.sort_index(inplace=True)
colors = sns.color_palette('bright',n_colors=len(cats) )
colorsd = dict( zip( cats, colors ))
colormap = [ colorsd[x] for x in adata.obs['louvain'] ]
sns.set()
sns.set(font_scale=0.8)
fig = palplot( colors, cats, size=1.0)
g = sns.clustermap(auc_mtx_Z[topreg].T, annot=False, square=False, linecolor='gray',
xticklabels=False, vmin=-2, vmax=6, col_colors=colormap,
cmap="YlGnBu", figsize=(21,16) )
sns.set(font_scale=1.1)
g.cax.set_visible(True)
g.ax_heatmap.set_ylabel('')
g.ax_heatmap.set_xlabel('')