In [1]:
# Parameters
FILE = "SCENIC_SCope_output.loom"

Single-Cell Report: SCENIC results

In [2]:
# import dependencies
import os
import numpy as np
import pandas as pd
import scanpy as sc
import loompy as lp
import json
import base64
import zlib
from pyscenic.export import add_scenic_metadata
from pyscenic.cli.utils import load_signatures
import matplotlib as mpl
import matplotlib.pyplot as plt
from scanpy.plotting._tools.scatterplots import plot_scatter
import seaborn as sns
/opt/venv/lib/python3.7/site-packages/dask/config.py:161: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  data = yaml.load(f.read()) or {}
/opt/venv/lib/python3.7/site-packages/louvain/Optimiser.py:349: SyntaxWarning: assertion is always true, perhaps remove parentheses?
  assert(issubclass(partition_type, LinearResolutionParameterVertexPartition),

Plotting settings and functions

In [3]:
# plot settings
sc.set_figure_params(dpi=150, fontsize=10, dpi_save=600)
In [4]:
# used for plotting DRs:
def colorMap( x, palette='bright' ):
    import natsort
    from collections import OrderedDict
    #
    n=len(set(x))
    cpalette = sns.color_palette(palette,n_colors=n )
    cdict = dict( zip( list(set(x)), cpalette ))
    cmap = [ cdict[i] for i in x ]
    cdict = OrderedDict( natsort.natsorted(cdict.items()) )
    return cmap,cdict

def drplot( dr, colorlab, ax, palette='bright', title=None, **kwargs ):
    cmap,cdict = colorMap( colorlab, palette )
    for lab,col in cdict.items():  
        ix = colorlab.loc[colorlab==lab].index
        ax.scatter( dr['X'][ix], dr['Y'][ix], c=[col]*len(ix), alpha=0.7, label=lab, edgecolors='none')
    if( title is not None ):
        ax.set_title(title, fontsize='x-large');
    #
    ax.set_xticks([])
    ax.set_yticks([])
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    ax.spines['left'].set_visible(False)
In [5]:
# used for heatmap
def palplot(pal, names, colors=None, size=1):
    n = len(pal)
    f, ax = plt.subplots(1, 1, figsize=(n * size, size))
    ax.imshow(np.arange(n).reshape(1, n),
              cmap=mpl.colors.ListedColormap(list(pal)),
              interpolation="nearest", aspect="auto")
    ax.set_xticks(np.arange(n) - .5)
    ax.set_yticks([-.5, .5])
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    colors = n * ['k'] if colors is None else colors
    for idx, (name, color) in enumerate(zip(names, colors)):
        ax.text(0.0+idx, 0.0, name, color=color, horizontalalignment='center', verticalalignment='center')
    return f
In [6]:
# regulon size barplot
def plotRegulonSize( rlen, title ):
    x=np.arange(rlen.shape[0])

    fig,ax = plt.subplots(figsize=(5,5), dpi=150)
    ax.barh( y=x, width=rlen)

    ax.set_yticks(x)#, rlen.index) #, rotation=0)
    ax.set_yticklabels(rlen.index, minor=False)
    ax.set_xlabel("# of target genes")
    ax.set_ylabel("Transcription factor")
    ax.set_ylim((-1,x.max()+1))
    ax.set_title(title)

    for i, v in enumerate(rlen):
        ax.text(v + 0, i-0.1, str(v), color='blue', fontweight='bold',fontsize=10)

    plt.rc('ytick', labelsize=10)

    plt.tight_layout()
    plt.show()

Extract relevant data from the integrated loom file

In [7]:
# scenic output
lf = lp.connect( FILE, mode='r', validate=False )
meta = json.loads(zlib.decompress(base64.b64decode( lf.attrs.MetaData )))
#exprMat = pd.DataFrame( lf[:,:], index=lf.ra.Gene, columns=lf.ca.CellID).T
auc_mtx = pd.DataFrame( lf.ca.RegulonsAUC, index=lf.ca.CellID)
In [8]:
# create a dictionary of regulons:
regulons = {}
for i,r in pd.DataFrame(lf.ra.Regulons,index=lf.ra.Gene).iteritems():
    regulons[i] =  list(r[r==1].index.values)
In [9]:
# capture embeddings:
dr = [
    pd.DataFrame( lf.ca.Embedding, index=lf.ca.CellID )
]
dr_names = [
    meta['embeddings'][0]['name'].replace(" ","_")
]

# add other embeddings
drx = pd.DataFrame( lf.ca.Embeddings_X, index=lf.ca.CellID )
dry = pd.DataFrame( lf.ca.Embeddings_Y, index=lf.ca.CellID )

for i in range( len(drx.columns) ):
    dr.append( pd.concat( [ drx.iloc[:,i], dry.iloc[:,i] ], sort=False, axis=1, join='outer' ))
    dr_names.append( meta['embeddings'][i+1]['name'].replace(" ","_").replace('/','-') )

# rename columns:
for i,x in enumerate( dr ):
    x.columns = ['X','Y']
In [10]:
lf.close()

Load this data into a scanpy.AnnData object

This can be done directly from the integrated loom file, with a few modifications to allow for SCENIC- and SCope-specific loom attributes:

In [11]:
adata = sc.read( FILE, validate=False)

# drop the embeddings and extra attributes from the obs object
adata.obs.drop( ['Embedding','Embeddings_X','Embeddings_Y','RegulonsAUC'], axis=1, inplace=True )
In [12]:
# add the embeddings into the adata.obsm object
for i,x in enumerate( dr ):
    adata.obsm[ 'X_'+dr_names[i] ] = x.as_matrix()
/opt/venv/lib/python3.7/site-packages/ipykernel_launcher.py:3: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
  This is separate from the ipykernel package so we can avoid doing imports until
In [13]:
sc.utils.sanitize_anndata( adata )
... storing 'ClusterID' as categorical
... storing 'Clusterings' as categorical
... storing 'chromium_chemistry' as categorical
... storing 'id' as categorical
... storing 'louvain' as categorical
... storing 'ClusterMarkers_0' as categorical
... storing 'ClusterMarkers_0_avg_logFC' as categorical
... storing 'ClusterMarkers_0_pval' as categorical
... storing 'Regulons' as categorical

Regulon summary

In [14]:
print(
f"There are {len(regulons)} total regulons")
There are 401 total regulons

Show the top 20 and bottom 20 regulons by size

In [15]:
rlen = pd.Series( [len(x) for x in regulons.values()], index=regulons.keys() ).sort_values(ascending=True)

plotRegulonSize(rlen.tail(20),"20 largest regulons")
plotRegulonSize(rlen.head(20),"20 smallest regulons")

Dimensionality reduction plots

Show both highly variable genes and SCENIC UMAP plots with Louvain clustering side-by-side

In [16]:
plt.rcParams.update({'font.size':10})

fig, (ax1,ax2) = plt.subplots(1,2, figsize=(10,5), dpi=100 )

drplot( dr[0], colorlab=adata.obs['louvain'], ax=ax1, palette='bright', s=2, title='Highly variable genes - UMAP' )

drplot( dr[4], colorlab=adata.obs['louvain'], ax=ax2, palette='bright', s=2, title='SCENIC AUC - UMAP' )
ax2.legend(loc='right', bbox_to_anchor=(1.15, 0.5), ncol=1, markerscale=2, fontsize='x-large', frameon=False, title="Louvain\nclusters")

plt.tight_layout()

Regulon specificity scores (RSS) across Louvain clusters

In [17]:
from pyscenic.rss import regulon_specificity_scores
from pyscenic.plotting import plot_rss
import matplotlib.pyplot as plt
from adjustText import adjust_text
import seaborn as sns

Calculate RSS

In [18]:
rss = regulon_specificity_scores( auc_mtx, adata.obs['louvain'] )
rss
Out[18]:
ADNP2_(+)-motif ARID3A_(+)-motif ARID5B_(+)-motif ARNT_(+)-motif ATF1_(+)-motif ATF2_(+)-motif ATF3_(+)-motif ATF4_(+)-motif ATF6_(+)-motif ATF6B_(+)-motif ... ZFP1_(+)-track ZHX2_(+)-track ZNF133_(+)-track ZNF16_(+)-track ZNF302_(+)-track ZNF34_(+)-track ZNF544_(+)-track ZNF626_(+)-track ZNF639_(+)-track ZSCAN29_(+)-track
1 0.358985 0.329396 0.313704 0.393491 0.388205 0.356750 0.523774 0.403685 0.407430 0.235039 ... 0.486147 0.295498 0.274563 0.362635 0.178708 0.183651 0.227356 0.188607 0.322692 0.444228
0 0.213841 0.324653 0.377387 0.431896 0.451711 0.391064 0.364462 0.443175 0.380241 0.264563 ... 0.311880 0.437655 0.272123 0.383073 0.201654 0.212950 0.253983 0.223400 0.450338 0.323341
4 0.183134 0.222548 0.252104 0.218407 0.248943 0.291020 0.235568 0.250587 0.227496 0.222730 ... 0.202836 0.253597 0.213111 0.239870 0.178377 0.175026 0.217930 0.185015 0.256334 0.246459
3 0.206936 0.244428 0.296780 0.266223 0.291138 0.285302 0.257312 0.290702 0.259587 0.225311 ... 0.234477 0.302752 0.222117 0.252906 0.185109 0.187852 0.191487 0.199924 0.288824 0.261987
2 0.207788 0.404323 0.396465 0.300505 0.318207 0.309961 0.281228 0.306863 0.326091 0.227912 ... 0.255227 0.315415 0.276213 0.343911 0.187872 0.472333 0.210558 0.229719 0.286091 0.230934
5 0.243287 0.182564 0.204071 0.202490 0.208569 0.208639 0.227466 0.213704 0.205255 0.183728 ... 0.225572 0.192193 0.215382 0.222953 0.167445 0.193382 0.178986 0.195027 0.214776 0.207714

6 rows × 401 columns

RSS panel plot with all cell types

In [19]:
cats = sorted(list(set(adata.obs['louvain'])))

fig = plt.figure(figsize=(15, 8))
for c,num in zip(cats, range(1,len(cats)+1)):
    x=rss.T[c]
    ax = fig.add_subplot(2,5,num)
    plot_rss(rss, c, top_n=5, max_n=None, ax=ax)
    ax.set_ylim( x.min()-(x.max()-x.min())*0.05 , x.max()+(x.max()-x.min())*0.05 )
    for t in ax.texts:
        t.set_fontsize(12)
    ax.set_ylabel('')
    ax.set_xlabel('')
    adjust_text(ax.texts, autoalign='xy', ha='right', arrowprops=dict(arrowstyle='-',color='lightgrey'), precision=0.001 )
 
fig.text(0.5, 0.0, 'Regulon', ha='center', va='center', size='x-large')
fig.text(0.00, 0.5, 'Regulon specificity score (RSS)', ha='center', va='center', rotation='vertical', size='x-large')
plt.tight_layout()
plt.rcParams.update({
    'figure.autolayout': True,
        'figure.titlesize': 'large' ,
        #'axes.labelsize': 'x-large',
        #'axes.titlesize':'x-large',
        'xtick.labelsize':'large',
        'ytick.labelsize':'large'
        })
plt.show()

Select the top 5 regulons from each cell type

In [20]:
topreg = []
for i,c in enumerate(cats):
    topreg.extend(
        list(rss.T[c].sort_values(ascending=False)[:5].index)
    )
topreg = list(set(topreg))

Generate a Z-score for each regulon to enable comparison between regulons

In [21]:
auc_mtx_Z = pd.DataFrame( index=auc_mtx.index )
for col in list(auc_mtx.columns):
    auc_mtx_Z[ col ] = ( auc_mtx[col] - auc_mtx[col].mean()) / auc_mtx[col].std(ddof=0)
#auc_mtx_Z.sort_index(inplace=True)

Generate a heatmap

In [22]:
colors = sns.color_palette('bright',n_colors=len(cats) )
colorsd = dict( zip( cats, colors ))
colormap = [ colorsd[x] for x in adata.obs['louvain'] ]
In [23]:
sns.set()
sns.set(font_scale=0.8)
fig = palplot( colors, cats, size=1.0)
/opt/venv/lib/python3.7/site-packages/matplotlib/figure.py:2369: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  warnings.warn("This figure includes Axes that are not compatible "
In [24]:
g = sns.clustermap(auc_mtx_Z[topreg].T, annot=False,  square=False,  linecolor='gray',
    xticklabels=False, vmin=-2, vmax=6, col_colors=colormap,
    cmap="YlGnBu", figsize=(21,16) )
sns.set(font_scale=1.1)
g.cax.set_visible(True)
g.ax_heatmap.set_ylabel('')    
g.ax_heatmap.set_xlabel('')    
Out[24]:
Text(0.5, 351.66666666666674, '')
In [ ]: