import scanpy as sc
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import scrublet as scr

%matplotlib inline
plt.rcParams['pdf.fonttype'] = 42

def us(adata,gene,groups=None, show=False, exclude =None,figsize=None,**kwargs):
    from matplotlib import rcParams
    if figsize:
        rcParams['figure.figsize'] = figsize
    if ',' in gene:
        gene = gene.split(',')
    if groups:
        if ',' in groups:groups = groups.split(',')
        sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
    
    else:
        if exclude: # list to not show
            groups = [x for x in set(adata.obs[gene]) if x not in exclude]
            sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
        else:
            sc.pl.umap(adata,color=gene,color_map='OrRd',show=show, **kwargs)
    rcParams['figure.figsize'] = [3,3]

def dotraw(adata, value, key):
    if value not in adata.obs.columns:
        if value not in adata.raw.var_names:
            print('no'); return
        idx = adata.raw.var_names.get_loc(value)
        array = adata.raw.X.toarray()[:,idx]
    else: 
        array = np.array(adata.obs[value])
    df = pd.DataFrame(array, index=adata.obs.index).merge(adata.obs[[key]], 
                                                               left_index=True, right_index=True)
    df['mask'] = df[0].where(df[0] == 0, 1)
    
    ratio = df.groupby(key)['mask'].mean()
    mean = df.groupby(key)[0].mean()
    
    res = pd.concat([ratio,mean], axis=1)
    res.columns = ['ratio', 'mean']
    
    return res

def us(adata,gene,groups=None, show=False, exclude =None,figsize=None,**kwargs):
    from matplotlib import rcParams
    if figsize:
        rcParams['figure.figsize'] = figsize
    else: rcParams['figure.figsize'] = (3,3)
    if ',' in gene:
        gene = gene.split(',')
    if groups:
        sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
    else:
        if exclude: # list to not show
            groups = [x for x in set(adata.obs[gene]) if x not in exclude]
            sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
        else:
            sc.pl.umap(adata,color=gene,color_map='OrRd',show=show, **kwargs)
    
def size(a,b): plt.gcf().set_size_inches(a,b)

def bind(adata, a,b):
    order = []
    for x in adata.obs[a].cat.categories:
        for y in adata.obs[b].cat.categories:
            order.append(x+'_'+y)
            
    
    adata.obs[a+'_'+b] = [x+'_'+y for x,y in zip(adata.obs[a], adata.obs[b])]
    adata.obs[a+'_'+b] = adata.obs[a+'_'+b].astype('category')
    order = [x for x in order if x in set(adata.obs[a+'_'+b])]
    adata.obs[a+'_'+b] = adata.obs[a+'_'+b].cat.reorder_categories(order)

def deg_single(adata, key, target, reference, method='wilcoxon', log=False, filter_res=False, pvalcut = 0.05, foldcut = 1):
    import sys
    sc.tl.rank_genes_groups(adata, key, groups = [target, reference], reference=reference, method='wilcoxon')
    name = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
    logf = pd.DataFrame(adata.uns['rank_genes_groups']['logfoldchanges'])
    pval = pd.DataFrame(adata.uns['rank_genes_groups']['pvals_adj'])

    df = pd.concat([name,logf,pval], axis=1)
    df.columns = ['name','logFC', 'pval']
    if log:
        df['logpval'] = [np.log2(sys.float_info.max-1) if x == 0 else -np.log2(x) for x in df['pval']]
    if filter_res:
        df = df[df.pval < pvalcut].copy()
        df = df[abs(df.logpval) > foldcut].copy()

    return df

def degplot(deg, topgene=20, dotsize=3, xlim=None, pvalcut=0.05, foldcut=0):
    import seaborn as sns
    import matplotlib.pyplot as plt
    from adjustText import adjust_text

    
    if 'logpval' not in deg.columns:
        deg['logpval'] = [np.log2(sys.float_info.max-1) if x == 0 else -np.log2(x) for x in deg['pval']]
    

    deg = deg[deg.pval < pvalcut].copy()
    deg = deg[abs(deg.logpval) > foldcut].copy()
    deg = deg.sort_values("logFC", ascending=False)
    
    
    sns.scatterplot(data = deg, x='logFC', y='logpval', s=dotsize, color='grey')
    lim = np.max([abs(deg['logFC'].max()), abs(deg['logFC'].min())])
    plt.xlim(-lim-1,lim+1)
    if xlim:
        plt.xlim(-xlim,xlim)
    plt.axvline(x=0, color='black', ls='--', lw=0.7)
    
    for i in range(topgene):
        if deg[(deg.pval < pvalcut) & (deg.logFC > foldcut)].shape[0] < i+1: continue
        plt.text(deg.loc[deg.index[i],'logFC'], deg.loc[deg.index[i], 'logpval'], 
                 deg.loc[deg.index[i], 'name'], fontsize=6, color='red')
    for i in range(topgene):
        if deg[(deg.pval < pvalcut) & (deg.logFC < foldcut)].shape[0] < i+1: continue
        plt.text(deg.loc[deg.index[-(i+1)],'logFC'], deg.loc[deg.index[-(i+1)], 'logpval'], 
                 deg.loc[deg.index[-(i+1)], 'name'], fontsize=6, color='blue')
        
        
    
    
    adjust_text(plt.gca().texts, arrowprops=dict(arrowstyle='-', lw=0.3, color='grey'), precision = 0.3)
    

        
    plt.ylabel('log [Adjusted P-value]')
    plt.xlabel('log [Fold Changes]')
    
    sns.despine(left=True)
    
    for i in range(topgene):
        if deg[(deg.pval < pvalcut) & (deg.logFC > foldcut)].shape[0] < i+1: continue
        plt.plot(deg.loc[deg.index[i],'logFC'], deg.loc[deg.index[i], 'logpval'], 
                 color='red', markersize=dotsize/3, marker='o')
    for i in range(topgene):
        if deg[(deg.pval < pvalcut) & (deg.logFC < foldcut)].shape[0] < i+1: continue
        plt.plot(deg.loc[deg.index[-(i+1)],'logFC'], deg.loc[deg.index[-(i+1)], 'logpval'], 
                 color='blue', markersize=dotsize/3, marker='o')

plt.rcParams['figure.dpi'] = 300


# Setup
import importlib
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt

from sccoda.util import comp_ana as mod
from sccoda.util import cell_composition_data as dat
from sccoda.util import data_visualization as viz

import sccoda.datasets as scd

2024-05-01 01:30:58.968196: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-01 01:30:59.048697: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-01 01:30:59.048732: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-01 01:30:59.053617: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-01 01:30:59.073778: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-01 01:30:59.074430: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-01 01:30:59.832671: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT


muscle_original = sc.read('data/muscle_230426_final_data_uptobatch7.h5ad')


cmapdic = dict(zip(muscle_original.obs['annotation'].cat.categories,
                  muscle_original.uns['annotation_colors']))


pre = muscle_original.copy()


df1 = pd.crosstab(pre.obs['Identity'], pre.obs['annotation'])
df1['treat_time'] = ['_'.join(x.split("_")[:2]) for x in df1.index]


# We are analyzing 2 hour and 16 hour samples

df1 = df1[df1['treat_time'] != 'Subunit_16hr'].copy()
df2 = df1[df1['treat_time'] != 'Spike_40hr'].copy()

df3 = df2.melt("treat_time")


df3.head()


df3['treat_time'] = df3['treat_time'].astype('category').cat.reorder_categories('PBS_16hr,LNP_2hr,Spike_2hr,LNP_16hr,Spike_16hr'.split(","))


mdf = df2.groupby("treat_time").mean()


base = dict(mdf.loc['PBS_16hr'])


ndf3 = df3.copy()
ndf3['value'] = [i / base[j] for i,j in zip(ndf3['value'], ndf3['annotation'])]


import seaborn as sns


sns.boxplot(data = df3[df3['annotation'].isin('Neutrophil,Monocyte,T-CD8'.split(","))],
            x = 'annotation', y = 'value', hue = 'treat_time', palette = cmap, boxprops = dict(alpha = .7),
           legend = False)
sns.stripplot(data = df3[df3['annotation'].isin('Neutrophil,Monocyte,T-CD8'.split(","))],
            x = 'annotation', y = 'value', hue = 'treat_time', palette = cmap, dodge=True)
plt.legend(bbox_to_anchor=(1,.5), loc = 'center left')
plt.axhline(1000, ls = '--', c = 'grey')
plt.ylim(0)
sns.despine()
plt.xlabel('')
size(6,2.5)
plt.savefig('fig/R_count.pdf', bbox_inches='tight')


f, axes = plt.subplots(1,2, sharey=True)
cmap = 'inferno'

sns.boxplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, 
           palette=cmap, ax = axes[0], legend=False)
sns.stripplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
             palette=cmap, ax = axes[0], legend=False)


sns.boxplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, legend=False,
           palette=cmap, ax = axes[1])
sns.stripplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
             palette=cmap, ax = axes[1])
plt.axvline(1, ls = '--', color='grey')



sns.despine()
axes[0].set_ylabel("")
plt.legend(bbox_to_anchor=(1,1))
size(3,8)


data_all = dat.from_pandas(df1, covariate_columns=["treat_time"])


data_all

AnnData object with n_obs × n_vars = 10 × 22
    obs: 'treat_time'


scdic = {}


data_spike = data_all[data_all.obs['treat_time'].isin('PBS_16hr,LNP_2hr,Spike_2hr'.split(","))].copy()


data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.rename_categories({
    'PBS_16hr':'PBS',
    'LNP_2hr':'LNP',
    'Spike_2hr':'LNP+mRNA'
})


data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.reorder_categories(
    'PBS,LNP,LNP+mRNA'.split(","))


data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP+mRNA'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()

scdic['Spike_2hr'] = sim_results.copy()

Automatic reference selection! Reference cell type set to DC-cDC1
Zero counts encountered in data! Added a pseudocount of 0.5.

2024-05-01 01:32:07.849285: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-01 01:32:07.849596: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  0%|                                                                                                                        | 0/20000 [00:00<?, ?it/s]2024-05-01 01:32:09.602500: I external/local_xla/xla/service/service.cc:168] XLA service 0xaabb690 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2024-05-01 01:32:09.602546: I external/local_xla/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2024-05-01 01:32:09.657089: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
I0000 00:00:1714494730.146518 1555467 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2024-05-01 01:32:10.159893: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:03<00:00, 316.79it/s]

MCMC sampling finished. (81.609 sec)
Acceptance rate: 42.8%


data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()

scdic['LNP_2hr'] = sim_results.copy()

Automatic reference selection! Reference cell type set to Mast

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:03<00:00, 315.59it/s]

MCMC sampling finished. (81.022 sec)
Acceptance rate: 72.2%


data_in = data_spike[data_spike.obs['treat_time'].isin('LNP+mRNA,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()

scdic['SvL_2hr']  = sim_results.copy()

Automatic reference selection! Reference cell type set to DC-migratory
Zero counts encountered in data! Added a pseudocount of 0.5.

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:05<00:00, 303.13it/s]

MCMC sampling finished. (84.574 sec)
Acceptance rate: 50.7%


data_spike = data_all[data_all.obs['treat_time'].isin('PBS_16hr,LNP_16hr,Spike_16hr'.split(","))]


data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.rename_categories({
    'PBS_16hr':'PBS',
    'LNP_16hr':'LNP',
    'Spike_16hr':'LNP+mRNA'
})


data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.reorder_categories(
    'PBS,LNP,LNP+mRNA'.split(","))


data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP+mRNA'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()

scdic['Spike_16hr'] = sim_results.copy()

Automatic reference selection! Reference cell type set to DC-cDC1
Zero counts encountered in data! Added a pseudocount of 0.5.

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:06<00:00, 300.64it/s]

MCMC sampling finished. (84.660 sec)
Acceptance rate: 61.6%


data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()

scdic['LNP_16hr'] = sim_results.copy()

Automatic reference selection! Reference cell type set to T-prolif
Zero counts encountered in data! Added a pseudocount of 0.5.

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:04<00:00, 311.27it/s]

WARNING:tensorflow:5 out of the last 5 calls to <function CompositionalModel.sampling.<locals>.sample_mcmc at 0x7fcdbc6a1f70> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
MCMC sampling finished. (82.370 sec)
Acceptance rate: 47.2%


data_in = data_spike[data_spike.obs['treat_time'].isin('LNP+mRNA,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()

scdic['SvL_16hr']  = sim_results.copy()

Automatic reference selection! Reference cell type set to Schwann-non-myelinating
Zero counts encountered in data! Added a pseudocount of 0.5.

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:06<00:00, 302.08it/s]

WARNING:tensorflow:6 out of the last 6 calls to <function CompositionalModel.sampling.<locals>.sample_mcmc at 0x7fcdbc57fca0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for  more details.
MCMC sampling finished. (84.937 sec)
Acceptance rate: 59.2%


fdr_cut = .05


dfls = []
for cond in scdic.keys():
    if cond.startswith('SvL'): continue
    simres = scdic[cond].copy()
    simres.set_fdr(est_fdr=fdr_cut)
    dfx = simres.effect_df[['log2-fold change', 'Final Parameter']]
    dfx['Final'] = [i if j != 0 else 0 for i,j in zip(dfx['log2-fold change'], dfx['Final Parameter'])]
    dfx = dfx.reset_index().set_index('Cell Type')['Final'].copy()
    dfls.append(dfx)


simres = pd.concat(dfls, axis = 1).copy()


simres.columns = [x for x in scdic.keys() if not x.startswith("SvL")]


simres


df1 = pd.crosstab(pre.obs['Identity'], pre.obs['annotation'], normalize=0)


df1['treat_time'] = ['_'.join(x.split("_")[:2]) for x in df1.index]


df1 = df1[df1['treat_time'] != 'Subunit_16hr'].copy()

df1


df2 = df1[df1['treat_time'] != 'Spike_40hr'].copy()


df3 = df2.melt("treat_time")


df3.head()


df3['treat_time'] = df3['treat_time'].astype('category').cat.reorder_categories('PBS_16hr,LNP_2hr,Spike_2hr,LNP_16hr,Spike_16hr'.split(","))


mdf = df2.groupby("treat_time").mean()


base = dict(mdf.loc['PBS_16hr'])


ndf3 = df3.copy()
ndf3['value'] = [i / base[j] for i,j in zip(ndf3['value'], ndf3['annotation'])]


dummy = pd.DataFrame(index = simres.index, columns = ['PBS_16hr'])


simres1 = pd.concat([simres, dummy], axis = 1).copy()


df4 = simres1.reset_index().melt("Cell Type")


df4['variable'] = df4['variable'].astype('category').cat.reorder_categories('PBS_16hr,LNP_2hr,Spike_2hr,LNP_16hr,Spike_16hr'.split(","))


cats = df4['variable'].cat.categories


simres


ndf3


from math import ceil, floor


f, axes = plt.subplots(1,5, sharey=True,
                      width_ratios=[1,.5,.5,.5,.5])
cmap = 'inferno'

sns.boxplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, 
           palette=cmap, ax = axes[0], legend=False)
sns.stripplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
             palette=cmap, ax = axes[0], legend=False)


# sns.boxplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, legend=False,
#            palette=cmap, ax = axes[1])
# sns.stripplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
#              palette=cmap, ax = axes[1])
# axes[1].axvline(1, ls = '--', color='grey')

n_idx = 0

for cat in cats:
    if cat in 'PBS_16hr'.split(","): continue
    n_idx += 1
    sns.barplot(data = df4[df4.variable == cat], x = 'value', y = 'Cell Type', hue = 'variable', dodge=False, 
           palette=cmap, ax = axes[n_idx], edgecolor='black', lw = .5)
    axes[n_idx].legend([],[],frameon=False)
    if simres[cat].sum() == 0: 
        axes[n_idx].set_xlim(0, 1)
    else:
        axes[n_idx].set_xticks([floor(df4[df4.variable == cat]['value'].min()),
                                0,
                                ceil(df4[df4.variable == cat]['value'].max())])
        axes[n_idx].axvline(0, c= 'black', lw = .5)

sns.despine()
axes[0].set_ylabel("")
axes[1].legend(bbox_to_anchor=(-5,1))
size(5,5)

plt.savefig('fig/R_sccoda.pdf', bbox_inches='tight')


dic2 = {}


data_spike.obs['treat_time']

Identity
LNP_16hr_batch2_M_LNP_1st                  LNP
LNP_16hr_batch3_M-LNP_2nd                  LNP
PBS_16hr_batch2_M_PBS_1st                  PBS
Spike_16hr_batch2_M_spike_late_1st    LNP+mRNA
Spike_16hr_batch3_M-SL_2nd            LNP+mRNA
Name: treat_time, dtype: category
Categories (3, object): ['PBS', 'LNP', 'LNP+mRNA']


data_spike.obs['shot'] = [i.split("_")[-1] for i in data_spike.obs.index]


data_spike.X.max()

3072.0


data_in = data_spike[data_spike.obs['treat_time'].isin('LNP+mRNA'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="shot")
sim_results = model_spike.sample_hmc()

dic2['1vs2_mRNA'] = sim_results.copy()

Automatic reference selection! Reference cell type set to T-prolif
Zero counts encountered in data! Added a pseudocount of 0.5.

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:02<00:00, 319.66it/s]

MCMC sampling finished. (79.988 sec)
Acceptance rate: 52.9%


data_in = data_spike[data_spike.obs['treat_time'].isin('LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="shot")
sim_results = model_spike.sample_hmc()

dic2['1vs2_LNP'] = sim_results.copy()

Automatic reference selection! Reference cell type set to B-prolif
Zero counts encountered in data! Added a pseudocount of 0.5.

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:02<00:00, 318.04it/s]

MCMC sampling finished. (81.938 sec)
Acceptance rate: 67.3%


dfls = []
for cond in dic2.keys():
    if cond.startswith('SvL'): continue
    simres = dic2[cond].copy()
    simres.set_fdr(est_fdr=fdr_cut)
    dfx = simres.effect_df[['log2-fold change', 'Final Parameter']]
    dfx['Final'] = [i if j != 0 else 0 for i,j in zip(dfx['log2-fold change'], dfx['Final Parameter'])]
    dfx = dfx.reset_index().set_index('Cell Type')['Final'].copy()
    dfls.append(dfx)


simres = pd.concat(dfls, axis = 1).copy()


simres.columns = [x for x in dic2.keys() if not x.startswith("SvL")]


data_spike.to_df()['B-prolif'].sort_values()

Identity
LNP_16hr_batch3_M-LNP_2nd               8.0
Spike_16hr_batch2_M_spike_late_1st      8.0
LNP_16hr_batch2_M_LNP_1st              11.0
PBS_16hr_batch2_M_PBS_1st              36.0
Spike_16hr_batch3_M-SL_2nd            150.0
Name: B-prolif, dtype: float32


data_spike.to_df()['Monocyte']

Identity
LNP_16hr_batch2_M_LNP_1st             585.0
LNP_16hr_batch3_M-LNP_2nd             283.0
PBS_16hr_batch2_M_PBS_1st             141.0
Spike_16hr_batch2_M_spike_late_1st    679.0
Spike_16hr_batch3_M-SL_2nd            174.0
Name: Monocyte, dtype: float32


cdf = data_spike.to_df().copy()
cdf = (cdf.T / cdf.T.sum()).T
cdf.index = ['_'.join(i.split("_")[:1]+[i.split("_")[-1]]) if not i.startswith("PBS") else
             'PBS' for i in cdf.index]
cdf = cdf.reset_index()
cdf['index'] = cdf['index'].astype('category').cat.reorder_categories('PBS,LNP_1st,LNP_2nd,Spike_1st,Spike_2nd'.split(","))
cdf = cdf.sort_values("index")


cdf1 = cdf.melt('index')


cellorder = cdf.mean(numeric_only=True).sort_values(ascending=False).index


cmapcond = {
    'PBS':'darkgrey',
    'LNP_1st':'bisque',
    'LNP_2nd':'orange',
    'Spike_1st':'lightskyblue',
    'Spike_2nd':'royalblue'
}


sns.barplot(data = cdf1, x= 'value', y = 'annotation', hue='index', 
            width = 1, linewidth = 0,
           order=cellorder, palette=cmapcond)
sns.despine()


cdf2 = cdf.set_index("index")


enr1 = (cdf2.loc['LNP_2nd'] / (cdf2.loc['LNP_1st'])).fillna(0).to_frame()
enr2 = (cdf2.loc['Spike_2nd'] / (cdf2.loc['Spike_1st'])).fillna(0).to_frame()


enr1 = (cdf2.loc['LNP_2nd'] / (cdf2.loc['LNP_1st'] + cdf2.loc['LNP_2nd'])).fillna(0) * 2
enr2 = (cdf2.loc['Spike_2nd'] / (cdf2.loc['Spike_1st'] + cdf2.loc['Spike_2nd'])).fillna(0) * 2


enr1 = enr1.to_frame()
enr2 = enr2.to_frame()
enr1.columns = ['ratio']
enr2.columns = ['ratio']


f, ax = plt.subplots(1,5, sharey = True, width_ratios=[1.5,1,1,.5,.5])

sns.barplot(data = cdf1, x= 'value', y = 'annotation', hue='index', 
            width = 1, linewidth = 0,
           order=cellorder, palette=cmapcond, ax = ax[0])

sns.barplot(data = enr1.reset_index(), x= 'ratio', y = 'annotation',
           order = cellorder, color = cmapcond['LNP_2nd'],
           edgecolor='black', ax = ax[1])

sns.barplot(data = enr2.reset_index(), x= 'ratio', y = 'annotation',
           order = cellorder, color = cmapcond['Spike_2nd'],
           edgecolor='black', ax = ax[2])


sns.barplot(data = simres.reset_index(), x = '1vs2_LNP', y = 'Cell Type',
           color = cmapcond['LNP_2nd'], ax = ax[3], edgecolor='black')
sns.barplot(data = simres.reset_index(), x = '1vs2_mRNA', y = 'Cell Type',
           color = cmapcond['Spike_2nd'], ax = ax[4], edgecolor='black')

ax[0].legend(title = '', loc = 'lower right')

ax[0].set_ylabel("")
ax[1].axvline(1, color='black', ls = '--')
ax[2].axvline(1, color='black', ls = '--')

ax[1].set_xlim(0,2)
ax[2].set_xlim(0,2)
sns.despine()
size(10,5)
plt.savefig('fig/R_shot12_barplot.pdf', bbox_inches='tight')

pre

AnnData object with n_obs × n_vars = 83094 × 32285
    obs: 'dose', 'time', 'name', 'n_counts', 'batch', 'treat', 'Sample', 'doublet_scores', 'n_genes', 'shot', 'Identity', 'mito', 'predicted_doublets', 'Spike', 'Pfizer', 'Adeno', 'is_Spike', 'is_Pfizer', 'is_Adeno', 'genotype', 'Spike_log1p', 'annotation', 'treat_time', 'treat_time_shot'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'Identity_colors', 'annotation_colors', 'hvg', 'neighbors', 'pca', 'time_colors', 'treat_colors', 'umap', 'treat_time_colors', 'treat_time_shot_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'


bind(pre, 'treat_time', 'shot')


cmapdic2 = dict(zip(pre.obs['treat_time_shot'].cat.categories,
                   pre.uns['treat_time_shot_colors']))


cmapdic2['PBS_16hr_1st'] = 'grey'
cmapdic2['LNP_16hr_1st'] = 'bisque'
cmapdic2['LNP_16hr_2nd'] = 'orange'
cmapdic2['LNP+mRNA_16hr_1st'] = 'lightskyblue'
cmapdic2['LNP+mRNA_16hr_2nd'] = 'royalblue'


cats = 'PBS_16hr_1st,LNP_16hr_1st,LNP_16hr_2nd,LNP+mRNA_16hr_1st,LNP+mRNA_16hr_2nd'.split(",")


cat = cats[0]


for cat in cats:
    sc.pl.umap(pre, color='treat_time_shot', groups = cat,
               palette=cmapdic2, show = False, frameon=False, zorder = -1)
    size(2,2)
    plt.gca().set_rasterization_zorder(0)
    plt.title(cat)
    plt.savefig('fig/R_shot12_UMAP_%s.pdf'%(cat), bbox_inches='tight')


cellorder1 = [
    'Fibroblast',
    'Endothelial',
    'Mural',
    'Monocyte',
    'T-CD4',
 'B-naive',
 'T-CD8',
 'Macrophage',
  'Muscle-skeletal-mature', 
 'Neutrophil',
  'DC-plasmacytoid',
    'Schwann-myelinating',
    'Mast',
 'NK',
 'Muscle-skeletal-progenitor',
 'T-prolif',
    'Schwann-non-myelinating',
 'Tenocyte',
     'B-prolif',
 'DC-migratory'
 ]


pca = sc.read('data/PCA_revision_total_240503.h5ad')

pca

AnnData object with n_obs × n_vars = 123 × 32285
    obs: 'treat', 'time', 'batch', 'celltype', 'batch_treat_time', 'imported', 'condition'
    uns: 'batch_colors', 'celltype_colors', 'condition_colors', 'time_colors', 'treat_colors'
    obsm: 'X_sPC'


sc.pl.embedding(pca, basis='sPC', color = 'treat')


bdata = pca[pca.obs['treat'].str.startswith("ALC")].copy()


bdata

AnnData object with n_obs × n_vars = 74 × 32285
    obs: 'treat', 'time', 'batch', 'celltype', 'batch_treat_time', 'imported', 'condition'
    uns: 'batch_colors', 'celltype_colors', 'condition_colors', 'time_colors', 'treat_colors'
    obsm: 'X_sPC'


dfco = pd.DataFrame(bdata.obsm['X_sPC'], index = bdata.obs.index).copy()
dfco = dfco.iloc[:,:2].copy()
dfco.columns = ['PC1', 'PC2']


dfco = dfco.merge(bdata.obs, left_index=True, right_index=True, how = 'left')
dfco['shot'] = [i.split("_")[-2] for i in dfco.index]
rename = {
    'ALC-0315':'LNP',
    'ALC-0315+mRNA':'LNP+mRNA'}
dfco['treat'] = dfco['treat'].map(rename)

dfco['treat_shot'] = ['_'.join([x,y]) for x,y in zip(dfco['treat'], dfco['shot'])]
dfco['celltype'] = dfco['celltype'].astype('category').cat.reorder_categories(cellorder1)
dfco['treat_shot'] = dfco['treat_shot'].astype('category').cat.reorder_categories('LNP_1st,LNP_2nd,LNP+mRNA_1st,LNP+mRNA_2nd'.split(","))


cmapdic3 = {}
cmapdic3['LNP_1st'] = cmapdic2['LNP_16hr_1st']
cmapdic3['LNP_2nd'] = cmapdic2['LNP_16hr_2nd']
cmapdic3['LNP+mRNA_1st'] = cmapdic2['LNP+mRNA_16hr_1st']
cmapdic3['LNP+mRNA_2nd'] = cmapdic2['LNP+mRNA_16hr_2nd']


f, ax = plt.subplots(2,1, sharex = True)

sns.lineplot(data = dfco, y = 'PC1', x = 'celltype', hue='treat_shot', ax = ax[0],
            palette = cmapdic3, legend = False)
sns.stripplot(data = dfco, y = 'PC1', x = 'celltype', hue='treat_shot', ax = ax[0],
            palette = cmapdic3, legend = False)
sns.lineplot(data = dfco, y = 'PC2', x = 'celltype', hue='treat_shot', ax = ax[1],
            palette = cmapdic3, legend = False)
sns.stripplot(data = dfco, y = 'PC2', x = 'celltype', hue='treat_shot', ax = ax[1],
            palette = cmapdic3, legend = True)

sns.despine()
plt.legend(bbox_to_anchor=(1,1), loc = 'center left')

size(10,2.5)
plt.tick_params(axis = 'x', rotation = 90)

plt.xlabel('')
plt.savefig('fig/R_shot12_PC12.pdf', bbox_inches='tight')

	Spike_2hr	LNP_2hr	Spike_16hr	LNP_16hr
Cell Type
B-naive	1.333924	0	0.000000	0.577742
B-prolif	0.000000	0	0.000000	0.000000
DC-cDC1	0.000000	0	0.000000	0.000000
DC-cDC2	0.000000	0	0.000000	0.000000
DC-migratory	0.000000	0	0.000000	0.000000
DC-plasmacytoid	0.000000	0	0.000000	0.000000
Endothelial	0.000000	0	0.000000	0.000000
Fibroblast	-2.172026	0	0.000000	-2.594465
Macrophage	0.000000	0	0.000000	0.000000
Mast	0.000000	0	0.000000	0.000000
Monocyte	0.000000	0	1.756246	1.725104
Mural	0.000000	0	0.000000	0.000000
Muscle-skeletal-mature	0.000000	0	-2.453719	-3.400790
Muscle-skeletal-progenitor	0.000000	0	0.000000	0.000000
NK	0.000000	0	0.000000	1.496633
Neutrophil	0.000000	0	2.449540	2.215557
Schwann-myelinating	0.000000	0	0.000000	0.000000
Schwann-non-myelinating	0.000000	0	0.000000	0.000000
T-CD4	0.000000	0	0.000000	0.959133
T-CD8	1.677280	0	1.935967	1.655765
T-prolif	0.000000	0	0.000000	0.000000
Tenocyte	0.000000	0	0.000000	0.000000

annotation	B-naive	B-prolif	DC-cDC1	DC-cDC2	DC-migratory	DC-plasmacytoid	Endothelial	Fibroblast	Macrophage	Mast	...	Muscle-skeletal-progenitor	NK	Neutrophil	Schwann-myelinating	Schwann-non-myelinating	T-CD4	T-CD8	T-prolif	Tenocyte	treat_time
Identity
LNP_2hr_batch5_LNP1_1st	0.086504	0.000713	0.004560	0.001710	0.009121	0.004988	0.222602	0.103321	0.017956	0.002850	...	0.030497	0.014251	0.058715	0.009121	0.008551	0.195525	0.075246	0.002850	0.015534	LNP_2hr
LNP_2hr_batch5_LNP2_1st	0.297016	0.000208	0.003951	0.001559	0.007901	0.003743	0.123090	0.098139	0.015594	0.003431	...	0.012267	0.008005	0.014451	0.012371	0.004782	0.217278	0.099802	0.003431	0.002911	LNP_2hr
LNP_16hr_batch2_M_LNP_1st	0.172715	0.001436	0.000261	0.000000	0.008747	0.002872	0.086162	0.031723	0.004308	0.001567	...	0.009922	0.019060	0.042167	0.003394	0.002611	0.344778	0.141645	0.004439	0.002480	LNP_16hr
LNP_16hr_batch3_M-LNP_2nd	0.161532	0.001393	0.000000	0.000000	0.007137	0.004003	0.071192	0.024543	0.003829	0.000348	...	0.005222	0.014970	0.031506	0.003307	0.002089	0.448042	0.142907	0.002611	0.001741	LNP_16hr
PBS_16hr_batch2_M_PBS_1st	0.115428	0.004042	0.000112	0.000112	0.002358	0.000786	0.167864	0.153492	0.026387	0.004267	...	0.026724	0.003930	0.004604	0.007635	0.007186	0.188188	0.040647	0.001347	0.024590	PBS_16hr
Spike_2hr_batch2_M_spike_early_1st	0.296388	0.001484	0.000495	0.000000	0.012370	0.005443	0.064819	0.025235	0.005443	0.000000	...	0.010391	0.012370	0.004948	0.000990	0.000000	0.304305	0.184067	0.010886	0.009401	Spike_2hr
Spike_2hr_batch3_M-SE_2nd	0.238082	0.025615	0.000569	0.000569	0.006119	0.010673	0.128220	0.043689	0.005977	0.001423	...	0.009819	0.007969	0.007542	0.004696	0.003985	0.281486	0.103600	0.003558	0.008112	Spike_2hr
Spike_16hr_batch2_M_spike_late_1st	0.167154	0.001063	0.000133	0.000000	0.011294	0.001860	0.052883	0.039065	0.004119	0.000930	...	0.008504	0.010098	0.033218	0.001594	0.002392	0.408185	0.119453	0.004518	0.003588	Spike_16hr
Spike_16hr_batch3_M-SL_2nd	0.171102	0.024490	0.000327	0.000000	0.003265	0.005224	0.071184	0.036735	0.004898	0.001306	...	0.004898	0.012571	0.052408	0.000816	0.001959	0.393469	0.143184	0.004571	0.005224	Spike_16hr
Spike_40hr_batch6_spkie_40h_1st	0.211730	0.004016	0.000243	0.000000	0.010221	0.004381	0.029691	0.013020	0.001825	0.000365	...	0.000487	0.004624	0.011803	0.001339	0.000365	0.506084	0.149671	0.006206	0.002069	Spike_40hr

	Spike_2hr	LNP_2hr	Spike_16hr	LNP_16hr
Cell Type
B-naive	1.333924	0	0.000000	0.577742
B-prolif	0.000000	0	0.000000	0.000000
DC-cDC1	0.000000	0	0.000000	0.000000
DC-cDC2	0.000000	0	0.000000	0.000000
DC-migratory	0.000000	0	0.000000	0.000000
DC-plasmacytoid	0.000000	0	0.000000	0.000000
Endothelial	0.000000	0	0.000000	0.000000
Fibroblast	-2.172026	0	0.000000	-2.594465
Macrophage	0.000000	0	0.000000	0.000000
Mast	0.000000	0	0.000000	0.000000
Monocyte	0.000000	0	1.756246	1.725104
Mural	0.000000	0	0.000000	0.000000
Muscle-skeletal-mature	0.000000	0	-2.453719	-3.400790
Muscle-skeletal-progenitor	0.000000	0	0.000000	0.000000
NK	0.000000	0	0.000000	1.496633
Neutrophil	0.000000	0	2.449540	2.215557
Schwann-myelinating	0.000000	0	0.000000	0.000000
Schwann-non-myelinating	0.000000	0	0.000000	0.000000
T-CD4	0.000000	0	0.000000	0.959133
T-CD8	1.677280	0	1.935967	1.655765
T-prolif	0.000000	0	0.000000	0.000000
Tenocyte	0.000000	0	0.000000	0.000000

scCODA¶

2hr¶

16hr¶

	treat_time	annotation	value
0	LNP_2hr	B-naive	0.749423
1	LNP_2hr	B-naive	2.573178
2	LNP_16hr	B-naive	1.496307
3	LNP_16hr	B-naive	1.399418
4	PBS_16hr	B-naive	1.000000
...	...	...	...
193	PBS_16hr	Tenocyte	1.000000
194	Spike_2hr	Tenocyte	0.382319
195	Spike_2hr	Tenocyte	0.329870
196	Spike_16hr	Tenocyte	0.145894
197	Spike_16hr	Tenocyte	0.212463