import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scrublet as scr
%matplotlib inline
plt.rcParams['pdf.fonttype'] = 42
def us(adata,gene,groups=None, show=False, exclude =None,figsize=None,**kwargs):
from matplotlib import rcParams
if figsize:
rcParams['figure.figsize'] = figsize
if ',' in gene:
gene = gene.split(',')
if groups:
if ',' in groups:groups = groups.split(',')
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
if exclude: # list to not show
groups = [x for x in set(adata.obs[gene]) if x not in exclude]
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
sc.pl.umap(adata,color=gene,color_map='OrRd',show=show, **kwargs)
rcParams['figure.figsize'] = [3,3]
def dotraw(adata, value, key):
if value not in adata.obs.columns:
if value not in adata.raw.var_names:
print('no'); return
idx = adata.raw.var_names.get_loc(value)
array = adata.raw.X.toarray()[:,idx]
else:
array = np.array(adata.obs[value])
df = pd.DataFrame(array, index=adata.obs.index).merge(adata.obs[[key]],
left_index=True, right_index=True)
df['mask'] = df[0].where(df[0] == 0, 1)
ratio = df.groupby(key)['mask'].mean()
mean = df.groupby(key)[0].mean()
res = pd.concat([ratio,mean], axis=1)
res.columns = ['ratio', 'mean']
return res
def us(adata,gene,groups=None, show=False, exclude =None,figsize=None,**kwargs):
from matplotlib import rcParams
if figsize:
rcParams['figure.figsize'] = figsize
else: rcParams['figure.figsize'] = (3,3)
if ',' in gene:
gene = gene.split(',')
if groups:
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
if exclude: # list to not show
groups = [x for x in set(adata.obs[gene]) if x not in exclude]
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
sc.pl.umap(adata,color=gene,color_map='OrRd',show=show, **kwargs)
def size(a,b): plt.gcf().set_size_inches(a,b)
def bind(adata, a,b):
order = []
for x in adata.obs[a].cat.categories:
for y in adata.obs[b].cat.categories:
order.append(x+'_'+y)
adata.obs[a+'_'+b] = [x+'_'+y for x,y in zip(adata.obs[a], adata.obs[b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].astype('category')
order = [x for x in order if x in set(adata.obs[a+'_'+b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].cat.reorder_categories(order)
def deg_single(adata, key, target, reference, method='wilcoxon', log=False, filter_res=False, pvalcut = 0.05, foldcut = 1):
import sys
sc.tl.rank_genes_groups(adata, key, groups = [target, reference], reference=reference, method='wilcoxon')
name = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
logf = pd.DataFrame(adata.uns['rank_genes_groups']['logfoldchanges'])
pval = pd.DataFrame(adata.uns['rank_genes_groups']['pvals_adj'])
df = pd.concat([name,logf,pval], axis=1)
df.columns = ['name','logFC', 'pval']
if log:
df['logpval'] = [np.log2(sys.float_info.max-1) if x == 0 else -np.log2(x) for x in df['pval']]
if filter_res:
df = df[df.pval < pvalcut].copy()
df = df[abs(df.logpval) > foldcut].copy()
return df
def degplot(deg, topgene=20, dotsize=3, xlim=None, pvalcut=0.05, foldcut=0):
import seaborn as sns
import matplotlib.pyplot as plt
from adjustText import adjust_text
if 'logpval' not in deg.columns:
deg['logpval'] = [np.log2(sys.float_info.max-1) if x == 0 else -np.log2(x) for x in deg['pval']]
deg = deg[deg.pval < pvalcut].copy()
deg = deg[abs(deg.logpval) > foldcut].copy()
deg = deg.sort_values("logFC", ascending=False)
sns.scatterplot(data = deg, x='logFC', y='logpval', s=dotsize, color='grey')
lim = np.max([abs(deg['logFC'].max()), abs(deg['logFC'].min())])
plt.xlim(-lim-1,lim+1)
if xlim:
plt.xlim(-xlim,xlim)
plt.axvline(x=0, color='black', ls='--', lw=0.7)
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC > foldcut)].shape[0] < i+1: continue
plt.text(deg.loc[deg.index[i],'logFC'], deg.loc[deg.index[i], 'logpval'],
deg.loc[deg.index[i], 'name'], fontsize=6, color='red')
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC < foldcut)].shape[0] < i+1: continue
plt.text(deg.loc[deg.index[-(i+1)],'logFC'], deg.loc[deg.index[-(i+1)], 'logpval'],
deg.loc[deg.index[-(i+1)], 'name'], fontsize=6, color='blue')
adjust_text(plt.gca().texts, arrowprops=dict(arrowstyle='-', lw=0.3, color='grey'), precision = 0.3)
plt.ylabel('log [Adjusted P-value]')
plt.xlabel('log [Fold Changes]')
sns.despine(left=True)
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC > foldcut)].shape[0] < i+1: continue
plt.plot(deg.loc[deg.index[i],'logFC'], deg.loc[deg.index[i], 'logpval'],
color='red', markersize=dotsize/3, marker='o')
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC < foldcut)].shape[0] < i+1: continue
plt.plot(deg.loc[deg.index[-(i+1)],'logFC'], deg.loc[deg.index[-(i+1)], 'logpval'],
color='blue', markersize=dotsize/3, marker='o')
plt.rcParams['figure.dpi'] = 300
# Setup
import importlib
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import pickle as pkl
import matplotlib.pyplot as plt
from sccoda.util import comp_ana as mod
from sccoda.util import cell_composition_data as dat
from sccoda.util import data_visualization as viz
import sccoda.datasets as scd
2024-05-01 01:30:58.968196: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used. 2024-05-01 01:30:59.048697: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered 2024-05-01 01:30:59.048732: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered 2024-05-01 01:30:59.053617: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered 2024-05-01 01:30:59.073778: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used. 2024-05-01 01:30:59.074430: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. 2024-05-01 01:30:59.832671: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
muscle_original = sc.read('data/muscle_230426_final_data_uptobatch7.h5ad')
cmapdic = dict(zip(muscle_original.obs['annotation'].cat.categories,
muscle_original.uns['annotation_colors']))
pre = muscle_original.copy()
df1 = pd.crosstab(pre.obs['Identity'], pre.obs['annotation'])
df1['treat_time'] = ['_'.join(x.split("_")[:2]) for x in df1.index]
# We are analyzing 2 hour and 16 hour samples
df1 = df1[df1['treat_time'] != 'Subunit_16hr'].copy()
df2 = df1[df1['treat_time'] != 'Spike_40hr'].copy()
df3 = df2.melt("treat_time")
df3.head()
treat_time | annotation | value | |
---|---|---|---|
0 | LNP_2hr | B-naive | 607 |
1 | LNP_2hr | B-naive | 2857 |
2 | LNP_16hr | B-naive | 1323 |
3 | LNP_16hr | B-naive | 928 |
4 | PBS_16hr | B-naive | 1028 |
df3['treat_time'] = df3['treat_time'].astype('category').cat.reorder_categories('PBS_16hr,LNP_2hr,Spike_2hr,LNP_16hr,Spike_16hr'.split(","))
mdf = df2.groupby("treat_time").mean()
base = dict(mdf.loc['PBS_16hr'])
ndf3 = df3.copy()
ndf3['value'] = [i / base[j] for i,j in zip(ndf3['value'], ndf3['annotation'])]
import seaborn as sns
sns.boxplot(data = df3[df3['annotation'].isin('Neutrophil,Monocyte,T-CD8'.split(","))],
x = 'annotation', y = 'value', hue = 'treat_time', palette = cmap, boxprops = dict(alpha = .7),
legend = False)
sns.stripplot(data = df3[df3['annotation'].isin('Neutrophil,Monocyte,T-CD8'.split(","))],
x = 'annotation', y = 'value', hue = 'treat_time', palette = cmap, dodge=True)
plt.legend(bbox_to_anchor=(1,.5), loc = 'center left')
plt.axhline(1000, ls = '--', c = 'grey')
plt.ylim(0)
sns.despine()
plt.xlabel('')
size(6,2.5)
plt.savefig('fig/R_count.pdf', bbox_inches='tight')
f, axes = plt.subplots(1,2, sharey=True)
cmap = 'inferno'
sns.boxplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True,
palette=cmap, ax = axes[0], legend=False)
sns.stripplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
palette=cmap, ax = axes[0], legend=False)
sns.boxplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, legend=False,
palette=cmap, ax = axes[1])
sns.stripplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
palette=cmap, ax = axes[1])
plt.axvline(1, ls = '--', color='grey')
sns.despine()
axes[0].set_ylabel("")
plt.legend(bbox_to_anchor=(1,1))
size(3,8)
data_all = dat.from_pandas(df1, covariate_columns=["treat_time"])
data_all
AnnData object with n_obs × n_vars = 10 × 22 obs: 'treat_time'
scdic = {}
data_spike = data_all[data_all.obs['treat_time'].isin('PBS_16hr,LNP_2hr,Spike_2hr'.split(","))].copy()
data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.rename_categories({
'PBS_16hr':'PBS',
'LNP_2hr':'LNP',
'Spike_2hr':'LNP+mRNA'
})
data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.reorder_categories(
'PBS,LNP,LNP+mRNA'.split(","))
data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP+mRNA'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()
scdic['Spike_2hr'] = sim_results.copy()
Automatic reference selection! Reference cell type set to DC-cDC1 Zero counts encountered in data! Added a pseudocount of 0.5.
2024-05-01 01:32:07.849285: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355 2024-05-01 01:32:07.849596: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform. Skipping registering GPU devices... 0%| | 0/20000 [00:00<?, ?it/s]2024-05-01 01:32:09.602500: I external/local_xla/xla/service/service.cc:168] XLA service 0xaabb690 initialized for platform Host (this does not guarantee that XLA will be used). Devices: 2024-05-01 01:32:09.602546: I external/local_xla/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version 2024-05-01 01:32:09.657089: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable. WARNING: All log messages before absl::InitializeLog() is called are written to STDERR I0000 00:00:1714494730.146518 1555467 device_compiler.h:186] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process. 2024-05-01 01:32:10.159893: E external/local_xla/xla/stream_executor/stream_executor_internal.h:177] SetPriority unimplemented for this stream. 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:03<00:00, 316.79it/s]
MCMC sampling finished. (81.609 sec) Acceptance rate: 42.8%
data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()
scdic['LNP_2hr'] = sim_results.copy()
Automatic reference selection! Reference cell type set to Mast
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:03<00:00, 315.59it/s]
MCMC sampling finished. (81.022 sec) Acceptance rate: 72.2%
data_in = data_spike[data_spike.obs['treat_time'].isin('LNP+mRNA,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()
scdic['SvL_2hr'] = sim_results.copy()
Automatic reference selection! Reference cell type set to DC-migratory Zero counts encountered in data! Added a pseudocount of 0.5.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:05<00:00, 303.13it/s]
MCMC sampling finished. (84.574 sec) Acceptance rate: 50.7%
data_spike = data_all[data_all.obs['treat_time'].isin('PBS_16hr,LNP_16hr,Spike_16hr'.split(","))]
data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.rename_categories({
'PBS_16hr':'PBS',
'LNP_16hr':'LNP',
'Spike_16hr':'LNP+mRNA'
})
data_spike.obs['treat_time'] = data_spike.obs['treat_time'].astype('category').cat.reorder_categories(
'PBS,LNP,LNP+mRNA'.split(","))
data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP+mRNA'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()
scdic['Spike_16hr'] = sim_results.copy()
Automatic reference selection! Reference cell type set to DC-cDC1 Zero counts encountered in data! Added a pseudocount of 0.5.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:06<00:00, 300.64it/s]
MCMC sampling finished. (84.660 sec) Acceptance rate: 61.6%
data_in = data_spike[data_spike.obs['treat_time'].isin('PBS,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()
scdic['LNP_16hr'] = sim_results.copy()
Automatic reference selection! Reference cell type set to T-prolif Zero counts encountered in data! Added a pseudocount of 0.5.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:04<00:00, 311.27it/s]
WARNING:tensorflow:5 out of the last 5 calls to <function CompositionalModel.sampling.<locals>.sample_mcmc at 0x7fcdbc6a1f70> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. MCMC sampling finished. (82.370 sec) Acceptance rate: 47.2%
data_in = data_spike[data_spike.obs['treat_time'].isin('LNP+mRNA,LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="treat_time")
sim_results = model_spike.sample_hmc()
scdic['SvL_16hr'] = sim_results.copy()
Automatic reference selection! Reference cell type set to Schwann-non-myelinating Zero counts encountered in data! Added a pseudocount of 0.5.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:06<00:00, 302.08it/s]
WARNING:tensorflow:6 out of the last 6 calls to <function CompositionalModel.sampling.<locals>.sample_mcmc at 0x7fcdbc57fca0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details. MCMC sampling finished. (84.937 sec) Acceptance rate: 59.2%
fdr_cut = .05
dfls = []
for cond in scdic.keys():
if cond.startswith('SvL'): continue
simres = scdic[cond].copy()
simres.set_fdr(est_fdr=fdr_cut)
dfx = simres.effect_df[['log2-fold change', 'Final Parameter']]
dfx['Final'] = [i if j != 0 else 0 for i,j in zip(dfx['log2-fold change'], dfx['Final Parameter'])]
dfx = dfx.reset_index().set_index('Cell Type')['Final'].copy()
dfls.append(dfx)
simres = pd.concat(dfls, axis = 1).copy()
simres.columns = [x for x in scdic.keys() if not x.startswith("SvL")]
simres
Spike_2hr | LNP_2hr | Spike_16hr | LNP_16hr | |
---|---|---|---|---|
Cell Type | ||||
B-naive | 1.333924 | 0 | 0.000000 | 0.577742 |
B-prolif | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-cDC1 | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-cDC2 | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-migratory | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-plasmacytoid | 0.000000 | 0 | 0.000000 | 0.000000 |
Endothelial | 0.000000 | 0 | 0.000000 | 0.000000 |
Fibroblast | -2.172026 | 0 | 0.000000 | -2.594465 |
Macrophage | 0.000000 | 0 | 0.000000 | 0.000000 |
Mast | 0.000000 | 0 | 0.000000 | 0.000000 |
Monocyte | 0.000000 | 0 | 1.756246 | 1.725104 |
Mural | 0.000000 | 0 | 0.000000 | 0.000000 |
Muscle-skeletal-mature | 0.000000 | 0 | -2.453719 | -3.400790 |
Muscle-skeletal-progenitor | 0.000000 | 0 | 0.000000 | 0.000000 |
NK | 0.000000 | 0 | 0.000000 | 1.496633 |
Neutrophil | 0.000000 | 0 | 2.449540 | 2.215557 |
Schwann-myelinating | 0.000000 | 0 | 0.000000 | 0.000000 |
Schwann-non-myelinating | 0.000000 | 0 | 0.000000 | 0.000000 |
T-CD4 | 0.000000 | 0 | 0.000000 | 0.959133 |
T-CD8 | 1.677280 | 0 | 1.935967 | 1.655765 |
T-prolif | 0.000000 | 0 | 0.000000 | 0.000000 |
Tenocyte | 0.000000 | 0 | 0.000000 | 0.000000 |
df1 = pd.crosstab(pre.obs['Identity'], pre.obs['annotation'], normalize=0)
df1['treat_time'] = ['_'.join(x.split("_")[:2]) for x in df1.index]
df1 = df1[df1['treat_time'] != 'Subunit_16hr'].copy()
df1
annotation | B-naive | B-prolif | DC-cDC1 | DC-cDC2 | DC-migratory | DC-plasmacytoid | Endothelial | Fibroblast | Macrophage | Mast | ... | Muscle-skeletal-progenitor | NK | Neutrophil | Schwann-myelinating | Schwann-non-myelinating | T-CD4 | T-CD8 | T-prolif | Tenocyte | treat_time |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Identity | |||||||||||||||||||||
LNP_2hr_batch5_LNP1_1st | 0.086504 | 0.000713 | 0.004560 | 0.001710 | 0.009121 | 0.004988 | 0.222602 | 0.103321 | 0.017956 | 0.002850 | ... | 0.030497 | 0.014251 | 0.058715 | 0.009121 | 0.008551 | 0.195525 | 0.075246 | 0.002850 | 0.015534 | LNP_2hr |
LNP_2hr_batch5_LNP2_1st | 0.297016 | 0.000208 | 0.003951 | 0.001559 | 0.007901 | 0.003743 | 0.123090 | 0.098139 | 0.015594 | 0.003431 | ... | 0.012267 | 0.008005 | 0.014451 | 0.012371 | 0.004782 | 0.217278 | 0.099802 | 0.003431 | 0.002911 | LNP_2hr |
LNP_16hr_batch2_M_LNP_1st | 0.172715 | 0.001436 | 0.000261 | 0.000000 | 0.008747 | 0.002872 | 0.086162 | 0.031723 | 0.004308 | 0.001567 | ... | 0.009922 | 0.019060 | 0.042167 | 0.003394 | 0.002611 | 0.344778 | 0.141645 | 0.004439 | 0.002480 | LNP_16hr |
LNP_16hr_batch3_M-LNP_2nd | 0.161532 | 0.001393 | 0.000000 | 0.000000 | 0.007137 | 0.004003 | 0.071192 | 0.024543 | 0.003829 | 0.000348 | ... | 0.005222 | 0.014970 | 0.031506 | 0.003307 | 0.002089 | 0.448042 | 0.142907 | 0.002611 | 0.001741 | LNP_16hr |
PBS_16hr_batch2_M_PBS_1st | 0.115428 | 0.004042 | 0.000112 | 0.000112 | 0.002358 | 0.000786 | 0.167864 | 0.153492 | 0.026387 | 0.004267 | ... | 0.026724 | 0.003930 | 0.004604 | 0.007635 | 0.007186 | 0.188188 | 0.040647 | 0.001347 | 0.024590 | PBS_16hr |
Spike_2hr_batch2_M_spike_early_1st | 0.296388 | 0.001484 | 0.000495 | 0.000000 | 0.012370 | 0.005443 | 0.064819 | 0.025235 | 0.005443 | 0.000000 | ... | 0.010391 | 0.012370 | 0.004948 | 0.000990 | 0.000000 | 0.304305 | 0.184067 | 0.010886 | 0.009401 | Spike_2hr |
Spike_2hr_batch3_M-SE_2nd | 0.238082 | 0.025615 | 0.000569 | 0.000569 | 0.006119 | 0.010673 | 0.128220 | 0.043689 | 0.005977 | 0.001423 | ... | 0.009819 | 0.007969 | 0.007542 | 0.004696 | 0.003985 | 0.281486 | 0.103600 | 0.003558 | 0.008112 | Spike_2hr |
Spike_16hr_batch2_M_spike_late_1st | 0.167154 | 0.001063 | 0.000133 | 0.000000 | 0.011294 | 0.001860 | 0.052883 | 0.039065 | 0.004119 | 0.000930 | ... | 0.008504 | 0.010098 | 0.033218 | 0.001594 | 0.002392 | 0.408185 | 0.119453 | 0.004518 | 0.003588 | Spike_16hr |
Spike_16hr_batch3_M-SL_2nd | 0.171102 | 0.024490 | 0.000327 | 0.000000 | 0.003265 | 0.005224 | 0.071184 | 0.036735 | 0.004898 | 0.001306 | ... | 0.004898 | 0.012571 | 0.052408 | 0.000816 | 0.001959 | 0.393469 | 0.143184 | 0.004571 | 0.005224 | Spike_16hr |
Spike_40hr_batch6_spkie_40h_1st | 0.211730 | 0.004016 | 0.000243 | 0.000000 | 0.010221 | 0.004381 | 0.029691 | 0.013020 | 0.001825 | 0.000365 | ... | 0.000487 | 0.004624 | 0.011803 | 0.001339 | 0.000365 | 0.506084 | 0.149671 | 0.006206 | 0.002069 | Spike_40hr |
10 rows × 23 columns
df2 = df1[df1['treat_time'] != 'Spike_40hr'].copy()
df3 = df2.melt("treat_time")
df3.head()
treat_time | annotation | value | |
---|---|---|---|
0 | LNP_2hr | B-naive | 0.086504 |
1 | LNP_2hr | B-naive | 0.297016 |
2 | LNP_16hr | B-naive | 0.172715 |
3 | LNP_16hr | B-naive | 0.161532 |
4 | PBS_16hr | B-naive | 0.115428 |
df3['treat_time'] = df3['treat_time'].astype('category').cat.reorder_categories('PBS_16hr,LNP_2hr,Spike_2hr,LNP_16hr,Spike_16hr'.split(","))
mdf = df2.groupby("treat_time").mean()
base = dict(mdf.loc['PBS_16hr'])
ndf3 = df3.copy()
ndf3['value'] = [i / base[j] for i,j in zip(ndf3['value'], ndf3['annotation'])]
dummy = pd.DataFrame(index = simres.index, columns = ['PBS_16hr'])
simres1 = pd.concat([simres, dummy], axis = 1).copy()
df4 = simres1.reset_index().melt("Cell Type")
df4['variable'] = df4['variable'].astype('category').cat.reorder_categories('PBS_16hr,LNP_2hr,Spike_2hr,LNP_16hr,Spike_16hr'.split(","))
cats = df4['variable'].cat.categories
simres
Spike_2hr | LNP_2hr | Spike_16hr | LNP_16hr | |
---|---|---|---|---|
Cell Type | ||||
B-naive | 1.333924 | 0 | 0.000000 | 0.577742 |
B-prolif | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-cDC1 | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-cDC2 | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-migratory | 0.000000 | 0 | 0.000000 | 0.000000 |
DC-plasmacytoid | 0.000000 | 0 | 0.000000 | 0.000000 |
Endothelial | 0.000000 | 0 | 0.000000 | 0.000000 |
Fibroblast | -2.172026 | 0 | 0.000000 | -2.594465 |
Macrophage | 0.000000 | 0 | 0.000000 | 0.000000 |
Mast | 0.000000 | 0 | 0.000000 | 0.000000 |
Monocyte | 0.000000 | 0 | 1.756246 | 1.725104 |
Mural | 0.000000 | 0 | 0.000000 | 0.000000 |
Muscle-skeletal-mature | 0.000000 | 0 | -2.453719 | -3.400790 |
Muscle-skeletal-progenitor | 0.000000 | 0 | 0.000000 | 0.000000 |
NK | 0.000000 | 0 | 0.000000 | 1.496633 |
Neutrophil | 0.000000 | 0 | 2.449540 | 2.215557 |
Schwann-myelinating | 0.000000 | 0 | 0.000000 | 0.000000 |
Schwann-non-myelinating | 0.000000 | 0 | 0.000000 | 0.000000 |
T-CD4 | 0.000000 | 0 | 0.000000 | 0.959133 |
T-CD8 | 1.677280 | 0 | 1.935967 | 1.655765 |
T-prolif | 0.000000 | 0 | 0.000000 | 0.000000 |
Tenocyte | 0.000000 | 0 | 0.000000 | 0.000000 |
ndf3
treat_time | annotation | value | |
---|---|---|---|
0 | LNP_2hr | B-naive | 0.749423 |
1 | LNP_2hr | B-naive | 2.573178 |
2 | LNP_16hr | B-naive | 1.496307 |
3 | LNP_16hr | B-naive | 1.399418 |
4 | PBS_16hr | B-naive | 1.000000 |
... | ... | ... | ... |
193 | PBS_16hr | Tenocyte | 1.000000 |
194 | Spike_2hr | Tenocyte | 0.382319 |
195 | Spike_2hr | Tenocyte | 0.329870 |
196 | Spike_16hr | Tenocyte | 0.145894 |
197 | Spike_16hr | Tenocyte | 0.212463 |
198 rows × 3 columns
from math import ceil, floor
f, axes = plt.subplots(1,5, sharey=True,
width_ratios=[1,.5,.5,.5,.5])
cmap = 'inferno'
sns.boxplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True,
palette=cmap, ax = axes[0], legend=False)
sns.stripplot(data = df3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
palette=cmap, ax = axes[0], legend=False)
# sns.boxplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, legend=False,
# palette=cmap, ax = axes[1])
# sns.stripplot(data = ndf3, x = 'value', y = 'annotation', hue = 'treat_time', dodge=True, jitter=False,
# palette=cmap, ax = axes[1])
# axes[1].axvline(1, ls = '--', color='grey')
n_idx = 0
for cat in cats:
if cat in 'PBS_16hr'.split(","): continue
n_idx += 1
sns.barplot(data = df4[df4.variable == cat], x = 'value', y = 'Cell Type', hue = 'variable', dodge=False,
palette=cmap, ax = axes[n_idx], edgecolor='black', lw = .5)
axes[n_idx].legend([],[],frameon=False)
if simres[cat].sum() == 0:
axes[n_idx].set_xlim(0, 1)
else:
axes[n_idx].set_xticks([floor(df4[df4.variable == cat]['value'].min()),
0,
ceil(df4[df4.variable == cat]['value'].max())])
axes[n_idx].axvline(0, c= 'black', lw = .5)
sns.despine()
axes[0].set_ylabel("")
axes[1].legend(bbox_to_anchor=(-5,1))
size(5,5)
plt.savefig('fig/R_sccoda.pdf', bbox_inches='tight')
dic2 = {}
data_spike.obs['treat_time']
Identity LNP_16hr_batch2_M_LNP_1st LNP LNP_16hr_batch3_M-LNP_2nd LNP PBS_16hr_batch2_M_PBS_1st PBS Spike_16hr_batch2_M_spike_late_1st LNP+mRNA Spike_16hr_batch3_M-SL_2nd LNP+mRNA Name: treat_time, dtype: category Categories (3, object): ['PBS', 'LNP', 'LNP+mRNA']
data_spike.obs['shot'] = [i.split("_")[-1] for i in data_spike.obs.index]
data_spike.X.max()
3072.0
data_in = data_spike[data_spike.obs['treat_time'].isin('LNP+mRNA'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="shot")
sim_results = model_spike.sample_hmc()
dic2['1vs2_mRNA'] = sim_results.copy()
Automatic reference selection! Reference cell type set to T-prolif Zero counts encountered in data! Added a pseudocount of 0.5.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:02<00:00, 319.66it/s]
MCMC sampling finished. (79.988 sec) Acceptance rate: 52.9%
data_in = data_spike[data_spike.obs['treat_time'].isin('LNP'.split(","))].copy()
model_spike = mod.CompositionalAnalysis(data_in, formula="shot")
sim_results = model_spike.sample_hmc()
dic2['1vs2_LNP'] = sim_results.copy()
Automatic reference selection! Reference cell type set to B-prolif Zero counts encountered in data! Added a pseudocount of 0.5.
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 20000/20000 [01:02<00:00, 318.04it/s]
MCMC sampling finished. (81.938 sec) Acceptance rate: 67.3%
dfls = []
for cond in dic2.keys():
if cond.startswith('SvL'): continue
simres = dic2[cond].copy()
simres.set_fdr(est_fdr=fdr_cut)
dfx = simres.effect_df[['log2-fold change', 'Final Parameter']]
dfx['Final'] = [i if j != 0 else 0 for i,j in zip(dfx['log2-fold change'], dfx['Final Parameter'])]
dfx = dfx.reset_index().set_index('Cell Type')['Final'].copy()
dfls.append(dfx)
simres = pd.concat(dfls, axis = 1).copy()
simres.columns = [x for x in dic2.keys() if not x.startswith("SvL")]
data_spike.to_df()['B-prolif'].sort_values()
Identity LNP_16hr_batch3_M-LNP_2nd 8.0 Spike_16hr_batch2_M_spike_late_1st 8.0 LNP_16hr_batch2_M_LNP_1st 11.0 PBS_16hr_batch2_M_PBS_1st 36.0 Spike_16hr_batch3_M-SL_2nd 150.0 Name: B-prolif, dtype: float32
data_spike.to_df()['Monocyte']
Identity LNP_16hr_batch2_M_LNP_1st 585.0 LNP_16hr_batch3_M-LNP_2nd 283.0 PBS_16hr_batch2_M_PBS_1st 141.0 Spike_16hr_batch2_M_spike_late_1st 679.0 Spike_16hr_batch3_M-SL_2nd 174.0 Name: Monocyte, dtype: float32
cdf = data_spike.to_df().copy()
cdf = (cdf.T / cdf.T.sum()).T
cdf.index = ['_'.join(i.split("_")[:1]+[i.split("_")[-1]]) if not i.startswith("PBS") else
'PBS' for i in cdf.index]
cdf = cdf.reset_index()
cdf['index'] = cdf['index'].astype('category').cat.reorder_categories('PBS,LNP_1st,LNP_2nd,Spike_1st,Spike_2nd'.split(","))
cdf = cdf.sort_values("index")
cdf1 = cdf.melt('index')
cellorder = cdf.mean(numeric_only=True).sort_values(ascending=False).index
cmapcond = {
'PBS':'darkgrey',
'LNP_1st':'bisque',
'LNP_2nd':'orange',
'Spike_1st':'lightskyblue',
'Spike_2nd':'royalblue'
}
sns.barplot(data = cdf1, x= 'value', y = 'annotation', hue='index',
width = 1, linewidth = 0,
order=cellorder, palette=cmapcond)
sns.despine()
cdf2 = cdf.set_index("index")
enr1 = (cdf2.loc['LNP_2nd'] / (cdf2.loc['LNP_1st'])).fillna(0).to_frame()
enr2 = (cdf2.loc['Spike_2nd'] / (cdf2.loc['Spike_1st'])).fillna(0).to_frame()
enr1 = (cdf2.loc['LNP_2nd'] / (cdf2.loc['LNP_1st'] + cdf2.loc['LNP_2nd'])).fillna(0) * 2
enr2 = (cdf2.loc['Spike_2nd'] / (cdf2.loc['Spike_1st'] + cdf2.loc['Spike_2nd'])).fillna(0) * 2
enr1 = enr1.to_frame()
enr2 = enr2.to_frame()
enr1.columns = ['ratio']
enr2.columns = ['ratio']
f, ax = plt.subplots(1,5, sharey = True, width_ratios=[1.5,1,1,.5,.5])
sns.barplot(data = cdf1, x= 'value', y = 'annotation', hue='index',
width = 1, linewidth = 0,
order=cellorder, palette=cmapcond, ax = ax[0])
sns.barplot(data = enr1.reset_index(), x= 'ratio', y = 'annotation',
order = cellorder, color = cmapcond['LNP_2nd'],
edgecolor='black', ax = ax[1])
sns.barplot(data = enr2.reset_index(), x= 'ratio', y = 'annotation',
order = cellorder, color = cmapcond['Spike_2nd'],
edgecolor='black', ax = ax[2])
sns.barplot(data = simres.reset_index(), x = '1vs2_LNP', y = 'Cell Type',
color = cmapcond['LNP_2nd'], ax = ax[3], edgecolor='black')
sns.barplot(data = simres.reset_index(), x = '1vs2_mRNA', y = 'Cell Type',
color = cmapcond['Spike_2nd'], ax = ax[4], edgecolor='black')
ax[0].legend(title = '', loc = 'lower right')
ax[0].set_ylabel("")
ax[1].axvline(1, color='black', ls = '--')
ax[2].axvline(1, color='black', ls = '--')
ax[1].set_xlim(0,2)
ax[2].set_xlim(0,2)
sns.despine()
size(10,5)
plt.savefig('fig/R_shot12_barplot.pdf', bbox_inches='tight')
pre
AnnData object with n_obs × n_vars = 83094 × 32285 obs: 'dose', 'time', 'name', 'n_counts', 'batch', 'treat', 'Sample', 'doublet_scores', 'n_genes', 'shot', 'Identity', 'mito', 'predicted_doublets', 'Spike', 'Pfizer', 'Adeno', 'is_Spike', 'is_Pfizer', 'is_Adeno', 'genotype', 'Spike_log1p', 'annotation', 'treat_time', 'treat_time_shot' var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std' uns: 'Identity_colors', 'annotation_colors', 'hvg', 'neighbors', 'pca', 'time_colors', 'treat_colors', 'umap', 'treat_time_colors', 'treat_time_shot_colors' obsm: 'X_pca', 'X_umap' varm: 'PCs' obsp: 'connectivities', 'distances'
bind(pre, 'treat_time', 'shot')
cmapdic2 = dict(zip(pre.obs['treat_time_shot'].cat.categories,
pre.uns['treat_time_shot_colors']))
cmapdic2['PBS_16hr_1st'] = 'grey'
cmapdic2['LNP_16hr_1st'] = 'bisque'
cmapdic2['LNP_16hr_2nd'] = 'orange'
cmapdic2['LNP+mRNA_16hr_1st'] = 'lightskyblue'
cmapdic2['LNP+mRNA_16hr_2nd'] = 'royalblue'
cats = 'PBS_16hr_1st,LNP_16hr_1st,LNP_16hr_2nd,LNP+mRNA_16hr_1st,LNP+mRNA_16hr_2nd'.split(",")
cat = cats[0]
for cat in cats:
sc.pl.umap(pre, color='treat_time_shot', groups = cat,
palette=cmapdic2, show = False, frameon=False, zorder = -1)
size(2,2)
plt.gca().set_rasterization_zorder(0)
plt.title(cat)
plt.savefig('fig/R_shot12_UMAP_%s.pdf'%(cat), bbox_inches='tight')
cellorder1 = [
'Fibroblast',
'Endothelial',
'Mural',
'Monocyte',
'T-CD4',
'B-naive',
'T-CD8',
'Macrophage',
'Muscle-skeletal-mature',
'Neutrophil',
'DC-plasmacytoid',
'Schwann-myelinating',
'Mast',
'NK',
'Muscle-skeletal-progenitor',
'T-prolif',
'Schwann-non-myelinating',
'Tenocyte',
'B-prolif',
'DC-migratory'
]
pca = sc.read('data/PCA_revision_total_240503.h5ad')
pca
AnnData object with n_obs × n_vars = 123 × 32285 obs: 'treat', 'time', 'batch', 'celltype', 'batch_treat_time', 'imported', 'condition' uns: 'batch_colors', 'celltype_colors', 'condition_colors', 'time_colors', 'treat_colors' obsm: 'X_sPC'
sc.pl.embedding(pca, basis='sPC', color = 'treat')
bdata = pca[pca.obs['treat'].str.startswith("ALC")].copy()
bdata
AnnData object with n_obs × n_vars = 74 × 32285 obs: 'treat', 'time', 'batch', 'celltype', 'batch_treat_time', 'imported', 'condition' uns: 'batch_colors', 'celltype_colors', 'condition_colors', 'time_colors', 'treat_colors' obsm: 'X_sPC'
dfco = pd.DataFrame(bdata.obsm['X_sPC'], index = bdata.obs.index).copy()
dfco = dfco.iloc[:,:2].copy()
dfco.columns = ['PC1', 'PC2']
dfco = dfco.merge(bdata.obs, left_index=True, right_index=True, how = 'left')
dfco['shot'] = [i.split("_")[-2] for i in dfco.index]
rename = {
'ALC-0315':'LNP',
'ALC-0315+mRNA':'LNP+mRNA'}
dfco['treat'] = dfco['treat'].map(rename)
dfco['treat_shot'] = ['_'.join([x,y]) for x,y in zip(dfco['treat'], dfco['shot'])]
dfco['celltype'] = dfco['celltype'].astype('category').cat.reorder_categories(cellorder1)
dfco['treat_shot'] = dfco['treat_shot'].astype('category').cat.reorder_categories('LNP_1st,LNP_2nd,LNP+mRNA_1st,LNP+mRNA_2nd'.split(","))
cmapdic3 = {}
cmapdic3['LNP_1st'] = cmapdic2['LNP_16hr_1st']
cmapdic3['LNP_2nd'] = cmapdic2['LNP_16hr_2nd']
cmapdic3['LNP+mRNA_1st'] = cmapdic2['LNP+mRNA_16hr_1st']
cmapdic3['LNP+mRNA_2nd'] = cmapdic2['LNP+mRNA_16hr_2nd']
f, ax = plt.subplots(2,1, sharex = True)
sns.lineplot(data = dfco, y = 'PC1', x = 'celltype', hue='treat_shot', ax = ax[0],
palette = cmapdic3, legend = False)
sns.stripplot(data = dfco, y = 'PC1', x = 'celltype', hue='treat_shot', ax = ax[0],
palette = cmapdic3, legend = False)
sns.lineplot(data = dfco, y = 'PC2', x = 'celltype', hue='treat_shot', ax = ax[1],
palette = cmapdic3, legend = False)
sns.stripplot(data = dfco, y = 'PC2', x = 'celltype', hue='treat_shot', ax = ax[1],
palette = cmapdic3, legend = True)
sns.despine()
plt.legend(bbox_to_anchor=(1,1), loc = 'center left')
size(10,2.5)
plt.tick_params(axis = 'x', rotation = 90)
plt.xlabel('')
plt.savefig('fig/R_shot12_PC12.pdf', bbox_inches='tight')