import sceleto2 as scl
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['pdf.fonttype'] = 42
def bind(adata, a,b):
order = []
for x in adata.obs[a].cat.categories:
for y in adata.obs[b].cat.categories:
order.append(x+'_'+y)
adata.obs[a+'_'+b] = [x+'_'+y for x,y in zip(adata.obs[a], adata.obs[b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].astype('category')
order = [x for x in order if x in set(adata.obs[a+'_'+b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].cat.reorder_categories(order)
def size(a,b):
plt.gcf().set_size_inches(a,b)
muscle = sc.read('data/muscle_230426_final_data_uptobatch7.h5ad')
import seaborn as sns
from matplotlib.ticker import FormatStrFormatter
mdc = muscle[muscle.obs.annotation == 'DC-migratory'].copy()
mdc = mdc.raw.to_adata().copy() mdc.raw = mdc.copy()
sc.pp.highly_variable_genes(mdc, max_mean=10) sc.pp.scale(mdc) sc.tl.pca(mdc, use_highly_variable=True)
sc.pp.neighbors(mdc) sc.tl.umap(mdc)
scl.us(mdc, 'treat')
sc.tl.leiden(mdc,0.3)
scl.us(mdc, 'leiden')
mdc = mdc[~mdc.obs.leiden.isin(['3'])].copy()
mdc = mdc.raw.to_adata().copy() mdc.raw = mdc.copy()
sc.pp.highly_variable_genes(mdc, max_mean=10) sc.pp.scale(mdc) sc.tl.pca(mdc, use_highly_variable=True)
sc.pp.neighbors(mdc) sc.tl.umap(mdc)
scl.us(mdc, 'treat')
sc.tl.leiden(mdc,0.2)
scl.us(mdc, 'leiden')
mdc.obs['anno'] = mdc.obs['leiden'].astype(str).map({'0':'mDC_others','1':'mDC_ISG'})
mdc = sc.read('data/mDC_230504.h5ad')
def deg_single(adata, key, target, reference, method='wilcoxon', log=False, filter_res=False, pvalcut = 0.05, foldcut = 1):
import sys
sc.tl.rank_genes_groups(adata, key, groups = [target, reference], reference=reference, method='wilcoxon')
name = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
logf = pd.DataFrame(adata.uns['rank_genes_groups']['logfoldchanges'])
pval = pd.DataFrame(adata.uns['rank_genes_groups']['pvals_adj'])
df = pd.concat([name,logf,pval], axis=1)
df.columns = ['name','logFC', 'pval']
if log:
df['logpval'] = [np.log10(sys.float_info.max-1) if x == 0 else -np.log10(x) for x in df['pval']]
if filter_res:
df = df[df.pval < pvalcut].copy()
df = df[abs(df.logpval) > foldcut].copy()
return df
deg = deg_single(mdc, 'treat_time', 'LNP+mRNA_16hr', 'LNP_16hr', log=True)
from adjustText import adjust_text
deg
name | logFC | pval | logpval | |
---|---|---|---|---|
0 | Bst2 | 3.436362 | 4.384336e-21 | 20.358096 |
1 | Isg15 | 3.993746 | 1.191967e-19 | 18.923736 |
2 | Ifi44 | 3.246211 | 3.244526e-18 | 17.488849 |
3 | Ifit1 | 3.319930 | 3.244526e-18 | 17.488849 |
4 | Irf7 | 3.827315 | 9.145540e-18 | 17.038791 |
... | ... | ... | ... | ... |
32280 | Xist | -1.310870 | 2.051446e-06 | 5.687940 |
32281 | Hebp1 | -1.859955 | 9.378457e-07 | 6.027869 |
32282 | Kctd12 | -2.383595 | 8.339671e-07 | 6.078851 |
32283 | Eef2 | -1.370289 | 1.982958e-08 | 7.702686 |
32284 | Gm42418 | -0.809498 | 7.474346e-09 | 8.126427 |
32285 rows × 4 columns
deg1 = deg[deg.pval < 0.05].copy()
plt.rcParams['figure.dpi'] = 300
sns.set_style("white")
nhead = 10
sns.scatterplot(data = deg1, x='logFC', y='logpval', color='grey', linewidth=0, s=12)
sns.despine(left = True)
plt.xlim(-5.5,5.5)
for x in deg1.head(nhead).index:
plt.text(deg1.loc[x,'logFC'],
deg1.loc[x,'logpval'],
deg1.loc[x,'name'], fontsize=6)
ax = sns.scatterplot(data = deg1.head(nhead), x='logFC', y='logpval', color='red',
linewidth=.5,
edgecolor='black', s=12)
adjust_text(plt.gca().texts, lim=int(1e3), precision=0.1, force_text=(5,5),
arrowprops={'arrowstyle':'-', 'color':'red', 'linewidth':.3})
plt.xlabel(" LNP <----logFC----> LNP+mRNA")
plt.ylabel("log10[Adjusted P-value]")
plt.title('DC-migratory')
plt.axvline(0, color='black', linestyle='-', linewidth=.5)
size(2.5,2.5)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_volcano.pdf', bbox_inches='tight')
import gseapy as gp
deg = pd.read_csv('data/mDC_LNPvsLNP+mRNA.csv', index_col=0)
up = deg[(deg.pval < 0.05) & (deg.logFC > 1)]['name'].to_list()
len(up)
161
do = deg[(deg.pval < 0.05) & (deg.logFC < -1)]['name'].to_list()
len(do)
60
import gseapy as gp
enrup = gp.enrichr(up, gene_sets='GO_Biological_Process_2021', organism='mouse').res2d
enrup
Gene_set | Term | Overlap | P-value | Adjusted P-value | Old P-value | Old Adjusted P-value | Odds Ratio | Combined Score | Genes | |
---|---|---|---|---|---|---|---|---|---|---|
0 | GO_Biological_Process_2021 | cellular response to type I interferon (GO:007... | 18/65 | 2.734781e-23 | 1.702401e-20 | 0 | 0 | 53.006398 | 2753.862887 | IFITM3;SP100;RSAD2;STAT1;STAT2;ADAR;ISG15;IFI3... |
1 | GO_Biological_Process_2021 | type I interferon signaling pathway (GO:0060337) | 18/65 | 2.734781e-23 | 1.702401e-20 | 0 | 0 | 53.006398 | 2753.862887 | IFITM3;SP100;RSAD2;STAT1;STAT2;ADAR;ISG15;IFI3... |
2 | GO_Biological_Process_2021 | defense response to virus (GO:0051607) | 22/133 | 6.791729e-23 | 2.818567e-20 | 0 | 0 | 28.129885 | 1435.854878 | IFITM3;ZBP1;RTP4;CD40;RSAD2;DDX58;STAT1;STAT2;... |
3 | GO_Biological_Process_2021 | defense response to symbiont (GO:0140546) | 21/124 | 4.042291e-22 | 1.258163e-19 | 0 | 0 | 28.741748 | 1415.820223 | IFITM3;ZBP1;RTP4;CD40;RSAD2;DDX58;STAT1;STAT2;... |
4 | GO_Biological_Process_2021 | negative regulation of viral process (GO:0048525) | 12/70 | 3.477998e-13 | 8.660214e-11 | 0 | 0 | 27.467253 | 787.957191 | IFITM3;IFIH1;BST2;ISG20;RSAD2;STAT1;OAS3;EIF2A... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1240 | GO_Biological_Process_2021 | proteolysis (GO:0006508) | 1/287 | 9.033345e-01 | 9.062461e-01 | 0 | 0 | 0.427295 | 0.043440 | USP25 |
1241 | GO_Biological_Process_2021 | protein phosphorylation (GO:0006468) | 2/496 | 9.115072e-01 | 9.137089e-01 | 0 | 0 | 0.492578 | 0.045640 | MAP2K1;EIF2AK2 |
1242 | GO_Biological_Process_2021 | intracellular protein transport (GO:0006886) | 1/336 | 9.353532e-01 | 9.368582e-01 | 0 | 0 | 0.363881 | 0.024319 | STXBP3 |
1243 | GO_Biological_Process_2021 | gene expression (GO:0010467) | 1/356 | 9.451589e-01 | 9.459187e-01 | 0 | 0 | 0.343028 | 0.019348 | ADAR |
1244 | GO_Biological_Process_2021 | protein transport (GO:0015031) | 1/369 | 9.507246e-01 | 9.507246e-01 | 0 | 0 | 0.330690 | 0.016710 | STXBP3 |
1245 rows × 10 columns
enrdo = gp.enrichr(do, gene_sets='GO_Biological_Process_2021', organism='mouse').res2d
enrdo
Gene_set | Term | Overlap | P-value | Adjusted P-value | Old P-value | Old Adjusted P-value | Odds Ratio | Combined Score | Genes | |
---|---|---|---|---|---|---|---|---|---|---|
0 | GO_Biological_Process_2021 | sympathetic nervous system development (GO:004... | 3/16 | 0.000014 | 0.011254 | 0 | 0 | 80.676113 | 901.790759 | NRP2;INSM1;SOX4 |
1 | GO_Biological_Process_2021 | regulation of cysteine-type endopeptidase acti... | 4/89 | 0.000148 | 0.036147 | 0 | 0 | 16.684874 | 147.166595 | IGBP1;GPX1;PMAIP1;CD44 |
2 | GO_Biological_Process_2021 | noradrenergic neuron differentiation (GO:0003357) | 2/7 | 0.000184 | 0.036147 | 0 | 0 | 137.482759 | 1182.383245 | INSM1;SOX4 |
3 | GO_Biological_Process_2021 | mitral valve development (GO:0003174) | 2/7 | 0.000184 | 0.036147 | 0 | 0 | 137.482759 | 1182.383245 | BMPR2;SOX4 |
4 | GO_Biological_Process_2021 | negative regulation of systemic arterial blood... | 2/9 | 0.000314 | 0.036147 | 0 | 0 | 98.192118 | 791.928764 | BMPR2;SOD2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
800 | GO_Biological_Process_2021 | regulation of nucleic acid-templated transcrip... | 1/430 | 0.729102 | 0.732743 | 0 | 0 | 0.770851 | 0.243543 | SOX4 |
801 | GO_Biological_Process_2021 | nervous system development (GO:0007399) | 1/447 | 0.742886 | 0.745665 | 0 | 0 | 0.740822 | 0.220182 | ADAM23 |
802 | GO_Biological_Process_2021 | regulation of cellular macromolecule biosynthe... | 1/468 | 0.758964 | 0.760854 | 0 | 0 | 0.706747 | 0.194922 | SOX4 |
803 | GO_Biological_Process_2021 | cellular response to cytokine stimulus (GO:007... | 1/482 | 0.769129 | 0.770085 | 0 | 0 | 0.685683 | 0.179990 | ZFP36 |
804 | GO_Biological_Process_2021 | positive regulation of nucleic acid-templated ... | 1/511 | 0.788862 | 0.788862 | 0 | 0 | 0.645729 | 0.153144 | SOX4 |
805 rows × 10 columns
enrup['logP'] = -np.log10(enrup['Adjusted P-value'])
enrdo['logP'] = -np.log10(enrdo['Adjusted P-value'])
df1 = enrup.head(10).copy()
df2 = enrdo.head(10).copy()
df1['Term'] = [x.split(" (GO")[0] for x in df1['Term']]
df2['Term'] = [x.split(" (GO")[0] for x in df2['Term']]
ax = df1[['Term','logP']].sort_values("logP").plot.barh(x='Term',
width=.9, edgecolor='black', color='orangered',
linewidth = 0)
plt.xlabel("log10[Adjusted P-value]")
plt.ylabel("")
size(3.5,3)
sns.despine()
plt.legend([],[],frameon=False)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_enrichr.pdf', bbox_inches='tight')
scl.us(mdc, 'anno', legend_loc='on data', frameon=False, show=False, legend_fontoutline=True, palette='Set1')
plt.title("")
size(3,3)
plt.savefig('fig/C_umap.pdf', bbox_inches='tight')
sc.pl.umap(mdc, color='treat_time', groups=['LNP'])
dic1 = dict(zip(mdc.obs['treat_time'].cat.categories, mdc.uns['treat_time_colors']))
dic1
{'PBS': '#1f77b4', 'LNP_2hr': '#ff7f0e', 'LNP_16hr': '#2ca02c', 'LNP+mRNA_2hr': '#d62728', 'LNP+mRNA_16hr': '#9467bd', 'LNP+mRNA_40hr': '#8c564b', 'LNP+Subunit_16hr': '#e377c2', 'LNP+Subunit+IFNB_16hr': '#7f7f7f'}
dic2 = dic1.copy()
dic2['LNP_16hr'] = dic1['PBS']
dic2['LNP+Subunit_16hr'] = dic1['LNP_16hr']
dic2['LNP+mRNA_16hr'] = dic1['LNP_2hr']
dic2['LNP+Subunit+IFNB_16hr'] = dic1['LNP+mRNA_2hr']
ls1 = 'LNP_16hr,LNP+Subunit_16hr,LNP+mRNA_16hr,LNP+Subunit+IFNB_16hr'.split(",")
import warnings
warnings.filterwarnings('ignore')
for cat in ls1:
ax = sc.pl.umap(mdc, color='treat_time', groups=[cat], palette=dic2, show=False, frameon=False, legend_loc = 'right',
zorder = -1)
size(2.5,2.5)
ax.set_rasterization_zorder(0)
plt.title(cat.rstrip('_16hr'), zorder=1)
plt.savefig('fig/C_mDC_LN_%s.pdf'%(cat), bbox_inches='tight')
plt.show()
deg = pd.read_csv('data/mdc_isgvsothers_deg.csv', index_col=0)
deg[deg.pval < 0.05]
name | logFC | pval | |
---|---|---|---|
0 | Isg15 | 5.118700 | 2.518384e-59 |
1 | Bst2 | 3.734284 | 5.108659e-54 |
2 | Irf7 | 4.563956 | 6.309309e-52 |
3 | Zbp1 | 3.457207 | 4.526061e-51 |
4 | Ifitm3 | 3.719438 | 4.957918e-47 |
... | ... | ... | ... |
32280 | Rpl11 | -0.759046 | 2.651944e-19 |
32281 | Rpl5 | -1.078041 | 1.937796e-20 |
32282 | H2-M2 | -2.476869 | 9.652693e-21 |
32283 | Rps25 | -0.925525 | 2.043302e-22 |
32284 | Rps3a1 | -1.029989 | 3.495323e-25 |
1230 rows × 3 columns
ax = sc.pl.heatmap(mdc, 'Isg15,Bst2,Irf7,Zbp1,Ifitm3,Ifi209,Ifi44,Ifit1,Oasl1,Ifi209,Rsad2,Trim30a,Ly6a,Ms4a4c,Phf11d'.split(",")+
'Cd44,H2-M2,Kctd12,Sema6d,Rassf4,Hebp1,Anxa3,Mfge8,Clu,Insm1,Smarca2,Ltc4s,Slco5a1,S100a4,Pbxip1'.split(","),
groupby='anno',
swap_axes=True, standard_scale='var', cmap='viridis', show=False)
size(6,5)
ax = ax['heatmap_ax']
ax.set_xlabel("")
plt.tick_params(axis='x', rotation = 90)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_heatmap.pdf', bbox_inches='tight')
/home/srkim/.local/lib/python3.9/site-packages/scanpy/plotting/_anndata.py:2414: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead. obs_tidy.index.value_counts(sort=False).iteritems()
isgdc = 'Ifit1bl1,Ifit3b,Ifit3,Ifit1,Ifi204,Cxcl10,Ms4a4c,Ifit2,Isg15,Fcgr1,Rsad2,Phf11d,Isg20,Usp18,Ly6a'.split(",")
x31 = 'Sct,Hamp,Gm4951,Iigp1,Apod,Gpr33,B3gnt5,Amigo3,Nt5c3'.split(",")
pvm = 'Ccl5,Ly6a,Rsad2,Ifi205,Cxcl9,Cxcl10,Il1rn,Fgl2,Ifit3,Ifit1'.split(",")
sc.tl.score_genes(mdc, isgdc, score_name='ISG+ DC')
sc.tl.score_genes(mdc, x31, score_name='X31')
sc.tl.score_genes(mdc, pvm, score_name='PVM')
/home/srkim/.local/lib/python3.9/site-packages/scanpy/tools/_score_genes.py:151: FutureWarning: Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead. for cut in np.unique(obs_cut.loc[gene_list]): /home/srkim/.local/lib/python3.9/site-packages/scanpy/tools/_score_genes.py:151: FutureWarning: Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead. for cut in np.unique(obs_cut.loc[gene_list]): /home/srkim/.local/lib/python3.9/site-packages/scanpy/tools/_score_genes.py:151: FutureWarning: Passing a set as an indexer is deprecated and will raise in a future version. Use a list instead. for cut in np.unique(obs_cut.loc[gene_list]):
mdc.obs['ISG+ DC'] = mdc.raw.to_adata().to_df()[isgdc].mean(axis=1)
mdc.obs['X31'] = mdc.raw.to_adata().to_df()[x31].mean(axis=1)
mdc.obs['PVM'] = mdc.raw.to_adata().to_df()[pvm].mean(axis=1)
ax = sc.pl.umap(mdc, color='ISG+ DC', show=False,frameon=False, cmap='inferno', zorder = -1)
ax.set_rasterization_zorder(0)
plt.title("ISG$^+$ DC\n(Duong et al., 2022)", zorder = 1)
size(2.5,2.5)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_ISG.pdf', bbox_inches='tight')
ax = sc.pl.umap(mdc, color='PVM', show=False,frameon=False, cmap='inferno', zorder = -1)
ax.set_rasterization_zorder(0)
plt.title("Inf-cDC2_PVM\n(Bosteels et al., 2020)", zorder = 1)
size(2.5,2.5)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_PVM.pdf', bbox_inches='tight')
ax = sc.pl.umap(mdc, color='X31', show=False,frameon=False, cmap='inferno', zorder = -1)
ax.set_rasterization_zorder(0)
plt.title("Ifn-cDC2_X31\n(Bosteels et al., 2020)", zorder = 1)
size(2.5,2.5)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_X31.pdf', bbox_inches='tight')
mdc1 = mdc[mdc.obs.treat.isin(['PBS', 'LNP', 'LNP+mRNA'])].copy()
scores = mdc1.obs[['anno','X31','PVM','ISG+ DC']].copy()
scores = scores.melt("anno")
#scores['variable'] = scores['vari']
scores
anno | variable | value | |
---|---|---|---|
0 | mDC_others | X31 | 0.074780 |
1 | mDC_others | X31 | 0.000000 |
2 | mDC_others | X31 | 0.057503 |
3 | mDC_others | X31 | 0.000000 |
4 | mDC_others | X31 | 0.000000 |
... | ... | ... | ... |
1441 | mDC_others | ISG+ DC | 0.308215 |
1442 | mDC_others | ISG+ DC | 0.723906 |
1443 | mDC_others | ISG+ DC | 0.635049 |
1444 | mDC_others | ISG+ DC | 0.319666 |
1445 | mDC_others | ISG+ DC | 0.517706 |
1446 rows × 3 columns
ax = sns.barplot(data = scores, x='variable', y= 'value', hue='anno', capsize=.2, errwidth=.5, errcolor='black',
hue_order='mDC_others,mDC_ISG'.split(","), edgecolor='black', lw=.5,
palette=dict({'mDC_others':'grey','mDC_ISG':'red'}))
plt.legend(title='', frameon=False, fontsize=5.8, bbox_to_anchor=(0,1), loc='upper left')
sns.despine()
size(2,2)
plt.xlabel('')
plt.gca().set_xticklabels('X31,PVM,ISG+ DC'.split(","), fontsize=9)
plt.ylabel('mean expression', fontsize=9)
from statannot import add_stat_annotation
add_stat_annotation(ax, data = scores, x='variable', y='value', hue='anno', test='Mann-Whitney',
box_pairs=[(('X31','mDC_others'), ('X31','mDC_ISG')),
(('PVM','mDC_others'),('PVM','mDC_ISG')),
(('ISG+ DC','mDC_others'),('ISG+ DC','mDC_ISG'))],
line_height=0.02, width=1, linewidth=.5, loc='outside')
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_scores_bar.pdf', bbox_inches='tight')
p-value annotation legend: ns: 5.00e-02 < p <= 1.00e+00 *: 1.00e-02 < p <= 5.00e-02 **: 1.00e-03 < p <= 1.00e-02 ***: 1.00e-04 < p <= 1.00e-03 ****: p <= 1.00e-04 X31_mDC_ISG v.s. X31_mDC_others: Mann-Whitney-Wilcoxon test two-sided with Bonferroni correction, P_val=2.944e-49 U_stat=4.148e+04 PVM_mDC_ISG v.s. PVM_mDC_others: Mann-Whitney-Wilcoxon test two-sided with Bonferroni correction, P_val=5.857e-51 U_stat=4.303e+04 ISG+ DC_mDC_ISG v.s. ISG+ DC_mDC_others: Mann-Whitney-Wilcoxon test two-sided with Bonferroni correction, P_val=2.694e-56 U_stat=4.408e+04
mdc1.obs['treat_time'] = mdc1.obs['treat_time'].cat.reorder_categories('PBS,LNP_2hr,LNP+mRNA_2hr,LNP_16hr,LNP+mRNA_16hr,LNP+mRNA_40hr'.split(","))
mdc1.obs['treat_time'] = mdc1.obs['treat_time'].cat.reorder_categories(list(reversed(mdc1.obs['treat_time'].cat.categories)))
ax = pd.crosstab(mdc1.obs['treat_time'], mdc1.obs['anno'], normalize=0).plot(kind='barh', stacked=True,
cmap='Set1', width=.9, edgecolor='black',
lw=.5)
sns.despine()
size(2,2)
plt.xlabel("")
plt.ylabel("")
plt.legend(bbox_to_anchor=(.5,1), loc='lower center', ncol=1, columnspacing=0.8, labelspacing=.5)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/C_proportion_barh.pdf', bbox_inches='tight')