import sceleto2 as scl
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
def bind(adata, a,b):
order = []
for x in adata.obs[a].cat.categories:
for y in adata.obs[b].cat.categories:
order.append(x+'_'+y)
adata.obs[a+'_'+b] = [x+'_'+y for x,y in zip(adata.obs[a], adata.obs[b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].astype('category')
order = [x for x in order if x in set(adata.obs[a+'_'+b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].cat.reorder_categories(order)
def size(a,b):
plt.gcf().set_size_inches(a,b)
plt.rcParams['pdf.fonttype'] = 42
muscle = sc.read('data/muscle_230426_final_data_uptobatch7.h5ad')
plt.rcParams['figure.dpi'] = 300
muscle.obs['treat'] = muscle.obs['treat'].cat.reorder_categories('PBS,LNP,LNP+mRNA,LNP+Subunit,LNP+Subunit+IFNB'.split(","))
m1 = muscle[muscle.obs.treat.isin("PBS,LNP,LNP+mRNA".split(","))].copy()
scl.us(m1, 'Identity', figsize=(2,2))
m1.obs['Spike'].max()
4890.0
m1.obs['Spike'].min()
0.0
m1.obs['Spike?'] = [1 if x>0 else 0 for x in m1.obs['Spike']]
m1.obs['Spike?'] = [1 if x>0 else 0 for x in m1.obs['Spike']]
d1 = m1.obs.value_counts("treat_time")
d1
treat_time LNP_2hr 16636 LNP+mRNA_16hr 13651 LNP_16hr 13405 LNP+mRNA_2hr 9048 PBS_16hr 8906 LNP+mRNA_40hr 8218 dtype: int64
d2 = m1[m1.obs['Spike?'] ==1].obs.value_counts("treat_time")
d2
treat_time LNP+mRNA_2hr 6560 LNP+mRNA_16hr 2074 LNP+mRNA_40hr 441 LNP_2hr 4 PBS_16hr 2 LNP_16hr 1 dtype: int64
df = pd.DataFrame(d1).merge(pd.DataFrame(d2), left_index=True, right_index=True)
df.columns = 'total,spike'.split(",")
df['ratio'] = df['spike'] / df['total']
df['ratio'] = 100 * df['ratio']
df
total | spike | ratio | |
---|---|---|---|
treat_time | |||
LNP_2hr | 16636 | 4 | 0.024044 |
LNP+mRNA_16hr | 13651 | 2074 | 15.193026 |
LNP_16hr | 13405 | 1 | 0.007460 |
LNP+mRNA_2hr | 9048 | 6560 | 72.502210 |
PBS_16hr | 8906 | 2 | 0.022457 |
LNP+mRNA_40hr | 8218 | 441 | 5.366269 |
df.index = df.index.rename_categories({'PBS_16hr':'PBS'})
o1 = 'LNP+mRNA_40hr,LNP+mRNA_16hr,LNP+mRNA_2hr,LNP_16hr,LNP_2hr,PBS'.split(",")
import seaborn as sns
sns.set_style("white")
df.loc[o1,['ratio']].plot.barh(width=.85, edgecolor='black', color='red')
plt.ylabel("")
plt.xlabel('Spike mRNA(+) cell (%)')
plt.legend([],[], frameon=False)
size(1.5,2.5)
sns.despine()
#plt.savefig("fig/D_spikecount.pdf", bbox_inches='tight')
sns.set_style('white')
muscle.obs['Spike'].max()
4890.0
muscle.obs['Spike detection'] = [1 if x > 1 else 0 for x in muscle.obs['Spike']]
plt.rcParams['figure.dpi'] = 500
sc.pl.umap(muscle, color='Spike detection', show=False, cmap='cividis',
frameon=False, legend_loc='on data')
plt.gca().set_rasterization_zorder(100)
size(2.5,2.5)
plt.savefig("fig/D_spikeumap_binary.pdf", bbox_inches='tight')
sc.pl.umap(muscle, color='Spike_log1p', show=False, cmap='inferno',
frameon=False, legend_loc='on data', vmax=3)
plt.gca().set_rasterization_zorder(100)
size(2,2)
plt.savefig("fig/D_spikeumap_vmax.pdf", bbox_inches='tight')
m2h = m1[m1.obs.treat_time == 'LNP+mRNA_2hr'].copy()
m16h = m1[m1.obs.treat_time == 'LNP+mRNA_16hr'].copy()
d1 = m2h.obs[['annotation','Spike_log1p']].groupby('annotation').mean()
d2 = m16h.obs[['annotation','Spike_log1p']].groupby('annotation').mean()
d1 = d1 / d1.mean()
d2 = d2 / d2.mean()
df = d1.merge(d2, left_index=True, right_index=True)
df.columns = '2hr,16hr'.split(",")
df.sort_values("2hr", ascending=False)
2hr | 16hr | |
---|---|---|
annotation | ||
DC-cDC1 | 1.867471 | 0.000000 |
Macrophage | 1.723804 | 2.092995 |
Fibroblast | 1.694387 | 5.843758 |
Endothelial | 1.536995 | 1.185776 |
Mural | 1.530292 | 1.453917 |
Mast | 1.141721 | 1.583158 |
Neutrophil | 1.115733 | 1.349197 |
Tenocyte | 0.992020 | 1.130822 |
Monocyte | 0.968742 | 1.243444 |
Schwann-myelinating | 0.780374 | 0.304339 |
Muscle-skeletal-progenitor | 0.761720 | 0.165120 |
T-prolif | 0.750058 | 0.250344 |
DC-migratory | 0.728323 | 0.813353 |
B-prolif | 0.625758 | 0.347336 |
Muscle-skeletal-mature | 0.618028 | 0.194557 |
NK | 0.614676 | 0.493077 |
B-naive | 0.596837 | 0.323674 |
Schwann-non-myelinating | 0.594417 | 0.618259 |
T-CD8 | 0.578330 | 0.259308 |
T-CD4 | 0.561615 | 0.232297 |
DC-plasmacytoid | 0.555586 | 1.115268 |
df = df.reset_index()
csdic = dict(zip(muscle.obs['annotation'].cat.categories, muscle.uns['annotation_colors']))
from adjustText import adjust_text
??sns.set_style
sns.set_style("whitegrid")
ax = sns.scatterplot(data = df, x='2hr', y='16hr', hue='annotation', palette=csdic, s=200,
edgecolor='black', linewidth=.5, zorder = 100)
for x in df[(df['2hr'] > 1) |( df['16hr'] > 2)].index:
plt.text(df.loc[x,'2hr'],
df.loc[x,'16hr'],
df.loc[x,'annotation'], zorder=200, fontweight=500, fontsize=12)
adjust_text(plt.gca().texts)
plt.axvline(1, c='black', ls='--', lw = 1)
plt.axhline(1, c='black', ls='--', lw = 1)
sns.despine(left = True, bottom=True)
ax.tick_params(width=0.5, labelsize = 10)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.xlabel("")
plt.ylabel('')
plt.legend(bbox_to_anchor=(1,0.5), loc='center left', ncol=2, frameon=False)
size(5,5)
plt.savefig("fig/D_spikescatter.pdf", bbox_inches='tight')
fib = muscle[muscle.obs.annotation == 'Fibroblast'].copy()
fib = fib.raw.to_adata().copy()
fib.raw = fib.copy()
sc.pp.highly_variable_genes(fib, max_mean=10)
sc.pl.highly_variable_genes(fib)
sc.pp.scale(fib)
sc.tl.pca(fib, use_highly_variable=True)
sc.pp.neighbors(fib)
sc.tl.umap(fib)
sc.external.pp.bbknn(fib, 'Identity')
sc.tl.umap(fib)
scl.us(fib, 'treat')
sc.tl.leiden(fib, 0.2)
scl.us(fib, 'leiden')
scl.markers.marker(fib, 'leiden').plot_marker()
{'0': ['Adamtsl2', 'Ndufa4l2', 'Crlf1', 'Gdf10', 'Mfap4', 'Hmcn2', 'Tspan11', 'Bmp4', 'Inmt', 'Fbln7'], '1': ['Nos2', 'Hmga2', 'Ifi206', 'Trim30c', 'Serpinb2', 'Apol9b', 'Apol9a', 'Cxcl5', 'Acod1', 'Oas3'], '2': ['Wnt10b', 'Fam167a', 'Krtdap', 'Sv2c', 'Edn1', 'Car8', 'Sytl2', 'Cmah', '1700019D03Rik', 'Stmn4'], '3': ['Akr1cl', 'Gm14207', 'Pcdh15', 'Spic', 'Syt1', 'Serpina10', 'Serpina1a', 'Serpina1c', 'Gm36899', 'Ager'], '4': ['Wnt10a', 'Gpm6a', 'Msln', 'Sfrp5', 'Tenm2', 'Efnb3', 'Lypd2', 'Foxd1', 'Ctxn3', 'Mpzl2']}
## cluster4: possbile MESOTHELIAL cell contamination (Msln) -> Remove
scl.us(fib, 'Pi16,Col15a1,Ccl19,Cxcl5,Msln,Sfrp5')
fib = fib[fib.obs.leiden != '4'].copy()
fib = fib.raw.to_adata().copy()
fib.raw = fib.copy()
sc.pp.highly_variable_genes(fib, max_mean=10)
sc.pl.highly_variable_genes(fib)
sc.pp.scale(fib)
sc.tl.pca(fib, use_highly_variable=True)
sc.pp.neighbors(fib)
sc.tl.umap(fib)
sc.external.pp.bbknn(fib, 'Identity')
sc.tl.umap(fib)
scl.us(fib, 'treat')
sc.tl.leiden(fib, 0.35)
scl.us(fib, 'leiden')
scl.us(fib, 'Col15a1,Pi16,Cxcl5,Ccl19')
annot = scl.annotater(fib, 'annosub')
def anno(a,b): annot.update(fib, 'leiden', a,b)
anno('0,1', 'Fib_Col15a1')
anno('2', 'Fib_Cxcl5')
anno('3', 'Fib_Pi16')
anno('4', 'Fib_Ccl19')
scl.us(fib, 'annosub')
fib.obs['annosub'] = fib.obs['annosub'].cat.reorder_categories('Fib_Pi16,Fib_Col15a1,Fib_Cxcl5,Fib_Ccl19'.split(","))
pd.crosstab(fib.obs['treat_time'], fib.obs['annosub'], normalize=0).plot(kind='bar', stacked=True)
<AxesSubplot:xlabel='treat_time'>
scl.us(fib, 'annosub')
sns.set_style('white')
plt.rcParams['pdf.fonttype'] = 42
ax = sc.pl.dotplot(fib, 'Pi16,Dpp4,Wnt10b,Col15a1,Penk,Bmp4,Cxcl5,Pdpn,Oas2,Ccl19,Ccl21a,Cxcl13'.split(","),
groupby='annosub', cmap='OrRd',
standard_scale='var', show=False, lw = .5)
ax = ax['mainplot_ax']
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.gcf().set_size_inches(5,2.5)
plt.savefig('fig/D_fib_dotplot.pdf', bbox_inches='tight')
fib1 = fib[fib.obs['is_Spike'] == 'False'].copy()
fib2 = fib[fib.obs['is_Spike'] != 'False'].copy()
order = ['LNP+mRNA_40hr', 'LNP+mRNA_16hr', 'LNP_16hr','LNP+mRNA_2hr', 'LNP_2hr','PBS_16hr']
df0 = pd.crosstab(fib.obs['treat_time'], fib.obs['annosub'], normalize=0)
df1 = pd.crosstab(fib1.obs['treat_time'], fib1.obs['annosub'], normalize=0)
df2 = pd.crosstab(fib2.obs['treat_time'], fib2.obs['annosub'], normalize=0)
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['figure.dpi'] = 300
ax = df0.loc[order].plot.barh(stacked=True, width = .85, linewidth = .5, edgecolor='black')
plt.xlabel('')
plt.ylabel('')
sns.despine()
plt.gcf().set_size_inches(2.5,2)
plt.legend(bbox_to_anchor=(0.5,1), loc='lower center', ncol=2, frameon=False)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/D_fib_proportions_base.pdf', bbox_inches='tight')
ax = sc.pl.umap(fib, color='Spike_log1p', show=False, frameon=False)
plt.gcf().set_size_inches(2,2)
plt.title("Spike mRNA")
plt.gca().set_rasterization_zorder(100)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/D_fib_Spike_UMAP.pdf', bbox_inches='tight')
f, ax = plt.subplots(1,2, sharey=True)
pd.DataFrame(index = order).merge(df1, left_index=True, right_index=True, how='left').plot.barh(stacked=True, ax=ax[0], lw = .5, width=.75, edgecolor='black')
pd.DataFrame(index = order).merge(df2, left_index=True, right_index=True, how='left').plot.barh(stacked=True, ax=ax[1], lw = .5, width=.75, edgecolor='black')
ax[0].legend([],[], frameon=False)
ax[1].legend([],[], frameon=False)
ax[0].set_title('Spike negative')
ax[1].set_title("Spike positive")
sns.despine()
plt.gcf().set_size_inches(4,2.5)
ax[0].tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax[0].spines.values()]
ax[1].tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax[1].spines.values()]
plt.savefig('fig/D_proportion_by_spike.pdf', bbox_inches='tight')
bind(fib, 'treat_time', 'annosub')
fib.obs['treat_time_annosub']
M_LNP_AAACGCTGTCCGATCG-1 LNP_16hr_Fib_Ccl19 M_LNP_AAAGGGCAGTAGCCAG-1 LNP_16hr_Fib_Ccl19 M_LNP_AAAGGTATCTTTCTAG-1 LNP_16hr_Fib_Col15a1 M_LNP_AAAGTGATCGGTTGTA-1 LNP_16hr_Fib_Col15a1 M_LNP_AACAAAGGTGTCCGTG-1 LNP_16hr_Fib_Cxcl5 ... Sub_lnp_IFNB_TTTCCTCGTCCCACGA-1 LNP+Subunit+IFNB_16hr_Fib_Cxcl5 Sub_lnp_IFNB_TTTCCTCTCTCATTTG-1 LNP+Subunit+IFNB_16hr_Fib_Col15a1 Sub_lnp_IFNB_TTTGACTAGGCGAAGG-1 LNP+Subunit+IFNB_16hr_Fib_Cxcl5 Sub_lnp_IFNB_TTTGACTGTGGACCTC-1 LNP+Subunit+IFNB_16hr_Fib_Cxcl5 Sub_lnp_IFNB_TTTGTTGGTGTTTGCA-1 LNP+Subunit+IFNB_16hr_Fib_Col15a1 Name: treat_time_annosub, Length: 5146, dtype: category Categories (31, object): ['PBS_16hr_Fib_Pi16', 'PBS_16hr_Fib_Col15a1', 'PBS_16hr_Fib_Cxcl5', 'PBS_16hr_Fib_Ccl19', ..., 'LNP+Subunit+IFNB_16hr_Fib_Pi16', 'LNP+Subunit+IFNB_16hr_Fib_Col15a1', 'LNP+Subunit+IFNB_16hr_Fib_Cxcl5', 'LNP+Subunit+IFNB_16hr_Fib_Ccl19']
d = pd.crosstab(fib[fib.obs['treat'] == 'LNP+mRNA'].obs['treat_time_annosub'], fib[fib.obs['treat'] == 'LNP+mRNA'].obs['is_Spike'], normalize=0)
d['cond'] = ['_'.join(x.split("_")[:-2]) for x in d.index]
d['time'] = [x.split("_")[-3] for x in d.index]
d['annosub'] = ['_'.join(x.split("_")[-2:]) for x in d.index]
d
is_Spike | False | True | cond | time | annosub |
---|---|---|---|---|---|
treat_time_annosub | |||||
LNP+mRNA_2hr_Fib_Pi16 | 0.090909 | 0.909091 | LNP+mRNA_2hr | 2hr | Fib_Pi16 |
LNP+mRNA_2hr_Fib_Col15a1 | 0.392523 | 0.607477 | LNP+mRNA_2hr | 2hr | Fib_Col15a1 |
LNP+mRNA_2hr_Fib_Cxcl5 | 0.000000 | 1.000000 | LNP+mRNA_2hr | 2hr | Fib_Cxcl5 |
LNP+mRNA_2hr_Fib_Ccl19 | 0.458333 | 0.541667 | LNP+mRNA_2hr | 2hr | Fib_Ccl19 |
LNP+mRNA_16hr_Fib_Pi16 | 0.833333 | 0.166667 | LNP+mRNA_16hr | 16hr | Fib_Pi16 |
LNP+mRNA_16hr_Fib_Col15a1 | 0.904762 | 0.095238 | LNP+mRNA_16hr | 16hr | Fib_Col15a1 |
LNP+mRNA_16hr_Fib_Cxcl5 | 0.324324 | 0.675676 | LNP+mRNA_16hr | 16hr | Fib_Cxcl5 |
LNP+mRNA_16hr_Fib_Ccl19 | 0.852941 | 0.147059 | LNP+mRNA_16hr | 16hr | Fib_Ccl19 |
LNP+mRNA_40hr_Fib_Col15a1 | 0.954545 | 0.045455 | LNP+mRNA_40hr | 40hr | Fib_Col15a1 |
LNP+mRNA_40hr_Fib_Cxcl5 | 0.560976 | 0.439024 | LNP+mRNA_40hr | 40hr | Fib_Cxcl5 |
LNP+mRNA_40hr_Fib_Ccl19 | 0.880952 | 0.119048 | LNP+mRNA_40hr | 40hr | Fib_Ccl19 |
base = pd.crosstab(fib.obs['treat_time'], fib.obs['is_Spike'], normalize=0)
#sns.boxplot(data = d,x='time', y='True', hue='annosub')
ax = sns.stripplot(data = d, x='time', y='True', hue='annosub', dodge=True,
s=8, edgecolor='black', linewidth=.5)
plt.hlines(y=base.loc['LNP+mRNA_2hr','True'], xmin=-.5, xmax=.5, linestyles='--', color='grey')
plt.hlines(y=base.loc['LNP+mRNA_16hr','True'], xmin=.5, xmax=1.5, linestyles='--', color='grey')
plt.hlines(y=base.loc['LNP+mRNA_40hr','True'], xmin=1.5, xmax=2.5, linestyles='--', color='grey')
plt.xlabel('')
plt.ylabel('Ratio of spike(+) cells')
#plt.axvline(1.5, c='black')
#plt.axvline(.5,c='black')
plt.gcf().set_size_inches(3.5,2)
sns.despine()
plt.legend([],[], frameon=False)
ax.tick_params(width=0.5)
[x.set_linewidth(.5) for x in ax.spines.values()]
plt.savefig('fig/D_cluster_spike_by_time.pdf', bbox_inches='tight')
/home/srkim/.local/lib/python3.9/site-packages/seaborn/categorical.py:292: FutureWarning: In a future version, `df.iloc[:, i] = newvals` will attempt to set the values inplace instead of always setting a new array. To retain the old behavior, use either `df[df.columns[i]] = newvals` or, if columns are non-unique, `df.isetitem(i, newvals)` sub_data.loc[:, self.cat_axis] = adjusted_data
scl.us(fib, 'treat_time')
fib.obs['treat_time'] = fib.obs['treat_time'].cat.rename_categories({'PBS_16hr':"PBS"})