import milopy
/opt/anaconda3/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.26.4 warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
import scanpy as sc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scrublet as scr
%matplotlib inline
plt.rcParams['pdf.fonttype'] = 42
def us(adata,gene,groups=None, show=False, exclude =None,figsize=None,**kwargs):
from matplotlib import rcParams
if figsize:
rcParams['figure.figsize'] = figsize
if ',' in gene:
gene = gene.split(',')
if groups:
if ',' in groups:groups = groups.split(',')
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
if exclude: # list to not show
groups = [x for x in set(adata.obs[gene]) if x not in exclude]
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
sc.pl.umap(adata,color=gene,color_map='OrRd',show=show, **kwargs)
rcParams['figure.figsize'] = [3,3]
def dotraw(adata, value, key):
if value not in adata.obs.columns:
if value not in adata.raw.var_names:
print('no'); return
idx = adata.raw.var_names.get_loc(value)
array = adata.raw.X.toarray()[:,idx]
else:
array = np.array(adata.obs[value])
df = pd.DataFrame(array, index=adata.obs.index).merge(adata.obs[[key]],
left_index=True, right_index=True)
df['mask'] = df[0].where(df[0] == 0, 1)
ratio = df.groupby(key)['mask'].mean()
mean = df.groupby(key)[0].mean()
res = pd.concat([ratio,mean], axis=1)
res.columns = ['ratio', 'mean']
return res
def us(adata,gene,groups=None, show=False, exclude =None,figsize=None,**kwargs):
from matplotlib import rcParams
if figsize:
rcParams['figure.figsize'] = figsize
else: rcParams['figure.figsize'] = (3,3)
if ',' in gene:
gene = gene.split(',')
if groups:
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
if exclude: # list to not show
groups = [x for x in set(adata.obs[gene]) if x not in exclude]
sc.pl.umap(adata,color=gene,color_map='OrRd',groups=groups, show=show, **kwargs)
else:
sc.pl.umap(adata,color=gene,color_map='OrRd',show=show, **kwargs)
def size(a,b): plt.gcf().set_size_inches(a,b)
def bind(adata, a,b):
order = []
for x in adata.obs[a].cat.categories:
for y in adata.obs[b].cat.categories:
order.append(x+'_'+y)
adata.obs[a+'_'+b] = [x+'_'+y for x,y in zip(adata.obs[a], adata.obs[b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].astype('category')
order = [x for x in order if x in set(adata.obs[a+'_'+b])]
adata.obs[a+'_'+b] = adata.obs[a+'_'+b].cat.reorder_categories(order)
def deg_single(adata, key, target, reference, method='wilcoxon', log=False, filter_res=False, pvalcut = 0.05, foldcut = 1):
import sys
sc.tl.rank_genes_groups(adata, key, groups = [target, reference], reference=reference, method='wilcoxon')
name = pd.DataFrame(adata.uns['rank_genes_groups']['names'])
logf = pd.DataFrame(adata.uns['rank_genes_groups']['logfoldchanges'])
pval = pd.DataFrame(adata.uns['rank_genes_groups']['pvals_adj'])
df = pd.concat([name,logf,pval], axis=1)
df.columns = ['name','logFC', 'pval']
if log:
df['logpval'] = [np.log2(sys.float_info.max-1) if x == 0 else -np.log2(x) for x in df['pval']]
if filter_res:
df = df[df.pval < pvalcut].copy()
df = df[abs(df.logpval) > foldcut].copy()
return df
def degplot(deg, topgene=20, dotsize=3, xlim=None, pvalcut=0.05, foldcut=0):
import seaborn as sns
import matplotlib.pyplot as plt
from adjustText import adjust_text
if 'logpval' not in deg.columns:
deg['logpval'] = [np.log2(sys.float_info.max-1) if x == 0 else -np.log2(x) for x in deg['pval']]
deg = deg[deg.pval < pvalcut].copy()
deg = deg[abs(deg.logpval) > foldcut].copy()
deg = deg.sort_values("logFC", ascending=False)
sns.scatterplot(data = deg, x='logFC', y='logpval', s=dotsize, color='grey')
lim = np.max([abs(deg['logFC'].max()), abs(deg['logFC'].min())])
plt.xlim(-lim-1,lim+1)
if xlim:
plt.xlim(-xlim,xlim)
plt.axvline(x=0, color='black', ls='--', lw=0.7)
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC > foldcut)].shape[0] < i+1: continue
plt.text(deg.loc[deg.index[i],'logFC'], deg.loc[deg.index[i], 'logpval'],
deg.loc[deg.index[i], 'name'], fontsize=6, color='red')
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC < foldcut)].shape[0] < i+1: continue
plt.text(deg.loc[deg.index[-(i+1)],'logFC'], deg.loc[deg.index[-(i+1)], 'logpval'],
deg.loc[deg.index[-(i+1)], 'name'], fontsize=6, color='blue')
adjust_text(plt.gca().texts, arrowprops=dict(arrowstyle='-', lw=0.3, color='grey'), precision = 0.3)
plt.ylabel('log [Adjusted P-value]')
plt.xlabel('log [Fold Changes]')
sns.despine(left=True)
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC > foldcut)].shape[0] < i+1: continue
plt.plot(deg.loc[deg.index[i],'logFC'], deg.loc[deg.index[i], 'logpval'],
color='red', markersize=dotsize/3, marker='o')
for i in range(topgene):
if deg[(deg.pval < pvalcut) & (deg.logFC < foldcut)].shape[0] < i+1: continue
plt.plot(deg.loc[deg.index[-(i+1)],'logFC'], deg.loc[deg.index[-(i+1)], 'logpval'],
color='blue', markersize=dotsize/3, marker='o')
plt.rcParams['figure.dpi'] = 300
muscle = sc.read('data/muscle_revision_reinvestigate_total_240428.h5ad')
muscle_original = sc.read('data/muscle_230426_final_data_uptobatch7.h5ad')
# pre = muscle[~muscle.obs.index.str.startswith('batch')].copy()
# new = muscle[muscle.obs.index.str.startswith('batch')].copy()
# pre = muscle[(~muscle.obs.index.str.startswith('batch')) | (muscle.obs.treat == 'PBS')].copy()
# new = muscle[muscle.obs.index.str.startswith('batch')].copy()
# pre = pre.raw.to_adata().copy()
# pre.raw = pre.copy()
# sc.pp.highly_variable_genes(pre)
# sc.pl.highly_variable_genes(pre)
# sc.pp.scale(pre)
# sc.tl.pca(pre)
# sc.external.pp.bbknn(pre, 'Identity')
# sc.tl.umap(pre)
# sc.pl.umap(pre, color='annotation')
# sc.pl.umap(pre, color='treat')
pre = muscle_original.copy()
import milopy.utils
import scanpy as sc
import numpy as np
import milopy
import milopy.core as milo
set(pre.obs.treat)
{'LNP', 'LNP+Subunit', 'LNP+Subunit+IFNB', 'LNP+mRNA', 'PBS'}
set(pre.obs.treat_time)
{'LNP+Subunit+IFNB_16hr', 'LNP+Subunit_16hr', 'LNP+mRNA_16hr', 'LNP+mRNA_2hr', 'LNP+mRNA_40hr', 'LNP_16hr', 'LNP_2hr', 'PBS_16hr'}
set(pre.obs.treat)
{'LNP', 'LNP+Subunit', 'LNP+Subunit+IFNB', 'LNP+mRNA', 'PBS'}
pre.obs.treat = pre.obs.treat.astype('category').cat.reorder_categories(
[ 'PBS', 'LNP', 'LNP+Subunit', 'LNP+Subunit+IFNB', 'LNP+mRNA'])
bind(pre, 'treat', 'time')
adata = pre[pre.obs.treat_time.isin('LNP_2hr,LNP_16hr,PBS_16hr'.split(","))].copy()
sc.external.pp.bbknn(adata, 'Identity')
/tmp/ipykernel_1555662/2364479230.py:1: FutureWarning: The specified parameters ('batch_key',) are no longer positional. Please specify them like `batch_key='Identity'` sc.external.pp.bbknn(adata, 'Identity')
milo.make_nhoods(adata)
milo.count_nhoods(adata, sample_col="Identity")
milo.DA_nhoods(adata, design="~ treat")
milo_results = adata.uns["nhood_adata"].obs
milopy.utils.build_nhood_graph(adata)
/home/srkim/.local/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index. warnings.warn("Transforming to str index.", ImplicitModificationWarning)
f, ax = plt.subplots(1,1, figsize=(3,3))
sc.pl.umap(pre, show = False, ax = ax, alpha=.1, edges=False,
edges_color='black', edges_width=.5, s = 10, zorder = -1)
milopy.plot.plot_nhood_graph(adata, alpha=.5, min_size=1, min_logFC = 0, show=False,
linewidth = 0, ax = ax, edgecolor='white', zorder = -1)
ax.set_rasterization_zorder(0)
plt.title("LNP DA fold-change")
plt.savefig('fig/R_MILO_LNP.pdf', bbox_inches='tight')
import seaborn as sns
milopy.utils.annotate_nhoods(adata, anno_col='annotation')
ndata = adata.uns['nhood_adata'].copy()
df1 = ndata.obs.copy()
order1 = df1.groupby('nhood_annotation').mean()['logFC'].sort_values(ascending=False).index.to_list()
df1_lnp = df1.copy()
/tmp/ipykernel_1555662/2958641946.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. order1 = df1.groupby('nhood_annotation').mean()['logFC'].sort_values(ascending=False).index.to_list()
adata = pre[pre.obs.treat_time.isin('LNP+mRNA_2hr,LNP+mRNA_16hr,PBS_16hr'.split(","))].copy()
adata.obs.value_counts("treat_time")
treat_time LNP+mRNA_16hr 13651 LNP+mRNA_2hr 9048 PBS_16hr 8906 dtype: int64
sc.external.pp.bbknn(adata, 'Identity')
/tmp/ipykernel_1555662/2364479230.py:1: FutureWarning: The specified parameters ('batch_key',) are no longer positional. Please specify them like `batch_key='Identity'` sc.external.pp.bbknn(adata, 'Identity')
milo.make_nhoods(adata)
milo.count_nhoods(adata, sample_col="Identity")
milo.DA_nhoods(adata, design="~ treat")
milo_results = adata.uns["nhood_adata"].obs
milopy.utils.build_nhood_graph(adata)
/home/srkim/.local/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index. warnings.warn("Transforming to str index.", ImplicitModificationWarning)
f, ax = plt.subplots(1,1, figsize=(3,3))
sc.pl.umap(pre, show = False, ax = ax, alpha=.1, edges=False,
edges_color='black', edges_width=.5, s = 10, zorder = -1)
milopy.plot.plot_nhood_graph(adata, alpha=.5, min_size=1, min_logFC = 0, show=False,
linewidth = 0, ax = ax, edgecolor='white', zorder = -1)
ax.set_rasterization_zorder(0)
plt.title("LNP+mRNA DA fold-change")
plt.savefig('fig/R_MILO_Spike.pdf', bbox_inches='tight')
import seaborn as sns
milopy.utils.annotate_nhoods(adata, anno_col='annotation')
ndata = adata.uns['nhood_adata'].copy()
df1 = ndata.obs.copy()
order1 = df1.groupby('nhood_annotation').mean()['logFC'].sort_values(ascending=False).index.to_list()
df1_spike = df1.copy()
/tmp/ipykernel_1555662/1097671584.py:6: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function. order1 = df1.groupby('nhood_annotation').mean()['logFC'].sort_values(ascending=False).index.to_list()
adata = pre[pre.obs.treat_time.isin('LNP_16hr,LNP+mRNA_16hr'.split(","))].copy()
adata.obs.value_counts("shot")
shot 1st 15186 2nd 11870 dtype: int64
sc.external.pp.bbknn(adata, 'Identity')
/tmp/ipykernel_1555662/2364479230.py:1: FutureWarning: The specified parameters ('batch_key',) are no longer positional. Please specify them like `batch_key='Identity'` sc.external.pp.bbknn(adata, 'Identity')
milo.make_nhoods(adata)
milo.count_nhoods(adata, sample_col="Identity")
milo.DA_nhoods(adata, design="~shot")
milo_results = adata.uns["nhood_adata"].obs
milopy.utils.build_nhood_graph(adata)
/home/srkim/.local/lib/python3.9/site-packages/anndata/_core/anndata.py:121: ImplicitModificationWarning: Transforming to str index. warnings.warn("Transforming to str index.", ImplicitModificationWarning)
f, ax = plt.subplots(1,1, figsize=(3,3))
sc.pl.umap(pre, show = False, ax = ax, alpha=.1, edges=False,
edges_color='black', edges_width=.5, s = 10, zorder = -1)
milopy.plot.plot_nhood_graph(adata, alpha=.5, min_size=1, min_logFC = 0, show=False,
linewidth = 0, ax = ax, edgecolor='white', zorder = -1)
ax.set_rasterization_zorder(0)
plt.title("DA fold-change")
plt.savefig('fig/R_MILO_Shot1vs2.pdf', bbox_inches='tight')