import numpy as np
import pandas as pd
import scanpy as sc
import leidenalg as leiden
import matplotlib.pyplot as plt
import seaborn as sns
import sceleto2 as scl
import scrublet as scr
import gseapy as gp
import glob
import os
import sys
import pickle
import decoupler
import scipy
from statannot import add_stat_annotation
import celltypist
import random
import symphonypy as sp
from pydeseq2.dds import DeseqDataSet
from pydeseq2.ds import DeseqStats
# Notebook setup: IPython inline plotting, scanpy verbosity level 3, version
# banner, and 250-dpi default figures.
%matplotlib inline
sc.settings.verbosity = 3
sc.logging.print_version_and_date()
sc.settings.set_figure_params(dpi=250)
# Comma-separated library names handed to run_enrichr() as its `gene_sets` argument.
gene_sets = 'GO_Molecular_Function_2021,GO_Biological_Process_2021,KEGG_2019_Mouse,KEGG_2021_Human,Reactome_2022,WikiPathway_2021_Human,WikiPathways_2019_Mouse,MSigDB_Hallmark_2020,GTEx_Aging_Signatures_2021'
from gseapy.plot import barplot, dotplot, gseaplot
def run_enrichr(gene_list, gene_sets):
    """Run ``gp.enrichr`` on ``gene_list`` and return the 50 best-ranked terms.

    param gene_list :genes to test (forwarded unchanged to ``gp.enrichr``)
    param gene_sets :library name(s) to test against (forwarded unchanged)
    return :the first 50 rows of ``enr.results`` after sorting by
            'Adjusted P-value' in ascending order
    """
    enrichr_options = {
        'gene_list': gene_list,
        'gene_sets': gene_sets,
        'organism': 'mouse',
        'background': 5000,
        'outdir': None,   # keep results in memory only
        'no_plot': True,
        'cutoff': 0.1,
    }
    enr = gp.enrichr(**enrichr_options)
    ranked = enr.results.sort_values(by=['Adjusted P-value'])
    results = ranked.head(50)
    return results
class sample_volcano():
    """Two-group comparison on per-patient mean expression, with a volcano plot.

    Attributes set by ``__init__``:
      genelist : gene names taken from ``adata.raw.var_names``
      fc       : numpy array, log2 fold change (comp1 over comp2) per gene
      pval     : numpy array, ``scipy.stats.ttest_ind`` p-value per gene
    """

    def __init__(self, adata, patient_id, anno_key, comp1, comp2, P=0.01, quick=True,
                 fc_cut_pval=0.5, n_pos_cell=10, n_patient_cell=10):
        '''
        Compute log2 fold change and t-test p-value for every gene.

        param adata :object exposing ``obs``, ``raw.var_names`` and ``raw.to_adata()``;
                     the raw matrix must support ``.toarray()``
        param patient_id :obs column identifying the patient/sample of each cell
        param anno_key :obs column holding the group labels
        param comp1 :label of the numerator group
        param comp2 :label of the denominator group
        param P :pseudocount for fc calculation
        param quick :if True, genes with |fc| < fc_cut_pval or fewer than
                     n_pos_cell expressing cells get p-value 1 without testing
        param fc_cut_pval :|log2 fc| below which the test is skipped (quick mode)
        param n_pos_cell :minimum number of cells (both groups together) with
                          expression > 0 for a gene to be tested (quick mode)
        param n_patient_cell :patients with fewer cells than this are removed
        '''
        from collections import Counter
        from scipy.stats import ttest_ind

        self.genelist = adata.raw.var_names

        # Removing patients with fewer cells than n_patient_cell (10 by default)
        cells_per_patient = Counter(adata.obs[patient_id])
        rmlist = [patient for patient, n_cells in cells_per_patient.items()
                  if n_cells < n_patient_cell]
        adata = adata[~adata.obs[patient_id].isin(rmlist)]
        adraw = adata.raw.to_adata()

        cond1 = adraw[adraw.obs[anno_key] == comp1]
        cond2 = adraw[adraw.obs[anno_key] == comp2]
        cond1 = pd.DataFrame(data=cond1.X.toarray(), index=cond1.obs[patient_id],
                             columns=cond1.var_names)
        cond2 = pd.DataFrame(data=cond2.X.toarray(), index=cond2.obs[patient_id],
                             columns=cond2.var_names)

        # One row per patient. observed=True only matters for categorical
        # patient ids: it stops empty categories from adding all-NaN rows,
        # which would turn every t-test result into NaN.
        exp1 = cond1.groupby(cond1.index, observed=True).mean()
        exp2 = cond2.groupby(cond2.index, observed=True).mean()

        pvals = []
        fcs = []
        for gene in adraw.var_names:
            # Number of cells (both groups together) with positive expression
            n_pos = np.sum(cond1[gene] > 0) + np.sum(cond2[gene] > 0)
            # Mean over patients of the per-patient means, plus pseudocount
            norm_count1 = np.mean(exp1[gene]) + P
            norm_count2 = np.mean(exp2[gene]) + P
            fc = np.log2(norm_count1 / norm_count2)
            fcs.append(fc)
            if quick and (np.abs(fc) < fc_cut_pval or n_pos < n_pos_cell):
                pvals.append(1)
            else:
                pvals.append(ttest_ind(exp1[gene], exp2[gene])[1])
        self.pval = np.array(pvals)
        self.fc = np.array(fcs)

    def draw(self, title=None, pvalue_cut=100, to_show=0.2, adjust_lim=5, show=True,
             sig_mode='auto', adjust=True):
        '''
        draw volcano plot

        param title :optional figure title
        param pvalue_cut :-log10Pvalue for cutoff (sig_mode 'pval' only)
        param to_show :top percentage of genes highlighted in 'auto'/'complex'
        param adjust_lim :``lim`` argument forwarded to adjust_text
        param show :call plt.show() at the end
        param sig_mode :['auto','complex','pval']
        param adjust :run adjust_text on the gene labels
        raises SystemError :if sig_mode is not one of the three modes
        '''
        from adjustText import adjust_text

        if sig_mode not in ('auto', 'complex', 'pval'):
            raise SystemError('error, check sig_mode')

        plt.figure(figsize=(6, 6))
        xpos = np.array(self.fc)
        ypos = -np.log10(np.array(self.pval))

        # p == 0 gives +inf: cap it at the largest finite value. NaN entries
        # (undefined tests) are excluded so they cannot become the cap.
        finite = np.isfinite(ypos)
        if finite.any():
            ypos[np.isposinf(ypos)] = ypos[finite].max()

        abs_fc = np.abs(xpos)
        # nanpercentile: one NaN p-value would otherwise make the cutoff NaN
        # and silently leave no gene highlighted.
        if sig_mode == 'complex':
            index = abs_fc * ypos
            index_cut = np.nanpercentile(index, 100 - to_show)
            sig = (abs_fc > 1) & (ypos > 2) & (index > index_cut)
        elif sig_mode == 'pval':
            sig = (abs_fc > 1) & (ypos > pvalue_cut)
        else:  # 'auto'
            index_cut = np.nanpercentile(ypos, 100 - to_show)
            sig = (abs_fc > 1) & (ypos > index_cut)

        if title:
            plt.title(title)
        plt.xlabel('log2FoldChange', fontsize=12)
        plt.ylabel('-log10Pval', fontsize=12)
        plt.xticks(fontsize=10)
        plt.yticks(fontsize=10)

        # Apply the mask once instead of re-indexing for every label.
        sig_x = xpos[sig]
        sig_y = ypos[sig]
        plt.scatter(xpos, ypos, s=1, color='k', alpha=0.5, rasterized=True)
        plt.scatter(sig_x, sig_y, s=3, color='red', rasterized=True)
        texts = [plt.text(x, y, gene, fontsize=5)
                 for x, y, gene in zip(sig_x, sig_y, self.genelist[sig])]
        if adjust:
            adjust_text(texts, only_move={'texts': 'xy'}, lim=adjust_lim)
        if show:
            plt.show()
# Notebook cell output: display `final` (defined outside this section).
final
# Column-normalised contingency table (normalize=1): within each age_status
# column, the fraction of final.obs rows in each annotation_publish_1 category.
df = pd.crosstab(final.obs['annotation_publish_1'], final.obs['age_status'], normalize=1)
# Colour per annotation row: categories zipped with final.uns['annotation_publish_1_colors'].
l = []
for index in df.index:
l.append(dict(zip(final.obs['annotation_publish_1'].cat.categories, final.uns['annotation_publish_1_colors']))[index])
df['color'] = l
# log2 ratio of Old vs Young proportions, separately for FiNi-seq and Control.
df['log2fc(FiNi)'] = np.log2(df['Old_FiNi-seq']/df['Young_FiNi-seq'])
df['log2fc(Ctrl)'] = np.log2(df['Old_Control']/df['Young_Control'])
# One point per annotation: x = Control fold change, y = FiNi fold change.
plt.scatter(x=df['log2fc(Ctrl)'], y=df['log2fc(FiNi)'], c=df['color'])
for index in df.index:
plt.text(x=df['log2fc(Ctrl)'][index], y=df['log2fc(FiNi)'][index], s=index)
plt.xlabel('log2fc(Ctrl)')
plt.ylabel('log2fc(FiNi)')
# Make both axes symmetric around 0 using the largest absolute auto-limit.
y1, y2 = plt.gca().get_ylim()
x1, x2 = plt.gca().get_xlim()
lim = max(abs(x1), abs(x2), abs(y1), abs(y2))
plt.ylim(-lim, lim)
plt.xlim(-lim, lim)
# Zero axes plus dashed guide lines at +/-0.5 on both axes.
plt.axhline(0, color='black', linewidth=0.7)
plt.axvline(0, color='black', linewidth=0.7)
plt.axhline(0.5, linestyle='--', color='lightskyblue', alpha=0.7, linewidth=0.7)
plt.axhline(-0.5, linestyle='--', color='peachpuff', alpha=0.7, linewidth=0.7)
plt.axvline(0.5, linestyle='--', color='navy', alpha=0.7, linewidth=0.7)
plt.axvline(-0.5, linestyle='--', color='orangered', alpha=0.7, linewidth=0.7)
# NOTE(review): savefig() is not defined in this section; presumably a project
# helper that writes the current figure under the given name. Confirm.
savefig('scatter_agingproportion_bycondition')
# Notebook cell output: display `tm` (defined outside this section).
tm
# scl.us is a sceleto2 helper; presumably a UMAP coloured by the listed keys. TODO confirm.
scl.us(tm, 'annotation,age')
# DEG table for each of the four 'group' values of `integrated`, each compared
# against the 'TM_Old' / 'TM_Young' group of matching age (see volcano_analysis).
degtotal = {}
degtotal['Old_Ctrl'] = volcano_analysis(integrated, 'group', 'Old_Control','TM_Old')
degtotal['Old_FiNi'] = volcano_analysis(integrated, 'group', 'Old_FiNi-seq','TM_Old')
degtotal['Young_Ctrl'] = volcano_analysis(integrated, 'group', 'Young_Control','TM_Young')
degtotal['Young_FiNi'] = volcano_analysis(integrated, 'group', 'Young_FiNi-seq','TM_Young')
# Prefix every column with its comparison name so the tables can sit side by side.
for key in degtotal:
degtotal[key].columns = [key+'_'+x for x in degtotal[key].columns]
old_df = pd.concat([degtotal['Old_Ctrl'], degtotal['Old_FiNi']], axis=1)
young_df = pd.concat([degtotal['Young_Ctrl'], degtotal['Young_FiNi']], axis=1)
# Same four comparisons, restricted to one annolv1 cell type at a time.
# Result layout: degdict[cell_type]['<Age>_<Ctrl|FiNi>'] -> DEG table.
degdict = {}
for ct in ['Endothelial', 'Macrophage','Mesenchyme']:
temp = {}
for cd in ['Young','Old']:
temp[cd+'_Ctrl'] = volcano_analysis(integrated[integrated.obs['annolv1']==ct],
'group', cd+'_Control','TM_'+cd)
temp[cd+'_FiNi'] = volcano_analysis(integrated[integrated.obs['annolv1']==ct],
'group', cd+'_FiNi-seq','TM_'+cd)
degdict[ct] = temp
# For every cell type and comparison, print the comparison name followed by the
# number of significant (pval < 0.05) genes with fc > 1 and with fc < -1.
for cell_type in ('Endothelial', 'Mesenchyme', 'Macrophage'):
    for key, deg_table in degdict[cell_type].items():
        print(key)
        temp = deg_table[deg_table['pval'] < 0.05]
        n_up = temp[temp['fc'] > 1].shape[0]
        n_down = temp[temp['fc'] < -1].shape[0]
        print(n_up, n_down)
# Two stacked bar plots sharing the y axis. `degdf` is not defined in this
# section; from its use here it has 'index', 'variable' and 'abs' columns.
fig, axes = plt.subplots(2,figsize=(5,10),sharey=True)
fig.tight_layout()
# Top panel: rows whose 'index' contains 'O', restricted to variable == 'Endo'.
df = degdf[degdf['index'].str.contains('O')]
sns.barplot(data=df[df['variable']=='Endo'], x='variable', y='abs', hue='index', ax=axes[0])
# Bottom panel: rows whose 'index' contains 'Y', restricted to variable == 'Endo'.
df = degdf[degdf['index'].str.contains('Y')]
sns.barplot(data=df[df['variable']=='Endo'], x='variable', y='abs', hue='index', ax=axes[1])
# Enrichr on the significant (pval < 0.05) up- and down-regulated genes of every
# comparison, one result dict per cell type.
# NOTE(review): `rdicts` must already exist; it is not created in this section.
# Pairs are (key in degdict, key in rdicts). The 'Mesenchyme' -> 'Mesenchymal'
# rename is kept because the lookups below use 'Mesenchymal'.
for source_key, target_key in [('Endothelial', 'Endothelial'),
                               ('Macrophage', 'Macrophage'),
                               ('Mesenchyme', 'Mesenchymal')]:
    rdict = {}
    for key in degdict[source_key]:
        temp = degdict[source_key][key]
        print(key)
        temp = temp[temp['pval'] < 0.05]
        up_genes = temp[temp['fc'] > 1]['genelist']
        down_genes = temp[temp['fc'] < -1]['genelist']
        if len(up_genes) > 0:
            rdict[key+'_pos'] = run_enrichr(up_genes, gene_sets)
        # Fix: guard on the down-regulated list itself. The original tested the
        # up-regulated list here, so '_neg' could be run on an empty gene list
        # or skipped although down-regulated genes existed.
        if len(down_genes) > 0:
            rdict[key+'_neg'] = run_enrichr(down_genes, gene_sets)
    rdicts[target_key] = rdict
# Notebook cell outputs: inspect selected result tables.
rdicts['Endothelial']['Old_FiNi_pos']
rdicts['Macrophage']['Old_FiNi_pos']
rdicts['Mesenchymal']['Old_FiNi_pos']
rdicts['Mesenchymal']['Old_Ctrl_pos']
# Scatter of Old_Ctrl fold change (x) against Old_FiNi fold change (y).
# NOTE(review): the significance filter on the next line is immediately
# overridden by `df1 = df`, so every row is plotted. Kept as in the original;
# delete one of the two lines once the intended behaviour is decided.
df1 = df[((df['Old_Ctrl_pval']<0.05) | (df['Old_FiNi_pval']<0.05))]
df1 = df
plt.scatter(df1['Old_Ctrl_fc'], df1['Old_FiNi_fc'], color='lightgray', s=100)
# Genes of interest, highlighted in red.
geneint = ['Cx3cl1','Il6','Ackr1','Vwa1','Cdkn1a','Stab2']
df2 = df1[df1['Old_Ctrl_genelist'].isin(geneint)]
# Fix: draw the highlight layer once. The original looped over `geneint` and
# redrew the identical scatter of all of df2 on every iteration.
plt.scatter(df2['Old_Ctrl_fc'], df2['Old_FiNi_fc'], color='red', s=100)
# Guide lines at fold change 1 on both axes, and the identity line.
plt.axhline(1, linestyle = '--', linewidth=0.5)
plt.axvline(1, linestyle = '--', linewidth=0.5)
plt.axline((0, 0), slope=1, linestyle = '-', color='black', linewidth=1)
plt.xlim(-7.5,7.5)
plt.ylim(-7.5,7.5)
savefig('scatter_degvstm_endothelial')
# Notebook cell outputs: inspect enrichment tables for the Old_Ctrl up-regulated genes.
rdicts['Endothelial']['Old_Ctrl_pos']
rdicts['Macrophage']['Old_Ctrl_pos']
# Gene list named `senmayo`, used below as a gene set for scoring and GSEA.
# NOTE(review): 'Mmp13' appears twice in this string.
senmayo = 'Acvr1b,Ang,Angpt1,Angptl4,Areg,Axl,Bex3,Bmp2,Bmp6,C3,Ccl1,Ccl2,Ccl20,Ccl24,Ccl26,Ccl3,Ccl4,Ccl5,Ccl7,Ccl8,Cd55,Cd9,Csf1,Csf2,Csf2rb,Cst10,Ctnnb1,Ctsb,Cxcl1,Cxcl10,Cxcl12,Cxcl16,Cxcl2,Cxcl3,Cxcr2,Dkk1,Edn1,Egf,Egfr,Ereg,Esm1,Ets2,Fas,Fgf1,Fgf2,Fgf7,Gdf15,Gem,Gmfg,Hgf,Hmgb1,Icam1,Icam5,Igf1,Igfbp1,Igfbp2,Igfbp3,Igfbp4,Igfbp5,Igfbp6,Igfbp7,Il10,Il13,Il15,Il18,Il1a,Il1b,Il2,Il6,Il6st,Il7,Inha,Iqgap2,Itga2,Itpka,Jun,Kitl,Lcp1,Mif,Mmp13,Mmp10,Mmp12,Mmp13,Mmp14,Mmp2,Mmp3,Mmp9,Nap1l4,Nrg1,Pappa,Pecam1,Pgf,Pigf,Plat,Plau,Plaur,Ptbp1,Ptger2,Ptges,Rps6ka5,Scamp4,Selplg,Sema3f,Serpinb3a,Serpine1,Serpine2,Spp1,Spx,Timp2,Tnf,Tnfrsf11b,Tnfrsf1a,Tnfrsf1b,Tubgcp2,Vegfa,Vegfc,Vgf,Wnt16,Wnt2'.split(',')
# Notebook cell output: display `huvec_ca` (defined outside this section).
huvec_ca
# Store a per-cell gene-set score under obs key 'senmayo' on both objects.
sc.tl.score_genes(endothelial, gene_list=senmayo, score_name='senmayo')
sc.tl.score_genes(endonew, gene_list=senmayo, score_name='senmayo')
# UMAP coloured by the score; show=False keeps the figure open for savefig.
sc.pl.umap(endothelial, color='senmayo', vmax=0.5, cmap='RdBu_r', size=20, show=False)
savefig('endothelial_senmayo_umap')
scl.us(endothelial, 'Cdkn1a,Il6', frameon=False)
# Dot plot of the 'senmayo' score grouped by 'subannotation_publish_1'.
sc.pl.dotplot(endonew, 'senmayo' ,'subannotation_publish_1', vmax=0.25, show=False)
savefig('endonew_senmayo_dotplot')
sns.set_style('ticks')
sns.set_context('paper')
# Violin plot of the same score per sub-annotation, without individual points.
sc.pl.violin(endonew, 'senmayo', groupby='subannotation_publish_1', jitter=False
, stripplot=False, show=False)
savefig('endonew_senmayo')
# Pre-ranked GSEA of one DEG table against the SenMayo gene list.
# NOTE(review): `key` is whatever value an earlier loop left behind, and
# `endonewdict` is not defined in this section. Confirm which comparison is meant.
# Columns are renamed to 0 (gene) and 1 (fold change) before being passed as the ranking.
rnk = endonewdict[key][['genelist','fc']].rename(columns={'genelist':0,'fc':1})
pre_res = gp.prerank(rnk=rnk,
                     gene_sets={'Senmayo':senmayo,
                                },
                     threads=4,
                     min_size=5,
                     max_size=1000,
                     permutation_num=1000, # reduce number to speed up testing
                     outdir=None, # don't write to disk
                     seed=6,
                     verbose=True, # see what's going on behind the scenes
                     )
terms = pre_res.res2d.Term
axs = pre_res.plot(terms=terms[0])
# Fix: save before showing. With the inline backend plt.show() closes the
# figure, so saving afterwards (as the original did) writes an empty canvas.
savefig('Senmayo')
plt.show()
def volcano_analysis(adata, variable, subject, control, quick=False):
    """Run ``scl.markers.volcano_plot`` and return its result as a DataFrame.

    param adata, variable, subject, control, quick :forwarded unchanged to
          ``scl.markers.volcano_plot``
    return :DataFrame with columns 'genelist', 'fc', 'pval' and '-log10pval'.
            p-values equal to 0 are replaced by the smallest non-zero p-value
            so that '-log10pval' stays finite.
    """
    volcano = scl.markers.volcano_plot(adata, variable, subject, control, quick=quick)
    deg = pd.DataFrame(volcano.genelist, columns=['genelist'])
    deg['fc'] = volcano.fc
    deg['pval'] = volcano.pval
    # Compute the replacement value once. The original recomputed this minimum
    # for every zero entry, which is quadratic when many p-values underflow to 0.
    pvals = deg['pval']
    smallest_nonzero = pvals[pvals != 0].min()
    deg['pval'] = pvals.where(pvals != 0, smallest_nonzero)
    deg['-log10pval'] = -np.log10(deg['pval'])
    return deg
# Keep rows of `huvec_ca` with pvalue < 0.05 and log2FoldChange > 1.
huvec_sorted = huvec_ca[huvec_ca['pvalue']<0.05]
huvec_sorted = huvec_sorted[huvec_sorted['log2FoldChange']>1]
# NOTE(review): the result of dropna() is not assigned, so this line only
# displays the frame in a notebook; huvec_sorted itself is unchanged.
huvec_sorted.dropna()
# Score endothelial cells with these genes; symbols are rewritten as first
# letter upper-case, rest lower-case before scoring.
sc.tl.score_genes(endothelial, gene_list=[x.lower().capitalize() for x in huvec_sorted.dropna()['SYMBOL']], score_name='huvec_ca')
scl.us(endothelial, 'huvec_ca')
class sample_volcano():
    """Two-group comparison on per-patient mean expression, with a volcano plot.

    This definition tests with ``scipy.stats.mannwhitneyu``.

    Attributes set by ``__init__``:
      genelist : gene names taken from ``adata.raw.var_names``
      fc       : numpy array, log2 fold change (comp1 over comp2) per gene
      pval     : numpy array, Mann-Whitney U p-value per gene
    """

    def __init__(self, adata, patient_id, anno_key, comp1, comp2, P=0.01, quick=True,
                 fc_cut_pval=0.5, n_pos_cell=10, n_patient_cell=10):
        '''
        Compute log2 fold change and Mann-Whitney p-value for every gene.

        param adata :object exposing ``obs``, ``raw.var_names`` and ``raw.to_adata()``;
                     the raw matrix must support ``.toarray()``
        param patient_id :obs column identifying the patient/sample of each cell
        param anno_key :obs column holding the group labels
        param comp1 :label of the numerator group
        param comp2 :label of the denominator group
        param P :pseudocount for fc calculation
        param quick :if True, genes with |fc| < fc_cut_pval or fewer than
                     n_pos_cell expressing cells get p-value 1 without testing
        param fc_cut_pval :|log2 fc| below which the test is skipped (quick mode)
        param n_pos_cell :minimum number of cells (both groups together) with
                          expression > 0 for a gene to be tested (quick mode)
        param n_patient_cell :patients with fewer cells than this are removed
        '''
        from collections import Counter
        from scipy.stats import mannwhitneyu

        self.genelist = adata.raw.var_names

        # Removing patients with fewer cells than n_patient_cell (10 by default)
        cells_per_patient = Counter(adata.obs[patient_id])
        rmlist = [patient for patient, n_cells in cells_per_patient.items()
                  if n_cells < n_patient_cell]
        adata = adata[~adata.obs[patient_id].isin(rmlist)]
        adraw = adata.raw.to_adata()

        cond1 = adraw[adraw.obs[anno_key] == comp1]
        cond2 = adraw[adraw.obs[anno_key] == comp2]
        cond1 = pd.DataFrame(data=cond1.X.toarray(), index=cond1.obs[patient_id],
                             columns=cond1.var_names)
        cond2 = pd.DataFrame(data=cond2.X.toarray(), index=cond2.obs[patient_id],
                             columns=cond2.var_names)

        # One row per patient. observed=True only matters for categorical
        # patient ids: it stops empty categories from adding all-NaN rows to
        # the samples handed to the test.
        exp1 = cond1.groupby(cond1.index, observed=True).mean()
        exp2 = cond2.groupby(cond2.index, observed=True).mean()

        pvals = []
        fcs = []
        for gene in adraw.var_names:
            # Number of cells (both groups together) with positive expression
            n_pos = np.sum(cond1[gene] > 0) + np.sum(cond2[gene] > 0)
            # Mean over patients of the per-patient means, plus pseudocount
            norm_count1 = np.mean(exp1[gene]) + P
            norm_count2 = np.mean(exp2[gene]) + P
            fc = np.log2(norm_count1 / norm_count2)
            fcs.append(fc)
            if quick and (np.abs(fc) < fc_cut_pval or n_pos < n_pos_cell):
                pvals.append(1)
            else:
                pvals.append(mannwhitneyu(exp1[gene], exp2[gene])[1])
        self.pval = np.array(pvals)
        self.fc = np.array(fcs)

    def draw(self, title=None, pvalue_cut=100, to_show=0.2, adjust_lim=5, show=True,
             sig_mode='auto', adjust=True):
        '''
        draw volcano plot

        param title :optional figure title
        param pvalue_cut :-log10Pvalue for cutoff (sig_mode 'pval' only)
        param to_show :top percentage of genes highlighted in 'auto'/'complex'
        param adjust_lim :``lim`` argument forwarded to adjust_text
        param show :call plt.show() at the end
        param sig_mode :['auto','complex','pval']
        param adjust :run adjust_text on the gene labels
        raises SystemError :if sig_mode is not one of the three modes
        '''
        from adjustText import adjust_text

        if sig_mode not in ('auto', 'complex', 'pval'):
            raise SystemError('error, check sig_mode')

        plt.figure(figsize=(6, 6))
        xpos = np.array(self.fc)
        ypos = -np.log10(np.array(self.pval))

        # p == 0 gives +inf: cap it at the largest finite value. NaN entries
        # are excluded so they cannot become the cap.
        finite = np.isfinite(ypos)
        if finite.any():
            ypos[np.isposinf(ypos)] = ypos[finite].max()

        abs_fc = np.abs(xpos)
        # nanpercentile: one NaN p-value would otherwise make the cutoff NaN
        # and silently leave no gene highlighted.
        if sig_mode == 'complex':
            index = abs_fc * ypos
            index_cut = np.nanpercentile(index, 100 - to_show)
            sig = (abs_fc > 1) & (ypos > 2) & (index > index_cut)
        elif sig_mode == 'pval':
            sig = (abs_fc > 1) & (ypos > pvalue_cut)
        else:  # 'auto'
            index_cut = np.nanpercentile(ypos, 100 - to_show)
            sig = (abs_fc > 1) & (ypos > index_cut)

        if title:
            plt.title(title)
        plt.xlabel('log2FoldChange', fontsize=12)
        plt.ylabel('-log10Pval', fontsize=12)
        plt.xticks(fontsize=10)
        plt.yticks(fontsize=10)

        # Apply the mask once instead of re-indexing for every label.
        sig_x = xpos[sig]
        sig_y = ypos[sig]
        plt.scatter(xpos, ypos, s=1, color='k', alpha=0.5, rasterized=True)
        plt.scatter(sig_x, sig_y, s=3, color='red', rasterized=True)
        texts = [plt.text(x, y, gene, fontsize=5)
                 for x, y, gene in zip(sig_x, sig_y, self.genelist[sig])]
        if adjust:
            adjust_text(texts, only_move={'texts': 'xy'}, lim=adjust_lim)
        if show:
            plt.show()
# Plot the 'fibrotic_ec' key on the endothelial object (scl.us is a sceleto2
# helper; presumably a UMAP. TODO confirm).
scl.us(endothelial, 'fibrotic_ec')
# NOTE(review): volcano_plot and fib_deg are not defined in this section.
volcano_plot(fib_deg, int_genes='Il6,Cdkn1a,Cdkn2a,Stab2,Scarb1,Scarb2,Stab1,Cx3cl1,Cxcl12'.split(','), adjust=(0,0), fccut=100, pvalcut=10)
# Pre-ranked GSEA of the current `rnk` against two gene sets: the senmayo list
# and the HUVEC-derived symbols (rewritten as first letter upper-case, rest lower-case).
pre_res = gp.prerank(rnk=rnk,
gene_sets={'Senmayo':senmayo,
'HUVEC_ca':[x.lower().capitalize() for x in huvec_sorted.dropna()['SYMBOL']]},
threads=4,
min_size=5,
max_size=1000,
permutation_num=1000, # reduce number to speed up testing
outdir=None, # don't write to disk
seed=6,
verbose=True, # see what's going on behind the scenes
)
# Plot the first two terms of the result table; only the figure that is
# current when savefig runs is written.
terms = pre_res.res2d.Term
axs = pre_res.plot(terms=terms[0])
axs = pre_res.plot(terms=terms[1])
savefig('Fib_ec_gsea_huvec_ca')
# Notebook cell output: display `endonew`.
endonew
# Plots of the endonew object: sub-annotation, then the Ackr1 and Vwa1 genes.
scl.us(endonew, 'subannotation_publish_1', frameon=False, size=500, show=False)
savefig('endonew_umap_annotation')
# NOTE(review): this call does not pass show=False, unlike the one above, so
# the figure may already be closed when savefig runs. Confirm the saved file.
scl.us(endonew, 'Ackr1,Vwa1', frameon=False, size=700)
savefig('endonew_umap_ackr1_vwa1')
# Average expression per 'subannotation_publish' group (expression_pattern is
# defined outside this section), plus a per-gene z-scored copy.
endo_ave_1, endo_frac_1 = expression_pattern(endothelial, 'subannotation_publish')
z_endo_ave_1 = endo_ave_1.copy()
for col in endo_ave_1.columns:
    z_endo_ave_1[col] = scipy.stats.zscore(endo_ave_1[col])
# reduced
plt.rcParams['figure.figsize'] = (9,9)
x = 'Cdkn2a'
y = 'Cdkn1a'
data = endo_ave_1.copy()
# Shorten the row labels: drop '_EC' and abbreviate the condition names.
for old, new in [('_EC', ''), ('Old Control', 'OC'),
                 ('Old FiNi-seq', 'OF'), ('Young FiNi-seq', 'YF')]:
    data.index = [label.replace(old, new) for label in data.index]
fig, ax = plt.subplots()
for index in data.index:
    # NOTE(review): `shape` is chosen per condition but never passed to
    # ax.scatter (no marker=...), so every point is drawn with the default
    # marker. Left unchanged; confirm whether marker=shape was intended.
    if 'OC' in index:
        shape = 'o'
    if 'OF' in index:
        shape = '^'
    if 'YF' in index:
        shape = ','
    # Colour by endothelial subtype; when several substrings match, the last wins.
    if 'portal' in index:
        color = 'mediumseagreen'
    if 'LSEC' in index:
        color = 'steelblue'
    if 'central' in index:
        color = 'darkkhaki'
    if 'LyEC' in index:
        color = 'sienna'
    if 'dysfunctional' in index:
        color = 'salmon'
    ax.scatter(x=data[x][index], y=data[y][index], color=color,
               s=1200, edgecolor='black', linewidth=3, alpha=0.5)
    plt.text(x=data[x][index], y=data[y][index], s=index, fontsize=20, fontweight='bold')
for spine in ax.spines.values():
    spine.set_linewidth(2)
plt.xlabel(x+' Relative Expression', fontsize=20)
plt.ylabel(y+' Relative Expression', fontsize=20)
plt.xticks(fontsize=15)
plt.yticks(fontsize=15)
from adjustText import adjust_text
# Fix: the keyword was misspelled 'endo_move'. Every other adjust_text call in
# this file passes only_move={'texts':'xy'}.
adjust_text(plt.gca().texts, only_move={'texts':'xy'})
# Volcano plot of the fibrotic-EC DEG table (fib_deg is defined outside this section).
volcano_plot3(fib_deg, pvalcut=10, fccut=1.5,
fontsize=25, figsize=(10,10), title='DEG in Fibrotic_EC')
# Notebook cell output: display `vsmc`, then plot its sub-annotation.
vsmc
scl.us(vsmc, 'subannotation_publish_2')
# Per-sample comparison of 'VSMC_Il-6-high' against 'VSMC_Il-6-low' cells,
# followed by the class's default volcano plot.
vsmc_sv = sample_volcano(vsmc_again,'sample', 'subannotation_publish_2', 'VSMC_Il-6-high','VSMC_Il-6-low')
vsmc_sv.draw()
# Notebook cell outputs: rows of `df` whose gene name contains 'Cxcl' / 'Ccl'.
# NOTE(review): `df` is reassigned in several cells; confirm which table is meant here.
df[df['genelist'].str.contains('Cxcl')]
df[df['genelist'].str.contains('Ccl')]
def volcano_plot3(deg1, pathway_genes=None, int_genes=None, fccut=1, pvalcut=10,
                  adjust=(-0.1, 1), fontsize=15, figsize=(15, 15), title='',
                  xlim=(0, 5), ylim=(0, 4)):
    """Draw a volcano plot on the current figure and repel the gene labels.

    param deg1 :DataFrame with 'genelist', 'fc', 'pval' and '-log10pval' columns
    param pathway_genes :optional collection of genes; those passing the label
                         cutoffs are labelled purple (fc > fccut) or yellow
                         (fc < -fccut) and printed at the end
    param int_genes :optional genes of interest, drawn as large red (fc > 0)
                     or blue (fc < 0) points with a black label
    param fccut :absolute fold-change cutoff for pathway labels
    param pvalcut :-log10 p-value cutoff for pathway labels
    param adjust :unused; kept so existing calls keep working
    param fontsize :font size of the gene labels
    param figsize :(width, height) in inches applied to the current figure
    param title :figure title
    param xlim :x-axis limits (default matches the previously hard-coded 0..5)
    param ylim :y-axis limits (default matches the previously hard-coded 0..4)

    NOTE(review): with the default limits only the positive fold-change side is
    visible, and the x label reads '-log2(Fold Change)' although 'fc' is
    plotted as is. Confirm the intended label.
    """
    from adjustText import adjust_text

    deg = deg1.copy()
    plt.gcf().set_size_inches(figsize[0], figsize[1])
    scattersize = 300

    def _label(frame, **text_kwargs):
        # One text artist per row of `frame`, placed at the point itself.
        for fc, neg_log_p, gene in zip(frame['fc'], frame['-log10pval'], frame['genelist']):
            plt.text(x=fc, y=neg_log_p, s=gene, fontsize=fontsize, **text_kwargs)

    # Background: every gene, then significant up / down genes in darker grey.
    plt.scatter(x=deg['fc'], y=deg['-log10pval'], s=scattersize, alpha=0.1, c='lightgray')
    deg_pos = deg[(deg['pval'] < 0.05) & (deg['fc'] > 1)]
    plt.scatter(x=deg_pos['fc'], y=deg_pos['-log10pval'], s=scattersize, alpha=0.8,
                c='grey', linewidth=0.4)
    deg_neg = deg[(deg['pval'] < 0.05) & (deg['fc'] < -1)]
    plt.scatter(x=deg_neg['fc'], y=deg_neg['-log10pval'], s=scattersize, alpha=0.8, c='grey')

    deg_pos_label = deg_pos[(deg_pos['-log10pval'] > pvalcut) & (deg_pos['fc'] > fccut)]
    deg_neg_label = deg_neg[(deg_neg['-log10pval'] > pvalcut) & (deg_neg['fc'] < -fccut)]

    # Pathway genes among the label candidates ('is not None' rather than
    # '!= None', which misbehaves for array-like arguments).
    inter, inter1 = set(), set()
    if pathway_genes is not None:
        inter = set(deg_pos_label['genelist']).intersection(pathway_genes)
        _label(deg_pos_label[deg_pos_label['genelist'].isin(list(inter))], color='purple')
        inter1 = set(deg_neg_label['genelist']).intersection(pathway_genes)
        _label(deg_neg_label[deg_neg_label['genelist'].isin(list(inter1))], color='yellow')

    # Genes of interest
    if int_genes is not None:
        degint = deg[deg['genelist'].isin(set(int_genes))]
        degpos = degint[degint['fc'] > 0]
        degneg = degint[degint['fc'] < 0]
        highlight_size = (scattersize // 2) * 3
        # Fix: draw each highlight layer once. The original redrew the whole
        # layer once per row, stacking identical artists.
        if not degpos.empty:
            plt.scatter(x=degpos['fc'], y=degpos['-log10pval'], s=highlight_size, alpha=1, c='red')
        if not degneg.empty:
            plt.scatter(x=degneg['fc'], y=degneg['-log10pval'], s=highlight_size, alpha=1, c='blue')
        _label(degint, alpha=1.0, color='black')

    adjust_text(plt.gca().texts, only_move={'texts': 'xy'},
                arrowprops=dict(arrowstyle="-", color='k', lw=1.5))

    # Axis limits were hard-coded; now parameters with the same defaults. The
    # data-driven limits the original computed were never applied and are dropped.
    plt.xlim(*xlim)
    plt.ylim(*ylim)
    plt.xticks(fontsize=30, rotation=0)
    plt.yticks(fontsize=30, rotation=0)
    plt.xlabel('-log'+r'$_{2}$'+'(Fold Change)', fontsize=20, fontweight='bold')
    plt.ylabel('-log'+r'$_{10}$'+'(Adjusted P-value)', fontsize=20, fontweight='bold')
    if pathway_genes is not None:
        if len(inter) != 0:
            print('pos'+str(inter))
        if len(inter1) != 0:
            print('neg'+str(inter1))
    sns.set_style('ticks')
    sns.set_context('paper')
    plt.title(title, fontsize=30, fontweight='bold')
def volcano_plot2(deg, pathway_genes=None, int_genes=None, fccut=1, pvalcut=10,
                  adjust=(-0.1, 1), fontsize=15, figsize=(15, 15), sum_cut=None):
    """Draw a volcano plot on the current figure with offset gene labels.

    param deg :DataFrame with 'genelist', 'fc', 'pval' and '-log10pval' columns
    param pathway_genes :optional collection of genes; those passing the label
                         cutoffs are labelled purple (fc > fccut) or yellow
                         (fc < -fccut) and printed at the end
    param int_genes :optional genes of interest, drawn and labelled in 'lightcoral'
    param fccut :absolute fold-change cutoff for pathway labels
    param pvalcut :-log10 p-value cutoff for pathway labels
    param adjust :(x, y) offset added to every label position
    param fontsize :font size of the gene labels
    param figsize :(width, height) in inches applied to the current figure
    param sum_cut :unused; kept so existing calls keep working
    """
    plt.gcf().set_size_inches(figsize[0], figsize[1])
    scattersize = 100
    xadjust = adjust[0]
    yadjust = adjust[1]
    sns.set_style('ticks')
    sns.set_context('paper')

    def _label(frame, **text_kwargs):
        # One text artist per row of `frame`, shifted by the `adjust` offset.
        for fc, neg_log_p, gene in zip(frame['fc'], frame['-log10pval'], frame['genelist']):
            plt.text(x=fc + xadjust, y=neg_log_p + yadjust, s=gene,
                     fontsize=fontsize, **text_kwargs)

    # Background: every gene, then significant up / down genes in darker grey.
    plt.scatter(x=deg['fc'], y=deg['-log10pval'], s=scattersize, alpha=0.1, c='lightgray')
    deg_pos = deg[(deg['pval'] < 0.05) & (deg['fc'] > 1)]
    plt.scatter(x=deg_pos['fc'], y=deg_pos['-log10pval'], s=scattersize, alpha=1, c='darkgray')
    deg_neg = deg[(deg['pval'] < 0.05) & (deg['fc'] < -1)]
    plt.scatter(x=deg_neg['fc'], y=deg_neg['-log10pval'], s=scattersize, alpha=1, c='darkgray')

    deg_pos_label = deg_pos[(deg_pos['-log10pval'] > pvalcut) & (deg_pos['fc'] > fccut)]
    deg_neg_label = deg_neg[(deg_neg['-log10pval'] > pvalcut) & (deg_neg['fc'] < -fccut)]

    # Pathway genes among the label candidates ('is not None' rather than
    # '!= None', which misbehaves for array-like arguments).
    inter, inter1 = set(), set()
    if pathway_genes is not None:
        inter = set(deg_pos_label['genelist']).intersection(pathway_genes)
        _label(deg_pos_label[deg_pos_label['genelist'].isin(list(inter))], color='purple')
        inter1 = set(deg_neg_label['genelist']).intersection(pathway_genes)
        _label(deg_neg_label[deg_neg_label['genelist'].isin(list(inter1))], color='yellow')

    # Genes of interest
    if int_genes is not None:
        degint = deg[deg['genelist'].isin(set(int_genes))]
        # Fix: draw the highlight layer once. The original redrew all
        # highlighted points on every loop iteration.
        if not degint.empty:
            plt.scatter(x=degint['fc'], y=degint['-log10pval'],
                        s=(scattersize // 3 + 1) * 3, alpha=1, c='lightcoral')
        _label(degint, alpha=1.0, color='lightcoral')

    # Set border: x symmetric around 0 at the largest fc rounded up to an
    # integer, y from 0 to the next multiple of 10 above the largest -log10 p.
    # Series.max() skips NaN, unlike builtin max(), whose result depends on
    # where the NaN sits.
    fc_max = deg['fc'].max()
    fclim = round(fc_max, 0)
    if fc_max - fclim > 0:
        fclim = fclim + 1
    pvallim = deg['-log10pval'].max()
    pvallim = ((pvallim // 10) + 1) * 10

    plt.xlim(-fclim, fclim)
    plt.ylim(0, pvallim)
    plt.xticks(fontsize=10, rotation=0)
    plt.yticks(fontsize=10, rotation=0)
    plt.xlabel('log2 (fold change)', fontsize=10)
    plt.ylabel('-log10 (p value)', fontsize=10)
    if pathway_genes is not None:
        if len(inter) != 0:
            print('pos'+str(inter))
        if len(inter1) != 0:
            print('neg'+str(inter1))
# Notebook cell output: the Cdkn2a row of the VSMC DEG table (vsmc_df is
# defined outside this section).
vsmc_df[vsmc_df['genelist']=='Cdkn2a']
# Volcano plot of the VSMC DEG table (sum_cut is accepted but unused by volcano_plot2).
volcano_plot2(vsmc_df, pvalcut=2, sum_cut=(4,-4), figsize=(9,7))
savefig('vsmc_volcano_new')
# Score VSMC cells with the senmayo list, then rebuild the object via again().
sc.tl.score_genes(vsmc, gene_list=senmayo, score_name='senmayo')
vsmc_again = again(vsmc)
scl.us(vsmc_again, 'subannotation_publish_2', frameon=False, show=False, size=700)
savefig('vsmc_umap_new')
sc.pl.umap(vsmc_again, color='senmayo', cmap='RdBu_r', vmax=0.45, show=False, size=700)
savefig('vsmc_senmayo_umap')
# Pre-ranked GSEA against the senmayo and HUVEC gene sets.
# NOTE(review): `rnk` is not reassigned in this cell, so it is whatever ranking
# an earlier cell left behind. Confirm it is the VSMC ranking.
pre_res_vsmc = gp.prerank(rnk=rnk,
gene_sets={'Senmayo':senmayo,
'HUVEC_ca':[x.lower().capitalize() for x in huvec_sorted.dropna()['SYMBOL']]},
threads=4,
min_size=5,
max_size=1000,
permutation_num=1000, # reduce number to speed up testing
outdir=None, # don't write to disk
seed=6,
verbose=True, # see what's going on behind the scenes
)
terms = pre_res_vsmc.res2d.Term
axs = pre_res_vsmc.plot(terms=terms[0])
savefig('Senmayo_vsmc')
# Violin plots of three genes per VSMC sub-annotation.
sc.pl.violin(vsmc_again, 'Plaur,Il6,Cxcl2'.split(','), groupby='subannotation_publish_2', show=False)
savefig('vsmc_senmayo_leadgene_violin')
# Stacked proportion bars of sub-annotations per age_status group.
proportion_bargraph(vsmc_again, 'subannotation_publish_2' ,'age_status')
savefig('vsmc_prop_bar_new')
# Flag cells whose annolv2 label contains 'Smoc1' ('yes') against all others ('no').
fb.obs['temp'] = 'no'
fb.obs.loc[fb.obs['annolv2'].str.contains('Smoc1'), 'temp'] = 'yes'
# Per-sample comparison of flagged vs unflagged cells, collected into a DEG table.
smoc1_sv = sample_volcano(fb, 'sample','temp','yes','no')
smoc1_deg = pd.DataFrame()
smoc1_deg['pval'] = smoc1_sv.pval
smoc1_deg['fc'] = smoc1_sv.fc
smoc1_deg['genelist'] = smoc1_sv.genelist
smoc1_deg[
'-log10pval'] = -np.log10(smoc1_deg['pval'])
# Notebook cell output: the Thbs2 row.
smoc1_deg[smoc1_deg['genelist']=='Thbs2']
# Re-flag for 'Wif1'.
# NOTE(review): no sample_volcano call for Wif1 follows in this section;
# wif1_deg (used further down) must be built elsewhere.
fb.obs['temp'] = 'no'
fb.obs.loc[fb.obs['annolv2'].str.contains('Wif1'), 'temp'] = 'yes'
volcano_plot3(smoc1_deg)
savefig('smoc1_vol_new')
# Export the table without the derived '-log10pval' column.
temp = smoc1_deg.drop(['-log10pval'], axis=1)
temp.to_csv('/home/kytak/kwonyongtak/02_aging/write/supple/smoc1_deg.csv')
# NOTE(review): volcano_plot is not defined in this section; 'Smoc1' is listed twice.
volcano_plot(smoc1_deg, adjust=(0,0),
int_genes=['Sfrp1','Thbs2','Cntfr','Nxph1','Lbp','Smoc1','Mdk','Plxna4','Igfbp3','Fmod','Tnnt2','Smoc1','Adamtsl1','Vegfd'])
# Notebook cell output, then the default volcano plot again.
smoc1_deg
volcano_plot3(smoc1_deg)
# Load a protein table and rewrite its column names as first letter upper-case,
# rest lower-case, so they can be matched against the gene names used above.
# The code below relies on an 'Age' column after this renaming.
human_ptn = pd.read_csv('/home/kytak/kwonyongtak/Human_protein.csv')
human_ptn.columns = [x.lower().capitalize() for x in human_ptn.columns]
sns.set_style('ticks')
sns.set_context('paper')
# Smoc1 comparison: genes with fc > 1 and pval < 0.05, 200 smallest p-values first.
df = smoc1_deg[smoc1_deg['fc']>1]
df = df[df['pval']<0.05]
# For each such gene that is also a column of human_ptn: regression plot
# against Age, then print the gene, the Pearson result and -log10 of its p-value.
# NOTE(review): indentation was lost in this file; plt.show() presumably
# belongs inside the `if` (one figure per gene). Confirm.
for gene in df.sort_values(by='pval').head(200)['genelist']:
if gene in human_ptn.columns:
sns.regplot(data=human_ptn, x='Age', y=gene, color='mediumpurple', scatter_kws={'s':50})
print(gene)
print(scipy.stats.pearsonr(x=human_ptn['Age'], y=human_ptn[gene]))
print(-np.log10(scipy.stats.pearsonr(x=human_ptn['Age'], y=human_ptn[gene])[1]))
# figfolder = '/home/kytak/kwonyongtak/picture/raw/'
# plt.savefig(figfolder+'Ageregplot_Smoc1_pval_'+gene+'.pdf', format='pdf', dpi=600, bbox_inches='tight', transparent=True)
plt.show()
# NOTE(review): `temp` is computed here but not used again in this section.
# wif1_deg is not defined in this section.
temp = wif1_deg[wif1_deg['pval']<0.05]
temp = temp[temp['fc']>1]
sns.set_style('ticks')
sns.set_context('paper')
# Same analysis for the Wif1 comparison, ranked by descending -log10 p-value.
df = wif1_deg[wif1_deg['fc']>1]
df = df[df['pval']<0.05]
for gene in df.sort_values(by='-log10pval',ascending=False).head(200)['genelist']:
if gene in human_ptn.columns:
sns.regplot(data=human_ptn, x='Age', y=gene, color='mediumpurple', scatter_kws={'s':50})
print(gene)
print(scipy.stats.pearsonr(x=human_ptn['Age'], y=human_ptn[gene]))
print(-np.log10(scipy.stats.pearsonr(x=human_ptn['Age'], y=human_ptn[gene])[1]))
# figfolder = '/home/kytak/kwonyongtak/picture/raw/'
# plt.savefig(figfolder+'Ageregplot_wif1_pval_'+gene+'.pdf', format='pdf', dpi=600, bbox_inches='tight', transparent=True)
plt.show()
# Per-sample proportion bars for the `mes` object, saved as PDF.
proportion_statistics_bar(mes, annotation='subannotation_publish_4',classification='sample', hue='status')
plt.savefig('/home/kytak/kwonyongtak/picture/raw/mes_prop_stat_bar.pdf', format='pdf', dpi=600, bbox_inches='tight', transparent=True)
# Stacked proportion bars of 'anno_transfer' per age_status group for `mye`.
proportion_bargraph(mye, 'anno_transfer','age_status')
# Need to decide how to compute the p-value: by age or by status.
def proportion_statistics_bar(adata, annotation, *, order=False, title='',
                              classification, hue='status',
                              hue_order=['Young Control', 'Young FiNi-seq', 'Old Control', 'Old FiNi-seq'],
                              palette=['peachpuff', 'orangered', 'lightskyblue', 'navy']):
    """Grouped bar plot (mean, SD error bars) of per-sample cell-type proportions.

    For every value of ``classification`` the fraction of its cells in each
    ``annotation`` category is computed. Bars show the mean of these fractions
    per ``hue`` group, with one black dot per sample on top.

    param adata :object whose ``obs`` has the ``annotation`` (categorical) and
                 ``classification`` columns
    param annotation :obs column with the cell-type labels (x axis)
    param order :x-axis order of the annotation categories; False/None uses
                 the categorical's own category order
    param title :optional axes title (empty string: no title)
    param classification :obs column naming the sample of each cell; its
                          values must be strings, because status/age are
                          derived from the substrings 'FiNi', 'Young' and 'DQ'
    param hue :column used for grouping: 'status', 'age' or 'DQ'
    param hue_order :order of the hue levels
    param palette :bar colours, one per hue level
    """
    sns.set_style('ticks')
    sns.set_context('paper')
    # The original called sns.despine() here as well, which acts on whatever
    # figure is current before this plot exists. Only the call at the end is kept.

    standard = classification
    variable = annotation
    # Identity check instead of `order == False`, which is ambiguous for
    # array-like `order` arguments.
    if order is False or order is None:
        order = adata.obs[variable].cat.categories
    order = list(order)

    # Rows: samples, columns: annotation categories, row-normalised.
    df = pd.crosstab(adata.obs[standard], adata.obs[variable], normalize=0)
    df.columns = df.columns.astype(str)
    df.reset_index(inplace=True)
    df = pd.melt(df, id_vars=standard, value_name='proportion')

    def _status(sample):
        # Old/Young x Control/FiNi-seq; anything without 'Young' counts as Old.
        treated = 'FiNi' in sample
        if 'Young' in sample:
            return 'Young FiNi-seq' if treated else 'Young Control'
        return 'Old FiNi-seq' if treated else 'Old Control'

    samples = list(df[standard])
    # 'DQ' samples carry their label as the second '_'-separated field.
    df['DQ'] = [s.split('_')[1] if 'DQ' in s else 'Control' for s in samples]
    df['status'] = [_status(s) for s in samples]
    df['age'] = ['Young' if 'Young' in s else 'Old' for s in samples]

    # Numeric x position per annotation category, following `order`.
    orderdict = dict(zip(order, range(len(order))))
    df['xtick'] = df[variable].replace(orderdict)

    fig, ax = plt.subplots(figsize=(2*len(order), 10))
    sns.barplot(data=df, x='xtick', y='proportion', hue=hue, hue_order=hue_order, palette=palette,
                ax=ax, capsize=0.2, errorbar='sd', errwidth=1, errcolor='dimgrey',
                saturation=0.7, zorder=1)
    # One dot per sample on top of the bars
    sns.stripplot(data=df, x='xtick', y='proportion', hue=hue, hue_order=hue_order,
                  palette=['black']*len(hue_order), dodge=True, ax=ax, jitter=True,
                  size=7, zorder=2)
    # TODO: significance stars per cell type (a Kruskal-Wallis draft existed
    # here as commented-out code); still open whether to test by age or by status.

    for patch in ax.patches:
        patch.set_edgecolor('black')
        patch.set_linewidth(3)
    ax.set_xticklabels(labels=order, fontweight='bold', rotation=90)

    # Keep one legend entry per hue level (the bar handles come first); the
    # strip plot adds a duplicate set of handles.
    handles, _ = ax.get_legend_handles_labels()
    ax.legend(handles=handles[:len(hue_order)], loc='upper right', fontsize=20)

    for spine in ax.spines.values():
        spine.set_linewidth(3)
    sns.despine(top=True, right=True)
    plt.xlabel('')
    plt.ylabel('Proportion', fontsize=30, fontweight='bold')
    plt.xticks(fontsize=25)
    plt.yticks(fontsize=20)
    # Fix: `title` was overwritten with '' and never drawn.
    if title:
        ax.set_title(title)
# Notebook cell outputs: display `df` and `mye`.
df
mye
# Per-sample proportion bar plots for three objects, each saved as PDF.
proportion_statistics_bar(mye, annotation='annolv2',classification='sample', hue='status')
plt.savefig('/home/kytak/kwonyongtak/picture/raw/mye_bar_prop.pdf', format='pdf', dpi=600, bbox_inches='tight', transparent=True)
proportion_statistics_bar(mes, annotation='annolv2',classification='sample', hue='status')
plt.savefig('/home/kytak/kwonyongtak/picture/raw/mes_bar_prop.pdf', format='pdf', dpi=600, bbox_inches='tight', transparent=True)
proportion_statistics_bar(endo, annotation='subannotation_publish',classification='sample_refined', hue='status')
plt.savefig('/home/kytak/kwonyongtak/picture/raw/endo_bar_prop.pdf', format='pdf', dpi=600, bbox_inches='tight', transparent=True)
figfolder = '/home/kytak/kwonyongtak/picture/raw/'
# Fix the category order of the endothelial sub-annotation and assign one colour
# per category, listed in the same order.
endo.obs['subannotation_publish'] = endo.obs['subannotation_publish'].cat.reorder_categories(['dysfunctional EC','portal EC','LSEC','central vein EC','LyEC'])
endo.uns['subannotation_publish_colors']= [ 'salmon','mediumseagreen', 'steelblue', 'darkkhaki', 'sienna']
# Notebook cell output: display `prop` (not defined at module level in this section).
prop
def proportion_bargraph(adata, annotation, classification, xticklabels = False, ylabel ='Proportion (%)', title=''):
    """Draw a stacked bar chart of annotation percentages per classification group.

    One bar is drawn per value of ``adata.obs[classification]``; each bar is
    split into the values of ``adata.obs[annotation]`` and sums to 100 %.

    Parameters
    ----------
    adata
        Object exposing ``.obs`` (with the two columns named below) and
        ``.uns``. ``adata.obs[annotation]`` must be categorical, and
        ``adata.uns[annotation + '_colors']`` must hold one colour per row of
        the proportion table.
    annotation
        Name of the obs column whose values form the stacked segments.
    classification
        Name of the obs column whose values form the bars (x axis).
    xticklabels
        Optional sequence of replacement x tick labels. ``False``, ``None``
        or an empty sequence keeps the labels produced by pandas.
    ylabel
        Y-axis label.
    title
        Plot title.

    Returns
    -------
    None
        The chart is drawn on a new matplotlib figure, which stays current.
    """
    # proportion df: normalize=0 makes every classification group sum to 1;
    # after the transpose rows are annotation values, columns are groups.
    prop = pd.crosstab(adata.obs[classification], adata.obs[annotation], normalize=0).T
    prop.columns = prop.columns.astype(str)

    # Pair each annotation row with its colour, then put rows and colours in
    # the same reversed-category order so they cannot drift apart.
    palette = pd.Series(list(adata.uns[annotation+'_colors']), index=prop.index)
    stack_order = list(reversed(list(adata.obs[annotation].cat.categories)))
    prop = (prop * 100).loc[stack_order]
    prop_colors = list(palette.loc[stack_order])

    # draw
    sns.set_style('ticks')
    sns.set_context('paper')
    ax = prop.T.plot.bar(stacked=True, legend=False, edgecolor='black', width=0.7, color=prop_colors)
    # Despine only after the axes exist: calling it earlier acts on a
    # different (empty) figure and leaves this chart untouched.
    sns.despine(ax=ax, top=True, right=True)
    for spine in ax.spines.values():
        spine.set_linewidth(2)

    # Reverse the legend so it reads in category order, top to bottom.
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1, 1.02), fontsize=15)

    plt.grid(visible=False)
    plt.xlabel('')
    # Explicit checks instead of truthiness so array-like labels are accepted.
    if xticklabels is not None and xticklabels is not False and len(xticklabels) > 0:
        ax.set_xticklabels(labels=list(xticklabels),fontsize=13, fontweight='bold', rotation=0)
    plt.yticks(fontsize=15, rotation=0)
    plt.ylim(0,102)
    plt.ylabel(ylabel, fontsize=22, fontweight='bold')
    # Width scales with the number of bars; height is fixed.
    ax.figure.set_size_inches((4*(len(prop.columns)/3),8))
    plt.title(title)
def again(adata):
    """Start over from the raw data of ``adata`` and return a re-processed copy.

    A copy of ``scl.get_raw(adata)`` is taken, highly variable genes are
    selected and plotted on it, and ``scl.sc_process`` is run with the step
    string ``'spku'`` (see sceleto2 for the meaning of each letter).
    The input ``adata`` is not passed to any of the processing calls.
    """
    hvg_thresholds = {'min_mean': 0.0125, 'max_mean': 4, 'min_disp': 0.5}

    fresh = scl.get_raw(adata).copy()
    sc.pp.highly_variable_genes(fresh, **hvg_thresholds)
    sc.pl.highly_variable_genes(fresh)
    scl.sc_process(fresh, 'spku')
    return fresh
def leiden_auto(adata):
    """Run Leiden clustering on ``adata`` over a sweep of resolutions.

    Resolutions come from ``np.arange(0.2, 2.2, 0.2)`` rounded to one decimal
    (nominally 0.2 up to 2.0). Each run is stored under the key
    ``'leiden_<resolution>'``, e.g. ``'leiden_0.4'``. Nothing is returned.
    """
    for raw_step in np.arange(0.2, 2.2, 0.2):
        # Round through the one-decimal text form so the resolution used
        # matches the key suffix exactly.
        tag = f"{raw_step:.1f}"
        sc.tl.leiden(adata, resolution=float(tag), key_added=f"leiden_{tag}")
def harmony_umap(adata, batch, max_harmony=10, max_kmeans=20):
    """Harmony-integrate ``adata`` on ``batch`` and rebuild the neighbour graph.

    ``max_harmony`` and ``max_kmeans`` are forwarded as ``max_iter_harmony``
    and ``max_iter_kmeans``. Neighbours are then computed on the
    ``'X_pca_harmony'`` representation and ``scl.sc_process`` is run with the
    step string ``'u'``. Works in place on ``adata``; nothing is returned.
    """
    iteration_caps = {
        'max_iter_harmony': max_harmony,
        'max_iter_kmeans': max_kmeans,
    }
    sc.external.pp.harmony_integrate(adata, batch, **iteration_caps)
    sc.pp.neighbors(adata, use_rep='X_pca_harmony')
    scl.sc_process(adata, 'u')
def volcano_analysis(adata, variable, subject, control, quick=False):
    """Run ``scl.markers.volcano_plot`` and return its result as a table.

    Parameters
    ----------
    adata, variable, subject, control, quick
        Forwarded unchanged to ``scl.markers.volcano_plot``.

    Returns
    -------
    pandas.DataFrame
        One row per entry of the result's ``genelist`` with columns
        ``genelist``, ``fc``, ``pval`` and ``-log10pval``. P-values that are
        exactly 0 are replaced by the smallest non-zero p-value so that the
        ``-log10`` transform stays finite.
    """
    volcano = scl.markers.volcano_plot(adata, variable, subject, control, quick=quick)
    deg = pd.DataFrame(volcano.genelist, columns = ['genelist'])
    deg['fc'] = volcano.fc
    deg['pval'] = volcano.pval

    # Compute the floor once and apply it to all zero entries in one step,
    # instead of rescanning the whole column for every zero.
    is_zero = deg['pval'] == 0
    if is_zero.any():
        deg.loc[is_zero, 'pval'] = deg.loc[~is_zero, 'pval'].min()

    deg['-log10pval'] = -np.log10(deg['pval'])
    return deg
def undo_normalize_log(adata):
    """Estimate integer counts from the log-transformed values in ``adata.raw``.

    Each value ``v`` is mapped to ``exp(v) - 1``, multiplied by that row's
    ``adata.obs['n_counts']``, divided by the rounded row sum of the first
    row (used as the common scaling target for every row), and rounded to
    the nearest integer.

    Parameters
    ----------
    adata
        Object exposing ``.raw.to_adata().to_df()`` and an ``n_counts``
        column in ``.obs``. The rows of ``.obs`` are assumed to be in the
        same order as the rows of the raw table.

    Returns
    -------
    pandas.DataFrame
        Float-valued (integer-rounded) table with the same index and columns
        as ``adata.raw.to_adata().to_df()``.

    Notes
    -----
    The scaling target is read from the first row only, so every row is
    assumed to have been normalised to the same total.
    """
    # Materialise the raw table once; it supplies the values and both labels.
    raw_df = adata.raw.to_adata().to_df()

    # Invert the log transform in the table's own dtype, then widen to float.
    expr = np.asarray(np.exp(raw_df) - 1).astype(float)

    # Column vector of per-row totals: broadcasting gives the same product as
    # an explicit rows x columns matrix of repeated totals, without building it.
    per_row_total = np.asarray(adata.obs['n_counts'].values).astype(float)[:, np.newaxis]

    sumcount = np.rint(np.sum(expr, axis=1))[0]
    restored = np.rint(expr * per_row_total / sumcount)
    return pd.DataFrame(restored, index=raw_df.index, columns=raw_df.columns)
def volcano_plot (deg, pathway_genes=None, int_genes=None, fccut=1, pvalcut=10, adjust=(-0.1,1), fontsize = 15, figsize = (15,15), sum_cut = None):
    """Draw a volcano plot of ``deg`` on the current matplotlib figure.

    Parameters
    ----------
    deg
        DataFrame with columns ``genelist``, ``fc``, ``pval`` and
        ``-log10pval``.
    pathway_genes
        Optional iterable of gene names. Labelled genes that belong to it are
        written in purple (positive side) or yellow (negative side), and the
        matching names are printed at the end.
    int_genes
        Optional iterable of genes of interest. They are always marked and
        labelled in light coral, whatever their significance.
    fccut
        Absolute ``fc`` a coloured point must exceed to get a text label.
    pvalcut
        ``-log10pval`` a coloured point must exceed to get a text label.
    adjust
        ``(dx, dy)`` offset added to every label position before
        ``adjust_text`` rearranges the labels.
    fontsize
        Font size of the gene labels.
    figsize
        ``(width, height)`` of the figure in inches.
    sum_cut
        Currently unused; kept so existing callers keep working.

    Notes
    -----
    Point colouring uses fixed thresholds (``pval < 0.05`` and ``|fc| > 1``);
    ``fccut`` and ``pvalcut`` only decide which coloured points get a label.
    Each gene is labelled once: genes of interest take precedence over the
    pathway colouring, which takes precedence over the default label.
    """
    import math
    from adjustText import adjust_text

    plt.gcf().set_size_inches(figsize[0],figsize[1])
    # plot variable
    scattersize = 10
    xadjust, yadjust = adjust[0], adjust[1]

    # plot scatter: grey background, then significant up (red) / down (blue)
    plt.scatter(x=deg['fc'], y=deg['-log10pval'], s=scattersize, alpha=0.1, c='tab:gray')
    significant = deg['pval'] < 0.05
    deg_pos = deg[significant & (deg['fc'] > 1)]
    plt.scatter(x=deg_pos['fc'], y=deg_pos['-log10pval'], s=scattersize, alpha=1, c='tab:red')
    deg_neg = deg[significant & (deg['fc'] < -1)]
    plt.scatter(x=deg_neg['fc'], y=deg_neg['-log10pval'], s=scattersize, alpha=1, c='tab:blue')

    deg_pos_label = deg_pos[(deg_pos['-log10pval'] > pvalcut) & (deg_pos['fc'] > fccut)]
    deg_neg_label = deg_neg[(deg_neg['-log10pval'] > pvalcut) & (deg_neg['fc'] < -fccut)]

    # `is not None` rather than `!= None`, so array-like inputs are accepted.
    interest = set(int_genes) if int_genes is not None else set()
    inter, inter1 = set(), set()
    if pathway_genes is not None:
        inter = set(deg_pos_label['genelist']).intersection(pathway_genes)
        inter1 = set(deg_neg_label['genelist']).intersection(pathway_genes)

    # One label per gene: pathway hits are coloured directly instead of
    # stacking a coloured copy on top of a default label, because adjust_text
    # would otherwise push the two copies apart.
    _volcano_label_genes(deg_pos_label, xadjust, yadjust, fontsize,
                         highlighted=inter, highlight_color='purple', skip=interest)
    _volcano_label_genes(deg_neg_label, xadjust, yadjust, fontsize,
                         highlighted=inter1, highlight_color='yellow', skip=interest)

    # gene_of_interest
    if int_genes is not None:
        degint = deg[deg['genelist'].isin(interest)]
        if not degint.empty:
            # A single scatter call covers every gene of interest.
            plt.scatter(x=degint['fc'], y=degint['-log10pval'], s=(scattersize//3+1)*3, alpha=1, c='lightcoral')
        for fc, neg_log_p, gene in zip(degint['fc'], degint['-log10pval'], degint['genelist']):
            plt.text(x=fc+xadjust, y=neg_log_p+yadjust,
                     s=gene, fontsize=fontsize, alpha=1.0, color='lightcoral')

    adjust_text(plt.gca().texts, only_move={'texts':'xy'}, arrowprops=dict(arrowstyle="-", color='k', lw=1.5))

    # set border: symmetric x range from the largest |fc| rounded up, so the
    # strongest negative changes are not clipped; y range rounded up to the
    # next multiple of 5 above the largest value.
    fclim = math.ceil(deg['fc'].abs().max())
    pvallim = deg['-log10pval'].max()
    pvallim = ((pvallim //5)+1) * 5

    # draw
    plt.xlim(-fclim, fclim)
    plt.ylim(0, pvallim)
    plt.axvline(x=1, color='tab:orange', linestyle='--', linewidth=0.5)
    plt.axvline(x=-1, color='tab:orange', linestyle='--', linewidth=0.5)
    plt.axhline(y=-math.log10(0.05))
    # visible=False switches the grid off; visible=None would toggle it and
    # so depend on the active style.
    plt.grid(visible=False)
    plt.xticks(fontsize=10, rotation=0)
    plt.yticks(fontsize=10, rotation=0)
    plt.xlabel('log2 (fold change)', fontsize=10)
    plt.ylabel('-log10 (p value)', fontsize=10)

    if pathway_genes is not None:
        if len(inter)!=0:
            print('pos'+str(inter))
        if len(inter1)!=0:
            print('neg'+str(inter1))


def _volcano_label_genes(rows, xadjust, yadjust, fontsize, highlighted=(), highlight_color=None, skip=()):
    """Write one text label per row of ``rows`` at its (fc, -log10pval) position plus the offsets."""
    for fc, neg_log_p, gene in zip(rows['fc'], rows['-log10pval'], rows['genelist']):
        if gene in skip:
            # Labelled elsewhere by the caller.
            continue
        style = {'color': highlight_color} if gene in highlighted else {'alpha': 1.0}
        plt.text(x=fc+xadjust, y=neg_log_p+yadjust, s=gene, fontsize=fontsize, **style)
def expression_pattern(adata, groups):
    """Summarise raw expression per group.

    Parameters
    ----------
    adata
        Object exposing ``.raw`` (with ``.var`` and a ``[:, genes].X``
        matrix, sparse or dense) and ``.obs``.
    groups
        Name of the ``adata.obs`` column used to group the rows.

    Returns
    -------
    (average_obs, fraction_obs)
        Two DataFrames indexed by group with one column per raw gene:
        the mean value per group, and the fraction of rows in the group
        whose value is non-zero.
    """
    gene_ids = adata.raw.var.index.values
    matrix = adata.raw[:,gene_ids].X
    # Sparse matrices need densifying; a dense array is used as it is.
    dense = matrix.toarray() if hasattr(matrix, 'toarray') else np.asarray(matrix)
    obs = pd.DataFrame(dense,columns=gene_ids,index=adata.obs[groups])

    average_obs = obs.groupby(level=0).mean()

    # Non-zero entries per group divided by rows per group; the grouped
    # object is built once and reused for both terms.
    expressed = obs.astype(bool).groupby(level=0)
    fraction_obs = expressed.sum()/expressed.count()
    return average_obs, fraction_obs