In [1]:
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

import cell2location
import scvi
/opt/anaconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/_settings.py:63: UserWarning: Since v1.0.0, scvi-tools no longer uses a random seed by default. Run `scvi.settings.seed = 0` to reproduce results from previous versions.
  self.seed = seed
/opt/anaconda3/envs/cell2loc/lib/python3.9/site-packages/scvi/_settings.py:70: UserWarning: Setting `dl_pin_memory_gpu_training` is deprecated in v1.0 and will be removed in v1.1. Please pass in `pin_memory` to the data loaders instead.
  self.dl_pin_memory_gpu_training = (
/home/kytak/.local/lib/python3.9/site-packages/umap/distances.py:1063: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/kytak/.local/lib/python3.9/site-packages/umap/distances.py:1071: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/kytak/.local/lib/python3.9/site-packages/umap/distances.py:1086: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
/home/kytak/.local/lib/python3.9/site-packages/umap/umap_.py:660: NumbaDeprecationWarning: The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
  @numba.jit()
In [2]:
%matplotlib inline

sc.settings.verbosity = 3 
sc.logging.print_version_and_date()
sc.settings.set_figure_params(dpi=100)
Running Scanpy 1.9.6, on 2024-04-02 03:10.
In [3]:
def again(adata):
    bdata = scl.get_raw(adata)
    bdata = bdata.copy()

    sc.pp.highly_variable_genes(bdata, min_mean=0.0125, max_mean=4, min_disp=0.5)
    sc.pl.highly_variable_genes(bdata)

    scl.sc_process(bdata, 'spku')
    
    return bdata


def leiden_auto(adata):
    for x in np.arange(0.2,2.2,0.2):
        a = format(x, ".1f")
        sc.tl.leiden(adata, resolution=float(a), key_added='leiden_'+str(a))



def harmony_umap(adata, batch, max_harmony=10, max_kmeans=20):
    sc.external.pp.harmony_integrate(adata, batch,  max_iter_harmony=max_harmony, max_iter_kmeans=max_kmeans)

    sc.pp.neighbors(adata, use_rep='X_pca_harmony')

    scl.sc_process(adata, 'u')



def leiden_restrict(adata, annotation, sub, resolution, name):
# sub = list
    sc.tl.leiden(adata, restrict_to=(annotation, sub), resolution=resolution, 
                 key_added=name)
    
    adata.obs[name] = [x.replace(',','_') for x in adata.obs[name]]  
    
    scl.us(adata, name)

GSE192736 data

In [4]:
results_folder = '/home/kytak/kwonyongtak/02_aging/write/stereopy/cell2location'

# create paths and names to results folders for reference regression and cell2location models
results_folder = '/home/kytak/kwonyongtak/02_aging/write/stereopy/cell2location'

# create paths and names to results folders for reference regression and cell2location models
ref_run_name = f'{results_folder}/reference_signatures'
run_name = f'{results_folder}/cell2location_map'
In [5]:
test_key = 'Old_liver'
In [12]:
old_liver = sc.read('/home/kytak/kwonyongtak/02_aging/write/stereopy/Old_liver.h5ad')
In [18]:
pd.DataFrame(old_liver.obsm['spatial'], index=old_liver.obs_names).to_csv('/home/kytak/kwonyongtak/02_aging/write/stereopy/old_liver_coord.csv')
In [11]:
old_liver.obs[old_liver.uns['mod']['factor_names']] = old_liver.obsm['q05_cell_abundance_w_sf']
In [12]:
old_liver
Out[12]:
AnnData object with n_obs × n_vars = 89983 × 28025
    obs: 'age', '_indices', '_scvi_batch', '_scvi_labels', 'B_Cd80+', 'B_Plasma', 'B_Sell+', 'Cd4_Ifnr/Fasl+(H1)', 'Cd4_Lef1/Sell,Actn1+', 'Cd4_NKT', 'Cd4_Tex', 'Cd4_Tox/Slamf6,Cxcr5+(FH)', 'Cd8_Lef1/Sell,Actn1+', 'Cd8_Tcf7/Ccr7+,Tnfrsf9/4-(naive)', 'Cd8_Tcf7/GZMK+(EM)', 'Cd8_Tcf7/GZMK+,Pdcd1-(Early_activated)', 'Cd8_Tex', 'Cholangiocyte', 'Endothelial_CV', 'Endothelial_Fibrotic', 'Endothelial_Lymphatic', 'Endothelial_Midzone', 'Endothelial_PV', 'Fibroblast_Col8a1+', 'Fibroblast_Mmp3+', 'Fibroblast_Pi16+', 'Fibroblast_Smoc1+', 'Fibroblast_Wif1+', 'Foam Cells', 'HSC', 'Hepatocyte', 'KC', 'MP', 'Mast', 'Monocyte', 'NK+_H2', 'NK+_Lilr+', 'NK_Klra+', 'Neutrophil', 'Patrolling Monocyte', 'T_Double_Negative', 'Transitioning', 'Treg', 'VSMC_Il-6-high', 'VSMC_Il-6-low', 'cDC1', 'cDC2', 'mDC', 'pDC', 'γδT', 'Gross_Endothelial', 'Gross_Fibroblast', 'Gross_Cd4T', 'Gross_Cd8T', 'Gross_Monocyte', 'Gross_Macrophage', 'Gross_VSMC', 'Gross_NK', 'Gross_DC', 'Gross_B', 'max_all', 'score_Cd8_Trm_Pdcd1high', 'score_Cd8_Trm_Pdcd1low', 'score_FB_Col8a1', 'score_FB_Portal', 'score_FB_Smoc1', 'score_FB_Srgn', 'score_FB_Wif1', 'score_HSC', 'score_Hepatocyte', 'score_Cholangiocyte', 'score_KC', 'score_LSEC', 'score_LyEC', 'score_MdM', 'score_Monocyte', 'score_NK', 'score_Neutrophil', 'score_Patrolling Monocyte', 'score_VSMC_Il-6-high', 'score_VSMC_Il-6-low', 'score_cDC1', 'score_cDC2', 'score_central vein EC', 'score_dysfunctional EC', 'score_pDC', 'score_portal EC', 'score_max', 'Anno_portal EC', 'Anno_central vein EC', 'Anno_LSEC', 'Anno_MdM', 'Anno_HSC', 'Anno_FB_Portal', 'Anno_FB_Smoc1', 'Anno_FB_Wif1', 'Anno_dysfunctional EC', 'temp'
    uns: 'Anno_FB_Portal_colors', 'Anno_FB_Smoc1_colors', 'Anno_FB_Wif1_colors', 'Anno_HSC_colors', 'Anno_LSEC_colors', 'Anno_MdM_colors', 'Anno_central vein EC_colors', 'Anno_dysfunctional EC_colors', 'Anno_portal EC_colors', '_scvi_manager_uuid', '_scvi_uuid', 'max_all_colors', 'mod', 'score_max_colors', 'temp_colors'
    obsm: 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'smoothed', 'spatial', 'stds_cell_abundance_w_sf'
In [223]:
old_liver.
Out[223]:
AnnData object with n_obs × n_vars = 89983 × 28025
    obs: 'age', '_indices', '_scvi_batch', '_scvi_labels', 'B_Cd80+', 'B_Plasma', 'B_Sell+', 'Cd4_Ifnr/Fasl+(H1)', 'Cd4_Lef1/Sell,Actn1+', 'Cd4_NKT', 'Cd4_Tex', 'Cd4_Tox/Slamf6,Cxcr5+(FH)', 'Cd8_Lef1/Sell,Actn1+', 'Cd8_Tcf7/Ccr7+,Tnfrsf9/4-(naive)', 'Cd8_Tcf7/GZMK+(EM)', 'Cd8_Tcf7/GZMK+,Pdcd1-(Early_activated)', 'Cd8_Tex', 'Cholangiocyte', 'Endothelial_CV', 'Endothelial_Fibrotic', 'Endothelial_Lymphatic', 'Endothelial_Midzone', 'Endothelial_PV', 'Fibroblast_Col8a1+', 'Fibroblast_Mmp3+', 'Fibroblast_Pi16+', 'Fibroblast_Smoc1+', 'Fibroblast_Wif1+', 'Foam Cells', 'HSC', 'Hepatocyte', 'KC', 'MP', 'Mast', 'Monocyte', 'NK+_H2', 'NK+_Lilr+', 'NK_Klra+', 'Neutrophil', 'Patrolling Monocyte', 'T_Double_Negative', 'Transitioning', 'Treg', 'VSMC_Il-6-high', 'VSMC_Il-6-low', 'cDC1', 'cDC2', 'mDC', 'pDC', 'γδT', 'Gross_Endothelial', 'Gross_Fibroblast', 'Gross_Cd4T', 'Gross_Cd8T', 'Gross_Monocyte', 'Gross_Macrophage', 'Gross_VSMC', 'Gross_NK', 'Gross_DC', 'Gross_B', 'max_all', 'score_Cd8_Trm_Pdcd1high', 'score_Cd8_Trm_Pdcd1low', 'score_FB_Col8a1', 'score_FB_Portal', 'score_FB_Smoc1', 'score_FB_Srgn', 'score_FB_Wif1', 'score_HSC', 'score_Hepatocyte', 'score_Cholangiocyte', 'score_KC', 'score_LSEC', 'score_LyEC', 'score_MdM', 'score_Monocyte', 'score_NK', 'score_Neutrophil', 'score_Patrolling Monocyte', 'score_VSMC_Il-6-high', 'score_VSMC_Il-6-low', 'score_cDC1', 'score_cDC2', 'score_central vein EC', 'score_dysfunctional EC', 'score_pDC', 'score_portal EC', 'score_max', 'Anno_portal EC', 'Anno_central vein EC', 'Anno_LSEC', 'Anno_MdM', 'Anno_HSC', 'Anno_FB_Portal', 'Anno_FB_Smoc1', 'Anno_FB_Wif1', 'Anno_dysfunctional EC', 'temp', 'Anno_EC_FB'
    uns: 'Anno_FB_Portal_colors', 'Anno_FB_Smoc1_colors', 'Anno_FB_Wif1_colors', 'Anno_HSC_colors', 'Anno_LSEC_colors', 'Anno_MdM_colors', 'Anno_central vein EC_colors', 'Anno_dysfunctional EC_colors', 'Anno_portal EC_colors', '_scvi_manager_uuid', '_scvi_uuid', 'max_all_colors', 'mod', 'score_max_colors', 'temp_colors'
    obsm: 'means_cell_abundance_w_sf', 'q05_cell_abundance_w_sf', 'q95_cell_abundance_w_sf', 'smoothed', 'spatial', 'stds_cell_abundance_w_sf'
In [229]:
old_liver.obs[['Endothelial_Fibrotic']]
Out[229]:
Endothelial_Fibrotic
0 0.000062
1 0.000110
2 0.000352
3 0.000758
4 0.000534
... ...
89978 0.001608
89979 0.001078
89980 0.001194
89981 0.001498
89982 0.000785

89983 rows × 1 columns

In [838]:
sc.pl.spatial(old_liver, color=['score_Cd8_Trm_Pdcd1high', 'score_Cd8_Trm_Pdcd1low'],
              spot_size=200)
In [21]:
sc.pl.spatial(old_liver, cmap='viridis',
                  # show first 8 cell types
                  color=['Endothelial_CV', 'Endothelial_Fibrotic', 'Endothelial_Lymphatic', 'Endothelial_Midzone', 'Endothelial_PV'],
                  ncols=4, size=1.3,
                  img_key='hires',
                  # limit color scale at 99.2% quantile of cell abundance
                  vmin=0, vmax=0.2, spot_size=100
                 )
In [34]:
import scipy
In [41]:
sns.kdeplot(old_liver.obs['Endothelial_PV'])
Out[41]:
<Axes: xlabel='Endothelial_PV', ylabel='Density'>
In [348]:
old_liver.obs['Gross_Endothelial'] = old_liver.obs[[x for x in old_liver.obs.columns if 'Endo' in x]].sum(1)
In [349]:
old_liver.obs['Gross_Fibroblast'] = old_liver.obs[[x for x in old_liver.obs.columns if 'Fibrob' in x]].sum(1)
In [350]:
old_liver.obs['Gross_Cd4T'] = old_liver.obs[[x for x in old_liver.obs.columns if 'Cd4' in x]+['Treg']].sum(1)
In [351]:
old_liver.obs['Gross_Cd8T'] = old_liver.obs[[x for x in old_liver.obs.columns if 'Cd8' in x]].sum(1)
In [352]:
old_liver.obs['Gross_Monocyte'] = old_liver.obs[[x for x in old_liver.obs.columns if 'Monocyte' in x]].sum(1)
In [353]:
old_liver.obs['Gross_Macrophage'] = old_liver.obs[['KC','MP','Foam Cells','Transitioning']].sum(1)
In [354]:
old_liver.obs['Gross_VSMC'] = old_liver.obs[[x for x in old_liver.obs.columns if 'VSMC' in x]].sum(1)
In [355]:
old_liver.obs['Gross_NK'] = old_liver.obs[[x for x in old_liver.obs.columns if 'NK' in x]].sum(1)
In [356]:
old_liver.obs['Gross_DC'] = old_liver.obs[[x for x in old_liver.obs.columns if 'DC' in x]].sum(1)
In [357]:
old_liver.obs['Gross_B'] = old_liver.obs[[x for x in old_liver.obs.columns if 'B' in x]].sum(1)
In [358]:
sc.pl.spatial(old_liver, color=['Hepatocyte','Cholangiocyte'], spot_size=100)
In [359]:
sc.pl.spatial(old_liver, color=[x for x in old_liver.obs.columns if 'Gross' in x], spot_size=100)
In [81]:
ax = sns.kdeplot(old_liver.obs['Gross_Endothelial'])

x = ax.lines[0].get_xdata() # Get the x data of the distribution
y = ax.lines[0].get_ydata() # Get the y data of the distribution
maxid = np.argmax(y) # The id of the peak (maximum of y data)
plt.plot(x[maxid],y[maxid], 'bo', ms=10)
print(x[maxid],y[maxid])
plt.axvline(x=x[maxid], linestyle='--', color='black')
0.14476290151085192 4.432251737913768
Out[81]:
<matplotlib.lines.Line2D at 0x7f5a683bd5e0>
In [96]:
old_liver.obs['cut_endo'] = 'No'
old_liver.obs.loc[old_liver.obs['Gross_Endothelial']>0.4, 'cut_endo'] = 'yes'
In [133]:
sc.pl.spatial(old_liver, color=['Fibroblast_Wif1+','Fibroblast_Smoc1+','Fibroblast_Pi16+','Endothelial_Fibrotic','Cd8_Tex'], spot_size=100)
In [142]:
sns.lineplot(data=old_liver.obs[['Endothelial_PV','Endothelial_CV']].sort_values(by='Endothelial_PV').reset_index()
             , x='index',y='Endothelial_PV')

sns.lineplot(data=old_liver.obs[['Endothelial_PV','Endothelial_CV']].sort_values(by='Endothelial_PV').reset_index()
             , x='index',y='Endothelial_CV')
Out[142]:
<Axes: xlabel='index', ylabel='Endothelial_PV'>
In [98]:
a = 'Endothelial_CV'
b = 'Endothelial_PV'
adata = old_liver[old_liver.obs['cut_endo']=='yes'].copy()
sns.regplot(x= adata.obs[a], y=adata.obs[b])
print(scipy.stats.pearsonr(adata.obs[a], adata.obs[b]))
PearsonRResult(statistic=-0.06713265605203284, pvalue=0.0021514183173745234)
In [361]:
sc.pl.spatial(old_liver, color=['max_all']
              , spot_size=100,vmax=1.0)
/opt/anaconda3/envs/cell2loc/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
In [401]:
sc.pl.spatial(old_liver, color=['Hepatocyte', 'Endothelial_Fibrotic', 'Endothelial_PV','Endothelial_CV'],
             spot_size=200)
In [414]:
sc.pl.spatial(old_liver, color='Pdcd1', spot_size=200)
In [189]:
final
Out[189]:
AnnData object with n_obs × n_vars = 93201 × 29814
    obs: 'pct_counts_mt', 'n_genes', 'n_counts', 'age', 'status', 'sample', 'age_status', 'annolv1', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'age_colors', 'age_status_colors', 'annotation_publish_1_colors', 'annotation_publish_colors', 'condition_colors', 'hvg', 'neighbors', 'pca', 'predicted_labels_colors', 'sample_colors', 'sample_refined_colors', 'status_colors', 'subannotation_publish_1_colors', 'subannotation_publish_colors', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
In [216]:
t
Out[216]:
AnnData object with n_obs × n_vars = 17193 × 29814
    obs: 'pct_counts_mt', 'n_genes', 'n_counts', 'age', 'status', 'sample', 'DQ', 'age_status', 'annotation_publish', 'subannotation_publish', 'subannotation_publish_1', 'annotation_publish_1', 'leiden_0.2', 'leiden_0.4', 'leiden_0.6', 'leiden_0.8', 'leiden_1.0', 'leiden_1.2', 'leiden_1.4', 'leiden_1.6', 'leiden_1.8', 'leiden_2.0', 'leiden_3.0', 'leiden_1.6_sub', 'leiden_1.6_sub_1', 'subannotation_publish_cd8', 'tcell_sub', 'doublet_scores', 'predicted_doublets', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mt', 'log1p_total_counts_mt', 'condition', 'predicted_labels', 'over_clustering', 'conf_score', 'sample_refined', 'subannotation_t', 'tcell_sub_sub', 'subannotation_t_final'
    var: 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'age_status_colors', 'cdm_leiden_0.2', 'hvg', 'leiden', 'leiden_0.2_colors', 'neighbors', 'pca', 'subannotation_publish_cd8_colors', 'subannotation_t_colors', 'subannotation_t_final_colors', 'tcell_sub_colors', 'umap'
    obsm: 'X_pca', 'X_pca_harmony', 'X_umap'
    varm: 'PCs'
    obsp: 'connectivities', 'distances'
In [233]:
final.obs['annolv2'] = final.obs['annolv1'].astype(str)
for index in mes.obs_names:
    if index in final.obs_names:
        final.obs.loc[index, 'annolv2'] = mes.obs.loc[index, 'annolv2']
    
for index in endo.obs_names:
    if index in final.obs_names:
        final.obs.loc[index, 'annolv2'] = endo.obs.loc[index, 'subannotation_publish']
    
for index in t.obs_names:
    if index in final.obs_names:
        final.obs.loc[index, 'annolv2'] = t.obs.loc[index, 'subannotation_t_final']
    
for index in momac.obs_names:
    if index in final.obs_names:
        final.obs.loc[index, 'annolv2'] = momac.obs.loc[index, 'annolv2']
In [235]:
final = final[~final.obs['annolv2'].isin({'Endothelia','Mesenchyme','T'})]
In [315]:
marker.keys()
Out[315]:
dict_keys(['B', 'Cd4_Tcm/Tem', 'Cd4_Tem_polarized_CCl5+', 'Cd4_Th17', 'Cd4_Tifn', 'Cd4_Tinnate-like', 'Cd4_Tnaive', 'Cd4_Treg', 'Cd8_Tcm', 'Cd8_Tem', 'Cd8_Tnaive', 'Cd8_Trm_Pdcd1high', 'Cd8_Trm_Pdcd1low', 'Cholangiocyte', 'FB_Col8a1', 'FB_Portal', 'FB_Smoc1', 'FB_Srgn', 'FB_Wif1', 'HSC', 'Hepatocyte', 'KC', 'LSEC', 'LyEC', 'Mast', 'MdM', 'Monocyte', 'NK', 'Neutrophil', 'Patrolling Monocyte', 'T_Double_Negative', 'T_Progenitor', 'VSMC_Il-6-high', 'VSMC_Il-6-low', 'cDC1', 'cDC2', 'central vein EC', 'dysfunctional EC', 'mDC', 'pDC', 'portal EC', 'γδT'])
In [362]:
for key in ['Cd8_Trm_Pdcd1high', 'Cd8_Trm_Pdcd1low','FB_Col8a1', 'FB_Portal', 'FB_Smoc1', 'FB_Srgn', 'FB_Wif1', 'HSC', 'Hepatocyte','Cholangiocyte', 'KC', 'LSEC', 'LyEC','MdM', 'Monocyte', 'NK', 'Neutrophil', 'Patrolling Monocyte', 'VSMC_Il-6-high', 'VSMC_Il-6-low', 'cDC1', 'cDC2', 'central vein EC', 'dysfunctional EC','pDC', 'portal EC']:
    sc.tl.score_genes(old_liver, marker[key], score_name='score_'+key)
computing score 'score_Cd8_Trm_Pdcd1high'
WARNING: genes are not in var_names and ignored: ['Tcrg-C2']
    finished: added
    'score_Cd8_Trm_Pdcd1high', score of gene set (adata.obs).
    350 total control genes are used. (0:00:01)
computing score 'score_Cd8_Trm_Pdcd1low'
    finished: added
    'score_Cd8_Trm_Pdcd1low', score of gene set (adata.obs).
    150 total control genes are used. (0:00:01)
computing score 'score_FB_Col8a1'
    finished: added
    'score_FB_Col8a1', score of gene set (adata.obs).
    349 total control genes are used. (0:00:00)
computing score 'score_FB_Portal'
    finished: added
    'score_FB_Portal', score of gene set (adata.obs).
    450 total control genes are used. (0:00:00)
computing score 'score_FB_Smoc1'
    finished: added
    'score_FB_Smoc1', score of gene set (adata.obs).
    250 total control genes are used. (0:00:00)
computing score 'score_FB_Srgn'
    finished: added
    'score_FB_Srgn', score of gene set (adata.obs).
    150 total control genes are used. (0:00:00)
computing score 'score_FB_Wif1'
    finished: added
    'score_FB_Wif1', score of gene set (adata.obs).
    399 total control genes are used. (0:00:00)
computing score 'score_HSC'
    finished: added
    'score_HSC', score of gene set (adata.obs).
    350 total control genes are used. (0:00:00)
computing score 'score_Hepatocyte'
    finished: added
    'score_Hepatocyte', score of gene set (adata.obs).
    200 total control genes are used. (0:00:00)
computing score 'score_Cholangiocyte'
    finished: added
    'score_Cholangiocyte', score of gene set (adata.obs).
    299 total control genes are used. (0:00:00)
computing score 'score_KC'
    finished: added
    'score_KC', score of gene set (adata.obs).
    400 total control genes are used. (0:00:00)
computing score 'score_LSEC'
    finished: added
    'score_LSEC', score of gene set (adata.obs).
    298 total control genes are used. (0:00:00)
computing score 'score_LyEC'
WARNING: genes are not in var_names and ignored: ['Ccl21a']
    finished: added
    'score_LyEC', score of gene set (adata.obs).
    399 total control genes are used. (0:00:00)
computing score 'score_MdM'
    finished: added
    'score_MdM', score of gene set (adata.obs).
    399 total control genes are used. (0:00:00)
computing score 'score_Monocyte'
    finished: added
    'score_Monocyte', score of gene set (adata.obs).
    398 total control genes are used. (0:00:00)
computing score 'score_NK'
    finished: added
    'score_NK', score of gene set (adata.obs).
    300 total control genes are used. (0:00:00)
computing score 'score_Neutrophil'
    finished: added
    'score_Neutrophil', score of gene set (adata.obs).
    350 total control genes are used. (0:00:00)
computing score 'score_Patrolling Monocyte'
    finished: added
    'score_Patrolling Monocyte', score of gene set (adata.obs).
    300 total control genes are used. (0:00:00)
computing score 'score_VSMC_Il-6-high'
    finished: added
    'score_VSMC_Il-6-high', score of gene set (adata.obs).
    400 total control genes are used. (0:00:00)
computing score 'score_VSMC_Il-6-low'
    finished: added
    'score_VSMC_Il-6-low', score of gene set (adata.obs).
    349 total control genes are used. (0:00:00)
computing score 'score_cDC1'
WARNING: genes are not in var_names and ignored: ['Sept3']
    finished: added
    'score_cDC1', score of gene set (adata.obs).
    299 total control genes are used. (0:00:00)
computing score 'score_cDC2'
    finished: added
    'score_cDC2', score of gene set (adata.obs).
    350 total control genes are used. (0:00:00)
computing score 'score_central vein EC'
    finished: added
    'score_central vein EC', score of gene set (adata.obs).
    299 total control genes are used. (0:00:00)
computing score 'score_dysfunctional EC'
    finished: added
    'score_dysfunctional EC', score of gene set (adata.obs).
    450 total control genes are used. (0:00:00)
computing score 'score_pDC'
WARNING: genes are not in var_names and ignored: ['Gm21762']
    finished: added
    'score_pDC', score of gene set (adata.obs).
    350 total control genes are used. (0:00:00)
computing score 'score_portal EC'
    finished: added
    'score_portal EC', score of gene set (adata.obs).
    300 total control genes are used. (0:00:00)
In [500]:
sns.kdeplot(old_liver.obs['score_FB_Smoc1'])
plt.axvline(x=0.18)
Out[500]:
<matplotlib.lines.Line2D at 0x7f55aaf54370>
In [514]:
x = 'FB_Wif1'
cutoff = 0.20
old_liver.obs['Anno_'+x] = 'no'
old_liver.obs.loc[old_liver.obs['score_'+x]>cutoff, 'Anno_'+x] = 'yes'
In [513]:
sc.pl.spatial(old_liver, color='Anno_FB_Smoc1', spot_size=200)
/opt/anaconda3/envs/cell2loc/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
In [ ]:
 
In [281]:
sns.kdeplot(old_liver.obs['FB_Portal'])
plt.axvline(x=0.15)
Out[281]:
<matplotlib.lines.Line2D at 0x7f5819c20a90>
In [282]:
old_liver.obs['Anno_FB_Portal'] = 'no'
old_liver.obs.loc[old_liver.obs['FB_Portal']>0.15, 'Anno_FB_Portal'] = 'yes'
In [283]:
sc.pl.spatial(old_liver, color='Anno_FB_Portal', spot_size=200)
/opt/anaconda3/envs/cell2loc/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1251: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
In [653]:
df = pd.DataFrame()
In [654]:
df['x'] = [x[0] for x in old_liver.obsm['spatial']]
df['y'] = [x[1] for x in old_liver.obsm['spatial']]
In [472]:
# df['Fibrotic EC score'] = list(old_liver.obs['Endothelial_Fibrotic'])
# df['FB_Wif1 score'] = list(old_liver.obs['score_FB_Wif1'])
# df['FB_Smoc1 score'] = list(old_liver.obs['score_FB_Smoc1'])
In [655]:
df['Anno FB_Wif1'] = list(old_liver.obs['Anno_FB_Wif1'])
df['Anno FB_Smoc1'] = list(old_liver.obs['Anno_FB_Smoc1'])
df['Fibrotic EC score'] = list(old_liver.obs['Endothelial_Fibrotic'])
df['Anno FB_EC'] = list(old_liver.obs['Anno_EC_FB'])
In [206]:
df
Out[206]:
x y Anno FB_Wif1 Anno FB_Smoc1 Fibrotic EC score Anno FB_EC
0 3000 14450 0.0 0.0 0.000062 0.0
1 3000 14500 0.0 0.0 0.000110 0.0
2 3050 13550 0.0 0.0 0.000352 0.0
3 3050 13600 0.0 0.0 0.000758 0.0
4 3050 13650 0.0 0.0 0.000534 0.0
... ... ... ... ... ... ...
89978 20800 16850 0.0 0.0 0.001608 0.0
89979 20800 16900 0.0 0.0 0.001078 0.0
89980 20800 16950 0.0 0.0 0.001194 0.0
89981 20800 17000 0.0 0.0 0.001498 0.0
89982 20800 17050 0.0 0.0 0.000785 0.0

89983 rows × 6 columns

In [516]:
df['anno'] = 'no'
for index in df.index:
    if df.loc[index, 'Anno FB_Wif1'] == 'yes':
        df.loc[index, 'anno'] = 'FB_Wif1'
    
    if df.loc[index, 'Anno FB_Smoc1'] == 'yes':  
        df.loc[index, 'anno'] = 'FB_Smoc1'
        
    if index == 67632:
        df.loc[index, 'anno'] = 'both'
In [517]:
sns.set_style('ticks')
color = ['gray','mediumpurple','indianred','black']
a = 0
for x in ['no', 'FB_Smoc1', 'FB_Wif1', 'both']:
    
    plt.scatter(data=df[df['anno']==x], x='x', y='y', c=color[a], s=1)
    a = a+1
In [656]:
sns.set_style('ticks')
plt.scatter(data=df.sort_values(by='Fibrotic EC score'), x='x', y='y', c='Fibrotic EC score', s=1)
plt.colorbar()
plt.axvline(17600)
Out[656]:
<matplotlib.lines.Line2D at 0x7fe28bebb820>
In [ ]:
df.sort_values(by)
In [386]:
inte = [72734,72393]
In [382]:
df[df.index.isin(inte)]
Out[382]:
x y Anno FB_Wif1 Anno FB_Smoc1 Fibrotic EC score
71660 17700 7400 0.0 0.000 0.271278
72393 17800 10300 0.0 0.000 0.046859
72734 17850 10400 0.0 0.472 0.015786
80405 19000 10450 0.0 0.000 0.019351
87738 20150 10350 0.0 0.000 0.052640
In [ ]:
 
In [ ]:
 
In [591]:
df[df['y']<10500].sort_values(by='Fibrotic EC score', ascending=False).head(50)
Out[591]:
x y Fibrotic EC score FB_Wif1 score FB_Smoc1 score Anno FB_Wif1 Anno FB_Smoc1 anno
71660 17700 7400 0.271278 0.000000 -0.004 no no no
73072 17900 10400 0.150881 -0.007519 -0.020 no no no
1147 3600 9750 0.146362 0.000000 0.000 no no no
79680 18900 6650 0.142152 -0.002506 -0.004 no no no
23514 7500 5650 0.118555 -0.002506 -0.004 no no no
42952 10550 8850 0.118352 0.079950 -0.012 no no no
77100 18500 9200 0.114397 -0.002506 -0.004 no no no
15462 6300 5150 0.112426 -0.002506 -0.008 no no no
41509 10300 8150 0.111628 -0.017544 -0.012 no no no
73400 17950 10300 0.100549 -0.002506 -0.004 no no no
26076 7850 10300 0.100013 -0.007519 0.000 no no no
84298 19600 9500 0.098787 -0.005013 -0.012 no no no
29689 8400 8000 0.097666 -0.010025 -0.008 no no no
10365 5500 6950 0.097000 -0.002506 -0.016 no no no
42401 10450 9100 0.095556 0.000000 -0.024 no no no
39366 9950 8000 0.095285 -0.020050 -0.012 no no no
31541 8700 7250 0.094776 -0.015038 -0.004 no no no
71013 17600 6550 0.093729 -0.002506 -0.008 no no no
41812 10350 8500 0.093591 -0.010025 -0.004 no no no
38099 9750 7800 0.092039 -0.020050 -0.020 no no no
35856 9400 7500 0.091977 -0.002506 -0.004 no no no
28446 8200 8000 0.091762 0.082456 -0.004 no no no
32155 8800 6900 0.091313 -0.007519 -0.004 no no no
41215 10250 8450 0.090126 -0.002506 -0.020 no no no
42400 10450 9050 0.089501 -0.025063 -0.024 no no no
23933 7550 9800 0.089483 -0.015038 -0.020 no no no
29682 8400 7650 0.089198 -0.022556 -0.012 no no no
40296 10100 7550 0.087987 -0.015038 -0.012 no no no
44289 10800 10300 0.086379 -0.012531 -0.012 no no no
31244 8650 8100 0.085211 -0.007519 -0.012 no no no
41525 10300 8950 0.084347 -0.012531 -0.008 no no no
20784 7100 5800 0.083694 -0.002506 0.000 no no no
37146 9600 7700 0.083060 -0.012531 -0.004 no no no
28445 8200 7950 0.081876 -0.002506 -0.028 no no no
7991 5100 6300 0.080965 -0.015038 0.000 no no no
29708 8400 9250 0.080274 -0.002506 -0.004 no no no
37143 9600 7550 0.079683 -0.015038 -0.004 no no no
40605 10150 7800 0.079196 -0.005013 -0.012 no no no
31238 8650 7800 0.079094 -0.017544 -0.016 no no no
42405 10450 9300 0.077416 -0.015038 -0.004 no no no
37195 9600 10150 0.076960 -0.015038 0.000 no no no
73403 17950 10450 0.076466 -0.007519 -0.016 no no no
37196 9600 10200 0.076293 -0.002506 -0.016 no no no
25643 7800 6650 0.075640 -0.010025 0.000 no no no
39043 9900 7400 0.075566 -0.015038 -0.008 no no no
86183 19900 8950 0.074686 -0.005013 -0.004 no no no
39364 9950 7900 0.074359 -0.005013 -0.012 no no no
34919 9250 8450 0.074313 -0.027569 -0.004 no no no
29395 8350 8800 0.074064 -0.002506 0.000 no no no
35236 9300 8550 0.073990 -0.005013 0.000 no no no
In [400]:
sc.pl.spatial(old_liver, color='Endothelial_Fibrotic', color_map='viridis', spot_size=250, vmax=0.7, show=False)
savefig(path, 'old_EC_FIB')
In [640]:
old_liver.obs['Anno_FB_Smoc1'] = old_liver.obs['score_FB_Smoc1'].copy()
old_liver.obs.loc[old_liver.obs['score_FB_Smoc1']<0.19, 'Anno_FB_Smoc1'] = 0
/tmp/ipykernel_64051/3699508197.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_liver.obs['Anno_FB_Smoc1'] = old_liver.obs['score_FB_Smoc1'].copy()
In [748]:
sc.pl.spatial(old_liver, color='Anno_FB_Smoc1', spot_size=250, vmax=0.7, color_map='viridis', show=False)
savefig(path, 'old_FB_Smoc1')
In [42]:
df
Out[42]:
x y Anno FB_Wif1 Anno FB_Smoc1 Fibrotic EC score
0 3000 14450 0.0 0.0 0.000062
1 3000 14500 0.0 0.0 0.000110
2 3050 13550 0.0 0.0 0.000352
3 3050 13600 0.0 0.0 0.000758
4 3050 13650 0.0 0.0 0.000534
... ... ... ... ... ...
89978 20800 16850 0.0 0.0 0.001608
89979 20800 16900 0.0 0.0 0.001078
89980 20800 16950 0.0 0.0 0.001194
89981 20800 17000 0.0 0.0 0.001498
89982 20800 17050 0.0 0.0 0.000785

89983 rows × 5 columns

In [720]:
temp = df.copy()
In [728]:
x = temp.loc[10000]['x']
y = temp.loc[10000]['y']
In [729]:
x
Out[729]:
5400
In [734]:
temp1 = temp[((temp['x']>=x-500) & (temp['x']<=x+500))]
In [735]:
temp1 = temp1[((temp1['y']>=y-500) & (temp1['y']<=y+500))]
In [737]:
temp1
Out[737]:
x y Fibrotic EC score FB_Wif1 score FB_Smoc1 score Anno FB_Wif1 Anno FB_Smoc1 anno
7128 4900 18600 0.023146 -0.002506 -0.004 no no no
7129 4900 18650 0.023328 -0.005013 0.000 no no no
7130 4900 18700 0.023621 -0.005013 0.000 no no no
7131 4900 18750 0.041043 -0.010025 0.000 no no no
7132 4900 18800 0.018038 -0.002506 -0.004 no no no
... ... ... ... ... ... ... ... ...
13128 5900 19400 0.020789 -0.002506 0.000 no no no
13129 5900 19450 0.018868 -0.002506 -0.004 no no no
13130 5900 19500 0.030079 -0.007519 0.000 no no no
13131 5900 19550 0.025549 -0.002506 0.000 no no no
13132 5900 19600 0.023625 0.000000 -0.008 no no no

441 rows × 8 columns

In [765]:
df['neighbor_index'] = ''
In [767]:
for index in df.index:
    x = df.loc[index, 'x']
    y = df.loc[index, 'y']
    
    temp = df[((df['x']>=x-500) & (df['x']<=x+500))]
    temp = temp[((temp['y']>=y-500) & (temp['y']<=y+500))]
    df.at[index, 'neighbor_index'] = list(temp.index)
In [828]:
np.sqrt(16)
Out[828]:
4.0
In [830]:
df['distance_index'] = ''
In [876]:
for index in df.index:
    a = []
    x = df.loc[index, 'x']
    y = df.loc[index, 'y']
    for index1 in df.index:
        if index1 != index:
            distance = np.sqrt(abs(df.loc[index1, 'x']-x)**2+abs(df.loc[index1, 'y']-y)**2)
            if distance <= 500:
                a.append(index1)
            
    df.at[index, 'distance_index'] = a
/tmp/ipykernel_64051/809004376.py:7: RuntimeWarning: overflow encountered in scalar subtract
  distance = np.sqrt(abs(df.loc[index1, 'x']-x)**2+abs(df.loc[index1, 'y']-y)**2)
/tmp/ipykernel_64051/809004376.py:7: RuntimeWarning: invalid value encountered in sqrt
  distance = np.sqrt(abs(df.loc[index1, 'x']-x)**2+abs(df.loc[index1, 'y']-y)**2)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[876], line 7
      5 for index1 in df.index:
      6     if index1 != index:
----> 7         distance = np.sqrt(abs(df.loc[index1, 'x']-x)**2+abs(df.loc[index1, 'y']-y)**2)
      8         if distance <= 500:
      9             a.append(index1)

KeyboardInterrupt: 
In [807]:
for index in df.index:
    temp = df[df.index.isin(df.loc[index, 'neighbor_index'])]
    df.loc[index, 'fraction'] = len(temp[temp['Endothelial_Fibrotic']>0.1]) / len(temp)
    df.loc[index, 'average'] = temp['Endothelial_Fibrotic'].mean()
/tmp/ipykernel_64051/435104859.py:3: FutureWarning: Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value '0.004524886877828055' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[index, 'fraction'] = len(temp[temp['Endothelial_Fibrotic']>0.1]) / len(temp)
In [808]:
df
Out[808]:
x y Anno FB_Wif1 Anno FB_Smoc1 anno neighbor_index average fraction Endothelial_Fibrotic
0 3000 14450 no no no [0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,... 0.007768 0.0 0.000062
1 3000 14500 no no no [0, 1, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,... 0.007994 0.0 0.000110
2 3050 13550 no no no [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 29, 30, 3... 0.008565 0.0 0.000352
3 3050 13600 no no no [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 30, 3... 0.008470 0.0 0.000758
4 3050 13650 no no no [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 3... 0.008412 0.0 0.000534
... ... ... ... ... ... ... ... ... ...
89978 20800 16850 no no no [88609, 88610, 88611, 88612, 88613, 88614, 886... 0.015712 0.0 0.001608
89979 20800 16900 no no no [88610, 88611, 88612, 88613, 88614, 88615, 886... 0.015672 0.0 0.001078
89980 20800 16950 no no no [88611, 88612, 88613, 88614, 88615, 88616, 886... 0.015437 0.0 0.001194
89981 20800 17000 no no no [88612, 88613, 88614, 88615, 88616, 88617, 886... 0.015173 0.0 0.001498
89982 20800 17050 no no no [88613, 88614, 88615, 88616, 88617, 88618, 886... 0.015138 0.0 0.000785

89983 rows × 9 columns

In [395]:
sc.pl.spatial(old_liver, color='Endothelial_Fibrotic', 
              cmap='viridis', spot_size=200, vmax=0.7)
In [823]:
plt.scatter(data=df, x='x', y='y', c='temp', s='average')
plt.colorbar()
Out[823]:
<matplotlib.colorbar.Colorbar at 0x7f58056127f0>
In [644]:
old_liver.obs['Anno_FB_Wif1'] = old_liver.obs['score_FB_Wif1'].copy()
old_liver.obs.loc[old_liver.obs['score_FB_Wif1']<0.20, 'Anno_FB_Wif1'] = 0
/tmp/ipykernel_64051/1408832790.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  old_liver.obs['Anno_FB_Wif1'] = old_liver.obs['score_FB_Wif1'].copy()
In [694]:
sc.pl.spatial(old_liver, color='Anno_FB_Wif1', spot_size=250, vmax=0.7, color_map='viridis', show=False)
savefig(path, 'old_FB_Wif1')
In [422]:
old_liver.obs['Anno_EC_FB'] = old_liver.obs['Endothelial_Fibrotic'].copy()
In [423]:
for index in old_liver.obs.index:
    if old_liver.obs.loc[index, 'Anno_EC_FB'] < 0.15:
        old_liver.obs.loc[index, 'Anno_EC_FB'] = 0
In [424]:
sc.pl.spatial(old_liver, color=['Anno_EC_FB'], spot_size=200)

gaussian

In [404]:
import matplotlib.pyplot as plt
import numpy as np
import plotly.io
import skimage
import plotly.express as px
from scipy import ndimage as ndi

from skimage import (
    filters, measure, morphology, segmentation
)
from skimage.data import protein_transport
In [906]:
df[['x','y','Anno FB_EC']]
Out[906]:
x y Anno FB_EC
0 3000 14450 0.0
1 3000 14500 0.0
2 3050 13550 0.0
3 3050 13600 0.0
4 3050 13650 0.0
... ... ... ...
89978 20800 16850 0.0
89979 20800 16900 0.0
89980 20800 16950 0.0
89981 20800 17000 0.0
89982 20800 17050 0.0

89983 rows × 3 columns

In [907]:
df
Out[907]:
x y Anno FB_Wif1 Anno FB_Smoc1 Fibrotic EC score Anno FB_EC Thresh_FB_EC Thresh_FB_Smoc1 Thresh_FB_Wif1
0 3000 14450 0.0 0.0 0.000062 0.0 0 0 0
1 3000 14500 0.0 0.0 0.000110 0.0 0 0 0
2 3050 13550 0.0 0.0 0.000352 0.0 0 0 0
3 3050 13600 0.0 0.0 0.000758 0.0 0 0 0
4 3050 13650 0.0 0.0 0.000534 0.0 0 0 0
... ... ... ... ... ... ... ... ... ...
89978 20800 16850 0.0 0.0 0.001608 0.0 0 1 0
89979 20800 16900 0.0 0.0 0.001078 0.0 0 1 0
89980 20800 16950 0.0 0.0 0.001194 0.0 0 0 0
89981 20800 17000 0.0 0.0 0.001498 0.0 0 0 0
89982 20800 17050 0.0 0.0 0.000785 0.0 0 0 0

89983 rows × 9 columns

In [908]:
image = df[['x','y','Anno FB_EC']].pivot_table(index='y', columns='x',
                                             values='Anno FB_EC')
In [909]:
image_raw = image.copy()
In [910]:
image.fillna(0, inplace=True) 
In [911]:
image = image.to_numpy()
In [912]:
image
Out[912]:
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
In [913]:
smooth = filters.gaussian(image, sigma=2)
In [914]:
smooth_raw = smooth.copy()
In [915]:
smooth = pd.DataFrame(smooth)
In [916]:
smooth.columns = image_raw.columns
smooth.index = image_raw.index
In [917]:
smooth_out = image_raw.where(image_raw.isna(), smooth)
In [918]:
sns.set_style('white')
sns.despine(top=True, bottom=True, left=True, right=True)
fig, ax = plt.subplots()
ax.imshow(smooth_out, vmax=(0.7/old_liver.obs['Endothelial_Fibrotic'].max())*smooth_out.max().max(), cmap='viridis')
ax.set_axis_off()
savefig(path, 'stereo_fb_ec_gaussian')
<Figure size 800x800 with 0 Axes>
In [474]:
smooth_raw
Out[474]:
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])
In [531]:
thresh_value = filters.threshold_otsu(smooth_raw)
thresh = smooth_raw > thresh_value

fill = ndi.binary_fill_holes(thresh)
In [546]:
thresh
Out[546]:
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
In [547]:
thresh_out = pd.DataFrame(thresh)

thresh_out.columns = image_raw.columns
thresh_out.index = image_raw.index
In [543]:
thresh_out = image_raw.where(image_raw.isna(), thresh)
In [552]:
type(smooth_out.iloc[1,1])
Out[552]:
numpy.float64
In [566]:
thresh_out
Out[566]:
x 3000 3050 3100 3150 3200 3250 3300 3350 3400 3450 ... 20350 20400 20450 20500 20550 20600 20650 20700 20750 20800
y
4450 False False False False False False False False False False ... False False False False False False False False False False
4500 False False False False False False False False False False ... False False False False False False False False False False
4550 False False False False False False False False False False ... False False False False False False False False False False
4600 False False False False False False False False False False ... False False False False False False False False False False
4650 False False False False False False False False False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
23450 False False False False False False False False False False ... False False False False False False False False False False
23500 False False False False False False False False False False ... False False False False False False False False False False
23550 False False False False False False False False False False ... False False False False False False False False False False
23600 False False False False False False False False False False ... False False False False False False False False False False
23650 False False False False False False False False False False ... False False False False False False False False False False

385 rows × 333 columns

In [571]:
thresh_out.reset_index()
Out[571]:
x y 3000 3050 3100 3150 3200 3250 3300 3350 3400 ... 20350 20400 20450 20500 20550 20600 20650 20700 20750 20800
0 4450 False False False False False False False False False ... False False False False False False False False False False
1 4500 False False False False False False False False False ... False False False False False False False False False False
2 4550 False False False False False False False False False ... False False False False False False False False False False
3 4600 False False False False False False False False False ... False False False False False False False False False False
4 4650 False False False False False False False False False ... False False False False False False False False False False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
380 23450 False False False False False False False False False ... False False False False False False False False False False
381 23500 False False False False False False False False False ... False False False False False False False False False False
382 23550 False False False False False False False False False ... False False False False False False False False False False
383 23600 False False False False False False False False False ... False False False False False False False False False False
384 23650 False False False False False False False False False ... False False False False False False False False False False

385 rows × 334 columns

In [572]:
pd.melt(thresh_out.reset_index())
Out[572]:
x value
0 y 4450
1 y 4500
2 y 4550
3 y 4600
4 y 4650
... ... ...
128585 20800 0
128586 20800 0
128587 20800 0
128588 20800 0
128589 20800 0

128590 rows × 2 columns

In [592]:
threshdf = thresh_out.reset_index().melt(id_vars='y', value_name='Anno FB EC')
In [616]:
threshdf[threshdf['Anno FB EC']!=False]
Out[616]:
y x Anno FB EC
56164 21400 10250 True
56165 21450 10250 True
56166 21500 10250 True
56548 21350 10300 True
56549 21400 10300 True
... ... ... ...
105994 10400 17950 True
105995 10450 17950 True
105996 10500 17950 True
106378 10350 18000 True
106379 10400 18000 True

196 rows × 3 columns

In [617]:
df
Out[617]:
x y Anno FB_Wif1 Anno FB_Smoc1 Fibrotic EC score Anno FB_EC
0 3000 14450 0.0 0.0 0.000062 0.0
1 3000 14500 0.0 0.0 0.000110 0.0
2 3050 13550 0.0 0.0 0.000352 0.0
3 3050 13600 0.0 0.0 0.000758 0.0
4 3050 13650 0.0 0.0 0.000534 0.0
... ... ... ... ... ... ...
89978 20800 16850 0.0 0.0 0.001608 0.0
89979 20800 16900 0.0 0.0 0.001078 0.0
89980 20800 16950 0.0 0.0 0.001194 0.0
89981 20800 17000 0.0 0.0 0.001498 0.0
89982 20800 17050 0.0 0.0 0.000785 0.0

89983 rows × 6 columns

In [636]:
threshdf
Out[636]:
y x Anno FB EC
0 4450 3000 False
1 4500 3000 False
2 4550 3000 False
3 4600 3000 False
4 4650 3000 False
... ... ... ...
128200 23450 20800 False
128201 23500 20800 False
128202 23550 20800 False
128203 23600 20800 False
128204 23650 20800 False

128205 rows × 3 columns

In [672]:
temp = threshdf[threshdf['Anno FB EC']!=False]
In [556]:
fig, ax = plt.subplots()
ax.imshow(thresh_out, cmap=cmap)
# savefig(path, 'thresh_ECFIB')
Out[556]:
<matplotlib.image.AxesImage at 0x7fe28bc85ee0>
In [678]:
df['Thresh_FB_EC'] = 0
for index in temp.index:
    x=temp.loc[index, 'x']
    y=temp.loc[index, 'y']
    temp1 = df[df['x']==x].copy()
    temp1 = temp1[temp1['y']==y].copy()
    df.loc[temp1.index, 'Thresh_FB_EC'] = 1
In [676]:
plt.scatter(data=temp, x='x', y='y', c='Anno FB EC', s=0.1)
Out[676]:
<matplotlib.collections.PathCollection at 0x7fe261c1fac0>
In [681]:
plt.scatter(data=df[df['Thresh_FB_EC']==0], x='x', y='y', c='black', s=0.1)
plt.scatter(data=df[df['Thresh_FB_EC']==1], x='x', y='y', c='red', s=0.1)
Out[681]:
<matplotlib.collections.PathCollection at 0x7fe26335bb50>