cd /mnt/d/Brain_region_project/Data_for_cNMF
conda activate cnmf_env
python

import os
import pandas as pd
import numpy as np
from scipy.io import mmread
import scipy.sparse as sp
import matplotlib.pyplot as plt
from IPython.display import Image
import scanpy as sc

np.random.seed(14)

adata = sc.read_h5ad("Ast_2000.h5ad")
sc.pp.filter_genes(adata, min_cells=3)

#the following line may only be necessary when creating h5ad file with SeuratDisk
adata.__dict__['_raw'].__dict__['_var'] = adata.__dict__['_raw'].__dict__['_var'].rename(columns={'_index': 'features'})
count_adat_fn = 'Ast_2000_counts.h5ad'
sc.write(count_adat_fn, adata)


#######################################################################################
conda activate cnmf_env
cd /mnt/d/Brain_region_project/Data_for_cNMF

python ./cnmf.py prepare --output-dir ./cNMF --name Ast_cNMF_2000 -c ./Ast_2000_counts.h5ad -k 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20  --n-iter 200 --total-workers 1 --seed 14 --numgenes 2000

python ./cnmf.py factorize --output-dir ./cNMF --name Ast_cNMF_2000 --worker-index 0

python ./cnmf.py combine --output-dir ./cNMF --name Ast_cNMF_2000

python ./cnmf.py k_selection_plot --output-dir ./cNMF --name Ast_cNMF_2000

python ./cnmf.py consensus --output-dir ./cNMF --name Ast_cNMF_2000 --components 12 --local-density-threshold 2 --show-clustering

python ./cnmf.py consensus --output-dir ./cNMF --name Ast_cNMF_2000 --components 12 --local-density-threshold 0.1 --show-clustering

#######################################################################################

cd /mnt/d/Brain_region_project/Data_for_cNMF
conda activate cnmf_env
python
import os
import pandas as pd
import numpy as np
from scipy.io import mmread
import scipy.sparse as sp
import matplotlib.pyplot as plt
from IPython.display import Image
import scanpy as sc

usage = pd.read_csv('./cNMF/Ast_cNMF_2000/Ast_cNMF_2000.usages.k_12.dt_0_1.consensus.txt',sep='\t', index_col=0)
usage.columns = ['Usage_%s' % i for i in usage.columns]
usage.head()

usage_norm = usage.div(usage.sum(axis=1), axis=0)
usage_norm.head()

usage_norm.to_csv('Ast_cNMF_2000_usage_norm.csv')

## Load the Z-scored GEPs which reflect how enriched a gene is in each GEP relative to all of the others
gene_scores = pd.read_csv('./cNMF/Ast_cNMF_2000/Ast_cNMF_2000.gene_spectra_score.k_12.dt_0_1.txt',sep='\t', index_col=0).T
gene_scores.head()

top_genes = []
ngenes = 200
for gep in gene_scores.columns:
    top_genes.append(list(gene_scores.sort_values(by=gep, ascending=False).index[:ngenes]))

top_genes = pd.DataFrame(top_genes, index=gene_scores.columns).T
top_genes

gene_scores.to_csv('Ast_cNMF_2000_gene_scores.csv')
top_genes.to_csv('Ast_cNMF_2000_top_genes.csv')

#######################################################################################
