{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# TCR restriction for every patient so far" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from nero import Harmonia, Hoplites\n", "from glob import glob\n", "import os\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import scanpy as sc\n", "import matplotlib\n", "%matplotlib inline\n", "# Font type 42 (TrueType) keeps text in exported PDF/PS figures as editable text\n", "matplotlib.rcParams['pdf.fonttype'] = 42\n", "matplotlib.rcParams['ps.fonttype'] = 42" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read the combined .h5ad file into `adata`\n", "adata = sc.read(\"/covid/count_mat/run12345_combi_final2.h5ad\")\n", "# Drop every cell whose donor is one of the two excluded donor IDs\n", "adata = adata[~adata.obs['donor'].isin(['Rep_C_1011', 'Rep_C_1035']), :]\n", "# Split adata into runs. Keys are 'run0', 'run1', ... numbered by the order in\n", "# which each run label first appears in obs['run'] (the enumerate index, not\n", "# the number embedded in the run label itself).\n", "adata_split = {\n", "    f\"run{n}\": adata[adata.obs['run'] == run, :]\n", "    for n, run in enumerate(adata.obs['run'].unique())\n", "}\n", "# Clonotype files matching the glob pattern, sorted by path\n", "clono_list = sorted(glob(\"/covid/yun/TCR/clonotypes*\"))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Start from an empty frame (presumably filled in by later cells - TODO confirm)\n", "master_df = pd.DataFrame()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | well | \n", "run | \n", "percent_mito | \n", "n_umi | \n", "n_genes | \n", "barcode | \n", "assignment | \n", "freemux_cluster | \n", "scrublet | \n", "n_counts | \n", "... | \n", "GGT1-1 | \n", "TNFSF14-1 | \n", "DR3 | \n", "EGFR-1 | \n", "cell_type_highres | \n", "cell_type_highres_mRNA | \n", "cell_type_major_highres | \n", "cell_type_major_low | \n", "cell_type_major_middle | \n", "cell_type_major_top | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AAACCTGAGAAGATTC-1-0-0 | \n", "200423_lane1 | \n", "200423_run1 | \n", "0.016897 | \n", "3906.0 | \n", "1320 | \n", "AAACCTGAGAAGATTC-0 | \n", "SNG | \n", "2 | \n", "SNG | \n", "3906.0 | \n", "... | \n", "1.479644 | \n", "1.160761 | \n", "0.868336 | \n", "1.424757 | \n", "central_memory_CD4T | \n", "naive_CD4T | \n", "CD4T | \n", "memory_CD4T | \n", "TNK | \n", "Lymphoid | \n", "
AAACCTGAGACGCACA-1-0-0 | \n", "200423_lane1 | \n", "200423_run1 | \n", "0.065010 | \n", "1046.0 | \n", "498 | \n", "AAACCTGAGACGCACA-0 | \n", "SNG | \n", "7 | \n", "SNG | \n", "1046.0 | \n", "... | \n", "1.897901 | \n", "1.521341 | \n", "1.061458 | \n", "1.278155 | \n", "cMono | \n", "cMono | \n", "Monocyte | \n", "cMono | \n", "MonoDC | \n", "Myeloid | \n", "
AAACCTGAGACTAAGT-1-0-0 | \n", "200423_lane1 | \n", "200423_run1 | \n", "0.025267 | \n", "2058.0 | \n", "890 | \n", "AAACCTGAGACTAAGT-0 | \n", "SNG | \n", "0 | \n", "SNG | \n", "2058.0 | \n", "... | \n", "1.592165 | \n", "1.101873 | \n", "0.842044 | \n", "1.442111 | \n", "memory_CD4T | \n", "memory_CD4T | \n", "CD4T | \n", "memory_CD4T | \n", "TNK | \n", "Lymphoid | \n", "
AAACCTGAGCCATCGC-1-0-0 | \n", "200423_lane1 | \n", "200423_run1 | \n", "0.031918 | \n", "3415.0 | \n", "1560 | \n", "AAACCTGAGCCATCGC-0 | \n", "SNG | \n", "3 | \n", "SNG | \n", "3414.0 | \n", "... | \n", "1.675170 | \n", "1.202493 | \n", "0.860329 | \n", "1.448289 | \n", "T_boundplatelet | \n", "CD8T_boundplatelet | \n", "leuko_platelet | \n", "leuko_platelet | \n", "leuko_platelet_megakaryocyte | \n", "Megakaryocyte_LeukoPlatelets | \n", "
AAACCTGAGCTATGCT-1-0-0 | \n", "200423_lane1 | \n", "200423_run1 | \n", "0.044571 | \n", "1234.0 | \n", "561 | \n", "AAACCTGAGCTATGCT-0 | \n", "SNG | \n", "9 | \n", "SNG | \n", "1234.0 | \n", "... | \n", "2.094133 | \n", "1.430983 | \n", "0.877767 | \n", "1.574764 | \n", "ISG_cMono | \n", "ISG_cMono | \n", "Monocyte | \n", "cMono | \n", "MonoDC | \n", "Myeloid | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
TTTGTCATCAGCGATT-1-5-4 | \n", "200605_lane6 | \n", "200605_run5 | \n", "0.044663 | \n", "7926.0 | \n", "2417 | \n", "TTTGTCATCAGCGATT-5 | \n", "SNG | \n", "21 | \n", "SNG | \n", "7926.0 | \n", "... | \n", "1.593161 | \n", "1.407944 | \n", "0.922187 | \n", "1.633517 | \n", "ncMono | \n", "ncMono | \n", "Monocyte | \n", "ncMono | \n", "MonoDC | \n", "Myeloid | \n", "
TTTGTCATCAGGTTCA-1-5-4 | \n", "200605_lane6 | \n", "200605_run5 | \n", "0.075439 | \n", "570.0 | \n", "284 | \n", "TTTGTCATCAGGTTCA-5 | \n", "SNG | \n", "10 | \n", "SNG | \n", "570.0 | \n", "... | \n", "1.771344 | \n", "1.272756 | \n", "0.903961 | \n", "1.498104 | \n", "Megakaryocytes | \n", "Megakaryocytes_high | \n", "Megakaryocyte | \n", "leuko_platelet | \n", "leuko_platelet_megakaryocyte | \n", "Megakaryocyte_LeukoPlatelets | \n", "
TTTGTCATCAGTGTTG-1-5-4 | \n", "200605_lane6 | \n", "200605_run5 | \n", "0.064576 | \n", "2710.0 | \n", "1145 | \n", "TTTGTCATCAGTGTTG-5 | \n", "SNG | \n", "14 | \n", "SNG | \n", "2710.0 | \n", "... | \n", "1.468829 | \n", "1.314815 | \n", "1.260804 | \n", "1.418170 | \n", "EMRA_CD4T | \n", "naive_CD8T | \n", "CD4T | \n", "memory_CD4T | \n", "TNK | \n", "Lymphoid | \n", "
TTTGTCATCCAAGTAC-1-5-4 | \n", "200605_lane6 | \n", "200605_run5 | \n", "0.035937 | \n", "7068.0 | \n", "2014 | \n", "TTTGTCATCCAAGTAC-5 | \n", "SNG | \n", "21 | \n", "SNG | \n", "7067.0 | \n", "... | \n", "2.120684 | \n", "1.264018 | \n", "0.674574 | \n", "1.353770 | \n", "cMono | \n", "cMono | \n", "Monocyte | \n", "cMono | \n", "MonoDC | \n", "Myeloid | \n", "
TTTGTCATCCTAGTGA-1-5-4 | \n", "200605_lane6 | \n", "200605_run5 | \n", "0.053383 | \n", "2941.0 | \n", "1137 | \n", "TTTGTCATCCTAGTGA-5 | \n", "SNG | \n", "20 | \n", "SNG | \n", "2941.0 | \n", "... | \n", "1.680010 | \n", "1.459923 | \n", "0.843539 | \n", "1.306694 | \n", "CD8T_boundplatelet | \n", "CD4T_boundplatelet | \n", "leuko_platelet | \n", "leuko_platelet | \n", "leuko_platelet_megakaryocyte | \n", "Megakaryocyte_LeukoPlatelets | \n", "
357244 rows × 222 columns
\n", "