{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# TCR restriction for every patient so far" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from nero import Harmonia, Hoplites\n", "from glob import glob\n", "import os\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import scanpy as sc\n", "import matplotlib\n", "%matplotlib inline\n", "matplotlib.rcParams['pdf.fonttype'] = 42\n", "matplotlib.rcParams['ps.fonttype'] = 42" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Input locations and the donors left out of this analysis.\n", "COUNT_MATRIX_PATH = \"/covid/count_mat/run12345_combi_final2.h5ad\"\n", "CLONOTYPE_GLOB = \"/covid/yun/TCR/clonotypes*\"\n", "EXCLUDED_DONORS = ['Rep_C_1011', 'Rep_C_1035']\n", "\n", "adata = sc.read(COUNT_MATRIX_PATH)\n", "keep_cells = ~adata.obs.donor.isin(EXCLUDED_DONORS)\n", "adata = adata[keep_cells, :]\n", "\n", "# Split adata into runs: one subset per value of adata.obs.run, keyed run0, run1, ... in order of first appearance.\n", "adata_split = {}\n", "for run_idx, run_name in enumerate(adata.obs.run.unique()):\n", "    adata_split[f\"run{run_idx}\"] = adata[adata.obs.run == run_name, :]\n", "\n", "# Clonotype tables in sorted filename order.\n", "# NOTE(review): these are later paired with the run subsets by list position - confirm the two orderings agree.\n", "clono_list = sorted(glob(CLONOTYPE_GLOB))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "master_df = pd.DataFrame()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wellrunpercent_miton_umin_genesbarcodeassignmentfreemux_clusterscrubletn_counts...GGT1-1TNFSF14-1DR3EGFR-1cell_type_highrescell_type_highres_mRNAcell_type_major_highrescell_type_major_lowcell_type_major_middlecell_type_major_top
AAACCTGAGAAGATTC-1-0-0200423_lane1200423_run10.0168973906.01320AAACCTGAGAAGATTC-0SNG2SNG3906.0...1.4796441.1607610.8683361.424757central_memory_CD4Tnaive_CD4TCD4Tmemory_CD4TTNKLymphoid
AAACCTGAGACGCACA-1-0-0200423_lane1200423_run10.0650101046.0498AAACCTGAGACGCACA-0SNG7SNG1046.0...1.8979011.5213411.0614581.278155cMonocMonoMonocytecMonoMonoDCMyeloid
AAACCTGAGACTAAGT-1-0-0200423_lane1200423_run10.0252672058.0890AAACCTGAGACTAAGT-0SNG0SNG2058.0...1.5921651.1018730.8420441.442111memory_CD4Tmemory_CD4TCD4Tmemory_CD4TTNKLymphoid
AAACCTGAGCCATCGC-1-0-0200423_lane1200423_run10.0319183415.01560AAACCTGAGCCATCGC-0SNG3SNG3414.0...1.6751701.2024930.8603291.448289T_boundplateletCD8T_boundplateletleuko_plateletleuko_plateletleuko_platelet_megakaryocyteMegakaryocyte_LeukoPlatelets
AAACCTGAGCTATGCT-1-0-0200423_lane1200423_run10.0445711234.0561AAACCTGAGCTATGCT-0SNG9SNG1234.0...2.0941331.4309830.8777671.574764ISG_cMonoISG_cMonoMonocytecMonoMonoDCMyeloid
..................................................................
TTTGTCATCAGCGATT-1-5-4200605_lane6200605_run50.0446637926.02417TTTGTCATCAGCGATT-5SNG21SNG7926.0...1.5931611.4079440.9221871.633517ncMononcMonoMonocytencMonoMonoDCMyeloid
TTTGTCATCAGGTTCA-1-5-4200605_lane6200605_run50.075439570.0284TTTGTCATCAGGTTCA-5SNG10SNG570.0...1.7713441.2727560.9039611.498104MegakaryocytesMegakaryocytes_highMegakaryocyteleuko_plateletleuko_platelet_megakaryocyteMegakaryocyte_LeukoPlatelets
TTTGTCATCAGTGTTG-1-5-4200605_lane6200605_run50.0645762710.01145TTTGTCATCAGTGTTG-5SNG14SNG2710.0...1.4688291.3148151.2608041.418170EMRA_CD4Tnaive_CD8TCD4Tmemory_CD4TTNKLymphoid
TTTGTCATCCAAGTAC-1-5-4200605_lane6200605_run50.0359377068.02014TTTGTCATCCAAGTAC-5SNG21SNG7067.0...2.1206841.2640180.6745741.353770cMonocMonoMonocytecMonoMonoDCMyeloid
TTTGTCATCCTAGTGA-1-5-4200605_lane6200605_run50.0533832941.01137TTTGTCATCCTAGTGA-5SNG20SNG2941.0...1.6800101.4599230.8435391.306694CD8T_boundplateletCD4T_boundplateletleuko_plateletleuko_plateletleuko_platelet_megakaryocyteMegakaryocyte_LeukoPlatelets
\n", "

357244 rows × 222 columns

\n", "
" ], "text/plain": [ " well run percent_mito n_umi \\\n", "AAACCTGAGAAGATTC-1-0-0 200423_lane1 200423_run1 0.016897 3906.0 \n", "AAACCTGAGACGCACA-1-0-0 200423_lane1 200423_run1 0.065010 1046.0 \n", "AAACCTGAGACTAAGT-1-0-0 200423_lane1 200423_run1 0.025267 2058.0 \n", "AAACCTGAGCCATCGC-1-0-0 200423_lane1 200423_run1 0.031918 3415.0 \n", "AAACCTGAGCTATGCT-1-0-0 200423_lane1 200423_run1 0.044571 1234.0 \n", "... ... ... ... ... \n", "TTTGTCATCAGCGATT-1-5-4 200605_lane6 200605_run5 0.044663 7926.0 \n", "TTTGTCATCAGGTTCA-1-5-4 200605_lane6 200605_run5 0.075439 570.0 \n", "TTTGTCATCAGTGTTG-1-5-4 200605_lane6 200605_run5 0.064576 2710.0 \n", "TTTGTCATCCAAGTAC-1-5-4 200605_lane6 200605_run5 0.035937 7068.0 \n", "TTTGTCATCCTAGTGA-1-5-4 200605_lane6 200605_run5 0.053383 2941.0 \n", "\n", " n_genes barcode assignment \\\n", "AAACCTGAGAAGATTC-1-0-0 1320 AAACCTGAGAAGATTC-0 SNG \n", "AAACCTGAGACGCACA-1-0-0 498 AAACCTGAGACGCACA-0 SNG \n", "AAACCTGAGACTAAGT-1-0-0 890 AAACCTGAGACTAAGT-0 SNG \n", "AAACCTGAGCCATCGC-1-0-0 1560 AAACCTGAGCCATCGC-0 SNG \n", "AAACCTGAGCTATGCT-1-0-0 561 AAACCTGAGCTATGCT-0 SNG \n", "... ... ... ... \n", "TTTGTCATCAGCGATT-1-5-4 2417 TTTGTCATCAGCGATT-5 SNG \n", "TTTGTCATCAGGTTCA-1-5-4 284 TTTGTCATCAGGTTCA-5 SNG \n", "TTTGTCATCAGTGTTG-1-5-4 1145 TTTGTCATCAGTGTTG-5 SNG \n", "TTTGTCATCCAAGTAC-1-5-4 2014 TTTGTCATCCAAGTAC-5 SNG \n", "TTTGTCATCCTAGTGA-1-5-4 1137 TTTGTCATCCTAGTGA-5 SNG \n", "\n", " freemux_cluster scrublet n_counts ... GGT1-1 \\\n", "AAACCTGAGAAGATTC-1-0-0 2 SNG 3906.0 ... 1.479644 \n", "AAACCTGAGACGCACA-1-0-0 7 SNG 1046.0 ... 1.897901 \n", "AAACCTGAGACTAAGT-1-0-0 0 SNG 2058.0 ... 1.592165 \n", "AAACCTGAGCCATCGC-1-0-0 3 SNG 3414.0 ... 1.675170 \n", "AAACCTGAGCTATGCT-1-0-0 9 SNG 1234.0 ... 2.094133 \n", "... ... ... ... ... ... \n", "TTTGTCATCAGCGATT-1-5-4 21 SNG 7926.0 ... 1.593161 \n", "TTTGTCATCAGGTTCA-1-5-4 10 SNG 570.0 ... 1.771344 \n", "TTTGTCATCAGTGTTG-1-5-4 14 SNG 2710.0 ... 1.468829 \n", "TTTGTCATCCAAGTAC-1-5-4 21 SNG 7067.0 ... 
2.120684 \n", "TTTGTCATCCTAGTGA-1-5-4 20 SNG 2941.0 ... 1.680010 \n", "\n", " TNFSF14-1 DR3 EGFR-1 cell_type_highres \\\n", "AAACCTGAGAAGATTC-1-0-0 1.160761 0.868336 1.424757 central_memory_CD4T \n", "AAACCTGAGACGCACA-1-0-0 1.521341 1.061458 1.278155 cMono \n", "AAACCTGAGACTAAGT-1-0-0 1.101873 0.842044 1.442111 memory_CD4T \n", "AAACCTGAGCCATCGC-1-0-0 1.202493 0.860329 1.448289 T_boundplatelet \n", "AAACCTGAGCTATGCT-1-0-0 1.430983 0.877767 1.574764 ISG_cMono \n", "... ... ... ... ... \n", "TTTGTCATCAGCGATT-1-5-4 1.407944 0.922187 1.633517 ncMono \n", "TTTGTCATCAGGTTCA-1-5-4 1.272756 0.903961 1.498104 Megakaryocytes \n", "TTTGTCATCAGTGTTG-1-5-4 1.314815 1.260804 1.418170 EMRA_CD4T \n", "TTTGTCATCCAAGTAC-1-5-4 1.264018 0.674574 1.353770 cMono \n", "TTTGTCATCCTAGTGA-1-5-4 1.459923 0.843539 1.306694 CD8T_boundplatelet \n", "\n", " cell_type_highres_mRNA cell_type_major_highres \\\n", "AAACCTGAGAAGATTC-1-0-0 naive_CD4T CD4T \n", "AAACCTGAGACGCACA-1-0-0 cMono Monocyte \n", "AAACCTGAGACTAAGT-1-0-0 memory_CD4T CD4T \n", "AAACCTGAGCCATCGC-1-0-0 CD8T_boundplatelet leuko_platelet \n", "AAACCTGAGCTATGCT-1-0-0 ISG_cMono Monocyte \n", "... ... ... \n", "TTTGTCATCAGCGATT-1-5-4 ncMono Monocyte \n", "TTTGTCATCAGGTTCA-1-5-4 Megakaryocytes_high Megakaryocyte \n", "TTTGTCATCAGTGTTG-1-5-4 naive_CD8T CD4T \n", "TTTGTCATCCAAGTAC-1-5-4 cMono Monocyte \n", "TTTGTCATCCTAGTGA-1-5-4 CD4T_boundplatelet leuko_platelet \n", "\n", " cell_type_major_low cell_type_major_middle \\\n", "AAACCTGAGAAGATTC-1-0-0 memory_CD4T TNK \n", "AAACCTGAGACGCACA-1-0-0 cMono MonoDC \n", "AAACCTGAGACTAAGT-1-0-0 memory_CD4T TNK \n", "AAACCTGAGCCATCGC-1-0-0 leuko_platelet leuko_platelet_megakaryocyte \n", "AAACCTGAGCTATGCT-1-0-0 cMono MonoDC \n", "... ... ... 
\n", "TTTGTCATCAGCGATT-1-5-4 ncMono MonoDC \n", "TTTGTCATCAGGTTCA-1-5-4 leuko_platelet leuko_platelet_megakaryocyte \n", "TTTGTCATCAGTGTTG-1-5-4 memory_CD4T TNK \n", "TTTGTCATCCAAGTAC-1-5-4 cMono MonoDC \n", "TTTGTCATCCTAGTGA-1-5-4 leuko_platelet leuko_platelet_megakaryocyte \n", "\n", " cell_type_major_top \n", "AAACCTGAGAAGATTC-1-0-0 Lymphoid \n", "AAACCTGAGACGCACA-1-0-0 Myeloid \n", "AAACCTGAGACTAAGT-1-0-0 Lymphoid \n", "AAACCTGAGCCATCGC-1-0-0 Megakaryocyte_LeukoPlatelets \n", "AAACCTGAGCTATGCT-1-0-0 Myeloid \n", "... ... \n", "TTTGTCATCAGCGATT-1-5-4 Myeloid \n", "TTTGTCATCAGGTTCA-1-5-4 Megakaryocyte_LeukoPlatelets \n", "TTTGTCATCAGTGTTG-1-5-4 Lymphoid \n", "TTTGTCATCCAAGTAC-1-5-4 Myeloid \n", "TTTGTCATCCTAGTGA-1-5-4 Megakaryocyte_LeukoPlatelets \n", "\n", "[357244 rows x 222 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata.obs" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in 
Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Gini index for ID ICC_C_0001: 0.36562779414821805\n", "Gini index for ID ICC_C_0002: 0.045312977099236644\n", "Gini index for ID ICC_C_0003: 0.21444225727014082\n", "Gini index for ID ICC_C_0004: 0.026675579159417637\n", "Gini index for ID ICC_C_0005: 0.38568831859808583\n", "Gini index for ID Rep_C_1001: 0.14186159590247052\n", "Gini index for ID Rep_C_1002: 0.0\n", "Gini index for ID Rep_C_1003: 0.04567702642225149\n", "Gini index for ID Rep_C_1005: 0.2644538600522889\n", "Gini index for ID Rep_C_1006: 0.07712765957446809\n", "Gini index for ID Rep_C_1007: 0.4742294965405967\n", "Gini index for ID Rep_C_1008: 0.04997137593288437\n", "Gini index for ID Rep_C_1009: 0.05524164391019915\n", "Gini index for ID Rep_C_1010: 0.05360068586455412\n", "Gini index for ID Rep_C_1012: 0.24989480574126888\n", "Gini index for ID Rep_C_1014: 0.1765332268754844\n", "Gini index for ID Rep_C_1015: 0.4765969662597367\n", "Gini index for ID Rep_C_1016: 0.01207108249817482\n", "Gini index for ID Rep_C_1017: 0.39915694630775783\n", "Gini index for ID Rep_C_1018: 0.16480674443831236\n", "Gini index for ID Rep_C_1020: 0.10570686410228493\n", "Gini index for ID Rep_C_1023: 0.5156233251152321\n", "Gini index for ID Rep_C_1025: 0.2552383968797375\n", "Gini index for ID 
Rep_C_1030: 0.0343253635179101\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Gini index for ID ICC_C_0001: 0.19150579375323656\n", "Gini index for ID ICC_C_0003: 0.3278959343855228\n", "Gini index for ID Rep_C_1001: 0.09726013865440569\n", "Gini index for ID Rep_C_1002: 0.10498979092663127\n", "Gini index for ID Rep_C_1003: 0.033008095180171816\n", "Gini index for ID Rep_C_1005: 0.12055480664659032\n", "Gini index for ID Rep_C_1006: 0.12268023065494926\n", "Gini index for ID Rep_C_1008: 0.04313220233804613\n", "Gini index for ID Rep_C_1009: 
0.061458601397961804\n", "Gini index for ID Rep_C_1010: 0.0631224395078749\n", "Gini index for ID Rep_C_1014: 0.10292504539674488\n", "Gini index for ID Rep_C_1015: 0.46401117535506153\n", "Gini index for ID Rep_C_1021: 0.18256162599228293\n", "Gini index for ID Rep_C_1026: 0.1356431590412634\n", "Gini index for ID Rep_C_1028: 0.018148030158224488\n", "Gini index for ID Rep_C_1029: 0.036599578828823295\n", "Gini index for ID Rep_C_1033: 0.30064909729365447\n", "Gini index for ID Rep_C_1036: 0.14961482575395169\n", "Gini index for ID Rep_C_1037: 0.2079804357604165\n", "Gini index for ID Rep_C_1038: 0.37476635163937805\n", "Gini index for ID Rep_C_1040: 0.02659736956437305\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Gini index for ID ICC_C_0001: 0.3250842709965342\n", "Gini index for ID ICC_C_0003: 0.1752136752136752\n", "Gini index for ID None: 0.08210853742760076\n", "Gini index for ID Rep_C_1001: 0.10951859025659465\n", "Gini index for ID Rep_C_1002: 0.08526652090156889\n", "Gini index for ID Rep_C_1003: 0.02009832710234263\n", "Gini index for ID Rep_C_1005: 0.14139557925426066\n", "Gini index for ID Rep_C_1016: 0.03434904331596367\n", "Gini index for ID Rep_C_1018: 
0.1133869505204287\n", "Gini index for ID Rep_C_1023: 0.2820465681034999\n", "Gini index for ID Rep_C_1025: 0.21851923722413358\n", "Gini index for ID Rep_C_1026: 0.13582361685325406\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Gini index for ID ICC_C_0001: 0.4013605442176871\n", "Gini index for ID ICC_C_0003: 0.21606830265050206\n", "Gini index for ID None: 0.17645491577604838\n", "Gini index for ID Rep_C_1016: 0.11586731484801636\n", "Gini index for ID Rep_C_1023: 0.4021464106636463\n", "Gini index for ID Rep_C_1025: 0.2271908464185571\n", "Gini index for ID Rep_C_1028: 0.05022679324894515\n", "Gini index for ID Rep_C_1033: 0.11823200873082108\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in Hoplites.summary_attr\n", "Clonotype frequencies can be also found in 
Hoplites.summary_attr\n", "Gini index for ID ICC_C_0001: 0.4033771739654093\n", "Gini index for ID ICC_C_0003: 0.22157519435979442\n", "Gini index for ID None: 0.13989784704019223\n", "Gini index for ID Rep_C_1002: 0.12700922796287353\n", "Gini index for ID Rep_C_1003: 0.06795403161108159\n", "Gini index for ID Rep_C_1005: 0.16748211061325485\n", "Gini index for ID Rep_C_1014: 0.1315047631524004\n", "Gini index for ID Rep_C_1023: 0.39877595398428733\n", "Gini index for ID Rep_C_1025: 0.2743605459879817\n", "Gini index for ID Rep_C_1028: 0.059489394058951386\n", "Gini index for ID Rep_C_1033: 0.10753070400215173\n", "Gini index for ID Rep_C_1037: 0.0\n" ] } ], "source": [ "# Gini index of the clonotype distribution per donor, computed separately for each clonotype file.\n", "# NOTE(review): clono_list[n] is matched to adata_split['run<n>'] by list position only - confirm both orderings agree.\n", "meta_cols = ['donor', 'timepoint', 'COVID_status']\n", "gini_frames = []\n", "for n, clono in enumerate(clono_list):\n", "    run_obs = adata_split[f\"run{n}\"].obs[['barcode'] + meta_cols]\n", "    clonotypes = pd.read_csv(clono, sep='\\t').merge(run_obs, on='barcode', how='left')\n", "    # Drop clonotype rows whose barcode found no donor in this run's obs table.\n", "    clonotypes = clonotypes.dropna(axis=0, subset=['donor'])\n", "    hop = Hoplites.Hoplites(meta=None, clonotypes=clonotypes, verbose=True)\n", "    hop.summary(genotypes='donor')\n", "    gini_df = hop.gini(plot=False)\n", "    # Attach timepoint and COVID status by matching each ID to its donor row.\n", "    donor_meta = hop.clonotypes[meta_cols].drop_duplicates()\n", "    gini_df = gini_df.merge(donor_meta, left_on='ID', right_on='donor', how='left')\n", "    gini_frames.append(gini_df.drop('donor', axis=1))\n", "\n", "# Build master_df from scratch with a single concat: re-running this cell can no longer append duplicate rows,\n", "# and the table is not re-copied on every iteration.\n", "master_df = pd.concat(gini_frames, ignore_index=True) if gini_frames else pd.DataFrame()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "master_df = master_df[master_df.ID != \"None\"].sort_values(\"COVID_status\", axis=0)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "master_df.to_csv(\"out_gini_12345.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[Text(0, 0, 'Rep_C_1021'),\n", " Text(0, 0, 'Rep_C_1008'),\n", " Text(0, 0, 'Rep_C_1020'),\n", " Text(0, 0, 'Rep_C_1009'),\n", " Text(0, 0, 'Rep_C_1017'),\n", " Text(0, 0, 'Rep_C_1016'),\n", " 
Text(0, 0, 'Rep_C_1010'),\n", " Text(0, 0, 'Rep_C_1028'),\n", " Text(0, 0, 'Rep_C_1040'),\n", " Text(0, 0, 'Rep_C_1030'),\n", " Text(0, 0, 'ICC_C_0001'),\n", " Text(0, 0, 'ICC_C_0003'),\n", " Text(0, 0, 'ICC_C_0005'),\n", " Text(0, 0, 'ICC_C_0004'),\n", " Text(0, 0, 'ICC_C_0002'),\n", " Text(0, 0, 'Rep_C_1014'),\n", " Text(0, 0, 'Rep_C_1005'),\n", " Text(0, 0, 'Rep_C_1018'),\n", " Text(0, 0, 'Rep_C_1023'),\n", " Text(0, 0, 'Rep_C_1025'),\n", " Text(0, 0, 'Rep_C_1026'),\n", " Text(0, 0, 'Rep_C_1003'),\n", " Text(0, 0, 'Rep_C_1002'),\n", " Text(0, 0, 'Rep_C_1001'),\n", " Text(0, 0, 'Rep_C_1033'),\n", " Text(0, 0, 'Rep_C_1006'),\n", " Text(0, 0, 'Rep_C_1015'),\n", " Text(0, 0, 'Rep_C_1029'),\n", " Text(0, 0, 'Rep_C_1036'),\n", " Text(0, 0, 'Rep_C_1037'),\n", " Text(0, 0, 'Rep_C_1038'),\n", " Text(0, 0, 'Rep_C_1012'),\n", " Text(0, 0, 'Rep_C_1007')]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Bar chart of Gini index per ID, with bars grouped by timepoint.\n", "plt.rc('font', size=15)\n", "fig, ax = plt.subplots(figsize=(20, 12))\n", "# NOTE(review): hue_order assumes timepoint holds these exact strings - confirm against master_df.timepoint.\n", "sns.barplot(x=\"ID\", y=\"Gini_index\", hue=\"timepoint\", data=master_df,\n", "            hue_order=['None', '0', '4', '7', '14'], ax=ax)\n", "ax.set_title(\"TCR clonotype Gini index per donor and timepoint\")\n", "# tick_params rotates the labels and returns None, so the cell no longer echoes the list of Text tick objects.\n", "ax.tick_params(axis='x', labelrotation=90)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }