{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "import pandas as pd\n",
    "import re\n",
    "import matplotlib as mpl\n",
    "mpl.rcParams['pdf.fonttype'] = 42\n",
    "mpl.rcParams['ps.fonttype'] = 42"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rename the per-sample columns of a count table to short patient codes.\n",
    "def re_col_names(RNA_set):\n",
    "    '''Rename the columns of a count table to short patient codes.\n",
    "\n",
    "    The first six columns are renamed to Geneid, Chr, Start, End, Strand and\n",
    "    Length. Every later column whose name contains 'star_Gencode.v39/' is\n",
    "    replaced by the text between that prefix and the last '/' after it\n",
    "    (presumably the per-sample output directory -- confirm against the\n",
    "    actual column headers).\n",
    "\n",
    "    Columns without the pattern keep their current name, so running this\n",
    "    function a second time on an already renamed table is a no-op instead\n",
    "    of a pandas length-mismatch error.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    RNA_set : pandas.DataFrame\n",
    "        Table with six leading annotation columns followed by the sample\n",
    "        columns.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The same object that was passed in; its columns are changed in place.\n",
    "    '''\n",
    "    pattern = re.compile('(?<=star_Gencode.v39/).+(?=/)')\n",
    "    Patient_name = []\n",
    "    for col in RNA_set.columns.tolist()[6:]:\n",
    "        # Only strings can be searched; any other label is kept untouched.\n",
    "        match = pattern.search(col) if isinstance(col, str) else None\n",
    "        # Fall back to the existing name so the list length always matches.\n",
    "        Patient_name.append(match.group(0) if match else col)\n",
    "    New_colnames = ['Geneid', 'Chr', 'Start', 'End', 'Strand', 'Length'] + Patient_name\n",
    "    RNA_set.columns = New_colnames\n",
    "    return RNA_set\n",
    "\n",
    "\n",
    "# Get the patient code list.\n",
    "def get_patient_code(RNA_set):\n",
    "    '''Return the column names after the first six columns as a list.'''\n",
    "    return RNA_set.columns[6:].tolist()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Helsinki"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Import data \n",
    "- All expression data were annotated with gencode.v39.primary_assembly.annotation.gtf as the reference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | Geneid | \n", "Chr | \n", "Start | \n", "End | \n", "Strand | \n", "Length | \n", "FM1338 | \n", "FM2086 | \n", "FM2440 | \n", "FM2532 | \n", "... | \n", "FM3690_2 | \n", "FM3764 | \n", "FM3796 | \n", "FM3799 | \n", "FM3800 | \n", "FM3814 | \n", "FM3835 | \n", "HL_VL_AMP08 | \n", "JE_VL_AMP05 | \n", "KO_AMP09 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "ENSG00000223972.5 | \n", "chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1 | \n", "11869;12010;12179;12613;12613;12975;13221;1322... | \n", "12227;12057;12227;12721;12697;13052;14409;1337... | \n", "+;+;+;+;+;+;+;+;+ | \n", "1735.0 | \n", "2.0 | \n", "2.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "1.0 | \n", "8.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1 | \n", "ENSG00000227232.5 | \n", "chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;c... | \n", "14404;15005;15796;16607;16858;17233;17606;1791... | \n", "14501;15038;15947;16765;17055;17368;17742;1806... | \n", "-;-;-;-;-;-;-;-;-;-;- | \n", "1351.0 | \n", "412.0 | \n", "370.0 | \n", "417.0 | \n", "711.0 | \n", "... | \n", "207.0 | \n", "281.0 | \n", "454.0 | \n", "263.0 | \n", "449.0 | \n", "207.0 | \n", "695.0 | \n", "572.0 | \n", "881.0 | \n", "708.0 | \n", "
2 | \n", "ENSG00000278267.1 | \n", "chr1 | \n", "17369 | \n", "17436 | \n", "- | \n", "68.0 | \n", "36.0 | \n", "15.0 | \n", "41.0 | \n", "71.0 | \n", "... | \n", "19.0 | \n", "15.0 | \n", "55.0 | \n", "6.0 | \n", "52.0 | \n", "25.0 | \n", "53.0 | \n", "40.0 | \n", "56.0 | \n", "34.0 | \n", "
3 | \n", "ENSG00000243485.5 | \n", "chr1;chr1;chr1;chr1;chr1 | \n", "29554;30267;30564;30976;30976 | \n", "30039;30667;30667;31109;31097 | \n", "+;+;+;+;+ | \n", "1021.0 | \n", "3.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "3.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "ENSG00000284332.1 | \n", "chr1 | \n", "30366 | \n", "30503 | \n", "+ | \n", "138.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
61584 | \n", "ENSG00000276017.1 | \n", "KI270734.1 | \n", "72411 | \n", "74814 | \n", "+ | \n", "2404.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
61585 | \n", "ENSG00000278817.1 | \n", "KI270734.1;KI270734.1;KI270734.1;KI270734.1;KI... | \n", "131494;131836;135443;136159;136845 | \n", "131755;131996;135543;136299;137392 | \n", "+;+;+;+;+ | \n", "1213.0 | \n", "1043.0 | \n", "1151.0 | \n", "282.0 | \n", "1116.0 | \n", "... | \n", "747.0 | \n", "876.0 | \n", "592.0 | \n", "479.0 | \n", "250.0 | \n", "405.0 | \n", "892.0 | \n", "617.0 | \n", "927.0 | \n", "687.0 | \n", "
61586 | \n", "ENSG00000277196.4 | \n", "KI270734.1;KI270734.1;KI270734.1;KI270734.1;KI... | \n", "138082;138082;138743;138743;142194;142194;1436... | \n", "138667;138667;138831;138831;142292;142292;1437... | \n", "-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;... | \n", "2405.0 | \n", "311.0 | \n", "719.0 | \n", "49.0 | \n", "876.0 | \n", "... | \n", "147.0 | \n", "240.0 | \n", "415.0 | \n", "48.0 | \n", "405.0 | \n", "150.0 | \n", "305.0 | \n", "1023.0 | \n", "429.0 | \n", "2899.0 | \n", "
61587 | \n", "ENSG00000278625.1 | \n", "KI270744.1 | \n", "51009 | \n", "51114 | \n", "- | \n", "106.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
61588 | \n", "ENSG00000277374.1 | \n", "KI270750.1 | \n", "148668 | \n", "148843 | \n", "+ | \n", "176.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
61589 rows × 26 columns
\n", "\n", " | Geneid | \n", "Chr | \n", "Start | \n", "End | \n", "Strand | \n", "Length | \n", "3804 | \n", "4445 | \n", "6172 | \n", "AS_TA_AMP01 | \n", "... | \n", "HL_VL_AMP08 | \n", "IA_VL_AMP03 | \n", "JE_VL_AMP05 | \n", "JH | \n", "JH_VL_AMP04 | \n", "KO_AMP09 | \n", "MR_VL_AMP07 | \n", "V-P_VL_AMP10 | \n", "VA_VL_AMP06 | \n", "VM_TA_AMP02 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "ENSG00000223972.5 | \n", "chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1 | \n", "11869;12010;12179;12613;12613;12975;13221;1322... | \n", "12227;12057;12227;12721;12697;13052;14409;1337... | \n", "+;+;+;+;+;+;+;+;+ | \n", "1735 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
1 | \n", "ENSG00000227232.5 | \n", "chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;c... | \n", "14404;15005;15796;16607;16858;17233;17606;1791... | \n", "14501;15038;15947;16765;17055;17368;17742;1806... | \n", "-;-;-;-;-;-;-;-;-;-;- | \n", "1351 | \n", "48 | \n", "79 | \n", "38 | \n", "93 | \n", "... | \n", "124 | \n", "68 | \n", "109 | \n", "100 | \n", "97 | \n", "130 | \n", "68 | \n", "66 | \n", "213 | \n", "112 | \n", "
2 | \n", "ENSG00000278267.1 | \n", "chr1 | \n", "17369 | \n", "17436 | \n", "- | \n", "68 | \n", "11 | \n", "5 | \n", "6 | \n", "4 | \n", "... | \n", "28 | \n", "11 | \n", "17 | \n", "6 | \n", "19 | \n", "10 | \n", "5 | \n", "8 | \n", "12 | \n", "16 | \n", "
3 | \n", "ENSG00000243485.5 | \n", "chr1;chr1;chr1;chr1;chr1 | \n", "29554;30267;30564;30976;30976 | \n", "30039;30667;30667;31109;31097 | \n", "+;+;+;+;+ | \n", "1021 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
4 | \n", "ENSG00000284332.1 | \n", "chr1 | \n", "30366 | \n", "30503 | \n", "+ | \n", "138 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
61582 | \n", "ENSG00000276017.1 | \n", "KI270734.1 | \n", "72411 | \n", "74814 | \n", "+ | \n", "2404 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
61583 | \n", "ENSG00000278817.1 | \n", "KI270734.1;KI270734.1;KI270734.1;KI270734.1;KI... | \n", "131494;131836;135443;136159;136845 | \n", "131755;131996;135543;136299;137392 | \n", "+;+;+;+;+ | \n", "1213 | \n", "308 | \n", "168 | \n", "434 | \n", "1202 | \n", "... | \n", "261 | \n", "276 | \n", "402 | \n", "768 | \n", "1023 | \n", "382 | \n", "215 | \n", "404 | \n", "145 | \n", "315 | \n", "
61584 | \n", "ENSG00000277196.4 | \n", "KI270734.1;KI270734.1;KI270734.1;KI270734.1;KI... | \n", "138082;138082;138743;138743;142194;142194;1436... | \n", "138667;138667;138831;138831;142292;142292;1437... | \n", "-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;... | \n", "2405 | \n", "91 | \n", "84 | \n", "47 | \n", "1432 | \n", "... | \n", "514 | \n", "187 | \n", "189 | \n", "3295 | \n", "2598 | \n", "1314 | \n", "520 | \n", "422 | \n", "584 | \n", "226 | \n", "
61585 | \n", "ENSG00000278625.1 | \n", "KI270744.1 | \n", "51009 | \n", "51114 | \n", "- | \n", "106 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
61586 | \n", "ENSG00000277374.1 | \n", "KI270750.1 | \n", "148668 | \n", "148843 | \n", "+ | \n", "176 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "
61587 rows × 60 columns
\n", "\n", " | Geneid | \n", "Chr | \n", "Start | \n", "End | \n", "Strand | \n", "Length | \n", "0_FM1812 | \n", "0_FM22 | \n", "0_FM2670 | \n", "0_FM3310 | \n", "... | \n", "9_FM3690_2 | \n", "9_FM3764 | \n", "9_FM3796 | \n", "9_FM3799 | \n", "9_FM3800 | \n", "9_FM3814 | \n", "9_FM3835 | \n", "9_HL_VL_AMP08 | \n", "9_JE_VL_AMP05 | \n", "9_KO_AMP09 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "ENSG00000223972.5 | \n", "chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1 | \n", "11869;12010;12179;12613;12613;12975;13221;1322... | \n", "12227;12057;12227;12721;12697;13052;14409;1337... | \n", "+;+;+;+;+;+;+;+;+ | \n", "1735 | \n", "1 | \n", "12 | \n", "2 | \n", "7 | \n", "... | \n", "0.0 | \n", "1.0 | \n", "8.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
1 | \n", "ENSG00000227232.5 | \n", "chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;chr1;c... | \n", "14404;15005;15796;16607;16858;17233;17606;1791... | \n", "14501;15038;15947;16765;17055;17368;17742;1806... | \n", "-;-;-;-;-;-;-;-;-;-;- | \n", "1351 | \n", "122 | \n", "229 | \n", "139 | \n", "74 | \n", "... | \n", "207.0 | \n", "281.0 | \n", "454.0 | \n", "263.0 | \n", "449.0 | \n", "207.0 | \n", "695.0 | \n", "572.0 | \n", "881.0 | \n", "708.0 | \n", "
2 | \n", "ENSG00000278267.1 | \n", "chr1 | \n", "17369 | \n", "17436 | \n", "- | \n", "68 | \n", "22 | \n", "28 | \n", "9 | \n", "8 | \n", "... | \n", "19.0 | \n", "15.0 | \n", "55.0 | \n", "6.0 | \n", "52.0 | \n", "25.0 | \n", "53.0 | \n", "40.0 | \n", "56.0 | \n", "34.0 | \n", "
3 | \n", "ENSG00000243485.5 | \n", "chr1;chr1;chr1;chr1;chr1 | \n", "29554;30267;30564;30976;30976 | \n", "30039;30667;30667;31109;31097 | \n", "+;+;+;+;+ | \n", "1021 | \n", "6 | \n", "10 | \n", "6 | \n", "2 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "3.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
4 | \n", "ENSG00000284332.1 | \n", "chr1 | \n", "30366 | \n", "30503 | \n", "+ | \n", "138 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
61582 | \n", "ENSG00000276017.1 | \n", "KI270734.1 | \n", "72411 | \n", "74814 | \n", "+ | \n", "2404 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
61583 | \n", "ENSG00000278817.1 | \n", "KI270734.1;KI270734.1;KI270734.1;KI270734.1;KI... | \n", "131494;131836;135443;136159;136845 | \n", "131755;131996;135543;136299;137392 | \n", "+;+;+;+;+ | \n", "1213 | \n", "264 | \n", "186 | \n", "660 | \n", "472 | \n", "... | \n", "747.0 | \n", "876.0 | \n", "592.0 | \n", "479.0 | \n", "250.0 | \n", "405.0 | \n", "892.0 | \n", "617.0 | \n", "927.0 | \n", "687.0 | \n", "
61584 | \n", "ENSG00000277196.4 | \n", "KI270734.1;KI270734.1;KI270734.1;KI270734.1;KI... | \n", "138082;138082;138743;138743;142194;142194;1436... | \n", "138667;138667;138831;138831;142292;142292;1437... | \n", "-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;-;... | \n", "2405 | \n", "106 | \n", "59 | \n", "107 | \n", "261 | \n", "... | \n", "147.0 | \n", "240.0 | \n", "415.0 | \n", "48.0 | \n", "405.0 | \n", "150.0 | \n", "305.0 | \n", "1023.0 | \n", "429.0 | \n", "2899.0 | \n", "
61585 | \n", "ENSG00000278625.1 | \n", "KI270744.1 | \n", "51009 | \n", "51114 | \n", "- | \n", "106 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
61586 | \n", "ENSG00000277374.1 | \n", "KI270750.1 | \n", "148668 | \n", "148843 | \n", "+ | \n", "176 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "... | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
61587 rows × 140 columns
\n", "