{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "b991b2cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import argparse\n",
    "import pandas as pd\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from collections import Counter\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "a9f85aa0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the headerless annotation table. Column 6 holds the '+'/'-' strand flag that the\n",
    "# next cell filters on; column 8 holds ';'-separated key=value attributes.\n",
    "genes_df = pd.read_table(\"U00096.3.gtf\", header=None)\n",
    "# The gene name is taken positionally: the value after '=' in the 5th ';'-separated field.\n",
    "# NOTE(review): presumably every row lists its attributes in the same order -- confirm,\n",
    "# otherwise this raises IndexError or silently picks the wrong attribute.\n",
    "genes_df[\"Gene name\"] = genes_df[8].apply(lambda x: x.split(';')[4].split('=')[1])\n",
    "genes_df = genes_df.loc[:, [6, 'Gene name']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "6a34b944",
   "metadata": {},
   "outputs": [],
   "source": [
    "sample = 'bm03'\n",
    "COUNTS_DIR = '2018-05-22_RNAseq'\n",
    "# Only the first N_COUNT_ROWS rows of each count table are used.\n",
    "# NOTE(review): presumably this drops trailing non-gene rows -- confirm against the files.\n",
    "N_COUNT_ROWS = 4419\n",
    "\n",
    "\n",
    "def load_strand_counts(path, count_col, n_rows=N_COUNT_ROWS):\n",
    "    \"\"\"Read a headerless (gene name, count) table and attach each gene's strand.\n",
    "\n",
    "    Keeps the first ``n_rows`` rows, names the two columns 'Gene name' and\n",
    "    ``count_col``, then merges with ``genes_df`` on 'Gene name' (inner join,\n",
    "    so genes missing from the annotation are dropped).\n",
    "    \"\"\"\n",
    "    counts = pd.read_table(path, header=None).iloc[0:n_rows, :]\n",
    "    counts.columns = ['Gene name', count_col]\n",
    "    return counts.merge(genes_df, on='Gene name')\n",
    "\n",
    "\n",
    "plus_df = load_strand_counts(COUNTS_DIR + '/' + sample + '_fwd_250up75d.txt', 'plus_count')\n",
    "minus_df = load_strand_counts(COUNTS_DIR + '/' + sample + '_rev_250up75d.txt', 'minus_count')\n",
    "# With no explicit keys, merge joins on the shared columns: 'Gene name' and strand column 6.\n",
    "merge_df = pd.merge(plus_df, minus_df)\n",
    "\n",
    "# Relabel the counts per gene strand: for '+' genes plus_count is the TS column and\n",
    "# minus_count the NTS column; for '-' genes the roles are swapped.\n",
    "plusgenes_df = merge_df[merge_df[6] == '+'].rename(\n",
    "    columns={'plus_count': sample + '_TS', 'minus_count': sample + '_NTS'})\n",
    "minusgenes_df = merge_df[merge_df[6] == '-'].rename(\n",
    "    columns={'plus_count': sample + '_NTS', 'minus_count': sample + '_TS'})\n",
    "\n",
    "# The two frames carry the TS/NTS columns in different orders. sort=False keeps the\n",
    "# column order of the first frame and avoids pandas' alignment FutureWarning.\n",
    "joined_df = pd.concat([plusgenes_df, minusgenes_df], sort=False).sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ec324d4f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep genes whose NTS/TS count ratio exceeds the cutoff.\n",
    "# NOTE: with numeric count columns a zero TS count does not raise; the ratio becomes\n",
    "# inf (row kept) or NaN for 0/0 (row dropped).\n",
    "NTS_TS_MIN_RATIO = 2.5\n",
    "nts_ts_ratio = joined_df[sample + '_NTS'] / joined_df[sample + '_TS']\n",
    "filtered = joined_df[nts_ts_ratio > NTS_TS_MIN_RATIO]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "88c525de",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional stricter filter, left disabled: additionally require an NTS count above 10.\n",
    "#filtered=filtered[filtered['bm03_NTS']>10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "11743f05",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | Gene name | \n", "bm03_TS | \n", "6 | \n", "bm03_NTS | \n", "
---|---|---|---|---|
3 | \n", "aaeX | \n", "19 | \n", "- | \n", "159 | \n", "
7 | \n", "abgB | \n", "3 | \n", "- | \n", "21 | \n", "
45 | \n", "adiA | \n", "7 | \n", "- | \n", "18 | \n", "
46 | \n", "adiC | \n", "3 | \n", "- | \n", "10 | \n", "
90 | \n", "alsA | \n", "21 | \n", "- | \n", "57 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
4378 | \n", "ytfE | \n", "70 | \n", "- | \n", "251 | \n", "
4390 | \n", "ytiA | \n", "9 | \n", "- | \n", "56 | \n", "
4396 | \n", "yzcX | \n", "28 | \n", "+ | \n", "89 | \n", "
4397 | \n", "yzfA | \n", "1 | \n", "- | \n", "227 | \n", "
4398 | \n", "yzgL | \n", "72 | \n", "- | \n", "1462 | \n", "
252 rows × 4 columns
\n", "