{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import csv\n",
    "from rdkit import Chem"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Expression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the raw CCLE expression table; the first CSV column is used as the row index.\n",
    "expression_df = pd.read_csv(\n",
    "    \"./data/RawFile/CCLE_expression.csv\",\n",
    "    index_col=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column labels are expected to look like \"SYMBOL (ENTREZ_ID)\". Keep the first\n",
    "# column seen for each Entrez ID and drop every later column that repeats it.\n",
    "entrez_dict = {}  # Entrez ID -> (symbol, original column label) of the kept column\n",
    "drop_list = []  # labels of columns whose Entrez ID was already seen\n",
    "for col in expression_df.columns:\n",
    "    split = col.split(\" (\")\n",
    "    entrez = split[1][:-1]  # drop the final character (the closing parenthesis)\n",
    "    symbol = split[0]\n",
    "    # Membership is tested on the dict itself (hash lookup) instead of\n",
    "    # building a fresh list of keys for every column.\n",
    "    if entrez in entrez_dict:\n",
    "        drop_list.append(col)\n",
    "    else:\n",
    "        entrez_dict[entrez] = (symbol, col)\n",
    "\n",
    "new_expression_df = expression_df.drop(columns=drop_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the expression table with duplicate-ID columns removed.\n",
    "new_expression_df.to_csv(\n",
    "    \"./data/ProcessedFile/22Q1expressions.csv\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Drug"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the treatment-info table; its 'name' and 'smiles' columns are used below.\n",
    "drug_info = pd.read_csv(\n",
    "    \"./data/RawFile/secondary-screen-replicate-collapsed-treatment-info.csv\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def canonicalize_smiles(smi):\n",
    "    \"\"\"Return the RDKit canonical SMILES for ``smi``, or ``\"\"`` on failure.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    smi : str\n",
    "        SMILES string to canonicalize.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        The ``Chem.MolToSmiles`` output for the parsed molecule. An empty\n",
    "        string is returned when ``smi`` is falsy, when ``Chem.MolFromSmiles``\n",
    "        returns ``None``, or when RDKit raises an exception.\n",
    "    \"\"\"\n",
    "    if not smi:\n",
    "        return \"\"\n",
    "    try:\n",
    "        mol = Chem.MolFromSmiles(smi)\n",
    "        if mol is None:\n",
    "            return \"\"\n",
    "        return Chem.MolToSmiles(mol)\n",
    "    except Exception:\n",
    "        # Only ordinary errors are treated as \"not canonicalizable\";\n",
    "        # KeyboardInterrupt and SystemExit are allowed to propagate.\n",
    "        return \"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per distinct (name, smiles) pair. Rows with a missing smiles value are\n",
    "# removed first: a missing value is read as float NaN, and calling .split on it\n",
    "# would raise AttributeError. The copy makes the column assignments below act on\n",
    "# an independent frame.\n",
    "keep_drug_info = (\n",
    "    drug_info[['name', 'smiles']]\n",
    "    .dropna(subset=['smiles'])\n",
    "    .drop_duplicates()\n",
    "    .copy()\n",
    ")\n",
    "# Keep only the text before the first comma of each smiles entry.\n",
    "keep_drug_info['right_smiles'] = [smi.split(\",\")[0].strip() for smi in keep_drug_info['smiles']]\n",
    "keep_drug_info['canonical_smiles'] = [canonicalize_smiles(smi) for smi in keep_drug_info['right_smiles']]\n",
    "# canonicalize_smiles returns \"\" for anything it could not canonicalize; drop those rows.\n",
    "keep_drug_info = keep_drug_info[keep_drug_info['canonical_smiles'] != \"\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the drugs that have a canonical SMILES; the row index is not written.\n",
    "keep_drug_info.to_csv(\n",
    "    \"./data/ProcessedFile/sec_auc_drug.csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the dose-response curve parameters. low_memory=False makes pandas parse the\n",
    "# file in one pass instead of in chunks.\n",
    "response = pd.read_csv(\n",
    "    \"./data/RawFile/secondary-screen-dose-response-curve-parameters.csv\",\n",
    "    low_memory=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep rows that have a depmap_id and whose passed_str_profiling value equals True.\n",
    "response = (\n",
    "    response\n",
    "    .dropna(subset=['depmap_id'])\n",
    "    .loc[lambda df: df['passed_str_profiling'].eq(True)]\n",
    ")\n",
    "\n",
    "# One row per (name, depmap_id) pair (first occurrence kept), restricted to\n",
    "# depmap_ids found in the expression index and names found in keep_drug_info.\n",
    "keep_response = (\n",
    "    response[['name', 'depmap_id', 'auc']]\n",
    "    .drop_duplicates(subset=['name', 'depmap_id'])\n",
    "    .loc[\n",
    "        lambda df: df['depmap_id'].isin(new_expression_df.index)\n",
    "        & df['name'].isin(keep_drug_info['name'])\n",
    "    ]\n",
    "    .copy(deep=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the filtered (name, depmap_id, auc) table; the row index is not written.\n",
    "keep_response.to_csv(\n",
    "    \"./data/ProcessedFile/Prism_secondary_auc.csv\",\n",
    "    index=False,\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "BioGDR",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.20"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
