{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "e4c8a783-200a-4762-8b66-cf13cf8b3aa9", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", "# Colors of matplotlib's active property cycle, as a list.\n", "prop_cycle = plt.rcParams['axes.prop_cycle']\n", "colors = prop_cycle.by_key()['color']" ] }, { "cell_type": "markdown", "id": "06ba2568-f53e-44c9-9cba-4b99bad53ded", "metadata": {}, "source": [ "#### Read field_296_100rows.csv (downsampled from field_296.csv)" ] }, { "cell_type": "code", "execution_count": 2, "id": "e8a073fd-3f33-4671-97d5-48f670b97433", "metadata": {}, "outputs": [], "source": [ "# The path is relative, so the CSV must be in the kernel's working directory.\n", "field_296_100rows = pd.read_csv('field_296_100rows.csv')" ] }, { "cell_type": "markdown", "id": "e137ddf7-bff4-4afb-a00e-4ec2e1581cc4", "metadata": {}, "source": [ "#### Display dataframe" ] }, { "cell_type": "code", "execution_count": 3, "id": "1386d78d-745c-4483-8a53-5dddffee1842", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idGaia_EDR3___idAllWISE___idPS1_DR1___idradecperiodfieldccdquad...hp_xgbi_xgbblyr_xgbyso_xgbsaw_xgbceph2_xgbrrab_xgbsin_xgbosarg_xgbrrblz_xgb
01029658200994601400152013510566249061014375659249614.375673-14.4896695.751999296153...0.000.000.00.00.000.230.00.000.00.33
11029653100037124682924205399610881700137013510412489192016655903998416.655579-13.3924330.253672296142...0.000.000.00.00.000.230.00.010.00.33
21029633200704623587560828835768321880167013510630408890018235284974418.235299-15.9088330.02595029692...0.000.000.00.00.000.230.00.000.00.33
310296352004611008815019004627651219.004638-16.5365090.03327929694...0.010.010.00.00.000.230.00.000.00.33
41029619300286023551295155778732801900182013510216968591018946739091218.946842-18.4078390.02576229654...0.000.000.00.00.000.230.00.000.00.33
..................................................................
951029631100201101420182013510135688593013530477206413.530474-18.3902060.09942329684...0.010.010.00.00.000.230.00.000.00.33
961029616200457523553345358412601601900182013510600328685019058597804819.058643-17.618552637.16833529651...0.010.030.00.00.000.230.00.010.00.33
971029633200002901720152013510269128971017970584952017.970427-15.2340440.07905829692...0.000.130.00.00.020.230.00.000.00.33
981029656101833123730284310060933121550137013510409929200014978074464014.978093-13.3297950.034946296151...0.000.010.00.00.020.230.00.000.00.33
9910296392037084008819016801680681616.801518-16.5030110.366488296104...0.000.000.00.00.000.230.00.000.00.33
\n", "

100 rows × 99 columns

\n", "
" ], "text/plain": [ " _id Gaia_EDR3___id AllWISE___id \\\n", "0 10296582009946 0 140015201351056624 \n", "1 10296531000371 2468292420539961088 170013701351041248 \n", "2 10296332007046 2358756082883576832 188016701351063040 \n", "3 10296352004611 0 0 \n", "4 10296193002860 2355129515577873280 190018201351021696 \n", ".. ... ... ... \n", "95 10296311002011 0 142018201351013568 \n", "96 10296162004575 2355334535841260160 190018201351060032 \n", "97 10296332000029 0 172015201351026912 \n", "98 10296561018331 2373028431006093312 155013701351040992 \n", "99 10296392037084 0 0 \n", "\n", " PS1_DR1___id ra dec period field ccd quad \\\n", "0 90610143756592496 14.375673 -14.489669 5.751999 296 15 3 \n", "1 91920166559039984 16.655579 -13.392433 0.253672 296 14 2 \n", "2 88900182352849744 18.235299 -15.908833 0.025950 296 9 2 \n", "3 88150190046276512 19.004638 -16.536509 0.033279 296 9 4 \n", "4 85910189467390912 18.946842 -18.407839 0.025762 296 5 4 \n", ".. ... ... ... ... ... ... ... \n", "95 85930135304772064 13.530474 -18.390206 0.099423 296 8 4 \n", "96 86850190585978048 19.058643 -17.618552 637.168335 296 5 1 \n", "97 89710179705849520 17.970427 -15.234044 0.079058 296 9 2 \n", "98 92000149780744640 14.978093 -13.329795 0.034946 296 15 1 \n", "99 88190168016806816 16.801518 -16.503011 0.366488 296 10 4 \n", "\n", " ... hp_xgb i_xgb blyr_xgb yso_xgb saw_xgb ceph2_xgb rrab_xgb \\\n", "0 ... 0.00 0.00 0.0 0.0 0.00 0.23 0.0 \n", "1 ... 0.00 0.00 0.0 0.0 0.00 0.23 0.0 \n", "2 ... 0.00 0.00 0.0 0.0 0.00 0.23 0.0 \n", "3 ... 0.01 0.01 0.0 0.0 0.00 0.23 0.0 \n", "4 ... 0.00 0.00 0.0 0.0 0.00 0.23 0.0 \n", ".. ... ... ... ... ... ... ... ... \n", "95 ... 0.01 0.01 0.0 0.0 0.00 0.23 0.0 \n", "96 ... 0.01 0.03 0.0 0.0 0.00 0.23 0.0 \n", "97 ... 0.00 0.13 0.0 0.0 0.02 0.23 0.0 \n", "98 ... 0.00 0.01 0.0 0.0 0.02 0.23 0.0 \n", "99 ... 
0.00 0.00 0.0 0.0 0.00 0.23 0.0 \n", "\n", " sin_xgb osarg_xgb rrblz_xgb \n", "0 0.00 0.0 0.33 \n", "1 0.01 0.0 0.33 \n", "2 0.00 0.0 0.33 \n", "3 0.00 0.0 0.33 \n", "4 0.00 0.0 0.33 \n", ".. ... ... ... \n", "95 0.00 0.0 0.33 \n", "96 0.01 0.0 0.33 \n", "97 0.00 0.0 0.33 \n", "98 0.00 0.0 0.33 \n", "99 0.00 0.0 0.33 \n", "\n", "[100 rows x 99 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "field_296_100rows" ] }, { "cell_type": "markdown", "id": "4e42f2fc-2818-40bf-804c-16c1caac3a9b", "metadata": {}, "source": [ "#### Describe dataframe" ] }, { "cell_type": "code", "execution_count": 4, "id": "f4b3f66b-3f64-43f4-b376-46768be7d164", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
_idGaia_EDR3___idAllWISE___idPS1_DR1___idradecperiodfieldccdquad...hp_xgbi_xgbblyr_xgbyso_xgbsaw_xgbceph2_xgbrrab_xgbsin_xgbosarg_xgbrrblz_xgb
count1.000000e+021.000000e+021.000000e+021.000000e+02100.000000100.000000100.000000100.0100.000000100.000000...100.000000100.000000100.0100.000000100.00000100.000000100.000000100.000000100.0000100.00
mean1.029634e+131.757088e+181.259936e+178.795916e+1615.559047-16.69669620.546433296.09.1600002.550000...0.0011000.0091000.00.0004000.000600.2303000.0010000.0017000.00020.33
std1.774576e+081.047139e+186.459028e+162.553461e+152.1686152.128283104.3647530.04.4169551.085953...0.0031450.0366280.00.0031530.003120.0017140.0071770.0045070.00200.00
min1.029601e+130.000000e+000.000000e+008.321013e+1612.136068-20.6512070.021168296.01.0000001.000000...0.0000000.0000000.00.0000000.000000.2300000.0000000.0000000.00000.33
25%1.029620e+130.000000e+001.250167e+178.600515e+1613.763547-18.3238140.026841296.05.7500002.000000...0.0000000.0000000.00.0000000.000000.2300000.0000000.0000000.00000.33
50%1.029636e+132.357130e+181.415175e+178.844513e+1615.020253-16.2931430.039651296.09.5000002.000000...0.0000000.0000000.00.0000000.000000.2300000.0000000.0000000.00000.33
75%1.029647e+132.371505e+181.720156e+178.981017e+1617.482961-15.1514790.096073296.012.2500004.000000...0.0000000.0100000.00.0000000.000000.2300000.0000000.0000000.00000.33
max1.029663e+132.468292e+181.930213e+179.200015e+1619.534744-13.329795637.168335296.016.0000004.000000...0.0100000.3300000.00.0300000.020000.2400000.0700000.0200000.02000.33
\n", "

8 rows × 99 columns

\n", "
" ], "text/plain": [ " _id Gaia_EDR3___id AllWISE___id PS1_DR1___id ra \\\n", "count 1.000000e+02 1.000000e+02 1.000000e+02 1.000000e+02 100.000000 \n", "mean 1.029634e+13 1.757088e+18 1.259936e+17 8.795916e+16 15.559047 \n", "std 1.774576e+08 1.047139e+18 6.459028e+16 2.553461e+15 2.168615 \n", "min 1.029601e+13 0.000000e+00 0.000000e+00 8.321013e+16 12.136068 \n", "25% 1.029620e+13 0.000000e+00 1.250167e+17 8.600515e+16 13.763547 \n", "50% 1.029636e+13 2.357130e+18 1.415175e+17 8.844513e+16 15.020253 \n", "75% 1.029647e+13 2.371505e+18 1.720156e+17 8.981017e+16 17.482961 \n", "max 1.029663e+13 2.468292e+18 1.930213e+17 9.200015e+16 19.534744 \n", "\n", " dec period field ccd quad ... hp_xgb \\\n", "count 100.000000 100.000000 100.0 100.000000 100.000000 ... 100.000000 \n", "mean -16.696696 20.546433 296.0 9.160000 2.550000 ... 0.001100 \n", "std 2.128283 104.364753 0.0 4.416955 1.085953 ... 0.003145 \n", "min -20.651207 0.021168 296.0 1.000000 1.000000 ... 0.000000 \n", "25% -18.323814 0.026841 296.0 5.750000 2.000000 ... 0.000000 \n", "50% -16.293143 0.039651 296.0 9.500000 2.000000 ... 0.000000 \n", "75% -15.151479 0.096073 296.0 12.250000 4.000000 ... 0.000000 \n", "max -13.329795 637.168335 296.0 16.000000 4.000000 ... 
0.010000 \n", "\n", " i_xgb blyr_xgb yso_xgb saw_xgb ceph2_xgb rrab_xgb \\\n", "count 100.000000 100.0 100.000000 100.00000 100.000000 100.000000 \n", "mean 0.009100 0.0 0.000400 0.00060 0.230300 0.001000 \n", "std 0.036628 0.0 0.003153 0.00312 0.001714 0.007177 \n", "min 0.000000 0.0 0.000000 0.00000 0.230000 0.000000 \n", "25% 0.000000 0.0 0.000000 0.00000 0.230000 0.000000 \n", "50% 0.000000 0.0 0.000000 0.00000 0.230000 0.000000 \n", "75% 0.010000 0.0 0.000000 0.00000 0.230000 0.000000 \n", "max 0.330000 0.0 0.030000 0.02000 0.240000 0.070000 \n", "\n", " sin_xgb osarg_xgb rrblz_xgb \n", "count 100.000000 100.0000 100.00 \n", "mean 0.001700 0.0002 0.33 \n", "std 0.004507 0.0020 0.00 \n", "min 0.000000 0.0000 0.33 \n", "25% 0.000000 0.0000 0.33 \n", "50% 0.000000 0.0000 0.33 \n", "75% 0.000000 0.0000 0.33 \n", "max 0.020000 0.0200 0.33 \n", "\n", "[8 rows x 99 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "field_296_100rows.describe()" ] }, { "cell_type": "markdown", "id": "32116af4-c2c9-418e-899d-f66292966588", "metadata": {}, "source": [ "#### List all columns found in prediction files" ] }, { "cell_type": "code", "execution_count": 5, "id": "5e9b6e37-a30a-44cb-9c88-4c989ad8e2b4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['_id',\n", " 'Gaia_EDR3___id',\n", " 'AllWISE___id',\n", " 'PS1_DR1___id',\n", " 'ra',\n", " 'dec',\n", " 'period',\n", " 'field',\n", " 'ccd',\n", " 'quad',\n", " 'filter',\n", " 'sin_dnn',\n", " 'rscvn_dnn',\n", " 'emsms_dnn',\n", " 'ea_dnn',\n", " 'rrlyr_dnn',\n", " 'el_dnn',\n", " 'saw_dnn',\n", " 'bogus_dnn',\n", " 'wuma_dnn',\n", " 'e_dnn',\n", " 'yso_dnn',\n", " 'ceph_dnn',\n", " 'cv_dnn',\n", " 'srv_dnn',\n", " 'fla_dnn',\n", " 'blyr_dnn',\n", " 'wp_dnn',\n", " 'eb_dnn',\n", " 'blher_dnn',\n", " 'hp_dnn',\n", " 'ext_dnn',\n", " 'dip_dnn',\n", " 'rrab_dnn',\n", " 'vnv_dnn',\n", " 'pnp_dnn',\n", " 'wvir_dnn',\n", " 'dp_dnn',\n", " 'bright_dnn',\n", " 
'ceph2_dnn',\n", " 'dscu_dnn',\n", " 'bis_dnn',\n", " 'osarg_dnn',\n", " 'rrblz_dnn',\n", " 'i_dnn',\n", " 'puls_dnn',\n", " 'mp_dnn',\n", " 'rrc_dnn',\n", " 'mir_dnn',\n", " 'agn_dnn',\n", " 'longt_dnn',\n", " 'blend_dnn',\n", " 'ew_dnn',\n", " 'lpv_dnn',\n", " 'rrd_dnn',\n", " 'rrd_xgb',\n", " 'wvir_xgb',\n", " 'dp_xgb',\n", " 'blher_xgb',\n", " 'lpv_xgb',\n", " 'agn_xgb',\n", " 'dscu_xgb',\n", " 'puls_xgb',\n", " 'rrc_xgb',\n", " 'mir_xgb',\n", " 'e_xgb',\n", " 'rscvn_xgb',\n", " 'rrlyr_xgb',\n", " 'bis_xgb',\n", " 'emsms_xgb',\n", " 'mp_xgb',\n", " 'ew_xgb',\n", " 'bogus_xgb',\n", " 'ea_xgb',\n", " 'vnv_xgb',\n", " 'pnp_xgb',\n", " 'longt_xgb',\n", " 'dip_xgb',\n", " 'cv_xgb',\n", " 'blend_xgb',\n", " 'ext_xgb',\n", " 'wuma_xgb',\n", " 'srv_xgb',\n", " 'fla_xgb',\n", " 'bright_xgb',\n", " 'el_xgb',\n", " 'ceph_xgb',\n", " 'eb_xgb',\n", " 'wp_xgb',\n", " 'hp_xgb',\n", " 'i_xgb',\n", " 'blyr_xgb',\n", " 'yso_xgb',\n", " 'saw_xgb',\n", " 'ceph2_xgb',\n", " 'rrab_xgb',\n", " 'sin_xgb',\n", " 'osarg_xgb',\n", " 'rrblz_xgb']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Plain Python list of the dataframe's column names, in dataframe order.\n", "df_columns = list(field_296_100rows.columns)\n", "df_columns" ] }, { "cell_type": "markdown", "id": "8c69df69-f902-4a35-b1f3-1427c8d885ab", "metadata": {}, "source": [ "#### 99 total columns" ] }, { "cell_type": "code", "execution_count": 6, "id": "43383b84-6b46-4e6b-98c9-2ba9b5384403", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "99" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df_columns)" ] }, { "cell_type": "markdown", "id": "2b6bd50f-4b4a-4f4b-a139-5293574c8309", "metadata": {}, "source": [ "#### Dataframe columns begin with ZTF ID and Gaia/AllWISE/Pan-STARRS IDs" ] }, { "cell_type": "code", "execution_count": 7, "id": "05c0d38b-7cc0-4887-b06b-e0e1b5f78567", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['_id', 'Gaia_EDR3___id', 'AllWISE___id', 'PS1_DR1___id']" ] }, 
"execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_columns[0:4]" ] }, { "cell_type": "markdown", "id": "5ac65805-769e-4183-abb7-5bb10bdf9dfd", "metadata": {}, "source": [ "#### Next columns include ra/dec, computed period, ZTF field/CCD/quadrant/filter of observations" ] }, { "cell_type": "code", "execution_count": 8, "id": "4056636d-032e-44e4-ab08-d7d31e9b76c1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['ra', 'dec', 'period', 'field', 'ccd', 'quad', 'filter']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_columns[4:11]" ] }, { "cell_type": "markdown", "id": "18906ea8-0be4-45a8-ab9d-442bcf728d73", "metadata": {}, "source": [ "#### Next are DNN classification columns, one per class, each containing probabilities from 0-1" ] }, { "cell_type": "code", "execution_count": 9, "id": "e789cfc0-a7a4-4950-9c4e-976db80c2bd5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['sin_dnn',\n", " 'rscvn_dnn',\n", " 'emsms_dnn',\n", " 'ea_dnn',\n", " 'rrlyr_dnn',\n", " 'el_dnn',\n", " 'saw_dnn',\n", " 'bogus_dnn',\n", " 'wuma_dnn',\n", " 'e_dnn',\n", " 'yso_dnn',\n", " 'ceph_dnn',\n", " 'cv_dnn',\n", " 'srv_dnn',\n", " 'fla_dnn',\n", " 'blyr_dnn',\n", " 'wp_dnn',\n", " 'eb_dnn',\n", " 'blher_dnn',\n", " 'hp_dnn',\n", " 'ext_dnn',\n", " 'dip_dnn',\n", " 'rrab_dnn',\n", " 'vnv_dnn',\n", " 'pnp_dnn',\n", " 'wvir_dnn',\n", " 'dp_dnn',\n", " 'bright_dnn',\n", " 'ceph2_dnn',\n", " 'dscu_dnn',\n", " 'bis_dnn',\n", " 'osarg_dnn',\n", " 'rrblz_dnn',\n", " 'i_dnn',\n", " 'puls_dnn',\n", " 'mp_dnn',\n", " 'rrc_dnn',\n", " 'mir_dnn',\n", " 'agn_dnn',\n", " 'longt_dnn',\n", " 'blend_dnn',\n", " 'ew_dnn',\n", " 'lpv_dnn',\n", " 'rrd_dnn']" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select by the '_dnn' suffix rather than by position, so the list stays\n", "# correct if columns are added to or removed from the prediction file.\n", "[name for name in df_columns if name.endswith('_dnn')]" ] }, { "cell_type": "markdown", "id": "9586adb7-c7e0-451e-85bf-cdf2a1118771", "metadata": {}, "source": [ "#### Columns conclude with 
XGB classifications " ] }, { "cell_type": "code", "execution_count": 10, "id": "f4738d54-032e-43e4-89a8-6b77c593a23d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['rrd_xgb',\n", " 'wvir_xgb',\n", " 'dp_xgb',\n", " 'blher_xgb',\n", " 'lpv_xgb',\n", " 'agn_xgb',\n", " 'dscu_xgb',\n", " 'puls_xgb',\n", " 'rrc_xgb',\n", " 'mir_xgb',\n", " 'e_xgb',\n", " 'rscvn_xgb',\n", " 'rrlyr_xgb',\n", " 'bis_xgb',\n", " 'emsms_xgb',\n", " 'mp_xgb',\n", " 'ew_xgb',\n", " 'bogus_xgb',\n", " 'ea_xgb',\n", " 'vnv_xgb',\n", " 'pnp_xgb',\n", " 'longt_xgb',\n", " 'dip_xgb',\n", " 'cv_xgb',\n", " 'blend_xgb',\n", " 'ext_xgb',\n", " 'wuma_xgb',\n", " 'srv_xgb',\n", " 'fla_xgb',\n", " 'bright_xgb',\n", " 'el_xgb',\n", " 'ceph_xgb',\n", " 'eb_xgb',\n", " 'wp_xgb',\n", " 'hp_xgb',\n", " 'i_xgb',\n", " 'blyr_xgb',\n", " 'yso_xgb',\n", " 'saw_xgb',\n", " 'ceph2_xgb',\n", " 'rrab_xgb',\n", " 'sin_xgb',\n", " 'osarg_xgb',\n", " 'rrblz_xgb']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Select by the '_xgb' suffix rather than by position, so the list stays\n", "# correct if columns are added to or removed from the prediction file.\n", "[name for name in df_columns if name.endswith('_xgb')]" ] }, { "cell_type": "markdown", "id": "24440618-baa8-473a-bbb6-17d16d39436f", "metadata": {}, "source": [ "#### This code down-sampled the full field_296.csv data from 380,571 rows to 100 rows" ] }, { "cell_type": "code", "execution_count": 11, "id": "8271f4d7-e726-4f35-a874-ff4aa8d5ff75", "metadata": {}, "outputs": [], "source": [ "# Rebuilds field_296_100rows.csv from the full prediction file.\n", "# Off by default: it needs preds_dnn_xgb/field_296.csv and overwrites the sample.\n", "REGENERATE_SAMPLE = False\n", "\n", "if REGENERATE_SAMPLE:\n", "    field_296 = pd.read_csv(\"preds_dnn_xgb/field_296.csv\")\n", "    # A fixed random_state makes the 100-row draw repeatable.\n", "    field_296_sample = field_296.sample(n=100, frac=None, replace=False, weights=None, random_state=1, axis=None, ignore_index=False)\n", 
"    field_296_sample.to_csv('field_296_100rows.csv', index=False)" ] }, { "cell_type": "markdown", "id": "ba978685-59ba-4550-95bf-c67002910d70", "metadata": {}, "source": [ "#### Scatter plot shows the ra/dec values of the 100 randomized rows above" ] }, { "cell_type": "code", "execution_count": 12, "id": "9e6cc6d7-2153-4567-8f5e-46426748c9cc", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Look the coordinates up by column label, not by position, so the plot\n", "# does not depend on where 'ra' and 'dec' sit in the CSV.\n", "ra = field_296_100rows['ra']\n", "dec = field_296_100rows['dec']\n", "plt.scatter(ra,dec)\n", "plt.xlabel('RA [deg]',fontsize=14)\n", "plt.ylabel('Dec [deg]',fontsize=14)\n", "plt.xticks(fontsize=14)\n", "plt.yticks(fontsize=14)\n", "plt.tight_layout()" ] }, { "cell_type": "markdown", "id": "262e4e19-5e0d-4de2-a3ff-bd805b4c0391", "metadata": {}, "source": [ "#### Histogram shows the classification probability distributions of specific DNN and XGB classifiers" ] }, { "cell_type": "code", "execution_count": 13, "id": "5673afe3-a97f-4685-a0e0-14c460877b00", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Number of Light Curves')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Overlaid histograms of the 'vnv_dnn' and 'vnv_xgb' probability columns.\n", "# Both share the same 10 bins over [0, 1] so the bars line up.\n", "shared_bins = dict(bins=10, range=(0, 1))\n", "plt.hist(field_296_100rows['vnv_dnn'], label='vnv_dnn', **shared_bins)\n", "# alpha=0.7 keeps the first histogram visible through the second.\n", "plt.hist(field_296_100rows['vnv_xgb'], alpha=0.7, label='vnv_xgb', **shared_bins)\n", "plt.legend()\n", "plt.xlabel('Classification Probability')\n", "plt.ylabel('Number of Light Curves')" ] }, { "cell_type": "code", "execution_count": null, "id": "5b2aa06e-7ea7-457c-b7d9-a54155d8695a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }