# NOTE(review): this call appears before `import h2o` below, so executed
# top-to-bottom as a plain script it would raise NameError. Presumably it was
# a notebook cell run while a previous session still had h2o imported, to stop
# the old cluster before re-initialising — confirm before reusing as a script.
h2o.shutdown()
import pandas as pd
import random
import warnings
# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
import h2o
# Connect to (or start) a local H2O cluster, passing min_mem_size='25G'.
h2o.init(min_mem_size='25G')
# Relative base paths: input data files and saved model grids.
DATA_LOCATION = "../../data/"
MODELS_LOCATION = "../../models/ALL_FEATURES/"
Checking whether there is an H2O instance running at http://localhost:54321 ..... not found. Attempting to start a local H2O server... Java Version: openjdk version "1.8.0_265"; OpenJDK Runtime Environment (build 1.8.0_265-8u265-b01-0ubuntu2~16.04-b01); OpenJDK 64-Bit Server VM (build 25.265-b01, mixed mode) Starting server from /anaconda/envs/azureml_py36/lib/python3.6/site-packages/h2o/backend/bin/h2o.jar Ice root: /tmp/tmpwddo4_m7 JVM stdout: /tmp/tmpwddo4_m7/h2o_azureuser_started_from_python.out JVM stderr: /tmp/tmpwddo4_m7/h2o_azureuser_started_from_python.err Server is running at http://127.0.0.1:54321 Connecting to H2O server at http://127.0.0.1:54321 ... successful. Warning: Your H2O cluster version is too old (5 months and 14 days)! Please download and install the latest version from http://h2o.ai/download/
H2O_cluster_uptime: | 12 secs |
H2O_cluster_timezone: | Etc/UTC |
H2O_data_parsing_timezone: | UTC |
H2O_cluster_version: | 3.30.0.4 |
H2O_cluster_version_age: | 5 months and 14 days !!! |
H2O_cluster_name: | H2O_from_python_azureuser_lza322 |
H2O_cluster_total_nodes: | 1 |
H2O_cluster_free_memory: | 23.96 Gb |
H2O_cluster_total_cores: | 4 |
H2O_cluster_allowed_cores: | 4 |
H2O_cluster_status: | accepting new members, healthy |
H2O_connection_url: | http://127.0.0.1:54321 |
H2O_connection_proxy: | {"http": null, "https": null} |
H2O_internal_security: | False |
H2O_API_Extensions: | Amazon S3, XGBoost, Algos, AutoML, Core V3, TargetEncoder, Core V4 |
Python_version: | 3.6.9 final |
# Parse the processed training table (TSV) into an H2OFrame.
train = h2o.import_file(f"{DATA_LOCATION}processed/final.train.tsv")
# Notebook display: show the leading rows of the frame.
train.head()
Parse progress: |██████████████████████████████████████████████████████████| 100%
SampleID | NC000962_3.22 | NC000962_3.434 | NC000962_3.524 | NC000962_3.645 | NC000962_3.648 | NC000962_3.654 | NC000962_3.666 | NC000962_3.675 | NC000962_3.678 | NC000962_3.693 | NC000962_3.698 | NC000962_3.699 | NC000962_3.702 | NC000962_3.705 | NC000962_3.708 | NC000962_3.717 | NC000962_3.729 | NC000962_3.741 | NC000962_3.744 | NC000962_3.747 | NC000962_3.750 | NC000962_3.756 | NC000962_3.770 | NC000962_3.777 | NC000962_3.780 | NC000962_3.783 | NC000962_3.793 | NC000962_3.795 | NC000962_3.799 | NC000962_3.801 | NC000962_3.805 | NC000962_3.822 | NC000962_3.840 | NC000962_3.846 | NC000962_3.849 | NC000962_3.852 | NC000962_3.1045 | NC000962_3.1049 | NC000962_3.1089 | NC000962_3.1123 | NC000962_3.1152 | NC000962_3.1153 | NC000962_3.1155 | NC000962_3.1161 | NC000962_3.1164 | NC000962_3.1166 | NC000962_3.1167 | NC000962_3.1176 | NC000962_3.1206 | NC000962_3.1212 | NC000962_3.1255 | NC000962_3.1278 | NC000962_3.1291 | NC000962_3.1302 | NC000962_3.1326 | NC000962_3.1389 | NC000962_3.1399 | NC000962_3.1413 | NC000962_3.1416 | NC000962_3.1422 | NC000962_3.1429 | NC000962_3.1431 | NC000962_3.1432 | NC000962_3.1452 | NC000962_3.1458 | NC000962_3.1461 | NC000962_3.1470 | NC000962_3.1473 | NC000962_3.1474 | NC000962_3.1653 | NC000962_3.1676 | NC000962_3.1699 | NC000962_3.1703 | NC000962_3.1708 | NC000962_3.1718 | NC000962_3.1729 | NC000962_3.1771 | NC000962_3.1827 | NC000962_3.1849 | NC000962_3.1918 | NC000962_3.1977 | NC000962_3.2532 | NC000962_3.2745 | NC000962_3.3352 | NC000962_3.3446 | NC000962_3.4013 | NC000962_3.4086 | NC000962_3.4096 | NC000962_3.4119 | NC000962_3.4938 | NC000962_3.5075 | NC000962_3.5627 | NC000962_3.5782 | NC000962_3.5790 | NC000962_3.5791 | NC000962_3.5803 | NC000962_3.5807 | NC000962_3.5824 | NC000962_3.5839 | NC000962_3.5848 | NC000962_3.5856 | NC000962_3.5858 | NC000962_3.5860 | NC000962_3.5902 | NC000962_3.6003 | NC000962_3.6013 | NC000962_3.6112 | NC000962_3.6280 | NC000962_3.6286 | NC000962_3.6292 | NC000962_3.6307 | NC000962_3.6362 | 
NC000962_3.6382 | NC000962_3.6388 | NC000962_3.6403 | NC000962_3.6430 | NC000962_3.6436 | NC000962_3.6439 | NC000962_3.6445 | NC000962_3.6452 | NC000962_3.6453 | NC000962_3.6502 | NC000962_3.6508 | NC000962_3.6511 | NC000962_3.6515 | NC000962_3.6520 | NC000962_3.6535 | NC000962_3.6547 | NC000962_3.6550 | NC000962_3.6551 | NC000962_3.6553 | NC000962_3.6571 | NC000962_3.6575 | NC000962_3.6579 | NC000962_3.6586 | NC000962_3.6620 | NC000962_3.6638 | NC000962_3.6695 | NC000962_3.6735 | NC000962_3.6738 | NC000962_3.6742 | NC000962_3.6749 | NC000962_3.6750 | NC000962_3.6807 | NC000962_3.6878 | NC000962_3.6881 | NC000962_3.7058 | NC000962_3.7088 | NC000962_3.7170 | NC000962_3.7268 | NC000962_3.7355 | NC000962_3.7362 | NC000962_3.7496 | NC000962_3.7563 | NC000962_3.7564 | NC000962_3.7566 | NC000962_3.7567 | NC000962_3.7570 | NC000962_3.7572 | NC000962_3.7581 | NC000962_3.7582 | NC000962_3.7585 | NC000962_3.7607 | NC000962_3.7631 | NC000962_3.7637 | NC000962_3.7652 | NC000962_3.7658 | NC000962_3.7664 | NC000962_3.7683 | NC000962_3.7685 | NC000962_3.7694 | NC000962_3.7710 | NC000962_3.7712 | NC000962_3.7725 | NC000962_3.7728 | NC000962_3.7730 | NC000962_3.7892 | NC000962_3.8040 | NC000962_3.8164 | NC000962_3.8201 | NC000962_3.8434 | NC000962_3.8452 | NC000962_3.8519 | NC000962_3.8619 | NC000962_3.8624 | NC000962_3.9023 | NC000962_3.9032 | NC000962_3.9034 | NC000962_3.9050 | NC000962_3.9051 | NC000962_3.9113 | NC000962_3.9119 | NC000962_3.9134 | NC000962_3.9143 | NC000962_3.9145 | NC000962_3.9147 | NC000962_3.9153 | NC000962_3.9154 | NC000962_3.9155 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SRR10525336 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR10380004 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR6807701 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR11033700 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR1163101 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR7592336 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR1163415 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR6458388 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR5153333 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR5152963 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# Parse the processed test table (TSV) into an H2OFrame.
test = h2o.import_file(f"{DATA_LOCATION}processed/final.test.tsv")
# Notebook display: show the leading rows of the frame.
test.head()
Parse progress: |█████████████████████████████████████████████████████████| 100%
SampleID | NC000962_3.22 | NC000962_3.434 | NC000962_3.524 | NC000962_3.645 | NC000962_3.648 | NC000962_3.654 | NC000962_3.666 | NC000962_3.675 | NC000962_3.678 | NC000962_3.693 | NC000962_3.698 | NC000962_3.699 | NC000962_3.702 | NC000962_3.705 | NC000962_3.708 | NC000962_3.717 | NC000962_3.729 | NC000962_3.741 | NC000962_3.744 | NC000962_3.747 | NC000962_3.750 | NC000962_3.756 | NC000962_3.770 | NC000962_3.777 | NC000962_3.780 | NC000962_3.783 | NC000962_3.793 | NC000962_3.795 | NC000962_3.799 | NC000962_3.801 | NC000962_3.805 | NC000962_3.822 | NC000962_3.840 | NC000962_3.846 | NC000962_3.849 | NC000962_3.852 | NC000962_3.1045 | NC000962_3.1049 | NC000962_3.1089 | NC000962_3.1123 | NC000962_3.1152 | NC000962_3.1153 | NC000962_3.1155 | NC000962_3.1161 | NC000962_3.1164 | NC000962_3.1166 | NC000962_3.1167 | NC000962_3.1176 | NC000962_3.1206 | NC000962_3.1212 | NC000962_3.1255 | NC000962_3.1278 | NC000962_3.1291 | NC000962_3.1302 | NC000962_3.1326 | NC000962_3.1389 | NC000962_3.1399 | NC000962_3.1413 | NC000962_3.1416 | NC000962_3.1422 | NC000962_3.1429 | NC000962_3.1431 | NC000962_3.1432 | NC000962_3.1452 | NC000962_3.1458 | NC000962_3.1461 | NC000962_3.1470 | NC000962_3.1473 | NC000962_3.1474 | NC000962_3.1653 | NC000962_3.1676 | NC000962_3.1699 | NC000962_3.1703 | NC000962_3.1708 | NC000962_3.1718 | NC000962_3.1729 | NC000962_3.1771 | NC000962_3.1827 | NC000962_3.1849 | NC000962_3.1918 | NC000962_3.1977 | NC000962_3.2532 | NC000962_3.2745 | NC000962_3.3352 | NC000962_3.3446 | NC000962_3.4013 | NC000962_3.4086 | NC000962_3.4096 | NC000962_3.4119 | NC000962_3.4938 | NC000962_3.5075 | NC000962_3.5627 | NC000962_3.5782 | NC000962_3.5790 | NC000962_3.5791 | NC000962_3.5803 | NC000962_3.5807 | NC000962_3.5824 | NC000962_3.5839 | NC000962_3.5848 | NC000962_3.5856 | NC000962_3.5858 | NC000962_3.5860 | NC000962_3.5902 | NC000962_3.6003 | NC000962_3.6013 | NC000962_3.6112 | NC000962_3.6280 | NC000962_3.6286 | NC000962_3.6292 | NC000962_3.6307 | NC000962_3.6362 | 
NC000962_3.6382 | NC000962_3.6388 | NC000962_3.6403 | NC000962_3.6430 | NC000962_3.6436 | NC000962_3.6439 | NC000962_3.6445 | NC000962_3.6452 | NC000962_3.6453 | NC000962_3.6502 | NC000962_3.6508 | NC000962_3.6511 | NC000962_3.6515 | NC000962_3.6520 | NC000962_3.6535 | NC000962_3.6547 | NC000962_3.6550 | NC000962_3.6551 | NC000962_3.6553 | NC000962_3.6571 | NC000962_3.6575 | NC000962_3.6579 | NC000962_3.6586 | NC000962_3.6620 | NC000962_3.6638 | NC000962_3.6695 | NC000962_3.6735 | NC000962_3.6738 | NC000962_3.6742 | NC000962_3.6749 | NC000962_3.6750 | NC000962_3.6807 | NC000962_3.6878 | NC000962_3.6881 | NC000962_3.7058 | NC000962_3.7088 | NC000962_3.7170 | NC000962_3.7268 | NC000962_3.7355 | NC000962_3.7362 | NC000962_3.7496 | NC000962_3.7563 | NC000962_3.7564 | NC000962_3.7566 | NC000962_3.7567 | NC000962_3.7570 | NC000962_3.7572 | NC000962_3.7581 | NC000962_3.7582 | NC000962_3.7585 | NC000962_3.7607 | NC000962_3.7631 | NC000962_3.7637 | NC000962_3.7652 | NC000962_3.7658 | NC000962_3.7664 | NC000962_3.7683 | NC000962_3.7685 | NC000962_3.7694 | NC000962_3.7710 | NC000962_3.7712 | NC000962_3.7725 | NC000962_3.7728 | NC000962_3.7730 | NC000962_3.7892 | NC000962_3.8040 | NC000962_3.8164 | NC000962_3.8201 | NC000962_3.8434 | NC000962_3.8452 | NC000962_3.8519 | NC000962_3.8619 | NC000962_3.8624 | NC000962_3.9023 | NC000962_3.9032 | NC000962_3.9034 | NC000962_3.9050 | NC000962_3.9051 | NC000962_3.9113 | NC000962_3.9119 | NC000962_3.9134 | NC000962_3.9143 | NC000962_3.9145 | NC000962_3.9147 | NC000962_3.9153 | NC000962_3.9154 | NC000962_3.9155 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ERR3335735 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR8552929 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR067629 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR067714 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
SRR5065314 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR067659 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR067590 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR688027 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR3335727 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
ERR3335759 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# Split each frame's columns into predictors and response: every column except
# the sample identifier and the response column is treated as a predictor.
train_response_col = "Resistance_Status"
train_predictor_cols = train.columns
for _excluded in ('SampleID', train_response_col):
    train_predictor_cols.remove(_excluded)
# Print only the first and last predictor names as a quick sanity check.
print("train frame - predictor column: ", train_predictor_cols[0], train_predictor_cols[-1])
print("train frame - response column: ", train_response_col)

# Same split for the test frame.
test_response_col = "Resistance_Status"
test_predictor_cols = test.columns
for _excluded in ('SampleID', test_response_col):
    test_predictor_cols.remove(_excluded)
print("test frame - predictor columns: ", test_predictor_cols[0], test_predictor_cols[-1])
print("test frame - response column: ", test_response_col)
train frame - predictor column: NC000962_3.22 NC000962_3.4411327 train frame - response column: Resistance_Status test frame - predictor columns: NC000962_3.22 NC000962_3.4411327 test frame - response column: Resistance_Status
# Binary classification needs a factor (categorical) response, so convert the
# response column of both frames in place.
for _frame, _label in ((train, train_response_col), (test, test_response_col)):
    _frame[_label] = _frame[_label].asfactor()
# Cross-validation fold count (used to generate level-one data for stacking).
nfolds = 5
# NOTE(review): presumably the cap on models per grid search; it is not used
# in the visible part of the file — confirm.
MAX_GRID_MODELS = 10
# Short aliases for the predictor column names and the response column name.
x, y = train_predictor_cols, train_response_col
# Reload one saved grid per algorithm from the FINAL directory under
# MODELS_LOCATION.
_FINAL_DIR = MODELS_LOCATION + "FINAL/"
nb_grid = h2o.load_grid(_FINAL_DIR + "nb_grid/Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1")
glm_grid = h2o.load_grid(_FINAL_DIR + "glm_grid/Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1")
gbm_grid = h2o.load_grid(_FINAL_DIR + "gbm_grid/Grid_GBM_py_7_sid_9651_model_python_1604407520638_1")
xgb_grid = h2o.load_grid(_FINAL_DIR + "xgb_grid/Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1")
dl_grid = h2o.load_grid(_FINAL_DIR + "dl_grid/Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1")
drf_grid = h2o.load_grid(_FINAL_DIR + "drf_grid/Grid_DRF_py_1_sid_a778_model_python_1605298287301_1")
def best_model_from_grid(model_grid):
    """Print per-model metrics for a grid and return its top-ranked model.

    The grid is re-sorted with ``get_grid(sort_by='auc', decreasing=True)``.
    For every model, in that order, the model id is printed followed by the
    AUC and AUCPR of its validation metrics (labelled "Test data" in the
    output) and of its cross-validation metrics.

    NOTE(review): the original comment said the best model is chosen "based
    on performance on the test data", but in the recorded run the models are
    listed in decreasing *cross-validation* AUC while the "Test data" AUC is
    not monotonic. Confirm which metric ``get_grid`` ranks on before relying
    on this as a test-set selection.

    Args:
        model_grid: a grid object exposing ``get_grid(sort_by, decreasing)``
            whose result is iterable and indexable, and whose models provide
            ``model_id`` and ``model_performance(valid=..., xval=...)``.

    Returns:
        The first model of the sorted grid.
    """
    sorted_grid = model_grid.get_grid(sort_by='auc', decreasing=True)
    for mdl in sorted_grid:
        print("Modeld ID: ", mdl.model_id)
        # Look up each metrics object once and reuse it for both AUC and AUCPR.
        valid_perf = mdl.model_performance(valid=True)
        print('Default Test data AUC: ', valid_perf.auc())
        print('Default Test data AUCPR: ', valid_perf.aucpr())
        xval_perf = mdl.model_performance(xval=True)
        print('Default Cross-validation AUC: ', xval_perf.auc())
        print('Default Cross-validation AUCPR: ', xval_perf.aucpr())
        print("\n--------------------\n")
    # Separator marking the end of this grid's listing.
    print("\n@@@@@@@@@@@@@@@@@@@@@@@\n")
    return sorted_grid[0]
# Take the top-ranked model from each loaded grid. Each call also prints the
# metrics of every model in that grid, so the call order fixes the order of
# the printed listings.
best_nb_model = best_model_from_grid(nb_grid)
best_glm_model = best_model_from_grid(glm_grid)
best_gbm_model = best_model_from_grid(gbm_grid)
best_xgb_model= best_model_from_grid(xgb_grid)
best_drf_model= best_model_from_grid(drf_grid)
best_dl_model= best_model_from_grid(dl_grid)
Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_2 Default Test data AUC: 0.6037862562166479 Default Test data AUCPR: 0.7338591199790186 Default Cross-validation AUC: 0.7146276595744682 Default Cross-validation AUCPR: 0.7825978708385057 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_8 Default Test data AUC: 0.5874142374850255 Default Test data AUCPR: 0.7206948614316347 Default Cross-validation AUC: 0.7076684397163121 Default Cross-validation AUCPR: 0.7874589576147568 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_4 Default Test data AUC: 0.6308581696736486 Default Test data AUCPR: 0.7700108089513915 Default Cross-validation AUC: 0.6892390661938534 Default Cross-validation AUCPR: 0.7895239621957413 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_7 Default Test data AUC: 0.5496424293026464 Default Test data AUCPR: 0.6961915462338801 Default Cross-validation AUC: 0.6501359338061465 Default Cross-validation AUCPR: 0.7671060162658365 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_9 Default Test data AUC: 0.5455403492213308 Default Test data AUCPR: 0.7034848829829791 Default Cross-validation AUC: 0.5885195035460993 Default Cross-validation AUCPR: 0.7252646916074332 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_6 Default Test data AUC: 0.5057901041855738 Default Test data AUCPR: 0.6863806675631351 Default Cross-validation AUC: 0.5441725768321513 Default Cross-validation AUCPR: 0.71174751299447 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_1 Default Test data AUC: 0.5 Default Test data AUCPR: 0.6746506986027944 Default Cross-validation AUC: 0.49874999999999997 Default Cross-validation AUCPR: 0.6535633621201896 -------------------- Modeld ID: 
Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_5 Default Test data AUC: 0.49419174501760627 Default Test data AUCPR: 0.672087836940381 Default Cross-validation AUC: 0.49437499999999995 Default Cross-validation AUCPR: 0.6515795328002594 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_10 Default Test data AUC: 0.5 Default Test data AUCPR: 0.6746506986027944 Default Cross-validation AUC: 0.4831131796690307 Default Cross-validation AUCPR: 0.641963388575653 -------------------- Modeld ID: Grid_NaiveBayes_py_3_sid_9644_model_python_1604419067081_1_model_3 Default Test data AUC: 0.5 Default Test data AUCPR: 0.6746506986027944 Default Cross-validation AUC: 0.4831131796690307 Default Cross-validation AUCPR: 0.641963388575653 -------------------- @@@@@@@@@@@@@@@@@@@@@@@ Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_6 Default Test data AUC: 0.7188169310632736 Default Test data AUCPR: 0.8666210752127083 Default Cross-validation AUC: 0.9118498817966902 Default Cross-validation AUCPR: 0.9516216504562457 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_10 Default Test data AUC: 0.7198152248883726 Default Test data AUCPR: 0.8670408051456762 Default Cross-validation AUC: 0.9116903073286051 Default Cross-validation AUCPR: 0.9512677662998757 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_7 Default Test data AUC: 0.7198152248883726 Default Test data AUCPR: 0.8670408051456762 Default Cross-validation AUC: 0.9116903073286051 Default Cross-validation AUCPR: 0.9512677662998757 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_1 Default Test data AUC: 0.7338639416270374 Default Test data AUCPR: 0.8728658954340602 Default Cross-validation AUC: 0.9107136524822695 Default Cross-validation AUCPR: 0.9493837481679572 -------------------- Modeld ID: 
Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_2 Default Test data AUC: 0.7338639416270374 Default Test data AUCPR: 0.8728658954340602 Default Cross-validation AUC: 0.9107136524822695 Default Cross-validation AUCPR: 0.9493837481679572 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_3 Default Test data AUC: 0.7338639416270374 Default Test data AUCPR: 0.8728658954340602 Default Cross-validation AUC: 0.9107136524822695 Default Cross-validation AUCPR: 0.9493837481679572 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_4 Default Test data AUC: 0.7338639416270374 Default Test data AUCPR: 0.8728658954340602 Default Cross-validation AUC: 0.9107136524822695 Default Cross-validation AUCPR: 0.9493837481679572 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_5 Default Test data AUC: 0.7338639416270374 Default Test data AUCPR: 0.8728658954340602 Default Cross-validation AUC: 0.9107136524822695 Default Cross-validation AUCPR: 0.9493837481679572 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_9 Default Test data AUC: 0.5604784550041746 Default Test data AUCPR: 0.7175657959955974 Default Cross-validation AUC: 0.9072089243498818 Default Cross-validation AUCPR: 0.9397585792621111 -------------------- Modeld ID: Grid_GLM_py_3_sid_b7a1_model_python_1604419221083_1_model_8 Default Test data AUC: 0.7334101717065379 Default Test data AUCPR: 0.8683588311840348 Default Cross-validation AUC: 0.9064627659574468 Default Cross-validation AUCPR: 0.9441750865491364 -------------------- @@@@@@@@@@@@@@@@@@@@@@@ Modeld ID: Grid_GBM_py_7_sid_9651_model_python_1604407520638_1_model_7 Default Test data AUC: 0.684212436925981 Default Test data AUCPR: 0.8280141965080744 Default Cross-validation AUC: 0.9054151891252956 Default Cross-validation AUCPR: 0.9453339359832847 -------------------- Modeld ID: 
Grid_GBM_py_7_sid_9651_model_python_1604407520638_1_model_5 Default Test data AUC: 0.6715885577376847 Default Test data AUCPR: 0.826548720005301 Default Cross-validation AUC: 0.8784219858156028 Default Cross-validation AUCPR: 0.9223871766728456 -------------------- Modeld ID: Grid_GBM_py_7_sid_9651_model_python_1604407520638_1_model_3 Default Test data AUC: 0.6682941881148583 Default Test data AUCPR: 0.8145884494739706 Default Cross-validation AUC: 0.8664420803782505 Default Cross-validation AUCPR: 0.9058203885907979 -------------------- Modeld ID: Grid_GBM_py_7_sid_9651_model_python_1604407520638_1_model_10 Default Test data AUC: 0.5869604675645261 Default Test data AUCPR: 0.7804763126501624 Default Cross-validation AUC: 0.8242966903073287 Default Cross-validation AUCPR: 0.8877821456396922 -------------------- Modeld ID: Grid_GBM_py_7_sid_9651_model_python_1604407520638_1_model_1 Default Test data AUC: 0.5575834029113879 Default Test data AUCPR: 0.7210722379282546 Default Cross-validation AUC: 0.7838489952718677 Default Cross-validation AUCPR: 0.8605782999012571 -------------------- Modeld ID: Grid_GBM_py_7_sid_9651_model_python_1604407520638_1_model_9 Default Test data AUC: 0.5702072820996842 Default Test data AUCPR: 0.7400116656909861 Default Cross-validation AUC: 0.7267109929078014 Default Cross-validation AUCPR: 0.8324621340896753 -------------------- @@@@@@@@@@@@@@@@@@@@@@@ Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_5 Default Test data AUC: 0.7019820670127418 Default Test data AUCPR: 0.8496929083542203 Default Cross-validation AUC: 0.9238918439716312 Default Cross-validation AUCPR: 0.9589354885090616 -------------------- Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_4 Default Test data AUC: 0.7161487639307366 Default Test data AUCPR: 0.8582261180336427 Default Cross-validation AUC: 0.9155112293144209 Default Cross-validation AUCPR: 0.9525693950745017 -------------------- Modeld ID: 
Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_8 Default Test data AUC: 0.5577013830907177 Default Test data AUCPR: 0.7438412313740578 Default Cross-validation AUC: 0.8589775413711584 Default Cross-validation AUCPR: 0.9150906428929088 -------------------- Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_10 Default Test data AUC: 0.6182978182742223 Default Test data AUCPR: 0.7647172827791048 Default Cross-validation AUC: 0.8099586288416076 Default Cross-validation AUCPR: 0.8750487809308819 -------------------- Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_9 Default Test data AUC: 0.6161378734526446 Default Test data AUCPR: 0.7634064859407909 Default Cross-validation AUC: 0.8086805555555555 Default Cross-validation AUCPR: 0.8744998950155298 -------------------- Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_1 Default Test data AUC: 0.6169728101063637 Default Test data AUCPR: 0.7630921376254759 Default Cross-validation AUC: 0.8085549645390071 Default Cross-validation AUCPR: 0.8744937286734646 -------------------- Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_3 Default Test data AUC: 0.5011253494028387 Default Test data AUCPR: 0.6502733284764274 Default Cross-validation AUC: 0.7754949763593381 Default Cross-validation AUCPR: 0.8531381544915924 -------------------- Modeld ID: Grid_XGBoost_py_7_sid_a3b5_model_python_1604427337744_1_model_7 Default Test data AUC: 0.5 Default Test data AUCPR: 0.6746506986027944 Default Cross-validation AUC: 0.4831131796690307 Default Cross-validation AUCPR: 0.641963388575653 -------------------- @@@@@@@@@@@@@@@@@@@@@@@ Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_6 Default Test data AUC: 0.6008821287254511 Default Test data AUCPR: 0.7841285482627114 Default Cross-validation AUC: 0.9106057919621748 Default Cross-validation AUCPR: 0.9483337168894878 -------------------- Modeld ID: 
Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_1 Default Test data AUC: 0.6068083638871747 Default Test data AUCPR: 0.7753372363755578 Default Cross-validation AUC: 0.8768321513002364 Default Cross-validation AUCPR: 0.9259769128578671 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_2 Default Test data AUC: 0.6078520347043235 Default Test data AUCPR: 0.7915193475796458 Default Cross-validation AUC: 0.874878841607565 Default Cross-validation AUCPR: 0.9280168513518671 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_5 Default Test data AUC: 0.6279812683776818 Default Test data AUCPR: 0.7820136792189998 Default Cross-validation AUC: 0.8741060874704492 Default Cross-validation AUCPR: 0.9305761398103337 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_9 Default Test data AUC: 0.6183522706646822 Default Test data AUCPR: 0.7756329737679835 Default Cross-validation AUC: 0.8613549054373522 Default Cross-validation AUCPR: 0.9169181801730608 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_7 Default Test data AUC: 0.6362761825244128 Default Test data AUCPR: 0.787957081209626 Default Cross-validation AUC: 0.8424748817966903 Default Cross-validation AUCPR: 0.904943934166661 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_8 Default Test data AUC: 0.6227901404871673 Default Test data AUCPR: 0.750315464411094 Default Cross-validation AUC: 0.8207432033096926 Default Cross-validation AUCPR: 0.8915766929798337 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_3 Default Test data AUC: 0.5929320797182996 Default Test data AUCPR: 0.7554018678680515 Default Cross-validation AUC: 0.7799689716312057 Default Cross-validation AUCPR: 0.8558810702598428 -------------------- Modeld ID: 
Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_4 Default Test data AUC: 0.6297418956692199 Default Test data AUCPR: 0.7428183485238725 Default Cross-validation AUC: 0.7753782505910166 Default Cross-validation AUCPR: 0.8512090422804438 -------------------- Modeld ID: Grid_DRF_py_1_sid_a778_model_python_1605298287301_1_model_10 Default Test data AUC: 0.6051566413765564 Default Test data AUCPR: 0.7241147449125007 Default Cross-validation AUC: 0.7753073286052009 Default Cross-validation AUCPR: 0.8549228100389666 -------------------- @@@@@@@@@@@@@@@@@@@@@@@ Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_9 Default Test data AUC: 0.578656478019385 Default Test data AUCPR: 0.7323312602617683 Default Cross-validation AUC: 0.8896705082742317 Default Cross-validation AUCPR: 0.9196671289329701 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_7 Default Test data AUC: 0.6661614694885105 Default Test data AUCPR: 0.8219899274397526 Default Cross-validation AUC: 0.8847000591016548 Default Cross-validation AUCPR: 0.9315874078345767 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_5 Default Test data AUC: 0.6653991360220713 Default Test data AUCPR: 0.810199555986887 Default Cross-validation AUC: 0.8656663711583924 Default Cross-validation AUCPR: 0.9155166852905657 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_1 Default Test data AUC: 0.6338258249537154 Default Test data AUCPR: 0.7872361184530449 Default Cross-validation AUC: 0.861455378250591 Default Cross-validation AUCPR: 0.9098486749791119 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_4 Default Test data AUC: 0.6324282135985769 Default Test data AUCPR: 0.7574548130868846 Default Cross-validation AUC: 0.832826536643026 Default Cross-validation AUCPR: 0.8920855038102905 
-------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_6 Default Test data AUC: 0.5956819254365266 Default Test data AUCPR: 0.7460317227447683 Default Cross-validation AUC: 0.8109057328605201 Default Cross-validation AUCPR: 0.8676611089744922 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_3 Default Test data AUC: 0.6207481758449196 Default Test data AUCPR: 0.7623634420995526 Default Cross-validation AUC: 0.7828088061465721 Default Cross-validation AUCPR: 0.8563348923043178 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_8 Default Test data AUC: 0.566658801321378 Default Test data AUCPR: 0.7176973818771828 Default Cross-validation AUC: 0.7075812647754137 Default Cross-validation AUCPR: 0.7794225121429629 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_10 Default Test data AUC: 0.5873960866882056 Default Test data AUCPR: 0.7348146207458401 Default Cross-validation AUC: 0.559524231678487 Default Cross-validation AUCPR: 0.7189255150153914 -------------------- Modeld ID: Grid_DeepLearning_py_1_sid_8226_model_python_1605416513239_1_model_2 Default Test data AUC: 0.5 Default Test data AUCPR: 0.6746506986027944 Default Cross-validation AUC: 0.5062573877068558 Default Cross-validation AUCPR: 0.6547294689180135 -------------------- @@@@@@@@@@@@@@@@@@@@@@@
def extract_params_from_model(actual_params_dict, extra_params=None, additional_keys=None):
    """Build estimator keyword arguments from a model's parameter dictionary.

    The bookkeeping entries that describe one specific training run
    (``model_id``, ``training_frame``, ``validation_frame``,
    ``response_column`` and ``ignored_columns``) are dropped, together with
    any names listed in ``extra_params``. The entries of ``additional_keys``
    are then merged on top and win on key collisions.

    The input dictionary is never modified; a new dictionary is returned.
    Names to drop that are not present in the input are silently ignored.

    Args:
        actual_params_dict: Mapping of parameter name to value, e.g. the
            ``actual_params`` of a trained model.
        extra_params: Optional iterable of additional parameter names to drop.
        additional_keys: Optional mapping of parameters to add or override.

    Returns:
        A new ``dict`` suitable for ``Estimator(**result)``.
    """
    excluded = {
        'model_id',
        'validation_frame',
        'response_column',
        'ignored_columns',
        'training_frame',
    }
    # `None` defaults avoid sharing one mutable default object across calls.
    excluded.update(extra_params or ())

    # Filter into a fresh dict instead of `del`-ing from the caller's object,
    # so the source dictionary stays intact and can be reused.
    final_params = {
        name: value
        for name, value in actual_params_dict.items()
        if name not in excluded
    }
    final_params.update(additional_keys or {})
    return final_params
from h2o.estimators import H2ONaiveBayesEstimator
# Load the saved Naive Bayes model; only its hyper-parameters are reused below.
# Alternative source (same session as the grid search): base_nb = best_nb_model
base_nb = h2o.load_model(
    MODELS_LOCATION + "FINAL/top_nb/NaiveBayes_model_python_1605423034668_1"
)

# Fresh estimator configured with the saved model's parameters, minus the
# run-specific entries that extract_params_from_model removes.
top_nb = H2ONaiveBayesEstimator(
    **extract_params_from_model(base_nb.actual_params)
)
top_nb.train(training_frame=train, validation_frame=test, x=x, y=y)
# To persist the retrained model:
# h2o.save_model(top_nb, MODELS_LOCATION + "FINAL/top_nb")

# `test` was supplied as the validation frame, so valid=True yields its metrics.
test_perf = top_nb.model_performance(valid=True)
print('AUC on test data: ', test_perf.auc(), "\n\n============================")
test_perf
from h2o.estimators import H2OGeneralizedLinearEstimator
# Load the saved GLM; only its hyper-parameters are reused below.
# Alternative source (same session as the grid search): base_glm = best_glm_model
base_glm = h2o.load_model(
    MODELS_LOCATION + "FINAL/top_glm/GLM_model_python_1605423034668_20"
)

# 'lambda' is dropped in addition to the run-specific entries, so it is not
# passed on to the new estimator.
top_glm = H2OGeneralizedLinearEstimator(
    **extract_params_from_model(base_glm.actual_params, ['lambda'])
)
top_glm.train(training_frame=train, validation_frame=test, x=x, y=y)
# To persist the retrained model:
# h2o.save_model(top_glm, MODELS_LOCATION + "FINAL/top_glm")

# `test` was supplied as the validation frame, so valid=True yields its metrics.
test_perf = top_glm.model_performance(valid=True)
print('AUC on test data: ', test_perf.auc(), "\n\n============================")
test_perf
from h2o.estimators import H2OGradientBoostingEstimator
# Load the saved GBM; only its hyper-parameters are reused below.
# Alternative source (same session as the grid search): base_gbm = best_gbm_model
base_gbm = h2o.load_model(
    MODELS_LOCATION + "FINAL/top_gbm/GBM_model_python_1605423034668_39"
)

# Fresh estimator configured with the saved model's parameters, minus the
# run-specific entries that extract_params_from_model removes.
top_gbm = H2OGradientBoostingEstimator(
    **extract_params_from_model(base_gbm.actual_params)
)
top_gbm.train(training_frame=train, validation_frame=test, x=x, y=y)
# To persist the retrained model:
# h2o.save_model(top_gbm, MODELS_LOCATION + "FINAL/top_gbm")

# `test` was supplied as the validation frame, so valid=True yields its metrics.
test_perf = top_gbm.model_performance(valid=True)
print('AUC on test data: ', test_perf.auc(), "\n\n============================")
test_perf
from h2o.estimators import H2OXGBoostEstimator
# Load the saved XGBoost model; only its hyper-parameters are reused below.
# Alternative source (same session as the grid search): base_xgb = best_xgb_model
base_xgb = h2o.load_model(
    MODELS_LOCATION + "FINAL/top_xgb/XGBoost_model_python_1605423034668_274"
)

# Fresh estimator configured with the saved model's parameters, minus the
# run-specific entries that extract_params_from_model removes.
top_xgb = H2OXGBoostEstimator(
    **extract_params_from_model(base_xgb.actual_params)
)
top_xgb.train(training_frame=train, validation_frame=test, x=x, y=y)
# To persist the retrained model:
# h2o.save_model(top_xgb, MODELS_LOCATION + "FINAL/top_xgb")

# `test` was supplied as the validation frame, so valid=True yields its metrics.
test_perf = top_xgb.model_performance(valid=True)
print('AUC on test data: ', test_perf.auc(), "\n\n============================")
test_perf
from h2o.estimators import H2ODeepLearningEstimator
# Load the saved Deep Learning model; only its hyper-parameters are reused below.
# Alternative source (same session as the grid search): base_dl = best_dl_model
base_dl = h2o.load_model(
    MODELS_LOCATION + "FINAL/top_dl/DeepLearning_model_python_1605423034668_341"
)

# Fresh estimator configured with the saved model's parameters, minus the
# run-specific entries that extract_params_from_model removes.
top_dl = H2ODeepLearningEstimator(
    **extract_params_from_model(base_dl.actual_params)
)
top_dl.train(training_frame=train, validation_frame=test, x=x, y=y)
# To persist the retrained model:
# h2o.save_model(top_dl, MODELS_LOCATION + "FINAL/top_dl")

# `test` was supplied as the validation frame, so valid=True yields its metrics.
test_perf = top_dl.model_performance(valid=True)
print('AUC on test data: ', test_perf.auc(), "\n\n============================")
test_perf
deeplearning Model Build progress: |██████████████████████████
from h2o.estimators import H2ORandomForestEstimator
# Load the saved Random Forest; only its hyper-parameters are reused below.
# MODELS_LOCATION already ends in "ALL_FEATURES/", so the path is built as
# "FINAL/top_drf/..." like every other base model (and like the save call
# below). The previous literal, "ALL_FEATURES/FINAL/top_drf/...", pointed at
# <models>/ALL_FEATURES/ALL_FEATURES/FINAL/top_drf/.
# NOTE(review): if the model really was saved under the doubled directory,
# move it rather than restoring the old path.
# Alternative source (same session as the grid search): base_drf = best_drf_model
base_drf = h2o.load_model(
    MODELS_LOCATION + "FINAL/top_drf/DRF_model_python_1605423034668_386"
)

# 'weights_column' is dropped in addition to the run-specific entries, so it
# is not passed on to the new estimator.
top_drf = H2ORandomForestEstimator(
    **extract_params_from_model(
        base_drf.actual_params,
        extra_params=['weights_column'],
    )
)
top_drf.train(x=x, y=y, training_frame=train, validation_frame=test)
# To persist the retrained model:
# h2o.save_model(top_drf, MODELS_LOCATION + "FINAL/top_drf")

# `test` was supplied as the validation frame, so valid=True yields its metrics.
test_perf = top_drf.model_performance(valid=True)
print('AUC on test data: ', test_perf.auc(), "\n\n============================")
test_perf
drf Model Build progress: |██████████████████████████████████████████████| 100% AUC on test data: 0.6008821287254511 ============================ ModelMetricsBinomial: drf ** Reported on validation data. ** MSE: 0.22893121618870146 RMSE: 0.4784675706761133 LogLoss: 0.6453296660547845 Mean Per-Class Error: 0.3933186916905652 AUC: 0.6008821287254511 AUCPR: 0.7841285482627114 Gini: 0.2017642574509022 Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.1517463897101641:
0 | 1 | Error | Rate | ||
---|---|---|---|---|---|
0 | 0 | 4.0 | 159.0 | 0.9755 | (159.0/163.0) |
1 | 1 | 2.0 | 336.0 | 0.0059 | (2.0/338.0) |
2 | Total | 6.0 | 495.0 | 0.3214 | (161.0/501.0) |
Maximum Metrics: Maximum metrics at their respective thresholds
metric | threshold | value | idx | |
---|---|---|---|---|
0 | max f1 | 0.151746 | 0.806723 | 393.0 |
1 | max f2 | 0.084360 | 0.912527 | 398.0 |
2 | max f0point5 | 0.435588 | 0.738355 | 326.0 |
3 | max accuracy | 0.322021 | 0.680639 | 355.0 |
4 | max precision | 0.997516 | 1.000000 | 0.0 |
5 | max recall | 0.084360 | 1.000000 | 398.0 |
6 | max specificity | 0.997516 | 1.000000 | 0.0 |
7 | max absolute_mcc | 0.803863 | 0.242278 | 84.0 |
8 | max min_per_class_accuracy | 0.621817 | 0.529586 | 194.0 |
9 | max mean_per_class_accuracy | 0.803863 | 0.606681 | 84.0 |
10 | max tns | 0.997516 | 163.000000 | 0.0 |
11 | max fns | 0.997516 | 337.000000 | 0.0 |
12 | max fps | 0.076816 | 163.000000 | 399.0 |
13 | max tps | 0.084360 | 338.000000 | 398.0 |
14 | max tnr | 0.997516 | 1.000000 | 0.0 |
15 | max fnr | 0.997516 | 0.997041 | 0.0 |
16 | max fpr | 0.076816 | 1.000000 | 399.0 |
17 | max tpr | 0.084360 | 1.000000 | 398.0 |
Gains/Lift Table: Avg response rate: 67.47 %, avg score: 62.33 %
group | cumulative_data_fraction | lower_threshold | lift | cumulative_lift | response_rate | score | cumulative_response_rate | cumulative_score | capture_rate | cumulative_capture_rate | gain | cumulative_gain | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.011976 | 0.996163 | 1.482249 | 1.482249 | 1.00 | 0.996860 | 1.000000 | 0.996860 | 0.017751 | 0.017751 | 48.224852 | 48.224852 | |
1 | 2 | 0.021956 | 0.986897 | 1.482249 | 1.482249 | 1.00 | 0.989985 | 1.000000 | 0.993735 | 0.014793 | 0.032544 | 48.224852 | 48.224852 | |
2 | 3 | 0.031936 | 0.975071 | 1.482249 | 1.482249 | 1.00 | 0.980056 | 1.000000 | 0.989460 | 0.014793 | 0.047337 | 48.224852 | 48.224852 | |
3 | 4 | 0.041916 | 0.947489 | 1.482249 | 1.482249 | 1.00 | 0.957667 | 1.000000 | 0.981890 | 0.014793 | 0.062130 | 48.224852 | 48.224852 | |
4 | 5 | 0.051896 | 0.925490 | 1.482249 | 1.482249 | 1.00 | 0.934246 | 1.000000 | 0.972728 | 0.014793 | 0.076923 | 48.224852 | 48.224852 | |
5 | 6 | 0.101796 | 0.876634 | 1.245089 | 1.365994 | 0.84 | 0.893317 | 0.921569 | 0.933801 | 0.062130 | 0.139053 | 24.508876 | 36.599373 | |
6 | 7 | 0.151697 | 0.849182 | 1.363669 | 1.365229 | 0.92 | 0.860490 | 0.921053 | 0.909686 | 0.068047 | 0.207101 | 36.366864 | 36.522890 | |
7 | 8 | 0.201597 | 0.818601 | 1.185799 | 1.320816 | 0.80 | 0.835263 | 0.891089 | 0.891264 | 0.059172 | 0.266272 | 18.579882 | 32.081551 | |
8 | 9 | 0.301397 | 0.746936 | 0.800414 | 1.148497 | 0.54 | 0.778456 | 0.774834 | 0.853910 | 0.079882 | 0.346154 | -19.958580 | 14.849720 | |
9 | 10 | 0.401198 | 0.692021 | 0.948639 | 1.098781 | 0.64 | 0.720675 | 0.741294 | 0.820767 | 0.094675 | 0.440828 | -5.136095 | 9.878124 | |
10 | 11 | 0.500998 | 0.630109 | 0.800414 | 1.039346 | 0.54 | 0.659035 | 0.701195 | 0.788550 | 0.079882 | 0.520710 | -19.958580 | 3.934558 | |
11 | 12 | 0.600798 | 0.582242 | 1.007929 | 1.034127 | 0.68 | 0.602314 | 0.697674 | 0.757614 | 0.100592 | 0.621302 | 0.792899 | 3.412687 | |
12 | 13 | 0.700599 | 0.535456 | 1.096864 | 1.043064 | 0.74 | 0.560335 | 0.703704 | 0.729511 | 0.109467 | 0.730769 | 9.686391 | 4.306377 | |
13 | 14 | 0.800399 | 0.459822 | 1.037574 | 1.042379 | 0.70 | 0.499175 | 0.703242 | 0.700791 | 0.103550 | 0.834320 | 3.757396 | 4.237926 | |
14 | 15 | 0.900200 | 0.331218 | 0.948639 | 1.031987 | 0.64 | 0.403983 | 0.696231 | 0.667885 | 0.094675 | 0.928994 | -5.136095 | 3.198677 | |
15 | 16 | 1.000000 | 0.076816 | 0.711479 | 1.000000 | 0.48 | 0.221345 | 0.674651 | 0.623320 | 0.071006 | 1.000000 | -28.852071 | 0.000000 |
# Hyper-parameter presets, keyed first by algorithm name and then by feature-set
# variant ('pca' / 'non_pca').
# NOTE(review): 'pca' presumably refers to models trained on PCA-reduced
# features and 'non_pca' to the full feature set -- confirm.
all_model_hyperparams = {
    # ---- Naive Bayes ----
    'naivebayes': {
        'pca': {
            'laplace': 0.6,
            'min_sdev': 0.1,
            'min_prob': 0.1,
            'eps_sdev': 0.1,
            'eps_prob': 0.3,
        },
        'non_pca': {
            'laplace': 0.3,
            'min_sdev': 0.9,
            'min_prob': 0.1,
            'eps_sdev': 1,
            'eps_prob': 0.1,
        },
    },
    # ---- Generalized Linear Model ----
    'glm': {
        'pca': {
            'alpha': [0.0],
            'theta': 1,
            'tweedie_link_power': 0,
            'tweedie_variance_power': 3,
        },
        'non_pca': {
            'alpha': [1.0],
            'theta': 0.3,
            'tweedie_link_power': 0,
            'tweedie_variance_power': 9,
        },
    },
    # ---- Gradient Boosting Machine ----
    'gbm': {
        'pca': {
            'learn_rate': 0.9,
            'learn_rate_annealing': 1,
            'distribution': 'bernoulli',
            'quantile_alpha': 0.3,
            'tweedie_power': 1.5,
            'balance_classes': False,
            'ntrees': 150,
            'max_depth': 10,
            'sample_rate': 0.9,
            'col_sample_rate': 0.3,
            'col_sample_rate_per_tree': 1,
            'col_sample_rate_change_per_level': 1.3,
            'histogram_type': 'RoundRobin',
        },
        'non_pca': {
            'learn_rate': 0.1,
            'learn_rate_annealing': 0.9,
            'distribution': 'bernoulli',
            'quantile_alpha': 1,
            'tweedie_power': 1.9,
            'balance_classes': False,
            'ntrees': 50,
            'max_depth': 5,
            'sample_rate': 0.9,
            'col_sample_rate': 0.3,
            'col_sample_rate_per_tree': 0.6,
            'col_sample_rate_change_per_level': 0.8,
            'histogram_type': 'Random',
        },
    },
    # ---- Distributed Random Forest ----
    'drf': {
        'pca': {
            # 'mtries': 150,  # doesn't work for some reason
            'balance_classes': True,
            'ntrees': 100,
            'max_depth': 10,
            'sample_rate': 0.6,
            'col_sample_rate_per_tree': 0.3,
            'col_sample_rate_change_per_level': 0.8,
            'histogram_type': 'Auto',
        },
        'non_pca': {
            'mtries': -1,
            'balance_classes': True,
            'ntrees': 50,
            'max_depth': 10,
            'sample_rate': 0.3,
            'col_sample_rate_per_tree': 0.6,
            'col_sample_rate_change_per_level': 1.7,
            'histogram_type': 'RoundRobin',
        },
    },
    # ---- XGBoost ----
    'xgboost': {
        'pca': {
            'distribution': 'multinomial',
            'categorical_encoding': 'auto',
            'ntrees': 70,
            'booster': 'gbtree',
            'col_sample_rate': 0.6,
            'col_sample_rate_bylevel': 0.6,
            'col_sample_rate_bytree': 0.6,
            'learn_rate': 0.1,
            'grow_policy': 'lossguide',
            'max_depth': 6,
            'normalize_type': 'forest',
            'sample_type': 'uniform',
            'sample_rate': 1,
            'tree_method': 'hist',
            'tweedie_power': 1.5,
        },
        'non_pca': {
            'distribution': 'bernoulli',
            'categorical_encoding': 'label_encoder',
            'ntrees': 50,
            'booster': 'dart',
            'col_sample_rate': 0.8,
            'col_sample_rate_bylevel': 0.8,
            'col_sample_rate_bytree': 0.3,
            'learn_rate': 0.1,
            'grow_policy': 'depthwise',
            'max_depth': 6,
            'normalize_type': 'forest',
            'sample_type': 'weighted',
            'sample_rate': 1,
            'tree_method': 'hist',
            'tweedie_power': 1.5,
        },
    },
    # ---- Deep Learning ----
    'deeplearning': {
        'pca': {
            'distribution': 'bernoulli',
            'epochs': 20.399,
            'loss': 'CrossEntropy',
            'l1': 1e-5,
            'l2': 0,
            'sparse': False,
            'balance_classes': False,
            'average_activation': 10,
            'activation': 'TanH',
            'hidden': [500, 500, 500],
            'input_dropout_ratio': 0.2,
            'rho': 0.95,
            'standardize': False,
        },
        'non_pca': {
            'distribution': 'bernoulli',
            'epochs': 31.1822,
            'loss': 'Automatic',
            'l1': 0,
            'l2': 0,
            'sparse': False,
            'balance_classes': False,
            'average_activation': 0,
            'activation': 'RectifierWithDropout',
            'hidden': [500, 500, 500],
            'input_dropout_ratio': 0,
            'rho': 0.9,
            'standardize': True,
        },
    },
}
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
# Stacked ensembles over ALL six base models, one ensemble per metalearner.
collection_of_models = [
    top_nb,
    top_glm,
    # checkpoint-enabled models
    top_gbm,  # based on boosting, like XGB
    top_xgb,
    top_dl,
    top_drf,
]
meta_algos = ["xgboost", "drf", "gbm", "glm", "naivebayes", "deeplearning"]
all_models_ensembles_list = []

for metalearner in meta_algos:
    print("\n\n>>>>> ", metalearner, " <<<<<<")

    # Settings shared by every metalearner. Building them once replaces the
    # two near-identical constructor calls (the recorded run of this cell
    # failed with an IndentationError at the `if` that chose between them).
    ensemble_kwargs = dict(
        base_models=collection_of_models,
        model_id="stacked_ensemble_ALL_FEATURES_ALL_MODELS_metalearner_" + metalearner,
        metalearner_algorithm=metalearner,
        metalearner_nfolds=5,
        metalearner_fold_assignment='random',
        seed=1234,
    )
    # The xgboost and naivebayes metalearners run with H2O defaults; all
    # others receive the 'non_pca' preset as metalearner_params.
    if metalearner not in ('xgboost', 'naivebayes'):
        ensemble_kwargs['metalearner_params'] = all_model_hyperparams[metalearner]['non_pca']

    ensemble = H2OStackedEnsembleEstimator(**ensemble_kwargs)
    ensemble.train(x=x, y=y, training_frame=train, validation_frame=test)
    h2o.save_model(ensemble, MODELS_LOCATION + "FINAL/top_ensemble_ALL_MODELS_METALEARNER_" + metalearner)
    # `test` is the validation frame, so valid=True reports test-set metrics.
    print("AUC on test data: ", ensemble.model_performance(valid=True).auc())
    all_models_ensembles_list.append(ensemble)
File "<tokenize>", line 21 if metalearner == 'xgboost' or metalearner == 'naivebayes': ^ IndentationError: unindent does not match any outer indentation level
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
# Stacked ensembles over the checkpoint-capable base models only
# (GBM, XGBoost, Deep Learning, DRF), one ensemble per metalearner.
collection_of_models = [
    top_gbm,  # based on boosting, like XGB
    top_xgb,
    top_dl,
    top_drf,
]
meta_algos = ["xgboost", "drf", "gbm", "glm", "naivebayes", "deeplearning"]
checkpoint_ensembles_list = []

for metalearner in meta_algos:
    # Repaired: this print call and the condition below were garbled in the
    # source and did not parse.
    print("\n\n>>>>> ", metalearner, " <<<<<<")

    # Settings shared by every metalearner; only metalearner_params varies.
    ensemble_kwargs = dict(
        base_models=collection_of_models,
        model_id="stacked_ensemble_ALL_FEATURES_CHECKPOINT_MODELS_metalearner_" + metalearner,
        metalearner_algorithm=metalearner,
        metalearner_nfolds=5,
        metalearner_fold_assignment='random',
        seed=1234,
    )
    # The xgboost and naivebayes metalearners run with H2O defaults; all
    # others receive the 'non_pca' preset as metalearner_params.
    if metalearner not in ('xgboost', 'naivebayes'):
        ensemble_kwargs['metalearner_params'] = all_model_hyperparams[metalearner]['non_pca']

    ensemble = H2OStackedEnsembleEstimator(**ensemble_kwargs)
    ensemble.train(x=x, y=y, training_frame=train, validation_frame=test)
    h2o.save_model(ensemble, MODELS_LOCATION + "FINAL/top_ensemble_CHECKPOINT_MODELS_METALEARNER_" + metalearner)
    # `test` is the validation frame, so valid=True reports test-set metrics.
    print("AUC on test data: ", ensemble.model_performance(valid=True).auc())
    checkpoint_ensembles_list.append(ensemble)
>>>>> auto <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.65737648382764 >>>>> xgboost <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6332994518459361 >>>>> drf <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6460866882056122 >>>>> gbm <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6414400842196972 >>>>> glm <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6688205612226377 >>>>> naivebayes <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6671506879151995 >>>>> deeplearning <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6729226413039533
from h2o.estimators.stackedensemble import H2OStackedEnsembleEstimator
# Stacked ensembles over the checkpoint-capable base models without GBM
# (XGBoost, Deep Learning, DRF), one ensemble per metalearner.
collection_of_models = [
    top_xgb,
    top_dl,
    top_drf,
]
meta_algos = ["xgboost", "drf", "gbm", "glm", "naivebayes", "deeplearning"]
min_checkpointable_ensemble_list = []

for metalearner in meta_algos:
    # Repaired: this print call and the condition below were garbled in the
    # source and did not parse.
    print("\n\n>>>>> ", metalearner, " <<<<<<")

    # Settings shared by every metalearner; only metalearner_params varies.
    ensemble_kwargs = dict(
        base_models=collection_of_models,
        model_id="stacked_ensemble_ALL_FEATURES_CHECKPOINT_nogbm_MODELS_metalearner_" + metalearner,
        metalearner_algorithm=metalearner,
        metalearner_nfolds=5,
        metalearner_fold_assignment='random',
        seed=1234,
    )
    # The xgboost and naivebayes metalearners run with H2O defaults; all
    # others receive the 'non_pca' preset as metalearner_params.
    if metalearner not in ('xgboost', 'naivebayes'):
        ensemble_kwargs['metalearner_params'] = all_model_hyperparams[metalearner]['non_pca']

    ensemble = H2OStackedEnsembleEstimator(**ensemble_kwargs)
    ensemble.train(x=x, y=y, training_frame=train, validation_frame=test)
    h2o.save_model(ensemble, MODELS_LOCATION + "FINAL/top_ensemble_CHECKPOINT_nogbm_MODELS_METALEARNER_" + metalearner)
    # `test` is the validation frame, so valid=True reports test-set metrics.
    print("AUC on test data: ", ensemble.model_performance(valid=True).auc())
    min_checkpointable_ensemble_list.append(ensemble)
>>>>> auto <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.65737648382764 >>>>> xgboost <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6418757033433767 >>>>> drf <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6575035394053799 >>>>> gbm <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6681580571387085 >>>>> glm <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6883871201945765 >>>>> naivebayes <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6510690819326969 >>>>> deeplearning <<<<<< stackedensemble Model Build progress: |███████████████████████████████████| 100% AUC on test data: 0.6737575779576724
# Summarise every no-GBM stacked ensemble: cross-validation vs. validation
# (test-frame) metrics.
# The ensembles built without GBM are collected in
# `min_checkpointable_ensemble_list`; the name previously used here,
# `no_gbm_meta_ensemble_list`, is not assigned by the cell that builds them.
for a_mdl in min_checkpointable_ensemble_list:
    xval_perf = a_mdl.model_performance(xval=True)
    valid_perf = a_mdl.model_performance(valid=True)
    print('Model ID: ', a_mdl.model_id)
    print('Training time (ms): ', a_mdl.run_time)
    print('XVal AUC: ', xval_perf.auc())
    # accuracy() yields [[threshold, value]]; [0][1] keeps only the value.
    print('XVal Accuracy: ', xval_perf.accuracy()[0][1])
    print('Validation data AUC: ', valid_perf.auc())
    print('Validation data Accuracy: ', valid_perf.accuracy()[0][1])
    print("-----------------------------")
# Plot the performance curves of one no-GBM stacked ensemble.
# The ensembles built without GBM are collected in
# `min_checkpointable_ensemble_list`; the name previously used here,
# `no_gbm_meta_ensemble_list`, is not assigned by the cell that builds them.
# Index 5 is the sixth metalearner in that cell's `meta_algos` order.
mdl = min_checkpointable_ensemble_list[5]
print('Model ID: ', mdl.model_id)
mdl.model_performance(xval=True).plot()   # cross-validation metrics
mdl.model_performance(valid=True).plot()  # validation (test-frame) metrics