#!/usr/bin/env python
from __future__ import print_function

import collections
import json
import numpy as np
import os
import re
import sys


def get_dirs(regex, path="."):
    """Return the entries of *path* whose joined path matches *regex*.

    Each name returned by ``os.listdir(path)`` is joined onto *path*, and the
    joined string (not the bare name) is tested with ``re.match``, so the
    pattern is anchored at the start of the joined path (``./name`` for the
    default *path*). Matches are returned in ``os.listdir`` order; no check
    is made that a match is actually a directory.
    """
    pattern = re.compile(regex)
    candidates = (os.path.join(path, name) for name in os.listdir(path))
    return [candidate for candidate in candidates if pattern.match(candidate)]

def get_universe_from_callstring(s):
    """Return the two path components that follow ``FixedWorlds/opt/`` in *s*.

    Scanning starts right after the first occurrence of the anchor and
    advances past two delimiters, where a delimiter is whichever of ``/`` or
    a space comes first. The text between the anchor and the second
    delimiter is returned, e.g. ``"x FixedWorlds/opt/blocks/t1/p01.pddl"``
    yields ``"blocks/t1"``. If the string ends before two delimiters have
    been seen, everything after the anchor is returned.

    Raises:
        AssertionError: if *s* does not contain the anchor. The exception
            type matches the previous ``assert False`` but is raised
            explicitly so the check survives ``python -O``.
    """
    anker = "FixedWorlds/opt/"
    idx = s.find(anker)
    if idx == -1:
        raise AssertionError("%r not found in call string %r" % (anker, s))
    idx += len(anker)
    idx_end = idx
    for _ in range(2):
        # str.find returns -1 for "not found"; such results must be dropped
        # before taking the minimum, otherwise a missing delimiter kind
        # would win over a real hit and reset the scan position to 0.
        hits = [
            pos
            for pos in (s.find("/", idx_end), s.find(" ", idx_end))
            if pos != -1
        ]
        if not hits:
            # No further delimiter: the universe runs to the end of s.
            return s[idx:]
        idx_end = min(hits) + 1
    # idx_end is one past the second delimiter; exclude the delimiter.
    return s[idx: idx_end - 1]


# Key in a job's stats dict that holds its {set type: size} mapping.
DATA_SET_SIZES = u"data_set_sizes"
# Names of the individual data sets whose sizes are collected.
TRAIN_SET = "train"
TEST_SET = "test"
VALIDATION_SET = "validation"
# All set types, in the order get_dir_data looks them up.
SET_TYPES = [TRAIN_SET, VALIDATION_SET, TEST_SET]

# Key in a job's stats dict that holds the call string the universe is
# parsed from.
CALLSTRING = "call_string"
def get_dir_data(path):
    """Print data set size statistics for the jobs recorded under *path*.

    Reads ``<path>/job_stats.json``. For every entry that has a
    ``DATA_SET_SIZES`` mapping, the positive sizes of each set type are
    grouped by the universe parsed from the entry's ``CALLSTRING``. The
    directory path is printed first, followed by one line per universe with
    the count, minimum, median and maximum size of every set type. A set
    type without any positive size is reported as ``len 0``.

    Returns ``None``; nothing is printed when the stats file is missing.
    """
    path_stats = os.path.join(path, "job_stats.json")
    if not os.path.isfile(path_stats):
        return
    with open(path_stats, "r") as f:
        all_stats = json.load(f)

    # {universe: {set type: [size1, size2, ...]}}
    def factory():
        return {k: [] for k in SET_TYPES}
    data_size = collections.defaultdict(factory)
    for stats in all_stats.values():
        if DATA_SET_SIZES not in stats:
            continue
        job_sizes = stats[DATA_SET_SIZES]
        present = [set_key for set_key in SET_TYPES if set_key in job_sizes]
        if not present:
            continue
        # The call string is only parsed for entries that report at least
        # one known set type, and only once per entry.
        universe = get_universe_from_callstring(stats[CALLSTRING])
        for set_key in present:
            tsize = job_sizes[set_key]
            if tsize > 0:
                data_size[universe][set_key].append(tsize)

    print(path)
    for universe, set_sizes in data_size.items():
        parts = ["\t%s" % universe]
        for set_type, tsizes in set_sizes.items():
            if not tsizes:
                # The factory creates a list for every set type, so a
                # universe may have no sizes for some of them; min()/max()
                # would raise ValueError on the empty list.
                parts.append("\t>%s: len 0<" % set_type)
                continue
            parts.append(
                "\t>%s: len %i, min %i, median %.1f, max %i<" % (
                    set_type, len(tsizes), min(tsizes), np.median(tsizes),
                    max(tsizes)
                )
            )
        print("".join(parts))


def run(args):
    """Report data set statistics for every directory matching a regex.

    *args* must contain exactly one element: the regular expression handed
    to ``get_dirs``. ``get_dir_data`` is then called on each returned path.
    """
    assert len(args) == 1
    pattern = args[0]
    for job_dir in get_dirs(pattern):
        get_dir_data(job_dir)


# Script entry point: pass the command-line arguments (without the program
# name) to run().
if __name__ == "__main__":
    run(sys.argv[1:])