#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
"keras_dep_mlp(tparams=ktparams(epochs=2,loss=mean_squared_error,batch=100,balance=False,optimizer=adam,callbacks=[keras_model_checkpoint(val_loss,./checkpoint.ckp),keras_progress_checking(acc,100,0.001,False),keras_early_stopping(val_loss,0.01,100),keras_restart(-1,stop_successful=True),keras_stoptimer(max_time=86400,per_training=False,prevent_reinit=True,timeout_as_failure=True)]),hidden=3,dependency=pre_post,dense_layers=3,output_units=-1,dropout=0,x_fields=[current_state,goals],y_fields=[hplan],formats=[hdf5,protobuf],graphdef=graphdef.txt,count_samples=True)" --prefix tmp_ -d ../DeePDown/data/FixedWorlds/opt/transport_var_roads/c10_t2_p2/ --input "gzip(suffix=.uniform.data.gz)" -o -n model --fields goals hplan current_state --skip --skip-if-running --skip-if-flag --skip-if-trained --maximum-data-memory 0.05GB -dp --format NonStatic_A_01
"keras_adp_mlp(tparams=ktparams(epochs=10,loss=mean_squared_error,batch=100,balance=False,optimizer=adam,callbacks=[keras_model_checkpoint(val_loss,./checkpoint.ckp),keras_progress_checking(val_loss,100,2,False,True),keras_early_stopping(val_loss,0.01,100),keras_restart(-1,stop_successful=True),keras_stoptimer(max_time=86400,per_training=False,prevent_reinit=True,timeout_as_failure=True)]),hidden=3,output_units=-2, ordinal_classification=true,bin_size=2,dropout=0,x_fields=[current_state,goals],y_fields=[hplan],formats=[hdf5,protobuf],graphdef=graphdef.txt,count_samples=True)" --prefix tmp_ -d ../DeePDown/data/FixedWorlds/opt/transport_var_roads/c10_t2_p2/ --input "gzip(suffix=.uniform.data.gz)" -o -n model --fields goals hplan current_state --skip --skip-if-running --maximum-data-memory 0.1GB -dp --format NonStatic_A_01
"keras_adp_mlp(tparams=ktparams(epochs=10,loss=mean_squared_error,batch=100,balance=False,optimizer=adam,callbacks=[keras_model_checkpoint(val_loss,./checkpoint.ckp),keras_progress_checking(val_loss,100,2,False,True),keras_early_stopping(val_loss,0.01,100),keras_stoptimer(max_time=86400,per_training=False,prevent_reinit=True,timeout_as_failure=True)]),hidden=2,residual_layers=[keras_residual_block(hidden_layer_count=2),keras_residual_block(hidden_layer_count=2)],output_units=-2,batch_normalization=1, ordinal_classification=true,bin_size=1,dropout=0,x_fields=[current_state,goals],y_fields=[hplan],learner_formats=[hdf5,protobuf],graphdef=graphdef.txt,count_samples=True)" --prefix tmp_3_fold_ -d ../DeePDown/data/FixedWorlds/opt/depot_fix_goals/depot_p05/ --input "gzip(suffix=.generator.plan.sat.data.gz)" -o -n model --fields goals hplan current_state --skip --skip-if-running --maximum-data-memory 0.1GB -dp --format NonStatic_A_01 --samples-total-training 1000

keras_mlp_encoder(encoder_hidden_layer_size=[-0.9], latent_space_size=-0.8, decoder_hidden_layer_size=[-0.9])
"keras_adp_mlp(tparams=ktparams(epochs=10,loss=mean_squared_error,batch=100,balance=True,optimizer=adam,callbacks=[keras_model_checkpoint(val_loss,./checkpoint.ckp),keras_progress_checking(val_loss,100,2,False,True),keras_early_stopping(val_loss,0.01,100),keras_restart(-1,stop_successful=True),keras_stoptimer(max_time=86400,per_training=False,prevent_reinit=True,timeout_as_failure=True)]),hidden=3,output_units=-2, ordinal_classification=true,bin_size=1,dropout=0,x_fields=[current_state,goals],y_fields=[hplan],formats=[hdf5,protobuf],graphdef=graphdef.txt,count_samples=True)" --prefix tmp_3_fold_ -d ../DeePDown/data/FixedWorlds/opt/depot_fix_goals/depot_p05/ --input "gzip(suffix=.generator.plan.sat.data.gz)" -o -n model --fields goals hplan current_state --skip --skip-if-running --maximum-data-memory 0.1GB -dp --format NonStatic_A_01 --samples-total-training 1000
"""
from __future__ import print_function

import disable_external_dependencies
SUPPRESS_LIBRARY_WARNINGS = False
stderr = disable_external_dependencies.suppress_library_warnings(
    SUPPRESS_LIBRARY_WARNINGS)

import tools
from tools import constants as tc
from tools import misc as tm
from tools import parsing as apt

from src.training.bridges import StateFormat, LoadSampleBridge
from src.training.bridges.sampling_bridges import MetaFields
from src.training.misc import DomainProperties
from src.training.misc import StreamContext
from src.training.learners import LearnerFormat
from src.training.samplers import DirectorySampler

import argparse
import collections
import datetime
from enum import Enum
import json
import matplotlib as mpl
mpl.use('agg')
import matplotlib.pyplot as plt
import numpy as np
import os
import psutil
import random
import re
import shlex
import sys
if sys.version_info < (3,):
    # Python 2: the 'subprocess32' package is used under the name
    # 'subprocess'.
    import subprocess32 as subprocess


    def decoder(s):
        """Python 2 variant of 'decoder': return 's.decode()'."""
        return s.decode()
else:
    # Python 3: the standard library 'subprocess' module is used.
    import subprocess


    def decoder(s):
        """Python 3 variant of 'decoder': return 's' unchanged."""
        return s
import time

disable_external_dependencies.unsuppress_library_warnings(stderr)


"""-------------------- Constants -------------------------------------------"""

# Matches the text "Submitted batch job <digits>"; group 1 holds the digits
# (possibly empty, because of '\d*').
# NOTE(review): presumably applied to the output of 'sbatch' - confirm at the
# call site.
REGEX_SUBMITTED_BATCH_JOB = re.compile(r"Submitted batch job (\d*)")
# Matches strings containing "_<digits>_fold_"; group 1 holds the digits.
REGEX_FOLD = re.compile(r".*_(\d+)_fold_.*")

# Command line token which separates the arguments of consecutive trainings
# (see 'split_training_blocks').
ARG_NEW_TRAINING = "--new-training"
SLURM_SUMMARIZE_KEY = "slurm_summarize"


class PruningTypes(Enum):
    """
    Pruning modes selectable via '--pruning'. The enum values are the strings
    accepted on the command line.
    """
    # No pruning at all
    Off = "off"
    # Entries are pruned between the data sets
    Inter = "inter"
    # Entries are pruned within a data set
    Intra = "intra"
    # Entries are first pruned within a data set, then between the data sets
    IntraInter = "intra_inter"


class SampleRequirementException(Exception):
    """
    Exception type of this script without additional behaviour.
    NOTE(review): judging by the name it signals unmet sample requirements -
    confirm where it is raised.
    """
    pass


"""------------------------- Parsing Stuff ------------------------------"""


def _init_join_samples_types():
    """
    Build the listing of all sample types for the '--sample-type' help text.
    :return: one "name:<TAB>description" entry per type in
        tc.SampleTypes.types(), entries separated by "\\n\\t"
    """
    entries = []
    for sample_type in tc.SampleTypes.types():
        entries.append(
            "%s:\t%s" % (sample_type.name, sample_type.description))
    return "\n\t".join(entries)


def _init_get_pruning_types_list():
    """
    :return: list of all members of PruningTypes in definition order (used as
        'choices' of the '--pruning' argument)
    """
    return list(PruningTypes)


# Restriction object shared by the '--test-split' and '--validation-split'
# arguments.
# NOTE(review): 'apt.check_buffer' is not visible here. Presumably the first
# callable provides the initial buffer value (0.0), the second one adds every
# parsed value to the buffer and the third one raises a ValueError once the
# accumulated fraction exceeds 1.0 - confirm against tools.parsing.
restrict_total_split_data = apt.check_buffer(
    lambda: 0.0,
    lambda c, a, _: c + a,
    lambda x: apt.raise_value_error(x > 1.0, "Too much to split off")
)


# Argument parser describing a single training. The description mentions
# ARG_NEW_TRAINING as the token to start the options of a further training.
ptrain = argparse.ArgumentParser(description="""Train network on previously 
sampled data. If no test data is given, then the validation data is used for 
the final evaluation. If neither validation data is given, the performance is 
evaluated on the training data. You can define additional trainings via adding 
%s and then adding the options for the next training""" % ARG_NEW_TRAINING)
# Mutually exclusive: '--execute' and '--slurm'
ptrain_mutex_execute = ptrain.add_mutually_exclusive_group()
# Mutually exclusive: '--domain-properties' and
# '--domain-properties-no-statics'
ptrain_mutex_dp = ptrain.add_mutually_exclusive_group()
# Mutually exclusive: '--cross-validation' and '--repetitions'
ptrain_mutex_multiple_iterations = ptrain.add_mutually_exclusive_group()

# Argument definitions, part 1: network definition, external execution
# arguments, cross validation, data directories and domain properties.
# Adjacent string literals are concatenated without any separator, therefore
# every help fragment has to carry its own separating space.
ptrain.add_argument("network", type=apt.learner,
                    help="Definition of the network.")
ptrain.add_argument("-a", "--args", type=str, default=None,
                    help="Single string describing a set of arguments to add "
                         "in front of all arguments if calling another script "
                         "for training execution (see '--execute').")
ptrain_mutex_multiple_iterations.add_argument(
    "--cross-validation", default=None,
    type=apt.int_positive,
    help="Works only together with '--execute' (todo make this "
         "work without '--execute'). Adds '--array=0-{N-1}' as "
         "first argument into '--args'. Sorts the problems "
         "and splits them into N folds of (close to equal) size. "
         "Provides after the arguments of '--args' N arguments "
         "representing regular expressions identifying the "
         "problems for each of the N folds.")
ptrain.add_argument("-d", "--directory", type=apt.absdir,
                    nargs="+", action="append", default=[],
                    help="Path to a list of directories from which to load the"
                         " training data. This argument can be given multiple"
                         " times. The execution of this script then equals"
                         " calling this script with the same arguments for "
                         "each group of directories "
                         "(if not --sub-directory-training, then the domain "
                         "file is required in the first directory of each "
                         "group).")
ptrain.add_argument("-df", "--directory-filter", type=re.compile,
                    action="append", default=[],
                    help="A subdirectory name has to match the regex otherwise "
                         "it is not traversed. By default no regex matches are "
                         "required. This argument can be given any number of "
                         "times to add additional filters (the directory name "
                         "has to match ALL regexes)")
ptrain_mutex_dp.add_argument("-dp", "--domain-properties", action="store_true",
                             help="If set and the networks supports it, "
                                  "then the network"
                                  " is provided with an analysis of "
                                  "properties of the problems domain")
ptrain_mutex_dp.add_argument("-dpns", "--domain-properties-no-statics",
                             action="store_true",
                             help="If set and the networks supports it, "
                                  "then the network is provided with an "
                                  "analysis "
                                  "of properties of the problems domain. This "
                                  "domain properties object does not analyse "
                                  "the "
                                  "static groundings (and everything "
                                  "depending).")
# Argument definitions, part 2: dry run, external execution, data fields,
# network initialization/finalization, state format, input streams,
# directory traversal depth, memory limit and merging of evaluations.
# Adjacent string literals are concatenated without any separator, therefore
# every help fragment has to carry its own separating space.
ptrain.add_argument("--dry",
                    action="store_true",
                    help="Tells only which trainings it would perform, "
                         "but does "
                         "not perform the training step.")
ptrain_mutex_execute.add_argument(
    "-e", "--execute", type=apt.isfile, default=None,
    help="Path to script to execute for the training runs. If none is given, "
         "then this script is used, otherwise, it calls an external script in "
         "a subprocess and passes its parameters")
ptrain.add_argument("--fields", type=str, nargs="+", default=[],
                    help=("List all fields of the data which shall be "
                          "loaded in the order they shall appear (if the"
                          " order is relevant for you)"))
ptrain.add_argument("-fin", "--finalize", nargs="+", default=[],
                    type=apt.split_type("="),
                    help="List some key=value pairs which are passed "
                         "as key=value to the networks finalize method.")
ptrain.add_argument("--forget", type=apt.float_interval(0.0, 1.0),
                    default=0.0,
                    help=("Probability of skipping to load entries of the "
                          "validation data"))
ptrain.add_argument("-f", "--format", choices=StateFormat.get_formats(),
                    default=None, type=StateFormat.get,
                    help=("State format name into which the loaded data shall "
                          "be converted (if not given, the preferred of the "
                          "network is chosen)"))
ptrain.add_argument("-init", "--initialize", nargs="+", default=[],
                    type=apt.split_type("="),
                    help="List some key=value pairs which are passed "
                         "as key=value to the networks initialize method.")
ptrain.add_argument("-i", "--input", type=apt.stream_definition,
                    action="append", default=[], required=True,
                    help="Define an input stream for the loading of samples "
                         "(use this option multiple times for multiple). The "
                         "available streams can be checked in "
                         "training.misc.stream_contexts.py "
                         "(the way this is done is for every problem file of"
                         " which data shall be loaded the stream is asked, "
                         "where would you store data for this file and then "
                         "the data at the location is loaded).")
ptrain.add_argument("-l", "--load", type=str,
                    default=None,
                    help="Overrides the network load location defined in the "
                         "network definition by "
                         "'{network.path_out}/{--load}'")
ptrain.add_argument("--max-depth", default=None,
                    type=apt.restricted_type(
                        apt.named_type(apt.int_zero_positive,
                                      "max_depth"),
                        apt.check_min_max_restriction("min_depth", "max_depth")),
                    help="Maximum depth from the root which is traversed ("
                         "default has no maximum, 0 means traversing no "
                         "sub-folders, only the content of the root)")
ptrain.add_argument("--min-depth", default=None,
                    type=apt.restricted_type(
                        apt.named_type(apt.int_zero_positive,
                                      "min_depth"),
                        apt.check_min_max_restriction("min_depth", "max_depth")),
                    help="Minimum depth from the root which has to be traversed"
                         " before problem files are registered (default has "
                         "no minimum)")
ptrain.add_argument("--maximum-data-memory",
                    type=apt.memory,
                    default=None,
                    help="Maximum memory to use for the data. Once this limit "
                         "is exhausted, not more data is loaded. Memory limit "
                         "is defined in KB unless defined otherwise via "
                         "suffixes: KB, MB, GB")
ptrain.add_argument("--merge", type=re.compile, default=None,
                    help="Regex describing all stored evaluations which shall "
                         "be combined. Most other options become useless, as no "
                         "training is performed afterwards. Network has to be "
                         "defined")
# Help text fragment describing the value expressions of the sample count
# restrictions. It is appended to several argparse help strings and must
# therefore not contain a percent sign (argparse %-formats help texts).
# The job_stats form mirrors PATTERN_SAMPLE_RESTRICTION_JOB_STATS: the data
# set names accepted by the job stats handling are 'train', 'validation' and
# 'test', and every KEY=VALUE pair is introduced by its own ':'.
SAMPLE_RESTRICTION_HINT = (
    "\nThe value can be: value="
    "{multiply(:value)+} [MULTIPLIES THE GIVEN VALUES]|"
    "{job_stats:FILE:{train|validation|test}(:KEY=VALUE)+}|"
    "{restriction_file(:key)+} [LOADS THE sample_restrictions.json DICT OF "
    "THE MAIN PROBLEM DOMAIN AND RETURNS THE VALUE BEHIND THE GIVEN KEYS]|"
    "int [BASE VALUE]\n")

# Argument definitions, part 3: sample count requirements and limits, model
# naming/output, evaluation only mode, pruning and problem filters.
# Adjacent string literals are concatenated without any separator, therefore
# every help fragment has to carry its own separating space.
# NOTE(review): several help texts append SAMPLE_RESTRICTION_HINT (which
# advertises non-integer value expressions) although the arguments use
# 'type=apt.int_positive' - confirm that the type accepts those expressions.
ptrain.add_argument("--global-minimum-samples-per-set", default=100,
                    type=apt.int_positive,
                    help="If any set has fewer than this number of samples, "
                         "then the training will be aborted, because of too "
                         "few samples")
ptrain.add_argument("--minimum-samples-training", default=None,
                    type=apt.int_positive,
                    help="Minimum amount of samples to load for training, "
                         "otherwise "
                         "the network will not be trained. This should be "
                         "safe on pruning, but is not sufficiently tested, "
                         "this check happens before a final pruning run ("
                         "which should not change anything anymore, but I did "
                         "not debug sufficiently to be sure that I can delete "
                         "it) (this assumes that your data set contains exactly "
                         "the following data: for each problem solved the whole "
                         "solution trajectory. nothing more). %s" %
                         SAMPLE_RESTRICTION_HINT)
ptrain.add_argument("-n", "--name", type=str, default=None,
                    help="Sets the network store path to "
                         "'{network.path_out}/{--prefix}{--name}."
                         "{file suffix}'. See additionally '--output'")
ptrain.add_argument("-o", "--output", action="store_true",
                    help="overwrites the network.path_out directory specified "
                         "in the network definition with the first root "
                         "directory of the training data.")
ptrain.add_argument("--only-evaluate", action="store_true",
                    help="Does NOT train a network, therefore, a model to load "
                         "has to exist. Evaluates on the test data the network")
ptrain.add_argument("--plot-data-distribution", action="store_true",
                    help="Plot the distribution of the loaded data.")
ptrain.add_argument("-p", "--prefix", type=str, default="",
                    help="Prefix to add in front of analysis outputs and stored "
                         "model file name.")
ptrain.add_argument("--pruning", type=PruningTypes,
                    choices=_init_get_pruning_types_list(),
                    default=PruningTypes.IntraInter,
                    help="Pruning to apply to the training/validation/test "
                         "data. Options are:\n"
                         "\toff: no pruning at all. The sets are loaded as is\n"
                         "\tinter: entries are pruned between the data sets ("
                         "priority: test set, validation set, training set)\n"
                         "\tintra: entries are pruned within a data set\n"
                         "\tintra_inter: entries first pruned within data set, "
                         "then inter data sets.")
ptrain.add_argument("-pf", "--problem-filter", type=re.compile,
                    action="append", default=[],
                    help="A problem file name has to match the regex otherwise "
                         "it is not registered. By default no regex matches are "
                         "required. This argument can be given any number of "
                         "times to add additional filters (the file name has "
                         "to match ALL regexes)")
ptrain_mutex_multiple_iterations.add_argument(
    "--repetitions", type=apt.int_positive, default=1,
    help="Works only together with '--execute' (todo make this "
         "work without '--execute'). Adds '--array=0-{N-1}' as "
         "first argument into '--args'. Provides "
         "after the arguments of '--args' N arguments "
         "representing regular expressions not matching any "
         "problem (as they are all '-') (This is done to reuse "
         "the same mechanism as --cross-validation)")
ptrain.add_argument("--sample-type", default=tc.SampleTypes.all,
                    choices=tc.SampleTypes.name2type.values(),
                    type=tc.SampleTypes.get,
                    help="Loads only samples of this given type. Available "
                         "types are: \n\t %s" % _init_join_samples_types())
ptrain.add_argument("--samples-per-problem", default=None,
                    type=apt.int_positive,
                    help="How many samples to load at most per sampled "
                         "problem. If not specified all samples belonging to a "
                         "problem are loaded. This requires that all samples "
                         "have the meta fields %s and %s set. ASSUMPTION: "
                         "SAMPLES FROM THE SAME PAIR ARE IN CONSECUTIVE ORDER "
                         "IN THE DATA FILES!" %
                         (MetaFields.PROBLEM_HASH, MetaFields.MODIFICATION_HASH)
                    )
ptrain.add_argument("--samples-total-testing", default=None,
                    type=apt.int_positive,
                    help="Limit the total number of samples to load for the "
                         "test data. This does not limit the data for the "
                         "training or verification data. %s" %
                         SAMPLE_RESTRICTION_HINT)
ptrain.add_argument("--samples-total-training", default=None,
                    type=apt.int_positive,
                    help="Limit the total number of samples to load for the "
                         "training data. This does not limit the data for the "
                         "test or verification data. Due to pruning "
                         "duplicates from verification & test data in training "
                         "data, the final number of samples in the training "
                         "data set can be smaller! Todo: fix that %s" %
                         SAMPLE_RESTRICTION_HINT)
ptrain.add_argument("--samples-total-verifying", default=None,
                    type=apt.int_positive,
                    help="Limit the total number of samples to load for the "
                         "verification data. This does not limit the data for "
                         "the "
                         "train or test data. Due to pruning "
                         "duplicates from test data in verification "
                         "data, the final number of samples in the verification "
                         "data set can be smaller! Todo: fix that %s" %
                         SAMPLE_RESTRICTION_HINT)

# Argument definitions, part 4: random seed, skipping behaviour, slurm
# submission, sub directory training and test/validation data selection.
# Adjacent string literals are concatenated without any separator, therefore
# every help fragment has to carry its own separating space.
ptrain.add_argument("--seed", default=None, type=int,
                    help="Use a specific random seed")
ptrain.add_argument("--skip", action="store_true",
                    help=("If set, then missing sample files are skipped, "
                          "otherwise every problem file is expected to have "
                          "a sample file."))
ptrain.add_argument("--skip-if-trained", action="store_true",
                    help="Skip training (works only without '--execute') if the "
                         "requested network model files exist already.")
ptrain.add_argument("--skip-if-flag", action="store_true",
                    help="Skip training (works only without '--execute') if a "
                         "skip flag exists (after training the skip flag file "
                         "is created).")
ptrain.add_argument("--skip-if-running", action="store_true",
                    help="Skip training (works only without '--execute') if a "
                         "valid running flag exists (prior to training set and "
                         "if not crashed afterwards deleted).")
ptrain.add_argument("--skip-magic", action="store_true",
                    help=("Tries to load the sample without performing a check "
                          "that it uses the right reader for the sample file "
                          "format (use case old sample files without magic "
                          "word. USE ONLY IF YOU KNOW WHAT YOU ARE DOING)"))
ptrain_mutex_execute.add_argument(
    "--slurm", action="store_true",
    help="Executes the training via submitting it to a slurm environment "
         "(this sets '--execute' and enables all options depending on "
         "'--execute').")
ptrain.add_argument("--slurm-dependency", default=None,
                    type=apt.slurm_dependency,
                    help="Only valid in combination with --slurm. Adds the "
                         "given "
                         "value to the slurm command as dependency. The "
                         "following "
                         "special sequences are defined which will be "
                         "expanded:\n"
                         "{key:value(;key:value)*}: key in (u, user) selects "
                         "jobs "
                         "of user, key in (p, partition) selects jobs of "
                         "partition. "
                         "If multiple given, then jobs matching all "
                         "conditions are "
                         "selected.\n"
                         "\nExample "
                         "values: afterany:9457635:{u:myuser}")
ptrain.add_argument("--slurm-summarize", action="store_true",
                    help="Only valid in combination with --slurm. Adds an "
                         "output summarization step after the training jobs.")
ptrain.add_argument("--stop-after-initialization", action="store_true",
                    help="Loads data and initialize network. Afterwards stops.")
ptrain.add_argument("-sdt", "--sub-directory-training", action="store_true",
                    help="Changes training from one network on the data within "
                         "all given directories (in the directory group) "
                         "to training a single network "
                         "per directory (and subdirectory) which contains a "
                         "domain.pddl file and at least one *.data file (for "
                         "those directories selected, the data is loaded from "
                         "them and from subdirectories like before)")
ptrain.add_argument("-t", "--test", type=re.compile, default=None,
                    help="Regex for identifying data set files to use as test "
                         "data.")
ptrain.add_argument("-ts", "--test-split", default=0.0,
                    type=apt.restricted_type(
                        apt.float_interval(0., 1.),
                        restrict_total_split_data),
                    help="Fraction of the trainings data to split off for the "
                         "test data (this is additional to '--test')")
ptrain.add_argument("-v", "--validation", type=re.compile,
                    default=None,
                    help="Regex for identifying data set files to use as "
                         "validation data (test data sets are excluded).")
ptrain.add_argument("-vs", "--validation-split", default=0.0,
                    type=apt.restricted_type(
                        apt.float_interval(0., 1.),
                        restrict_total_split_data),
                    help="Fraction of the trainings data to split off for the "
                         "validation data (this is additional to "
                         "'--validation').")


def get_directory_groups(directories, directory_filters,
                         sub_directory_training):
    """
    Determine the groups of directories to train on. One training run is
    performed per returned group.
    :param directories: list of directory groups (each a list of paths)
    :param directory_filters: compiled regexes; a directory is only kept if
        every regex matches its path
    :param sub_directory_training: if False, a given group is kept (reduced
        to its directories passing the filters) if its first directory
        contains a domain.pddl file. If True, all given directories are
        traversed recursively and every directory which contains a
        domain.pddl file and passes the filters becomes its own group.
    :return: the non-empty groups, ordered by tools.misc.sort_nicely on the
        first directory of each group
    """

    def passes_filters(path_dir):
        return all(regex.match(path_dir) for regex in directory_filters)

    def has_domain_file(path_dir):
        return os.path.isfile(os.path.join(path_dir, "domain.pddl"))

    directory_groups = []
    if sub_directory_training:
        # Traversal with an explicit stack over all given directories
        pending = [path for group in directories for path in group]
        while pending:
            current = pending.pop()
            if has_domain_file(current) and passes_filters(current):
                directory_groups.append([current])
            for entry in os.listdir(current):
                path_entry = os.path.join(current, entry)
                if os.path.isdir(path_entry):
                    pending.append(path_entry)
    else:
        for group in directories:
            # The domain file is looked up in the first directory of a group
            if has_domain_file(group[0]):
                directory_groups.append(
                    [path for path in group if passes_filters(path)])

    non_empty_groups = [group for group in directory_groups if len(group) > 0]
    return tools.misc.sort_nicely(
        non_empty_groups, sort_key=lambda group: group[0])


def parse_training_args(argv):
    """
    Parse the arguments of a single training and post-process the options.
    :param argv: list of command line arguments for 'ptrain'
    :return: (options, directory_groups). 'options.execute' is set to
        "sbatch" if '--slurm' is given, 'options.args' is prefixed with an
        '--array=0-{N-1}' argument for cross validation/repetitions and
        'options.initialize'/'options.finalize' are converted to dicts.
    """
    options = ptrain.parse_args(argv)

    def prepend_array_argument(count):
        # Put '--array=0-{count - 1}' in front of the external arguments
        array_argument = "--array=0-%i" % (count - 1)
        if options.args is None:
            options.args = array_argument
        else:
            options.args = array_argument + " " + options.args

    # Slurm specific options
    if options.slurm:
        options.execute = "sbatch"
    if options.slurm_dependency:
        assert options.slurm, \
            "Option --slurm-dependency requires option --slurm"
    if options.slurm_summarize:
        assert options.slurm, "Option --slurm-summarize requires option --slurm"

    # Cross validation
    if options.cross_validation is not None:
        assert options.execute is not None, \
            "Requires '--execute' to use '--cross-validation'"
        no_split_requested = (
            options.test is None and options.test_split == 0.0 and
            options.validation is None and options.validation_split == 0.0)
        assert no_split_requested, (
            "Cannot provide test/validation splits when doing cross validation")
        prepend_array_argument(options.cross_validation)

    # Repetitions
    if options.repetitions != 1:
        assert options.execute is not None, \
            "Requires '--execute' to use --repetitions != 1"
        no_regex_requested = options.test is None and options.validation is None
        assert no_regex_requested, (
            "Cannot provide test/validation regexes when doing repetitions")
        prepend_array_argument(options.repetitions)

    directory_groups = get_directory_groups(
        options.directory,
        options.directory_filter,
        options.sub_directory_training)
    assert len(directory_groups) > 0, "No valid list of directories found."

    # The key=value pairs were parsed to pairs, store them as dictionaries
    options.initialize = dict(options.initialize)
    options.finalize = dict(options.finalize)

    return options, directory_groups


def split_training_blocks(args):
    """
    Split the arguments at every ARG_NEW_TRAINING token.
    :param args: iterable of command line arguments
    :return: list of argument lists, one per training. The separator tokens
        are dropped and empty argument lists are omitted.
    """
    blocks = []
    current = []
    for argument in args:
        if argument != ARG_NEW_TRAINING:
            current.append(argument)
            continue
        # Separator reached: close the current block (if it has content)
        if current:
            blocks.append(current)
        current = []
    if current:
        blocks.append(current)
    return blocks


@tm.static_var("cache", None)
def get_parser_argument_keys():
    """
    :return: set of all option strings (e.g. "-d", "--directory") registered
        at 'ptrain'. The set is computed on the first call and stored in the
        function attribute 'cache'.
    """
    keys = get_parser_argument_keys.cache
    if keys is None:
        keys = {option_string
                for action in ptrain._actions
                for option_string in action.option_strings}
        get_parser_argument_keys.cache = keys
    return keys


""" -------------------------------- Misc -----------------------------------"""


def submit_slurm_summarize(job_ids):
    """
    Call 'sbatch' for the script tools.paths.SCRIPT_SLURM_SUMMARIZE_TRAINING
    (with the argument ".") using an 'afterany' dependency on the given jobs.
    :param job_ids: iterable of job ids to depend on
    :return: None
    """
    dependency = "afterany:" + ":".join(str(job_id) for job_id in job_ids)
    subprocess.call([
        "sbatch",
        "--dependency", dependency,
        "--kill-on-invalid-dep=yes",
        tools.paths.SCRIPT_SLURM_SUMMARIZE_TRAINING,
        ".",
    ])


def merge_previous_evaluation_results(options, directories):
    """
    Collect all entries directly within the given directories whose path
    matches 'options.merge' and pass them to the network for analysis.
    :param options: parsed options (uses 'network', 'merge' and 'prefix')
    :param directories: directories to search (not recursive); the first one
        is passed to the network as first argument
    :return: None
    """
    network = options.network
    assert hasattr(network, "analyse_from_paths"), \
        "Network cannot merge previous files"

    path_datas = []
    for directory in directories:
        candidates = (os.path.join(directory, item)
                      for item in os.listdir(directory))
        path_datas.extend(
            path for path in candidates if options.merge.match(path))
    network.analyse_from_paths(directories[0], options.prefix, path_datas)


# Patterns for the value expressions of the sample count restrictions.
# Digits, a dot, digits (e.g. "0.5")
PATTERN_SAMPLE_RESTRICTION_FLOAT = re.compile(r"^\d+?\.\d+?$")
# Digits only (e.g. "100")
PATTERN_SAMPLE_RESTRICTION_INT = re.compile(r"^\d+$")
# "{multiply:A:B...}"; group 1 holds all ":"-prefixed values
PATTERN_SAMPLE_RESTRICTION_MULTIPLY = re.compile(r"^{multiply((:[^:]+)+)}$")
# "{job_stats:X:Y:KEY=VALUE...}"; groups: X, Y, all ":KEY=VALUE" pairs, the
# last ":KEY=VALUE" pair
PATTERN_SAMPLE_RESTRICTION_JOB_STATS = re.compile(
    r"^{job_stats:([^:]+):([^:]+)((:[^:=]+=[^:=]+)+)}$")
# "{restriction_file:KEY...}"; group 1 holds all ":"-prefixed keys
PATTERN_SAMPLE_RESTRICTION_RESTRICTION_FILE = re.compile(
    r"^{restriction_file((:[^:]+)+)}$")


def format_sample_count_restriction_total_problems(file_problem):
    """
    Sum the "#problems" entries of the file data_set_sizes.json which is
    located in the directory of the given problem file.
    :param file_problem: path to a problem file (must not be None)
    :return: sum of the "#problems" values of all entries in the json file
    """
    assert file_problem is not None
    directory = os.path.dirname(file_problem)
    file_sizes = os.path.join(directory, "data_set_sizes.json")
    assert os.path.isfile(file_sizes)
    with open(file_sizes, "r") as stream:
        data_set_sizes = json.load(stream)
    return sum(stats["#problems"] for stats in data_set_sizes.values())


def format_sample_count_restriction_multiply(
        restriction, file_problem, regex_valid, regex_test, fold):
    """
    Evaluate a restriction which matches PATTERN_SAMPLE_RESTRICTION_MULTIPLY.
    Every value of the expression is evaluated by
    'format_sample_count_restriction' and the results are multiplied.
    :param restriction: restriction string of the multiply form
    :param file_problem: forwarded to the evaluation of the values
    :param regex_valid: forwarded to the evaluation of the values
    :param regex_test: forwarded to the evaluation of the values
    :param fold: forwarded to the evaluation of the values
    :return: product of all evaluated values (1 if there are none)
    """
    # NOTE(review): 'apt.split_on_base_level' is presumably splitting the
    # ':' separated values outside of nested braces - confirm
    raw_values = PATTERN_SAMPLE_RESTRICTION_MULTIPLY.match(restriction).group(1)
    context = {
        "file_problem": file_problem,
        "regex_valid": regex_valid,
        "regex_test": regex_test,
        "fold": fold,
    }
    product = 1
    for factor in apt.split_on_base_level(raw_values):
        product *= format_sample_count_restriction(factor, **context)
    return product


def format_sample_count_restriction_restriction_file(
        restriction, file_problem, regex_valid, regex_test, fold):
    """
    Evaluate a "{restriction_file:<key>:...}" restriction by walking the
    nested dictionaries of the 'sample_restrictions.json' file located next
    to the problem file and evaluating the entry found there.

    The keys "$FOLD$", "$REGEX_VALID$" and "$REGEX_TEST$" are replaced by
    the corresponding arguments before the lookup.

    :param restriction: string matching
                        PATTERN_SAMPLE_RESTRICTION_RESTRICTION_FILE
    :param file_problem: path of a problem file; only its directory is used
    :param regex_valid: replacement for the "$REGEX_VALID$" key
    :param regex_test: replacement for the "$REGEX_TEST$" key
    :param fold: replacement for the "$FOLD$" key
    :return: the evaluated restriction stored under the key path
    """
    file_restrictions = os.path.join(
        os.path.dirname(file_problem), "sample_restrictions.json")
    assert os.path.isfile(file_restrictions)

    raw_keys = PATTERN_SAMPLE_RESTRICTION_RESTRICTION_FILE.match(
        restriction).group(1)
    assert "}" not in raw_keys and "{" not in raw_keys
    key_path = [part for part in raw_keys.split(":") if part]

    with open(file_restrictions, "r") as f:
        entry = json.load(f)

    placeholders = {
        "$FOLD$": fold,
        "$REGEX_VALID$": regex_valid,
        "$REGEX_TEST$": regex_test,
    }
    # Descend one dictionary level per key.
    for key in key_path:
        entry = entry[placeholders.get(key, key)]

    return format_sample_count_restriction(
        entry,
        file_problem=file_problem,
        regex_valid=regex_valid,
        regex_test=regex_test,
        fold=fold)


def format_sample_count_restriction_job_stats_file(
        restriction, regex_valid, regex_test):
    """
    Evaluate a "{job_stats:<file>:<data sets>:<key>=<value>:...}"
    restriction.

    Among all entries of the job stats json file whose values equal the
    requested key/value pairs, the one with the latest 'timestamp' is
    chosen and the sizes of the requested data sets are summed up. The
    values requested for the keys "test" and "validation" are always
    replaced by regex_test resp. regex_valid.

    :param restriction: string matching PATTERN_SAMPLE_RESTRICTION_JOB_STATS
    :param regex_valid: value the "validation" key has to match
    :param regex_test: value the "test" key has to match
    :return: sum of the 'data_set_sizes' of the requested data sets
    """
    parsed = PATTERN_SAMPLE_RESTRICTION_JOB_STATS.match(restriction)
    file_job_stats, raw_datasets, raw_keys, _ = parsed.groups()
    assert os.path.exists(file_job_stats)

    # Data sets are '|' separated and may not repeat.
    datasets = [name for name in raw_datasets.split("|") if name]
    assert all(name in ["test", "train", "validation"] for name in datasets)
    assert len(datasets) == len(set(datasets))

    keys = [[part.strip() for part in pair.split("=")]
            for pair in apt.split_on_base_level(raw_keys)]
    assert all(len(pair) == 2 for pair in keys)
    for pair in keys:
        if pair[0] == "test":
            pair[1] = regex_test
        elif pair[0] == "validation":
            pair[1] = regex_valid

    with open(file_job_stats, "r") as f:
        job_stats = json.load(f)

    chosen_stats = None
    chosen_timestamp = None
    for stats in job_stats.values():
        if not all(key in stats and stats[key] == value
                   for key, value in keys):
            continue
        timestamp = datetime.datetime.strptime(
            stats["timestamp"], '%Y-%m-%d %H:%M:%S.%f')
        # Keep the first match unless a strictly newer one shows up.
        if chosen_stats is None or timestamp > chosen_timestamp:
            chosen_stats = stats
            chosen_timestamp = timestamp

    assert chosen_stats is not None
    assert "data_set_sizes" in chosen_stats
    return sum(chosen_stats["data_set_sizes"][ds] for ds in datasets)


def format_sample_count_restriction(
        restriction, file_problem=None, regex_valid=None, regex_test=None,
        fold=None):
    """
    Convert a sample count restriction into a number.

    Supported restrictions:
      - None (returned unchanged)
      - int (returned unchanged) and float (truncated to int)
      - strings containing an integer or a decimal number (optionally
        wrapped in matching single or double quotes)
      - "#TOTAL_PROBLEMS"
      - "{multiply:...}", "{restriction_file:...}" and "{job_stats:...}"
        expressions

    :param restriction: the restriction to convert
    :param file_problem: problem file used by the file based restrictions
    :param regex_valid: pattern string of the validation problems
    :param regex_test: pattern string of the test problems
    :param fold: fold identifier used by "{restriction_file:...}"
    :return: None, int or float (float only for decimal number strings)
    :raises AssertionError: if the restriction has no known form
    """
    # Simple Restrictions: None or Number
    if restriction is None:
        return None
    elif isinstance(restriction, int):
        return restriction
    elif isinstance(restriction, float):
        return int(restriction)

    # Strip matching surrounding quotes. The length guard prevents an
    # IndexError for empty strings and for a lone quote character.
    while (len(restriction) >= 2
           and restriction[0] == restriction[-1]
           and restriction[0] in ("'", "\"")):
        restriction = restriction[1:-1]

    if PATTERN_SAMPLE_RESTRICTION_INT.match(restriction):
        return int(restriction)
    elif PATTERN_SAMPLE_RESTRICTION_FLOAT.match(restriction):
        return float(restriction)

    # Complex Restrictions
    elif restriction == "#TOTAL_PROBLEMS":
        return format_sample_count_restriction_total_problems(file_problem)
    elif PATTERN_SAMPLE_RESTRICTION_MULTIPLY.match(restriction):
        return format_sample_count_restriction_multiply(
            restriction, file_problem, regex_valid, regex_test, fold)
    elif PATTERN_SAMPLE_RESTRICTION_RESTRICTION_FILE.match(restriction):
        return format_sample_count_restriction_restriction_file(
            restriction, file_problem, regex_valid, regex_test, fold)
    elif PATTERN_SAMPLE_RESTRICTION_JOB_STATS.match(restriction):
        return format_sample_count_restriction_job_stats_file(
            restriction, regex_valid, regex_test)
    else:
        assert False, (
            "Unknown sample count restriction: %r" % (restriction,))


""" ------------------------------- Common Training ------------------------"""


def prepare_network(options, directories):
    """
    Configure the output, store and load paths of the network from the
    options.

    :param options: parsed options; reads 'network', 'output', 'name',
                    'prefix', 'load' and 'only_evaluate'
    :param directories: directory group; the first directory becomes the
                        output path if 'output' is set
    :return: None (the network object is modified)
    """
    network = options.network

    if options.output:
        network.path_out = directories[0]

    if options.name is not None:
        prefix = options.prefix if options.prefix is not None else ""
        network.path_store = os.path.join(
            network.path_out, prefix + options.name)

    if options.load is not None:
        # An explicitly requested file has to exist
        network.path_load = os.path.join(network.path_out, options.load)
        assert os.path.isfile(network.path_load)
    elif options.only_evaluate:
        # Without explicit file, load what a previous run would have stored
        default_format = network.get_default_format()
        network.path_load = (network.path_store +
                             ".%s" % default_format.suffix[0])


def get_state_format(options):
    """
    Return the state format to use: the explicitly requested one or, if
    none was requested, the first preferred state format of the network.

    :param options: parsed options; reads 'format' and 'network'
    :return: the state format
    """
    if options.format is not None:
        return options.format
    return options.network.get_preferred_state_formats()[0]


def create_directory_samplers(
        directories, directory_filter, general_task_filters,
        data_set_task_filters, min_depth, max_depth):
    """
    Create one DirectorySampler per data set.

    :param directories: directories the samplers work on
    :param directory_filter: passed through to every sampler
    :param general_task_filters: filters applied for every data set
    :param data_set_task_filters: one entry per data set; None means that no
                                  sampler is created for the data set,
                                  otherwise a list of additional filters
    :param min_depth: passed through to every sampler
    :param max_depth: passed through to every sampler
    :return: (tasks of all created samplers, list with a sampler or None
              per data set)
    """
    all_tasks = []
    all_samplers = []
    for specific_filters in data_set_task_filters:
        if specific_filters is None:
            all_samplers.append(None)
            continue

        # The growing list of already assigned tasks is handed to every
        # new sampler.
        sampler = DirectorySampler(
            None, directories, directory_filter,
            general_task_filters + specific_filters,
            None, all_tasks, max_depth, min_depth,
            merge=True)
        all_tasks.extend(sampler.iterable)
        all_samplers.append(sampler)

    return all_tasks, all_samplers


def check_data_set_min_samples_restriction(
        all_samplers, min_restrictions, streams,
        sample_type, samples_per_problem):
    """
    Estimate from the data statistics whether the minimum sample
    requirements of the data sets can be satisfied at all.

    For every data set with a minimum requirement, the upper sample bounds
    of all existing data files are summed up. If statistics are missing for
    one of the files, no estimate is possible and only a message is
    printed.

    :param all_samplers: one sampler (or None) per data set
    :param min_restrictions: one minimum sample count (or None) per data
                             set; a restriction requires a sampler
    :param streams: objects whose get_next_path maps a task file to a data
                    file
    :param sample_type: forwarded to the upper bound computation
    :param samples_per_problem: forwarded to the upper bound computation
    :return: None
    :raises SampleRequirementException: if the upper bound of a data set is
                                        below its minimum requirement
    """
    assert len(all_samplers) == len(min_restrictions)

    for sampler, min_restriction in zip(all_samplers, min_restrictions):
        assert sampler is not None or min_restriction is None
        if min_restriction is None:
            continue

        # Distinct data files of the data set which actually exist
        files_data = set([stream.get_next_path(file_task)
                          for file_task in sampler.iterable
                          for stream in streams])
        files_data = [f for f in files_data if os.path.isfile(f)]

        total_bound = 0
        for file_data in files_data:
            dir_data = os.path.dirname(file_data)
            basename_data = os.path.basename(file_data)

            file_stats = tm.get_data_stats(dir_data, basename_data)
            if file_stats is None:
                # A single file without statistics makes the bound unknown
                total_bound = None
                break
            total_bound += tm.get_upper_sample_bound_from_data_stats(
                file_stats, sample_type, samples_per_problem)

        if total_bound is None:
            print("Cannot estimate if a minimum sample requirement can be "
                  "satisfied")
        else:
            if total_bound < min_restriction:
                raise SampleRequirementException(
                    "A minimum sample requirement cannot be satisfied "
                    "(upper bound possible/requirement): %i/%i" %
                    (total_bound, min_restriction))
            else:
                # Fixed: the concatenated literals lacked a separating
                # space ("minimum samplerequirement").
                print("Upper bound on samples suggest that minimum sample "
                      "requirement could be satisfiable (it might fail due "
                      "to pruning)(bound/requirement): %i/%i" %
                      (total_bound, min_restriction))


def load_domain_properties(directories, all_tasks, full_domain_properties):
    """
    Obtain the domain properties for the 'domain.pddl' of the first
    directory.

    If the matching properties json file exists next to the domain file, it
    is given as file to load from, otherwise it is given as file to store
    to.

    :param directories: directory group; the first directory has to contain
                        the domain file
    :param all_tasks: problem files passed to the analysis
    :param full_domain_properties: if True 'domain_properties.json' is
                                   used, else
                                   'domain_properties_no_statics.json'
    :return: the domain properties object
    """
    print("Start analysing Domain:")
    start_time = time.time()
    path_domain = os.path.join(directories[0], "domain.pddl")

    if full_domain_properties:
        name_properties = "domain_properties.json"
    else:
        name_properties = "domain_properties_no_statics.json"
    path_properties = os.path.join(
        os.path.dirname(path_domain), name_properties)

    # Exactly one of both paths is set
    if os.path.exists(path_properties):
        path_load, path_store = path_properties, None
    else:
        path_load, path_store = None, path_properties

    domain_properties = DomainProperties.get_property_for(
        path_domain=path_domain,
        paths_problems=all_tasks,
        no_gnd_static=not full_domain_properties,
        load=path_load,
        store=path_store,
        verbose=1)
    tm.timing(start_time, "Domain analysing time: %ss")
    return domain_properties


def convert_data_set_size_restriction(
        samplers, restrictions, regex_valid, regex_test, fold):
    """
    Replace, in place, every sample count restriction by its evaluated
    integer value (or None).

    :param samplers: one sampler (or None) per data set; the first task of
                     a sampler is used as problem file for file based
                     restrictions
    :param restrictions: one restriction per data set, modified in place
    :param regex_valid: compiled validation regex or None
    :param regex_test: compiled test regex or None
    :param fold: fold identifier or None
    :return: None
    """
    assert len(samplers) == len(restrictions)

    pattern_valid = None if regex_valid is None else regex_valid.pattern
    pattern_test = None if regex_test is None else regex_test.pattern

    for idx, (sampler, restriction) in enumerate(zip(samplers, restrictions)):
        if sampler is None or len(sampler.iterable) == 0:
            first_problem = None
        else:
            first_problem = sampler.iterable[0]

        value = format_sample_count_restriction(
            restriction,
            file_problem=first_problem,
            regex_valid=pattern_valid,
            regex_test=pattern_test,
            fold=fold
        )
        restrictions[idx] = None if value is None else int(value)


def split_data(datas, test_split, validation_split):
    """
    Split fractions off the first training data container and add them to
    the test resp. validation data.

    :param datas: list [test, validation, train] of data lists (or None);
                  modified in place
    :param test_split: fraction to split off for testing (> 0 to split)
    :param validation_split: fraction to split off for validation (> 0 to
                             split)
    :return: None
    """
    fractions = (test_split, validation_split, 0.0)
    targets = [idx for idx, fraction in enumerate(fractions)
               if fraction > 0.0]
    if len(targets) == 0:
        return

    # Split off all requested fractions in a single call
    parts = datas[-1][0].splitoff(*[fractions[idx] for idx in targets])
    for idx, part in zip(targets, parts):
        current = datas[idx]
        if current is None or current == []:
            datas[idx] = [part]
        else:
            current[0].add_data(part)


def get_func_is_elem_in_previous_data(previous_data):
    """
    Create a predicate which tells if an item is contained in the pruning
    set of any previously loaded data container.

    :param previous_data: list of data lists (or None entries); the list is
                          read at every call of the predicate, so entries
                          added later are taken into account
    :return: function item -> bool
    """
    def is_elem_in_previous_data(item):
        return any(
            container.has_pruning()
            and container.in_pruning_set(container.hasher(item))
            for containers in previous_data if containers is not None
            for container in containers
        )
    return is_elem_in_previous_data


def exec_prune_intra(_data_list):
    """
    Remove from every data container the duplicates of all containers which
    precede it in the list.

    :param _data_list: list of data containers of one data set
    :return: None
    """
    for _next_idx, _earlier in enumerate(_data_list, 1):
        for _later in _data_list[_next_idx:]:
            _later.remove_duplicates_from(_earlier)


def exec_prune_inter(_datas):
    """
    Remove from the containers of every data set the duplicates of all data
    sets which precede it (None data sets are ignored).

    :param _datas: list of data sets, each a list of data containers or None
    :return: None
    """
    _present = [_group for _group in _datas if _group is not None]
    for _next_idx, _earlier in enumerate(_present, 1):
        for _later in _present[_next_idx:]:
            for _container in _later:
                _container.remove_duplicates_from_iter(_earlier)


def check_minimum_sample_requirement(datas, requirement):
    """
    Ensure that a data set contains at least the required number of
    entries.

    :param datas: list of data containers (their lengths are summed) or
                  None, which counts as size 0
    :param requirement: minimum size or None to skip the check
    :return: None
    :raises SampleRequirementException: if the data set is too small
    """
    if requirement is None:
        return

    if datas is None:
        size = 0
    else:
        size = sum(len(container) for container in datas)

    if size < requirement:
        raise SampleRequirementException(
            "Minimum data requirement is not met for a set: %i/%i" %
            (size, requirement))


def load_data(options, directories, state_format, dry=False):
    """
    Load the test, validation and training data of a directory group.

    One sampler is created per data set. The data sets are loaded in the
    order test, validation, training; if inter pruning is active, entries
    already contained in a previously loaded data set are filtered while
    loading. Afterwards the requested fractions are split off the training
    data and data sets without own data are replaced by the training data.

    :param options: parsed training options
    :param directories: directories of the group
    :param state_format: format passed to the sample bridge
    :param dry: returns only which problems are in which set (test, valid,
    train)
    :return: list [test, validation, train]; if dry, every entry is the
             list of tasks of the data set, otherwise every entry is the
             loaded data of the data set
    :raises SampleRequirementException: if a minimum sample requirement is
                                        not (or cannot be) satisfied
    """
    streams = options.input
    # The prefix is optional (prepare_network accepts None); without a
    # prefix no fold can be encoded in it.
    fold_idx = (None if options.prefix is None
                else REGEX_FOLD.match(options.prefix))
    fold_idx = None if fold_idx is None else fold_idx.group(1)

    # None = skip, order = Param for test, validation, training data set
    data_set_task_filters = [
        None if options.test is None else [options.test],
        None if options.validation is None else [options.validation],
        None if (options.only_evaluate and options.test is None and
                 options.test_split > 0) else []
    ]
    data_set_max_samples = [
        options.samples_total_testing,
        options.samples_total_verifying,
        options.samples_total_training
    ]
    data_set_min_samples = [
        None,
        None,
        options.minimum_samples_training
    ]

    all_tasks, all_samplers = create_directory_samplers(
        directories, options.directory_filter, options.problem_filter,
        data_set_task_filters, options.min_depth, options.max_depth
    )

    if dry:
        return [[] if x is None else x.iterable for x in all_samplers]

    # Evaluate the (possibly symbolic) restrictions to numbers, in place
    for restrictions in [data_set_min_samples, data_set_max_samples]:
        convert_data_set_size_restriction(
            all_samplers, restrictions, options.validation, options.test,
            fold_idx)

    # Load Domain Properties (and add to network)
    assert not (options.domain_properties and
                options.domain_properties_no_statics)
    domain_properties = None
    if options.domain_properties or options.domain_properties_no_statics:
        domain_properties = load_domain_properties(
            directories, all_tasks, options.domain_properties)
    options.network._domain_properties = domain_properties

    # Estimate if minimum sample requirement is possible
    check_data_set_min_samples_restriction(
        all_samplers, data_set_min_samples, streams, options.sample_type,
        options.samples_per_problem)

    # Pruning flags
    prune_inter = options.pruning in [PruningTypes.Inter,
                                      PruningTypes.IntraInter]
    prune_intra = options.pruning in [PruningTypes.Intra,
                                      PruningTypes.IntraInter]

    # Actually load data
    datas = []
    # The memory usage reported by one bridge is carried over to the next
    bridge_cur_mem = 0
    bridge_loaded_tasks = []
    for no, sampler in enumerate(all_samplers):
        new_data = None
        if sampler is not None:
            bridge = LoadSampleBridge(
                streams=StreamContext(streams=streams),
                fields=options.fields,
                format=state_format, prune=prune_intra,
                fprune=(get_func_is_elem_in_previous_data(datas)
                        if prune_inter else None),
                skip=options.skip, skip_magic=options.skip_magic,
                forget=options.forget,
                domain_properties=domain_properties,
                max_mem=options.maximum_data_memory,
                sample_types=options.sample_type.subtypes,
                samples_per_problem=options.samples_per_problem,
                max_container_samples=data_set_max_samples[no])
            bridge._cur_mem = bridge_cur_mem
            sampler.sbridges = [bridge]

            sampler.initialize()
            new_data = sampler.sample()
            sampler.finalize()
            bridge_loaded_tasks.append(bridge.loaded_tasks)
            bridge_cur_mem = bridge.current_memory_usage
            check_minimum_sample_requirement(
                new_data, data_set_min_samples[no])

            if prune_intra:
                exec_prune_intra(new_data)
        else:
            # This is in 'else', because pruning could have removed elements
            # and we want to allow that
            check_minimum_sample_requirement(
                new_data, data_set_min_samples[no])
        datas.append(new_data)

    print("Loaded from distinct tasks: %s (test, validation, train)" %
          ", ".join(str(x) for x in bridge_loaded_tasks))
    split_data(datas, options.test_split, options.validation_split)

    if prune_inter:  # Should be redundant to the element wise check...
        exec_prune_inter(datas)

    # Finalize the loaded data
    for ldata in datas:
        for data in ldata if ldata is not None else []:
            data.finalize(clear_pruning=True)

    # Fill missing data slots (last slot aka dtrain is not empty,
    # except if split 100%):
    for i in range(len(datas) - 2, -1, -1):
        if datas[i] is None or datas[i] == []:
            datas[i] = datas[len(datas) - 1]

    return datas


"""-------------------- Execute Branch Only ---------------------------------"""


def get_fold_regexes(nb_folds, tasks):
    """
    Distribute the nicely sorted tasks round-robin over the folds and
    describe every fold by a common prefix/suffix regex of its tasks.

    :param nb_folds: number of folds
    :param tasks: tasks to distribute
    :return: list with one regex per fold
    """
    buckets = [[] for _ in range(nb_folds)]
    for position, task in enumerate(tools.misc.sort_nicely(tasks)):
        buckets[position % nb_folds].append(task)

    regexes = []
    for bucket in buckets:
        regexes.append(tm.get_common_prefix_suffix_regex(*bucket))
    return regexes


def get_execute_command(options, argv):
    """
    Build the base command for executing the training via another
    executable.

    The executable is put in front of the given arguments, slurm dependency
    arguments are added if requested, arguments which only concern this
    wrapper are removed and the arguments of 'options.args' are inserted
    behind the executable (and the dependency arguments).

    :param options: parsed options; reads 'execute', 'slurm_dependency' and
                    'args'
    :param argv: arguments this script was called with (not modified)
    :return: (command as list, index at which further arguments for the
              executable can be inserted)
    """
    command = [options.execute] + list(argv)
    idx_start_arguments = 1

    if options.slurm_dependency:
        dependency_arguments = ["--dependency", options.slurm_dependency,
                                "--kill-on-invalid-dep=yes"]
        command[1:1] = dependency_arguments
        command, _ = apt.extract_and_remove_arguments(
            command, ["--slurm-dependency"], get_parser_argument_keys())
        idx_start_arguments += len(dependency_arguments)

    # Arguments which must not be passed on to the executed call
    wrapper_only_arguments = (
        ["-e", "--execute"],
        ["--slurm"],
        ["--slurm-summarize"],
        ["--cross-validation"],
        ["--repetitions"],
        ["-a", "--args"],
        ["-sdt", "--sub-directory-training"],
        ["-d", "--directory"],
    )
    for aliases in wrapper_only_arguments:
        command, _ = apt.extract_and_remove_arguments(
            command, aliases, get_parser_argument_keys())

    if options.args is not None:
        execute_pre_args = shlex.split(options.args)
        command[idx_start_arguments:idx_start_arguments] = execute_pre_args
        idx_start_arguments += len(execute_pre_args)

    return command, idx_start_arguments


""" -------------------Local Training Branch Only ---------------------------"""


def check_skip_flags(options, start_time):
    """
    Check whether the training shall be skipped because of one of the
    requested skip conditions.

    :param options: parsed options; reads 'network', 'skip_if_trained',
                    'skip_if_flag' and 'skip_if_running'
    :param start_time: start time passed to the timing output
    :return: True if the training shall be skipped, else False
    """
    network = options.network

    if options.skip_if_trained:
        stored_formats = [
            network.get_default_format() if nform is None else nform
            for nform in network.learner_formats]
        # NOTE(review): the format object itself is interpolated here while
        # the other checks use 'suffix[0]' - confirm this is intended.
        stored_paths = ["%s.%s" % (network.get_store_path(), nf)
                        for nf in stored_formats]
        if all(os.path.exists(path) for path in stored_paths):
            tm.timing(start_time, "Network previously trained: %ss")
            return True

    if options.skip_if_flag:
        path_flag = "%s.%s" % (network.get_store_path(),
                               LearnerFormat.flag.suffix[0])
        if os.path.exists(path_flag):
            tm.timing(start_time, "Skip at network flag: %ss")
            return True

    if options.skip_if_running:
        path_running = "%s.%s" % (network.get_store_path(),
                                  LearnerFormat.running.suffix[0])
        if tools.RunningFlag.check_flag(path_running, ok_missing=True):
            tm.timing(start_time, "Other training still running: %ss")
            return True

    return False


def remove_running_flag(path, missing_ok=True):
    """
    Remove the running flag at the given path.

    (A context manager around the training might be the better choice.)

    :param path: path of the running flag
    :param missing_ok: passed through to the flag removal
    :return: None
    """
    tools.RunningFlag.remove_flag(path, missing_ok=missing_ok)


def set_network_flag(network):
    """
    Store the network in the 'flag' learner format; uninitialized networks
    are explicitly allowed.

    :param network: network to flag
    :return: None
    """
    flag_formats = [LearnerFormat.flag]
    network.store(learner_formats=flag_formats, allow_uninitialized=True)


def check_insufficient_data(options, sizes):
    """
    Tell if the loaded data is insufficient for a training.

    :param options: parsed options; reads 'only_evaluate' and
                    'global_minimum_samples_per_set'
    :param sizes: data set sizes, the first entry is treated as the
                  training data set
    :return: True if training is requested and the first data set is empty
             or any data set is below the global minimum, else False
    """
    # A pure evaluation never needs training data
    if options.only_evaluate:
        return False

    if sizes[0] == 0:
        print("Exit: Training data set is empty.")
        return True

    minimum = options.global_minimum_samples_per_set
    for size in sizes:
        if size < minimum:
            print("Exit: At least one given data set is close to empty.")
            return True
    return False


def print_data_set_stats(sizes, memory_consumption):
    """
    Print the memory consumption and the sizes of the data sets.

    :param sizes: three sizes in the order train, validation, test
    :param memory_consumption: memory consumption in bytes
    :return: None
    """
    megabytes = memory_consumption / (2 ** 20)
    print("Data set memory consumption: %.2fMB" % megabytes)
    print("Data sizes: %i, %i, %i (train, validation, test)" % tuple(sizes))


def plot_data_distribution(path, dtest, dvalidation, dtrain):
    """
    Plot, as bar chart, how often every h value occurs in the data sets and
    save the figure.

    Data sets which are the same object are plotted once and labelled with
    all their names. Data sets without any entry are not drawn.

    :param path: path the figure is saved to
    :param dtest: test data (iterable of data containers) or None
    :param dvalidation: validation data or None
    :param dtrain: training data or None
    :return: None
    """
    # Label data as {object id: (data object, [labels used with that object]}
    datas = {}
    for data, label_name in [(dtest, "test"), (dvalidation, "validation"),
                             (dtrain, "train")]:
        if data is None:
            continue
        if id(data) not in datas:
            datas[id(data)] = [data, [label_name]]
        else:
            datas[id(data)][1].append(label_name)

    # Plot
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111)
        # max(..., 1) prevents a division by zero if all data sets are None
        width = 0.5 / max(len(datas), 1)
        shift = (len(datas) * width) / 2.0
        for no, (data, label_names) in enumerate(datas.values()):
            h_values = collections.defaultdict(int)
            for sbd in data:
                def count_h_values(entry):
                    h_value = entry[sbd.field_hplan]
                    assert isinstance(h_value, int)
                    h_values[h_value] += 1

                sbd.over_all(count_h_values)

            if not h_values:
                # Nothing was counted, max() below would fail
                continue
            # Add explicit zero counts for the missing h values below the
            # maximum
            for i in range(max(h_values.keys())):
                if i not in h_values:
                    h_values[i] = 0

            # Heights are passed as list, a dict view is not array-like
            ax.bar(np.fromiter(h_values.keys(), int) + no * width - shift,
                   list(h_values.values()), width,
                   label="-".join(label_names))
        ax.set_xlabel("H Value")
        ax.set_ylabel("Counts")
        ax.set_title("Training Run Data Distribution")
        ax.legend()
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Release the figure, otherwise pyplot keeps every figure alive
        plt.close(fig)


""" -------------- Execute the training in another process ------------------"""


def train_execute(options, argv, directory_groups):
    """
    Execute the training of every directory group via another executable.

    For every directory group the call is validated by a dry data loading,
    the command is completed (fold regexes for cross validation resp.
    repetitions, the directories of the group) and executed. In dry mode
    the commands are only printed.

    :param options: parsed training options
    :param argv: arguments this script was called with
    :param directory_groups: directory groups to train on
    :return: list with one callback which registers the submitted slurm job
             ids for summarizing, if summarizing is requested and jobs were
             submitted; else None
    """
    slurm_job_ids = []
    nb_prepared_calls = 0
    base_command, new_idx_start_arguments = get_execute_command(
        options, list(argv))

    for dg in directory_groups:
        next_command = list(base_command)
        idx_start_arguments = new_idx_start_arguments

        # Check that call would be valid
        prepare_network(options, dg)
        state_format = get_state_format(options)
        task_test, task_valid, task_train = load_data(
            options, dg, state_format, dry=True)

        if options.cross_validation is not None or options.repetitions != 1:
            if options.cross_validation is not None:
                assert len(task_test) == 0 and len(task_valid) == 0, (
                    "Cross Validation does not allow splitting "
                    "the data manually")
                regexes_folds = get_fold_regexes(
                    options.cross_validation, task_train)

            else:  # aka repetitions != 1
                regexes_folds = ["INVALID_PROBLEM_REGEX" for _ in
                                 range(options.repetitions)]

            next_command[idx_start_arguments: idx_start_arguments] = (
                regexes_folds)
            idx_start_arguments += len(regexes_folds)

        next_command[idx_start_arguments + 1: idx_start_arguments + 1] = (
                ["--directory"] + dg)

        print("Call executable: ", next_command)
        nb_prepared_calls += 1
        if options.dry:
            continue

        sub_out = decoder(subprocess.check_output(next_command))
        if options.slurm_summarize:
            job_id = REGEX_SUBMITTED_BATCH_JOB.match(sub_out)
            assert job_id is not None, \
                "Error submitting slurm job: %s" % str(next_command)
            slurm_job_ids.append(job_id.group(1))

    if options.slurm_summarize:
        if options.dry:
            # A dry run never submits jobs, thus the prepared calls decide
            # whether a summary would have been requested.
            if nb_prepared_calls > 0:
                print("Summarize training logs in: .")
        elif len(slurm_job_ids) > 0:
            def callback(param):
                if SLURM_SUMMARIZE_KEY not in param:
                    param[SLURM_SUMMARIZE_KEY] = []
                param[SLURM_SUMMARIZE_KEY].extend(slurm_job_ids)
            return [callback]
    return None


def train_local(options, directory_groups, process, start_time):
    """
    Train (and/or evaluate) the network in this process, once per directory
    group.

    For every group: configure the network paths, optionally only merge
    previous evaluation results, check the skip flags, set the running
    flag, load the data, initialize, train, evaluate, analyse and finalize
    the network. At the end of a group (also if it stops early because of
    insufficient data) the network flag is set and the running flag is
    removed.

    :param options: parsed training options
    :param directory_groups: directory groups to train on
    :param process: process object used to query the memory usage
                    (memory_info().rss)
    :param start_time: start time for the timing outputs
    :return: None
    """
    for idx_dg, dg in enumerate(directory_groups):
        print("Processing Directory Group %i: %s" % (idx_dg, dg))
        if options.dry:
            continue

        network = options.network
        prepare_network(options, dg)
        state_format = get_state_format(options)

        # Merge previously stored evaluation results
        if options.merge:
            merge_previous_evaluation_results(options, dg)
            _ = tm.timing(start_time, "Merging time: %ss")
            continue

        # Check flags and set running flag
        if check_skip_flags(options, start_time):
            continue
        path_running = "%s.%s" % (options.network.get_store_path(),
                                  LearnerFormat.running.suffix[0])
        tools.RunningFlag.set_flag(path_running)

        # Load data
        start_memory = process.memory_info().rss
        try:
            dtest, dvalidation, dtrain = load_data(
                options, dg, state_format)
        except SampleRequirementException as e:
            print("Exit:", e)
            set_network_flag(network)
            remove_running_flag(path_running)
            continue
        end_memory = process.memory_info().rss
        sizes = [0 if x is None else sum([len(y) for y in x]) for x in
                 [dtrain, dvalidation, dtest]]
        print_data_set_stats(sizes, end_memory - start_memory)
        if check_insufficient_data(options, sizes):
            set_network_flag(network)
            remove_running_flag(path_running)
            continue

        # Plot data distribution if desired
        if options.plot_data_distribution:
            plot_data_distribution(
                network.get_store_path() + "_data_distribution.pdf",
                dtest, dvalidation, dtrain)

        # Initialize Learner
        start_time = tm.timing(start_time, "Loading data time: %ss")
        start_memory = process.memory_info().rss

        network.initialize(None, data=dtrain, **options.initialize)
        # Only the memory part is formatted here ('%' binds stronger than
        # '+'), the time placeholder is left for tm.timing
        start_time = tm.timing(
            start_time,
            "Network initialization time: %ss\n" + 21 * " " +
            "memory: %.2fMB" % (
                    (process.memory_info().rss - start_memory) / (2 ** 20)))

        if options.stop_after_initialization:
            print("Stopping after initialization.")
            remove_running_flag(path_running)
            continue

        # Train network
        if not options.only_evaluate:
            info = network.train(dtrain, dvalidation)
            start_time = tm.timing(start_time, "Network training time: %ss")
            training_outcome = info.get("training_outcome", None)
            # Check before the first use, otherwise a missing outcome
            # surfaces as AttributeError instead of this assertion
            assert training_outcome is not None
            print("Training Outcome: %s" % training_outcome.name)
            assert training_outcome.stopped

        network.evaluate(dtest)
        start_time = tm.timing(start_time, "Network evaluation time: %ss")

        network.analyse(prefix=options.prefix)
        start_time = tm.timing(start_time, "Network analysis time: %ss")

        network.finalize(**options.finalize)
        _ = tm.timing(start_time, "Network finalization time: %ss")

        set_network_flag(network)
        remove_running_flag(path_running)


def train(argv):
    """
    Run a single training block: parse its arguments, seed the random
    module and train either locally or via another executable.

    :param argv: arguments of the training block
    :return: result of train_local resp. train_execute
    """
    print("Startup time: %s" % str(datetime.datetime.now()))
    print("Call: %s" % " ".join(argv))

    time_marker = time.time()
    options, directory_groups = parse_training_args(argv)
    time_marker = tm.timing(time_marker, "Parsing time: %ss")

    process = psutil.Process(os.getpid())
    random.seed(options.seed)

    # An executable is given: delegate the training to it
    if options.execute is not None:
        return train_execute(options, argv, directory_groups)
    return train_local(options, directory_groups, process, time_marker)


def run(args):
    """
    Run all training blocks contained in the arguments and afterwards
    execute the callbacks the blocks returned.

    If the callbacks registered slurm job ids for summarizing, the
    summarizing job is submitted.

    :param args: command line arguments (without the program name)
    :return: None
    """
    callbacks = []
    for training_run in split_training_blocks(args):
        produced = train(training_run)
        if produced is not None:
            callbacks.extend(produced)

    # All callbacks accumulate into the same dictionary
    collected = {}
    for hook in callbacks:
        hook(collected)

    if len(collected.get(SLURM_SUMMARIZE_KEY, [])) > 0:
        submit_slurm_summarize(collected[SLURM_SUMMARIZE_KEY])


# Script entry point: pass the command line arguments (without the program
# name) to run.
if __name__ == "__main__":
    run(sys.argv[1:])
