#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Converts the state format of samples in the given sample files to a target format
"""
from __future__ import print_function

import disable_external_dependencies
disable_external_dependencies.disable()

import tools
import tools.parsing as apt
import datetime
import itertools
import os
import sys


from src.training import parser, parser_tools

from src.training.bridges import LoadSampleBridge
from src.training.samplers import IterableFileSampler
from src.training.samplers import DirectorySampler, GeneratorSampler
from src.training.bridges.sampling_bridges import StateFormat
from src.training.misc import DomainProperties, StreamContext, StreamDefinition

import argparse
import os
import re
import shlex
import subprocess
import sys
import logging
log = logging.getLogger()

# Pick the subprocess implementation and the matching ``decoder`` helper for
# the running interpreter.
if sys.version_info[0] >= 3:
    import subprocess


    def decoder(s):
        """Return *s* unchanged (Python 3 variant)."""
        return s
else:
    # Python 2: the ``subprocess32`` backport replaces ``subprocess``.
    import subprocess32 as subprocess


    def decoder(s):
        """Return ``s.decode()`` (Python 2 variant)."""
        return s.decode()




# Help text appended to the description of stream options. The fragments are
# joined by implicit string concatenation, so every fragment boundary must
# carry its own separating space.
DATA_STREAM_HINT = (
    "If the stream definition requires a problem file path resp. the "
    "temporary directory, define the stream after the problem file resp. "
    "tmp dir is defined. "
    "The  available streams can be checked in training.misc.stream_contexts.py."
    " If not given,  then 'gzip(suffix=.data.gz)' is used as default."
)


# Parser for the options of the conversion itself (target format, streams,
# fields, temporary folder).
# NOTE: the first positional parameter of ArgumentParser is ``prog`` (the
# program name shown in usage/error messages), so the explanatory sentence is
# passed explicitly as ``description``.
pconvert = argparse.ArgumentParser(
    description="Use positional arguments(state_format, tasks) FIRST not at "
                "the end.")
pconvert.add_argument("--state-format", choices=StateFormat.get_formats(),
                      type=StateFormat.get,
                      help="Format into which the files shall be converted",
                      required=True)
pconvert.add_argument("--input-streams", type=apt.stream_definition,
                      action="append", default=[], required=True,
                      help="input stream. %s" % DATA_STREAM_HINT)
pconvert.add_argument("--output-streams", type=apt.stream_definition,
                      action="append", default=[], required=True,
                      help="output stream. %s" % DATA_STREAM_HINT)
# The help fragments are concatenated implicitly; each boundary needs its own
# separating space.
pconvert.add_argument("--fields", type=str, nargs="+", action="store",
                      default=None,
                      help=("List all fields of the data which shall be "
                            "converted in the order they shall appear (if the"
                            " order is relevant for you) (if not given all "
                            "fields are converted)"))
pconvert.add_argument("-tmp", "--temporary-folder", type=apt.tmpdir,
                      action="store", default=None,
                      help="Folder to store temporary files")

# Parser for everything that has to be known before the conversion options are
# parsed: explicit task files, slurm submission settings and the description
# of a directory search for further task files.
# NOTE: the first positional parameter of ArgumentParser is ``prog``, so the
# explanatory text is passed explicitly as ``description``.
preparser = argparse.ArgumentParser(
    description="Parser for arguments which shall be parsed prior to the main "
                "parsing. This includes slurm stuff, but also finding all "
                "task files if they are given in a descriptive manner.")

preparser.add_argument("--task-files", type=apt.istask, nargs="+",
                       default=[],
                       help="Problem files associated to the data files "
                            "which shall be converted")
# Arguments relevant for slurm
preparser.add_argument("--slurm", action="store_true",
                       help="Submits the individual problems via slurm to be "
                            "run in parallel")
preparser.add_argument("--slurm-script", type=apt.isbash,
                       default=tools.paths.SCRIPT_SLURM_CONVERT,
                       help="Slurm script used to submit the jobs in parallel")
preparser.add_argument("--slurm-dependency", default=None,
                       help="Requires '--slurm'. Adds a dependency to the "
                            "slurm call.")

# Arguments relevant to detect all task files to process
preparser.add_argument("--directory", type=apt.isdir,
                       help="Directory containing tasks for which data shall "
                            "be converted.")
# Every occurrence of the following two options adds one compiled regex.
preparser.add_argument("--directory-filter", type=re.compile, action="append",
                       default=[],
                       help="List of regex which any directory has to match "
                            "s.t. the script will look into it for tasks.")
preparser.add_argument("--task-filter", type=re.compile, action="append",
                       default=[],
                       help="List of regex which any task has to match "
                            "s.t. the script will consider them.")


def check_depth(_, named_args):
    """Ensure that the minimum search depth does not exceed the maximum.

    :param _: ignored.
    :param named_args: mapping queried for the keys "min_depth" and
                       "max_depth". If either is missing or None, nothing
                       is checked.
    :raises ValueError: if both depths are present and min_depth is not
                        less than or equal to max_depth.
    """
    lower = named_args.get("min_depth")
    upper = named_args.get("max_depth")

    # Nothing to compare as long as one of the bounds is unknown.
    if lower is None or upper is None:
        return
    if lower <= upper:
        return
    raise ValueError("--directory-max-depth has to be greater than or "
                     "equal to --directory-min-depth")


# Depth limits for the task search below '--directory'. Each value is parsed
# by apt.int_zero_positive, registered under a name and combined with
# check_depth (presumably so that the two named values can be compared once
# both are known -- confirm in tools.parsing).
preparser.add_argument(
    "--directory-max-depth",
    type=apt.restricted_type(
        apt.named_type(apt.int_zero_positive, "max_depth"), check_depth),
    default=None,
    help=("Maximum recursion depth for searching tasks in the given "
          "'--directory' (0 == only in directory itself)."))
preparser.add_argument(
    "--directory-min-depth",
    type=apt.restricted_type(
        apt.named_type(apt.int_zero_positive, "min_depth"), check_depth),
    default=None,
    help=("Minimum recursion depth for searching tasks after which tasks "
          "can be selected (0 is '--directory' itself)."))

# Switch to only show what would be done.
preparser.add_argument(
    "--dry",
    action="store_true",
    help=("Perform a dry run. Does not execute the conversion, but shows "
          "what it would convert."))


# Single-entry cache: path of the most recently loaded domain properties file
# and the object loaded from it.
PREVIOUS_DOMAIN_PROPERTIES_FILE = None
PREVIOUS_DOMAIN_PROPERTIES = None


def domain_property_loader(_, file_task):
    """Return the domain properties stored next to the given task file.

    The properties are looked up in "domain_properties.json" in the directory
    of *file_task*. The most recently loaded file is remembered, so
    consecutive calls for tasks of the same directory load the file only once.

    :param _: ignored.
    :param file_task: path to a task file.
    :return: the result of DomainProperties.sload for the properties file, or
             None if no such file exists.
    """
    global PREVIOUS_DOMAIN_PROPERTIES_FILE, PREVIOUS_DOMAIN_PROPERTIES

    task_dir = os.path.dirname(file_task)
    candidate = os.path.join(task_dir, "domain_properties.json")

    # Same file as on the previous successful load: reuse the cached object.
    if candidate == PREVIOUS_DOMAIN_PROPERTIES_FILE:
        return PREVIOUS_DOMAIN_PROPERTIES

    # No properties file for this task: drop whatever was cached before.
    if not os.path.isfile(candidate):
        PREVIOUS_DOMAIN_PROPERTIES_FILE = None
        PREVIOUS_DOMAIN_PROPERTIES = None
        return None

    PREVIOUS_DOMAIN_PROPERTIES = DomainProperties.sload(candidate)
    PREVIOUS_DOMAIN_PROPERTIES_FILE = candidate
    return PREVIOUS_DOMAIN_PROPERTIES


def run_convert(task_files, options, dry):
    """Convert the samples of the given task files in this process.

    :param task_files: task files handed to the IterableFileSampler.
    :param options: parsed conversion options; input_streams, output_streams,
                    fields and state_format are read from it.
    :param dry: if true, only print *options* and *task_files* and return.
    """
    if dry:
        print(options, task_files, sep="\n")
        return

    read_context = StreamContext(streams=options.input_streams)
    write_context = StreamContext(streams=options.output_streams)
    bridge = LoadSampleBridge(
        streams=read_context,
        write_streams=write_context,
        fields=options.fields,
        format=options.state_format,
        prune=False, fprune=None,
        skip=True, skip_magic=False,
        domain_properties=None,
        # Domain properties are looked up per task file.
        domain_properties_loader=domain_property_loader)

    file_sampler = IterableFileSampler(bridge, task_files)
    file_sampler.initialize()
    file_sampler.sample()
    file_sampler.finalize()


def run_slurm(options, args):
    """Submit the conversion of the task files to slurm via ``sbatch``.

    The command consists of ``sbatch``, the dependency arguments derived from
    options.slurm_dependency, the array arguments derived from the number of
    task files, the slurm script, all task files and finally *args*.

    :param options: pre-parsed options; slurm_dependency, task_files,
                    slurm_script and dry are read from it.
    :param args: remaining command line arguments appended to the command.
    """
    command = (["sbatch"] +
               tools.slurm.get_dependency_arguments(options.slurm_dependency) +
               tools.slurm.get_array_arguments(len(options.task_files)) +
               [options.slurm_script] +
               options.task_files + args)
    if options.dry:
        print(command)
        return

    # Do not let a failed submission pass silently.
    exit_code = subprocess.call(command)
    if exit_code != 0:
        print("sbatch exited with status %s; the conversion jobs may not "
              "have been submitted." % exit_code, file=sys.stderr)


def main(args):
    """Parse the command line and run the conversion locally or via slurm.

    :param args: command line arguments without the program name.
    """
    if any(x in args for x in ["-h", "--help", "h"]):
        preparser.print_help(sys.stdout)
        pconvert.print_help(sys.stdout)
        # Stop here. Otherwise the preparser would handle '-h'/'--help' again
        # and print its help a second time, and a bare 'h' would be parsed as
        # a regular argument.
        return
    pre_options, args_convert = preparser.parse_known_args(args)

    # Validate that those arguments are correct too (s.t. we do not submit
    # invalid jobs to slurm).
    if pre_options.slurm:
        # NOTE(review): the dummy task entry and the temporary folder are only
        # added for this validation run (presumably some argument types of
        # pconvert require them -- confirm in tools.parsing). Slurm receives
        # the unmodified 'args_convert'.
        apt.ALL_TASKS.append("DummyEntry")
        conv_options = pconvert.parse_args(
             ["--temporary-folder", "."] + args_convert )
    else:
        conv_options = pconvert.parse_args(args_convert)

    # Detect additional task files by searching through the given directory
    if pre_options.directory is not None:
        # The set removes tasks which are given explicitly and found again.
        pre_options.task_files = tools.misc.sort_nicely(set(
            pre_options.task_files +
            [os.path.abspath(x) for x in
             tools.misc.find_relevant_tasks(
                pre_options.directory, pre_options.directory_filter,
                pre_options.task_filter,
                pre_options.directory_min_depth,
                pre_options.directory_max_depth)]))

    if pre_options.slurm:
        run_slurm(pre_options, args_convert)
    else:
        run_convert(
            task_files=pre_options.task_files,
            options=conv_options,
            dry=pre_options.dry,
        )


if __name__ == '__main__':
    main(sys.argv[1:])
