#!/usr/bin/env perl

use strict;
use warnings;
use utf8;
use feature qw{ postderef say signatures state };
no warnings qw{ experimental::postderef experimental::signatures };
use Digest::SHA qw(sha256_hex);

use Data::Dumper;
use Cwd;
use File::Basename;

# Try to load Getopt::Long::Complete at run time. Because this is a string
# eval, a missing module only sets $@ instead of aborting compilation.
# NOTE(review): presumably chosen for its shell-completion support - confirm.
eval "use Getopt::Long::Complete 'HelpMessage'";

# NOTE: `use` is a compile-time statement, so the import below is performed
# unconditionally while the file is compiled; the surrounding `if` does not
# make it conditional. It guarantees that GetOptions/HelpMessage exist even
# when the eval above failed.
if($@) {
    use Getopt::Long 'HelpMessage';
}

# Base class for job runners. Concrete runners either submit the problems to
# SLURM or emit a script for GNU Parallel.
package Runner;

# Constructor: returns an empty hash reference blessed into the invoking class.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}

# Print a message on STDOUT. The signature accepts exactly one argument, so
# this has to be called as a plain function (Runner::mysay("...")).
sub mysay($msg) {
    say $msg;
}

# Produce the runner's output. The base implementation does nothing and
# returns nothing; subclasses override it.
#
# $self is optional: the previous empty signature made `$runner->produce`
# die with "Too many arguments", which was inconsistent with the subclasses'
# produce($self). Argument-less function calls keep working as before.
sub produce($self = undef) {
    return;
}

package main;

# With pass_through, options unknown to this script stay in @ARGV instead of
# raising an error.
# https://stackoverflow.com/a/3002214
Getopt::Long::Configure("pass_through");

# Defaults for the options that are bound by reference below.
my $enable_tmpout = 0;
my $fullpaths = 0;
my $gnuparallel = 0;
my $norunlim = 0;
my $partition = "";
my $waitflag = 0;

# The remaining option variables are declared inline; a `my` declared inside
# the argument list is visible to all statements that follow.
GetOptions(
    'help|h'             => sub { HelpMessage(0) },
    'executable|e=s'     => \   my $executable,
    'problem|p=s{1,}'    => \   my @problems,
    # Bind to the variable declared above. A second `my $partition` here
    # would mask it and leave the option undefined when it is not supplied.
    'partition=s'        => \$partition,
    'glob|g=s{1,}'       => \   my @problemglobs,
    'name|n=s'           => \ ( my $name = "paraqs" ),
    'commit=s'           => \ ( my $commit = "UNDEFINED" ),
    'tmpout!'            => \$enable_tmpout,
    'fullpaths!'         => \$fullpaths,
    'gnuparallel!'       => \$gnuparallel,
    'cpus_per_task=n'    => \ ( my $cpus_per_task = 2 ),
    'runlim=s'           => \ ( my $runlim = "/usr/local/bin/runlim" ),
    'norunlim!'          => \$norunlim,
    'time=n'             => \ ( my $time = 700 ),
    'space=n'            => \ ( my $space = 100000 ),
    'wait!'              => \$waitflag,
    'nice=n'             => \ ( my $nice = 0 ),
) or HelpMessage(1);

# Placeholder for the runner object; 0 means "no runner selected yet".
my $runner = 0;

# Write one diagnostic line to STDERR.
#   $msg - text to print; a newline is appended by `say`.
sub mylog($msg) {
    say {*STDERR} $msg;
}

# Decide whether the executable is wrapped in runlim. It is skipped when
# --norunlim was given or when the configured runlim path does not exist.
if($norunlim or not -e $runlim) {
    # An empty prefix deactivates runlim in the generated commands.
    $runlim = "";
    mylog "Runlim inactive!";
} else {
    # Turn the bare path into the full command prefix including the limits.
    $runlim = $runlim
        . " --real-time-limit=\"$time\""
        . " --space-limit=\"$space\""
        . " -p";
    mylog "Runlim active! Command: $runlim";
}

# Both an executable and a run name are mandatory.
$executable or die "Require an executable to run!";
$name or die "Require a name!";

# Look up a command through the shell's `command -v`.
#   $_[0]   - name of the command to look for
#   returns - whatever `command -v` printed (true if found), or an empty
#             string when nothing was printed
sub check_exists_command {
    my ($tool) = @_;
    my $located = qx{sh -c 'command -v $tool'};
    return $located;
}

# Without sbatch and without --gnuparallel there is no backend to hand the
# work to.
die "Neither SLURM usable nor --gnuparallel defined!"
    unless check_exists_command("sbatch") or $gnuparallel;

# If no commit was given on the command line, ask git for the abbreviated
# hash of the repository that contains the executable.
if($commit eq "UNDEFINED" and check_exists_command("git")) {
    my $dirname = dirname($executable);

    # List-form pipe open: no shell is involved, so directory names with
    # spaces or shell metacharacters are passed to git unchanged. `git -C`
    # makes git fail (leaving the commit empty) when the directory cannot be
    # entered, instead of silently reporting the commit of the current
    # directory.
    $commit = "";
    if(open(my $git, '-|', 'git', '-C', $dirname, 'log', '--pretty=tformat:%h', '-n1')) {
        $commit = <$git> // "";
        # chomp only strips a trailing newline; chop would eat a real
        # character if the newline were ever missing.
        chomp $commit;
        close($git);
    }
    mylog "Commit: $commit";
}

# Everything GetOptions left in @ARGV is forwarded to the executable.
my $additional_args = join(" ", @ARGV);
mylog "Giving arguments to $executable: $additional_args";

# Path of $name.log in the current directory.
# NOTE(review): not referenced by any other statement in this script.
my $output = cwd() . "/" . $name . ".log";

# Calling slurm from perl was inspired from here:
# https://hpc.nih.gov/docs/job_dependencies.html

# Generate jobs array. Each --problem value may itself be a whitespace
# separated list. split ' ' (awk mode) also drops leading whitespace, so no
# empty entry is produced for values like " a b".
@problems = split(' ', join(' ', @problems));

if(@problemglobs) {
    # Report the number of globs, not the index of the last one.
    my $globcount = scalar(@problemglobs);
    mylog "Problem globs specified! Resolving $globcount globs.";
    foreach my $pattern (@problemglobs) {
        push(@problems, glob "$pattern");
    }
}

# An empty list has $#problems == -1; comparing against 0 would wrongly
# abort runs with exactly one problem and let empty runs through.
unless(@problems) {
    mylog "No problems found! Aborting batch call.";
    exit(-1);
}

# Bash snippet that derives $problemname from $problem inside the generated
# job script. The heredocs are single-quoted, so their text is emitted
# verbatim.
my $problemname_script;
if(!$fullpaths) {
    # Default: only the file name of the problem.
    $problemname_script = <<'END';
problemname=`basename "$problem"`;
END
} else {
    # --fullpaths: resolved path with every "/" replaced by "__".
    $problemname_script = <<'END';
problempath=$(realpath "$problem"); problemname=${problempath////__};
END
}

# Runner that submits all problems as one SLURM array job through sbatch.
package SLURMRunner;
our @ISA = qw( Runner );

# Constructor: builds a Runner and re-blesses it into the invoking class.
sub new {
    my $type = shift;
    my $self = Runner->new;
    main::mylog("Using SLURMRunner.");
    return bless $self, $type;
}

# Print a message on STDOUT with a "c " prefix. Accepts exactly one argument,
# so it has to be called as a plain function.
sub mysay($msg) {
    say "c $msg";
}

# Generate the bash job script for the array job and pipe it into sbatch.
#
# Every existing problem file gets one array index; the task looks up its
# problem via $SLURM_ARRAY_TASK_ID and writes a log named
# "$name-<problemname>.log". Nothing is submitted when none of the problem
# files exist. Dies when the temporary job file cannot be written.
sub produce($self) {
    my $common_header = << "END";
export ASAN_OPTIONS=print_stacktrace=1
export UBSAN_OPTIONS=print_stacktrace=1

# Array to store all declared problems to be solved later.
declare -A problems;
END

    # Only files that exist get an index, so the indices stay dense.
    my $i = 0;
    foreach my $n (@problems) {
        if(-e $n) {
            $common_header .= "problems[$i]=\"$n\"\n";
            $i = $i + 1;
        } else {
            main::mylog("Cannot queue problem $n as file does not exist!");
        }
    }

    # Without any queued problem the array range would become "0--1", which
    # is not a valid range for sbatch.
    if($i == 0) {
        main::mylog("No existing problem files to queue! Not submitting anything to SLURM.");
        return;
    }

    my $array_arg = $i - 1;

    my $client_job = << "END";
#!/bin/bash
$common_header
problem=\${problems[\$SLURM_ARRAY_TASK_ID]};

$problemname_script

logfile="`pwd`/$name-\$problemname.log";

echo "c submit.pl: name:  $name" > "\$logfile";
echo "c submit.pl: task:  \$SLURM_ARRAY_TASK_ID" >> "\$logfile";
echo "c submit.pl: host:  `cat /etc/hostname`" >> "\$logfile";
echo "c submit.pl: start: `date`" >> "\$logfile";
echo "c submit.pl: commit: $commit" >> "\$logfile";
echo "c submit.pl: additional args: $additional_args" >> "\$logfile";
echo "c submit.pl: executable: $executable" >> "\$logfile";
END

    if($enable_tmpout) {
        # Log to a file in /tmp first and append it to the real log once the
        # program has finished. The first echo prints the same text as
        # before, but as one quoted string with a single redirection.
        $client_job .= << "END";
tmplog="/tmp/slurmtools_submitpl_\$SLURM_ARRAY_JOB_ID.\$SLURM_ARRAY_TASK_ID.log";
echo "c Running job $name with problem \$problem of name \$problemname! but saving log first to \$tmplog!" >> "\$logfile";
$runlim \"$executable\" \"\$problem\" \\
    $additional_args &>> "\$tmplog";
exitstatus=\$?;
tmplogsizebytes=\$(du -b "\$tmplog");
tmplogsizehr=\$(du -h "\$tmplog");
tmplogcopytime="\$( TIMEFORMAT='%lU'; time ( cat "\$tmplog" >> "\$logfile" ) 2>&1 1>/dev/null )";
echo "c submit.pl: temporary log size bytes: \$tmplogsizebytes" >> "\$logfile";
echo "c submit.pl: temporary log size hr \$tmplogsizehr" >> "\$logfile";
echo "c submit.pl: temporary log copy time: \$tmplogcopytime" >> "\$logfile";
rm "\$tmplog"
END
    } else {
        $client_job .= << "END";
echo "c Running job $name with problem \$problem of name \$problemname!" >> "\$logfile";
$runlim \"$executable\" \"\$problem\" \\
    $additional_args &>> "\$logfile";
exitstatus=\$?;
END
    }

    $client_job .= << "END";
echo "c submit.pl: exit status: \$exitstatus" >> "\$logfile";
END

    main::mylog("Giving the following to slurm:");
    main::mylog("$client_job");

    my $tmpfile = "./.__job_tmp.txt";

    # Lexical filehandle; close is checked because buffered write errors
    # only surface there.
    open(my $tmpfh, '>', $tmpfile) or die "Cannot open $tmpfile for writing: $!";
    print {$tmpfh} $client_job;
    close($tmpfh) or die "Cannot write $tmpfile: $!";

    # Build the optional sbatch arguments in locals instead of overwriting
    # the option variables. An unset partition or wait flag contributes
    # nothing to the command line (previously a literal "0" was appended
    # when --wait was not given).
    my $partition_arg = (defined $partition and $partition ne "") ? "-p $partition" : "";
    my $wait_arg = $waitflag ? "--wait" : "";

    my $cmd = "cat $tmpfile | sbatch --nice=$nice $partition_arg --parsable -J $name -c $cpus_per_task --array=0-$array_arg --output=/dev/null --error=/dev/null $wait_arg";
    main::mylog("sbatch command: $cmd");
    my $client_jobnum = `$cmd`;
    # Strip the trailing newline only.
    chomp $client_jobnum;

    main::mylog("Queued job $client_jobnum");

    unlink $tmpfile;
}

# Runner that prints a bash script on STDOUT which runs all problems through
# GNU Parallel.
package GNUParallelRunner;
our @ISA = qw( Runner );

# Constructor: builds a Runner and re-blesses it into the invoking class.
sub new {
    my $type = shift;
    my $self = Runner->new;
    main::mylog("Using GNUParallelRunner.");
    return bless $self, $type;
}

# Print a message on STDOUT with a "# " prefix. Accepts exactly one argument,
# so it has to be called as a plain function.
sub mysay($msg) {
    say "# $msg";
}

# Print the bash script: an exported bash function that runs one problem,
# followed by a `parallel` invocation that reads the problem list from a
# heredoc. The function name contains a SHA-256 hash of the problem list.
sub produce($self) {
    my $problemshash = main::sha256_hex(@problems);
    my $bashfuncname = "slurmtools_submit_func_$problemshash";
    main::mylog("Computed problems-hash $problemshash - function to be created is called $bashfuncname");

    # A shebang needs "#!"; with a bare "#" the line is only a comment.
    say "#!/usr/bin/env bash";

    # Print function signature that is going to be used for gnu parallel.
    say "function $bashfuncname {";
    say "    problem=\$1;";
    say "    taskid=\"\$((\$2-1))\"";
    say "    export ASAN_OPTIONS=print_stacktrace=1";
    say "    export UBSAN_OPTIONS=print_stacktrace=1";
    say "    $problemname_script";
    say "    echo \"c submit.pl: name:  $name\";";
    say "    echo \"c submit.pl: task:  \$taskid\";";
    say "    echo \"c submit.pl: host:  `cat /etc/hostname`\";";
    say "    echo \"c submit.pl: start: `date`\";";
    say "    echo \"c submit.pl: commit: $commit\";";
    say "    echo \"c submit.pl: additional args: $additional_args\";";
    say "    echo \"c submit.pl: executable: $executable\";";
    say "    echo \"c Running job $name with problem \$problem of name \$problemname!\"";
    say "    $runlim $executable $additional_args \$1;";
    # Capture the status right after the run; it was echoed below without
    # ever being assigned.
    say "    exitstatus=\$?;";
    # The ";" belongs after the closing quote so the logged line matches the
    # "exit status" line written by the SLURM runner.
    say "    echo \"c submit.pl: exit status: \$exitstatus\";";
    say "}";
    say "export -f $bashfuncname;";

    # The gnu parallel part: each job is meant to occupy $cpus_per_task
    # cores, so the job count is given as a percentage of the available cores.
    my $parallelcoreutil = 1 / $cpus_per_task * 100;
    say "parallel -j $parallelcoreutil% \$@ --trc $name-{}.log $bashfuncname {} {#} \"&>\" $name-{/}.log << EOF";
    foreach my $problem (@problems) {
        say $problem;
    }
    say "EOF";
}

package main;

# Select the backend: GNU Parallel when --gnuparallel was given, SLURM
# otherwise. Direct method-call syntax replaces the indirect-object form
# (`new Class()`), which is ambiguous to parse.
$runner = $gnuparallel ? GNUParallelRunner->new() : SLURMRunner->new();
die "No runner specified! Have to either use SLURM or GNU Parallel." unless $runner;

$runner->produce();

=head1 NAME

submit.pl - Execute a job on slurm without networked tasks with a list of problems!

=head1 SYNOPSIS

=head2 ARGUMENTS

  --executable,-e   Path to executable.
  --problem,-p      Path to problem(s) to execute. Can be supplied multiple times 
                    and also as " " separated list and defines $name for a run.
                    May also be supplied using `` backtick syntax (just a recommendation),
                    so that bash evaluates the command that then gives the paths.
                    Example for that: -p "`ls -d /some/path/*.cnf`"
  --name,-n         Name of this run to show in SLURM.
  --commit          Commit that should be included in log files. Received by default if the
                    executable is contained in some git repository and no commit was set via
                    CLI.
  --tmpout          Enable logging program output to /tmp/.... and merge the output together
                    with the regular log file in ./....log after the program finished, so
                    eventual NFS inconsistencies do not influence the program being
                    executed.
  --cpus_per_task   Number of (logical) CPUs a single instance should get. If hyperthreading
                    is on, using double the desired physical core count is strongly
                    advised.
  --runlim          Path to the runlim binary to use. Runlim is used to limit runtime
                    and memory consumption of executable instances.
                    Project page: http://fmv.jku.at/runlim/
  --time            Maximum wall-clock time (in seconds) until a timeout is reached
                    and runlim should quit the process. Requires runlim.
  --space           Maximum RAM (in MB) until a memout is reached
                    and runlim should quit the process. Requires runlim.
  --nice            Sets the nice value for the SLURM scheduler. Default is 0. Higher nice
                    values reduce priorities. This is good when submitting many long running
                    jobs and one wants to yield to others faster.
  --partition       Sets the slurm partition to run the array task on.
  --wait            If using slurm, waits until the slurm job is finished (as if passing the
                    --wait flag to sbatch).
  --help,-h         Produce this help message.
  --fullpaths       Activate string replace to compute full paths of problems instead
                    of just their filenames. Replaces / by __ in problemname.
  --gnuparallel     Transform the output to produce a bash script that runs GNU Parallel.
                    Very useful for batch jobs that may be executed on other machines which
                    are not tightly dependent on timings, but should still maybe get the
                    same output.

=head2 DETAILS

All other arguments are passed to the executable directly.

Produces one .log file per problem in the working directory, named
$name-<problemname>.log, containing the output of the run on that problem.

Log files may be parsed afterwards using other tools.

This script replaces the alternative of using multiple scripts for every run. The idea
is to have a single script responsible for all the actions. One may also use this script
inside some bash for loop in order to try out different configurations in one go, 
see examples.

=head2 EXAMPLES

Run depqbf on the FMV cluster on a directory of QDIMACS files:

    /home/max/montecube-benchmarks/submit.pl -e /home/max/paracooba/test/depqbf-version-6.03/depqbf -n orgsynth --time 3600 -p "`ls -d /home/max/montecube-benchmarks/2-player_benchmarks_qdimacs/*.qdimacs`" --cpus_per_task 2 > task.txt

Run paracooba on a list of QDIMACS files while stepping over multiple parameter values:

    for i in 10 12 16; do mkdir paraqs__td$i; cd paraqs__td$i && /home/max/paracooba/test/ParaqsEval/submit.pl -e /home/max/paracooba/src/build/paraqs -p "`cat /home/max/OrganicSynthesis/qdimacs-list`" --cpus_per_task 32 --worker 16 --time 3600 -n orgsynth --tree-depth $i > task.txt && cd ..; done

=head1 VERSION

0.2

=cut
