#!/bin/sh
###############################################################################
##                                                                           ##
##          l  p  j   s  u  b  m  i  t  _  m  p  i  c  h                     ##
##                                                                           ##
##    sh script to generate and submit parallel LoadLeveler jobs.            ##
##                                                                           ##
##    Usage: lpjsubmit [-class c] [-group g] [-wtime time] [-q] ntask        ##
##                     [LPJargs...]                                          ##
##                                                                           ##
##    written by Werner von Bloh, PIK Potsdam                                ##
##                                                                           ##
##    Last change: $Date:: 2015-09-07 15:09:33 +0200 (Mon, 07 Sep 201#$      ##
##    By         : $Author:: bloh                            $               ##
##                                                                           ##
###############################################################################

# Fail fast when the script is invoked without any argument: at least the
# number of tasks is mandatory.
if [ $# -lt 1 ]
then
  echo >&2 "Error: Number of tasks missing"
  # The usage text is quoted so that "[-q]" is printed literally; unquoted it
  # is a glob pattern that expands to a file named "q" or "-" in the current
  # directory.
  echo >&2 "Usage: $0 [-class c] [-group g] [-wtime time] [-q] ntasks [args ...]"
  exit 1
fi

# LPJROOT must name the lpjml directory; stop with a hint when it is unset
# or empty.
if [ -z "$LPJROOT" ]; then
  printf >&2 '%s\n' \
    "Error: environment variable LPJROOT is not set" \
    "Set by export LPJROOT=<path to lpjml directory>"
  exit 1
fi

# Optional leading "-class c": value for the "class" keyword of the generated
# job file. The option and its value are removed from the positional
# parameters; without the option the class defaults to "short".
# "$1" is quoted so that a missing, empty or whitespace-containing first
# argument is compared as a plain string instead of breaking the test.
if [ "$1" = "-class" ]
then
  if [ $# -lt 2 ]
  then
    echo >&2 "Error: class missing"
    # Quoted: an unquoted "[-q]" is a glob and may expand to a file name.
    echo >&2 "Usage: $0 [-class c] [-group g] [-wtime time] [-q] ntasks [args ...]"
    exit 1
  fi
  shift 1
  class=$1
  shift 1
else
  class=short
fi

# Optional "-group g": value for the "group" keyword of the generated job
# file. The option and its value are removed from the positional parameters;
# without the option the group defaults to "bios-x".
# "$1" is quoted so that a missing, empty or whitespace-containing argument
# is compared as a plain string instead of breaking the test.
if [ "$1" = "-group" ]
then
  if [ $# -lt 2 ]
  then
    echo >&2 "Error: group missing"
    # Quoted: an unquoted "[-q]" is a glob and may expand to a file name.
    echo >&2 "Usage: $0 [-class c] [-group g] [-wtime time] [-q] ntasks [args ...]"
    exit 1
  fi
  shift 1
  group=$1
  shift 1
else
  group=bios-x
fi

# Optional "-wtime time": wall clock limit written to the generated job file.
# The option and its value are removed from the positional parameters; when
# the option is absent wtime stays empty and no limit line is emitted.
# "$1" is quoted so that a missing, empty or whitespace-containing argument
# is compared as a plain string instead of breaking the test.
wtime=""
if [ "$1" = "-wtime" ]
then
  if [ $# -lt 2 ]
  then
    echo >&2 "Error: wall clock time missing"
    # Quoted: an unquoted "[-q]" is a glob and may expand to a file name.
    echo >&2 "Usage: $0 [-class c] [-group g] [-wtime time] [-q] ntasks [args ...]"
    exit 1
  fi
  shift 1
  wtime=$1
  shift 1
fi
# Optional "-q": quiet mode. The flag is stored in $quiet, which is later
# handed to lpjcheck and llsubmit and suppresses the final llq listing.
# quiet is initialised explicitly so that a variable of the same name
# inherited from the environment cannot leak into those command lines, and
# "$1" is quoted so that a missing or empty argument does not break the test.
quiet=""
if [ "$1" = "-q" ]
then
  shift 1
  quiet="-q"
fi
# After the options have been consumed at least one positional parameter,
# the number of tasks, has to remain.
if [ $# -lt 1 ]
then
  echo >&2 "Error: Number of tasks missing"
  # The usage text is quoted so that "[-q]" is printed literally; unquoted it
  # is a glob pattern that expands to a file named "q" or "-" in the current
  # directory.
  echo >&2 "Usage: $0 [-class c] [-group g] [-wtime time] [-q] ntasks [args ...]"
  exit 1
fi

# The first remaining positional parameter is the task count; whatever
# follows it is joined into one string of runtime arguments for lpjml.
ntask="${1}"   # number of tasks
shift
args="${*}"    # runtime arguments for lpjml

# check, whether LPJ configuration is valid
# ($quiet and $args stay unquoted on purpose: an empty $quiet has to vanish
# and $args has to split back into the individual lpjml arguments; the path
# built from $LPJROOT is quoted so that it survives spaces)

if "$LPJROOT/bin/lpjcheck" $quiet $args
then
  # yes, create LoadL job control file. Both here-documents and the optional
  # wall_clock_limit line go through one redirection, so a failure to write
  # job.jcf is detected and a stale file from an earlier run is never
  # submitted. "\$" and "\`" are expanded when the job runs, everything else
  # when the file is generated.
  {
    cat <<EOF
#!/bin/ksh
###############################################################################
##                                                                           ##
##                     j  o  b  .  j  c  f                                   ##
##                                                                           ##
##  LoadLeveler JCF file for running an Intel MPI job on the CLME cluster    ##
##  at PIK                                                                   ##
##                                                                           ##
##  Automatically generated by lpjsubmit shell script                        ##
##                                                                           ##
##  Created: $(date +"%d.%m.%Y")                                                      ##
##                                                                           ##
###############################################################################

# @ job_type = parallel
# @ total_tasks = $ntask
# @ group = $group
# @ class = $class
EOF
    # the wall clock limit is only written when -wtime was given
    if [ "$wtime" != "" ]
    then
      echo "# @ wall_clock_limit = $wtime"
    fi
    cat <<EOF
# @ comment = LPJmL Version $(cat "$LPJROOT/VERSION") args: $args
# @ environment = COPY_ALL
# @ blocking = 8
# @ output = lpjml.\$(cluster).out
# @ error = lpjml.\$(cluster).err
# @ queue

llgetmachinelist  > hostlist.\$LOADL_STEP_ID

machine_count=\`cat hostlist.\$LOADL_STEP_ID  | wc -l\`

mpirun_ssh -ssh  -hostfile hostlist.\$LOADL_STEP_ID -np \$machine_count \$LPJROOT/bin/lpjml $args
rc=\$?
rm hostlist.\$LOADL_STEP_ID
exit \$rc
EOF
  } >job.jcf || {
    echo >&2 "Error: cannot write job.jcf, job not submitted"
    exit 1
  }
  # submit job
  if llsubmit $quiet job.jcf
  then
    # unless quiet mode is on, list the queued jobs of the current user
    if [ "$quiet" != "-q" ]
    then
      llq -u "$(whoami)"
    fi
  else
    exit 1
  fi
else
  echo >&2 "Error in LPJ configuration, job not submitted"
  exit 1
fi
