#!/bin/bash

# Copyright (C) 2025 Barcelona Supercomputing Center
#
# This file is part of DMR.
#
# DMR is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2 only.
#
# DMR is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with DMR; if not, see <https://www.gnu.org/licenses/>.

#
# The job script queries for the original job until it is no longer in RUNNING state
# If it did not loop and sleep, it would terminate immediately, and if it did not
# query for the original job, it would end up unnecessarily idling after its termination.
# 
# This script originally used squeue to check if a job was visible to it, but due to some
# edge cases which caused premature termination, it now uses scontrol to check state explicitly.
#
# We also verify that SSH connections are ready, because the Slurm job can start running
# before this is the case. 
#

#######################################
# Check that this node can open an SSH session to its successor in the job's
# node list (the last node wraps around to the first). The connection is
# retried after a one-second sleep until it succeeds or the retry budget is
# exhausted.
# Globals (read):
#   CUSTOM_SLURM_PREFIX  prefix prepended to the scontrol command name
#   SLURM_JOB_NODELIST   node list expanded via "scontrol show hostnames"
#   SLURM_NODEID         index of this node in that list (0 when unset/empty)
#   MAX_WAIT             number of failed attempts tolerated; falls back to
#                        30 when unset, empty or not a non-negative integer
# Outputs:
#   An error line on stderr on timeout or when no hostnames were obtained.
# Returns:
#   0 once the SSH probe succeeds. On failure it calls "exit 1", terminating
#   the shell that runs the function.
#######################################
check_ssh_ring() {
  local -a nodes=()
  local count self target
  local waited=0
  local max_wait=${MAX_WAIT:-30}

  # A missing or non-numeric limit would make the "-ge" test below fail with
  # an error on every iteration, so the loop would never time out.
  [[ "$max_wait" =~ ^[0-9]+$ ]] || max_wait=30

  # The prefix expansion is left unquoted, as everywhere else in this script.
  mapfile -t nodes < <(${CUSTOM_SLURM_PREFIX}scontrol show hostnames "$SLURM_JOB_NODELIST")
  count=${#nodes[@]}

  # Without hostnames the modulo below would divide by zero.
  if [ "$count" -eq 0 ]; then
    echo "Error: no hostnames resolved for node list '${SLURM_JOB_NODELIST}'" >&2
    exit 1
  fi

  self=${SLURM_NODEID:-0}
  target="${nodes[$(( (self + 1) % count ))]}"

  # BatchMode prevents password prompts; host-key checking is disabled and
  # known_hosts is redirected to /dev/null so the probe never blocks on, or
  # records, an unknown host key.
  until ssh \
      -o BatchMode=yes \
      -o ConnectionAttempts=1 \
      -o StrictHostKeyChecking=no \
      -o UserKnownHostsFile=/dev/null \
      -o ConnectTimeout=2 \
      "$target" true >/dev/null 2>&1; do
    sleep 1
    waited=$((waited + 1))
    if [ "$waited" -ge "$max_wait" ]; then
      echo "Timeout: $(hostname -s) -> $target" >&2
      exit 1
    fi
  done
}

# Positional arguments:
#   $1  prefix prepended to every Slurm command name used by this script;
#       exported so that child processes inherit it
#   $2  id of the parent job whose state is polled
#   $3  comment set on this job after the SSH check; only required when
#       that check is enabled
#   $4  "1" disables the SSH check
CUSTOM_SLURM_PREFIX=$1
export CUSTOM_SLURM_PREFIX
PARENT_JOBID=$2
READY_COMMENT=$3
IGNORE_SSH=$4

# The parent job id is always mandatory; the ready comment is mandatory
# unless the SSH check has been switched off.
if [[ -z "$PARENT_JOBID" || ( "$IGNORE_SSH" != "1" && -z "$READY_COMMENT" ) ]]; then
  printf '%s\n' "Error: missing required arguments." >&2
  exit 1
fi

# First, ensure that all new nodes are ready to accept SSH connections.
# This is toggleable in DMR, because on some systems, or with some level
# of privilege, ssh is always allowed. Additionally, with Slurm4DMR, the
# check is redundant as we already have an allocation. (Also, "srun" inside
# a batch job fails with Slurm4DMR, for some unknown reason.)

if [[ "$IGNORE_SSH" != "1" ]]; then
  # Both the retry limit and the function itself must be exported so that
  # the "/bin/bash -c" child started by srun can see them.
  export MAX_WAIT=30
  export -f check_ssh_ring

  # Check that SSH connections are ready in a ring structure: one task per
  # node, each probing its successor. If the step fails, the job must not be
  # announced as ready, so stop here instead of setting the comment.
  if ! ${CUSTOM_SLURM_PREFIX}srun \
      --ntasks="${SLURM_JOB_NUM_NODES}" \
      --ntasks-per-node=1 \
      --kill-on-bad-exit=1 \
      /bin/bash -c check_ssh_ring; then
    echo "Error: SSH readiness check failed; job ${SLURM_JOB_ID} not marked as ready." >&2
    exit 1
  fi

  # Publish readiness through the job's Comment field.
  ${CUSTOM_SLURM_PREFIX}scontrol update JobId="$SLURM_JOB_ID" Comment="${READY_COMMENT}"
fi

# Second, query for the parent job and stay alive until it is dead.
# The job is polled every 15 seconds. An empty reply from scontrol (its
# stderr is discarded) is treated like RUNNING, so a failed query never
# ends this script; any other reported state does.
while true; do
  jobinfo=$(${CUSTOM_SLURM_PREFIX}scontrol show job "$PARENT_JOBID" 2>/dev/null)

  # Extract the value of the first whitespace-delimited "JobState=" field.
  jobstate=
  if [[ "$jobinfo" =~ (^|[[:space:]])JobState=([^[:space:]]+) ]]; then
    jobstate=${BASH_REMATCH[2]}
  fi

  # Explicit if/else: with "cond && sleep || break" a failing sleep would
  # also trigger the break and end the keep-alive loop.
  if [[ -z "$jobinfo" || "$jobstate" == RUNNING ]]; then
    sleep 15
  else
    break
  fi
done