#!/bin/bash
#SBATCH --time=04:00:00
#SBATCH --partition=main
#SBATCH --output=slurm_out.%A
#SBATCH --mem=880GB
#SBATCH --open-mode=append

# Basis size for this benchmark; passed to generate_data.py and written to
# the result table.
nbasis=4000

# One output directory per (basis size, node count, task count) combination,
# e.g. data/nbasis_004000_N2_c0256. The counts come from the SLURM allocation.
printf -v basis_tag '%06d' "$nbasis"
printf -v tasks_tag '%04d' "$SLURM_NTASKS"
dpath="data/nbasis_${basis_tag}_N${SLURM_NNODES}_c${tasks_tag}"
mkdir -p "$dpath"

#######################################
# Append a timestamped message to the benchmark log.
# Globals:   dpath (read) - directory that holds benchmark.log
# Arguments: the words of the message; they are joined with single spaces
#            and written verbatim (no glob expansion, no re-splitting)
# Outputs:   one line "[YYYY-MM-DD HH:MM:SS] message" appended to
#            $dpath/benchmark.log
#######################################
log() {
    local stamp
    stamp=$(date '+[%Y-%m-%d %H:%M:%S]')
    # Prepend the timestamp to the argument list so that "$*" joins everything
    # with single spaces and a call without arguments yields just the timestamp.
    set -- "$stamp" "$@"
    printf '%s\n' "$*" >> "$dpath/benchmark.log"
}

#######################################
# Create (or truncate) the result table and write its column header.
# Globals:   dpath (read) - directory that holds result.log
# Arguments: none
# Outputs:   a single commented header line in $dpath/result.log; the
#            "# " prefix plus the first 8-wide column spans 10 characters
#######################################
write_header() {
    printf '# %8s %8s %8s %16s %16s %8s\n' \
        "Basis" "Cores" "Nodes" "Wall-time(s)" "Memory(MiB)" "Type" \
        > "$dpath/result.log"
}

log "Start job"
write_header

module load gpaw/frozen

export OMP_NUM_THREADS=1
export RHODENT_RESPONSE_MAX_MEM="100000"  # Attempt limiting response calculator to 100GB

# Generate the data. Nothing can be benchmarked without it, so record the
# failure in the benchmark log before giving up.
if ! python generate_data.py "$dpath" "$nbasis"; then
    log "Failed to generate test data"
    exit 1
fi

log "Generated test data"
# Record the size of the generated input files.
log "$(ls -lah -- "$dpath/ksd.ulm")"
log "$(ls -lah -- "$dpath/wfs.ulm")"

# Run the benchmark on every task of the allocation. The script below is one
# double-quoted string: unescaped expansions (the data directory, nbasis, the
# task and node counts) are substituted here, before srun starts, while
# backslash-escaped ones are evaluated separately by each task.
srun bash -c "
log() {
    echo \$(date '+[%Y-%m-%d %H:%M:%S]') \$* >> $dpath/benchmark.log
}

write_result() {
    printf '%10s %8s %8s %16s %16s %8s\n' \$* >> $dpath/result.log
}

[ \"\$SLURM_PROCID\" == 0 ] && log == Starting benchmark with $SLURM_NTASKS ==
# Start memory poll in background for the first rank of each node.
# SLURM_LOCALID is the node-local task index, so exactly one poller runs
# per node regardless of how many tasks are placed on a node.
pid=
if [ \"\$SLURM_LOCALID\" == 0 ]; then
    ./poll_memory_usage > $dpath/memory_\$(hostname).log &
    pid=\$!
    log Gotten pid \$pid
fi

# SECONDS is the bash timer; resetting it makes it count the run time below.
SECONDS=0
# NOTE(review): every task redirects to the same rhodent.log - confirm that
# only one rank writes to stdout.
python run_benchmark_single.py $dpath > $dpath/rhodent.log
code=\$?
elapsed=\$SECONDS
[ \$code == 0 ] && status=Passed || status=Failed
[ \"\$SLURM_PROCID\" == 0 ] && log \$status in \$elapsed seconds

# Clean up: stop the poller on exactly those tasks that started one
if [ -n \"\$pid\" ]; then
    kill \$pid
    log Killed memory log
fi

if [ \$SLURM_PROCID == 0 ]; then
    # Sum memory logs over all per-node files. Each file is assumed to have
    # three columns; paste puts them side by side, so columns 2, 5, 8, ...
    # and columns 3, 6, 9, ... are accumulated separately.
    paste $dpath/memory_*.log | grep -v '#' | awk '{ sum2 = 0; sum3 = 0; for (i = 2; i <= NF; i += 3) { sum2 += \$i; sum3 += \$(i + 1) } print \$1, sum2, sum3 }' > $dpath/memory.log
    # Baseline: third column of the first summed sample
    startmem=\$(cat $dpath/memory.log | grep -v '#' | head -n 1 | awk '{print \$3}')
    # Peak of the third column and the first-column value at which it occurred
    mem=\$(awk -v max=0 'NR>1 {if(\$3>max){time=\$1; max=\$3}}END{print max}' $dpath/memory.log)
    mem=\$(echo \$mem-\$startmem | bc)
    memtime=\$(awk -v max=0 'NR>1 {if(\$3>max){time=\$1; max=\$3}}END{print time}' $dpath/memory.log)
    # Only successful runs get a row in the result table
    [ \$code == 0 ] && write_result $nbasis $SLURM_NTASKS $SLURM_NNODES \$elapsed \$mem single
    log \"Max memory \$mem MiB at \$memtime (idle memory is \$startmem)\"
fi
"

# Clean up the generated input and intermediate data. ${dpath:?} aborts the
# script instead of expanding to a path directly under / should dpath ever be
# empty or unset.
rm -rf -- "${dpath:?}/pulserho"
rm -rf -- "${dpath:?}/ksd.ulm" "${dpath:?}/wfs.ulm"
