#!/bin/sh

# Print the usage summary on stdout and terminate the script with exit
# status 1.  The text is emitted verbatim from a quoted here-document, so
# nothing inside it is expanded by the shell.
show_help()
{
	cat <<'USAGE'
Use: sge_submit_workers [options] <servername> <port> <num-workers>
where options are:
  -a               Enable auto mode.
  -s               Run as a shared worker.
  -n <nodes/job>   Submit a job to multiple nodes.
  -N <name>        Preferred master name.
  -C <catalog>     Set catalog server to <catalog>. <catalog> format: HOSTNAME:PORT.
  -t <time>        Abort after this amount of idle time. (default=900s)
  -p <parameters>  SGE qsub parameters.
  -h               Show this help message.
  -f               SSH port forwarding enabled (Lee-Ping's addition)
USAGE
	exit 1
}

# ---------------------------------------------------------------------
# Defaults and command-line option parsing.
#
#   arguments      options collected for work_queue_worker
#   use_auto       set to 1 by -a (auto mode)
#   parameters     options passed to sbatch; -p appends to the default
#   port_forward   set to 1 by -f
#   nodes_per_job  set by -n; written into the "#SBATCH -N/-n" lines
#   numworkers     workers started per job (no option changes it)
#   cores          value exported as the OMP/MKL thread count
#                  (no option changes it)
# ---------------------------------------------------------------------
arguments=""
use_auto=0
parameters="--exclusive"
port_forward=0
nodes_per_job=1
numworkers=1
cores=16

# "C:" has to be part of the option string: without it getopts reports
# -C as an unknown option and the documented "-C <catalog>" arm below
# can never be reached.
while getopts asfn:N:C:t:p:h opt
do
	case "$opt" in
		a)  arguments="$arguments -a"; use_auto=1;;
		s)  arguments="$arguments -s";;
		n)  nodes_per_job="$OPTARG";;
		N)  arguments="$arguments -N $OPTARG";;
		C)  arguments="$arguments -C $OPTARG";;
		t)  arguments="$arguments -t $OPTARG";;
		p)  parameters="$parameters $OPTARG";;
		h)  show_help;;
		f)  port_forward=1;;
		\?) show_help;;
	esac
done

# Discard the parsed options so that $1, $2, ... are the positional
# arguments.
shift $((OPTIND - 1))

# ---------------------------------------------------------------------
# Positional arguments.
#   normal mode (use_auto = 0): <servername> <port> <num-workers>
#   auto mode                 : <num-workers> only; host and port stay
#                               empty
# ---------------------------------------------------------------------
if [ "$use_auto" = 0 ]; then
    host=$1
    port=$2
    count=$3
else
    host=
    port=
    count=$1
fi

# The worker count is required and must consist of digits only.  A
# missing count shows the usage text as before; a non-numeric one is now
# rejected here instead of making the submission loop run zero times.
case "$count" in
    ''|*[!0-9]*) show_help ;;
esac

# Locate the work_queue_worker binary.  `command -v` is the POSIX way to
# look a command up; `which` is a separate tool that is not guaranteed to
# exist or to report failure through its exit status.
worker=$(command -v work_queue_worker 2>/dev/null)
if [ -z "$worker" ]
then
	echo "$0: please add 'work_queue_worker' to your PATH."
	exit 1
fi

# Stage a private copy of the worker in a per-invocation directory
# ($$ is the PID of this script) below $WORK/temp and continue from
# there, so the generated job scripts are written next to the binary.
WORKER_DIR=$WORK/temp
job_dir=$WORKER_DIR/${USER}-workers/$$
if ! mkdir -p "$job_dir"
then
	echo "$0: cannot create directory '$job_dir'." >&2
	exit 1
fi
# Stop if the directory cannot be entered; otherwise worker.sh and
# worker1.sh would be written into whatever directory we started in.
if ! cd "$job_dir"
then
	echo "$0: cannot change into directory '$job_dir'." >&2
	exit 1
fi
if ! cp "$worker" .
then
	echo "$0: cannot copy '$worker' into '$job_dir'." >&2
	exit 1
fi

#====================================================#
#| The worker script has two layers.  This gives us |#
#| the option of SSHing back into the local node    |#
#| before running the worker, which lets us work    |#
#| around some buggy resource limits.               |#
#====================================================#

# First layer: worker.sh, the batch script that is handed to sbatch.
# The here-document delimiter is unquoted, therefore:
#   - $nodes_per_job and the $PWD inside the ssh command are expanded
#     now, while this submit script runs;
#   - \$PWD, \$HOSTNAME and \$\$ are written out as $PWD, $HOSTNAME and
#     $$ and are only expanded when the batch job itself runs.
# At run time the job empties /tmp/leeping and then uses ssh to its own
# \$HOSTNAME to start worker1.sh, passing the batch script's PID ($$) as
# the single argument.
cat >worker.sh <<EOF
#!/bin/bash
#----------------------------------------------------
# SLURM job script to run MPI applications on 
# TACC's Stampede system.
#
#----------------------------------------------------

#SBATCH -J worker.sh          # Job name
#SBATCH -o myjob.%j.out       # Name of stdout output file (%j expands to jobId)
#SBATCH -p gpu                # Queue name
#SBATCH -N $nodes_per_job     # Total number of nodes requested
#SBATCH -n $nodes_per_job     # Total number of mpi tasks requested
#SBATCH -t 24:00:00           # Run time (hh:mm:ss) - 1.5 hours

#SBATCH -A TG-MCB100057       # <-- Allocation name to charge job against

cd \$PWD
rm -rf /tmp/leeping/*

ssh \$HOSTNAME "$PWD/worker1.sh \$\$"
EOF

#=======================================#
#|   Create the second layer script.   |#
#| This actually launches the workers. |#
#=======================================#
# The here-document delimiter is unquoted.  Unescaped variables ($PWD,
# $cores, $port_forward, $USER, $HOSTNAME, $port, $numworkers,
# $arguments, $host) are filled in now, on the submitting machine;
# everything written with a backslash (\$1, \$\$, \$( ... ), \`...\`,
# \$_CONDOR_SCRATCH_DIR) is left for the generated script to evaluate
# when it runs.
#
# The generated script starts with #!/bin/sh, so its tests use the POSIX
# "=" operator; "==" inside [ ] is a bash extension that other sh
# implementations reject.  The operand is quoted so the test stays
# well-formed even if port_forward is empty.
cat >worker1.sh <<EOF
#!/bin/sh

# Limit core dump size.
ulimit -c 0

# This function makes the script kill itself if:
# 1) the second layer stops running (i.e. job deleted by scheduler)
# 2) there are no more workers (i.e. idle timeout)
waitkill(){
    while sleep 1 ; do 
        kill -0 \$1 2> /dev/null || break
        [ \$( ps xjf | grep work_queue_worker | grep -v grep | wc -l ) -gt 0 ] || break
    done
    kill -TERM -\$\$
};

# Go into the directory where the worker program is.
cd $PWD

# Set environment variables.
export OMP_NUM_THREADS=$cores
export MKL_NUM_THREADS=$cores
export CORES_PER_WORKER=$cores
#export _CONDOR_SCRATCH_DIR=$SCRATCH/$HOSTNAME
export _CONDOR_SCRATCH_DIR=/tmp/leeping

mkdir -p \$_CONDOR_SCRATCH_DIR

# Optional SSH port forwarding
if [ "$port_forward" = 1 ]; then
    if [ \`ps aux | grep $USER | grep $HOSTNAME | grep ServerAlive | grep $port | grep -v grep | awk '{print \$2}' | wc -l\` -eq 0 ] ; then
        ssh -o ServerAliveInterval=180 -N -f -L$port:localhost:$port $HOSTNAME
    fi
fi

waitkill \$1 &

# Actually execute the workers.
for i in \`seq $numworkers\`; do
    export CUDA_DEVICE=\$(( i - 1 ))
    if [ "$port_forward" = 1 ]; then
        ./work_queue_worker -d all -t 86400s $arguments localhost $port &
    else
        ./work_queue_worker -d all -t 86400s $arguments $host $port &
    fi
done

wait
EOF

chmod 755 worker.sh worker1.sh

# Submit $count copies of the first-layer script.
#   - $parameters is left unquoted on purpose: it holds several sbatch
#     options that have to be split into separate words.
#   - return_status starts at 0, so it is defined even when nothing is
#     submitted, and it keeps the first non-zero sbatch status so that a
#     failed submission is not hidden by a later successful one.
return_status=0
submitted=0
while [ "$submitted" -lt "$count" ]
do
	sbatch $parameters worker.sh
	submit_rc=$?
	if [ "$submit_rc" -ne 0 ] && [ "$return_status" -eq 0 ]
	then
		return_status=$submit_rc
	fi
	submitted=$((submitted + 1))
done

# Exit with the status recorded by the submission loop above.
exit $return_status
