#!/bin/bash

#
# Copyright 2015, 2016, International Business Machines
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# This testcase generates significant interrupt stress. Run it for a day
# and you will know if your device driver and software can survive sudden
# aborts while running under heavy interrupt load.
#
# Start N echos and kill them after a couple of seconds.
#

# Default settings. Most of them can be overridden by command line options.
card=0                      # card number (-C)
tools_dir=tools             # directory containing genwqe_echo (-t)
verbose=0
iterations=100000           # number of outer test iterations (-i)
processes=160               # number of parallel genwqe_echo processes (-p)
killtimeout=2               # seconds before the jobs are killed, -1: never (-k)
preload=1                   # value handed to genwqe_echo -l (-l)
runpids=""                  # PIDs of the background jobs started so far
polling=""                  # "-p" if polling mode was requested (-P)
count=""                    # "-c <count>" if a count was requested (-c)
tracing=0                   # 1: record CXL traces (-T)
trace_file="genwqe_parallel_echo.log"
# `uname -p` is non-portable and prints "unknown" on many Linux
# distributions; the machine hardware name (`uname -m`) is always available.
PLATFORM=$(uname -m)
start_time=$(date)
do_build=0                  # 1: run make before the test (-b)

# Set default accelerator based on platform we are running on
if [[ "${PLATFORM}" == "ppc64le" ]]; then
    accelerator=CAPI
else
    accelerator=GENWQE
fi

# Print the command line help (option summary plus one example invocation)
# to stdout.
usage() {
    printf '%s\n' \
        "Usage:" \
        "  genwqe_parallel_echo" \
        "     -A <accelerator> GENWQE|CAPI" \
        "     -C <card>        card to be used for the test" \
        "     -c <count>       send <count> echos and than stop" \
        "     -i <iterations>  repeat  multiple times for more testing" \
        "     -p <processes>   how many processed in parallel" \
        "     -k <seconds>     kill timeout" \
        "     -l <N>           send <N> echos in one shot N <= 64" \
        "     -P               run echo in experimental polling mode (CAPI only)" \
        "     -T               start traces (CAPI only)" \
        "     -t <dir>         directory where the tools are located" \
        "     -b               build the code before running the test" \
        "" \
        "Example:" \
        "  Repro the CAPI bitstream interrupt loss problem:" \
        "    ./scripts/parallel_echo.sh -ACAPI -C0 -i1000 -p160 -k3" \
        ""
}

# Start one command in the background and remember its PID.
#
# Arguments: the command to run followed by its arguments.
# Globals:   s       (read)    index used for the name of the command file
#            parms   (read)    optional extra arguments appended to the command
#            newpid  (written) PID of the job that was just started
#            runpids (written) space separated list of all started PIDs
#
# Redirections applied to the start_job call itself are inherited by the
# background job; that is how the caller captures stdout/stderr per job.
function start_job {
    # Record the command line for later inspection.
    echo "$*" > "echo_${s}.cmd"

    # "$@" keeps every argument exactly as it was passed in (no second round
    # of word splitting or globbing). ${parms} stays unquoted on purpose so
    # that it may carry several extra options, or nothing at all.
    exec "$@" ${parms} &
    newpid=$!
    runpids="${runpids} ${newpid}"
}

# Kill all background jobs of this shell with SIGKILL and wait for them.
#
# Globals: runpids (read for the report, then reset to "")
# Outputs: the running and the expected PIDs before the kill, and whatever
#          is still running afterwards.
function stop_jobs {
    local pids

    pids=$(jobs -rp)
    echo "Running:   ${pids//$'\n'/ }"
    echo "Expected: ${runpids}"

    # Without any PID kill only prints its usage message, so skip it when
    # nothing is running (e.g. cleanup triggered before the first job).
    if [[ -n "${pids}" ]]; then
        # Unquoted on purpose: one PID per word.
        kill -SIGKILL ${pids}
    fi
    wait

    pids=$(jobs -rp)
    echo "Still running: ${pids//$'\n'/ }"
    runpids=""
}

# Signal handler: kill every background job, give the system a second to
# settle, switch the CXL traces off again and leave the script with exit
# status 0.
cleanup() {
    printf '%s\n' "Stopping all jobs ..."
    stop_jobs
    sleep 1
    printf '%s\n' "done"
    stop_cxl_traces
    exit 0
}

# Enable the cxl trace events of the kernel. Does nothing unless the
# accelerator is CAPI and tracing was requested (tracing == 1).
#
# Globals: accelerator, tracing (read)
function start_cxl_traces {
    # [[ ]] does not word-split its operands, so an empty or multi-word
    # accelerator value simply compares unequal instead of raising an error.
    if [[ "${accelerator}" == "CAPI" && "${tracing}" -eq 1 ]]; then
        echo "Starting CXL tracing ..."
        # The redirection must happen with root rights, hence sh -c.
        sudo sh -c 'echo 1 > /sys/kernel/debug/tracing/events/cxl/enable'
    fi
}

# Disable the cxl trace events of the kernel. Does nothing unless the
# accelerator is CAPI and tracing was requested (tracing == 1).
#
# Globals: accelerator, tracing (read)
function stop_cxl_traces {
    # [[ ]] does not word-split its operands, so an empty or multi-word
    # accelerator value simply compares unequal instead of raising an error.
    if [[ "${accelerator}" == "CAPI" && "${tracing}" -eq 1 ]]; then
        echo "Stopping CXL tracing ..."
        # The redirection must happen with root rights, hence sh -c.
        sudo sh -c 'echo 0 > /sys/kernel/debug/tracing/events/cxl/enable'
    fi
}

# Save the kernel trace buffer to ${trace_file}. Does nothing unless the
# accelerator is CAPI and tracing was requested (tracing == 1).
#
# Globals: accelerator, tracing, trace_file (read)
function collect_cxl_traces {
    if [[ "${accelerator}" == "CAPI" && "${tracing}" -eq 1 ]]; then
        echo "Collect CXL traces ..."
        # trace_file is not exported, so the root shell cannot expand it
        # itself; hand the file name over as positional parameter $1.
        # NOTE(review): reads from trace_pipe normally block until new events
        # arrive - confirm that this cat terminates in practice.
        sudo sh -c 'cat /sys/kernel/debug/tracing/trace_pipe > "$1"' \
            sh "${trace_file}"
    fi
}
  
# Stop all jobs and the CXL traces when the test gets interrupted.
# SIGKILL is deliberately not listed: it cannot be caught by a process.
trap cleanup SIGINT SIGTERM

# Parse the command line options and store the results in the global
# settings (accelerator, card, count, iterations, processes, killtimeout,
# preload, tracing, polling, do_build, tools_dir).
#
# Arguments: the command line arguments of the script.
# Exits:     0 after printing the help for -h,
#            1 for an unknown option or a missing option argument.
function parse_options {
    local opt
    local OPTIND=1

    # The leading ':' selects silent error reporting: getopts then stores
    # the offending option letter in OPTARG, so the messages below can name it.
    while getopts ":TPA:C:c:p:i:k:l:t:bh" opt; do
        case "${opt}" in
            A) accelerator=${OPTARG} ;;
            C) card=${OPTARG} ;;
            # Kept as one string; it is expanded unquoted when genwqe_echo
            # is started so that it becomes two words.
            c) count="-c ${OPTARG}" ;;
            i) iterations=${OPTARG} ;;
            p) processes=${OPTARG} ;;
            k) killtimeout=${OPTARG} ;;
            l) preload=${OPTARG} ;;
            T) tracing=1 ;;
            P) polling="-p" ;;
            b) do_build=1 ;;
            t) tools_dir=${OPTARG} ;;
            h)
                usage
                exit 0
                ;;
            :)
                echo "Option -${OPTARG} requires an argument" >&2
                usage >&2
                exit 1
                ;;
            \?)
                # Do not start a long stress run with a mistyped command line.
                echo "Invalid option: -${OPTARG}" >&2
                usage >&2
                exit 1
                ;;
        esac
    done
}

parse_options "$@"

# Run one round of the stress test: start ${processes} instances of
# genwqe_echo in the background, then either kill them after
# ${killtimeout} seconds or, if killtimeout is -1, wait until they have
# terminated by themselves.
#
# Globals: tools_dir, accelerator, card, preload, count, polling,
#          processes, killtimeout (read); runpids (written)
# Files:   echo_<n>.stdout.log and echo_<n>.stderr.log, one pair per job
function test_echo ()
{
    # s is read by start_job to name the command file of each job.
    local s tick

    ### Start in background ...
    echo "Starting genwqe_echo in the background ... "
    for ((s = 1; s <= processes; s++)); do
        # ${count} and ${polling} are unquoted on purpose: each is either
        # empty (and must vanish) or holds complete option words.
        start_job "${tools_dir}/genwqe_echo" -A "${accelerator}" -C "${card}" \
            -l "${preload}" ${count} -f ${polling} \
            > "echo_${s}.stdout.log" 2> "echo_${s}.stderr.log"
    done
    echo "ok"

    if [[ "${killtimeout}" -ne -1 ]]; then
        echo "Waiting ${killtimeout} seconds ..."
        # Exactly killtimeout one-second naps, matching the message above.
        for ((tick = 0; tick < killtimeout; tick++)); do
            sleep 1
            echo -n "."
        done
        echo " ok"
        echo "Sending SIGKILL to all ... "
        stop_jobs
        echo "ok"
    else
        echo "Skipp killing processes but wait until they terminate ..."
        # Really wait; otherwise the caller inspects and deletes the log
        # files while the jobs are still writing to them.
        wait
        runpids=""
    fi
}

# Print a headline framed by two lines of asterisks, followed by an empty
# line.
function print_banner {
    echo "********************************************************************"
    echo "$1"
    echo "********************************************************************"
    echo
}

# Check if we have to do a build
if [[ "${do_build}" -eq 1 ]]; then
    echo "Build code ..."
    make -s -j32 || exit 1
fi

print_banner "Parallel echo TEST for ${accelerator} card ${card} starting ${processes}"

print_banner "Hardware Version"
# Quoted so that a tools directory containing blanks still works.
"${tools_dir}/genwqe_echo" -A "${accelerator}" -C "${card}" --hardware-version

print_banner "Remove old logfiles ..."
# The globs have to stay unquoted; -- protects against odd file names.
rm -f -- echo_*.cmd echo_*.stdout.log echo_*.stderr.log

start_cxl_traces

# Save and stop the CXL traces, then terminate the script.
# Arguments: $1 - exit status of the script
function abort_test {
    collect_cxl_traces
    stop_cxl_traces
    exit "$1"
}

# Send a short echo run to the card; the exit status of genwqe_echo is
# returned to the caller.
function check_card {
    "${tools_dir}/genwqe_echo" -A "${accelerator}" -C "${card}" -i0 -c5
}

# Main test loop. Exit status of the script:
#   0 all iterations passed
#   1 card did not answer before the stress phase
#   2 the stderr logs of the jobs contain "err" (or grep itself failed)
#   3 card did not answer after the stress phase
for ((i = 1; i <= iterations; i++)); do

    echo -n "(1) Check if card is replying to an echo request ($i) ... "
    date

    if ! check_card; then
        echo "Single echo took to long, please review results!"
        abort_test 1
    fi

    echo "(2) Perform massive interrupt stress and killing applications ..."
    test_echo

    echo "(3) Check logfiles for string \"err\" ..."
    grep err echo_*.stderr.log
    grep_status=$?
    # grep returns 1 only if nothing matched. A match (0) as well as a grep
    # failure such as missing log files (>1) needs to be looked at.
    if [[ ${grep_status} -ne 1 ]]; then
        echo "Found potential errors ... please check logfiles"
        abort_test 2
    fi

    echo "(4) Check if card is still replying to an echo request ..."
    if ! check_card; then
        echo "Single echo took to long, please review results!"
        abort_test 3
    fi

    echo "(5) Remove old logfiles ..."
    rm -f -- echo_*.cmd echo_*.stdout.log echo_*.stderr.log

    echo "Running since ${start_time} until now $(date) ($i)"
    echo
done

stop_cxl_traces
exit 0
