#!/bin/sh
# PCP QA Test No. 1626
# pmproxy metrics
#
# Copyright (c) 2021-2022 Red Hat.
#

seq=`basename $0`
echo "QA output created by $seq"

# pull in the shared QA environment, output filters and check helpers
. ./common.product
. ./common.filter
. ./common.check

# prerequisites: _check_key_server must be satisfied and curl must be
# on the PATH, otherwise the test is reported as not run
_check_key_server
if ! which curl >/dev/null 2>&1
then
    _notrun needs curl
fi

_cleanup()
{
    # Exit-time housekeeping, run from the trap installed by this script:
    #   - append pmproxy's log to $seq_full
    #   - hand pmproxy.options to _restore_config (presumably reinstating
    #     the copy taken earlier by _save_config)
    #   - restart pmproxy if it was running before the test, else stop it
    #   - remove this test's temporary files
    cd $here

    {
	echo "=== pmproxy.log ==="
	cat $PCP_LOG_DIR/pmproxy/pmproxy.log
    } >>$seq_full

    _restore_config $PCP_SYSCONF_DIR/pmproxy/pmproxy.options

    # $pmproxy_was_running holds the command name "true" or "false"
    if ! $pmproxy_was_running
    then
	echo "Stopping pmproxy ..." >>$seq_full
	_service pmproxy stop >>$seq_full 2>&1
	_wait_pmproxy_end
    else
	echo "Restart pmproxy ..." >>$seq_full
	_service pmproxy restart >>$seq_full 2>&1
	_wait_for_pmproxy
    fi

    $sudo rm -rf $tmp $tmp.*
}

_pmproxy_mainpid()
{
    # From the full process listing, print field 2 of every line whose
    # field 8 matches $PCP_BINADM_DIR/pmproxy.  Presumably these are the
    # PID and command columns -- that depends on $PCP_PS_ALL_FLAGS.
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS \
    | $PCP_AWK_PROG '{ if ($8 ~ "'$PCP_BINADM_DIR'/pmproxy") print $2 }'
}

_probe_val()
{
    # Print the last field of every "value" line that "pminfo -f" reports
    # for metric $1.
    #
    # On pminfo failure a one-line diagnostic (including pminfo's output)
    # is written to stdout, so a caller capturing the result sees the
    # reason in place of a number, and the shell exits with status 1.
    # Callers use this inside command substitution, where that exit only
    # ends the substitution; the non-zero status lets them detect the
    # failure through $? if they want to.
    #
    # The scratch file $tmp.probe_val is removed on both paths.
    if pminfo -f "$1" >$tmp.probe_val 2>&1
    then
	awk '/value/ {print $NF}' $tmp.probe_val
	rm -f $tmp.probe_val
    else
	echo pminfo $1 FAILED, result `cat $tmp.probe_val`
	rm -f $tmp.probe_val
	exit 1
    fi
}

status=1	# failure is the default!
trap '_cleanup; exit $status' 0 1 2 3 15

# note whether a pmproxy pid file exists now; _cleanup uses this to decide
# between restarting and stopping the service afterwards
pmproxy_was_running=false
if [ -f $PCP_RUN_DIR/pmproxy.pid ]
then
    pmproxy_was_running=true
fi
echo "pmproxy_was_running=$pmproxy_was_running" >>$seq_full

# Need to make sure -A is not included in the pmproxy options:
# build a replacement file with a marker line on top, followed by the
# installed options with every line containing -A commented out
#
_save_config $PCP_SYSCONF_DIR/pmproxy/pmproxy.options
{
    echo "# Installed by PCP QA test $seq on $(date)"
    sed -e '/-A/s/^/#/' <$PCP_SYSCONF_DIR/pmproxy/pmproxy.options
} >$tmp.options
$sudo cp $tmp.options $PCP_SYSCONF_DIR/pmproxy/pmproxy.options

# real QA test starts here
status=0

# need a fresh pmproxy service: stop it, wait for it to end, start it again
_service pmproxy stop >/dev/null 2>&1 || _exit 1
_wait_pmproxy_end || _exit 1
# NOTE: the start command is a pipeline stage, so an _exit there can only
# end that stage; the _wait_for_pmproxy check that follows is what aborts
# the test if pmproxy did not come up
{ _service pmproxy start 2>&1 || _exit 1; } | _filter_pmproxy_start
_wait_for_pmproxy || _exit 1

echo == wait for pmproxy server metrics
_wait_for_pmproxy_metrics || exit

echo === check pmproxy.pid
val=$(_probe_val pmproxy.pid)
pid=$(_pmproxy_mainpid)
# the pid exported as a metric has to equal the one found in the process
# list; anything else (including a non-numeric value) is a failure
if ! [ "$pid" -eq "$val" ]
then
    echo FAIL pid=$pid val=$val && exit
fi

echo === check initial pmproxy.map metrics
# each pmproxy.map.*.size metric is expected to read zero at this point
for m in instance label metric context
do
    if ! [ $(_probe_val pmproxy.map.$m.size) -eq 0 ]
    then
	echo FAILED pmproxy.map.$m.size expected to be zero && exit
    fi
done

echo "=== start the metrics timer with a /metrics RESTAPI call"
val=`curl -Gs 'http://localhost:44322/metrics?names=pmproxy.pid' |\
     $PCP_AWK_PROG '/^pmproxy_pid/ {print $NF}'`
# Pass only on a positive numeric match.  Testing for inequality and then
# chaining with && would miss the case where curl returned nothing (or
# something non-numeric): [ then exits with an error status, the chain is
# skipped and the test carries on as if the fetch had worked.
if [ "$pid" -eq "$val" ]
then
    :
else
    # keep the fetched value for triage; stdout message is unchanged
    echo "RESTAPI pmproxy.pid fetch: pid=\"$pid\" val=\"$val\"" >>$seq_full
    echo FAIL RESTAPI fetch for pmproxy.pid && exit
fi

echo "=== wait for the maps to be updated"
# poll pmproxy.map.metric.size once a second until it is positive,
# giving up after more than 20 unsuccessful probes
count=0
sz=$(_probe_val pmproxy.map.metric.size)
until [ "$sz" -gt 0 ]
do
    count=$((count + 1))
    if [ $count -gt 20 ]
    then
	echo FAILED sz=\"$sz\" after $count iterations
	# park diagnostics in the full log before bailing out
	{
	    pminfo -f pmproxy
	    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mproxy( |$)'
	    pminfo -f pmcd.agent.status
	} >>$seq_full
	exit
    fi
    sleep 1
    sz=$(_probe_val pmproxy.map.metric.size)
done
echo "namespace metrics found after $count iterations" >>$seq_full

echo === pmproxy.map size metrics should now be nonzero
for m in instance label metric context
do
    sz=$(_probe_val pmproxy.map.$m.size)
    if ! [ "$sz" -gt 0 ]
    then
	echo FAILED pmproxy.map.$m.size is \"$sz\" but expected to be non-zero
	exit
    fi
done

echo === check pmproxy cpu counters
total=$(_probe_val pmproxy.cpu.total)
user=$(_probe_val pmproxy.cpu.user)
sys=$(_probe_val pmproxy.cpu.sys)
# a zero counter is reported on stdout but does not end the test here
if [ "$user" -eq 0 ]
then
    echo FAIL pmproxy.cpu.user is zero
fi
if [ "$sys" -eq 0 ]
then
    echo FAIL pmproxy.cpu.sys is zero
fi
if [ "$total" -eq 0 ]
then
    echo FAIL pmproxy.cpu.total is zero
fi

echo "=== wait for the discovery metrics to appear"
# poll partial_reads until it reads exactly "0"; between polls store 1
# into pmproxy.control.reload and pause a second, giving up after more
# than 20 unsuccessful probes
count=0
partial=$(_probe_val pmproxy.discover.metadata.partial_reads)
until [ "$partial" = 0 ]
do
    count=$((count + 1))
    if [ $count -gt 20 ]
    then
	echo FAILED partial=\"$partial\" after $count iterations
	{
	    pminfo -f pmproxy
	    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mproxy( |$)'
	    pminfo -f pmcd.agent.status
	} >>$seq_full
	exit
    fi
    pmstore pmproxy.control.reload 1 >>$seq_full 2>&1
    sleep 1
    partial=$(_probe_val pmproxy.discover.metadata.partial_reads)
done
echo "discovery metrics found after $count iterations" >>$seq_full

echo === check for discovery partial metadata reads
partial=$(_probe_val pmproxy.discover.metadata.partial_reads)
case "$partial" in
    0)
	;;
    *)
	echo FAIL \"$partial\" partial reads, should be zero
	{
	    pminfo -f pmproxy
	    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mproxy( |$)'
	    pminfo -f pmcd.agent.status
	} >>$seq_full
	exit
	;;
esac

echo === check maxrss and datasz values
for m in maxrss datasz
do
    val=$(_probe_val pmproxy.mem.$m)
    if ! [ "$val" -gt 0 ]
    then
	echo FAIL pmproxy.mem.$m is \"$val\", should be non-zero && exit
    fi
done

echo === check maxrss doesnt grow after 100 basic restapi requests
# sample maxrss either side of 100 /metrics fetches; any growth is noted
# in the full log and the two samples are passed to _within_tolerance
start_maxrss=$(_probe_val pmproxy.mem.maxrss)
for n in $(seq 1 100)
do
    curl -Gs 'http://localhost:44322/metrics?names=kernel.all.load' >/dev/null 2>&1
done
finish_maxrss=$(_probe_val pmproxy.mem.maxrss)
growth=$(expr $finish_maxrss - $start_maxrss)
if [ $growth -gt 0 ]
then
    echo start=$start_maxrss finish=$finish_maxrss, growth=$growth >>$seq_full
fi
_within_tolerance "maxrss growth after 100 /metrics calls" $finish_maxrss $start_maxrss 50% -v

# sleep to avoid systemd StartLimitIntervalSec limits
sleep 4
exit
