#!/bin/sh
# PCP QA Test No. 793
# pmlogger_check and pmlogger_daily with shell components in the dir field
# of the control file
#
# Copyright (c) 2015 Ken McDonell.  All Rights Reserved.
#

# $seq is the basename of this script (the QA test number); it labels
# the output banner and names the $seq.full diagnostic log.
seq=`basename $0`
echo "QA output created by $seq"

# get standard environment, filters and checks
. ./common.product
. ./common.filter
. ./common.check

# Flag file in the pmlogger log directory.  If it exists it is moved
# aside below for the duration of the test and moved back by _cleanup().
NEEDREWRITE=$PCP_LOG_DIR/pmlogger/.NeedRewrite

status=1	# failure is the default!
# remove leftovers from any earlier run of this test
$sudo rm -rf $tmp $tmp.* $seq.full
# Run _cleanup on exit (0) and on signals 1, 2, 3 and 15.  The \$ defers
# expansion, so the exit code is the value $status holds when the trap
# fires rather than the value it has now.
trap "_cleanup; exit \$status" 0 1 2 3 15
signal=$PCP_BINADM_DIR/pmsignal

[ -f $NEEDREWRITE ] && $sudo mv $NEEDREWRITE $NEEDREWRITE.$seq

# NOTE(review): _stop_auto_restart is defined in the sourced common
# files; presumably it stops pmlogger being restarted behind the test's
# back -- confirm there.  _cleanup() calls the matching
# _restore_auto_restart.
_stop_auto_restart pmlogger

# Make pmlogger_check / pmlogger_daily output deterministic.
# Reads stdin, writes stdout; host names, paths, PIDs, timestamps and
# pmlc status values are replaced by fixed tokens, some noise lines are
# dropped, and the result is passed through _filter_cron_scripts.
_filter()
{
    # The sed script is accumulated in the positional parameters so the
    # expressions can be grouped and commented.  sed applies them in the
    # order they are added, so the order below must not be changed.

    # punctuation, host names and install paths
    set -- \
	-e 's/\.\.\.*/[dots]/' \
	-e "s;$PCP_BINADM_DIR/pmlogger;pmlogger;g" \
	-e "s/$FULLLOCALHOST/LOCALHOST/g" \
	-e "s/$LOCALHOST/LOCALHOST/g" \
	-e "s/localhost/LOCALHOST/g" \
	-e "s/local:/LOCALHOST/g" \
	-e 's/control:[0-9][0-9]*]/control:LINE]/' \
	-e 's;/usr/var;/var;g'

    # NOTICES lines are dropped, then the temporary directory is renamed
    set -- "$@" \
	-e '/^Roll .*\/NOTICES/d' \
	-e '/^Start .*\/NOTICES/d' \
	-e "s;$tmp;TMP;g"

    # further lines that are removed outright
    set -- "$@" \
	-e '/Duplicate/d' \
	-e '/Roll [^ ]*\/NOTICES /d' \
	-e '/Start new [^ ]*\/NOTICES$/d' \
	-e '/^Reconfigured: .*pmlogconf./d' \
	-e '/^No reconfigure: .*pmlogconf./d'

    # process ids and date stamps
    set -- "$@" \
	-e '/Restarting/s/process [0-9][0-9]*/process PID/' \
	-e '/Looking for pmlogger/s/process [0-9][0-9]*/process PID/' \
	-e '/Terminating pmlogger/s/process [0-9][0-9]*/process PID/' \
	-e '/^pmlogger \[/{
s/\[[0-9][0-9]*/[PID/
s/from host .*/from host .../
}' \
	-e '/pmlogger -m/s/ [0-9][0-9][0-9][0-9][01][0-9][0-3][0-9]\.[0-5][0-9]\.[0-5][0-9]/ DATE.TIME/'

    # values in the status report lines
    set -- "$@" \
	-e '/^pcp /s/ .*/ .../' \
	-e '/^PMCD host /s/host .*/host .../' \
	-e '/^log started /s/started .*/started .../' \
	-e '/^last log entry /s/entry .*/entry .../' \
	-e '/^current time /s/time .*/time .../' \
	-e '/^log size /s/size .*/size .../'

    sed "$@" | _filter_cron_scripts
}

# Terminate every pmlogger process and wait for each one to exit.
#
# The ps header and the matching pmlogger lines are appended to
# $seq.full, the PIDs (second ps column) are saved in $tmp.pids, all
# pmlogger processes are sent SIGTERM via $signal, and then
# _wait_pmlogger_end is called for each saved PID.
_stop_loggers()
{
    echo "entering _stop_loggers ..." >>$here/$seq.full
    # The first grep must let the ps header through ('[P]PID', as used by
    # the other ps dumps in this test) so that it lands in $seq.full; the
    # second grep strips it again before the PID column is extracted.
    $PCP_PS_PROG $PCP_PS_ALL_FLAGS \
    | grep -E '[P]PID|/[p]mlogger( |$)' \
    | tee -a $here/$seq.full \
    | grep -v '[P]PID' \
    | $PCP_AWK_PROG '{print $2}' >$tmp.pids
    $sudo $signal -a -s TERM pmlogger
    for pid in `cat $tmp.pids`
    do
	_wait_pmlogger_end "$pid"
	echo "pmlogger [$pid] is done" >>$here/$seq.full
    done
}

# Exit handler, installed by the trap at the top of the script.
#
# Restores the .NeedRewrite flag file if it was moved aside, stops the
# test's pmloggers, appends each logger's log (and strace capture, if
# one is still present) to $seq.full, restarts the pmlogger service and
# waits for it, restores auto-restart, restarts pmproxy if it was
# running when the test began, and finally removes the temporary files.
_cleanup()
{
    cd $here
    echo
    echo "Cleaning up"

    [ -f $NEEDREWRITE.$seq ] && $sudo mv $NEEDREWRITE.$seq $NEEDREWRITE

    _stop_loggers >/dev/null 2>&1

    for x in A B C
    do
	echo "--- $tmp.$x.log ---" >>$here/$seq.full
	cat $tmp.$x.log >>$here/$seq.full
	echo "--- end pmlogger log ---" >>$here/$seq.full
	if [ -f $tmp.$x.strace ]
	then
	    echo "--- $tmp.$x.strace ---" >>$here/$seq.full
	    cat $tmp.$x.strace >>$here/$seq.full
	    echo "--- end strace ---" >>$here/$seq.full
	fi
    done

    # The pipeline must end at _filter_pcp_start: a trailing backslash
    # there would turn _wait_for_pmlogger into an argument of the filter
    # and the wait would never happen.
    _service pmlogger restart 2>&1 \
    | _filter_pcp_start
    _wait_for_pmlogger
    _restore_auto_restart pmlogger

    # Default to false: this handler can fire before pmproxy_was_running
    # has been assigned, and an empty expansion in "if" counts as success.
    if ${pmproxy_was_running:-false}
    then
	# pmproxy needs a chance to get stable ... otherwise we may see
	# badness in the PMNS for the pmproxy.* metrics if check's callback
	# checks the PMNS .. seen on vm11 (Debian 11.1)
	#
	echo "Restart pmproxy ..." >>$here/$seq.full
	_service pmproxy restart >>$here/$seq.full 2>&1
	_wait_for_pmproxy
    fi

    $sudo rm -rf $tmp $tmp.*
}

# Quiesce pmlogger before the test proper: stop the pmlogger service
# (output passed through _filter_pcp_stop), then stop any pmlogger
# processes that are still around.
_setup()
{
    _service pmlogger stop \
    | _filter_pcp_stop

    _stop_loggers

    # cd with no operand: leave the current directory for the home
    # directory; _cleanup changes back to $here.
    cd
}

# Verify that the pmloggers for directories A, B and C are all running.
#
# For each directory, poll (up to 30 times, one second apart) for a
# pmlogger state file under $PCP_TMP_DIR/pmlogger that mentions the
# directory, then ask that pmlogger for its status via pmlc.  On success
# the PID is saved in $tmp.<dir>.pid.  If any logger cannot be found,
# diagnostics are written to $seq.full, status is set to 1 (failure) and
# the script exits.
_check_loggers()
{
    # Need to make sure all the pmloggers have really started ...
    # wait (up to 30 secs) for each
    #
    for x in A B C
    do
	rm -f $tmp.found $tmp.$x.pid
	z=0
	while [ "$z" -lt 30 ]
	do
	    # Cannot use $tmp as the path prefix, because on some systems
	    # /var/tmp (what $tmp is set to) and /tmp are symlinked, and
	    # pmlogger reports the path in the control file using the real
	    # name (/tmp) not the symlink name (/var/tmp)
	    # ... spotted on vm37 (OpenBSD 6.7)
	    # Fortunately /$x/ is sufficient to identify the desired
	    # pmlogger control file.
	    #
	    pid=`grep -l "/$x/" $PCP_TMP_DIR/pmlogger/[0-9]* | sed -e 's/.*pmlogger.//'`
	    if [ -n "$pid" ]
	    then
		echo "Found pid $pid for $tmp/$x after $z iterations" >>$here/$seq.full
		echo "status" | pmlc $pid >$tmp.pmlc.out 2>&1
		cat $tmp.pmlc.out >>$here/$seq.full
		# a "log size" line in the pmlc output is taken as proof
		# that this pmlogger is up and answering
		if grep '^log size' $tmp.pmlc.out >/dev/null 2>&1
		then
		    touch $tmp.found
		    echo $pid >$tmp.$x.pid
		    break
		fi
	    fi
	    z=`expr $z + 1`
	    sleep 1
	done
	if [ ! -f $tmp.found ]
	then
	    echo "Arrgh ... pmlogger_* failed to keep pmlogger running for $tmp/$x" | tee -a $here/$seq.full
	    echo "--- $tmp ---" >>$here/$seq.full
	    ls -l $tmp* >>$here/$seq.full
	    echo "--- $tmp/$x ---" >>$here/$seq.full
	    ls -l $tmp/$x >>$here/$seq.full
	    echo "--- $tmp.$x.log ---" >>$here/$seq.full
	    cat $tmp.$x.log >>$here/$seq.full
	    echo "--- end pmlogger log ---" >>$here/$seq.full
	    for log in log.prev log.from.check log.from.check.prev
	    do
		echo "--- $tmp/$log ---" >>$here/$seq.full
		if [ -f $tmp/$log ]
		then
		    cat $tmp/$log >>$here/$seq.full
		else
		    echo "<missing>" >>$here/$seq.full
		fi
	    done
	    echo "--- end of logs ---" >>$here/$seq.full
	    $PCP_PS_PROG $PCP_PS_ALL_FLAGS | grep -E '[P]PID|/[p]mlogger( |$)' >>$here/$seq.full
	    for state in $PCP_TMP_DIR/pmlogger/[0-9]*
	    do
		echo "=== $state ===" | tee -a $here/$seq.full
		cat $state | tee -a $here/$seq.full
	    done
	    echo "See $seq.full for details"
	    # a missing pmlogger is a test failure; the exit trap reports
	    # $status as the script's exit code
	    status=1
	    exit
	fi
    done
}

# pmlogger configuration used by all three loggers in the control file:
# two hinv metrics logged once per second
cat <<End-of-File >$tmp.config
log mandatory on 1 sec {
    hinv.ndisk
    hinv.ncpu
}
End-of-File

# Note whether pmproxy has a pid file now, so that _cleanup knows
# whether to restart it at the end of the test.
if [ -f $PCP_RUN_DIR/pmproxy.pid ]
then
    pmproxy_was_running=true
else
    pmproxy_was_running=false
fi
echo "pmproxy_was_running=$pmproxy_was_running" >>$here/$seq.full

# real QA test starts here

_setup

# archive base directory, owned by the PCP user that the pmlogger_*
# scripts are run as below
mkdir $tmp
$sudo chown $PCP_USER:$PCP_GROUP $tmp

# this one is set via the environment
#
export qatmp=$tmp

# control file
#
# Each logger line has a shell component in the dir field:
#   A - variable references (\$qatmp and \$A are escaped here, so they
#       reach the control file unexpanded)
#   B - \$(...) command substitution inside single quotes
#   C - backquote command substitution inside double quotes
# Unescaped $qatmp and $tmp are expanded by this here-document; that
# includes the two "# $..." comment lines inside it.
#
cat >$tmp.control <<End-of-File
\$version=1.1
\$PMLOGGER_CHECK_SKIP_JANITOR=yes
\$A=A
#
# $qatmp from the caller's environment
# $A from the control line above
LOCALHOSTNAME	n   n	\$qatmp/\$A --pmlc-ipc-version=2 -c $tmp.config -l $tmp.A.log -Dservices,appl3,appl4
LOCALHOSTNAME	n   n	'\$(echo $qatmp/B)' --pmlc-ipc-version=2 -c $tmp.config -l $tmp.B.log -Dservices,appl3,appl4
LOCALHOSTNAME	n   n	"$qatmp/\`echo c | tr '[a-z]' '[A-Z]'\`" --pmlc-ipc-version=2 -c $tmp.config -l $tmp.C.log -Dservices,appl3,appl4
End-of-File
# use $sudo (from the QA environment) like every other privileged
# command in this test, not a hard-coded sudo
$sudo -u $PCP_USER -g $PCP_GROUP cp $tmp.control $tmp/control

cat $tmp/control >>$here/$seq.full
echo "qatmp=$qatmp" >>$here/$seq.full
echo "PCP_TMP_DIR=$PCP_TMP_DIR" >>$here/$seq.full

# pmlogger_check pass #1, run as the PCP user with qatmp in its
# environment.  _setup stopped the pmloggers, so afterwards all three
# loggers from the control file are expected to be running.
echo "pmlogger_check #1" | tee -a $here/$seq.full
check_cmd="qatmp=$qatmp $PCP_BINADM_DIR/pmlogger_check -V -c $tmp/control -l $tmp/log"
$sudo -u $PCP_USER -g $PCP_GROUP sh -c "$check_cmd"
if [ -s $tmp/log ]
then
    echo "$tmp.log from pmlogger_check #1 ..." >>$here/$seq.full
    cat $tmp/log | tee -a $here/$seq.full | _filter
fi
_check_loggers

# full listings go to $seq.full, only the .meta count goes to stdout
for x in A B C
do
    ls -l $tmp/$x >>$here/$seq.full
    n_meta=$(ls $tmp/$x | grep "\.meta" | wc -l | sed -e 's/ //g')
    echo $n_meta archives in TMP/$x
done

# let pmloggers run for a while
sleep 1

_stop_loggers

# pmlogger_check pass #2: the loggers were stopped at the end of pass
# #1, so this run is expected to bring them back.
echo "pmlogger_check #2" | tee -a $here/$seq.full
check_cmd="qatmp=$qatmp $PCP_BINADM_DIR/pmlogger_check -V -c $tmp/control -l $tmp/log"
$sudo -u $PCP_USER -g $PCP_GROUP sh -c "$check_cmd"
if [ -s $tmp/log ]
then
    echo "$tmp.log from pmlogger_check #2 ..." >>$here/$seq.full
    cat $tmp/log | tee -a $here/$seq.full | _filter
fi
_check_loggers

# full listings go to $seq.full, only the .meta count goes to stdout
for x in A B C
do
    echo "--- $tmp/$x ---" >>$here/$seq.full
    ls -l $tmp/$x >>$here/$seq.full
    n_meta=$(ls $tmp/$x | grep "\.meta" | wc -l | sed -e 's/ //g')
    echo $n_meta archives in TMP/$x
done

# let pmloggers run for a while
sleep 1

# snapshot of the process table (header plus pmlogger lines) for the log
echo "pmlogger processes ..." >>$here/$seq.full
$PCP_PS_PROG $PCP_PS_ALL_FLAGS \
| grep -E '[P]PID|/[p]mlogger( |$)' >>$here/$seq.full

# this test has a history of failing in the pmlogger_daily step below
# when one (or more) of the pmlogger processes vanishes, so we're
# taking extreme measures here!
#
# When strace is available, attach one background strace to each
# pmlogger, using the PIDs _check_loggers saved in $tmp.<dir>.pid.
# The captures are appended to $seq.full by _cleanup if still present.
#
if which strace >/dev/null 2>&1
then
    for x in A B C
    do
	logger_pid=$(cat $tmp.$x.pid)
	$sudo strace -p $logger_pid >$tmp.$x.strace 2>&1 &
    done
fi

# pmlogger_daily step: run as the PCP user, with qatmp in its
# environment, against the same control file as the pmlogger_check runs.
echo
echo "pmlogger_daily"
$sudo -u $PCP_USER -g $PCP_GROUP sh -c "qatmp=$qatmp $PCP_BINADM_DIR/pmlogger_daily -D -Z -VV -x never -c $tmp/control -l $tmp/log"
if [ -s $tmp/log ]
then
    # The complete log goes to $seq.full (the file read is $tmp/log, the
    # "$tmp.log" in the message notwithstanding).  Only "=== " lines,
    # empty lines and lines mentioning "merge" are filtered to stdout.
    echo "$tmp.log from pmlogger_daily ..." >>$here/$seq.full
    cat $tmp/log >>$here/$seq.full
    sed -n <$tmp/log \
	-e '/^=== /p' \
	-e '/^$/p' \
	-e '/merge/p' \
    | _filter
fi

# wait a bit to be sure all pmloggers have finished their re-exec
# song and dance
sleep 1

# all three loggers must still be reachable after pmlogger_daily;
# _check_loggers exits the script if one is not
_check_loggers
for x in A B C
do
    echo "--- $tmp/$x ---" >>$here/$seq.full
    ls -l $tmp/$x >>$here/$seq.full
    echo `ls $tmp/$x | grep "\.meta" | wc -l | sed -e 's/ //g'` archives in TMP/$x
done

# Got this far, so drop the strace captures; _cleanup only appends them
# to $seq.full when the files still exist.
rm -f $tmp.?.strace

# success, all done
# (exit fires the trap: _cleanup runs, then the script exits with $status)
status=0
exit
