#!/usr/bin/perl

=head1 NAME

octo_parser - Octopussy Parser program

=head1 SYNOPSIS

octo_parser <device> 

=head1 DESCRIPTION

octo_parser is the program used by the Octopussy Project 
to parse Logs for each Device

=cut

use strict;
use warnings;
use Readonly;
use bytes;    # Fix the 'Malformed UTF-8 character' warning
use utf8;

use File::Basename;
use File::Copy;
use File::Path;
use Linux::Inotify2;
use POSIX qw( mktime strftime );
use Regexp::Assemble;

use AAT::Syslog;
use AAT::Utils qw( NOT_NULL NULL );

use Octopussy;
use Octopussy::Alert;
use Octopussy::Cache;
use Octopussy::Device;
use Octopussy::FS;
use Octopussy::Message;
use Octopussy::RRDTool;
use Octopussy::Service;
use Octopussy::Storage;
use Octopussy::Taxonomy;
use Octopussy::TimePeriod;

# Program name, used for syslog messages, cache naming and process checks.
Readonly my $PROG_NAME    => 'octo_parser';
# Command prefix used by Init() when launching octo_uparser.
Readonly my $NICE_UPARSER => 'nice -n 20';

my $dir_program = Octopussy::FS::Directory('programs');
my $dir_pid     = Octopussy::FS::Directory('running');

# Device to parse logs for: first command-line argument.
my $device = $ARGV[0];

# Flags raised by the Reload() and Exit() signal handlers and polled by the
# main loop.
my ($reload_request, $exit_request) = (0, 0);

# Cache owned by this program (alert logs, statistics, uparser flag).
my $cache = Octopussy::Cache::Init($PROG_NAME);

# Cache named after 'octo_sender'; Alert_Handler() stores emitted alerts in it.
my $cache_sender = Octopussy::Cache::Init('octo_sender');

# Services configurations of the Device (filled by Init()).
my @services;

# %alert:   alert name   => alert configuration (see Init_Message_Alerts())
# %service: service name => { msgs, global_regexp, statistics_enabled }
my (%alert, %service);

# Per-service counters, flushed by the Cache_*_Stats() functions.
my (%taxo_stats, %msgid_stats);
my $minutes_without_logs;

# Storage directory per service name (plus the 'Unknown' entry).
my %dir_data = ();

# Compression setting per service name.
my %log_compression = ();

# @files:        incoming files waiting to be parsed
# @taxo_list:    taxonomy list loaded by Init()
# %known_msgs:   service name => lines recognized in the current file
# @unknown_msgs: lines of the current file that no service recognized
my (@files, @taxo_list, %known_msgs, @unknown_msgs) = ((), (), (), ());
my ($file, $file_last) = (undef, undef);

# Number of lines stored per service since the last Stats_Handler() call.
my %parser_stats = ();

# Date/time (taken from file names) of the minute currently being parsed.
my ($year, $month, $day, $hour, $min) = (0, 0, 0, 0, 0);
my ($nb_lines_minute, $seconds_to_parse, $count_sender) = (0, 0, 1);

Octopussy::Waiting_For_Process_Already_Running($PROG_NAME, $device);

# Direct method call instead of the indirect object syntax
# ('new Linux::Inotify2'), which Perl can mis-parse.
my $inotify = Linux::Inotify2->new()
    or Octopussy::Die($PROG_NAME,
    "[CRITICAL] Unable to create new inotify object for Device $device: $!");

# Non-blocking mode: the main loop polls for events instead of waiting.
$inotify->blocking(0);

=head1 FUNCTIONS

=head2 Exit()

Softly stops the parser 
(current file is fully parsed before exiting)

=cut

# Signal handler (bound to USR1 in the main section).
# Only raises the exit flag; the main loop notices it once the file being
# parsed is finished. Returns the flag value (1).
sub Exit
{
    return ($exit_request = 1);
}

=head2 Reload()

Softly reloads the configuration of the parser 
(current file is fully parsed before reload)

=cut

# Signal handler (bound to HUP in the main section).
# Only raises the reload flag; the main loop calls Init() again once the file
# being parsed is finished. Returns the flag value (1).
sub Reload
{
    return ($reload_request = 1);
}

=head2 Inotify_Watch($path)

Watches '$path' with inotify

=cut

# Registers '$filename' with inotify and queues the files found below it.
#
# - Directory: it is watched (IN_CLOSE_WRITE for '.../Incoming/YYYY/MM/DD'
#   directories, IN_CREATE for any other), then every entry whose name does
#   not start with a dot is handled recursively, in sorted order.
# - Regular file: its path (with repeated slashes collapsed) is appended to
#   @files so that the main loop parses it.
#
# Always returns 1.
sub Inotify_Watch
{
    my $filename = shift;

    if (-d $filename)
    {
        if ($filename =~ /\/Incoming\/+\d{4}\/+\d{2}\/+\d{2}/)
        {
            $inotify->watch($filename, IN_CLOSE_WRITE);
        }
        else { $inotify->watch($filename, IN_CREATE); }

        # Lexical directory handle: this function is recursive, so a shared
        # bareword handle is fragile. A failed opendir is reported instead of
        # being silently read as an empty directory.
        if (opendir my $dh, $filename)
        {
            my @content = grep { !/^\./ } readdir $dh;
            closedir $dh;
            foreach my $c (sort @content) { Inotify_Watch("$filename/$c"); }
        }
        else
        {
            print "Unable to open file '$filename'\n";
            AAT::Syslog::Message($PROG_NAME, 'UNABLE_OPEN_FILE', $filename);
        }
    }
    elsif (-f $filename)
    {
        $filename =~ s/\/+/\//g;
        push @files, $filename;
    }

    return (1);
}


=head2 Is_Valid_Regexp($message)

Checks if the regexp from message '$message' is valid

=cut

# Tries to compile the regexp built from message '$message'.
#
# Returns the compiled regexp on success, undef when building or compiling
# the pattern dies (regexp warnings are made fatal for the attempt).
# On failure the error is left in $@ for the caller.
sub Is_Valid_Regexp
{
    my ($message) = @_;

    my $compiled;
    eval {
        use warnings FATAL => qw( regexp );
        my $pattern = Octopussy::Message::Pattern_To_Regexp($message);
        $compiled = qr/$pattern/;
    };

    return ($compiled);
}


=head2 Is_Regexp_Assemble_Working($ra)

Checks if Regexp::Assemble will generate a regexp without failure

=cut

# Asks the assembler object '$ra' for its regexp with every warning made
# fatal.
#
# Returns whatever '$ra->re' returned, or undef when the call died.
sub Is_Regexp_Assemble_Working
{
    my ($ra) = @_;

    my $assembled;
    eval
    {
        use warnings FATAL => qw( all );
        $assembled = $ra->re;
    };

    return ($assembled);
}


=head2 Init_Message_Alerts($service, $message, $device_alerts)

Inits Alerts for Message $message in Service $service

=cut

# Registers in %alert every alert returned by Octopussy::Message::Alerts()
# for Message '$message' of Service '$service' on the current Device.
#
# Each alert configuration is stored under its name, and gets an
# 'action_<type>' flag for every action type (Mail, Jabber, NSCA, Zabbix)
# found in its 'action' list.
#
# Returns the list of alert names attached to the message.
sub Init_Message_Alerts
{
    my ($service, $message, $device_alerts) = @_;

    my @names = ();
    foreach my $conf (
        Octopussy::Message::Alerts($device, $service, $message,
            $device_alerts))
    {
        my $name = $conf->{name};
        $alert{$name} = $conf;
        foreach my $action (@{$conf->{action}})
        {
            if ($action =~ /Mail/i)   { $alert{$name}->{action_mail}   = 1; }
            if ($action =~ /Jabber/i) { $alert{$name}->{action_jabber} = 1; }
            if ($action =~ /NSCA/i)   { $alert{$name}->{action_nsca}   = 1; }
            if ($action =~ /Zabbix/i) { $alert{$name}->{action_zabbix} = 1; }
        }
        push @names, $name;
    }

    return (@names);
}


=head2 Init_Service_Message($s_conf, $m, $dev_alerts, $ra)

Inits Message $m from Service $s_conf->{sid}

=cut

# Prepares Message '$m' of the Service described by '$s_conf'.
#
# Parameters:
#   $s_conf     - service configuration (reads 'sid' and 'statistics')
#   $m          - message configuration (reads 'msg_id', 'pattern', 'taxonomy')
#   $dev_alerts - reference to the alerts of the Device
#   $ra         - Regexp::Assemble object collecting the service patterns,
#                 or undef to skip that step
#
# When the message regexp compiles:
#   - the message is appended to $service{<sid>}{msgs} if statistics are
#     enabled for the service or if the message has at least one alert;
#   - its non-capturing pattern is added to '$ra' (when defined).
# Otherwise the message is dropped and the compilation error is reported.
#
# Returns 1 when the message regexp is valid, 0 otherwise.
sub Init_Service_Message
{
    my ($s_conf, $m, $dev_alerts, $ra) = @_;
    my $serv       = $s_conf->{sid};
    my @msg_alerts = Init_Message_Alerts($serv, $m, $dev_alerts);

    my $valid = Is_Valid_Regexp($m);

    # Save the eval error right away: $@ is a global that any later call
    # (such as the syslog call below) may overwrite before it is printed.
    my $error = $@;

    if ($valid)
    {
        if (($s_conf->{statistics}) || (scalar(@msg_alerts) > 0))
        {  # statistics are enabled or this msg has alert(s) -> add to msg list
            my $regexp = Octopussy::Message::Pattern_To_Regexp($m);
            push @{$service{$serv}{msgs}},
                {
                re => qr/^$regexp\s*[^\t\n\r\f -~]?$/i,
                id => $m->{msg_id},

                # the pattern is only kept for messages that have alerts
                pattern =>
                    ((scalar(@msg_alerts) > 0) ? $m->{pattern} : undef),
                taxo   => $m->{taxonomy},
                alerts => \@msg_alerts
                };
        }
        $ra->add(Octopussy::Message::Pattern_To_Regexp_Without_Catching($m))
            if (defined $ra);
    }
    else
    {
        AAT::Syslog::Message($PROG_NAME, 'SERVICE_MSG_INVALID_REGEXP',
            $m->{msg_id}, $error);
        print
"Msg $m->{msg_id} dropped because Runtime regexp compilation produced:\n$error\n";
    }

    return ($valid ? 1 : 0);
}


=head2 Init()

Inits Parser & launch UParser if needed

=cut

# (Re)initializes the parser for the current Device. Called once at startup
# and again on every reload request.
#
# - loads the Device configuration, its Services (ordered by 'rank') and the
#   Taxonomy list (dies through Octopussy::Die on failure);
# - for each Service: storage directory, compression setting, RRD
#   initialization, message list and global (assembled) regexp;
# - asks a running octo_uparser to reload through the cache, or starts one;
# - resets the parsing state and (re)scans the Incoming directory.
#
# Returns the number of Services of the Device.
sub Init
{
    my $dconf = Octopussy::Device::Configuration($device)
        or Octopussy::Die($PROG_NAME,
        "[CRITICAL] Unable to get Device configuration for '$device'");
    $minutes_without_logs = $dconf->{minutes_without_logs};
    @services  = Octopussy::Device::Services_Configurations($device, 'rank');
    @taxo_list = Octopussy::Taxonomy::List()
        or Octopussy::Die($PROG_NAME,
        '[CRITICAL] Unable to get Taxonomy configuration]');
    $dir_data{'Unknown'} = Octopussy::Storage::Directory_Unknown($device);

    foreach my $s_conf (@services)
    {
        my $serv = $s_conf->{sid};
        my $ra   = Regexp::Assemble->new;

        # Start from an empty message list: Init() also runs on reload, and
        # Init_Service_Message() only appends. Without this reset, messages
        # (and their alerts) from the previous configuration would stay at
        # the head of the list and keep matching first.
        $service{$serv}{msgs} = [];

        $service{$serv}{statistics_enabled} = $s_conf->{statistics};
        $log_compression{$serv} = $s_conf->{compression} || 1;
        $dir_data{$serv} =
            Octopussy::Storage::Directory_Service($device, $serv);
        Octopussy::RRDTool::Syslog_By_Device_Service_Taxonomy_Init($device,
            $serv);
        my @messages = Octopussy::Service::Messages($serv);
        AAT::Syslog::Message($PROG_NAME, 'PARSER_INIT_SERVICE', $serv,
            scalar @messages);
        my @dev_alerts = Octopussy::Alert::For_Device($device);

        foreach my $m (@messages)
        {
            Init_Service_Message($s_conf, $m, \@dev_alerts, $ra);
        }

        if (Is_Regexp_Assemble_Working($ra))
        {
            my $gre = $ra->re;
            $service{$serv}{global_regexp} = qr/^$gre/i;
        }
        else
        {    # R::A failed -> Enable statistics to reload all Messages
            $service{$serv}{global_regexp} = undef;
            $s_conf->{statistics} = 1;

            # Drop what the first pass stored, otherwise messages with
            # alerts (or with statistics already enabled) would be listed
            # twice after the second pass.
            $service{$serv}{msgs} = [];
            foreach my $m (@messages)
            {
                Init_Service_Message($s_conf, $m, \@dev_alerts, undef);
            }
        }
    }

    if (NOT_NULL($cache->get("uparser_$device")))
    {
        $cache->set("uparser_$device", 'reload');
    }
    else
    {
        # NOTE(review): $device comes from @ARGV and is interpolated into a
        # shell command line here - confirm it is validated upstream.
        system "$NICE_UPARSER ${dir_program}octo_uparser $device &";
    }
    ($file, $file_last) = (undef, undef);
    (@files, %known_msgs, @unknown_msgs) = ((), (), ());

    my $dir = Octopussy::Storage::Directory_Incoming($device);
    Octopussy::FS::Create_Directory("$dir/$device/Incoming/")
        if (!-d "$dir/$device/Incoming/");

    # Refills @files with the files already waiting and sets up the watches.
    Inotify_Watch("$dir/$device/Incoming/");

    return (scalar @services);
}

=head2 Write_Logfile($file, $logs, $compression)

Writes Known Logs '$logs' into Logfile '$file'

=cut

# Appends the lines of '$logs' (array reference) to Logfile '$logfile'.
#
# The numeric suffix following 'msg_HHhMM' is removed from the file name.
# When '$compression' is true the lines are piped to 'gzip' appending to the
# (possibly '.gz'-suffixed) file, otherwise they are appended as plain text.
# Nothing is written when '$logs' is empty.
#
# Returns the number of lines in '$logs'.
sub Write_Logfile
{
    my ($logfile, $logs, $compression) = @_;
    my $nb_logs = scalar @{$logs};

    return ($nb_logs) if ($nb_logs <= 0);

    $logfile =~ s/(msg_\d\dh\d\d)_\d+/$1/;
    if (($logfile !~ /^.+\.gz$/) && $compression) { $logfile .= '.gz'; }
    Octopussy::FS::Create_Directory(dirname($logfile));

    my $OUT;
    my $opened = (
        $compression
        ? defined(open($OUT, '|-', "gzip >> $logfile"))
        : defined(open($OUT, '>>', $logfile))
    );
    if ($opened)
    {
        foreach my $entry (@{$logs}) { print {$OUT} "$entry\n"; }
        close $OUT;
    }
    else
    {
        print "Unable to open file '$logfile'\n";
        AAT::Syslog::Message($PROG_NAME, 'UNABLE_OPEN_FILE', $logfile);
    }

    return ($nb_logs);
}

=head2 Write_Unknown_Logfile($file, $logs)

Writes Unknown Logs '$logs' into Logfile '$file'

=cut

# Stores the unrecognized lines of '$logs' (array reference) in the 'Unknown'
# storage of the Device.
#
# The target is the incoming path '$logfile' with everything up to
# '/Incoming/' replaced by the Unknown directory of the Device. Lines are
# always written compressed, and a syslog message reports their number.
# Nothing happens when '$logs' is empty.
#
# Returns the number of lines in '$logs'.
sub Write_Unknown_Logfile
{
    my ($logfile, $logs) = @_;
    my $nb_unknown = scalar @{$logs};

    if ($nb_unknown > 0)
    {
        $logfile =~ s/^.+\/Incoming\//$dir_data{Unknown}\/$device\/Unknown\//;
        Write_Logfile($logfile, $logs, 1);
        AAT::Syslog::Message($PROG_NAME, 'NEW_UNKNOWN_LOGS_FROM_DEVICE',
            $nb_unknown, $device);
    }

    return ($nb_unknown);
}

=head2 Incoming_To_Unknown($incoming)

Moves Incoming file to Unknown file

=cut

# Moves the whole Incoming file '$incoming' to the 'Unknown' storage of the
# Device (used by the main loop when the Device has no Service).
#
# The content is read through 'zcat' or 'cat' (depending on the '.gz'
# suffix), appended through 'gzip' to the matching Unknown file, and the
# Incoming file is then removed.
#
# Returns the number of lines copied (0 when a file could not be opened).
sub Incoming_To_Unknown
{
    my $incoming = shift;

    (my $unknown = $incoming) =~
        s/^.+\/Incoming\//$dir_data{Unknown}\/$device\/Unknown\//;
    $unknown =~ s/(msg_\d\dh\d\d)_\d+/$1/;
    if ($unknown !~ /^.+\.gz$/) { $unknown .= '.gz'; }
    Octopussy::FS::Create_Directory(dirname($unknown));

    my $count = 0;
    my ($GZIP, $INCOMING);

    # Destination first: without it there is nothing to do
    if (!defined(open($GZIP, '|-', "gzip >> $unknown")))
    {
        print "Unable to open file '$unknown'\n";
        AAT::Syslog::Message($PROG_NAME, 'UNABLE_OPEN_FILE', $unknown);

        return ($count);
    }

    my $reader = ($incoming =~ /.+\.gz$/) ? 'zcat' : 'cat';
    if (defined(open($INCOMING, '-|', "$reader \"$incoming\"")))
    {
        while (<$INCOMING>)
        {
            print {$GZIP} $_;
            $count++;
        }
        close $INCOMING;
        unlink $incoming;
    }
    else
    {
        print "Unable to open file '$incoming'\n";
        AAT::Syslog::Message($PROG_NAME, 'UNABLE_OPEN_FILE', $incoming);
    }
    close $GZIP;

    if ($count)
    {
        AAT::Syslog::Message($PROG_NAME, 'NEW_UNKNOWN_LOGS_FROM_DEVICE',
            $count, $device);
    }

    return ($count);
}

=head2 Alert_Handler($msg, $line)

Handles Alert(s) for the Message $msg on line $line

=cut

# Evaluates every alert attached to Message '$msg' against the log line
# '$line' and emits the alerts whose conditions are met.
#
# Parameters:
#   $msg  - message entry built by Init_Service_Message() ('alerts' holds the
#           alert names, looked up in %alert)
#   $line - log line, expected to start with a timestamp of the form
#           'YYYY-MM-DDTHH:MM:SS[.ffffff]+HH:MM host'; other lines are ignored
#
# The date used for the time period and for the epoch value is built from the
# file-level $year/$month/$day/$hour/$min (set by File_Handler()) plus the
# seconds captured from the line.
#
# An emitted alert is inserted in the DB and stored in the 'octo_sender'
# cache under 'msg_<device>_<counter>'.
#
# Always returns undef.
sub Alert_Handler
{
    my ($msg, $line) = @_;

    if ($line =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:(\d{2})(?:\.\d{1,6})?.\d{2}:\d{2} \S+/)
    {
        my $sec     = $1;
        # '<weekday name> HH:MM', matched against the alert time period
        my $date_tp = strftime("%A %H:%M", $sec, $min, $hour, $day, $month - 1,
            $year - 1900);
        # epoch seconds of the log line ('%s' is a strftime extension)
        my $secs =
            strftime("%s", $sec, $min, $hour, $day, $month - 1, $year - 1900);

        foreach my $msg_alert (@{$msg->{alerts}})
        {
            my $a = $alert{$msg_alert};
            if (   (Octopussy::TimePeriod::Match($a->{timeperiod}, $date_tp))
                && ((NULL($a->{regexp_incl}) || ($line =~ /$a->{regexp_incl}/)))
                && ((NULL($a->{regexp_excl}) || ($line !~ /$a->{regexp_excl}/)))
               )
            {	# log matches timeperiod, include & exclude regexps
                # logs already accumulated for this alert (may be undef)
                my $alert_logs = $cache->get($a->{name});
                # NOTE(review): 'thresold_*' is the key spelling read here;
                # presumably it matches the alert configuration - confirm.
                if (defined $a->{thresold_time})
                {
                    push @{$alert_logs}, {timestamp => $secs, log => $line};
                }
                # without threshold every matching line is a candidate,
                # otherwise enough lines must have been accumulated
                if (   (!defined $a->{thresold_time})
                    || (scalar(@{$alert_logs}) >= $a->{thresold_time}))
                {
                    my $match = 1;
                    if (defined $a->{thresold_time})
                    {
                        # drop the logs older than the threshold duration
                        # NOTE(review): any pruning cancels the emission for
                        # this line, even if enough logs remain afterwards -
                        # confirm this is intended.
                        while (($secs - ${$alert_logs}[0]->{timestamp}) >
                            $a->{thresold_duration})
                        {
                            shift @{$alert_logs};
                            $match = 0;
                        }
                    }
					# too soon after the previous emission of this alert:
					# drop the oldest log and do not emit
					if ((NOT_NULL($a->{last_emit})) 
						&& (NOT_NULL($a->{minimum_emit_delay})) 
						&& ($secs - $a->{last_emit} < $a->{minimum_emit_delay}))
					{
						shift @{$alert_logs};
						$match = 0;
					}
                    if ($match)
                    {
						$a->{last_emit} = $secs;
                        # data sent with the alert: all accumulated logs when
                        # a threshold is set, the current line otherwise
                        my $data = '';
                        if (defined $a->{thresold_time})
                        {
                            foreach my $d (@{$alert_logs})
                            {
                                $data .= "$d->{log}\n";
                            }
                        }
                        else { $data = $line; }
                        # empty list assigned to a scalar: $alert_logs
                        # becomes undef, which is what gets cached below
                        $alert_logs = ();
                        Octopussy::Alert::Insert_In_DB($device, $a, $data,
                            "$year$month$day$hour$min$sec");

                        my $sender_id =
                            "msg_${device}_" . sprintf("%06d", $count_sender);
                        $cache_sender->set(
                            $sender_id,
                            {
                                action => $a,
                                device => $device,
                                data   => $data,
                                msg    => $msg
                            }
                        );
                        $count_sender++;
                    }
                }
                # store the (possibly pruned or reset) logs of this alert
                $cache->set($a->{name}, $alert_logs);
            } 
        }
    }

    return (undef);
}

=head2 Line_Handler($line, $serv, $msg, $known_msgs, $match)

Handles line $line for Service $serv & Message $msg

=cut

# Checks log line '$line' against Message '$msg' of Service '$serv'.
#
# Parameters:
#   $line       - log line
#   $serv       - service name
#   $msg        - message entry ('re', 'id', 'taxo', 'alerts')
#   $known_msgs - hash reference: service name => recognized lines
#   $match      - scalar reference, set to 1 when the line is accepted
#
# A line matching the message regexp triggers the alerts of the message (if
# any), is stored for the service and, when statistics are enabled for the
# service, increments the taxonomy, message and total counters.
# A 'last message repeated N time(s)' line is stored without statistics.
#
# Returns the value referenced by '$match'.
sub Line_Handler
{
    my ($line, $serv, $msg, $known_msgs, $match) = @_;

    if ($line =~ $msg->{re})
    {
        if (scalar(@{$msg->{alerts}}) > 0) { Alert_Handler($msg, $line); }
        push @{$known_msgs->{$serv}}, $line;

        if ($service{$serv}{statistics_enabled})
        {
            # an undefined counter becomes 1 on its first increment
            $taxo_stats{$serv}{$msg->{taxo}}++;
            $msgid_stats{$serv}{$msg->{id}}++;
            $msgid_stats{$serv}{"$serv:_TOTAL_"}++;
        }
        ${$match} = 1;
    }
    elsif ($line =~ /last message repeated \d+ time/)
    {
        push @{$known_msgs->{$serv}}, $line;
        ${$match} = 1;
    }

    return (${$match});
}

=head2 Service_Handler($serv, $y, $m, $d, $hour, $min, $file_new)

Handles Service $serv

=cut

# Writes the lines recognized for Service '$serv' in the current file to the
# storage of that service, under '<device>/<service>/$y/$m/$d/$file_new'
# (spaces of the service name become underscores in the directory name),
# and adds their number to the service statistics.
#
# '$hour' and '$min' are accepted but not used here.
#
# Returns the number of lines recognized for the service.
sub Service_Handler
{
    my ($serv, $y, $m, $d, $hour, $min, $file_new) = @_;

    (my $dir_service = $serv) =~ s/ /_/g;
    my $logs    = \@{$known_msgs{$serv}};
    my $logfile = "$dir_data{$serv}/$device/$dir_service/$y/$m/$d/$file_new";

    Write_Logfile($logfile, $logs, $log_compression{$serv});

    my $nb_logs = scalar @{$logs};
    $parser_stats{$serv} += $nb_logs;

    return ($nb_logs);
}

=head2 Cache_MsgID_Stats()

Puts MsgID Statistics in cache

=cut

# Stores the per-message counters of the current minute in the cache, under
# 'parser_msgid_stats_<YYYYMMDDHHMM>_<device>', then resets them.
#
# Only counter keys of the form '<prefix>:<id>' are considered; each of them
# is reset to 0, and the ones with a positive count are exported as
# { service, id, count } (the '_TOTAL_' counter included).
#
# Returns the number of exported counters.
sub Cache_MsgID_Stats
{
    my @parser_msgid_stats = ();

    foreach my $serv (keys %msgid_stats)
    {
        my $counters = $msgid_stats{$serv};
        foreach my $key (keys %{$counters})
        {
            my ($id) = ($key =~ /^.+?:(.+)$/) or next;
            my $count = $counters->{$key};
            if ($count > 0)
            {
                push @parser_msgid_stats,
                    {service => $serv, id => $id, count => $count};
            }
            $counters->{$key} = 0;
        }
    }
    $cache->set("parser_msgid_stats_$year$month$day$hour${min}_$device",
        \@parser_msgid_stats);

    return (scalar @parser_msgid_stats);
}

=head2 Cache_Taxonomy_Stats($service)

Puts Taxonomy Statistics in cache

=cut

# Stores the taxonomy counters of Service '$service' in the cache, under
# 'parser_taxo_stats_<device>,<service>', then resets them.
#
# The cached value holds the epoch time of the minute being parsed
# ('datetime') and one count per entry of the taxonomy list, in list order
# ('stats').
#
# Returns the number of taxonomy entries.
sub Cache_Taxonomy_Stats
{
    my $service = shift;

    my $counters = \%{$taxo_stats{$service}};
    my @taxo_rrd = map {
        my $count = $counters->{$_->{value}} || 0;
        $counters->{$_->{value}} = 0;
        $count;
    } @taxo_list;

    my $datetime = mktime(0, $min, $hour, $day, $month - 1, $year - 1900);
    $cache->set("parser_taxo_stats_${device},${service}",
        {datetime => $datetime, stats => \@taxo_rrd});

    return (scalar @taxo_rrd);
}

=head2 Stats_Handler()

Handles Statistics for one minute

=cut

# Flushes the statistics of the minute that has just been parsed.
#
# For each Service that stored at least one line: reports the event count to
# syslog, caches the taxonomy counters and resets the service counter.
# Then caches the per-message counters, reports the parsing duration and
# resets the per-minute line and duration counters.
#
# Always returns 0.
sub Stats_Handler
{
    my $minute = "$day/$month/$year $hour:$min";

    foreach my $sid (map { $_->{sid} } @services)
    {
        next
            unless ((NOT_NULL($parser_stats{$sid}))
            && ($parser_stats{$sid} != 0));

        AAT::Syslog::Message($PROG_NAME, 'PARSER_DEVICE_SERVICE_EVENTS',
            $device, $sid, $minute, $parser_stats{$sid}, $nb_lines_minute);
        Cache_Taxonomy_Stats($sid);
        $parser_stats{$sid} = 0;
    }

    Cache_MsgID_Stats();
    AAT::Syslog::Message($PROG_NAME, 'PARSER_DEVICE_PARSING_DURATION',
        $device, $minute, $seconds_to_parse);
    $nb_lines_minute  = 0;
    $seconds_to_parse = 0;

    return (0);
}

=head2 File_Handler($file)

Handles file $file

=cut

# Parses the Incoming file '$file' of the Device.
#
# Only existing files whose path ends with
# '/<year>/<month>/<day>/msg_<HH>h<MM>_<n>.log' are handled. Each line is
# tested against the Services in their configured order and stored under the
# first Service that accepts it; lines accepted by no Service are written to
# the Unknown storage. The Incoming file is removed once it has been read.
#
# When the file belongs to another minute than the previous one, the
# statistics of the previous minute are flushed first (Stats_Handler()).
#
# Returns the number of unknown lines (file-level @unknown_msgs).
sub File_Handler
{
    # NOTE: this lexical hides the file-level '$file' inside this function
    my $file = shift;

    # start time, used to accumulate the parsing duration
    my $time = time;
    if (   (-f $file)
        && ($file =~ /\/(\d+)\/(\d+)\/(\d+)\/(msg_(\d\d)h(\d\d)_\d+\.log)/))
    {
        my ($n_year, $n_month, $n_day, $file_new, $n_hour, $n_min) =
            ($1, $2, $3, $4, $5, $6);

        # first file handled: initialize the current minute
        if ($year == 0)
        {
            ($year, $month, $day, $hour, $min) =
                ($n_year, $n_month, $n_day, $n_hour, $n_min);
        }
        # NOTE(review): the year is not part of this comparison
        if (   ($n_min != $min)
            || ($n_hour != $hour)
            || ($n_day != $day)
            || ($n_month != $month))
        {    # not the same minute as the previous file -> write stats to syslog
            Stats_Handler();
            ($year, $month, $day, $hour, $min) =
                ($n_year, $n_month, $n_day, $n_hour, $n_min);
        }

        (%known_msgs, @unknown_msgs) = ((), ());
        my $cat = ($file =~ /.+\.gz$/ ? 'zcat' : 'cat');
        if (   (-f "$file")
            && (defined open my $FILE, '-|', "$cat \"$file\""))
        {
            while (my $line = <$FILE>)
            {
                chomp $line;
                my $match = 0;
                foreach my $s_conf (@services)
                {
                    my $serv = $s_conf->{sid};
                    # no global regexp (Regexp::Assemble failed in Init()):
                    # every message of the service has to be tried
                    if ((! defined $service{$serv}{global_regexp})
						|| ($line =~ $service{$serv}{global_regexp}))
                    {
                        if (   (defined $service{$serv}{msgs})
                            && (scalar @{$service{$serv}{msgs}}))
                        { # MsgID Statistics enabled and/or Alerts for this Service
                            foreach my $msg (@{$service{$serv}{msgs}})
                            {
                                Line_Handler($line, $serv, $msg, \%known_msgs,
                                    \$match);
                                last if ($match);
                            }
                            # no listed message matched: the line is still
                            # counted in the total and stored for the service
                            if (!$match)
                            {
                                $msgid_stats{$serv}{"$serv:_TOTAL_"} = (
                                    defined $msgid_stats{$serv}{"$serv:_TOTAL_"}
                                    ? $msgid_stats{$serv}{"$serv:_TOTAL_"} + 1
                                    : 1
                                );
                                push @{$known_msgs{$serv}}, $line;
                            }
                        }
                        else
                        { # MsgID Statistics disabled and no Alert for this Service
                            $msgid_stats{$serv}{"$serv:_TOTAL_"} = (
                                defined $msgid_stats{$serv}{"$serv:_TOTAL_"}
                                ? $msgid_stats{$serv}{"$serv:_TOTAL_"} + 1
                                : 1
                            );
                            push @{$known_msgs{$serv}}, $line;
                        }
                        # the line is claimed by this service only when its
                        # global regexp exists (and therefore matched)
                        $match = 1	if (defined $service{$serv}{global_regexp});
                    }
                    last if ($match);
                }
                push @unknown_msgs, $line if (!$match);
                $nb_lines_minute++;
            }
            close $FILE;
            # NOTE(review): the Incoming file is removed before the parsed
            # lines are written below - confirm a failed write is acceptable.
            unlink $file;
        }
        else
        {
            print "Unable to open file '$file'\n";
            AAT::Syslog::Message($PROG_NAME, 'UNABLE_OPEN_FILE', $file);
        }
        # write the recognized lines of each service, then the unknown ones
        foreach my $serv (keys %known_msgs)
        {
            Service_Handler($serv, $n_year, $n_month, $n_day, $n_hour, $n_min,
                $file_new);
        }
        Write_Unknown_Logfile($file, \@unknown_msgs);
        $seconds_to_parse += (time() - $time);
    }

    return (scalar @unknown_msgs);
}

#
# MAIN
#
Octopussy::Valid_User($PROG_NAME) or exit;

# HUP -> reload the configuration, USR1 -> stop (both after the current file)
$SIG{HUP}  = \&Reload;
$SIG{USR1} = \&Exit;

AAT::Syslog::Message($PROG_NAME, 'PARSER_START', $device);
Init();
AAT::Syslog::Message($PROG_NAME, 'PARSER_FILES_TO_PARSE', $device,
    scalar @files);
my $last_logs_time = time;
until ($exit_request)
{
    # New directories get watched, new files get queued in @files
    foreach my $event ($inotify->read) { Inotify_Watch($event->fullname); }

    if (scalar @files > 0)
    {
        $last_logs_time = time;
    }
    else
    {
        my $idle = time - $last_logs_time;
        if (   (NOT_NULL($minutes_without_logs))
            && ($idle > ($minutes_without_logs * 60)))
        {
            AAT::Syslog::Message($PROG_NAME, 'DEVICE_HASNT_SENT_ANY_LOGS',
                $device, int($idle / 60));

            # warn about missing logs only once per $minutes_without_logs
            # minutes
            $last_logs_time = time;
        }
    }

    # Handle the queued files; a pending reload/exit request interrupts the
    # queue after the current file
    while ($file = shift @files)
    {
        chomp $file;
        if (scalar @services == 0)
        {
            # no Service for this Device: everything is unknown
            Incoming_To_Unknown($file);
        }
        else
        {
            File_Handler($file);
        }
        last if ($reload_request || $exit_request);
    }

    if ($reload_request)
    {
        $reload_request = 0;
        Init();
    }

    sleep 2 unless ($exit_request);
}
AAT::Syslog::Message($PROG_NAME, 'PARSER_STOP', $device);

=head1 AUTHOR

Sebastien Thebert <octo.devel@gmail.com>

=head1 SEE ALSO

octo_dispatcher, octo_extractor, octo_uparser, octo_reporter, octo_scheduler

=cut
