#!/usr/bin/perl -w

=head1 NAME

octo_uparser - Octopussy UParser program

=head1 SYNOPSIS

octo_uparser <device>

=head1 DESCRIPTION

octo_uparser is the program used by the Octopussy Project
to parse the Unknown Logs of the Device given as argument

=cut

use strict;
use warnings;
use Readonly;
use bytes;    # Fix the 'Malformed UTF-8 character' warning
use utf8;

use File::Basename;
use File::Copy;
use File::Path;
use Regexp::Assemble;

use AAT::Syslog;
use AAT::Utils qw( NOT_NULL NULL );

use Octopussy;
use Octopussy::Cache;
use Octopussy::Device;
use Octopussy::FS;
use Octopussy::Logs;
use Octopussy::Message;
use Octopussy::Service;
use Octopussy::Storage;

Readonly my $PROG_NAME => 'octo_uparser';

exit if (!Octopussy::Valid_User($PROG_NAME));

# The device name is the only, mandatory, argument: the cache key, the
# syslog messages and the storage lookups below are all built from it.
my $device = $ARGV[0];
if ((!defined $device) || ($device eq ''))
{
    print {*STDERR} "Usage: $PROG_NAME <device>\n";
    exit 1;
}

# Flag this device as 'running' in the 'octo_parser' cache; the main loop
# polls the same key for a 'reload' request.
my $cache = Octopussy::Cache::Init('octo_parser');
$cache->set("uparser_$device", 'running');

my (@files, @services);    # Unknown Logs files to parse / Services of the Device
my %service;               # per-Service matching data (global_regexp, msgs)
my %dir_data     = ();     # storage directory per Service (plus 'Unknown')
my %known_msgs   = ();     # Service name => lines recognized in the current file
my @unknown_msgs = ();     # lines of the current file recognized by no Service
my ($file, $last_file) = (undef, undef);

Octopussy::Waiting_For_Process_Already_Running($PROG_NAME, $device);

my $exit_request = 0;      # set to 1 by Exit(), the USR1 signal handler

=head1 FUNCTIONS

=head2 Exit()

Signal handler (installed on USR1): records that a stop was requested.
The main loop only checks this request once a file has been handled,
so the file being parsed is always completed before the program stops.

Returns the value of the request flag (1).

=cut

sub Exit
{
    return ($exit_request = 1);
}


=head2 Is_Valid_Regexp($message)

Converts Message '$message' to a regexp and tries to compile it.

Returns the compiled regexp, or undef when the conversion or the
compilation fails (regexp warnings are treated as failures).
On failure, the reason is left in $@ for the caller.

=cut

sub Is_Valid_Regexp
{
    my ($message) = @_;

    # Scalar assignment on purpose: a failed eval then yields undef.
    my $compiled = eval {
        use warnings FATAL => qw( regexp );
        my $pattern = Octopussy::Message::Pattern_To_Regexp($message);
        qr/$pattern/;
    };

    return ($compiled);
}


=head2 Is_Regexp_Assemble_Working($ra)

Asks the Regexp::Assemble object $ra for its assembled regexp, with
every warning made fatal.

Returns the assembled regexp (a true value) on success,
undef if the assembly died or raised a warning.

=cut

sub Is_Regexp_Assemble_Working
{
    my ($ra) = @_;

    my $assembled = eval {
        use warnings FATAL => qw( all );
        $ra->re;
    };

    return ($assembled);
}


=head2 Init_Service_Message($m, $ra)

Checks that the regexp of Message $m compiles and, if so, adds the
pattern built by Pattern_To_Regexp_Without_Catching to the
Regexp::Assemble object $ra.

$ra may be undef: the Message is then only validated.
An invalid Message is reported (syslog and standard output) and dropped.

Returns 1 if the Message is valid, 0 otherwise.

=cut

sub Init_Service_Message
{
    my ($m, $ra) = @_;

    if (Is_Valid_Regexp($m))
    {
        # Callers may pass an undefined $ra (validation only): never call
        # a method on it in that case.
        $ra->add(Octopussy::Message::Pattern_To_Regexp_Without_Catching($m))
            if (defined $ra);

        return (1);
    }

    # Save the compilation error right away: $@ is a global that any eval
    # run inside the calls below could overwrite before it is printed.
    my $error = $@;
    AAT::Syslog::Message($PROG_NAME, 'SERVICE_MSG_INVALID_REGEXP',
        $m->{msg_id}, $error);
    print
"Msg $m->{msg_id} dropped because Runtime regexp compilation produced:\n$error\n";

    return (0);
}

=head2 Init()

(Re)loads what is needed to parse the Unknown Logs of the Device:
its Services, the list of Unknown Logs files, the storage directories
and, for each Service, the regexp(s) used to recognize its Messages.

For each Service, the valid Messages are assembled into one global
regexp (Regexp::Assemble). When that assembly does not work, the Service
falls back to a list of individually compiled Message regexps stored in
$service{$serv}{msgs} (each entry has a 're' key).

Nothing but the Services and files lists is touched when the Device has
no Service or no Unknown Logs file.

Returns the number of Services of the Device.

=cut

sub Init
{
    @services = Octopussy::Device::Services($device);
    @files    = ();
    if (scalar @services > 0)
    {
        push @files, Octopussy::Logs::Unknown_Files($device);
        if (scalar @files > 0)
        {
            # NOTE(review): this result is never used in this sub; kept in
            # case the call is needed for a side effect -- confirm and drop.
            my $storage = Octopussy::Storage::Default();
            $dir_data{'Unknown'} =
                Octopussy::Storage::Directory_Unknown($device);

            # Forget the matching data of a previous Init() (reload).
            %service = ();
            foreach my $serv (@services)
            {
                my $ra = Regexp::Assemble->new;
                $dir_data{$serv} =
                    Octopussy::Storage::Directory_Service($device, $serv);
                my @messages = Octopussy::Service::Messages($serv);
                AAT::Syslog::Message(
                    $PROG_NAME, 'PARSER_INIT_SERVICE',
                    $serv,      scalar @messages
                );

                foreach my $m (@messages)
                {
                    Init_Service_Message($m, $ra);
                }

                if (Is_Regexp_Assemble_Working($ra))
                {
                    $service{$serv}{global_regexp} = $ra->re;
                }
                else
                {
                    # No usable global regexp: build the per-Message list
                    # that File_Handler() walks when global_regexp is undef.
                    $service{$serv}{global_regexp} = undef;
                    $service{$serv}{msgs}          = [];
                    foreach my $m (@messages)
                    {
                        # Same filter as for the assembler; invalid
                        # Messages were already reported by the loop above.
                        next if (!Is_Valid_Regexp($m));

                        my $re = eval {
                            my $pattern =
                                Octopussy::Message::Pattern_To_Regexp_Without_Catching(
                                $m);
                            qr/$pattern/;
                        };
                        push @{$service{$serv}{msgs}},
                            {msg_id => $m->{msg_id}, re => $re}
                            if (defined $re);
                    }
                }
            }
            ($file, $last_file) = (undef, undef);
            %known_msgs   = ();
            @unknown_msgs = ();
        }
    }

    return (scalar @services);
}

=head2 Write_Logfile($logfile, $logs)

Appends the lines of the array reference '$logs' to the gzipped
Logfile '$logfile' ('.gz' is added to the name if missing).
The directory of the Logfile is created first.
Nothing is done when '$logs' is empty.

A failure to start or to complete the compression is reported on
standard output and to syslog.

Returns the number of lines in '$logs'.

=cut

sub Write_Logfile
{
    my ($logfile, $logs) = @_;

    my $nb_logs = scalar @{$logs};
    return ($nb_logs) if ($nb_logs == 0);

    $logfile .= '.gz' if ($logfile !~ /^.+\.gz$/);
    Octopussy::FS::Create_Directory(dirname($logfile));

    # List-form pipe open: the file name reaches the shell only as the
    # positional parameter $1, so spaces or shell metacharacters in the
    # path can neither split nor alter the command.
    if (defined open my $FILE, '|-', 'sh', '-c', 'exec gzip >> "$1"', 'sh',
        $logfile)
    {
        foreach my $log (@{$logs}) { print {$FILE} "$log\n"; }

        # Closing a pipe waits for the command: a false result means the
        # data could not be flushed or gzip did not exit cleanly.
        if (!close $FILE)
        {
            print "Unable to write file '$logfile'\n";

            # Reuses the file-error message id already used by this script.
            AAT::Syslog::Message('octo_uparser', 'UNABLE_OPEN_FILE', $logfile);
        }
    }
    else
    {
        print "Unable to open file '$logfile'\n";
        AAT::Syslog::Message('octo_uparser', 'UNABLE_OPEN_FILE', $logfile);
    }

    return ($nb_logs);
}

=head2 Service_Handler($serv, $total, $y, $m, $d, $hour, $min, $file_new)

Stores the lines recognized for Service $serv in the current file:
logs the number of recognized lines (out of the $total lines read),
then appends them to the Logfile $file_new located in
<service storage dir>/<device>/<service>/$y/$m/$d/
(spaces of the Service name are replaced by '_' in the path).

Returns the list of lines recognized for Service $serv.

=cut

sub Service_Handler
{
    my ($serv, $total, $y, $m, $d, $hour, $min, $file_new) = @_;

    # Directory name of the Service: same name with '_' instead of spaces.
    (my $service_subdir = $serv) =~ s/ /_/g;
    my $logfile =
        "$dir_data{$serv}/$device/$service_subdir/$y/$m/$d/$file_new";
    my $nb_known = scalar @{$known_msgs{$serv}};

    AAT::Syslog::Message($PROG_NAME, 'PARSER_DEVICE_SERVICE_EVENTS',
        $device, $serv, "$d/$m/$y $hour:$min", $nb_known, $total);
    Write_Logfile($logfile, \@{$known_msgs{$serv}});

    return (@{$known_msgs{$serv}});
}

=head2 File_Handler($file)

Parses the Unknown Logs file $file (read with 'zcat' if its name ends
with '.gz', with 'cat' otherwise). Its path must end with
<year>/<month>/<day>/msg_<HH>h<MM>.log[...].

Each line is tested against the Services of the Device, in order: a line
recognized by a Service is queued for that Service, any other line is
queued as unknown. Once the file is read, it is deleted, the recognized
lines are written to the Logfile of their Service and the unknown lines
are written back under the name of $file.
If the file cannot be read, it is left untouched and the failure is
reported on standard output and to syslog.

Returns $file, or undef if $file is not a regular file or if its path
does not have the expected form.

=cut

sub File_Handler
{
    my $file = shift;

    if (   (-f $file)
        && ($file =~ /\/(\d+)\/(\d+)\/(\d+)\/(msg_(\d\d)h(\d\d)\.log.*)$/))
    {
        my ($y, $m, $d, $file_new, $hour, $min) = ($1, $2, $3, $4, $5, $6);
        %known_msgs   = ();
        @unknown_msgs = ();
        my $total = 0;
        my $cat = ($file =~ /.+\.gz$/ ? 'zcat' : 'cat');

        # List-form pipe open: the command is run without a shell, so no
        # character of the path can be interpreted as shell syntax.
        if (defined open my $FILE, '-|', $cat, $file)
        {
          LINE:
            while (my $line = <$FILE>)
            {
                chomp $line;
                $total++;
                foreach my $serv (@services)
                {
                    my $global_re = $service{$serv}{global_regexp};
                    if (defined $global_re)
                    {
                        if ($line =~ $global_re)
                        {
                            push @{$known_msgs{$serv}}, $line;
                            next LINE;
                        }
                    }
                    else
                    {
                        # No global regexp for this Service: try its
                        # Message regexps one by one.
                        foreach my $msg (@{$service{$serv}{msgs}})
                        {
                            if ($line =~ $msg->{re})
                            {
                                push @{$known_msgs{$serv}}, $line;
                                next LINE;
                            }
                        }
                    }
                }

                # Recognized by no Service.
                push @unknown_msgs, $line;
            }
            close $FILE;

            # The unknown lines are re-written below under the same name.
            unlink $file;
        }
        else
        {
            print "Unable to open file '$file'\n";
            AAT::Syslog::Message($PROG_NAME, 'UNABLE_OPEN_FILE', $file);
        }
        foreach my $serv (keys %known_msgs)
        {
            Service_Handler($serv, $total, $y, $m, $d, $hour, $min, $file_new);
        }
        Write_Logfile($file, \@unknown_msgs);

        return ($file);
    }

    return (undef);
}

#
# MAIN
#

# USR1 asks for a soft stop: Exit() only raises $exit_request, which is
# checked once the current file has been handled.
$SIG{USR1} = \&Exit;

AAT::Syslog::Message($PROG_NAME, 'PARSER_START', $device);
Init();

# Iterate over a snapshot of the list: a reload calls Init(), which empties
# and refills @files, and foreach must not walk an array that is modified
# inside the loop body.
my @pending_files = @files;
foreach my $file (@pending_files)
{
    chomp $file;
    File_Handler($file);

    # The cache entry may be missing: only compare a defined value.
    # NOTE(review): nothing in this script sets the entry back to
    # 'running' after a reload, so Init() runs after every file while the
    # entry stays at 'reload' -- confirm who resets it.
    my $state = $cache->get("uparser_$device");
    Init() if ((defined $state) && ($state eq 'reload'));
    last if ($exit_request);
}
AAT::Syslog::Message($PROG_NAME, 'PARSER_STOP', $device);
$cache->remove("uparser_$device");

=head1 AUTHOR

Sebastien Thebert <octo.devel@gmail.com>

=head1 SEE ALSO

octo_dispatcher, octo_extractor, octo_parser, octo_reporter, octo_scheduler

=cut
