#!/usr/bin/perl
#
# Copyright (c) 2010-2012, The Trusted Domain Project.  All rights reserved.
#
#
# Script to generate rate and spam ratio predictions for each domain
# based on accumulated data.  EXPERIMENTAL.

###
### Setup
###

use strict;
use warnings;

use DBI;
use File::Basename;
use Getopt::Long;
use IO::Handle;
use POSIX;

require DBD::mysql;

# general
my $progname = basename($0);
my $version  = '2.9.1';
my ($verbose, $helponly, $showversion) = (0, 0, 0);

# domains named on the command line
my @domains;

# scratch variables shared by the top-level code below
my ($out, $idx, $id, $tmpsql, $tmpin);

# DBI statement handle, database handle and current row
my ($dbi_s, $dbi_h, $dbi_a);

# low-time threshold, in days
my $thresh;

# DB parameters: built-in defaults first, then the effective values
my ($def_dbhost, $def_dbname, $def_dbuser, $def_dbpasswd, $def_dbport) =
   ('localhost', 'opendkim', 'opendkim', 'opendkim', '3306');
my ($dbhost, $dbname, $dbuser, $dbpasswd, $dbport);

my $dbscheme = 'mysql';

# output location (default) and the temporary file written on the way there
my ($reportout, $tmpout);

# "spammy" ratio default
my $spamratio = 0.75;

# minimum message count for consideration
my ($minmsgs, $minspamcount) = (10, 2);

# prediction interval calculation: supported interval sizes (percent)
# mapped to the standard score applied to the standard deviation
my $def_pi = 75;
my ($pisize, $stdscore);
my %stdscores = (
	50 => 0.67,
	68 => 1.00,
	75 => 1.15,
	90 => 1.64,
	95 => 1.96,
	99 => 2.58,
);

# Minimum messages.id value; used to mark the start of useful data
my $minmsgid = 0;

###
### NO user-serviceable parts beyond this point
###

# Print a summary of the command line options to standard error.
#
# Takes no arguments.  The caller chooses the exit status afterwards.
# The domain option is shown under the name the option parser defines
# ("domain"); it may be repeated to select several domains.
sub usage
{
	print STDERR
	    "$progname: usage: $progname [options]\n",
	    "\t--dbhost=host      database host [$def_dbhost]\n",
	    "\t--dbname=name      database name [$def_dbname]\n",
	    "\t--dbpasswd=passwd  database password [$def_dbpasswd]\n",
	    "\t--dbport=port      database port [$def_dbport]\n",
	    "\t--dbuser=user      database user [$def_dbuser]\n",
	    "\t--domain=name      domain to include in report (repeatable);\n",
	    "\t                   a name starting with / or ./ is a file of domains\n",
	    "\t--help             print help and exit\n",
	    "\t--output=file      output file\n",
	    "\t--prediction=pct   prediction interval [$def_pi]\n",
	    "\t--verbose          verbose output\n",
	    "\t--version          print version and exit\n";
}

# parse command line arguments
#
# The domain option may be given more than once; each value is appended to
# @domains.  The plural spelling "domains" is accepted as an alias so that
# either form selects domains instead of being rejected as unknown.
my $opt_retval = Getopt::Long::GetOptions('dbhost=s' => \$dbhost,
                                          'dbname=s' => \$dbname,
                                          'dbpasswd=s' => \$dbpasswd,
                                          'dbport=s' => \$dbport,
                                          'dbuser=s' => \$dbuser,
                                          'domain|domains=s@' => \@domains,
                                          'help!' => \$helponly,
                                          'output=s' => \$reportout,
                                          'prediction=i' => \$pisize,
                                          'verbose!' => \$verbose,
                                          'version!' => \$showversion,
                                         );

# Bad options or an explicit help request both print the usage text; only
# the explicit request counts as success.
if ($helponly || !$opt_retval)
{
	usage();
	exit($helponly ? 0 : 1);
}

# Version request: report and stop.
if ($showversion)
{
	printf STDOUT "%s v%s\n", $progname, $version;
	exit(0);
}

# apply defaults
#
# Each setting not given on the command line is taken from its environment
# variable when that is set, and otherwise from the built-in default.  The
# report location has no built-in default and may remain undefined.
foreach my $setting ([ \$dbhost,    'OPENDKIM_DBHOST',   $def_dbhost   ],
                     [ \$dbname,    'OPENDKIM_DB',       $def_dbname   ],
                     [ \$dbpasswd,  'OPENDKIM_PASSWORD', $def_dbpasswd ],
                     [ \$dbport,    'OPENDKIM_PORT',     $def_dbport   ],
                     [ \$dbuser,    'OPENDKIM_USER',     $def_dbuser   ],
                     [ \$reportout, 'OPENDKIM_OUTPUT',   undef         ])
{
	my ($target, $envname, $fallback) = @{$setting};

	# a value from the command line always wins
	next if defined(${$target});

	${$target} = defined($ENV{$envname}) ? $ENV{$envname} : $fallback;
}

# OPENDKIM_MINMSGID overrides the minimum messages.id value.  The value is
# interpolated directly into SQL statements further down, so only an
# integer (optionally surrounded by whitespace) is accepted.
if (defined($ENV{'OPENDKIM_MINMSGID'}))
{
	if ($ENV{'OPENDKIM_MINMSGID'} =~ /^\s*(-?\d+)\s*$/)
	{
		$minmsgid = $1;
	}
	else
	{
		print STDERR "$progname: invalid OPENDKIM_MINMSGID value \"$ENV{'OPENDKIM_MINMSGID'}\"\n";
		exit(1);
	}
}

#
# If any of the domains started with "/" or "./", convert them into
# whatever's in the files they reference.
#
# The expanded list is built separately and then replaces @domains, so the
# array is never modified while it is being walked and no undefined holes
# are left behind.  Lines read from a file are taken literally as domain
# names; they are not themselves treated as file references.
#

if (scalar @domains > 0)
{
	my @expanded;

	foreach my $entry (@domains)
	{
		# plain domain name: keep as-is
		if (substr($entry, 0, 1) ne "/" &&
		    substr($entry, 0, 2) ne "./")
		{
			push @expanded, $entry;
			next;
		}

		# A private handle is used here so that $tmpin is left
		# untouched for the domain ID lookup later on.
		my $listfh;
		if (!open($listfh, "<", $entry))
		{
			# No database handles exist yet at this point, so
			# there is nothing to finish or disconnect.
			print STDERR "$progname: cannot open $entry: $!\n";
			exit(1);
		}

		while (my $line = <$listfh>)
		{
			chomp $line;

			# ignore comments
			$line =~ s/#.*//;

			# drop surrounding whitespace so that indented names
			# and names followed by a comment match exactly
			$line =~ s/^\s+//;
			$line =~ s/\s+$//;

			# ignore blank lines
			next if $line eq "";

			push @expanded, $line;
		}

		close($listfh);
	}

	@domains = @expanded;
}

printf STDERR "%s: started at %s\n", $progname, scalar(localtime()) if $verbose;

# fall back to the default prediction interval when none was requested
$pisize = $def_pi unless defined($pisize);

# only the interval sizes listed in %stdscores have a standard score
$stdscore = $stdscores{$pisize};
if (!defined($stdscore))
{
	print STDERR "$progname: unknown prediction interval size $pisize\n";
	exit(1);
}

print STDERR "$progname: using standard score $stdscore\n" if $verbose;

# Build the DSN from the effective settings and connect.  PrintError is
# off, so a failed connection is reported here and ends the run.
my $dbi_dsn = sprintf("DBI:%s:database=%s;host=%s;port=%s",
                      $dbscheme, $dbname, $dbhost, $dbport);

$dbi_h = DBI->connect($dbi_dsn, $dbuser, $dbpasswd, { PrintError => 0 });
unless (defined($dbi_h))
{
	print STDERR "$progname: unable to connect to database: $DBI::errstr\n";
	exit(1);
}

print STDERR "$progname: connected to database\n" if $verbose;

#
# Convert domain names to domain IDs
#
# The names are user-supplied (command line or files), so they are bound
# through placeholders rather than pasted into the SQL text.  On success
# $tmpin holds a reference to an array of one-element rows, one per
# matching domain ID.
#

if (scalar @domains > 0)
{
	# skip any undefined slots in the list
	my @names = grep { defined($_) } @domains;

	if (scalar @names > 0)
	{
		$tmpsql = "SELECT id FROM domains WHERE name IN (" .
		          join(", ", ("?") x scalar(@names)) . ")";

		$dbi_s = $dbi_h->prepare($tmpsql);
		if (!defined($dbi_s) || !$dbi_s->execute(@names))
		{
			print STDERR "$progname: failed to collect domain IDs: " . $dbi_h->errstr . "\n";
			$dbi_s->finish if defined($dbi_s);
			$dbi_h->disconnect;
			exit(1);
		}

		$tmpin = $dbi_s->fetchall_arrayref([0]);
	}
	else
	{
		# Nothing to look up; an empty IN () list would be a syntax
		# error, so present an empty result instead.
		$tmpin = [];
	}
}

#
# start the report
#
# The report is written to a temporary file named after the final
# location plus the process ID.
#
if (defined($reportout))
{
	$tmpout = join(".", $reportout, $$);
	open($out, ">", $tmpout) or die "$progname: can't open $tmpout: $!";
	print STDERR "$progname: report started in $tmpout\n" if $verbose;

	my $generated = strftime("%b %e %Y %H:%M:%S", localtime);

	# document prologue, title and generation stamp
	print $out
	    qq{<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">\n},
	    qq{<html>\n},
	    qq{  <head>\n},
	    qq{    <meta http-equiv="content-type" content="text/html;charset=us-ascii">\n},
	    qq{    <title>\n},
	    qq{      OpenDKIM Rate Recommendations\n},
	    qq{    </title>\n},
	    qq{  </head>\n},
	    qq{\n},
	    qq{  <body>\n},
	    qq{<h1>OpenDKIM Rate Recommendations</h1>\n},
	    qq{Generated $generated\n},
	    qq{<hr>\n};
}

#
# compute the low-time threshold
#
# The threshold is the mean duration plus the duration standard deviation
# scaled by the standard score, taken over the domains the query selects
# (spam ratio >= $spamratio and at least $minspamcount messages).
#
print STDERR "$progname: computing low-time threshold\n" if $verbose;

if (defined($reportout))
{
	print $out "Analysis of high spam domain duration (>= ", $spamratio * 100, "% spam, >= ", $minspamcount, " msg(s))\n";
	print $out "<table border=1>\n";
	print $out " <tr>\n";
	foreach my $title ("Domains", "Min. Duration", "Max. Duration",
	                   "Mean Duration", "Duration Std. Dev.")
	{
		print $out "  <td>$title</td>\n";
	}
	print $out " </tr>\n";
}

my $duration_sql = "SELECT COUNT(c) AS domains, MIN(l) AS 'min duration', MAX(l) AS 'max duration', AVG(l) AS 'mean duration', STDDEV_POP(l) AS 'duration stddev' FROM (SELECT COUNT(messages.id) AS c, SUM(messages.spam)/COUNT(messages.id) AS r, DATEDIFF(MAX(messages.msgtime), MIN(messages.msgtime)) AS l FROM signatures JOIN messages ON signatures.message = messages.id WHERE messages.id >= $minmsgid AND NOT spam = -1 AND pass = 1 GROUP BY signatures.domain) t1 WHERE r >= $spamratio AND c >= $minspamcount";

$dbi_s = $dbi_h->prepare($duration_sql);
unless ($dbi_s->execute)
{
	print STDERR "$progname: report #1 failed: " . $dbi_h->errstr . "\n";
	$dbi_s->finish;
	$dbi_h->disconnect;
	exit(1);
}

while ($dbi_a = $dbi_s->fetchrow_arrayref())
{
	if (defined($reportout))
	{
		# one table row per result row; SQL NULLs are spelled out
		print $out " <tr>\n";
		foreach my $col (0 .. 4)
		{
			my $shown = defined($dbi_a->[$col]) ? $dbi_a->[$col] : "NULL";
			print $out " <td> $shown </td>\n";
		}
		print $out " </tr>\n";
	}

	# columns 3 and 4 are the mean and standard deviation of duration
	my ($mean, $stddev) = @{$dbi_a}[3, 4];
	if (defined($mean) && defined($stddev))
	{
		$thresh = $mean + $stddev * $stdscore;
	}
}

$dbi_s->finish;

if (defined($reportout))
{
	print $out "</table>\n", "<br>\n";
}

# without a threshold none of the later steps can run
unless (defined($thresh))
{
	print STDERR "$progname: unable to determine low-time threshold\n";
	$dbi_h->disconnect;
	exit(1);
}

print STDERR "$progname: low time threshold is $thresh day(s)\n" if $verbose;

#
# Tag domains that are low-time
#
# A domain is flagged when fewer than $thresh days have passed since its
# firstseen date; all other domains have the flag cleared.
#
print STDERR "$progname: updating low_time flag\n" if $verbose;

my $lowtime_sql = "UPDATE domains SET low_time = IF(DATEDIFF(DATE(NOW()), DATE(firstseen)) < $thresh, 1, 0)";

$dbi_s = $dbi_h->prepare($lowtime_sql);
unless ($dbi_s->execute)
{
	print STDERR "$progname: low_time update failed: " . $dbi_h->errstr . "\n";
	$dbi_s->finish;
	$dbi_h->disconnect;
	exit(1);
}
$dbi_s->finish;

#
# Compute/update message counts and spam ratios for all signing domains:
# first overall (reporter 0), then per reporter.  Rows are added with
# INSERT IGNORE and messages dated today are excluded.
#
foreach my $pass (
	[ "overall", "pass #1",
	  "INSERT IGNORE INTO aggregate (domain, updated, date, messages, spam, reporter) SELECT signatures.domain, CURRENT_TIMESTAMP(), DATE(messages.msgtime), COUNT(messages.id), SUM(spam), 0 FROM messages JOIN signatures ON signatures.message = messages.id WHERE NOT spam = -1 AND pass = 1 AND messages.id >= $minmsgid AND NOT DATE(messages.msgtime) = DATE(CURRENT_TIMESTAMP()) GROUP BY 1, 3" ],
	[ "per-reporter", "pass #2",
	  "INSERT IGNORE INTO aggregate (domain, updated, date, messages, spam, reporter) SELECT signatures.domain, CURRENT_TIMESTAMP(), DATE(messages.msgtime), COUNT(messages.id), SUM(spam), reporter FROM messages JOIN signatures ON signatures.message = messages.id WHERE NOT spam = -1 AND pass = 1 AND messages.id >= $minmsgid AND NOT DATE(messages.msgtime) = DATE(CURRENT_TIMESTAMP()) GROUP BY 1, 3, 6" ])
{
	my ($scope, $label, $sql) = @{$pass};

	print STDERR "$progname: computing $scope counts and ratios for signers\n" if $verbose;

	$dbi_s = $dbi_h->prepare($sql);
	unless ($dbi_s->execute)
	{
		print STDERR "$progname: $label failed: " . $dbi_h->errstr . "\n";
		$dbi_s->finish;
		$dbi_h->disconnect;
		exit(1);
	}
	$dbi_s->finish;
}

#
# Compute/update message counts and spam ratios for unsigned mail (stored
# under domain 0): first overall (reporter 0), then per reporter.  A
# message counts here when its sigcount is 0 or it has no passing
# signature.
#
foreach my $pass (
	[ "overall", "pass #3",
	  "INSERT IGNORE INTO aggregate (domain, updated, date, messages, spam, reporter) SELECT 0, CURRENT_TIMESTAMP(), DATE(messages.msgtime), COUNT(messages.id), SUM(spam), 0 FROM messages WHERE NOT spam = -1 AND messages.id >= $minmsgid AND NOT DATE (messages.msgtime) = DATE(CURRENT_TIMESTAMP()) AND (sigcount = 0 OR messages.id NOT IN (SELECT message FROM signatures WHERE pass = 1)) GROUP BY 3" ],
	[ "per-reporter", "pass #4",
	  "INSERT IGNORE INTO aggregate (domain, updated, date, messages, spam, reporter) SELECT 0, CURRENT_TIMESTAMP(), DATE(messages.msgtime), COUNT(messages.id), SUM(spam), reporter FROM messages WHERE NOT spam = -1 AND messages.id >= $minmsgid AND NOT DATE (messages.msgtime) = DATE(CURRENT_TIMESTAMP()) AND (sigcount = 0 OR messages.id NOT IN (SELECT message FROM signatures WHERE pass = 1)) GROUP BY 3, 6" ])
{
	my ($scope, $label, $sql) = @{$pass};

	print STDERR "$progname: computing $scope counts and ratios for unsigned mail\n" if $verbose;

	$dbi_s = $dbi_h->prepare($sql);
	unless ($dbi_s->execute)
	{
		print STDERR "$progname: $label failed: " . $dbi_h->errstr . "\n";
		$dbi_s->finish;
		$dbi_h->disconnect;
		exit(1);
	}
	$dbi_s->finish;
}

#
# Compute counts and ratios for low-time domains (stored under domain -1):
# first overall (reporter 0), then per reporter.  A domain is low-time
# when fewer than $thresh days have passed since its firstseen date.
#
# NOTE(review): both statements join on signatures yet keep the
# "sigcount = 0 OR no passing signature" filter that the unsigned-mail
# statements use, rather than "pass = 1" -- confirm this is intended.
#
foreach my $pass (
	[ "overall", "pass #5",
	  "INSERT IGNORE INTO aggregate (domain, updated, date, messages, spam, reporter) SELECT -1, CURRENT_TIMESTAMP(), DATE(messages.msgtime), COUNT(messages.id), SUM(spam), 0 FROM messages JOIN signatures ON signatures.message = messages.id JOIN domains on signatures.domain = domains.id WHERE NOT spam = -1 AND messages.id >= $minmsgid AND NOT DATE (messages.msgtime) = DATE(CURRENT_TIMESTAMP()) AND (sigcount = 0 OR messages.id NOT IN (SELECT message FROM signatures WHERE pass = 1)) AND DATEDIFF(DATE(NOW()), DATE(domains.firstseen)) < $thresh GROUP BY 3" ],
	[ "per-reporter", "pass #6",
	  "INSERT IGNORE INTO aggregate (domain, updated, date, messages, spam, reporter) SELECT -1, CURRENT_TIMESTAMP(), DATE(messages.msgtime), COUNT(messages.id), SUM(spam), reporter FROM messages JOIN signatures ON signatures.message = messages.id JOIN domains on signatures.domain = domains.id WHERE NOT spam = -1 AND messages.id >= $minmsgid AND NOT DATE (messages.msgtime) = DATE(CURRENT_TIMESTAMP()) AND (sigcount = 0 OR messages.id NOT IN (SELECT message FROM signatures WHERE pass = 1)) AND DATEDIFF(DATE(NOW()), DATE(domains.firstseen)) < $thresh GROUP BY 3, 6" ])
{
	my ($scope, $label, $sql) = @{$pass};

	print STDERR "$progname: computing $scope counts and ratios for low-time domains\n" if $verbose;

	$dbi_s = $dbi_h->prepare($sql);
	unless ($dbi_s->execute)
	{
		print STDERR "$progname: $label failed: " . $dbi_h->errstr . "\n";
		$dbi_s->finish;
		$dbi_h->disconnect;
		exit(1);
	}
	$dbi_s->finish;
}

#
# Compute ratios wherever they're missing
#
# Only aggregate rows whose ratio is still NULL are touched.
#
print STDERR "$progname: computing missing ratios\n" if $verbose;

my $ratio_sql = "UPDATE aggregate SET ratio = spam / messages WHERE ratio IS NULL";

$dbi_s = $dbi_h->prepare($ratio_sql);
unless ($dbi_s->execute)
{
	print STDERR "$progname: pass #7 failed: " . $dbi_h->errstr . "\n";
	$dbi_s->finish;
	$dbi_h->disconnect;
	exit(1);
}
$dbi_s->finish;

#
# Compute predictions
#
# One predictions row per (reporter, domain) pair found in aggregate; an
# existing row is refreshed through ON DUPLICATE KEY UPDATE.  The "high"
# figures are mean plus standard deviation times the standard score, and
# the two daily limits scale the high rate by one minus the high and low
# ratio predictions respectively.
#
print STDERR "$progname: computing specific domain predictions\n" if $verbose;

my $predict_sql = "INSERT INTO predictions (reporter, domain, name, rate_samples, rate_max, rate_avg, rate_stddev, rate_high, ratio_max, ratio_avg, ratio_stddev, ratio_high, daily_limit_low, daily_limit_high) SELECT reporter, domain, domains.name, COUNT(aggregate.messages), MAX(aggregate.messages), AVG(aggregate.messages), STDDEV_POP(aggregate.messages), AVG(aggregate.messages) + STDDEV_POP(aggregate.messages) * $stdscore, MAX(aggregate.ratio), AVG(aggregate.ratio), STDDEV_POP(aggregate.ratio), AVG(aggregate.ratio) + STDDEV_POP(aggregate.ratio) * $stdscore, (AVG(aggregate.messages) + STDDEV_POP(aggregate.messages) * $stdscore) * (1 - (AVG(aggregate.ratio) + STDDEV_POP(aggregate.ratio) * $stdscore)), (AVG(aggregate.messages) + STDDEV_POP(aggregate.messages) * $stdscore) * (1 - (AVG(aggregate.ratio) - STDDEV_POP(aggregate.ratio) * $stdscore)) FROM aggregate LEFT JOIN domains ON aggregate.domain = domains.id GROUP BY 1, 2 ON DUPLICATE KEY UPDATE updated = NOW(), name = VALUES(name), rate_samples = VALUES(rate_samples), rate_max = VALUES(rate_max), rate_avg = VALUES(rate_avg), rate_stddev = VALUES(rate_stddev), rate_high = VALUES(rate_high), ratio_max = VALUES(ratio_max), ratio_avg = VALUES(ratio_avg), ratio_stddev = VALUES(ratio_stddev), ratio_high = VALUES(ratio_high), daily_limit_low = VALUES(daily_limit_low), daily_limit_high = VALUES(daily_limit_high)";

$dbi_s = $dbi_h->prepare($predict_sql);
unless ($dbi_s->execute)
{
	print STDERR "$progname: pass #8 failed: " . $dbi_h->errstr . "\n";
	$dbi_s->finish;
	$dbi_h->disconnect;
	exit(1);
}
$dbi_s->finish;

#
# Add query labels
#
# The two special domain IDs have no row in the domains table to supply a
# name, so their prediction rows are given fixed names: 0 is unsigned
# mail and -1 is the low-time group.
#
print STDERR "$progname: adding queryable names to special domains\n" if $verbose;

foreach my $pass (
	[ "pass #9",  "UPDATE predictions SET name = 'UNSIGNED' WHERE domain = 0" ],
	[ "pass #10", "UPDATE predictions SET name = 'LOW-TIME' WHERE domain = -1" ])
{
	my ($label, $sql) = @{$pass};

	$dbi_s = $dbi_h->prepare($sql);
	unless ($dbi_s->execute)
	{
		print STDERR "$progname: $label failed: " . $dbi_h->errstr . "\n";
		$dbi_s->finish;
		$dbi_h->disconnect;
		exit(1);
	}
	$dbi_s->finish;
}

#
# reports
#
# Two tables are appended to the report when an output file was requested:
# the stored predictions for the special domains (0 and -1) plus any
# domains selected on the command line, and an analysis of low-time domain
# behaviour.  Cell values are HTML-escaped because domain names come from
# the database.
#
if (defined($reportout))
{
	# Render one table cell value: SQL NULL is spelled out, anything
	# else has the HTML metacharacters escaped.
	my $cell = sub
	{
		my ($value) = @_;

		return "NULL" if !defined($value);

		$value =~ s/&/&amp;/g;
		$value =~ s/</&lt;/g;
		$value =~ s/>/&gt;/g;

		return $value;
	};

	if ($verbose)
	{
		print STDERR "$progname: reporting on interesting domains\n";
	}

	print $out "$pisize% prediction interval, standard score $stdscore\n";
	print $out "<table border=1>\n";
	print $out " <tr>\n";
	foreach my $title ("Domain ID", "Domain Name", "Reporter ID",
	                   "Mean Rate", "Max Rate", "Rate Std. Dev.",
	                   "Rate High Prediction", "Mean Spam Ratio",
	                   "Max Spam Ratio", "Spam Ratio Std. Dev.",
	                   "Spam Ratio High Prediction",
	                   "Volume Limit (low)", "Volume Limit (high)")
	{
		print $out "  <td>$title</td>\n";
	}
	print $out " </tr>\n";

	$tmpsql = "SELECT domain, name, reporter, rate_avg, rate_max, rate_stddev, rate_high, ratio_avg, ratio_max, ratio_stddev, ratio_high, daily_limit_low, daily_limit_high FROM predictions WHERE domain IN (0, -1";

	# $tmpin only holds domain ID rows when domains were selected and
	# looked up; anything else means there are no extra IDs to add.
	if (ref($tmpin) eq 'ARRAY')
	{
		foreach my $row (@{$tmpin})
		{
			$id = $row->[0];

			if (!defined($id))
			{
				next;
			}

			$tmpsql = $tmpsql . ", ". $id;
		}
	}

	$tmpsql = $tmpsql . ")";

	$dbi_s = $dbi_h->prepare($tmpsql);
	if (!defined($dbi_s) || !$dbi_s->execute)
	{
		print STDERR "$progname: domain ID collection failed: " . $dbi_h->errstr . "\n";
		$dbi_s->finish if defined($dbi_s);
		$dbi_h->disconnect;
		exit(1);
	}

	while ($dbi_a = $dbi_s->fetchrow_arrayref())
	{
		print $out " <tr>\n";
		for ($idx = 0; $idx < 13; $idx++)
		{
			print $out " <td> " . $cell->($dbi_a->[$idx]) . " </td>\n";
		}
		print $out " </tr>\n";
	}
	print $out "</table>\n";
	print $out "<br>\n";
	$dbi_s->finish;

	#
	# low-time behaviour analysis
	#
	if ($verbose)
	{
		print STDERR "$progname: reporting on low-time domain behaviour\n";
	}

	print $out "Analysis of low-time domain behaviour (<= " . $thresh . " days, >= " . $minmsgs . " msg(s))\n";
	print $out "<table border=1>\n";
	print $out " <tr>\n";
	foreach my $title ("Domains", "Max Rate", "Mean Rate",
	                   "Rate Std. Dev.", "Rate High Prediction",
	                   "Max Spam Ratio", "Mean Spam Ratio",
	                   "Spam Ratio Std. Dev.",
	                   "Spam Ratio Low Prediction",
	                   "Spam Ratio High Prediction", "Volume Limit")
	{
		print $out "  <td>$title</td>\n";
	}
	print $out " </tr>\n";

	# daily_limit is the high rate prediction scaled by one minus the
	# high ratio prediction; the rate term is parenthesized so that the
	# scaling applies to the whole prediction, not only to the standard
	# deviation part.
	$dbi_s = $dbi_h->prepare("SELECT COUNT(c) AS domains, MAX(counts.c) AS rate_max, AVG(counts.c) AS rate_avg, STDDEV_POP(counts.c) AS rate_stddev, AVG(counts.c) + STDDEV_POP(counts.c) * $stdscore AS rate_high, MAX(counts.r) AS ratio_max, AVG(counts.r) AS ratio_avg, STDDEV_POP(counts.r) AS ratio_stddev, AVG(counts.r) - STDDEV_POP(counts.r) * $stdscore AS ratio_low, AVG(counts.r) + STDDEV_POP(counts.r) * $stdscore AS ratio_high, (AVG(counts.c) + STDDEV_POP(counts.c) * $stdscore) * (1 - (AVG(counts.r) + STDDEV_POP(counts.r) * $stdscore)) AS daily_limit FROM (SELECT SUM(messages) AS c, SUM(spam)/SUM(messages) AS r FROM aggregate JOIN domains ON aggregate.domain = domains.id WHERE domains.firstseen >= DATE_SUB(CURRENT_TIMESTAMP(), INTERVAL $thresh DAY) GROUP BY domain HAVING SUM(messages) >= $minmsgs) counts");
	if (!defined($dbi_s) || !$dbi_s->execute)
	{
		print STDERR "$progname: report #2 failed: " . $dbi_h->errstr . "\n";
		$dbi_s->finish if defined($dbi_s);
		$dbi_h->disconnect;
		exit(1);
	}

	while ($dbi_a = $dbi_s->fetchrow_arrayref())
	{
		print $out " <tr>\n";
		for ($idx = 0; $idx < 11; $idx++)
		{
			print $out " <td> " . $cell->($dbi_a->[$idx]) . " </td>\n";
		}
		print $out " </tr>\n";
	}
	print $out "</table>\n";
	$dbi_s->finish;

	# footer
	print $out "  </body>\n";
	print $out "</html>\n";
}

# all done!
if ($verbose)
{
	print STDERR "$progname: terminating at " . localtime() . "\n";
}

$dbi_h->disconnect;

if (defined($reportout))
{
	# Buffered write errors only surface when the handle is closed, so
	# check the close before moving the temporary file into place;
	# otherwise a truncated report could replace the previous one.
	close($out) or die "$progname: error writing $tmpout: $!";
	rename($tmpout, $reportout) or die "$progname: rename to $reportout failed; $!";
}

exit(0);
