#! /usr/bin/perl
# Copy data from one RRD to another
#
# Copyright (C) 2009-2011 Steve Schnepp <steve.schnepp@pwkf.org>
#
# License: LGPL
#
# Usage : rrdcopy <src.rrd> <dst.rrd>
#
# Both RRD have to exist and have the same DS (DataSource).
# The tool copies all the datasources from the sources into the destination
# like you mean it:
#  - start is the oldest value from src
#     (but cannot be younger than the youngest from dst)
#  - stop is the youngest value from src
#  - asks src for the AVERAGED value on each step time.
#
# Since all of the src data must be younger than any of the data in dst, you
# probably want dst to be a new rrd. A new munin-style "huge" rrd can be
# created like this:
#
# rrdtool create $dstfile --start "-2y" -s 300 \
#	DS:42:GAUGE:600:U:U \
#	RRA:AVERAGE:0.5:1:115200 \
#	RRA:MIN:0.5:1:115200 \
#	RRA:MAX:0.5:1:115200
#
# It doesn't do any MIN/MAX replication since this information
# is usually not accurate at all when resampling.
#
# The environmment variable DEBUG is used like:
# 0/absent - no debug
# 1        - emit to stdout all the updates to dst
# 2        - emit also the data that try
# 3        - emit also the source data
#
# XXX - It depends on RRDs.pm ...
# ... as I'm waiting for a patch to use the plain rrdtool cli :-)
#
# It was known as the rrdmove tool from the pmptools project.
# - I changed its name (as data is copied, not moved)
# - I changed its hosting (way more useful for munin than for pmptools)

use strict;
use warnings;
use Carp;
use Data::Dumper;

use RRDs;

my ($src_rrd, $dst_rrd) = @ARGV;

my $infos_src = RRDs::info($src_rrd);
my $infos_dst = RRDs::info($dst_rrd);

#get ds_names
my @ds_names;
foreach my $key (keys %$infos_src) {
	if ($key =~ /^ds\[(\w+)\]\.type$/) {
		push @ds_names, $1;
	}
}

my ($start, $stop, $step);
$step = $infos_dst->{'step'};
$stop = $infos_src->{'last_update'};

my @rra_names;
foreach my $key (keys %$infos_dst) {
	if ($key =~ /^rra\[(\w+)\]\.cf$/) {
		push @rra_names, $1;
	}
}

foreach my $rra_name (sort {$a <=> $b} @rra_names) {
	my $rows = $infos_dst->{"rra[$rra_name].rows"};
	my $pdp_per_row = $infos_dst->{"rra[$rra_name].pdp_per_row"};

	my $rra_start = $stop - $rows * $pdp_per_row * $step;
	if ( (! defined $start) || $start > $rra_start) {
		$start = $rra_start;
	}

	print "rra[$rra_name]:$rra_start("
		. (scalar localtime($rra_start))
		. "):$start("
		. (scalar localtime($start))
		.")\n" if $ENV{DEBUG} && $ENV{DEBUG} >= 2;
}

# Converting
my ($step_sample) = $step * 128;
for (my $epoch = $start; $epoch <= $stop; $epoch += $step_sample) {
	my @rrd_updates;

	# Resampling by taking the average value for the period
	my ($fetched_epoch, $fetched_step, $names, $data) = RRDs::fetch(
		$src_rrd,
		"AVERAGE",
		"--start", $epoch,
		# the last should not be used
		"--end", $epoch + $step_sample-1,
	);

	if ($ENV{DEBUG} && $ENV{DEBUG} >= 3) {
		print "Start:       ", scalar localtime($fetched_epoch),
		      " ($fetched_epoch)\n";
		print "Step size:   $fetched_step seconds\n";
		print "DS names:    ", join (", ", @$names)."\n";
		print "Data points: ", $#$data + 1, "\n";
		print "Data:\n";
	}

	$fetched_epoch -= $fetched_step;

	for my $line ( @$data ) {
		$fetched_epoch += $fetched_step;

		# skip the last line, because it _should_ be picked up in the
		# next block
		if ($fetched_epoch >= $epoch + $step_sample) {
			next;
		}

		if ($ENV{DEBUG} && $ENV{DEBUG} >= 3) {
			print "  ", scalar localtime($fetched_epoch),
			      " ($fetched_epoch) ";
			for my $val (@$line) {
				printf "%12.1f ", $val ? $val : 0.0;
			}
			print "\n";
		}

		my %current_data;
		for ( my $ds_idx = 0; $ds_idx < scalar @$line; $ds_idx ++) {
			my $name = $names->[$ds_idx];
			my $value = $line->[$ds_idx];
			$current_data{$name} = $value;
		}

		# Create the template
		my @values = map { defined $current_data{$_} ? $current_data{$_} : "U" } @ds_names;

		# Ignore lines with all values set to "U"
		next unless grep { $_ ne "U" } @values;

		my $current_epoch = $fetched_epoch;
		while ( $current_epoch < $fetched_epoch + $fetched_step ) {
			push @rrd_updates, "$current_epoch:" . join(':', @values);
			$current_epoch += $step;
		}
	}

	next unless @rrd_updates;

	print "rrdtool update $dst_rrd --template " . join(':', @ds_names) . " " . join(" ", @rrd_updates) . "\n" if $ENV{DEBUG};
	system("rrdtool update $dst_rrd --template " . join(':', @ds_names) . " " . join(" ", @rrd_updates));

	# if the bulk update failed, try to update every point
	if ( $? != 0 ) {
		for my $rrd_update ( @rrd_updates ) {
			print "rrdtool update $dst_rrd --template " . join(':', @ds_names) . " " . join(" ", $rrd_update) . "\n" if $ENV{DEBUG};
			system("rrdtool update $dst_rrd --template " . join(':', @ds_names) . " " . join(" ", $rrd_update));
		}
	}

	# moved to rrdtool cli for this, but leaving it in case anyone cares
	#RRDs::update(
	#	$dst_rrd,
	#	"--template", join(':', @ds_names),
	#	@rrd_updates,
	#) or die "RRDs::update: $!";
}

