#!/bin/sh
"""": # -*-python-*-
bup_python="$(dirname "$0")/../cmd/bup-python" || exit $?
exec "$bup_python" "$0" ${1+"$@"}
"""
# end of bup preamble

from __future__ import print_function
from collections import defaultdict
from difflib import unified_diff
from itertools import chain, dropwhile, groupby, takewhile
from os import environ, chdir
from os.path import abspath, dirname
from pipes import quote
from random import choice, randint
from shutil import copytree, rmtree
from subprocess import PIPE, Popen, check_call
from sys import stderr
from time import localtime, strftime, time
import os, random, sys

# Directory containing this script (falls back to '.' when argv[0] is empty).
script_home = abspath(dirname(sys.argv[0] or '.'))
# Put ../lib and .. (relative to this script) at the front of the module
# search path; this has to happen before the project imports below.
sys.path[:0] = [abspath(script_home + '/../lib'), abspath(script_home + '/..')]
# The bup executable is looked up in the directory the test was started from.
top = os.getcwd()
bup_cmd = top + '/bup'

from buptest import test_tempdir
from wvtest import wvfail, wvpass, wvpasseq, wvpassne, wvstart

from bup.helpers import partition, period_as_secs, readpipe


def logcmd(cmd):
    """Echo a command to stderr before it is run.

    cmd is either a string, which is printed verbatim, or a sequence of
    arguments, which is printed as one space-separated line with each
    argument shell-quoted.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str  # Python 3 has no basestring
    if isinstance(cmd, string_types):
        print(cmd, file=stderr)
    else:
        print(' '.join(map(quote, cmd)), file=stderr)

def exc(cmd, shell=False):
    """Log cmd to stderr, then run it to completion.

    The command is executed with subprocess.check_call, so a non-zero
    exit status raises CalledProcessError.  Nothing is returned.
    """
    logcmd(cmd)
    check_call(args=cmd, shell=shell)

def exo(cmd, stdin=None, stdout=True, stderr=False, shell=False, check=True):
    """Log and run cmd, returning (out, err, process).

    cmd    -- argument sequence, or a string (typically with shell=True).
    stdin  -- forwarded to Popen as the child's stdin (None inherits).
    stdout -- when true the child's stdout is captured and returned as
              out; otherwise it is inherited and out is None.
    stderr -- accepted for call-site compatibility but not consulted:
              the child's stderr is always captured and returned as err.
    shell  -- forwarded to Popen.
    check  -- when true, a non-zero exit status raises Exception with
              the command, the status and the captured stderr.
    """
    logcmd(cmd)
    p = Popen(cmd,
              stdin=stdin,
              stdout=(PIPE if stdout else None),
              stderr=PIPE,
              shell=shell)
    out, err = p.communicate()
    if check and p.returncode != 0:
        # Only join real argument sequences; a string command is
        # reported as-is rather than being split into characters.
        cmd_desc = ' '.join(cmd) if isinstance(cmd, (list, tuple)) else cmd
        raise Exception('subprocess %r failed with status %d, stderr: %r'
                        % (cmd_desc, p.returncode, err))
    return out, err, p

def bup(*args):
    """Run the bup command with args via exo and return its captured stdout."""
    out, _, _ = exo((bup_cmd,) + args)
    return out

def bupc(*args):
    """Run the bup command with args via exc and return whatever exc returns."""
    argv = (bup_cmd,) + args
    return exc(argv)

def create_older_random_saves(n, start_utc, end_utc):
    """Create n git commits dated at distinct random times.

    Timestamps are drawn with randint from [start_utc, end_utc] until n
    distinct values have been collected.  For each one, in ascending
    order, the file 'foo' in the current directory is rewritten to
    contain the timestamp and committed with that timestamp as both the
    --date and the message.  The repository is then repacked with
    'git gc --aggressive'.  Returns the sorted list of timestamps.
    """
    # Start with an empty, tracked file so that 'commit -a' picks up
    # every later rewrite.
    open('foo', 'w').close()
    exc(['git', 'add', 'foo'])

    chosen = set()
    while len(chosen) != n:
        chosen.add(randint(start_utc, end_utc))
    ordered = sorted(chosen)

    for stamp in ordered:
        text = str(stamp)
        with open('foo', 'w') as out:
            out.write(text + '\n')
        exc(['git', 'commit', '--date', text, '-qam', text])

    exc(['git', 'gc', '--aggressive'])
    return ordered

# There is corresponding code in bup for some of this, but the
# computation method is different here, in part so that the test can
# provide a more effective cross-check.

# Retention classes that can appear in a --keep-<kind>-for argument.
period_kinds = ['all', 'dailies', 'monthlies', 'yearlies']
# Seconds per period unit suffix; a month is taken as 31 days and a
# year as 366 days.
period_scale = {'s': 1,
                'min': 60,
                'h': 60 * 60,
                'd': 60 * 60 * 24,
                'w': 60 * 60 * 24 * 7,
                'm': 60 * 60 * 24 * 31,
                'y': 60 * 60 * 24 * 366}
# Materialized as a list because random.choice() needs an indexable
# sequence, and on Python 3 dict.keys() is only a view.
period_scale_kinds = list(period_scale.keys())

def expected_retentions(utcs, utc_start, spec):
    """Compute which save timestamps a prune with spec should retain.

    utcs      -- iterable of save timestamps (seconds).
    utc_start -- reference time that each period is measured back from.
    spec      -- sequence of (kind, duration) pairs; durations are
                 converted to seconds by period_as_secs.  When a kind
                 is repeated, the last pair wins.

    With an empty spec, utcs is returned unchanged.  Otherwise the saves
    are walked from newest to oldest: everything at or after the 'all'
    start is kept; of what remains, the saves at or after the 'dailies'
    start contribute the minimum timestamp of each run of consecutive
    saves sharing a local day-of-year; likewise 'monthlies' by local
    month and 'yearlies' by local year.  Returns an iterator over the
    retained timestamps (all, then dailies, monthlies, yearlies).
    """
    if not spec:
        return utcs
    utcs = sorted(utcs, reverse=True)

    # Kinds that are absent from spec get an infinite start time, so no
    # save can fall inside their period.
    period_start = defaultdict(lambda: float('inf'))
    for kind, duration in dict(spec).items():
        period_start[kind] = utc_start - period_as_secs(duration)

    def split_recent(remaining, kind):
        # Split the newest-first list into (saves at/after kind's start, rest).
        start = period_start[kind]
        recent = list(takewhile(lambda x: x >= start, remaining))
        return recent, remaining[len(recent):]

    def oldest_per(recent, field):
        # Minimum timestamp of each consecutive run sharing localtime().<field>.
        return [min(group) for _, group
                in groupby(recent, lambda x: getattr(localtime(x), field))]

    kept_all, utcs = split_recent(utcs, 'all')

    recent, utcs = split_recent(utcs, 'dailies')
    dailies = oldest_per(recent, 'tm_yday')

    recent, utcs = split_recent(utcs, 'monthlies')
    monthlies = oldest_per(recent, 'tm_mon')

    recent, utcs = split_recent(utcs, 'yearlies')
    yearlies = oldest_per(recent, 'tm_year')

    return chain(kept_all, dailies, monthlies, yearlies)

def period_spec(start_utc, end_utc):
    """Build a random tuple of (kind, period) retention pairs.

    Between 1 and 2 * len(period_kinds) pairs are produced.  About 5%
    of the periods are 'forever'; the rest are a random number of
    seconds in [1, end_utc - start_utc] expressed as a whole count of a
    randomly chosen unit from period_scale.  Draws whose count rounds
    down to zero are discarded and retried.
    """
    wanted = randint(1, 2 * len(period_kinds))
    assert(wanted >= 1)  # At least one --keep argument is required
    pairs = []
    while len(pairs) < wanted:
        if randint(1, 100) <= 5:
            period = 'forever'
        else:
            assert(end_utc > start_utc)
            secs = randint(1, end_utc - start_utc)
            unit = choice(period_scale_kinds)
            count = int(float(secs) / period_scale[unit])
            if count == 0:
                # Shorter than one unit: draw again.
                continue
            period = str(count) + unit
        pairs.append((choice(period_kinds), period))
    return tuple(pairs)

def unique_period_specs(n, start_utc, end_utc):
    """Return a tuple of n distinct specs generated by period_spec.

    period_spec(start_utc, end_utc) is called repeatedly and duplicates
    are discarded until n different specs have been collected.  The
    order of the result is that of set iteration.
    """
    distinct = set()
    while n > len(distinct):
        candidate = period_spec(start_utc, end_utc)
        distinct.add(candidate)
    return tuple(distinct)

def period_spec_to_period_args(spec):
    """Flatten (kind, period) pairs into command-line arguments.

    Each pair becomes the two arguments '--keep-<kind>-for' and
    <period>, in spec order.  Returns a tuple.
    """
    args = []
    for kind, period in spec:
        args.append('--keep-' + kind + '-for')
        args.append(period)
    return tuple(args)

def result_diffline(x):
    """Format timestamp x as '<x> <local YYYY-mm-dd-HHMMSS>' plus a newline."""
    readable = strftime('%Y-%m-%d-%H%M%S', localtime(x))
    return '%s %s\n' % (x, readable)

def check_prune_result(expected):
    """Check that the commit timestamps in 'git log' equal expected.

    The author timestamps printed by 'git log --pretty=format:%at' are
    parsed as integers, sorted, and compared with expected.  On a
    mismatch, every expected timestamp and a unified diff of expected
    against actual are written to stderr.  The outcome of the comparison
    is reported through wvpass either way.
    """
    log_output = exo(['git', 'log', '--pretty=format:%at'])[0]
    actual = sorted(int(line) for line in log_output.splitlines())
    matched = (expected == actual)
    if not matched:
        for utc in expected:
            print('ex:', utc, strftime('%Y-%m-%d-%H%M%S', localtime(utc)),
                  file=stderr)
        expected_lines = [result_diffline(utc) for utc in expected]
        actual_lines = [result_diffline(utc) for utc in actual]
        sys.stderr.writelines(unified_diff(expected_lines, actual_lines,
                                           fromfile='expected',
                                           tofile='actual'))
    wvpass(matched)


# Fixed author and committer identity for every git invocation.
environ.update({
    'GIT_AUTHOR_NAME': 'bup test',
    'GIT_COMMITTER_NAME': 'bup test',
    'GIT_AUTHOR_EMAIL': 'bup@a425bc70a02811e49bdf73ee56450e6f',
    'GIT_COMMITTER_EMAIL': 'bup@a425bc70a02811e49bdf73ee56450e6f',
})

# Seed the RNG from BUP_TEST_SEED when it is set, otherwise from the
# clock, and report the seed on stderr.
seed = environ.get('BUP_TEST_SEED')
seed = int(time()) if seed is None else int(seed)
random.seed(seed)
stderr.write('random seed: %s\n' % seed)

# Test sizes, each overridable from the environment.
save_population, prune_cycles, prune_gc_cycles = (
    int(environ.get(name, default))
    for name, default in (('BUP_TEST_PRUNE_OLDER_SAVES', 2000),
                          ('BUP_TEST_PRUNE_OLDER_CYCLES', 20),
                          ('BUP_TEST_PRUNE_OLDER_GC_CYCLES', 10)))

# Main test body: build a repository of randomly dated commits in a
# temporary directory, then check 'bup prune-older' against the
# independently computed expected_retentions() for random keep specs.
with test_tempdir('prune-older-') as tmpdir:
    # bup and git both operate on the same repository, work/.git.
    environ['BUP_DIR'] = tmpdir + '/work/.git'
    environ['GIT_DIR'] = tmpdir + '/work/.git'
    now = int(time())
    # Three 366-day years before now.
    three_years_ago = now - (60 * 60 * 24 * 366 * 3)
    chdir(tmpdir)
    exc(['git', 'init', 'work'])

    wvstart('generating ' + str(save_population) + ' random saves')
    chdir(tmpdir + '/work')
    save_utcs = create_older_random_saves(save_population, three_years_ago, now)
    chdir(tmpdir)
    # Remember the original branch tip so each test can restore it.
    test_set_hash = exo(['git', 'show-ref', '-s', 'master'])[0].rstrip()
    ls_saves = bup('ls', 'master').splitlines()
    # One entry more than the number of saves is expected; presumably the
    # extra one is bup's 'latest' entry -- TODO confirm.
    wvpasseq(save_population + 1, len(ls_saves))

    wvstart('ensure everything kept, if no keep arguments')
    exc(['git', 'reset', '--hard', test_set_hash])
    # Without any --keep-* argument prune-older must fail with the
    # message checked below and leave every save in place.
    _, errmsg, proc = exo((bup_cmd,
                           'prune-older', '-v', '--unsafe', '--no-gc',
                           '--wrt', str(now)) \
                          + ('master',),
                          stdout=False, stderr=True, check=False)
    wvpassne(proc.returncode, 0)
    wvpass('at least one keep argument is required' in errmsg)
    check_prune_result(save_utcs)


    wvstart('running %d generative no-gc tests on %d saves' % (prune_cycles,
                                                               save_population))
    for spec in unique_period_specs(prune_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale['m'],
                                    now):
        # Restore the full history, prune with this spec, and compare
        # the surviving commits with the expected set.
        exc(['git', 'reset', '--hard', test_set_hash])
        expected = sorted(expected_retentions(save_utcs, now, spec))
        exc((bup_cmd,
             'prune-older', '-v', '--unsafe', '--no-gc', '--wrt', str(now)) \
            + period_spec_to_period_args(spec) \
            + ('master',))
        check_prune_result(expected)


    # More expensive because we have to recreate the repo each time
    wvstart('running %d generative gc tests on %d saves' % (prune_gc_cycles,
                                                            save_population))
    exc(['git', 'reset', '--hard', test_set_hash])
    # Keep a pristine copy of the repository to restore before each run.
    copytree('work/.git', 'clean-test-repo', symlinks=True)
    for spec in unique_period_specs(prune_gc_cycles,
                                    # Make it more likely we'll have
                                    # some outside the save range.
                                    three_years_ago - period_scale['m'],
                                    now):
        # Replace the repository with the pristine copy; presumably a
        # reset is not enough here because gc discards the pruned
        # objects -- TODO confirm.
        rmtree('work/.git')
        copytree('clean-test-repo', 'work/.git')
        expected = sorted(expected_retentions(save_utcs, now, spec))
        exc((bup_cmd,
             'prune-older', '-v', '--unsafe', '--wrt', str(now)) \
            + period_spec_to_period_args(spec) \
            + ('master',))
        check_prune_result(expected)
