#!/usr/bin/perl
#============================================================= -*-perl-*-
#
# BackupPC_tarExtract: extract data from a dump
#
# DESCRIPTION
#
# AUTHOR
#   Craig Barratt  <cbarratt@users.sourceforge.net>
#
# COPYRIGHT
#   Copyright (C) 2001-2015  Craig Barratt
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#========================================================================
#
# Version 3.3.1, released 11 Jan 2015.
#
# See http://backuppc.sourceforge.net.
#
#========================================================================

use strict;
no  utf8;
use lib "__INSTALLDIR__/lib";
use Encode qw/from_to/;
use BackupPC::Lib;
use BackupPC::Attrib qw(:all);
use BackupPC::FileZIO;
use BackupPC::PoolWrite;
use File::Path;

use constant S_IFMT       => 0170000;   # type of file

die("BackupPC::Lib->new failed\n") if ( !(my $bpc = BackupPC::Lib->new) );
my $TopDir = $bpc->TopDir();
my $BinDir = $bpc->BinDir();
my %Conf   = $bpc->Conf();

if ( @ARGV != 3 ) {
    print("usage: $0 <client> <shareName> <compressLevel>\n");
    exit(1);
}
if ( $ARGV[0] !~ /^([\w\.\s-]+)$/ ) {
    print("$0: bad client name '$ARGV[0]'\n");
    exit(1);
}
my $client = $1;
if ( $ARGV[1] =~ m{(^|/)\.\.(/|$)} ) {
    print("$0: bad share name '$ARGV[1]'\n");
    exit(1);
}
my $ShareNameUM = $1 if ( $ARGV[1] =~ /(.*)/ );
my $ShareName = $bpc->fileNameEltMangle($ShareNameUM);
if ( $ARGV[2] !~ /^(\d+)$/ ) {
    print("$0: bad compress level '$ARGV[2]'\n");
    exit(1);
}
my $Compress = $1;
my $Abort = 0;
my $AbortReason;

#
# Re-read config file, so we can include the PC-specific config
#
if ( defined(my $error = $bpc->ConfigRead($client)) ) {
    print("BackupPC_tarExtract: Can't read PC's config file: $error\n");
    exit(1);
}
%Conf = $bpc->Conf();

#
# Catch various signals
#
$SIG{INT}  = \&catch_signal;
$SIG{ALRM} = \&catch_signal;
$SIG{TERM} = \&catch_signal;
$SIG{PIPE} = \&catch_signal;
$SIG{STOP} = \&catch_signal;
$SIG{TSTP} = \&catch_signal;
$SIG{TTIN} = \&catch_signal;

#
# This constant and the line of code below that uses it is borrowed
# from Archive::Tar.  Thanks to Calle Dybedahl and Stephen Zander.
# See www.cpan.org.
#
# Archive::Tar is Copyright 1997 Calle Dybedahl. All rights reserved.
#                 Copyright 1998 Stephen Zander. All rights reserved.
#
my $tar_unpack_header
    = 'Z100 A8 A8 A8 a12 A12 A8 A1 Z100 A6 A2 Z32 Z32 A8 A8 A155 x12';
my $tar_header_length = 512;

my $BufSize  = 1048576;     # 1MB or 2^20
my $MaxFiles = 20;
my $Errors   = 0;
my $OutDir   = "$TopDir/pc/$client/new";
my %Attrib   = ();

my $ExistFileCnt      = 0;
my $ExistFileSize     = 0;
my $ExistFileCompSize = 0;
my $TotalFileCnt      = 0;
my $TotalFileSize     = 0;
my $TarReadHdrCnt     = 0;

sub TarRead
{
    my($fh, $totBytes) = @_;
    my($numBytes, $newBytes, $data);

    $data = "\0" x $totBytes;
    while ( $numBytes < $totBytes ) {
	return if ( $Abort );
        $newBytes = sysread($fh,
                        substr($data, $numBytes, $totBytes - $numBytes),
                        $totBytes - $numBytes);
        if ( $newBytes <= 0 ) {
	    return if ( $TarReadHdrCnt == 1 );	 # empty tar file ok
            print("Unexpected end of tar archive (tot = $totBytes,"
                   . " num = $numBytes, posn = " . sysseek($fh, 0, 1) . ")\n");
            $Abort = 1;
            $AbortReason = "Unexpected end of tar archive";
            $Errors++;
            return;
        }
        $numBytes += $newBytes;
    }
    return $data;
}

sub TarReadHeader
{
    my($fh) = @_;

    $TarReadHdrCnt++;
    return $1 if ( TarRead($fh, $tar_header_length) =~ /(.*)/s );
    return;
}

sub TarFlush
{
    my($fh, $size) = @_;

    if ( $size % $tar_header_length ) {
        TarRead($fh, $tar_header_length - ($size % $tar_header_length));
    }
}

sub TarReadFileInfo
{
    my($fh) = @_;
    my($head, $longName, $longLink);
    my($name, $mode, $uid, $gid, $size, $mtime, $chksum, $type,
       $linkname, $magic, $version, $uname, $gname, $devmajor,
       $devminor, $prefix);

    while ( 1 ) {
        $head = TarReadHeader($fh);
        return if ( $Abort || $head eq ""
			   || $head eq "\0" x $tar_header_length );
        ($name,		# string
            $mode,	# octal number
            $uid,	# octal number
            $gid,	# octal number
            $size,	# octal number
            $mtime,	# octal number
            $chksum,	# octal number
            $type,	# character
            $linkname,	# string
            $magic,	# string
            $version,	# two bytes
            $uname,	# string
            $gname,	# string
            $devmajor,	# octal number
            $devminor,	# octal number
            $prefix) = unpack($tar_unpack_header, $head);

        $mode     = oct $mode;
        $uid      = oct $uid;
        $gid      = oct $gid;
	if ( ord($size) == 128 ) {
	    #
	    # GNU tar extension: for >=8GB files the size is stored
	    # in big endian binary.
	    #
	    $size = 65536 * 65536 * unpack("N", substr($size, 4, 4))
				  + unpack("N", substr($size, 8, 4));
	} else {
	    #
	    # We used to have a patch here for smbclient 2.2.x.  For file
	    # sizes between 2 and 4GB it sent the wrong size.  But since
	    # samba 3.0.0 has been released we no longer support this
	    # patch since valid files could have sizes that start with
	    # 6 or 7 in octal (eg: 6-8GB files).
	    #
	    # $size =~ s/^6/2/;       # fix bug in smbclient for >=2GB files
	    # $size =~ s/^7/3/;       # fix bug in smbclient for >=2GB files
	    #
	    # To avoid integer overflow in case we are in the 4GB - 8GB
	    # range, we do the conversion in two parts.
	    #
            if ( $size =~ /([0-9]{9,})/ ) {
                my $len = length($1);
                $size = oct(substr($1, 0, $len - 8)) * (1 << 24)
                      + oct(substr($1, $len - 8));
            } else {
                $size = oct($size);
            }
	}
        $mtime    = oct $mtime;
        $chksum   = oct $chksum;
        $devmajor = oct $devmajor;
        $devminor = oct $devminor;
        $name     = "$prefix/$name" if $prefix;
        $prefix   = "";
        substr ($head, 148, 8) = "        ";
        if (unpack ("%16C*", $head) != $chksum) {
           print("$name: checksum error at "
                        . sysseek($fh, 0, 1) , "\n");
           $Errors++;
        }
        if ( $type eq "L" ) {
            $longName = TarRead($fh, $size) || return;
            # remove trailing NULL
            $longName = substr($longName, 0, $size - 1);
            TarFlush($fh, $size);
            next;
        } elsif ( $type eq "K" ) {
            $longLink = TarRead($fh, $size) || return;
            # remove trailing NULL
            $longLink = substr($longLink, 0, $size - 1);
            TarFlush($fh, $size);
            next;
        }
        printf("Got file '%s', mode 0%o, size %g, type %d\n",
                $name, $mode, $size, $type) if ( $Conf{XferLogLevel} >= 3 );
        $name     = $longName if ( defined($longName) );
        $linkname = $longLink if ( defined($longLink) );

        #
        # Map client charset encodings to utf8
        #
        # printf("File $name (hex: %s)\n", unpack("H*", $name));
        if ( $Conf{ClientCharset} ne "" ) {
            from_to($name, $Conf{ClientCharset}, "utf8");
            from_to($linkname, $Conf{ClientCharset}, "utf8");
        }
        # printf("File now $name (hex: %s)\n", unpack("H*", $name));

        $name     =~ s{^\./+}{};
        $name     =~ s{/+\.?$}{};
        $name     =~ s{//+}{/}g;
        return {
            name       => $name,
            mangleName => $bpc->fileNameMangle($name),
            mode       => $mode,
            uid        => $uid,
            gid        => $gid,
            size       => $size,
            mtime      => $mtime,
            type       => $type,
            linkname   => $linkname,
            devmajor   => $devmajor,
            devminor   => $devminor,
        };
    }
}

sub TarReadFile
{
    my($fh) = @_;
    my $f = TarReadFileInfo($fh) || return;
    my($dir, $file);

    if ( $f->{name} eq "" ) {
        # top-level dir
        $dir = "";
        $file = $ShareNameUM;
    } else {
        ($file = $f->{name}) =~ s{.*?([^/]*)$}{$1};         # unmangled file
        if ( ($dir = $f->{mangleName}) =~ m{(.*)/.*} ) {
            $dir = "$ShareName/$1";
        } else {
            $dir = $ShareName;
        }
    }
    if ( !defined($Attrib{$dir}) ) {
	foreach my $d ( keys(%Attrib) ) {
	    next if ( $dir =~ m{^\Q$d/} );
	    attributeWrite($d);
	}
	$Attrib{$dir} = BackupPC::Attrib->new({ compress => $Compress });
	if ( -f $Attrib{$dir}->fileName("$OutDir/$dir")
                    && !$Attrib{$dir}->read("$OutDir/$dir") ) {
            printf("Unable to read attribute file %s\n",
                                $Attrib{$dir}->fileName("$OutDir/$dir"));
            $Errors++;
	}
    }
    if ( $f->{type} == BPC_FTYPE_DIR ) {
        #
        # Directory
        #
        logFileAction("create", $f) if ( $Conf{XferLogLevel} >= 1 );
        mkpath("$OutDir/$ShareName/$f->{mangleName}", 0, 0777)
                            if ( !-d "$OutDir/$ShareName/$f->{mangleName}" );
    } elsif ( $f->{type} == BPC_FTYPE_FILE ) {
        #
        # Regular file
        #
        my($nRead);
        #print("Reading $f->{name}, $f->{size} bytes, type $f->{type}\n");
        pathCreate($dir, "$OutDir/$ShareName/$f->{mangleName}", $f);
        my $poolWrite = BackupPC::PoolWrite->new($bpc,
                                         "$OutDir/$ShareName/$f->{mangleName}",
                                         $f->{size}, $Compress);
        while ( $nRead < $f->{size} ) {
            my $thisRead = $f->{size} - $nRead < $BufSize
                                ? $f->{size} - $nRead : $BufSize;
            my $data = TarRead($fh, $thisRead);
            if ( $data eq "" ) {
		if ( !$Abort ) {
		    print("Unexpected end of tar archive during read\n");
                    $AbortReason = "Unexpected end of tar archive";
		    $Errors++;
		}
		$poolWrite->abort;
                $Abort = 1;
		unlink("$OutDir/$ShareName/$f->{mangleName}");
		print("Removing partial file $f->{name}\n");
                return;
            }
            $poolWrite->write(\$data);
            $nRead += $thisRead;
        }
        my $exist = processClose($poolWrite, "$ShareName/$f->{mangleName}",
                                 $f->{size});
	logFileAction($exist ? "pool" : "create", $f)
                                 if ( $Conf{XferLogLevel} >= 1 );
        TarFlush($fh, $f->{size});
    } elsif ( $f->{type} == BPC_FTYPE_HARDLINK ) {
        #
        # Hardlink to another file.  GNU tar is clever about files
	# that are hardlinks to each other.  The first link will be
	# sent as a regular file.  The additional links will be sent
	# as this type.  We store the hardlink just like a symlink:
	# the link name (path of the linked-to file) is stored in
	# a plain file.
        #
        $f->{size} = length($f->{linkname});
        pathCreate($dir, "$OutDir/$ShareName/$f->{mangleName}", $f);
        my $poolWrite = BackupPC::PoolWrite->new($bpc,
                                         "$OutDir/$ShareName/$f->{mangleName}",
                                         $f->{size}, $Compress);
        $poolWrite->write(\$f->{linkname});
        my $exist = processClose($poolWrite, "$ShareName/$f->{mangleName}",
                                 $f->{size});
	logFileAction($exist ? "pool" : "create", $f)
                                 if ( $Conf{XferLogLevel} >= 1 );
    } elsif ( $f->{type} == BPC_FTYPE_SYMLINK ) {
        #
        # Symbolic link: write the value of the link to a plain file,
        # that we pool as usual (ie: we don't create a symlink).
        # The attributes remember the original file type.
        # We also change the size to reflect the size of the link
        # contents.
        #
        $f->{size} = length($f->{linkname});
        pathCreate($dir, "$OutDir/$ShareName/$f->{mangleName}", $f);
        my $poolWrite = BackupPC::PoolWrite->new($bpc,
                                         "$OutDir/$ShareName/$f->{mangleName}",
                                         $f->{size}, $Compress);
        $poolWrite->write(\$f->{linkname});
        my $exist = processClose($poolWrite, "$ShareName/$f->{mangleName}",
                                 $f->{size});
	logFileAction($exist ? "pool" : "create", $f)
                                 if ( $Conf{XferLogLevel} >= 1 );
    } elsif ( $f->{type} == BPC_FTYPE_CHARDEV
           || $f->{type} == BPC_FTYPE_BLOCKDEV
           || $f->{type} == BPC_FTYPE_FIFO ) {
        #
        # Special files: for char and block special we write the
        # major and minor numbers to a plain file, that we pool
        # as usual.  For a pipe file we create an empty file.
        # The attributes remember the original file type.
        #
        my $data;
        if ( $f->{type} == BPC_FTYPE_FIFO ) {
            $data = "";
        } else {
            $data = "$f->{devmajor},$f->{devminor}";
        }
        pathCreate($dir, "$OutDir/$ShareName/$f->{mangleName}", $f);
        my $poolWrite = BackupPC::PoolWrite->new($bpc,
                                         "$OutDir/$ShareName/$f->{mangleName}",
                                         length($data), $Compress);
        $poolWrite->write(\$data);
        $f->{size} = length($data);
        my $exist = processClose($poolWrite, "$ShareName/$f->{mangleName}",
                                 length($data));
	logFileAction($exist ? "pool" : "create", $f)
                                 if ( $Conf{XferLogLevel} >= 1 );
    } else {
        print("Got unknown type $f->{type} for $f->{name}\n");
	$Errors++;
    }
    $Attrib{$dir}->set($file, {
			    type  => $f->{type},
			    mode  => $f->{mode},
			    uid   => $f->{uid},
			    gid   => $f->{gid},
			    size  => $f->{size},
			    mtime => $f->{mtime},
		       });
    return 1;
}

sub attributeWrite
{
    my($d) = @_;
    my($poolWrite);

    return if ( !defined($Attrib{$d}) );
    if ( $Attrib{$d}->fileCount ) {
        my $data = $Attrib{$d}->writeData;
        my $fileName = $Attrib{$d}->fileName("$OutDir/$d");
        my $poolWrite = BackupPC::PoolWrite->new($bpc, $fileName,
                                         length($data), $Compress);
        $poolWrite->write(\$data);
        processClose($poolWrite, $Attrib{$d}->fileName($d), length($data), 1);
    }
    delete($Attrib{$d});
}

sub processClose
{
    my($poolWrite, $fileName, $origSize, $noStats) = @_;
    my($exists, $digest, $outSize, $errs) = $poolWrite->close;

    if ( @$errs ) {
        print(join("", @$errs));
        $Errors += @$errs;
    }
    if ( !$noStats ) {
	$TotalFileCnt++;
	$TotalFileSize += $origSize;
    }
    if ( $exists ) {
	if ( !$noStats ) {
	    $ExistFileCnt++;
	    $ExistFileSize     += $origSize;
	    $ExistFileCompSize += $outSize;
	}
    } elsif ( $outSize > 0 ) {
        print(NEW_FILES "$digest $origSize $fileName\n");
    }
    return $exists && $origSize > 0;
}

#
# Generate a log file message for a completed file
#
sub logFileAction
{
    my($action, $f) = @_;
    my $owner = "$f->{uid}/$f->{gid}";
    my $name = $f->{name};
    $name = "." if ( $name eq "" );
    my $type  = (("", "p", "c", "", "d", "", "b", "", "", "", "l", "", "s"))
		    [($f->{mode} & S_IFMT) >> 12];
    $type = "h" if ( $f->{type} == BPC_FTYPE_HARDLINK );

    printf("  %-6s %1s%4o %9s %11.0f %s\n",
				$action,
				$type,
				$f->{mode} & 07777,
				$owner,
				$f->{size},
				$name);
}

#
# Create the parent directory of $file if necessary
#
sub pathCreate
{
    my($dir, $fullPath, $f) = @_;

    #
    # Get parent directory of each of $dir and $fullPath
    #
    # print("pathCreate: dir = $dir, fullPath = $fullPath\n");
    $dir      =~ s{/([^/]*)$}{};
    my $file  = $bpc->fileNameUnmangle($1);
    $fullPath =~ s{/[^/]*$}{};
    return if ( -d $fullPath || $file eq "" );
    unlink($fullPath) if ( -e $fullPath );
    mkpath($fullPath, 0, 0777);
    $Attrib{$dir} = BackupPC::Attrib->new({ compress => $Compress })
                                if ( !defined($Attrib{$dir}) );
    # print("pathCreate: adding file = $file to dir = $dir\n");
    $Attrib{$dir}->set($file, {
                            type  => BPC_FTYPE_DIR,
                            mode  => 0755,
                            uid   => $f->{uid},
                            gid   => $f->{gid},
                            size  => 0,
                            mtime => 0,
                       });
}

sub catch_signal
{
    my $sigName = shift;

    #
    # The first time we receive a signal we try to gracefully
    # abort the backup.  This allows us to keep a partial dump
    # with the in-progress file deleted and attribute caches
    # flushed to disk etc.
    #
    print("BackupPC_tarExtract: got signal $sigName\n");
    if ( !$Abort ) {
	$Abort++;
	$AbortReason = "received signal $sigName";
	return;
    }

    #
    # This is a second signal: time to clean up.
    #
    print("BackupPC_tarExtract: quitting on second signal $sigName\n");
    close(NEW_FILES);
    exit(1)
}

mkpath("$OutDir/$ShareName", 0, 0777);
open(NEW_FILES, ">>", "$TopDir/pc/$client/NewFileList")
                 || die("can't open $TopDir/pc/$client/NewFileList");
binmode(NEW_FILES);
binmode(STDIN);
1 while ( !$Abort && TarReadFile(*STDIN) );
1 while ( !$Abort && sysread(STDIN, my $discard, 1024) );

#
# Flush out remaining attributes.
#
foreach my $d ( keys(%Attrib) ) {
    attributeWrite($d);
}
close(NEW_FILES);

if ( $Abort ) {
    print("BackupPC_tarExtact aborting ($AbortReason)\n");
}

#
# Report results to BackupPC_dump
#
print("Done: $Errors errors, $ExistFileCnt filesExist,"
    . " $ExistFileSize sizeExist, $ExistFileCompSize sizeExistComp,"
    . " $TotalFileCnt filesTotal, $TotalFileSize sizeTotal\n");
