#!/usr/bin/perl
# vim:ft=perl:cindent:ts=4:sts=4:sw=4:et:fdm=marker:cms=\ #\ %s
#
# dwww-convert -- convert docs to HTML if necessary
#
# Some types of files (e.g. UNIX man pages) are typically not handled by most
# webbrowser setups; we convert these to HTML. Other types (e.g. PDF files
# or PNG images) are best handled by or via the user's webbrowser.
#
# Simple usage: $0 <type> <location>
#   <type> is document type: text, man, html, ps, and so on
#   <location> is full pathname to original document
#
# Part of the Debian dwww package.  Written by Lars Wirzenius.
# Modified by Robert Luberda.
# "@(#)dwww:$Id: dwww-convert 520 2009-01-15 20:44:47Z robert $"

use strict;

use Debian::Dwww::Utils;
use Debian::Dwww::Initialize;
use MIME::Types;
use Cwd qw(realpath);
#$|=1;

#
# Setup defaults.
#
################################################################
#
# Initialization
#



my $dwwwvars = &DwwwInitialize("/etc/dwww/dwww.conf");

my $templates_dir  = "/usr/share/dwww/templates";
my $template_start = "$templates_dir/dwww-convert.start";
my $template_end   = "$templates_dir/dwww-convert.end";
my $template_dir_start = "$templates_dir/dwww-convert.dir.start";
my $template_dir_end   = "$templates_dir/dwww-convert.dir.end";
my $dwww_use_cache     = lc($dwwwvars->{'DWWW_USE_CACHE'}) eq "yes";
my $dwww_usefileurl    = lc($dwwwvars->{'DWWW_USEFILEURL'}) eq "yes";



################################################################
#
# Local dwww-convert functions
#


#
# Get Last Modification date
#


#######################################################################
#
# Builtin convertFunctions
#

sub OpenPipe() { # {{{
    my $program = shift;
    my $mode    = shift;

    my $FH      = undef;

    if ($mode eq "r") {
        return \*STDIN unless defined $program;
        $program = "$program |";
    } elsif ($mode eq "w") {
        return \*STDOUT unless defined $program;
        $program = "| $program";
    } else {
        die("OpenPipe: Unknown mode: $mode\n");
    }

    open($FH, $program) or die "Can't open pipe for $program: $!\n";

    return $FH;
} # }}}


sub ClosePipe() { # {{{
    my $FH = shift;

#    &fflush($FH) or die("Can't flush: $!");
    return if ($FH == \*STDIN || $FH == \*STDOUT);
    close($FH) or die("Cannot close filehandle: $!\n");
} # }}}


sub DecompressFile() { # {{{
    my $filename        = shift;
    my $OUT_FH          = shift;
    my $add_filtr_prog  = shift;
    my $decompressor    = undef;
    my $IN_FH           = shift;

    if ($filename =~ /\.gz$/) {
        $decompressor = "zcat \"$filename\"";
    } elsif ($filename =~ /\.bz2$/) {
        $decompressor = "bzcat \"$filename\"";
    }

    if (defined $add_filtr_prog) {
        if (defined $decompressor) {
            $decompressor .= " | $add_filtr_prog";
        } else {
            $decompressor = "$add_filtr_prog < \"$filename\"";
        }
    }


    if (defined $decompressor) {
        if (defined $OUT_FH) {
            $IN_FH = &OpenPipe($decompressor, "r")
        } else {
            system($decompressor);
            return;
        }
    } else {
        open($IN_FH, $filename) or die("Can't open file $filename: $!\n");
        $OUT_FH = \*STDOUT unless defined $OUT_FH;
    }

    my $s=$/;
    $/=4096;
    while (<$IN_FH>) {
        print $OUT_FH $_;
    }
    $/=$s;
    &ClosePipe($IN_FH);
} # }}}

#
# Create a directory listing in HTML.
#
sub IsTopLevelDir() { # {{{
    my $dir         = shift;

    my $dwww_docpath            = $dwwwvars->{'DWWW_DOCPATH'};
    # file does exist, check if it match any files in @dwww_docpath
    foreach my $path  (@$dwww_docpath)  {
        if ( -d $path ) {
            $path = &realpath( $path );
            return 1 if ($dir eq $path);
        }
    }
    return 0;
} # }}}

sub BuiltinDir2Html() { # {{{
    my $dir         = shift;
    my $cacheProg   = shift;
    my $charset     = shift;
    my $f           = undef;
    my @filelist = ();
    my @dirlist  = ();



    my $OUT_FH = &OpenPipe($cacheProg, "w");

    my $html_dir = &HTMLEncode($dir);
    my $url_dir  = &URLEncode($dir);

    print $OUT_FH &TemplateFile($template_dir_start,
                                        { 'TITLE'   => "Files in $html_dir",
                                          'MENUADD' => &IsTopLevelDir($dir) ? "" : " | <a href=\"../?type=dir\">Up one level</a> ",
                                          'FILEURL' => $url_dir });

    opendir (DIR, $dir) or die "Can't open directory $dir:$!\n";
    while ($f = readdir(DIR)) {
            next if $f eq ".";
            next if $f eq "..";
            if ( -d $dir . "/" . $f ) {
                push(@dirlist, $f);
            } elsif ( -f _ ) {
                push(@filelist, $f);
            }
    }
    closedir(DIR);

    if ($#filelist > -1 ) {
            @filelist = sort(@filelist);
            my $table = &BeginTable($OUT_FH, "", 3);
            foreach $f (@filelist) {
                    if ($dwww_usefileurl && ($f =~ /\.htm.*$/)) {
                        &AddToTable($OUT_FH, $table,
                                    "<a href=\"file://localhost/" . $url_dir . '/' .  &URLEncode($f) . "\">" . &HTMLEncode($f) . "</a>");
                    } else {
                        &AddToTable($OUT_FH, $table,
                                    "<a href=\"" . &URLEncode($f) . "\">" . &HTMLEncode($f) . "</a>");
                    }
            }
           &EndTable($OUT_FH, $table);
    }

    if ($#dirlist > -1 ) {
            @dirlist = sort(@dirlist);
            my $table = &BeginTable($OUT_FH, "<FONT SIZE=\"+2\">Subdirectories:</FONT>", 3);
            foreach $f (@dirlist) {
                    if ($dwww_usefileurl && defined (my $g = &GuessFileName("$dir/$f/index", 1 ))) {
                        &AddToTable($OUT_FH, $table,
                                    "<a href=\"file://localhost" . &URLEncode($g) .
                                    "\">" . &HTMLEncode($f) . "</a>");
                    } else {
                        &AddToTable($OUT_FH, $table,
                                "<a href=\"" .&URLEncode($f)
                                 . "/?type=dir\">" . &HTMLEncode($f) . "</a>");
                    }
            }
           &EndTable($OUT_FH, $table);
    }

    print $OUT_FH &TemplateFile($template_dir_end, { } );
    &ClosePipe($OUT_FH);
} # }}}


#
# Convert a manual page source code file to HTML.
#
sub BuiltinMan2Html() { # {{{
    my $filename    = shift;
    my $cacheProg   = shift;
    my $charset     = shift;
    my $title       = undef;

    die "Internal error: invalid charset\n" unless $charset eq "UTF-8";

    my $OUT_FH      = &OpenPipe($cacheProg, "w");

    ($title  = $filename) =~ s/^.*\/(.+)\.(\d[^\.\\]*)(\.gz|\.bz2)?$/$1($2)/;

    print $OUT_FH &TemplateFile($template_start,
                                    { 'TITLE'   => "$title - Man pages",
                                      'MENUADD' => " | <a href=\"/dwww/man/\">Manual pages</a> ",
                                      'FILEURL' => &URLEncode($filename)
                                    });

    my $dir = $filename;
    $dir =~ s/\/[^\/]*$//;
    chdir ("$dir/..") or die "Can't chdir!\n";
    $ENV{'MAN_KEEP_FORMATTING'} = 1;
    my $IN_FH = &OpenPipe("man  -EUTF-8 -P/bin/cat  -l \"$filename\" 2>/dev/null | dwww-txt2html --man --utf8", "r");
    chdir ("/");
    while (<$IN_FH>) {
        print $OUT_FH $_;
    }

    print $OUT_FH &TemplateFile($template_end, { });
    &ClosePipe($IN_FH);
    &ClosePipe($OUT_FH);
} # }}}

#
# Convert plain text to HTML.  This is really trivial, and buggy.
# Input from stdin.
#
sub BuiltinText2Html() { # {{{
    my $filename    = shift;
    my $cacheProg   = shift;
    my $charset     = shift;

    my $OUT_FH      = &OpenPipe($cacheProg, "w");
    print $OUT_FH &TemplateFile($template_start,
                                    { 'TITLE'   => &HTMLEncode($filename),
                                      'MENUADD' => " | <a href=\"./?type=dir\"> Show directory contents </a> ",
                                      'FILEURL' => &URLEncode($filename)
                                    });

    if ($charset && $charset eq "UTF-8") {
        &DecompressFile($filename, $OUT_FH, "dwww-txt2html --utf8");
    } else {
        &DecompressFile($filename, $OUT_FH, "dwww-txt2html");
    }

    print $OUT_FH &TemplateFile($template_end, { }) or die "Can't print\n";
    &ClosePipe($OUT_FH);
} # }}}

#
# Convert info file to HTML using info2www
#
sub BuiltinInfo2Html() { # {{{
    my $filename    = shift;
    my $cacheProg   = shift;
    my $charset     = shift;

    my $cachePipe   = defined $cacheProg ? " | $cacheProg" : "";
    system("/usr/lib/cgi-bin/info2www \"$filename\"" . $cachePipe);
} # }}}


sub GuessFileName() { # {{{
    my $basefile = shift;
    my $is_html  = shift;

    foreach my $lang ( ("", ".en") ) {
        foreach my $comp ( ("", ".gz", ".bz2") ) {
            if ($is_html) {
                foreach my $suff (".html", ".htm" ) {
                    my $f = "${basefile}${suff}${lang}${comp}";
                    return $f if (-f $f and -r $f);
                }
            } else {
                my $f = "${basefile}${lang}${comp}";
                return $f if (-f $f and -r $f);
            }
        }
    }
    return undef;
} # }}}



sub PrintHeaders() { # {{{
    my $filename    = shift;
    my $base_name   = shift;
    my $mime_type   = shift;
    my $mime_charset= shift;

    print "Content-type: $mime_type" . (defined $mime_charset ? "; charset=$mime_charset\n" : "\n");
    my @stat = stat( $filename );
    my $mtime = $stat[9];
    print "Last-modified: " . gmtime($mtime) . "\n";
    print "Content-Disposition: inline; filename=\"$base_name\"\n";
    print "\n";
} # }}}


################################################################
#
# Main program
#
my $uses_path_info = 1;

while ($ARGV[0] =~ m/^--(.*)$/) {
        shift @ARGV;
        if ($1 eq "no-path-info") { $uses_path_info = 0 }
        elsif (not $1) { last; }
        else {
                print STDERR "Error: unknown option `$ARGV[0]'\n";
                print STDERR "Usage: $0 [--no-path-info] <type> <location>\n";
                exit(1);
        }
}



if ( $#ARGV < 1 || $ARGV[0] eq "" || $ARGV[1] eq "" ) {
    print STDERR "Error: invalid arguments\n";
    print STDERR "Usage: $0 [--no-path-info] <type> <location>\n";
    exit 1;
}

my $type = $ARGV[0];
my $file = $ARGV[1];
my $orig_file=$file;

# remove anchor
$file =~ s/#.*$//;

if ( ! -e $file ) {

    if ( $type eq "runman" ) {
        if ( $file =~ /^(.+)\/([^\/]+)$/ ) {
            my $name  = $1;
            my $sect  = $2;
            my @files = &GetCommandOutput("man", "--location", "-e", $sect, $name);
            $file = $files[0] unless $#files < 0;
            $file =~ s/\s.*$//;

            if ( $file eq "" or ! -e $file ) {
                &ErrorMsg ( "404 Man page not found" ,
                            "Man page not found" ,
                            "dwww could not find the man page $name($sect)" );
            }
        }

    } elsif ( $type eq "html" || $type eq "text/html" ) {

        # A link may have referred to a .html file
        # when only a .html.gz file exists.  So check
        # to see if alternate file exists, and use
        # that one if it does
        my $tmp=$file;
        $tmp =~ s/(.*)\.htm.*$//;   # remove suffix
        if (defined ( $tmp = &GuessFileName($tmp, 1) ) ) {
            $file = $tmp;
        }

    } elsif (  defined ( my $tmp = &GuessFileName($file) ) ) {
        $file = $tmp;
    }
}

# Check if we can show file
$file = &CheckAccess( $dwwwvars, $file, $orig_file );
die "Internal error" if $file eq ""; # CheckAccess should have written the error message and exited.

# Check if we should send redirect
if ( -d $file && $orig_file !~ /\/$/) {
    $orig_file          .= "/";
    $uses_path_info     = $FALSE;
}
if (!$uses_path_info && $type ne "runman") {
    &RedirectToURL("/cgi-bin/dwww" . &URLEncode($orig_file) . "?type=" . &URLEncode($type));
    exit(0);
}



if ( -d $file ) {
    if ( $type ne "dir" ) {
        $type = "dir";

        if (defined (my $f = &GuessFileName( "$file/index", 1 ) ) ){
            if ($f ne $file) {
                $type       = "html";
                $file       = $f;
            }
       }
    }
} elsif ( $type eq "dir" ) {
    $type = "file";
}


if ( $type eq "info"  && ! -x "/usr/lib/cgi-bin/info2www" ) {
    &ErrorMsg ("500 dwww error",
               "Cannot convert info files",
               "dwww could not find the <strong>info2www</strong> program, which is required to conver the info files" );
    # NOT REACHED
}

(my $base_name = $file) =~ s/^.*\///g;

# identify the compression algorithm used for non-directories
# then, calculate the name of the uncompressed file (in extension)
if ( ! -d $file ) {
    $base_name =~ s/\.(gz|bz2|en)$//;

    # identify the file type from the file extension
    # or using the "file" command.

    if ( $type eq "file" )  {
        # first find the file extension
        my $extension = ($base_name =~ /\.([^\/\.]+)$/) ? $1 : "";

        # then guess the file type depending on the extension
        # if the extension is of size 0 or more than 4
        # text is assumed.

        if ($extension eq "" || $extension eq "txt" || $extension eq "text") {
            $type = "text/plain";
        } elsif ($extension eq "htm" || $extension eq "html" || $extension eq "shtml") {
            $type = "text/html";
        } elsif ($extension eq "css") {
            $type = "text/css";
        } elsif ($extension eq "md") {
            # TODO: add proper support for markdown files, see #973315
            $type = "text/markdown";
        } elsif ($extension =~ /^[1-9][a-z]*/ && $file =~ /\/man\//) {
        # if file is located in */man/* directory, assume it's a man page
            $type = "man";
        } elsif ($extension =~ /^info.*/ && $file =~ /\/info\//) {
            $type = "info";
        }  elsif (length($extension) > 4) {
            $type = "text/plain";
        } else {
            # use MIME:Types to guess extension
             my $mimetypes = MIME::Types->new();
             if (defined (my $t = $mimetypes->mimeTypeOf($extension))) {
                $type = $t;
             } else {
                $type = "text/plain"; # default
             }
        }
    }
}

my $convertFunction = undef;
my $mime_type = "text/html";
my $mime_charset = "UTF-8";
# Ok, now we can convert the file
if ($type eq "man" || $type eq "runman") {
    $convertFunction  =   \&BuiltinMan2Html;
} elsif ($type eq "dir") {
    $convertFunction  =   \&BuiltinDir2Html;
} elsif ($type eq "info") {
    $convertFunction  =   \&BuiltinInfo2Html;
} elsif ($type eq "text/plain" or $type eq "text/markdown") {
    $convertFunction  =   \&BuiltinText2Html;
} else {
    $mime_type = $type unless $type eq "html";
    # Do not set mime type for other types, including html docs,
    # which either they have a header or we can mostly trust the
    # server and browsers to guess correctly.
    $mime_charset = undef;
}

&PrintHeaders($file, $base_name, $mime_type, $mime_charset);

if ( not defined $convertFunction )  {
    &DecompressFile($file);
    exit 0;
}

my $cacheProg = undef;
if ($dwww_use_cache) {
        # Store the file in the cache unless it is already stored
        if (system ("dwww-cache --lookup \"$type\" \"$file\"") == 0) {
            exit 0;
        } else {
            $cacheProg = "dwww-cache --store \"$type\" \"$file\"";
        }
}

&$convertFunction( $file, $cacheProg , $mime_charset );
exit 0
