#!/usr/bin/perl -w
# /=====================================================================\ #
# |  latexmllint                                                        | #
# | style & conformance tool for LaTeXML files                          | #
# |=====================================================================| #
# | support tools for LaTeXML:                                          | #
# |  Public domain software, produced as part of work done by the       | #
# |  United States Government & not subject to copyright in the US.     | #
# |---------------------------------------------------------------------| #
# | Bruce Miller <bruce.miller@nist.gov>                        #_#     | #
# | http://dlmf.nist.gov/LaTeXML/                              (o o)    | #
# \=========================================================ooo==U==ooo=/ #
use strict;
use warnings;
use Getopt::Long qw(:config no_ignore_case);
use Pod::Usage;
use FindBin;
use File::Which qw(which);
use File::Spec::Functions;
use Term::ANSIColor;
use Pod::Checker;
use Carp;
#======================================================================
my $identity  = 'latexmllint';
my ($help)    = (0);
my $VERBOSITY = 0;
my ($DOPRECOMMIT, $DOTIDY, $DOCRITIC, $DOSYNTAX, $DOPOD) = (0, 1, 1, 1, 1);
my %exclude = ();    # excluded features
my %include = ();
my ($DOMODIFY, $SEVERITY);
GetOptions(
  "help"       => \$help,
  "precommit!" => \$DOPRECOMMIT,
  "modify!"    => \$DOMODIFY,
  "tidy!"      => \$DOTIDY,
  "critic!"    => \$DOCRITIC,
  "pod!"       => \$DOPOD,
  "severity=s" => \$SEVERITY,
  "programs!"  => sub { if ($_[1]) { $include{program} = 1; } else { $exclude{program} = 1; } },
  "modules!"   => sub { if ($_[1]) { $include{module} = 1; } else { $exclude{module} = 1; } },
  "bindings!"  => sub { if ($_[1]) { $include{binding} = 1; } else { $exclude{binding} = 1; } },
  "verbose" => sub { $VERBOSITY++; },
  "quiet"   => sub { $VERBOSITY--; },
) or pod2usage(-message => $identity, -exitval => 1, -verbose => 0, -output => \*STDERR);
pod2usage(-message => $identity, -exitval => 1, -verbose => 2, -output => \*STDOUT) if $help;

#======================================================================
my $REQ_SEVERITY = 4;    # Critic conformance required for commit
my $DEF_SEVERITY = 3;    # preferred level of conformance
my @SCOLORS   = (undef, undef, undef, undef, "magenta", "red");
my @LINTPATHS = (qw(bin lib tools));

my $TOOLDIR    = $FindBin::RealBin;
my $LATEXMLDIR = catfile($TOOLDIR, updir());            # Assumes we're in tools subdir
my $LIBDIR     = catfile($LATEXMLDIR, 'blib', 'lib');
##my $LIBDIR     = catfile($LATEXMLDIR,'lib'); # Just use source dir, not blib! (don't assume "make")

my @FILES = expand_files((@ARGV ? @ARGV : map { catfile($LATEXMLDIR, $_) } @LINTPATHS));

local $ENV{ANSI_COLORS_DISABLED} = 1 unless -t STDOUT;  ## no critic # Disable colors unless to TTY.

if (!defined $SEVERITY) {                               # Default severity, if none requested
  $SEVERITY = ($DOPRECOMMIT ? $REQ_SEVERITY : $DEF_SEVERITY); }
if (!defined $DOMODIFY) {
  $DOMODIFY = ($DOPRECOMMIT ? 0 : 1); }
$VERBOSITY-- if $DOPRECOMMIT;

my $PERL = 'perl';
my $PERLTIDY;
my $PERLCRITIC;
my $EXITSTATUS = 0;
my ($NTOTAL, $NTIDY, $NCRITIC, $NFAIL) = (0, 0, 0, 0);
my @fails = ();
foreach my $file (@FILES) {
  my $classification = classify($file);
  next if grep { $$classification{$_} } keys %exclude;
  next if (keys %include) && (!grep { $$classification{$_} } keys %include);
  $NTOTAL++;
  my $failed = 0;
  # Various Perl code tests...
  if ($$classification{perl} && $DOSYNTAX) {
    $failed = 1 if doPerlSyntax($file, $classification); }
  if ($$classification{perl} && !$$classification{binding} && $DOPOD) {
    $failed = 1 if doPodChecker($file, $classification); }
  if ($$classification{perl} && $DOTIDY) {
    if (!$PERLTIDY && !($PERLTIDY = which('perltidy'))) {    # Look for perltidy exec.
      $DOTIDY = 0;                                           # Disable
      print STDERR "No perltidy was found; skipping tidy phase.\n"; }
    else {
      $failed = 1 if doPerltidy($file, $classification); } }
  if ($$classification{perl} && $DOCRITIC) {
    if (!$PERLCRITIC && !($PERLCRITIC = which('perlcritic'))) {
      $DOCRITIC = 0;                                         # Disable
      print STDERR "No perlcritic was found; skipping critic phase.\n"; }
    else {
      $failed = 1 if doPerlcritic($file, $classification); } }
  if ($failed) {
    $NFAIL++; push(@fails, $file);
    print color('bold') . "$file fails" . color('reset') . " commit criterion\n" if $VERBOSITY >= 0;
  }
}

print "\nExamined $NTOTAL file(s): "
  . ($DOMODIFY ? plural($NTIDY, 'was', 'were') . ' reformatted'
  : plural($NTIDY, 'needs', 'need') . ' reformatting') . '; '
  . plural($NCRITIC, 'needs', 'need') . ' code revision; '
  . plural($NFAIL,   'fails', 'fail') . " requirements for commit.\n"
  if ($VERBOSITY > -1);
print "\nExamined $NTOTAL file(s): "
  . ($NTIDY   ? plural($NTIDY,   'needs', 'need') . ' reformatting '  : ' ')
  . ($NCRITIC ? plural($NCRITIC, 'needs', 'need') . ' code revision ' : ' ')
  . plural($NFAIL, 'fails', 'fail') . " requirements for commit"
  . ($NFAIL ? ': ' . join(', ', @fails) : '')
  . ".\n"

  if $NFAIL && ($VERBOSITY == -1);
print "[Note: only policies with severity >= $REQ_SEVERITY inhibit a commit.]\n"

  if $NFAIL && $NCRITIC && ($VERBOSITY > -1);
exit($EXITSTATUS);

#======================================================================
# Possibilities here:
#   language: perl,css, xslt, javascript
#   program, module, binding...
sub classify {
  my ($file) = @_;
  if ($file =~ /^latexml[a-zA-Z]*$/) {    # No /i
    return { perl => 1, program => 1 }; }
  elsif ($file =~ /\.(?:pl)$/i) {
    return { perl => 1, program => 1 }; }
  elsif ($file =~ /\.(?:pm)$/i) {
    return { perl => 1, module => 1 }; }
  elsif ($file =~ /\.(?:ltxml|latexml)$/i) {
    return { perl => 1, binding => 1 }; }
  elsif ($file =~ /\.(?:css)$/i) {
    return { css => 1 }; }
  elsif ($file =~ /\.(?:js)$/i) {
    return { javascript => 1 }; }
  elsif ($file =~ /\.(?:xsl)$/i) {
    return { xslt => 1 }; }
  elsif (my $type = `file -b $file`) {
    if ($type =~ /^Perl script/) {
      return { perl => 1, program => 1 }; }
    elsif ($type =~ /ASCII text/) {    # Desperation due to flakey file command!
      my $TEST;
      open($TEST, '<', $file) or return {};
      my $firstline = <$TEST>;
      close($TEST);
      if ($firstline =~ m|^\#\!\s*(.*?)/bin/perl\s+\-w\s*$|) {
        return { perl => 1, program => 1 }; }
    } }
  return {}; }

#======================================================================
# Particular handlers.
# These should return 1 upon "failure", that is the file they test doesn't meet standards.

sub doPerlSyntax {
  my ($file, $classes) = @_;
  my $failed = 0;
  print "Checking '$file' syntax..." if $VERBOSITY > 0;
  # Use backticks, since otherwise can't manage the output (actually stderr, here!)...
  # Output doesn't distinguish errors from warnings?
  my $critique = `$PERL -I $LIBDIR -c $file 2>&1`;
  if ($critique) {    # file has issues
    my @filtered = ();
    $failed = 1;      # Assume failed, unless we find "OK"
    foreach my $line (split("\n", $critique)) {    # Scan the output of perl
      next if $line =~ /^Subroutine\s.*?\sredefined\sat\s\Q$file\E\sline\s\d+\.$/;    # IGNORE!
      if ($line =~ /^\Q$file\E\ssyntax\sOK$/) {
        $failed = 0; next; }
      push(@filtered, $line); }
    $EXITSTATUS = 1 if $failed;
    if (@filtered) {
      print color('bold');
      print "\n" . $file . ' ' if $VERBOSITY == 0;
      print " has syntax issues\n" if $VERBOSITY > -1;
      print color('reset');
      if ($VERBOSITY > -1) {
        foreach my $line (@filtered) {
          print '  ' . $line . "\n"; } } }
    else {
      print "syntax is OK\n" if $VERBOSITY > 0; } }
  else {    # Run error
    croak "Couldn't run $PERL on '$file': $!"; }
  return $failed; }

sub doPodChecker {
  my ($file, $classification) = @_;
  my $messages;
  my $messages_fh;
  open($messages_fh, '>', \$messages) or die "Can't capture output: $!";
  my $checker = Pod::Checker->new();
  print "Checking '$file' PODs..." if $VERBOSITY > 0;
  $checker->parse_from_file($file, $messages_fh);
  my $nerrors   = $checker->num_errors();
  my $nwarnings = $checker->num_warnings();

  if ($nerrors || $nwarnings) {
    print color('bold');
    print "\n" . $file . ' ' if $VERBOSITY == 0;
    print '' . ($nerrors == -1
      ? "has NO PODs!"
      : "had " . join(' & ', ($nerrors ? ("$nerrors POD errors") : ()),
        ($nwarnings ? ("$nwarnings POD warnings") : ()))) . "\n"
      if $VERBOSITY > -1;
    print color('reset');
    if ($messages && ($VERBOSITY > -1)) {
      # reformat the messages to be more palatable...
      foreach my $msg (split(/\n/, $messages)) {
        my $iserror = $msg =~ s/^\s*\*\*\*\s+ERROR/Error/;
        $msg =~ s/^\s*\*\*\*\s+WARNING/Warning/;
        print '  ' . ($iserror ? color('red') : '') . $msg . ($iserror ? color('reset') : '') . "\n"; } } }
  else {
    print " conformance is OK.\n" if $VERBOSITY > 0; }
  return ($nerrors > 0); }

sub doPerltidy {
  my ($file, $classes) = @_;
  print "Checking '$file' formatting..." if $VERBOSITY > 0;
  my $failed = 0;
  system($PERLTIDY,
    '--profile=' . catfile($TOOLDIR, 'latexml.perltidy'),
    "-o=$file.tdy",
    $file) == 0
    or croak "Couldn't run $PERLTIDY on '$file': $!";
  my $diffs = length(`diff $file $file.tdy`);
  if (!$diffs) {    # No diffs after formatting? No formatting needed.
    print " formatting is OK.\n" if $VERBOSITY > 0;
    unlink "$file.tdy"; }
  elsif (!$DOMODIFY) {    # Else have differences; Are we just reporting it?
    $EXITSTATUS = 1;
    $NTIDY++;             # perltidy wasn't happy
    $failed = 1;          # and it fails
    print "\n" . $file . ' ' if $VERBOSITY == 0;
    print " needs reformatting.\n" if $VERBOSITY > -1;
    unlink "$file.tdy"; }
  else {                  # Or are we going to apply the reformatting?
    $NTIDY++;             # perltidy wasn't happy, but since rewrote, doesn't fail
    rename $file, $file . ".bak";
    rename $file . ".tdy", $file;
    print "Reformatted $file.\n" if $VERBOSITY > -1; }
  return $failed; }

sub doPerlcritic {
  my ($file, $classes) = @_;
  my $failed = 0;
  print "Checking '$file' policy..." if $VERBOSITY > 0;
  # Use backticks, since otherwise can't manage the output...
  my $profile = catfile($TOOLDIR, 'latexml.perlcritic');
  my $themes = join(' and ', map { 'not non_' . $_ } keys %$classes);
#  my $critique = `$PERLCRITIC  --profile=$profile --theme='$themes' --severity=$SEVERITY --quiet $file`;
  my $critique = `$PERL -I $TOOLDIR $PERLCRITIC  --profile=$profile --theme='$themes' --severity=$SEVERITY --quiet $file`;
  if ($critique eq "") {    # No errors and no policy violations
    print " conformance is OK.\n" if $VERBOSITY > 0; }
  elsif ($critique) {       # Policy violations
    $NCRITIC++;             # perlcritic wasn't happy.
    print color('bold');
    print "\n" . $file . ' ' if $VERBOSITY == 0;
    print " has policy issues\n" if $VERBOSITY > -1;
    print color('reset');
    foreach my $line (split("\n", $critique)) {    # Scan the output of perlcritic
      my ($s) = ($line =~ /\(Severity:\s+(\d+)\)$/);
      $failed = 1 if $s >= $REQ_SEVERITY;
      # We'll RE-PRINT the output so we can RE color it!
      print '  ' . ($s && $SCOLORS[$s] ? colored($line, $SCOLORS[$s]) : $line) . "\n"
        if $VERBOSITY > -1; }
    $EXITSTATUS = 1 if $failed; }
  else {                                           # Run error
    croak "Couldn't run $PERLCRITIC on '$file': $!"; }
  return $failed; }

#======================================================================
# Low-level utilities
sub expand_files {
  my (@stuff) = @_;
  return map { expand_file($_) } @stuff; }

sub expand_file {
  my ($file) = @_;
  if ($file =~ /(?:^#|~$|\.bak$|\.tdy$)/) {
    return (); }
  elsif (-d $file) {
    my $DIR;
    opendir($DIR, $file);
    my @files = grep { /^[^\.#]/ } readdir($DIR);
    closedir($DIR);
    return map { expand_file(catfile($file, $_)) } sort @files; }
  elsif (-f $file) {
    return ($file); }
  else {
    print STDERR "Skipping $file; not normal file.\n";
    return (); } }

sub plural {
  my ($n, $single, $multi) = @_;
  return $n . ' ' . ($n == 1 ? $single : $multi); }

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

__END__

=head1 NAME

C<latexmllint> I<options> I<files> ...

=head1 SYNOPSIS

A tool for establish consistent coding standards for LaTeXML.
Code formatting is implemented by C<perltidy>,  using a prefered set of formatting rules.
Coding standards is checked by C<perlcritic>, again, using
a prefered set of Policies.

The files processed are given on the command line; directories given
there are processed recursively.  If no files are specified, then
all files in the C<bin>, C<lib> and C<tools> directories are processed.

Files may be excluded from this list according to the options that
include or exclude classes of files. Files are classified according
to type (perl, css, javascript, ...) and role (program, module, binding,...).
If you exclude certain classes (eg. C<--noprogram>) then all other classes
in the file list will be processed.  If you include certain classes
(eg. C<--binding>) explicitly, then only the explicitly named classes will
be processed.

Options:

  --precommit  Checks for minimal conformance required for committing,
               but doesn't allow reformatting of files.
  --noprecommit Default level checking, allows reformatting (default)
  --modify     Allows modifying (typically reformatting) files
              (default unless --precommit)
  --nomodify   Do not allow modifying files (default with --precommit)
  --tidy       Do run perltidy (default for perl files)
  --notidy     Do not run perltidy
  --critic     Do run perlcritic (default for perl files)
  --nocritic   Do not run perlcritic
  --severity   The severity level for perlcritic
               (default 3; or 4 when --precommit)
  --programs   Process programs only
  --noprograms Exclude programs
  --modules    Include perl modules only (.pm) (see below)
  --nomodules  Exclude perl modules
  --bindings   Include LaTeXML bindings only (.ltxml, .latexml)
  --nobindings Exclude LaTeXML bindings
  --verbose    Show more output
  --quiet      Show less output
  --help       Show this help message

Note that you can override the precommit hook by using the git option
C<--no-verify>, but please use only in extreme cases.

=cut

#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
