#!/usr/bin/perl
use HTML::TreeBuilder;
use HTML::Element;

  # Enable strictures for the rest of the script; every variable below is
  # lexical.
use strict;
use warnings;

  # Check that the input file is readable and get its last-modified time,
  # which becomes the date in the man page header.  The input defaults to
  # doc/babel101/babelcmd.html when no argument is given.
my $filename = defined $ARGV[0] ? $ARGV[0] : "doc/babel101/babelcmd.html";
die "Cannot read $filename" unless ( -r $filename );
my $mtime = ( stat $filename )[9];    # element 9 of stat() is the mtime

  # Parse html file
my $tree = HTML::TreeBuilder->new();
$tree->parse_file( $filename )
   or die "Cannot parse $filename: $!";

  # Get Babel version from <h1>...</h1>; it stays undefined when there is no
  # <h1> or its text does not match "Babel <version> "
my $body = $tree->look_down( "_tag", "body" )
   or die "No <body> element found in $filename";
my $h1 = $body->look_down( "_tag", "h1" );
my $babel_version;
if ( $h1 && ( $h1->as_text =~ /Babel ([0-9.x]+) / ) )
{
   $babel_version = $1;
}

  # Scan up to first <h2> tag among the direct children of <body>.
  # The loop is bounded by the list itself so that a document without any
  # top-level <h2> is reported instead of looping forever on an empty list.
my @doclist = $body->content_list();
while ( @doclist )
{
   last if ( ref( $doclist[0] ) and ( $doclist[0]->tag eq "h2" ) );
   shift @doclist;
}
die "No top-level <h2> section found in $filename" unless ( @doclist );

  # Write man page header: .TH BABEL 1 "[Version <v>: ]<day> <Mon> <year>"
my @date   = localtime $mtime;
my @months = qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
print ".TH BABEL 1 \"";
print "Version $babel_version: " if ( defined $babel_version );
printf "%d %s %d\"\n", $date[3], $months[ $date[4] ], 1900 + $date[5];

  # Interpret remainder as <h2> section name </h2> section body <h2> ...
  # @doclist is used as a worklist: each non-<h2> element is replaced by its
  # children at the front of the list, so the document is visited in order.
while ( @doclist )
{
   my $elem = shift @doclist;

   if ( ! ref( $elem ) )
   {
        # Elem is just text (not a tag): drop non-breaking spaces and emit
        # the text unchanged otherwise
      $elem =~ s/\xA0//g;
      print "$elem";
      next;
   }

   my $tag = $elem->tag;
   if ( $tag eq "h2" )
   {
        # Section heading: the flattened text of the <h2> becomes the .SH
        # title, upper-cased and with every hyphen escaped as \-.
        # The children of the <h2> are not visited again.
      my $label = $elem->as_text;
      $label =~ s/-/\\-/g;
      printf "\n.SH %s\n", uc( $label );
      next;
   }
#    elsif ($tag eq "br")
#    {
#       print "\n";
#       next;
#    }

     # Any other element: start a new paragraph for <p>, then queue the
     # element's children ahead of the remaining nodes
   print "\n.PP\n" if ( $tag eq "p" );
   unshift @doclist, $elem->content_list;
}
