#! /usr/bin/perl

# convert *.po files to texts.* files suitable for gfxboot
# usage: po2txt lang.po >texts.lang
# Note: en.po ist treated specially!

use Getopt::Long;
use Encode;

sub arabic_is_letter;
sub arabic_read_map;
sub arabic_conv;

sub read_texts;
sub join_msg;
sub fribidi;

$opt_product = "openSUSE";

GetOptions(
  'product=s' => \$opt_product,
  'verbose|v' => \$opt_arabic_verbose,
);


chomp ($tmp_file = `mktemp /tmp/po2txt.XXXXXXXXXX`);

arabic_read_map;

for $lang (@ARGV) {
  $lang = 'en' if $lang eq 'bootloader.pot';
  $lang =~ s/\.po$//;
  read_texts $lang;
}

unlink $tmp_file;


sub read_texts
{
  local $_;

  my ($lang, @f, $txt, $context, $t, $p, $ids, $file);

  $lang = shift;

  $file = "$lang.po";
  $file = 'bootloader.pot' if $lang eq 'en';

  if($lang eq 'en') {
    $ids = 1;
  }

  open F, $file;
  @f = (<F>);
  close F;

  map { s/<product>/$opt_product/g; } @f;

  $_ = $lang;
  s/.*\///;

  for (@f) {
    if(/^\s*#\.\s*(txt_\S+)/) {
      if($txt) {
        @msgstr = @msgid if $ids || join_msg(\@msgstr) eq "";
        $txts{$txt} = join_msg(\@msgstr);
      }

      $txt = $1;

      undef @msgid;
      undef @msgstr;
      undef $context;
      next;
    }

    next if /^\s*#.*|^\s*$/;
    
    if(/^\s*msgid\s*(\".*\")\s*$/) {
      push @msgid, $1 unless $1 eq '""';
      $context = 1;
      next;
    }

    if(/^\s*msgstr\s*(\".*\")\s*$/) {
      push @msgstr, $1 unless $1 eq '""';
      $context = 2;
      next;
    }

    if(/^\s*(\".*\")\s*$/) {
      if($context == 1) {
        push @msgid, $1;
      }
      elsif($context == 2) {
        push @msgstr, $1;
      }
      else {
        die "format oops in ${lang}.po: $_"
      }
    }
  }

  if($txt) {
    @msgstr = @msgid if $ids || join_msg(\@msgstr) eq "";
    $txts{$txt} = join_msg(\@msgstr);
  }

  @txts = sort keys %txts;

  for (@txts) {
    $txt = $txts{$_};
    $txt =~ s/\\"/"/g;
    $txt =~ s/\\\\/\\/g;
    $txt =~ s/\\n/\n/g;
    $txt = fribidi $txt if $lang =~ /^(ar|he)/;
    print "$txt\x00"
  }

  if($ids) {
    open W, ">text.inc";
    print W "%\n% This file is generated automatically. Editing it is pointless.\n%\n\n";
    print W "/texts [\n";
    $p = pop @txts;
    for (@txts) { print W "  /$_\n" }
    print W "  /$p\n] def\n";
    close W;
  }
  else {
    open F, "text.inc";
    for (<F>) {
      if(/\s+\/(txt_\S+)/) {
        $txt_ref{$1} = undef;
      }
    }
    close F;
    for (@txts) {
      $txt_list{$_}++;
      $txt_multi{$_} = 1 if $txt_list{$_} > 1;
    }
    for (@txts) {
      $txt_unknown{$_} = 1 unless exists $txt_ref{$_};
    }
    for (keys %txt_ref) {
      $txt_miss{$_} = 1 unless exists $txt_list{$_};
    }

    if(%txt_miss || %txt_unknown || %txt_multi) {
      print STDERR "$lang:\n";
      for (sort keys %txt_miss) {
        print STDERR "  missing: $_\n"
      }
      for (sort keys %txt_unknown) {
        print STDERR "  unknown: $_\n"
      }
      for (sort keys %txt_multi) {
        print STDERR "  multi: $_\n"
      }
    }
  }

}


sub join_msg
{
  local $_;
  my ($s, $msg, $m);

  $msg = shift;

  for $s (@{$msg}) {
    $_ = $s;
    s/^\"(.*)\"$/$1/;
    $m .= $_;
  }

  return $m;
}


sub fribidi
{
  local $_;
  my $m;

  $m = shift;

  open F, ">$tmp_file";
  print F arabic_conv($m);
  close F;

  open F, "cat $tmp_file | fribidi --nopad --nobreak |";
  $m = undef;
  while(read F, $_, 0x10000) {
    $m .= $_;
  }
  close F;

  return $m;
}


sub arabic_conv {
  local $_;
  my (@c, @m, $i, @attr, @attr_name);

  push @c, 0;
  push @c, unpack("V*", encode("utf32le", decode("utf8", $_[0])));
  push @c, 0;

  # isolated: 0, initial: 1, final: 2, medial: 3
  for ($i = 1; $i < @c - 1; $i++) {
    next if !arabic_is_letter $c[$i];
    $attr[$i-1] += 2 if arabic_is_letter $c[$i-1];
    $attr[$i-1] += 1 if arabic_is_letter $c[$i+1];
  }

  shift @c;
  pop @c;

  @attr_name = ( "isolated", "initial", "final", "medial" );

  for ($i = 0; $i < @c; $i++) {
    $m = $c[$i];
    if(arabic_is_letter $c[$i]) {
      $m = $arabic_map->{$c[$i]}{$attr_name[$attr[$i]]};
      if(!$m && $attr[$i] == 3) {		# medial -> final
        $attr[$i] = 2;
        $m = $arabic_map->{$c[$i]}{$attr_name[$attr[$i]]};
        if($m && $i < @c - 1) {
          if($attr[$i+1] == 3) {		# next char: medial -> initial
            $attr[$i+1] = 1;
          }
          elsif($attr[$i+1] == 1) {	# next char: initial ->isolated
            $attr[$i+1] = 0;
          }
        }
      }
      $m = $c[$i] unless $m;
    }
    push @m, $m;

    printf STDERR "%04x -> %04x (%s)\n", $c[$i], $m, $attr_name[$attr[$i]] if $opt_arabic_verbose;
  }

  return encode("utf8", decode("utf32le", pack("V*", @m)));
}


sub arabic_is_letter
{
  return $_[0] >= 0x600 && $_[0] <= 0x6ff ? 1 : 0
}


sub arabic_read_map
{
  local $_;
  my (@i, $u, $m);

  open F, "bin/arabic.txt";
  while(<F>) {
    @i = split /;/;
    $u = hex $i[0];
    if($i[5] =~ /^<(isolated|initial|final|medial)> (\S{4})$/) {
      $m = hex $2;
      if($arabic_map->{$m}{$1}) {
        printf STDERR "%04x already has a '$1' form: %04x\n", $arabic_map->{$m}{$1}
      }
      $arabic_map->{$m}{$1} = $u;
      # printf STDERR "%04x %s %04x\n", $u, $1, $m;
    }
  }
  close F;

  print STDERR "warning: no arabic support\n" unless $arabic_map
}


