# Language.pm: Language Command Package

package RDA::UI::Language;

# $Id: Language.pm,v 1.12 2015/05/08 18:17:03 RDA Exp $
# ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/UI/Language.pm,v 1.12 2015/05/08 18:17:03 RDA Exp $
#
# Change History
# 20150508  MSC  Improve the documentation.

=head1 NAME

RDA::UI::Language - Language Command Package

=head1 SYNOPSIS

 <rda> <options> -XLanguage <command> <switches> <arg> ...

 <sdci> <options> -XLanguage <command> <switches> <arg> ...

=head1 DESCRIPTION

The following commands are available:

=cut

use strict;

BEGIN
{ use Exporter;
  use IO::File;
  use RDA::Text qw(get_string);
  use RDA::Agent;
  use RDA::Object::Message;
  use RDA::Object::Rda;
  use RDA::Options;
}

# Define the global public variables
use vars qw($STRINGS $VERSION @ISA);
$VERSION = sprintf('%d.%02d', q$Revision: 1.12 $ =~ /(\d+)\.(\d+)/);
@ISA     = qw(Exporter);

# Define the global private constants

# Define the global private variables

# Report the package version
sub Version
{ return $VERSION;
}

=for stopwords lang

=head2 S<check [-l lang] set|file ...>

This command analyzes all message files and reports which characters are lost
when using the specified character sets. It supports the following command
switches and arguments:

=over 11

=item B<  -l lang>

Specifies an alternative language

=item B<  -s>

Sets the single language mode. It disables using English for missing texts.

=item B<  file>

Character set conversion file

=item B<  set>

Character set name

=back

RDA returns a 0 (zero) exit status when no errors are detected. Otherwise, it
returns a nonzero exit status.

=cut

sub check  ## no critic (Complex)
{ my ($agt, @arg) = @_;
  my ($abr, $buf, $cfg, $ctl, $dir, $dsp, $err, $fmt, $lng, $opt, $ref, $tbl,
      @tbl, %mis, %set);

  # Parse the options
  $opt = RDA::Options::getopts('l:s', \@arg);
  $cfg = $agt->get_system;
  $cfg->set_value('RDA_CHARSET', 'utf8');
  if (exists($opt->{'l'}))
  { die get_string('BAD_LANG', $opt->{'l'})
      unless $opt->{'l'} =~ m/^([a-z]{2})$/;
    $cfg->set_value('RDA_LANG', $1);
  }

  # Validate the arguments
  $cfg = $agt->get_config;
  $dir = $cfg->get_group('D_RDA_MSG');
  $err = 0;
  foreach my $arg (@arg)
  { my ($fil, $nam);

    if ($arg =~ m/\.dat$/i)
    { $nam = RDA::Object::Rda->basename($fil = $arg);
    }
    else
    { $fil = $cfg->cat_file($dir, 'charset',
        RDA::Text::norm_charset($nam = $arg).'.dat');
    }
    if (-f $fil)
    { $set{$nam} = $fil;
    }
    else
    { print get_string('BAD_CHARSET', $arg);
      ++$err;
    }
  }
  return 1 if $err;

  # Reset the text control
  $RDA::Text::CONTROL = undef;
  $ctl = RDA::Text->new($cfg);
  $ctl->set_info('chk', 1) if exists($opt->{'s'});

  # Load the message files
  $lng = $ctl->get_info('lng', 'en');
  $tbl = {};
  if (opendir(DIR, $cfg->cat_dir($dir, $lng)))
  { foreach my $fil (readdir(DIR))
    { next unless $fil =~ s/\.txt$//i;
      $ref = $ctl->load_strings($fil, 1);
      foreach my $lin (values(%{$ref->get_info('fmt', {})}))
      { _check_line($tbl, $lin, $fil);
      }
      foreach my $rec (values(%{$ref->get_info('det', {})}))
      { foreach my $lin (@{$rec})
        { _check_line($tbl, $lin, $fil);
        }
      }
      foreach my $rec (values(%{$ref->get_info('txt', {})}))
      { foreach my $lin (values(%{$rec}))
        { _check_line($tbl, $lin, $fil);
        }
      }
      $abr = $ref->get_info('abr', {});
      $fmt = $ref->get_info('fmt', {});
      $mis{$fil} = [sort @tbl]
        if (@tbl = grep {!exists($fmt->{$_})} keys(%{$abr}));
    }
    closedir(DIR);
  }

  # Analyze the POD files
  if (opendir(DIR, $dir = $cfg->get_dir('D_RDA_POD', $lng)))
  { my $ifh = IO::File->new;
    foreach my $fil (grep {m/\.pod$/i} readdir(DIR))
    { if ($ifh->open('<'.$cfg->cat_file($dir, $fil)))
      { _check_line($tbl, $_, $fil) while (<$ifh>);
        $ifh->close;
      }
    }
    closedir(DIR);
  }

  # Indicate the non-ASCII characters used
  $buf = q{};
  foreach my $chr (sort keys(%{$tbl}))
  { $buf .= _fmt_chr($chr).q{|}
      .join(q{, },sort keys(%{$tbl->{$chr}})).qq{\n};
  }
  $buf = q{.M 2 '}.get_string('NonAscii', $lng).qq{'\n}.$buf.qq{\n.N 1\n}
    if $buf;

  # Treat the character sets
  foreach my $nam (sort keys(%set))
  { $ref = $ctl->load_charset($set{$nam});
    next unless  ## no critic (Unless)
      (@tbl = grep {!exists($ref->{$_})} keys(%{$tbl}));
    $buf .= q{.M 2 '}.get_string('LostChars').qq{'\n} unless $err++;
    $buf .= qq{$nam:|}.join(q{, }, map {_fmt_str($_)} sort @tbl).qq{\n};
  }
  $buf .= qq{\n.N 1\n} if $err;

  # Retrieve the errors
  if (@tbl = $ctl->list_errors)
  { $buf .= q{.T '}.get_string('Errors').qq{'\n};
    foreach my $str (@tbl)
    { $buf .= qq{.I ' - '\n$str\n\n};
    }
    $buf .= qq{.N 1\n};
    ++$err;
  }

  # List missing texts
  if (@tbl = keys(%mis))
  { $buf .= q{.M 2 '}.get_string('Missing').qq{'\n};
    foreach my $fil (sort @tbl)
    { $buf .= qq{- $fil:|}.join(q{, }, @{$mis{$fil}}).qq{\n};
      ++$err;
    }
    $buf .= qq{\n.N 1\n};
  }

  # Display the report
  $agt->submit(q{.}, RDA::Object::Message->new('DISPLAY.DSP_REPORT',
    page => 1)->add_data($buf))->is_error($agt) if $buf;

  # Indicate the completion status
  return 1 if $err;
  $dsp->dsp_line(get_string('V_NoError', $lng)) if ($dsp = $agt->is_verbose);
  return 0;
}

sub _check_line
{ my ($tbl, $lin, $fil) = @_;

  return $lin =~ s/([\360-\367]...|[\340-\357]..|[\300-\377].)/
    $tbl->{$1}->{$fil} = q{?};/eg;
}

sub _fmt_chr
{ my ($str) = @_;
  my ($buf, $chr, $off, @tbl);

  @tbl = unpack('C*', $str);
  $buf = join(q{},
    map {sprintf(q{\134%03o}, $_)} @tbl);         ## no critic (Interpolation)
  $chr = shift(@tbl);
  ## no critic (Bit,Number,Zero)
  $off = (($chr & 0340) == 0300) ? ($chr & 0037) :
         (($chr & 0360) == 0340) ? ($chr & 0017) :
         (($chr & 0370) == 0360) ? ($chr & 0007) :
                                   0;
  foreach my $nxt (@tbl)
  { $off <<= 6;
    $off += $nxt & 077;
  }
  ## use critic
  return sprintf(q{U+%04x %s:}, $off, $buf);
}

sub _fmt_str
{ return join(q{},
    map {sprintf(q{\134%03o}, $_)} unpack(q{C*},  ## no critic (Interpolation)
    shift));
}

=head2 S<extract [-a|m|u] file ...>

This command extracts the message abbreviations from static C<get_string> calls
and reports by default the abbreviations that are not defined. This command
does not allow to retrieve abbreviations defined dynamically.

It supports the following command switches and arguments:

=over 8

=item B<  -a>

Reports all abbreviations found.

=item B<  -m>

Reports non-uppercase abbreviations found.

=item B<  -u>

Reports uppercase abbreviations found.

=item B<  file>

Specifies the Perl file to analyze.

=back

RDA returns a 0 (zero) exit status when the command does not report any
abbreviation. Otherwise, it returns a nonzero exit status.

=cut

sub extract  ## no critic (Complex)
{ my ($agt, @arg) = @_;
  my ($buf, $chk, $err, $ifh, $opt, $sel, $str, @str);

  # Parse the options
  $chk = 1;
  $sel = 0;
  $opt = RDA::Options::getopts('amu', \@arg);
  ($chk, $sel) = (0, -1) if $opt->{'m'};
  ($chk, $sel) = (0,  1) if $opt->{'u'};
  ($chk, $sel) = (0,  0) if $opt->{'a'};

  # Treat all files
  $buf = q{};
  $err = 0;
  $ifh = IO::File->new;
  foreach my $fil (@arg)
  { my ($cnt, $pkg, $txt, %abr);

    # Extract the abbreviations
    $cnt = 0;
    $ifh->open("<$fil") or die get_string('ERR_OPEN', $fil, $!);
    while (<$ifh>)
    { if (m/extract_strings\([^,]*,\s*(['"])([\w\:]+)\1\s*\)/)
      { $pkg = $2;
      }
      elsif (m/package\s+([\w\:]+);/)
      { $pkg = $1;
      }
      elsif (@str = m/get_string\(\s*(['"])(\w+)\1\s*[,\)]/g)
      { while (shift(@str))
        { $abr{shift(@str)} = ++$cnt;
        }
      }
      elsif (@str = m/\sText\:(\w+)/g)
      { $abr{$txt} = ++$cnt while ($txt = shift(@str));
      }
    }
    $ifh->close;
    next unless $cnt;

    # Get the messages
    die get_string('NO_PACKAGE') unless defined($pkg);
    RDA::Text::extract_strings(\$txt, $pkg);

    # Retrieve missing abbreviations
    $cnt = 0;
    unless ($sel < 0)  ## no critic (Unless)
    { foreach my $abr (sort grep {$_ eq uc($_)} keys(%abr))
      { next if $chk && exists($txt->{$abr});
        $buf .= q{.T '}.get_string($chk ? 'Undefined' : 'Used').qq{'\n}
          unless $err++;
        $buf .= qq{.N 1\n.T '$fil:'\n} unless $cnt++;
        $buf .= qq{.I '  '\n$abr\n\n};
      }
    }
    unless ($sel > 0)  ## no critic (Unless)
    { foreach my $abr (sort grep {$_ ne uc($_)} keys(%abr))
      { next if $chk && exists($txt->{$abr});
        $buf .= q{.T '}.get_string($chk ? 'Undefined' : 'Used').qq{'\n}
          unless $err++;
        $buf .= qq{.N 1\n.T '$fil:'\n} unless $cnt++;
        $buf .= qq{.I '  '\n$abr\n\n};
      }
    }
  }

  # Display the report
  $agt->submit(q{.}, RDA::Object::Message->new('DISPLAY.DSP_REPORT',
    page => 1)->add_data($buf))->is_error($agt) if $buf;

  # Indicate a successful completion
  return $err ? 2 : 0;
}

=head2 S<help>

This command displays the command syntax and the related explanations.

=cut

sub help
{ return shift->submit(q{.}, 'DISPLAY.DSP_POD', package => __PACKAGE__);
}

=head2 S<map set[.desc] ...>

This command generates the conversion table for the specified character sets. It
requires the F<iconv> command for determining the conversion rules. It supports
the following command arguments:

=for stopwords desc

=over 8

=item B<  set>

Target character set name

=item B<  desc>

Target character set description

=back

=cut

sub map  ## no critic (Builtin)
{ my ($agt, @arg) = @_;
  my ($dsc, $dsp, $err);

  # Parse the options
  RDA::Options::getopts(q{}, \@arg);

  # Generate the conversion table
  $err = 0;
  $dsp = $agt->is_verbose;
  foreach my $set (@arg)
  { ($set, $dsc) = split(/\//, $set, 2);
    $dsc = defined($dsc) ? qq{ ($dsc)} : q{};
    $dsp->dsp_line(get_string('V_GenTable', $set)) if $dsp;
    $err += _map($set, $dsc);
  }

  # Indicate the completion status
  return $err ? 1 : 0;
}

sub _map
{ my ($set, $dsc) = @_;
  my ($err, $fil, $ifh, $off, $ofh, $ret, @tbl);

  # Generate the character set file name
  $fil = lc($set);
  $fil =~ s/[_\W]//g;
  $fil =~ s/^iso//;

  # List characters subject to conversion
  for (32..255)
  { push(@tbl, chr($_));
  }

  # Determine the character correspondance
  $ifh = IO::File->new;
  $ofh = IO::File->new;
  for ($ret = 0 ;;)  ## no critic (Loop)
  { # Create the input file
    $ofh->open(qq{>$fil.in}) or die get_string('ERR_CREATE', qq{$fil.in}, $!);
    foreach my $chr (@tbl)
    { print {$ofh} qq{$chr\n};
    }
    $ofh->close;

    # Convert the characters
    open(CNV,  ## no critic (Handle,Open)
      qq{iconv -f $set -t utf8 -o $fil.out $fil.in 2>&1 |})
      or die get_string('ERR_CONVERT');
    ($err) = <CNV>;
    close(CNV);
    if (defined($err))
    { if ($err =~ m/illegal input sequence at position (\d+)/)
      { splice(@tbl, ($1 >> 1), 1);
        next;
      }
      elsif ($err)
      { print $err;
        $ret = 1;
        last
      }
    }

    # Generate the conversion table
    $ifh->open(qq{<$fil.out}) or die get_string('ERR_OPEN', qq{$fil.out}, $!);
    $ofh->open(qq{>$fil.dat}) or die get_string('ERR_CREATE', qq{$fil.dat}, $!);
    print {$ofh} qq{# UTF-8 / $set$dsc Conversion Table\n\n}
      .qq{# \$Id\$\n# ARCS: \$Header\$\n\n};
    $off = 0;
    while(<$ifh>)
    { s/[\n\r]$//;
      unless ($_ eq $tbl[$off])
      { s/([\000-\037\200-\377])/sprintf(qq{\\%03o}, ord($1))/eg;
        printf {$ofh} qq{\\%03o: \%s\n}, ord($tbl[$off]), $_;
      }
      ++$off;
    }
    $ofh->close;
    $ifh->close;
    last;
  }

  # Remove the temporary files
  1 while unlink(qq{$fil.in});
  1 while unlink(qq{$fil.out});

  # Indicate the completion status
  return $ret;
}

1;

__END__

=head1 SEE ALSO

L<RDA::Agent|RDA::Agent>,
L<RDA::Object::Message|RDA::Object::Message>,
L<RDA::Object::Rda|RDA::Object::Rda>,
L<RDA::Options|RDA::Options>,
L<RDA::Request::DISPLAY|RDA::Request::DISPLAY>,
L<RDA::Text|RDA::Text>

=head1 COPYRIGHT NOTICE

Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.

=head1 TRADEMARK NOTICE

Oracle and Java are registered trademarks of Oracle and/or its
affiliates. Other names may be trademarks of their respective owners.

=cut