# Language.pm: Language Command Package package RDA::UI::Language; # $Id: Language.pm,v 1.12 2015/05/08 18:17:03 RDA Exp $ # ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/UI/Language.pm,v 1.12 2015/05/08 18:17:03 RDA Exp $ # # Change History # 20150508 MSC Improve the documentation. =head1 NAME RDA::UI::Language - Language Command Package =head1 SYNOPSIS -XLanguage ... -XLanguage ... =head1 DESCRIPTION The following commands are available: =cut use strict; BEGIN { use Exporter; use IO::File; use RDA::Text qw(get_string); use RDA::Agent; use RDA::Object::Message; use RDA::Object::Rda; use RDA::Options; } # Define the global public variables use vars qw($STRINGS $VERSION @ISA); $VERSION = sprintf('%d.%02d', q$Revision: 1.12 $ =~ /(\d+)\.(\d+)/); @ISA = qw(Exporter); # Define the global private constants # Define the global private variables # Report the package version sub Version { return $VERSION; } =for stopwords lang =head2 S This command analyzes all message files and reports which characters are lost when using the specified character sets. It supports the following command switches and arguments: =over 11 =item B< -l lang> Specifies an alternative language =item B< -s> Sets the single language mode. It disables using English for missing texts. =item B< file> Character set conversion file =item B< set> Character set name =back RDA returns a 0 (zero) exit status when no errors are detected. Otherwise, it returns a nonzero exit status. =cut sub check ## no critic (Complex) { my ($agt, @arg) = @_; my ($abr, $buf, $cfg, $ctl, $dir, $dsp, $err, $fmt, $lng, $opt, $ref, $tbl, @tbl, %mis, %set); # Parse the options $opt = RDA::Options::getopts('l:s', \@arg); $cfg = $agt->get_system; $cfg->set_value('RDA_CHARSET', 'utf8'); if (exists($opt->{'l'})) { die get_string('BAD_LANG', $opt->{'l'}) unless $opt->{'l'} =~ m/^([a-z]{2})$/; $cfg->set_value('RDA_LANG', $1); } # Validate the arguments $cfg = $agt->get_config; $dir = $cfg->get_group('D_RDA_MSG'); $err = 0; foreach my $arg (@arg) { my ($fil, $nam); if ($arg =~ m/\.dat$/i) { $nam = RDA::Object::Rda->basename($fil = $arg); } else { $fil = $cfg->cat_file($dir, 'charset', RDA::Text::norm_charset($nam = $arg).'.dat'); } if (-f $fil) { $set{$nam} = $fil; } else { print get_string('BAD_CHARSET', $arg); ++$err; } } return 1 if $err; # Reset the text control $RDA::Text::CONTROL = undef; $ctl = RDA::Text->new($cfg); $ctl->set_info('chk', 1) if exists($opt->{'s'}); # Load the message files $lng = $ctl->get_info('lng', 'en'); $tbl = {}; if (opendir(DIR, $cfg->cat_dir($dir, $lng))) { foreach my $fil (readdir(DIR)) { next unless $fil =~ s/\.txt$//i; $ref = $ctl->load_strings($fil, 1); foreach my $lin (values(%{$ref->get_info('fmt', {})})) { _check_line($tbl, $lin, $fil); } foreach my $rec (values(%{$ref->get_info('det', {})})) { foreach my $lin (@{$rec}) { _check_line($tbl, $lin, $fil); } } foreach my $rec (values(%{$ref->get_info('txt', {})})) { foreach my $lin (values(%{$rec})) { _check_line($tbl, $lin, $fil); } } $abr = $ref->get_info('abr', {}); $fmt = $ref->get_info('fmt', {}); $mis{$fil} = [sort @tbl] if (@tbl = grep {!exists($fmt->{$_})} keys(%{$abr})); } closedir(DIR); } # Analyze the POD files if (opendir(DIR, $dir = $cfg->get_dir('D_RDA_POD', $lng))) { my $ifh = IO::File->new; foreach my $fil (grep {m/\.pod$/i} readdir(DIR)) { if ($ifh->open('<'.$cfg->cat_file($dir, $fil))) { _check_line($tbl, $_, $fil) while (<$ifh>); $ifh->close; } } closedir(DIR); } # Indicate the non-ASCII characters used $buf = q{}; foreach my $chr (sort keys(%{$tbl})) { $buf .= _fmt_chr($chr).q{|} .join(q{, },sort keys(%{$tbl->{$chr}})).qq{\n}; } $buf = q{.M 2 '}.get_string('NonAscii', $lng).qq{'\n}.$buf.qq{\n.N 1\n} if $buf; # Treat the character sets foreach my $nam (sort keys(%set)) { $ref = $ctl->load_charset($set{$nam}); next unless ## no critic (Unless) (@tbl = grep {!exists($ref->{$_})} keys(%{$tbl})); $buf .= q{.M 2 '}.get_string('LostChars').qq{'\n} unless $err++; $buf .= qq{$nam:|}.join(q{, }, map {_fmt_str($_)} sort @tbl).qq{\n}; } $buf .= qq{\n.N 1\n} if $err; # Retrieve the errors if (@tbl = $ctl->list_errors) { $buf .= q{.T '}.get_string('Errors').qq{'\n}; foreach my $str (@tbl) { $buf .= qq{.I ' - '\n$str\n\n}; } $buf .= qq{.N 1\n}; ++$err; } # List missing texts if (@tbl = keys(%mis)) { $buf .= q{.M 2 '}.get_string('Missing').qq{'\n}; foreach my $fil (sort @tbl) { $buf .= qq{- $fil:|}.join(q{, }, @{$mis{$fil}}).qq{\n}; ++$err; } $buf .= qq{\n.N 1\n}; } # Display the report $agt->submit(q{.}, RDA::Object::Message->new('DISPLAY.DSP_REPORT', page => 1)->add_data($buf))->is_error($agt) if $buf; # Indicate the completion status return 1 if $err; $dsp->dsp_line(get_string('V_NoError', $lng)) if ($dsp = $agt->is_verbose); return 0; } sub _check_line { my ($tbl, $lin, $fil) = @_; return $lin =~ s/([\360-\367]...|[\340-\357]..|[\300-\377].)/ $tbl->{$1}->{$fil} = q{?};/eg; } sub _fmt_chr { my ($str) = @_; my ($buf, $chr, $off, @tbl); @tbl = unpack('C*', $str); $buf = join(q{}, map {sprintf(q{\134%03o}, $_)} @tbl); ## no critic (Interpolation) $chr = shift(@tbl); ## no critic (Bit,Number,Zero) $off = (($chr & 0340) == 0300) ? ($chr & 0037) : (($chr & 0360) == 0340) ? ($chr & 0017) : (($chr & 0370) == 0360) ? ($chr & 0007) : 0; foreach my $nxt (@tbl) { $off <<= 6; $off += $nxt & 077; } ## use critic return sprintf(q{U+%04x %s:}, $off, $buf); } sub _fmt_str { return join(q{}, map {sprintf(q{\134%03o}, $_)} unpack(q{C*}, ## no critic (Interpolation) shift)); } =head2 S This command extracts the message abbreviations from static C calls and reports by default the abbreviations that are not defined. This command does not allow to retrieve abbreviations defined dynamically. It supports the following command switches and arguments: =over 8 =item B< -a> Reports all abbreviations found. =item B< -m> Reports non-uppercase abbreviations found. =item B< -u> Reports uppercase abbreviations found. =item B< file> Specifies the Perl file to analyze. =back RDA returns a 0 (zero) exit status when the command does not report any abbreviation. Otherwise, it returns a nonzero exit status. =cut sub extract ## no critic (Complex) { my ($agt, @arg) = @_; my ($buf, $chk, $err, $ifh, $opt, $sel, $str, @str); # Parse the options $chk = 1; $sel = 0; $opt = RDA::Options::getopts('amu', \@arg); ($chk, $sel) = (0, -1) if $opt->{'m'}; ($chk, $sel) = (0, 1) if $opt->{'u'}; ($chk, $sel) = (0, 0) if $opt->{'a'}; # Treat all files $buf = q{}; $err = 0; $ifh = IO::File->new; foreach my $fil (@arg) { my ($cnt, $pkg, $txt, %abr); # Extract the abbreviations $cnt = 0; $ifh->open("<$fil") or die get_string('ERR_OPEN', $fil, $!); while (<$ifh>) { if (m/extract_strings\([^,]*,\s*(['"])([\w\:]+)\1\s*\)/) { $pkg = $2; } elsif (m/package\s+([\w\:]+);/) { $pkg = $1; } elsif (@str = m/get_string\(\s*(['"])(\w+)\1\s*[,\)]/g) { while (shift(@str)) { $abr{shift(@str)} = ++$cnt; } } elsif (@str = m/\sText\:(\w+)/g) { $abr{$txt} = ++$cnt while ($txt = shift(@str)); } } $ifh->close; next unless $cnt; # Get the messages die get_string('NO_PACKAGE') unless defined($pkg); RDA::Text::extract_strings(\$txt, $pkg); # Retrieve missing abbreviations $cnt = 0; unless ($sel < 0) ## no critic (Unless) { foreach my $abr (sort grep {$_ eq uc($_)} keys(%abr)) { next if $chk && exists($txt->{$abr}); $buf .= q{.T '}.get_string($chk ? 'Undefined' : 'Used').qq{'\n} unless $err++; $buf .= qq{.N 1\n.T '$fil:'\n} unless $cnt++; $buf .= qq{.I ' '\n$abr\n\n}; } } unless ($sel > 0) ## no critic (Unless) { foreach my $abr (sort grep {$_ ne uc($_)} keys(%abr)) { next if $chk && exists($txt->{$abr}); $buf .= q{.T '}.get_string($chk ? 'Undefined' : 'Used').qq{'\n} unless $err++; $buf .= qq{.N 1\n.T '$fil:'\n} unless $cnt++; $buf .= qq{.I ' '\n$abr\n\n}; } } } # Display the report $agt->submit(q{.}, RDA::Object::Message->new('DISPLAY.DSP_REPORT', page => 1)->add_data($buf))->is_error($agt) if $buf; # Indicate a successful completion return $err ? 2 : 0; } =head2 S This command displays the command syntax and the related explanations. =cut sub help { return shift->submit(q{.}, 'DISPLAY.DSP_POD', package => __PACKAGE__); } =head2 S This command generates the conversion table for the specified character sets. It requires the F command for determining the conversion rules. It supports the following command arguments: =for stopwords desc =over 8 =item B< set> Target character set name =item B< desc> Target character set description =back =cut sub map ## no critic (Builtin) { my ($agt, @arg) = @_; my ($dsc, $dsp, $err); # Parse the options RDA::Options::getopts(q{}, \@arg); # Generate the conversion table $err = 0; $dsp = $agt->is_verbose; foreach my $set (@arg) { ($set, $dsc) = split(/\//, $set, 2); $dsc = defined($dsc) ? qq{ ($dsc)} : q{}; $dsp->dsp_line(get_string('V_GenTable', $set)) if $dsp; $err += _map($set, $dsc); } # Indicate the completion status return $err ? 1 : 0; } sub _map { my ($set, $dsc) = @_; my ($err, $fil, $ifh, $off, $ofh, $ret, @tbl); # Generate the character set file name $fil = lc($set); $fil =~ s/[_\W]//g; $fil =~ s/^iso//; # List characters subject to conversion for (32..255) { push(@tbl, chr($_)); } # Determine the character correspondance $ifh = IO::File->new; $ofh = IO::File->new; for ($ret = 0 ;;) ## no critic (Loop) { # Create the input file $ofh->open(qq{>$fil.in}) or die get_string('ERR_CREATE', qq{$fil.in}, $!); foreach my $chr (@tbl) { print {$ofh} qq{$chr\n}; } $ofh->close; # Convert the characters open(CNV, ## no critic (Handle,Open) qq{iconv -f $set -t utf8 -o $fil.out $fil.in 2>&1 |}) or die get_string('ERR_CONVERT'); ($err) = ; close(CNV); if (defined($err)) { if ($err =~ m/illegal input sequence at position (\d+)/) { splice(@tbl, ($1 >> 1), 1); next; } elsif ($err) { print $err; $ret = 1; last } } # Generate the conversion table $ifh->open(qq{<$fil.out}) or die get_string('ERR_OPEN', qq{$fil.out}, $!); $ofh->open(qq{>$fil.dat}) or die get_string('ERR_CREATE', qq{$fil.dat}, $!); print {$ofh} qq{# UTF-8 / $set$dsc Conversion Table\n\n} .qq{# \$Id\$\n# ARCS: \$Header\$\n\n}; $off = 0; while(<$ifh>) { s/[\n\r]$//; unless ($_ eq $tbl[$off]) { s/([\000-\037\200-\377])/sprintf(qq{\\%03o}, ord($1))/eg; printf {$ofh} qq{\\%03o: \%s\n}, ord($tbl[$off]), $_; } ++$off; } $ofh->close; $ifh->close; last; } # Remove the temporary files 1 while unlink(qq{$fil.in}); 1 while unlink(qq{$fil.out}); # Indicate the completion status return $ret; } 1; __END__ =head1 SEE ALSO L, L, L, L, L, L =head1 COPYRIGHT NOTICE Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. =head1 TRADEMARK NOTICE Oracle and Java are registered trademarks of Oracle and/or its affiliates. Other names may be trademarks of their respective owners. =cut