# Text.pm: Class Used for Managing the Texts package RDA::Text; # $Id: Text.pm,v 1.17 2015/10/12 14:48:40 RDA Exp $ # ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/Text.pm,v 1.17 2015/10/12 14:48:40 RDA Exp $ # # Change History # 20151012 MSC Eliminate 5.22 warnings. =head1 NAME RDA::Text - Class Used for Managing the Texts =head1 SYNOPSIS require RDA::Text; =head1 DESCRIPTION This package is designed to manage strings and texts subject to translation. It only supports character sets that include ASCII characters. The following methods are available: =cut use strict; BEGIN { use Exporter; use IO::File; use RDA::Error; use Symbol; # use RDA::Object::Rda; # Avoid circular references } # Define the global public variables use vars qw($CONTROL $STRINGS $TRACE $VERSION @DELETE @DUMP @EXPORT_OK @ISA); $TRACE = \*STDOUT; $VERSION = sprintf('%d.%02d', q$Revision: 1.17 $ =~ /(\d+)\.(\d+)/); @DELETE = qw(sub); @DUMP = ( hsh => {'RDA::Text' => 1}, str => {map {$_ => 0} qw(abr det par _dsc _tbl)}, ); @EXPORT_OK = qw(add_string extract_strings get_string norm_charset wrap_string debug debug_object); @ISA = qw(RDA::Error Exporter); # Define the global private constants my $SYNTAX = ['Syntax', 'Invalid syntax in %s near line(s) %s']; # Define the global private variables my %tb_set = ( 'cygwin' => '.88591', ); # Report the package version sub Version { return $VERSION; } =head2 S<$h = RDA::Text-Enew($config[,$agent])> The control object constructor. It takes the reference to RDA software configuration as an argument. The language is derived from the content of the C, C, C, or C environment variables. The character set is first extracted from the C environment variable. When not defined, it considers the C or C environment variables on UNIX and the output of the F command on Windows. =head2 S<$h-Enew($oid)> The string file object constructor. It takes the object identifier as an argument. C is represented by a blessed hash reference. The following special keys are used: =over 12 =item S< B<'abr' > > Abbreviation hash (S) =item S< B<'chk' > > Check indicator (M) =item S< B<'det' > > Message detail hash (S) =item S< B<'dir' > > Message top directory (M) =item S< B<'err' > > Last errors (M) =item S< B<'flg' > > Detail indicator (M) =item S< B<'fmt' > > Message format hash (S) =item S< B<'lng' > > Optional language (M) =item S< B<'oid' > > Object identifier (M,S) =item S< B<'par' > > Reference to the parent object (S) =item S< B<'txt' > > Message text hash (S) =item S< B<'_cfg'> > Software configuration (M) =item S< B<'_cnv'> > Character set conversion function (M) =item S< B<'_col'> > Screen width (in columns) =item S< B<'_dsc'> > Message description hash (S) =item S< B<'_grp'> > Group hash (S) =item S< B<'_mod'> > Module string ranges (S) =item S< B<'_sub'> > String objects (M) =item S< B<'_tbl'> > Character set conversion table (M) =back Internal keys are prefixed by an underscore. =cut sub new { my ($cls, @arg) = @_; my ($slf); if (ref($cls)) { my ($oid) = @arg; # Delete any previous occurrence $cls->{'_sub'}->{$oid}->delete_object if exists($cls->{'_sub'}->{$oid}); # Create the text object $cls->{'_sub'}->{$oid} = $slf = bless { abr => {}, fmt => {}, oid => $oid, par => $cls, typ => 'S', txt => {}, _dsc => {}, _grp => {}, _mod => {}, }, ref($cls); } elsif (ref($CONTROL)) { $slf = $CONTROL; } else { my ($cfg, $agt) = @arg; my ($fil, $lng, $set); # Assume a default configuration unless (ref($cfg)) { eval 'require RDA::Object::Rda'; die "RDA-00403: Cannot manage texts\n $@\n" if $@; $cfg = RDA::Object::Rda->new_dft; } # Create the master object $CONTROL = $slf = bless { chk => 0, dir => $cfg->get_group('D_RDA_MSG'), err => [], flg => 0, oid => $cfg->get_oid, typ => 'M', _cnv => \&_no_cnv, _col => $cfg->get_columns, _sub => {}, _cfg => $cfg, }, $cls; # Determine the language and character set requirements ($lng, $set) = split(/\./, $agt->get_info('str', q{})) if defined($agt); $lng = _get_lang($cfg, $lng); $slf->{'lng'} = $lng if $lng ne 'en' && -d $cfg->cat_dir($slf->{'dir'}, $lng); $slf->{'_tbl'} = $fil if defined($set = _get_charset($cfg, $set, $lng)) && -f ($fil = $cfg->cat_dir($slf->{'dir'}, 'charset', "$set.dat")); # Save the language and character set in the specified agent $agt->set_info('str', defined($set) ? $lng.q{.}.$set : $lng) if defined($agt); } # Return the object reference return $slf; } # Determine the display character set sub _get_charset { my ($cfg, $set, $lng) = @_; my ($str); return if $lng eq 'en'; unless (defined($str = $set || $cfg->get_system('RDA_CHARSET'))) { if ($cfg->is_unix) { return unless defined($str = $cfg->get_env('LANG') || $cfg->get_env('LC_ALL') || $tb_set{$^O}) && $str =~ m/\.([\w\-]+)/; $str = $1; } elsif ($cfg->is_windows) { $str = `chcp`; return unless $str =~ m/:\s+(\d+)\s*$/; $str = "cp$1"; } else { return; } } return norm_charset($str); } # Determine the display language sub _get_lang { my ($cfg, $lng) = @_; my ($str); return (defined($str = $lng || $cfg->get_system('RDA_LANG') || $cfg->get_env('LANG') || $cfg->get_env('LC_ALL') || $cfg->get_env('LANGUAGE')) && $str =~ m/^([a-z]{2})/i) ? lc($1) : 'en'; } =head2 S<$h-Eget_detail($id[,$default])> This method returns the detail lines associated to a message number. It returns the default when not found. =head2 S<$h-Eget_format($code,$number[,$default])> This method returns the format string associated to a message number. It returns the default when not found. =cut sub get_detail { return _get_attr('det', @_) } sub get_format { return _get_attr('fmt', @_) } sub _get_attr { my ($key, $slf, $id, $dft) = @_; my ($cod, $num, $ptr); # Load the file on the first usage $slf = $slf->get_top; ($cod, $num) = split(/\-/, $id, 2); $ptr = $slf->{'_sub'}; $ptr = (exists($ptr->{$cod}) && exists($ptr->{$cod}->{'det'})) ? $ptr->{$cod} : load_strings($slf, $cod, 1); # Return the message details return exists($ptr->{$key}->{$num}) ? $ptr->{$key}->{$num} : $dft; } =head2 S<$h-Eget_handle($file)> This method returns a file handle to a translated Plain Old Documentation (POD) file when it exists, otherwise it returns an undefined value. =cut sub get_handle { my ($slf, $fil) = @_; my ($cfg, $ifh, $pth); # Initialize the conversion routine on first usage $slf->{'_tbl'} = $slf->load_charset($slf->{'_tbl'}) if defined($slf->{'_tbl'}) && !ref($slf->{'_tbl'}); # Abort when the POD file is not found $cfg = $slf->{'_cfg'}; return unless ($slf->{'lng'} && -r ($pth = $cfg->get_file('D_RDA_POD', $slf->{'lng'}.q{/}.$fil))) || -r ($pth = $cfg->get_file('D_RDA_POD', "en/$fil")); # Create and return the file handle $ifh = bless Symbol::gensym(), ref($slf) || $slf; tie *$ifh, $ifh; ## no critic (Tie) *$ifh->{'cnv'} = $slf->{'_cnv'}; *$ifh->{'tbl'} = $slf->{'_tbl'}; return $ifh->open($pth); } sub get_missing { my ($slf) = @_; return (1, sort keys(%{$slf->{'abr'}})); } sub get_unused { my ($slf) = @_; return (2, sort keys(%{$slf->{'abr'}})); } =head2 S<$h-Eload_charset($file)> This method loads a character set conversion table. It first loads the transliteration rules for the current language, and next the character set-specific rules. It returns a reference to the conversion table hash. =cut sub load_charset { my ($slf, $fil) = @_; my $tbl = {}; # Load the transliteration rules for the language $slf = $slf->get_top; $slf->{'_cnv'} = \&_utf_cnv if exists($slf->{'lng'}) && _load_table($slf, $tbl, $slf->{'_cfg'}->cat_file($slf->{'dir'}, $slf->{'lng'}, 'rules.dat')); # Load the character set conversion rules $slf->{'_cnv'} = \&_utf_cnv if _load_table($slf, $tbl, $fil); # Return the conversion table return $tbl; } sub _load_table { my ($slf, $tbl, $fil) = @_; my ($chr, $cnt, $ifh, $lin, $num, @tbl); $cnt = $num = 0; $ifh = IO::File->new; if ($ifh->open("<$fil")) { $lin = q{}; while (<$ifh>) { # Trim spaces and join continuation lines ++$num; s/^\s+//; s/[\n\r]+$//; $lin .= $_; next if $lin =~ s/\\$//; $lin =~ s/\s+$//; # Treat the line $lin =~ s/\s*#.*$//; ($chr, @tbl) = split(/[\s:]+/, $lin); if (defined($chr) && length($chr)) { $chr =~ s{\\(\d{3})}{chr(oct($1))}eg; foreach my $itm (@tbl) { next unless defined($chr) && length($chr); $itm =~ s{\\(\d{3})}{chr(oct($1))}eg; $tbl->{$itm} = $chr; ++$cnt; } } elsif ($lin !~ m/^(?:\#.*)?$/) { $slf->add_error(get_string($SYNTAX, $fil, $num)); } # Prepare the next line $lin = q{}; } $ifh->close; } return $cnt; } =head2 S<$h-Eload_strings($code[,$flag[,$dir...]])> This method loads the strings for the specified code. It first loads the English strings before loading the strings in the current language. It formats the string based on their type and converts from UTF-8 to the appropriate character set. When the flag is set, it loads the message details also. You can specify alternative message directory structures as extra arguments. It returns the object reference. =cut sub load_strings { my ($slf, $cod, $flg, @dir) = @_; my ($fil, $grp, $ifh, $lin, $num, $obj, $pth, $cfg); return $slf->{'_sub'}->{$cod} if exists($slf->{'_sub'}->{$cod}); # Initialization $slf = $slf->get_top; $fil = $cod.'.txt'; $flg = 1 if $cod eq 'common'; $ifh = IO::File->new; $obj = $slf->new($cod); $cfg = $slf->{'_cfg'}; # Initialize the conversion routine on first usage $slf->{'_tbl'} = $slf->load_charset($slf->{'_tbl'}) if defined($slf->{'_tbl'}) && !ref($slf->{'_tbl'}); # Load the description file foreach my $dir (@dir, $slf->{'dir'}) { next unless $ifh->open('<'.($pth = $cfg->cat_file($dir, 'desc', $fil))); $lin = q{}; $num = 0; while (<$ifh>) { # Trim spaces and join continuation lines ++$num; s/^\s+//; s/[\n\r]+$//; $lin .= $_; next if $lin =~ s/\\$//; $lin =~ s/\s+$//; # Treat the line if ($lin =~ m/^(\d+)\s+(\w+)\s*(\S*)/) { $obj->{'abr'}->{$1} = $2; $obj->{'_dsc'}->{$1} = _decode($3); } elsif ($lin =~ m/^=+\s+((\w+::)*\w+)\s+(\d+)\.(\d+)(\s+(\w+))?/) { $grp = defined($6) ? $6 : _fmt_grp($1); $obj->{'_grp'}->{$grp} = $1; $obj->{'_mod'}->{$1} = [0 + $3, 0 + $4]; } elsif ($lin !~ m/^(?:\#.*)?$/) { $slf->add_error(get_string($SYNTAX, $pth, $num)); } # Prepare the next line $lin = q{}; } $ifh->close; last; } # Load the English file unless ($slf->{'chk'}) { foreach my $dir (@dir, $slf->{'dir'}) { next unless $ifh->open('<'.($pth = $cfg->cat_file($dir, 'en', $fil))); _load_strings($obj, $ifh, $pth, $flg, \&_no_cnv); last; } } # Load the translated file if (exists($slf->{'lng'})) { foreach my $dir (@dir, $slf->{'dir'}) { next unless $ifh->open('<'.($pth = $cfg->cat_file($dir, $slf->{'lng'}, $fil))); _load_strings($obj, $ifh, $pth, $flg, $slf->{'_cnv'}, $slf->{'_tbl'}); last; } } # Return the string file object return $obj; } # Format the group name sub _fmt_grp { my ($nam) = @_; $nam =~ s/[_\W]+/_/g; return $nam; } # Load an string file sub _load_strings ## no critic (Complex) { my ($slf, $ifh, $pth, $flg, $fct, $tbl) = @_; my ($cod, $err, $lin, $num, $mod, @tbl); $cod = $slf->{'oid'}; $err = undef; $num = 0; while (<$ifh>) { # Trim spaces and join continuation lines ++$num; s/^\s+//; s/[\n\r]+$//; $lin .= $_; next if $lin =~ s/\\$//; $lin =~ s/\s+$//; # Treat the line if ($lin =~ m/^(\d+)\s*'([^']*)'/) { my ($suf); $suf = exists($slf->{'_dsc'}->{$1}) ? $slf->{'_dsc'}->{$1} : q{}; $slf->{'abr'}->{$1} = $1 unless exists($slf->{'abr'}->{$1}); $slf->{'det'}->{$err = $1} = [] if $flg; $slf->{'fmt'}->{$1} = &$fct("$cod-$1: "._decode($2).$suf, $tbl); } elsif ($lin =~ m{^\[[A-Za-z]\]}) { push(@{$slf->{'det'}->{$err}}, $lin) if defined($err); } elsif ($lin =~ m/^(((\w+::)*\w+),)?(\w+)\s*'([^']*)'/) { my ($key, $val); $mod = !defined($2) ? $cod : exists($slf->{'_grp'}->{$2}) ? $slf->{'_grp'}->{$2} : $2; $key = $4; $val = &$fct(_decode($5), $tbl); if ($key =~ m/^V_/) { $val .= qq{\n}; } elsif ($key =~ m/^VI_/) { $val = "\t$val\n"; } elsif ($key =~ m/^D_/) { $val = "\t$val"; } elsif ($key =~ m/^I_/) { $val = "\t\t$val"; } elsif ($key =~ m/^T_/) { $val = " $val"; } $slf->{'txt'}->{$mod}->{$key} = $val; } elsif ($lin =~ m/^===\s*([\:\/\w]+)\s*===/) { $slf->{'det'}->{$err = $1} = [] if $flg; } else { $err = undef; push(@tbl, $num) unless $lin =~ m/^(\#.*)?$/; } # Prepare the next line $lin = q{}; } $ifh->close; # Indicate the completion status $slf->add_error(get_string($SYNTAX, $pth, join(', ', @tbl))) if @tbl; return; } =head1 STATIC METHODS =head2 S This exported method adds a translated string to module string table.. It requires the definition of a C<$STRINGS> global public variable in each module where used. It initializes the variable on first invocation. =cut sub add_string { my ($key, $str) = @_; my ($cls, $tbl); # Reject missing identifier or missing string return unless defined($key) && defined($str); # Localize special variables local $!; ## no critic (Local) local $@; ## no critic (Local) # Load the module strings on first usage $cls = caller; $tbl = eval qq{\\\$$cls\::STRINGS}; ## no critic (Eval) return "RDA-00405: Cannot add the string for \"$key\"\n" if $@; extract_strings($tbl, $cls) unless ref($$tbl); # Add the string return $$tbl->{$key} = $str; } =head2 S<$RDA::Text::debug($str...)> This method adds a debug line into the trace file. =cut sub debug { my (@arg) = @_; _write(join(q{}, grep {defined($_) && !ref($_)} @arg).qq{\n}); return; } sub _write { my ($buf) = @_; my ($lgt, $max, $off); $max = length($buf); $off = 0; while ($max && ($lgt = syswrite($TRACE, $buf, $max, $off))) { $max -= $lgt; $off += $lgt; } return; } =head2 S<$RDA::Text::debug_object($obj,$lvl[,$str,...])> This method adds an object dump as a debug line into the specified file. =cut sub debug_object { my ($obj, $lvl, @arg) = @_; _write($obj->dump($lvl, join(q{}, grep {defined($_) && !ref($_)} @arg), 1).qq{\n}); return; } =head2 S This method returns a hash reference with all messages associated to the specified package. It considers the calling package by default. It automatically loads the messages. =cut sub extract_strings { my ($tbl, $pkg, @dir) = @_; my ($cod, $max, $min, $mod, $obj, $slf); # Get the control object $slf = __PACKAGE__->new; # Load the file on the first usage ($cod, $mod) = split(/::/, $pkg || caller, 2); $mod = $cod unless defined($mod); $obj = exists($slf->{'_sub'}->{$cod}) ? $slf->{'_sub'}->{$cod} : load_strings($slf, $cod, $slf->{'flg'}, @dir); # Extract the strings ($min, $max) = exists($obj->{'_mod'}->{$mod}) ? @{$obj->{'_mod'}->{$mod}} : (0, 99999); return $$tbl = {%{exists($obj->{'txt'}->{$mod}) ? $obj->{'txt'}->{$mod} : {}}, map {$obj->{'abr'}->{$_} => $obj->{'fmt'}->{$_}} grep {$_ >= $min && $_ <= $max} keys(%{$obj->{'abr'}})}; } =head2 S This exported method retrieves a translated string. It requires the definition of a C<$STRINGS> global public variable in each module where used. It initializes the variable on first invocation. =cut sub get_string { my ($key, @arg) = @_; my ($cls, $dft, $tbl); # Reject missing identifier return unless defined($key); # Localize special variables local $!; ## no critic (Local) local $@; ## no critic (Local) # Load the module strings on first usage $cls = caller; $tbl = eval qq{\\\$$cls\::STRINGS}; ## no critic (Eval) return "RDA-00400: Cannot get the string for \"$key\"\n" if $@; extract_strings($tbl, $cls) unless ref($$tbl); # Treat a custom default if (ref($key) eq 'ARRAY') { ($key, $dft) = @{$key}; return sprintf((exists($$tbl->{$key}) && defined($$tbl->{$key})) ? $$tbl->{$key} : $dft, @arg); } # Eliminate strings with wide characters foreach my $arg (@arg) { ($arg) = $arg =~ m/^([\001-\377]*)$/; } # Format the string local $^W = 0; return (!exists($$tbl->{$key})) ? "RDA-00401: Missing description for \"$key\" in \"$cls\"\n" : defined($$tbl->{$key}) ? sprintf($$tbl->{$key}, grep {defined($_)} @arg) : "RDA-00402: Missing text for \"$key\" in \"$cls\"\n"; } =head2 S This method normalizes a character set name. =cut sub norm_charset { my ($nam) = @_; $nam = lc($nam); $nam =~ s/[_\W]//g; $nam =~ s/^iso//; return $nam; } =head2 S This method wraps a string. The string is wrapped according to the screen width. The prefix is added on each screen line. Non-space characters in the prefix are replaced by spaces on continuous lines. The last argument indicates the number of line feeds to add at the end of the string (1 by default). It supports the C<\nnn> and C<\Oxnn> character encoding in both prefix and text. =cut sub wrap_string { my ($pre, $txt, $nxt) = @_; my ($buf, $cnt, $col, $lgt, $slf, $str, @lin); # Get the control object $slf = __PACKAGE__->new; # Wrap the string $buf = q{}; $pre =~ s/\001//; $pre =~ s/\\([0-3][0-7]{2}|0x[0-9A-Fa-f]{2})/chr(oct($1))/eg; $col = $slf->{'_col'} - length($pre); $cnt = (@lin = split(/\n|\\012/, $txt)); $nxt = 1 unless defined($nxt); foreach my $lin (@lin) { --$cnt; $str = q{}; $lgt = $col; $lin =~ s/[\r\s]+$//; foreach my $wrd (split(/\s+/, $lin)) { $wrd =~ s/\\([0-3][0-7]{2}|0x[0-9A-Fa-f]{2})/chr(oct($1))/eg; $lgt += length($wrd) + 1; if ($lgt > $col) { if (length($str)) { $buf .= $pre.$str.qq{\n}; $pre =~ s/\S/ /g; } $lgt = length($wrd); $str = $wrd; } else { $str .= q{ }; $str .= $wrd; } } if (length($str)) { $str .= qq{\n} if $cnt || $nxt > 0; $buf .= $pre.$str; $pre =~ s/\S/ /g; } elsif ($cnt || $nxt > 0) { $buf .= qq{\n}; } } $buf .= qq{\n} x $nxt if --$nxt > 0; return $buf; } # --- Conversion routines ----------------------------------------------------- # Decode a value sub _decode { my ($val) = @_; $val =~ s/\\012/\n/g; $val =~ s/\\([0-3][0-7]{2}|0x[0-9A-Fa-f]{2})/chr(oct($1))/eg; return $val; } # Convert UTF-8 characters sub _utf_cnv { my ($str, $tbl) = @_; $str =~ s{([\360-\367]...|[\340-\357]..|[\300-\377].)} {exists($tbl->{$1}) ? $tbl->{$1} : q{?}}eg; return $str; } sub _no_cnv { return shift; } # --- File handle methods ----------------------------------------------------- sub close ## no critic (Ambiguous,Builtin) { my ($slf) = @_; *$slf->{'ifh'}->close if defined(*$slf->{'ifh'}); delete *$slf->{'cnv'}; delete *$slf->{'ifh'}; delete *$slf->{'tbl'}; undef *$slf; return 1; } sub getline { my ($slf) = shift; my ($lin); return defined($lin = *$slf->{'ifh'}->getline) ? &{*$slf->{'cnv'}}($lin, *$slf->{'tbl'}) : undef; } sub getlines { my ($slf) = @_; my ($lin, @tbl); die get_string('BAD_GETLINES') unless wantarray; push(@tbl, $lin) while defined($lin = $slf->getline); return @tbl; } sub open ## no critic (Builtin) { my ($slf, $pth) = @_; # Create the object when not yet done return $slf->new($pth) unless ref($slf); # Open the file if ($pth) { my $ifh = IO::File->new; $ifh->open("<$pth"); *$slf->{'ifh'} = $ifh; } # Return the object reference return $slf; } my $und = sub { return }; *BINMODE = $und; *CLOSE = \&close; sub DESTROY { } *EOF = $und; *FILENO = $und; *GETC = $und; *OPEN = $und; *PRINT = $und; *PRINTF = $und; *READ = $und; sub READLINE { goto &getlines if wantarray; goto &getline; } *SEEK = $und; *TELL = $und; sub TIEHANDLE { my $slf = shift; unless (ref($slf)) { $slf = bless Symbol::gensym(), $slf; $slf->open(@_); } return $slf; } *WRITE = $und; 1; __END__ =head1 SEE ALSO L, L, L, L =head1 COPYRIGHT NOTICE Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. =head1 TRADEMARK NOTICE Oracle and Java are registered trademarks of Oracle and/or its affiliates. Other names may be trademarks of their respective owners. =cut