# Token.pm: Class Used to Manage Lexical Units

package RDA::Object::Token;

# $Id: Token.pm,v 1.6 2015/04/29 13:55:40 RDA Exp $
# ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/Object/Token.pm,v 1.6 2015/04/29 13:55:40 RDA Exp $
#
# Change History
# 20150211  MSC  Fix symbols.

=head1 NAME

RDA::Object::Token - Class Used to Manage Lexical Units

=head1 SYNOPSIS

 use RDA::Object::Lex;

 @tok = qw(ADDOP   [-+]
           INTEGER [1-9]\d*);

 $lex = RDA::Object::Lex->new(@tok)->from(\*DATA);

 $ADDOP   = $lex->get_definition('ADDOP');
 $INTEGER = $lex->get_definition('INTEGER');

 $txt = $INTEGER->get_next;
 print "$txt\n" if $INTEGER->status;

 $txt = $ADDOP->get_next;
 print "$txt\n" if $ADDOP->status;

 print "$txt\n" if $INTEGER->isnext(\$txt);

 __END__
 1+2

=head1 DESCRIPTION

The C<RDA::Object::Token> package defines the lexical units used by lexical
analyzers. The C<new> and C<init> methods of the
L<RDA::Object::Lex|RDA::Object::Lex> package indirectly creates a
C<RDA::Object::Token> instance for each recognized lexical unit. It is a sub
class of L<RDA::Object|RDA::Object>.

=head1 SUBCLASSES

Subclasses of the C<RDA::Object::Token> class are defined. They allow to
recognize specific structures. Following subclasses are available:

=over 2

=item *

C<RDA::Token::Simple> : for defining 'ordinary' lexical units;

=item *

C<RDA::Token::Complex> : for defining lexical units that may require reading
additional data;

=item *

C<RDA::Token::Action> : for action-related lexical units;

=item *

C<RDA::Token::Quoted> : for recognizing, for example, strings within double
quotation marks.

=back

=head1 ERROR HANDLING

To handle the cases of nonrecognition of lexical units, you can define a special
C<Token> object at the end of the list of tokens that defines the lexical
analyzer. When the search for this token succeeds, it is then possible to call
an error handling routine.

=head1 METHODS

The following methods are available:

=cut

use strict;

BEGIN
{ use Exporter;
  use RDA::Text qw(get_string);
  use RDA::Object;
  use RDA::Token::Action;
  use RDA::Token::Complex;
  use RDA::Token::Quoted;
  use RDA::Token::Simple;
}

# Define the global public variables
use vars qw($AUTOLOAD $EOI $STRINGS $VERSION @DUMP @ISA %SDCL);
$EOI     = __PACKAGE__->new('EOI');
$VERSION = sprintf('%d.%02d', q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
@DUMP    = (
  hsh => {'RDA::Object::Token'  => 1,
          'RDA::Token::Action'  => 1,
          'RDA::Token::Complex' => 1,
          'RDA::Token::Quoted'  => 1,
          'RDA::Token::Simple'  => 1,
         },
  );
@ISA     = qw(RDA::Object Exporter);
%SDCL    = (
  dep => [qw(RDA::Token::Action
             RDA::Token::Complex
             RDA::Token::Quoted
             RDA::Token::Simple)],
  flg => 1,
  inc => [qw(RDA::Object)],
  met => {
    'get_attr'       => {ret => 0},
    'get_condition'  => {ret => 0},
    'get_expression' => {ret => 0},
    'get_next'       => {ret => 0},
    'get_re'         => {ret => 0},
    'get_status'     => {ret => 0},
    'get_text'       => {ret => 0},
    'set_attr'       => {ret => 0},
    'set_expression' => {ret => 0},
    'set_status'     => {ret => 0},
    'set_text'       => {ret => 0},
    },
  new => 1,
  );

# Define the global private constants

# Define the global private variables

# Report the package version
sub Version
{ return $VERSION;
}

=head2 S<class-E<gt>new($name,$pattern,$sub)>

The object constructor. This method takes a symbolic name, a regular
expression, and an anonymous subroutine as arguments.

The regular expression is either a simple regular expression, or a reference to
an array containing from one to three regular expressions. In the latter case,
the lexical unit can span several lines. For example, it can be a character
string delimited by quotation marks, comments in a C program, and so on. The
regular expressions are used to recognize:

=over 3

=item 1.

The beginning of the lexical unit,

=item 2.

The body of the lexical unit; when this second expression is missing, it uses
C<(?:.*?)>,

=item 3.

The end of the lexical unit; when this last expression is missing, the first
one is used. The end of the lexical unit cannot span several lines.

=back

For example,

 qw(STRING), [qw(" (?:[^"\\\\]+|\\\\(?:.|\n))* ")],

These regular expressions can recognize multi-line strings delimited by
quotation marks, where the backslash is used to quote the quotation marks
appearing within the string. Note the quadrupling of the backslashes.

Here is a variation of the previous example which uses the C<s> option to
include the newline in the characters recognized by "C<.>":

 qw(STRING), [qw(" (?s:[^"\\\\]+|\\\\.)* ")],

The anonymous subroutine is called when the lexical unit is recognized by the
lexical analyzer. This subroutine takes two arguments: a reference to the token
object, and the string recognized by the regular expression. The scalar
returned by the anonymous subroutine defines the character string memorized in
the token object.

C<RDA::Object::Token> is represented by a blessed hash reference. The following
special keys are used:

=over 12

=item S<    B<'lvl' > > Trace level

=item S<    B<'oid' > > Symbolic name

=item S<    B<'_act'> > Associated sub

=item S<    B<'_cnd'> > Associated conditions

=item S<    B<'_det'> > Token decoration

=item S<    B<'_exp'> > For an action token

=item S<    B<'_lex'> > Lexical analyzer instance

=item S<    B<'_pat'> > Regular expression (can be an array reference)

=item S<    B<'_sta'> > Object status

=item S<    B<'_tpl'> > Associated template

=item S<    B<'_txt'> > Recognized text

=back

Internal keys are prefixed by an underscore.

=cut

sub new
{ my ($cls, @arg) = @_;
  my ($cnd, $oid);

  ($cnd, $oid) = _parse_name($arg[0]);
  return bless {
    lvl  => 0,       # Trace level
    oid  => $oid,    # Symbolic name
    _act => $arg[2], # Associated sub
    _cnd => $cnd,    # Associated conditions
    _det => {},      # Token decoration
    _exp => $arg[4], # For an action token
    _lex => $arg[3], # Lexical analyzer instance
    _pat => $arg[1], # Regular expression (can be an array reference)
    _sta => 0,       # Object status
    _tpl => {},      # Associated template
    _txt => q{},     # Recognized text
    }, ref($cls) || $cls;
}

sub _parse_name
{ my ($nam) = @_;

  return ($1, $2) if $nam =~ /^(.+:)(.+)/;  # Ex. A:B:C:SYMBOL, A,C:SYMBOL
  return (q{}, $nam);
}

#
sub AUTOLOAD  ## no critic (Autoload)
{ my ($slf, $val) = @_;
  my ($nam);

  return unless ref($slf);
  $nam = $AUTOLOAD;
  $nam =~ s/.*://;
  return defined($val)
    ? $slf->{'_det'}->{$nam} = $val
    : $slf->{'_det'}->{$nam};
}

# Destroy the object
sub DESTROY
{
}

=head2 RDA::Object::Token->factory($definition...)

This method creates a list of token objects from a list of token
specifications. The list can include objects of the C<RDA::Object::Token> class
or of a class derived from it.

=cut

sub factory
{ my ($slf, @arg) = @_;
  my ($cls, $pat, $nam, $sub, @tok);

  die get_string('NO_DEFINITION') unless @arg;
  while (@arg)
  { if (ref($nam = shift(@arg)) && $nam->isa(__PACKAGE__))
    { # Add the token to the list
      push(@tok, $nam);
    }
    else
    { # Parse the specification and add it to the list
      $cls = (ref($pat = shift(@arg)) eq 'ARRAY')
        ? 'RDA::Token::Complex'
        : 'RDA::Token::Simple';
      $sub = (@_ && ref($arg[0]) eq 'CODE')
        ? shift(@arg)
        : undef;
      push(@tok, $cls->new($nam, $pat, $sub));
    }
  }
  push(@tok, 'RDA::Token::Simple'->new('ALL:DEFAULT', q{.*}));
  return @tok;
}

=head2 S<$h-E<gt>get_action>

This method returns the anonymous subroutine defined within the object.

=cut

sub get_action
{ return shift->{'_act'};
}

=head2 S<$h-E<gt>get_attr($name)>

This method returns the value of the specified attribute.

=cut

sub get_attr
{ my ($slf, $nam) = @_;

  return $slf->{'_det'}->{$nam};
}


=head2 S<$h-E<gt>get_condition>

This method returns the associated condition.

=cut

sub get_condition
{ return shift->{'_cnd'};
}

=head2 S<$h-E<gt>get_expression>

This method returns the associated expression.

=cut

sub get_expression
{ return shift->{'_exp'};
}

=head2 S<$h-E<gt>get_next>

This method activates searching for the lexical unit defined by the regular
expression contained in the object. When this lexical unit is recognized on
the character stream to analyze, it returns the string found and sets the
status of the object to true.

=cut

sub get_next
{ my ($slf) = @_;
  my ($lex, $nxt, $txt);

  $lex = $slf->{'_lex'};
  $nxt = $lex->get_token;
  if ($nxt == $EOI)
  { $slf->{'_sta'} = ($slf == $EOI) ? 1 : 0;
    return;
  }
  $lex->get_next unless $nxt;
  if ($slf == $lex->get_token)
  { $lex->set_token(0);     # now no pending token
    $txt = $slf->{'_txt'};
    $slf->{'_txt'} = q{};
    $slf->{'_sta'} = 1;
    return $txt;
  }
  $slf->{'_sta'} = 0;
  return;
}

=head2 S<$h-E<gt>get_re>

This method returns the regular expression associated to the object.

=cut

sub get_re
{ return shift->{'_pat'};
}

=head2 S<$h-E<gt>get_status(status)>

The method indicates whether the last search of the lexical unit succeeded or
failed.

=cut

sub get_status
{ return shift->{'_sta'};
}

=head2 S<$h-E<gt>get_text>

This method returns the character string that was recognized using this object.

=cut

sub get_text
{ return shift->{'_txt'};
}

=head2 S<$h-E<gt>set_attr($nam,$val)>

This method sets an attribute value.

=cut

sub set_attr
{ my ($slf, $nam, $val) = @_;

  return $slf->{'_det'}->{$nam} = $val;
}

=head2 S<$h-E<gt>set_expression>

This method sets an associate expression.

=cut

sub set_expression
{ my ($slf, $val) = @_;

  return $slf->{'_exp'} = $val;
}

=head2 S<$h-E<gt>set_status($val)>

This method overrides the existing value and sets it to the specified value.

=cut

sub set_status
{ my ($slf, $val) = @_;

  return $slf->{'_sta'} = $val
}

=head2 S<$h-E<gt>set_text($string)>

This method associates the specified character string to the lexical unit.

=cut

sub set_text
{ my ($slf, $val) = @_;

  return $slf->{'_txt'} = $val;
}

# --- Internal routines -------------------------------------------------------

# Return the associated lexical analyzer
sub get_lexer
{ return shift->{'_lex'};
}

# Return a reference to the template object
sub get_template
{ return shift->{'_tpl'};
}

# Define the associated lexical analyzer
sub set_lexer
{ my ($slf, $lex) = @_;

  $slf->{'_lex'} = $lex;
  $slf->{'lvl'} = $lex->set_trace;
  return $slf;
}

# Associate a template to the lexical unit
sub set_template
{ my ($slf, $val) = @_;

  $slf->{'_tpl'} = $val;
  return $slf;
}

1;

__END__

=head1 SEE ALSO

L<RDA::Agent|RDA::Agent>,
L<RDA::Driver::Template|RDA::Driver::Template>,
L<RDA::Object|RDA::Object>,
L<RDA::Object::Lex|RDA::Object::Lex>,
L<RDA::Text|RDA::Text>,
L<RDA::Token::Action|RDA::Token::Action>,
L<RDA::Token::Complex|RDA::Token::Complex>,
L<RDA::Token::Quoted|RDA::Token::Quoted>,
L<RDA::Token::Simple|RDA::Token::Simple>

=head1 COPYRIGHT NOTICE

Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.

=head1 TRADEMARK NOTICE

Oracle and Java are registered trademarks of Oracle and/or its
affiliates. Other names may be trademarks of their respective owners.

=cut