# Html.pm: Class Used for HTML Macros

package RDA::Library::Html;

# $Id: Html.pm,v 1.9 2014/11/07 18:06:49 RDA Exp $
# ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/Library/Html.pm,v 1.9 2014/11/07 18:06:49 RDA Exp $
#
# Change History
# 20141107  MSC  Add the refresh method.

=head1 NAME

RDA::Library::Html - Class Used for HTML Macros

=head1 SYNOPSIS

 require RDA::Library::Html;

=head1 DESCRIPTION

The objects of the C<RDA::Library::Html> class are used to interface with HTML
macros.

The following methods are available:

=cut

use strict;

BEGIN {
    use Exporter;
    use RDA::Text qw(get_string);
    use RDA::Driver::Library;
    use RDA::Object;
    use RDA::Object::Html;
    use RDA::Value::List;
    use RDA::Value::Scalar qw(:value);
}

# Define the global public variables
use vars qw($STRINGS $SUSPEND $VERSION @ISA);

# Maps the 'trc' key to a callback that reads the HTML trace level back from
# the collector stored under '_col'.
# NOTE(review): presumably consumed by the library driver when a suspended
# object is restored -- confirm against RDA::Driver::Library.
$SUSPEND = {
    trc => sub {
        my ($slf) = @_;
        return $slf->{'_col'}->get_trace('HTML');
    },
};
$VERSION = sprintf('%d.%02d', q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/);
@ISA     = qw(RDA::Driver::Library Exporter);

# Define the global private constants
my $HTML = 'RDA::Object::Html';

# Define the global private variables

# Macro dispatch table: macro name => [implementation, return type code].
# The 'L' code makes run() wrap the result in a list value; any other code is
# handed to RDA::Value::Scalar->new together with the scalar result.
# NOTE(review): htmlFix is typed 'N' although its documentation says that it
# returns the parser object reference (htmlDisable and htmlFilter use 'O') --
# confirm which one is intended.
my %tb_fct = (
    htmlAttributes   => [\&_m_attrs,     'L'],
    htmlContent      => [\&_m_content,   'L'],
    htmlDisable      => [\&_m_disable,   'O'],
    htmlExists       => [\&_m_exists,    'N'],
    htmlFilter       => [\&_m_filter,    'O'],
    htmlFind         => [\&_m_find,      'L'],
    htmlFix          => [\&_m_fix,       'N'],
    htmlLoadFile     => [\&_m_load_file, 'O'],
    htmlLoadResponse => [\&_m_load_rsp,  'O'],
    htmlName         => [\&_m_name,      'T'],
    htmlParser       => [\&_m_parser,    'O'],
    htmlError        => [\&_m_error,     'N'],
    htmlTable        => [\&_m_table,     'L'],
    htmlText         => [\&_m_text,      'T'],
    htmlType         => [\&_m_type,      'T'],
    htmlValue        => [\&_m_value,     'T'],
    setHtmlTrace     => [\&_m_trace,     'N'],
);

# Report the package version
sub Version {
    return $VERSION;
}

=head2 S<$h = RDA::Library::Html-E<gt>new($driver,$collector)>

The object constructor. It takes the library driver and collector references as
arguments.

C<RDA::Library::Html> is represented by a blessed hash reference.
The following special keys are used:

=over 12

=item S< B<'trc' > > HTML trace flag

=item S< B<'_col'> > Reference to the collector object

=back

Internal keys are prefixed by an underscore.

=cut

sub new {
    my ($cls, $drv, $col) = @_;

    # Create the macro object
    my $pkg = ref($cls) || $cls;
    my $slf = bless {}, $pkg;

    # Register the macros
    my @macros = keys(%tb_fct);
    $drv->register($slf, \@macros, qw(refresh suspend));

    # Attach the collector and return the object reference
    return refresh($slf, $col);
}

=head2 S<$h-E<gt>call($name,...)>

This method executes the macro code.

=cut

sub call {
    my ($slf, $nam, @arg) = @_;

    # Look up the implementation in the dispatch table and invoke it with the
    # remaining arguments.
    my $code = $tb_fct{$nam}->[0];
    return $code->($slf, @arg);
}

=head2 S<$h-E<gt>delete_object>

This method deletes the library control object.

=cut

sub delete_object {
    # Works on $_[0] directly so that the caller's own variable is cleared.
    if ($RDA::Object::DELETE) {
        RDA::Object::dump_caller($_[0], 'Library');
    }
    undef %{$_[0]};
    undef $_[0];
    return;
}

=head2 S<$h-E<gt>refresh($col)>

This method updates the library control object for a new collector.

=cut

sub refresh {
    my ($slf, $col) = @_;

    # The trace level is requested in scalar context before anything is stored.
    @{$slf}{'trc', '_col'} = (scalar($col->get_trace('HTML')), $col);
    return $slf;
}

=head2 S<$h-E<gt>run($name,$arg,$ctx)>

This method executes the macro with the specified argument list in a given
context.

=cut

sub run {
    my ($slf, $nam, $arg, $ctx) = @_;
    my ($code, $entry, $res, $type);

    $entry = $tb_fct{$nam};
    $type  = $entry->[1];
    $code  = $entry->[0];

    # Treat an array context
    if ($type eq 'L') {
        return RDA::Value::List::new_from_data(
            $code->($slf, $ctx, $arg->eval_as_array));
    }

    # Treat a scalar context
    $res = $code->($slf, $ctx, $arg->eval_as_array);
    return defined($res)
        ? RDA::Value::Scalar->new($type, $res)
        : $VAL_UNDEF;
}

=head1 HTML MACROS

=head2 S<htmlAttributes($htm)>

This macro returns the list of node attributes.

=cut

sub _m_attrs {
    my ($slf, $ctx, $htm) = @_;

    if (ref($htm) eq $HTML) {
        return $htm->get_attr;
    }
    return ();
}

=head2 S<htmlContent($htm,$flt,$cln)>

This macro returns the list of child nodes after resolving the conditions. The
second argument specifies the list of child types to consider. The third
argument specifies a regular expression to identify objects that must be
replaced by their content.
By default, it returns all child nodes.

=cut

sub _m_content {
    my ($slf, $ctx, $htm, $flt, $cln) = @_;

    if (ref($htm) eq $HTML) {
        return $htm->get_content($flt, $cln);
    }
    return ();
}

=head2 S<htmlError($htm)>

This macro returns the number of parsing errors.

=cut

sub _m_error {
    my ($slf, $ctx, $htm) = @_;

    # Anything that is not an HTML object reports no error.
    return 0 unless ref($htm) eq $HTML;
    return $htm->get_error;
}

=head2 S<htmlExists($htm,$key)>

This macro indicates whether the attribute exists in the specified node.

=cut

sub _m_exists {
    my ($slf, $ctx, $htm, $key) = @_;
    my $is_html = ref($htm) eq $HTML;

    # Yields the (false) comparison result itself when $htm is not an HTML
    # object.
    return $is_html && $htm->exists($key);
}

=head2 S<htmlFind($htm,$qry)>

This macro performs the query on the HTML object. It returns the result as an
object list.

=cut

sub _m_find {
    my ($slf, $ctx, $htm, $qry) = @_;

    if (ref($htm) eq $HTML) {
        return $htm->find($qry);
    }
    return ();
}

=head2 S<htmlLoadFile($fil[,$htm])>

This macro parses a HTML file and returns the resulting HTML object. You can
specify a parser as an argument to control what information is extracted.

=cut

sub _m_load_file {
    my ($slf, $ctx, $fil, $htm) = @_;

    # Reuse the supplied parser, or create one with the current trace level.
    my $prs = (ref($htm) eq $HTML)
        ? $htm
        : RDA::Object::Html->new($slf->{'trc'});
    return $prs->parse_file($fil);
}

=head2 S<htmlLoadResponse($rsp[,$htm])>

This macro parses the HTTP response content and returns the resulting HTML
object. You can specify a parser as an argument to control what information is
extracted.

=cut

sub _m_load_rsp {
    my ($slf, $ctx, $rsp, $htm) = @_;

    # Reuse the supplied parser, or create one with the current trace level.
    my $prs = (ref($htm) eq $HTML)
        ? $htm
        : RDA::Object::Html->new($slf->{'trc'});

    # Without a response object, the parser is returned untouched.
    return $prs unless ref($rsp) eq 'RDA::Object::Response';

    # Feed the response content to the parser, then signal the end of input.
    for my $lin (@{$rsp->get_content}) {
        $prs->parse($lin);
    }
    $prs->eof;
    return $prs;
}

=head2 S<htmlName($htm)>

This macro returns the node name when defined. Otherwise, it returns an
undefined value.

=cut

sub _m_name {
    my ($slf, $ctx, $htm) = @_;

    # Explicit undef kept: a list-context caller still gets one element.
    return undef unless ref($htm) eq $HTML;
    return $htm->get_name;
}

=head2 S<htmlTable($htm,$lvl,$fmt)>

This macro extracts all significant tables from the parsed document. Cells in
bold or containing C
or C in their style name are taken as headings. It converts single cell rows in
the header of the specified level. It considers horizontal rulers and header
lines also.

The macro returns the result as a list of raw data lines.

=for comment
NOTE(review): the two bare C markers in the sentence about heading cells have
lost their original arguments; restore the style name substrings from the
upstream file.

=cut

sub _m_table {
    my ($slf, $ctx, $htm, $lvl, $fmt) = @_;

    if (ref($htm) eq $HTML) {
        return $htm->get_tables($lvl, $fmt);
    }
    return ();
}

=head2 S<htmlText($htm)>

This macro extracts the texts contained in the specified node. It returns an
empty string when it finds no text.

=cut

sub _m_text {
    my ($slf, $ctx, $htm) = @_;

    return q{} unless ref($htm) eq $HTML;
    return $htm->get_text;
}

=head2 S<htmlType($htm)>

This macro returns the node type.

=cut

sub _m_type {
    my ($slf, $ctx, $htm) = @_;

    # Explicit undef kept: a list-context caller still gets one element.
    return undef unless ref($htm) eq $HTML;
    return $htm->get_type;
}

=head2 S<htmlValue($htm,$key,$dft)>

This macro returns the value of the attribute in the specified node. When the
attribute is not defined, it returns the default value.

=cut

sub _m_value {
    my ($slf, $ctx, $htm, $key, $dft) = @_;

    # The default value is also the answer when $htm is not an HTML object.
    return $dft unless ref($htm) eq $HTML;
    return $htm->get_value($key, $dft);
}

=head1 PARSER MACROS

=head2 S<htmlDisable($htm,$flt)>

This macro indicates the list of child types to ignore. When the list is empty,
any type filtering is disabled. It returns the parser object reference.

=cut

sub _m_disable {
    my ($slf, $ctx, $htm, $flt) = @_;

    # Explicit undef kept: a list-context caller still gets one element.
    return undef unless ref($htm) eq $HTML;
    return $htm->disable($flt);
}

=head2 S<htmlFilter($htm,...)>

This macro specifies the list of the tags to consider when parsing the
document. When the list is empty, any tag filtering is disabled. It returns the
parser object reference.

=cut

sub _m_filter {
    # Remove the three leading arguments; what is left in @_ is the tag list,
    # forwarded as is.
    my ($slf, $ctx, $htm) = splice(@_, 0, 3);

    # Explicit undef kept: a list-context caller still gets one element.
    return undef unless ref($htm) eq $HTML;
    return $htm->filter(@_);
}

=head2 S<htmlFix($htm,$flg)>

This macro indicates that the parser can fix incorrect HTML code. It returns
the parser object reference.

=cut

sub _m_fix {
    my ($slf, $ctx, $htm, $flg) = @_;

    # Explicit undef kept: a list-context caller still gets one element.
    return undef unless ref($htm) eq $HTML;
    return $htm->fix($flg);
}

=head2 S<htmlParser()>

This macro initializes a new HTML parser and returns its reference.
=cut

sub _m_parser {
    my ($slf) = @_;

    # The new parser inherits the current HTML trace level.
    return RDA::Object::Html->new($slf->{'trc'});
}

=head2 S<setHtmlTrace([$lvl])>

This macro sets the HTML parsing level:

=over 7

=item B< 0 > No trace

=item B< 1 > Trace the HTML parsing

=back

The level is unchanged if the new level is not defined. It returns the previous
level.

=cut

sub _m_trace {
    my ($slf, $ctx, $lvl) = @_;
    my $old = $slf->{'trc'};

    # Only a defined level replaces the stored one.
    if (defined($lvl)) {
        $slf->{'trc'} = $lvl;
    }
    return $old;
}

1;

__END__

=head1 SEE ALSO

=for comment
NOTE(review): the original link targets were lost; the list below names the
modules that this file references -- confirm against the upstream file.

L<RDA::Driver::Library>,
L<RDA::Object>,
L<RDA::Object::Html>,
L<RDA::Object::Response>,
L<RDA::Text>,
L<RDA::Value::List>,
L<RDA::Value::Scalar>

=head1 COPYRIGHT NOTICE

Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.

=head1 TRADEMARK NOTICE

Oracle and Java are registered trademarks of Oracle and/or its affiliates.
Other names may be trademarks of their respective owners.

=cut