# Index.pm: Class Used for Managing the Collected Elements package RDA::Object::Index; # $Id: Index.pm,v 1.42 2015/07/23 23:32:50 RDA Exp $ # ARCS: $Header: /home/cvs/cvs/RDA_8/src/scripting/lib/RDA/Object/Index.pm,v 1.42 2015/07/23 23:32:50 RDA Exp $ # # Change History # 20150723 MSC Add the get_id method. =head1 NAME RDA::Object::Index - Class Used for Managing the Collected Elements =head1 SYNOPSIS require RDA::Object::Index; =head1 DESCRIPTION The objects of the C class are used for managing the collected elements. It is a subclass of L. The following methods are available: =cut use strict; BEGIN { use Exporter; use IO::File; use RDA::Text qw(get_string); use RDA::Driver::Archive; use RDA::Handle::Block; use RDA::Object qw(encode); use RDA::Object::Collect qw(%SUB_DIRS); use RDA::Object::Content; use RDA::Object::Rda qw($CREATE $DIR_PERMS $FIL_PERMS); use RDA::Object::View; } # Define the global public variables use vars qw($STRINGS $VERSION @ISA %SDCL); $VERSION = sprintf('%d.%02d', q$Revision: 1.42 $ =~ /(\d+)\.(\d+)/); @ISA = qw(RDA::Object Exporter); %SDCL = ( beg => \&_begin_index, inc => [qw(RDA::Object)], met => { 'extract' => {ret => 0}, 'find' => {ret => 1}, 'get_id' => {ret => 0}, 'get_prefix' => {ret => 0}, 'get_prefixes' => {ret => 1}, 'get_view' => {ret => 0}, 'grep' => {ret => 1}, 'ls' => {ret => 1}, 'predict' => {ret => 1}, 'restrict' => {ret => 0}, }, top => 'IDX', ); # Define the global private constants # Define the global private variables my %tb_dsc = ( Cygwin => {cls => 'RDA::Local::Windows', cnv => \&_w_convert, fnd => \&_w_find, itm => \&_w_item, lnk => \&_w_link, nat => \&_w_native, nrm => \&_w_norm, }, Unix => {cls => 'RDA::Local::Unix', cnv => \&_u_convert, fnd => \&_u_find, itm => \&_u_item, lnk => \&_u_link, nat => \&_u_native, nrm => \&_u_norm, }, Vms => {cls => 'RDA::Local::Vms', cnv => \&_v_convert, fnd => \&_v_find, itm => \&_v_item, lnk => \&_v_link, nat => \&_v_native, nrm => \&_v_norm, }, Windows => {cls => 'RDA::Local::Windows', cnv => \&_w_convert, fnd => \&_w_find, itm => \&_w_item, lnk => \&_w_link, nat => \&_w_native, nrm => \&_w_norm, }, ); my %tb_und = map {$_ => 1} qw(dsp); # Report the package version sub Version { return $VERSION; } =head2 S<$h = RDA::Object::Index-Enew($collector,name =E $value,...)> The object constructor. This method enables you to specify the collector object reference and initial attributes as arguments. C is represented by a blessed hash reference. The following special keys are used: =over 12 =item S< B<'all' > > Unless set, ignores incomplete files =item S< B<'ctl'> > Reference to the report archive control object =item S< B<'dsp' > > Reference to the display control object =item S< B<'err' > > When set, raises errors =item S< B<'sta' > > Last status code =item S< B<'_agt'> > Reference to the agent object =item S< B<'_cfg'> > Reference to the RDA software configuration =item S< B<'_dsc'> > Platform-specific descriptor =item S< B<'_ext'> > Extraction hash =item S< B<'_fam'> > Index operating system family =item S< B<'_idx'> > File index =item S< B<'_pre'> > Current archive file prefix =back Internal keys are prefixed by an underscore. =cut sub new { my ($cls, $agt, @arg) = @_; my ($cfg, $key, $slf, $val); # Create the index object $cfg = $agt->get_config; $slf = bless { all => 0, dsp => undef, err => 0, sta => 0, _agt => $agt, _cfg => $cfg, _pre => q{}, }, ref($cls) || $cls; # Add the initial attributes while (($key, $val) = splice(@arg, 0, 2)) { $val = $key unless defined($val) || exists($tb_und{$key}); $slf->{$key} = $val; } # Add the archive control object when missing $slf->{'ctl'} = RDA::Driver::Archive->new($agt, $agt->get_collector->get_data) unless exists($slf->{'ctl'}); $slf->{'_dsc'} = $tb_dsc{$slf->{'_fam'} = $slf->{'ctl'}->get_family($cfg->get_family)}; $slf->{'_pre'} = $slf->{'ctl'}-> get_prefix; # Return the object reference return $slf; } =head2 S<$h-Eapply($req,$lvl[,@pth])> This method applies the specified request to the specified paths. When a level is defined, it controls how deep it must search for files in the specified directories. Paths can contain wild cards. C<*> matches any string, including the null string. C matches any single character. C<[...]> matches any one of the enclosed characters, which are taken literally. When a path element starts with a C<.>, this character must be matched explicitly. When no paths are specifies it applies the request on all top nodes. =cut sub apply { my ($slf, $req, $lvl, @arg) = @_; # Adjust the request if (defined($lvl)) { $req->{'dir'} = [\&_apply_dir, [$lvl]] if $lvl > 0; } else { $lvl = 0; } # Load the indexes on first use $slf->refresh unless exists($slf->{'_idx'}); # Determine what should be done in the absence of arguments unless (@arg) { if (exists($req->{'top'})) { return $slf if &{$req->{'top'}->[0]}($slf, [], $slf->{'_idx'}, $req, @{$req->{'top'}->[1]}); } elsif (exists($req->{'dft'}) || $lvl > 0) { @arg = map {length($_) ? $_ : [q{}, q{}]} keys(%{$slf->{'_idx'}->[1]}); } } # Treat all files foreach my $arg (@arg) { unless ( (ref($arg) eq 'ARRAY') ? _apply($slf, [], $slf->{'_idx'}, $req, 0, $arg) : defined($arg) ? _apply($slf, [], $slf->{'_idx'}, $req, 0, [split(/\//, &{$slf->{'_dsc'}->{'nrm'}}($arg), -1)]) : 0) { &{$req->{'err'}->[0]}($slf, $arg, @{$req->{'err'}->[1]}) if exists($req->{'err'}); } } return $slf; } sub _apply ## no critic (Complex) { my ($slf, $pth, $cur, $req, $lvl, $pat) = @_; my ($cnt, $dsc, $nam, $rec, $ref, $sub, @pat, @sub); return 0 unless ref($cur->[1]) eq 'HASH'; $cnt = 0; $dsc = $slf->{'_dsc'}; ($sub, @pat) = @{$pat}; if (@pat) { # Treat an intermediair level return _apply($slf, $pth, $cur, $req, $lvl, [@pat]) unless $lvl == 0 || length($sub); if ($sub =~ m/[\*\?\[\]]/) { @sub = &{$dsc->{'fnd'}}($cur, RDA::Object::Rda->re($sub)); } elsif (defined($nam = &{$dsc->{'cnv'}}($cur, $sub))) { @sub = $nam; } ++$lvl; $pat = [@pat]; &{$req->{'bef'}->[0]}($slf, $pth, $cur, $req, @{$req->{'bef'}->[1]}) if exists($req->{'bef'}); foreach my $itm (@sub) { $cnt += _apply($slf, [@{$pth}, $itm], $cur->[1]->{$itm}, $req, $lvl, $pat); } &{$req->{'aft'}->[0]}($slf, $pth, $cur, @{$req->{'aft'}->[1]}) if exists($req->{'aft'}); } elsif (length($sub)) { # Treat the last level if ($sub =~ m/[\*\?\[\]]/) { @sub = &{$dsc->{'fnd'}}($cur, RDA::Object::Rda->re($sub)); } elsif (defined($nam = &{$dsc->{'cnv'}}($cur, $sub))) { @sub = ($nam); } if (exists($req->{'bot'})) { $cnt += &{$req->{'bot'}->[0]}($slf, $pth, $cur, $req, @{$req->{'bot'}->[1]}, @sub); } else { foreach my $itm (@sub) { $rec = $cur->[1]->{$itm}; $ref = ref($rec->[1]); if ($ref eq 'ARRAY') { $cnt += &{$req->{'fil'}->[0]}($slf, $req->{'fil'}->[2] ? [@{$pth}, $itm] : &{$dsc->{'nat'}}($dsc->{'cls'}, @{$pth}, $itm), $rec, @{$req->{'fil'}->[1]}) if exists($req->{'fil'}); } elsif ($ref eq 'HASH') { $cnt += &{$req->{'dir'}->[0]}($slf, [@{$pth}, $itm], $rec, $req, @{$req->{'dir'}->[1]}) if exists($req->{'dir'}); } } } } else { $cnt += &{$req->{'dir'}->[0]}($slf, $pth, $cur, $req, @{$req->{'dir'}->[1]}) if exists($req->{'dir'}); } return $cnt; } sub _apply_dir { my ($slf, $pth, $cur, $req, $lvl) = @_; my ($cnt, $dsc, $rec, $ref); $cnt = 0; $dsc = $slf->{'_dsc'}; if ($lvl-- > 0) { foreach my $itm (sort keys(%{$cur->[1]})) { next if $itm =~ m/^\.{1,2}$/; $rec = $cur->[1]->{$itm}; $ref = ref($rec->[1]); if ($ref eq 'ARRAY') { $cnt += &{$req->{'fil'}->[0]}($slf, $req->{'fil'}->[2] ? [@{$pth}, $itm] : &{$dsc->{'nat'}}($dsc->{'cls'}, @{$pth}, $itm), $rec, @{$req->{'fil'}->[1]}) if exists($req->{'fil'}); } elsif ($ref eq 'HASH') { $cnt += _apply_dir($slf, [@{$pth}, $itm], $rec, $req, $lvl); } } } return $cnt; } =head2 S<$h-Edive($nam,$pre)> This method dives to an inner archive, selects the specified prefix, and adapts the archive context accordingly. =cut sub dive { my ($slf, $nam, $pre) = @_; my ($ctl); # Dive inside the archive $ctl = $slf->{'ctl'}; $ctl = $ctl->get_archive($ctl->dive($nam)); # Select the prefix $ctl->select($pre) if defined($pre); # Update the index delete($slf->{'_idx'}); $slf->{'ctl'} = $ctl = $ctl->get_current; $slf->{'_dsc'} = $tb_dsc{$slf->{'_fam'} = $ctl->get_family($slf->{'_cfg'}->get_family)}; $slf->{'_pre'} = $ctl->get_prefix; # Return the object reference return $slf; } =head2 S<$h-Eextract([$dir,@itm])> This method extracts the requested items to the specified directory, F by default. It uses the RDA work directory as base for relative destination directories. When the list of requested items is empty, all collected files are extracted. Files and directories specified as items can contain wild cards. C<*> matches any string, including the null string. C matches any single character. C<[...]> matches any one of the enclosed characters, which are taken literally. When a path element starts with a C<.>, this character must be matched explicitly. It returns the number of extracted files. =cut sub extract { my ($slf, $dst, @itm) = @_; my ($cfg, $cnt, $ctl, $ext, $frg, $ifh, $siz, @pth); # Validate the destination directory $cfg = $slf->{'_cfg'}; if (!defined($dst) || $dst !~ m/^([\000-\377]+)$/) { $dst = $cfg->get_dir('D_CWD', 'extract'); } elsif (!$cfg->is_absolute($dst = $1)) ## no critic (Capture) { $dst = $cfg->get_dir('D_CWD', $dst); } # Extract collected files $slf->{'sta'} = 4; $slf->{'_ext'} = $ext = {}; $slf->apply({dir => [\&_extract_dir, []], err => [\&_extract_err, []], fil => [\&_extract_fil, [], 1], top => [\&_extract_dir, []], }, 0, @itm); # Extract archived files $cnt = 0; $ctl = $slf->{'ctl'}; foreach my $sub (sort keys(%{$ext})) { foreach my $rpt (sort keys(%{$ext->{$sub}})) { # Find the archived report unless ($ifh = $ctl->find_report($sub, $rpt)) { $slf->{'sta'} |= 1; ## no critic (Bit) next; } # Extract requested blocks foreach my $off (sort {$a <=> $b} keys(%{$ext->{$sub}->{$rpt}})) { ($siz, $frg, @pth) = @{$ext->{$sub}->{$rpt}->{$off}}; $cnt += _extract_block($slf, $dst, RDA::Handle::Block->new($ifh, $off, $siz, $frg), @pth); } $ifh->close; } } return $cnt ; } # Extract a block sub _extract_block { my ($slf, $dst, $ifh, $sta, @pth) = @_; my ($buf, $lgt, $nam, $ofh, $pth, @sta); $slf->{'dsp'}->dsp_line(RDA::Object::Rda->cat_file(@pth).qq{\n}) if $slf->{'dsp'}; # Create the directory $nam = pop(@pth); RDA::Object::Rda->create_dir(RDA::Object::Rda->cat_dir($dst, @pth), $DIR_PERMS); # Create the file $ofh = IO::File->new; $pth = RDA::Object::Rda->is_path(RDA::Object::Rda->cat_file($dst, @pth, $nam)); $ofh->open($pth, $CREATE, $FIL_PERMS) or die get_string('ERR_EXTRACT', $pth, $!); binmode($ifh); binmode($ofh); $ofh->syswrite($buf, $lgt) while ($lgt = $ifh->sysread($buf, 8192)); $ifh->close; $ofh->close; # Update the file status information utime($sta->[8] =~ m/^(\d+)$/, $sta->[9] =~ m/^(\d+)$/, $pth) if $sta; # Update the extraction status $slf->{'sta'} &= 3; ## no critic (Bit) return 1; } # Extract a whole directory sub _extract_dir { my ($slf, $pth, $cur) = @_; my ($rec, @pth); @pth = @{$pth}; foreach my $itm (keys(%{$cur->[1]})) { $rec = $cur->[1]->{$itm}; (ref($rec->[1]) eq 'HASH') ? _extract_dir($slf, [@pth, $itm], $rec) : _extract_fil($slf, [@pth, $itm], $rec) unless $itm eq q{.} || $itm eq q{..}; } return 1; } # Report an extraction error sub _extract_err { my ($slf, $fil) = @_; $slf->{'sta'} |= 2; ## no critic (Bit) die get_string('NO_FILE', $fil) if $slf->{'err'}; return; } # Extract a file sub _extract_fil { my ($slf, $pth, $cur) = @_; my ($blk, $nam, $off, $rpt, $siz, $sub, $vol, @pth); # Create the directory when needed @pth = @{$pth}; $nam = pop(@pth); if (defined($vol = shift(@pth)) && length($vol)) { if ($vol =~ m/^([\w\$]+):$/) { unshift(@pth, 'drive', $1) } elsif ($vol =~ s{^[\\\/]+}{_}g) { $vol =~ s{[\\\/]}{_}g; unshift(@pth, 'unc', $vol) } else { $vol =~ s{[\\\/]}{_}g; unshift(@pth, $vol) } } # Add the file to the extraction list if (defined($blk = _get_block($slf, $cur))) { (undef, $off, $siz, $sub, $rpt) = split(/\//, $blk->[0], 6); $slf->{'_ext'}->{$sub}->{$rpt}->{$off} = [$siz, $blk->[1], $cur->[0], @pth, $nam]; } else { # Indicate that the file has been skipped $slf->{'sta'} |= 1; ## no critic (Bit) } return 1; } =head2 S<$h-Efind([$pattern[,$level,@dir]])> This method returns the list of all files matching the specified pattern in the specified directory structures. By default, it searches the files in the whole directory structure. When no pattern is specified, it returns all files entries from the directory. The depth of the search is limited to the specified level, or C<20> by default. =cut sub find { my ($slf, $pat, $lvl, @dir) = @_; my ($cur, @hit, @pth); # Check the argument $lvl = 20 unless defined($lvl); $pat = q{.} unless defined($pat); # Load the indexes on first use $slf->refresh unless exists($slf->{'_idx'}); # Search files in the directory structure $slf->{'sta'} = 4; if (@dir) { foreach my $dir (@dir) { ($cur, @pth) = _get_node($slf, $dir); if (ref($cur->[1]) eq 'HASH') { push(@hit, _find($slf, $cur, $pat, $lvl, @pth)); } else { $slf->{'sta'} |= 2; ## no critic (Bit) die get_string('NO_DIR', $dir) if $slf->{'err'}; } } } else { @hit = _find($slf, $slf->{'_idx'}, $pat, $lvl); } $slf->{'sta'} &= 3 if @hit; ## no critic (Bit) # Return the hit list return @hit; } sub _find { my ($slf, $cur, $pat, $lvl, @pth) = @_; my ($cls, $fct, $rec, @tbl); $cls = $slf->{'_dsc'}->{'cls'}; $fct = $slf->{'_dsc'}->{'nat'}; foreach my $itm (keys(%{$cur->[1]})) { next if $itm eq q{.} || $itm eq q{..}; $rec = $cur->[1]->{$itm}; if (ref($rec->[1]) eq 'HASH') { push(@tbl, _find($slf, $rec, $pat, $lvl - 1, @pth, $itm)) if $lvl > 0; } elsif ($itm =~ $pat) { push(@tbl, &{$fct}($cls, @pth, $itm)) if defined($rec->[2]) || ($slf->{'all'} && @{$rec->[1]}); } } return @tbl; } =head2 S<$h-Eget_dir($path)> This method returns the corresponding directory record, or an undefined value when the directory does not exist. =cut sub get_dir { my ($slf, $pth) = @_; my ($cur); # Load the indexes on first use $slf->refresh unless exists($slf->{'_idx'}); # Return the directory hash reference $cur = _get_node($slf, $pth); return (!defined($cur)) ? undef : (ref($cur->[1]) eq 'HASH') ? $cur : undef; } =head2 S<$h-Eget_driver> This method returns a reference to the archive driver. =cut sub get_driver { return shift->{'ctl'}; } =head2 S<$h-Eget_file($path)> This method returns a block handle to the specified file. It returns an undefined value when it does not find a valid block. =cut sub get_file { my ($slf, $pth) = @_; my ($blk, $cur, $ifh, $off, $rpt, $siz, $sub); # Load the indexes on first use $slf->refresh unless exists($slf->{'_idx'}); # Find the best block definition return unless defined($cur = _get_node($slf, $pth)) && defined($blk = _get_block($slf, $cur)); # Return the block handle (undef, $off, $siz, $sub, $rpt) = split(/\//, $blk->[0], 6); return defined($ifh = $slf->{'ctl'}->find_report($sub, $rpt)) ? RDA::Handle::Block->new($ifh, $off, $siz, $blk->[1]) : undef; } =head2 S<$h-Eget_handle($node)> This method returns a block handle to the specified node. It returns an undefined value when it does not find a valid block. =cut sub get_handle { my ($slf, $nod) = @_; my ($blk, $ifh, $off, $rpt, $siz, $sub); # Find the best block definition return unless defined($blk = _get_block($slf, $nod)); # Return the block handle (undef, $off, $siz, $sub, $rpt) = split(/\//, $blk->[0], 6); return defined($ifh = $slf->{'ctl'}->find_report($sub, $rpt)) ? RDA::Handle::Block->new($ifh, $off, $siz, $blk->[1]) : undef; } =head2 S<$h-Eget_id> This method returns the archive driver identifier. =cut sub get_id { return shift->{'ctl'}->get_oid; } =head2 S<$h-Eget_prefix> This method returns the file prefix. =cut sub get_prefix { return shift->{'ctl'}->get_prefix; } =head2 S<$h-Eget_prefixes> This method returns the list of other possible file prefixes. =cut sub get_prefixes { return shift->{'ctl'}->get_prefixes; } =head2 S<$h-Eget_view> This method returns the virtual view associated with the result set. =cut sub get_view { my ($slf) = @_; my ($osn); # Load the indexes on first use $slf->refresh unless exists($slf->{'_idx'}); # Create the virtual view return defined($osn = $slf->{'ctl'}->get_os) ? $slf->{'_agt'}->get_system->new('IDX', $osn, \&_test_mode, idx => $slf) : undef; } sub _test_mode { my ($key, $slf, $opt, $fil, $flg) = @_; my ($cur, $pth); if (defined($fil)) { $opt = q{} unless defined($opt); return unless $fil =~ m/^([\000-\377]*)$/; $pth = $fil; return if $opt =~ m/[bclptS]/; unless ($opt =~ m/d/) { return unless defined($cur = _get_node($slf->{'idx'}, $pth)) && defined(_get_block($slf->{'idx'}, $cur)); } $slf->{$key} = $pth unless $flg; } return $pth; } =head2 S<$h-Egrep($opt,$lvl,$pat,@files)> This method returns the file lines that match the regular expression. When a level is defined, it controls how deep it must search for files in the specified directories. Files and directories specified as arguments can contain wild cards. C<*> matches any string, including the null string. C matches any single character. C<[...]> matches any one of the enclosed characters, which are taken literally. When a path element starts with a C<.>, this character must be matched explicitly. The following options are supported: =for stopwords Bn Cn Fn =over 9 =item B< 'b' > Prefixes lines with their byte offset. =item B< 'c' > Returns the match count instead of the match list. =item B< 'e' > Encodes paths in output. =item B< 'f' > Stops file scanning on the first match. =item B< 'h' > Suppresses the prefixing of file names on output. =item B< 'i' > Ignores case distinctions in both the pattern and the line. =item B< 'j' > Joins continuation lines. =item B< 'n' > Prefixes lines with a line number. =item B< 'v' > Inverts the sense of matching to select non-matching lines. =item B< 'An'> Prints EnE lines of trailing context after matching lines. =item B< 'Bn'> Prints EnE lines of leading context before matching lines. =item B< 'Cn'> Prints EnE lines of output context. =item B< 'Fn'> Stops file scanning after EnE matching lines. =item B< 'H' > Prints the file names for each match. =item B< 'L' > Prints only the name of the files without matching lines. =back It uses a vertical bar (|) as separator between file names, line numbers, counters, and line details. =cut sub grep ## no critic (Builtin,Complex) { my ($slf, $opt, $lvl, $pat, @arg) = @_; my ($cnt, $enc, $f_b, $f_c, $f_e, $f_h, $fil, $f_n, $inc, $inv, $r_a, $r_b, $r_m, $sta, @hit); # Decode the options and the pattern $pat = (defined($pat) && $pat =~ m/^([\000-\377]+)$/) ? $1 : q{.}; $opt = q{} unless defined($opt); $pat = RDA::Object::View->is_match($pat, index($opt, 'i') < 0); $enc = index($opt, 'e') >= 0; $inc = 1 if index($opt, 'j') >= 0; $inv = index($opt, 'v') >= 0; $r_a = $r_b = 0; if (index($opt, 'l') >= 0) { $fil = {}; $sta = 1; } elsif (index($opt, 'L') >= 0) { $fil = {}; $sta = 0; } else { $f_h = $lvl ? 1 : (($cnt = @arg) != 1) ? $cnt : ($arg[0] =~ m/[\*\?\[\]]/) ? 1 : 0; $f_h = 0 if index($opt, 'h') >= 0; $f_h = 1 if index($opt, 'H') >= 0; $r_m = $1 if $opt =~ m/F(\d+)/ && $1 > 0; $r_m = 1 if index($opt, 'f') >= 0; if (index($opt, 'c') >= 0) { $f_c = 1; } else { $f_b = index($opt, 'b') >= 0; $f_n = index($opt, 'n') >= 0; $r_a = $1 if $opt =~ m/[AC](\d+)/ && $1 > 0; $r_b = $1 if $opt =~ m/[BC](\d+)/ && $1 > 0; } $f_e = $f_b ? 0 : $f_h ? 0 : $f_n ? 0 : $enc; } # Treat all files $slf->{'sta'} = 4; $slf->apply({ fil => [\&_grep_file, [$pat, $enc, $f_b, $f_c, $f_e, $f_h, $f_n, $fil, $inc, $inv, $r_a, $r_b, $r_m, \@hit]], }, $lvl, @arg); shift(@hit) if $r_a || $r_b; @hit = sort grep {$fil->{$_} == $sta} keys(%{$fil}) if $fil; $slf->{'sta'} &= 3 if @hit; ## no critic (Bit) # Return the hits return @hit; } sub _grep_file ## no critic(Complex,Args) { my ($slf, $pth, $cur, $pat, $enc, $f_b, $f_c, $f_e, $f_h, $f_n, $fil, $inc, $inv, $r_a, $r_b, $r_m, $hit) = @_; my ($aft, $bef, $chk, $cnt, $del, $max, $nam, $num, $nxt, $off, $ifh, $lin, $sep, @bef); # Get the block handle unless ($ifh = $slf->get_handle($cur)) { $slf->{'sta'} |= 2; ## no critic (Bit) die get_string('NO_FILE', $pth) if $slf->{'err'}; return 0; } # Analyze the file $cnt = $num = $off = 0; $chk = 1; ($del, $nam) = $enc ? (q{||}, encode($pth)) : (q{-}, $pth); ($aft, $bef, $max, @bef) = (0, $r_b, $r_m); $sep = 1 if $r_a || $r_b; $fil->{$nam} = 0 if $fil; $slf->{'sta'} |= 1 if $ifh->is_partial; ## no critic (Bit) $ifh->setinfo('eol',0); while (defined($lin = $ifh->getline)) { if (defined($inc)) { $num += $inc; $inc = 1; while ($lin =~ s/\\$// && defined($nxt = $ifh->getline)) { $lin .= $nxt; $inc++; } } else { ++$num; } unless ($chk && ($inv xor $lin =~ $pat)) { if ($aft) { $lin = $off.$del.$lin if $f_b; $lin = $num.$del.$lin if $f_n; $lin = $nam.$del.$lin if $f_h; $lin = $del.$lin if $f_e; push(@{$hit}, $lin); $sep = 1 unless --$aft > 0 || $bef; ## no critic (Unless) } elsif ($bef) { $lin = $off.$del.$lin if $f_b; $lin = $num.$del.$lin if $f_n; $lin = $nam.$del.$lin if $f_h; $lin = $del.$lin if $f_e; if (push(@bef, $lin) > $bef) { shift(@bef); $sep = 1; } } } elsif ($fil) { $fil->{$nam} = 1; last; } else { unless ($f_c) { $lin = qq{$off|$lin} if $f_b; $lin = qq{$num|$lin} if $f_n; $lin = qq{$nam|$lin} if $f_h; if ($sep) { push(@{$hit}, q{--}); $sep = 0; } push(@{$hit}, splice(@bef), $lin); $aft = $r_a; } ++$cnt; if (defined($max) && --$max < 1) { last unless $aft; $chk = 0; } } $off = $ifh->tell if $f_b; } $ifh->close; push(@{$hit}, $f_h ? qq{$nam|$cnt} : $cnt) if $f_c; return 1; } =head2 S<$h-Els([$level,@dir]])> This method returns the list of all files matching the specified pattern in the specified directory structures. By default, it searches the files in the whole directory structure. When no pattern is specified, it returns all files entries from the directory. The depth of the search is limited to the specified level, or C<0> by default. =cut sub ls { my ($slf, $lvl, @fil) = @_; my ($cur, $pat, $pth, @hit, @pth); # Check the argument $lvl = 0 unless defined($lvl); # Search files in the directory structure $slf->{'sta'} = 4; $slf->apply({bot => [\&_ls_bot, [\@hit, $lvl]], dir => [\&_ls_dir, [\@hit, $lvl]], dft => 1, err => [\&_ls_err, []], }, 0, @fil); $slf->{'sta'} &= 3 if @hit; ## no critic (Bit) # Return the hit list return @hit; } sub _ls_bot { my ($slf, $pth, $cur, $req, $hit, $lvl, @itm) = @_; my ($fct, $flg, $nam, $rec, @cur, @pth, @sub); $fct = $slf->{'_dsc'}->{'itm'}; $flg = $lvl && exists($cur->[1]->{q{..}}); @pth = @{$pth}; foreach my $itm (@itm) { $rec = $cur->[1]->{$itm}; if (ref($rec->[1]) eq 'HASH') { _ls_dir($slf, [@pth, $itm], $rec, $req, \@sub, $lvl); push(@cur, [&$fct($itm), _get_stat($rec)]) if $flg && (defined($rec->[0]) || $slf->{'all'}) && !($slf->{'all'} && ($itm eq q{.} || $itm eq q{..})); } elsif (defined($rec->[2]) || $slf->{'all'}) { push(@cur, [&$fct($itm), _get_stat($rec)]); } } if (@cur) { if ($slf->{'all'}) { push(@cur, [$nam, _get_stat($cur->[1]->{q{.}})]) if defined($nam = &$fct(q{.})); push(@cur, [$nam, _get_stat($cur->[1]->{q{..}})]) if defined($nam = &$fct(q{..})) && exists($cur->[1]->{q{..}}); } unshift(@sub, [&{$slf->{'_dsc'}->{'nat'}}($slf->{'_dsc'}->{'cls'}, @pth, q{}), @cur]); } push(@{$hit}, @sub); return 1; } sub _ls_dir { my ($slf, $pth, $cur, $req, $hit, $lvl) = @_; my ($fct, $nam, $rec, @cur, @pth, @sub); $fct = $slf->{'_dsc'}->{'itm'}; @pth = @{$pth}; foreach my $itm (sort keys(%{$cur->[1]})) { next if $itm eq q{.} || $itm eq q{..}; next unless $itm !~ m/^\./ || $slf->{'all'}; ## no critic (Unless) $rec = $cur->[1]->{$itm}; if (ref($rec->[1]) eq 'HASH') { _ls_dir($slf, [@pth, $itm], $rec, $req, \@sub, $lvl - 1) if $lvl > 0; push(@cur, [&$fct($itm), _get_stat($rec)]) if defined($rec->[0]) || $slf->{'all'}; } elsif (defined($rec->[2]) || $slf->{'all'}) { push(@cur, [&$fct($itm), _get_stat($rec)]); } } if (@cur) { if ($slf->{'all'}) { push(@cur, [$nam, _get_stat($cur->[1]->{q{.}})]) if defined($nam = &$fct(q{.})); push(@cur, [$nam, _get_stat($cur->[1]->{q{..}})]) if defined($nam = &$fct(q{..})) && exists($cur->[1]->{q{..}}); } unshift(@sub, [&{$slf->{'_dsc'}->{'nat'}}($slf->{'_dsc'}->{'cls'}, @pth, q{}), @cur]); } push(@{$hit}, @sub); return 1; } sub _ls_err { my ($slf, $fil) = @_; $slf->{'sta'} |= 2; ## no critic (Bit) die get_string('NO_FILE', $fil) if $slf->{'err'}; return; } =head2 S<$h-Erefresh($flg)> This method clears the current index and loads all index files applying the current restrictions. Unless the flag is set, it loads the file status information. It returns the object reference except when it has loaded index files. Otherwise, it returns an undefined value. =cut sub refresh { my ($slf, $flg) = @_; my ($cnt, $ext, $ifh, $pat); # Build the directory tree $cnt = 0; $slf->{'_idx'} = [undef, {}, {}]; foreach my $nam ($slf->{'ctl'}->get_files('I')) { $cnt += _load_index($slf, $ifh) if defined($ifh = $slf->{'ctl'}->find_handle($nam)); } unless ($flg) { foreach my $nam ($slf->{'ctl'}->get_files('D')) { $cnt += _load_stat($slf, $ifh) if defined($ifh = $slf->{'ctl'}->find_handle($nam)); } } # Return the object reference return $cnt ? $slf : undef; } =head2 S<$h-Epredict([@itm])> This method predicts the number of files and the accumulated file size that will result from the extraction of the requested items. When the list of requested items is empty, it considers all collected files. Files and directories specified as items can contain wild cards. C<*> matches any string, including the null string. C matches any single character. C<[...]> matches any one of the enclosed characters, which are taken literally. When a path element starts with a C<.>, this character must be matched explicitly. It returns the number of files and the accumulated file size as a list. =cut sub predict { my ($slf, @itm) = @_; my ($cnt, $ext, $siz); # Identify collected files $slf->{'sta'} = 4; $slf->{'_ext'} = $ext = {}; $slf->apply({dir => [\&_extract_dir, []], err => [\&_extract_err, []], fil => [\&_extract_fil, [], 1], top => [\&_extract_dir, []], }, 0, @itm); # Accumulate collected file contributions $cnt = $siz = 0; foreach my $sub (values(%{$ext})) { foreach my $rpt (values(%{$sub})) { foreach my $blk (values(%{$rpt})) { $siz += $blk->[0]; ++$cnt; } } } return ($cnt, $siz); } =head2 S<$h-Erestrict(@modules)> This method stores the list of modules that can contribute. When called without argument, it clears any previous restriction. It returns the object reference. =cut sub restrict { my ($slf, @mod) = @_; # Declare the restrictions $slf->{'ctl'}->restrict(@mod); # Return the object reference return $slf; } =head2 S<$h-Erise> This method returns to the previous context and adapts the archive context accordingly. =cut sub rise { my ($slf) = @_; my ($ctl); # Dive inside the archive $ctl = $slf->{'ctl'}; $ctl = $ctl->get_archive($ctl->rise); # Update the index delete($slf->{'_idx'}); $slf->{'ctl'} = $ctl = $ctl->get_current; $slf->{'_dsc'} = $tb_dsc{$slf->{'_fam'} = $ctl->get_family($slf->{'_cfg'}->get_family)}; $slf->{'_pre'} = $ctl->get_prefix; # Return the object reference return $slf; } =head2 S<$h-Eselect($pre)> This method selects the specified prefix and adapts the archive context accordingly. =cut sub select ## no critic (Builtin) { my ($slf, $pre) = @_; unless ($slf->{'ctl'}->select($pre)) { delete($slf->{'_idx'}); $slf->{'ctl'} = $slf->{'ctl'}->get_current; $slf->{'_dsc'} = $tb_dsc{$slf->{'_fam'} = $slf->{'ctl'}->get_family($slf->{'_cfg'}->get_family)}; $slf->{'_pre'} = $slf->{'ctl'}->get_prefix; } # Return the object reference return $slf; } # --- Internal routines ------------------------------------------------------- # Add a directory entry sub _add_dir { my ($slf, $pth, $alt) = @_; my ($cur, $fct, $fil, $lvl, $new, $syn, @dir, @syn); @dir = split(/\//, $pth, -1); # Needs all fields to support '/' if (defined($alt)) { @syn = split(/\//, $alt, -1); # Needs all fields to support '/' @syn = () unless (scalar @dir) == (scalar @syn); } $cur = $slf->{'_idx'}; $fct = $slf->{'_dsc'}->{'cnv'}; $lvl = 0; foreach my $itm (@dir) { $syn = shift(@syn); next unless length($itm) || $lvl == 0; die get_string('BAD_DIR', $pth) unless ref($cur->[1]) eq 'HASH'; # Check for tree improvement $itm = &$fct($cur, $itm, $itm); if (defined($syn)) { $syn = &$fct($cur, $syn, $syn); if ($syn ne $itm && exists($cur->[1]->{$syn})) { if (exists($cur->[1]->{$itm})) { _merge_nodes($cur->[1]->{$itm}, delete($cur->[1]->{$syn}), $pth); } else { $cur->[1]->{$itm} = delete($cur->[1]->{$syn}); } $cur->[2]->{lc($syn)} = $itm; } } # Create new node when needed unless (exists($cur->[1]->{$itm})) { $cur->[1]->{$itm} = $new = [undef, {}, {}, $itm]; $new->[1]->{q{.}} = $new; $new->[1]->{q{..}} = $cur; if (defined($syn)) { $syn = &$fct($cur, $syn, $syn); $cur->[2]->{lc($syn)} = $itm if $syn ne $itm; } } # Make it the current node $cur = $cur->[1]->{$itm}; $lvl++; } return $cur; } # Add a file entry sub _add_file { my ($slf, $pth) = @_; my ($cur, $dir, $fil, $lvl, $sub, @dir); if ($pth =~ m{^(.*)/(.*)$}s) { ($cur, $fil) = (_add_dir($slf, $1), $2); die get_string('BAD_DIR', $pth) unless ref($cur->[1]) eq 'HASH'; } else { ($cur, $fil) = ($slf->{'_idx'}, $pth); } # Return an existing record if (defined($sub = &{$slf->{'_dsc'}->{'cnv'}}($cur, $fil))) { $cur = $cur->[1]->{$sub}; if (ref($cur->[1]) eq 'HASH') { die get_string('BAD_FILE', $pth) if (scalar keys(%{$cur->[1]})) > 2; $cur->[1] = []; $cur->[2] = undef; } return $cur; } # Create a new file node return $cur->[1]->{$fil} = [undef, []]; } # Decode a path sub _dec_path { my ($pth) = @_; $pth = RDA::Object::decode($1) if $pth =~ m/^"([^"]*)"$/; return $pth; } # Get a block containing the file sub _get_block { my ($slf, $cur) = @_; my ($blk, $frg, $lgt, $max); # Reject a directory return if ref($cur->[1]) eq 'HASH'; # Find the best block definition unless (defined($blk = $cur->[2])) { return unless $slf->{'all'}; $frg = $max = -1; foreach my $rec (@{$cur->[1]}) { (undef, undef, $lgt) = split(/\//, $rec, 4); ($blk, $max) = ($rec, $lgt) if $lgt > $max; } } return defined($blk) ? [$blk, $frg] : undef; } # Get a node sub _get_node { my ($slf, $pth) = @_; my ($cur, $fct, $lvl, @res, @sub); # Search for the node $cur = $slf->{'_idx'}; $fct = $slf->{'_dsc'}->{'cnv'}; $lvl = 0; if (ref($pth) eq 'ARRAY') { @sub = @{$pth}; } elsif (defined($pth)) { @sub = split(/\//, &{$slf->{'_dsc'}->{'nrm'}}($pth), -1); } foreach my $sub (@sub) { next unless $lvl == 0 || length($sub); return unless ref($cur->[1]) eq 'HASH'; return unless defined($sub = &$fct($cur, $sub)); push(@res, $sub); $cur = $cur->[1]->{$sub}; ++$lvl; } return ($cur, @res) if wantarray; return $cur; } # Get the file information sub _get_stat { my ($cur) = @_; my ($lgt, $max, $ref, $sta); # Return file information when available return @{$cur->[0]} if defined($cur->[0]); # Determine minimal directory information ## no critic (Number,Zero) return (0, 0, 040000, 1, -1, -1, 0, 0, 0, 0, 0, 65536, 0) if ref($cur->[1]) eq 'HASH'; # Determine minimal file information $max = -1; foreach my $rec (@{$cur->[1]}) { (undef, undef, $lgt) = split(/\//, $rec, 4); $max = $lgt if $lgt > $max; } return (0, 0, 0, 1, -1, -1, 0, $max, 0, 0, 0, 65536, ($max + 65535) >> 16); } # Extract index information sub _load_index { my ($slf, $ifh) = @_; my ($arg, $cls, $cur, $fct, $opt, $pth, $typ, @rec); # Load the index information eval { $cls = $slf->{'_dsc'}->{'cls'}; $fct = $slf->{'_dsc'}->{'nrm'}; while (<$ifh>) { ($typ, @rec) = split(/\|/, $_, -1); # Needs all fields for the pop pop(@rec); if ($typ eq 'S') { _add_dir($slf, &$fct($pth), &$fct(_dec_path($rec[1]))) if $cls->is_absolute($pth = _dec_path($rec[0])); } elsif ($typ eq 'F') { next unless $rec[1] =~ m{^\d+/\d+}; $cur = _add_file($slf, &$fct(_dec_path($rec[2]))); push(@{$cur->[1]}, $rec[3].q{/}.$rec[1]); $cur->[2] = q{F/}.$rec[1] if $rec[3] eq 'F'; } } }; $slf->{'_agt'}->add_error($@) if $@; $ifh->close; return 1; } # Extract file status information sub _load_stat { my ($slf, $ifh) = @_; my ($alt, $cur, $fct, $lnk, $pth, @sta); # Create the file tree $slf->{'_idx'} = [undef, {}, {}] unless exists($slf->{'_idx'}); # Load the metadata file eval { $fct = $slf->{'_dsc'}->{'nrm'}; $lnk = $slf->{'_dsc'}->{'lnk'}; while (<$ifh>) { ($pth, @sta) = split(/\|/, $_, -1); # Needs all fields for the pop pop(@sta); next unless @sta; $sta[13] = &$lnk(_dec_path($sta[13])) if defined($sta[13]); ((($sta[2] & 0170000) == 040000) ## no critic (Bit,Number) ? _add_dir($slf, &$fct(_dec_path($pth))) : _add_file($slf, &$fct(_dec_path($pth))))->[0] = [@sta]; } }; $slf->{'_agt'}->add_error($@) if $@; $ifh->close; return 0; } # Merge nodes sub _merge_nodes { my ($dst, $src, $pth) = @_; my ($cur, @key); # Merge directory information $dst->[0] = $src->[0] unless defined($dst->[0]); # Merge entries if (ref($src->[1]) eq 'HASH') { delete($src->[1]->{q{.}}); delete($src->[1]->{q{..}}); if (@key = keys(%{$src->[1]})) { die get_string('BAD_DIR', $pth) unless ref($dst->[1]) eq 'HASH'; foreach my $key (@key) { $cur = $src->[1]->{$key}; $cur->[1]->{q{..}} = $dst if ref($cur->[1]) eq 'HASH'; if (exists($dst->[1]->{$key})) { _merge_nodes($dst->[1]->{$key}, $cur, $pth); } else { $dst->[1]->{$key} = $cur; } } foreach my $key (keys(%{$src->[2]})) { $dst->[2]->{$key} = $src->[2]->{$key}; } } } elsif (ref($dst->[1]) eq 'HASH') { die get_string('BAD_FILE', $pth) if (scalar @{$dst->[1]}) > 2; $dst->[1] = $src->[1]; $dst->[2] = undef; } else { push(@{$dst->[1]}, @{$src->[1]}); } # Delete the source entry undef @{$src}; return; } # --- UNIX-specific routines -------------------------------------------------- # Convert the directory element sub _u_convert { my ($cur, $sub, $dft) = @_; return exists($cur->[1]->{$sub}) ? $sub : $dft; } # Find the relevant entries sub _u_find { my ($cur, $pat) = @_; my ($tbl, @tbl); foreach my $nam (keys(%{$tbl = $cur->[1]})) { push(@tbl, $nam) if $nam =~ m/$pat/; } return (sort @tbl); } # Normalize the directory entry sub _u_item { return shift; } # Normalize the link sub _u_link { return shift; } # Create a native path sub _u_native { my ($cls, @pth) = @_; return $cls->cat_dir(@pth); } # Normalize the path sub _u_norm { return shift; } # --- VMS-specific routines --------------------------------------------------- # Convert the directory element sub _v_convert { my ($cur, $sub, $dft) = @_; my ($ref, $tbl); $tbl = $cur->[1]; return $sub if exists($tbl->{$sub}); $ref = lc($sub); foreach my $dir (keys(%{$tbl})) { return $dir if lc($dir) eq $ref; } return $dft; } # Find the relevant entries sub _v_find { my ($cur, $pat) = @_; my ($tbl, @tbl); foreach my $nam (keys(%{$tbl = $cur->[1]})) { push(@tbl, $nam) if $nam =~ m/$pat/i; } return (sort @tbl); } # Normalize the directory entry sub _v_item { my ($nam) = @_; return ($nam eq q{.} || $nam eq q{..}) ? undef : ($nam =~ m/[\:\.]/) ? $nam : $nam.q{.DIR}; } # Normalize the link sub _v_link { return shift; } # Create a native path sub _v_native { my ($cls, $drv, @pth) = @_; my ($dir, $nam); $nam = pop(@pth); if ($drv eq 'root') { $drv = q{}; } elsif ($drv !~ m/:$/) { unshift(@pth, $drv); $drv = q{}; } $dir = q{[}.join(q{.}, @pth).q{]}; $dir =~ s/\[\]//g; return $drv.$dir.$nam; } # Normalize the path sub _v_norm { my ($pth) = @_; my (@tbl); push(@tbl, $1) if $pth =~ s{^([^:]*:)}{}; while ($pth =~ s{\[(.*?)\]}{}) { push(@tbl, split(/\./, $1)); } push(@tbl, $pth) if length($pth); return join(q{/}, @tbl); } # --- Windows-specific routines ----------------------------------------------- # Convert the directory element sub _w_convert { my ($cur, $sub, $dft) = @_; my ($ref, $tbl); $tbl = $cur->[1]; return $sub if exists($tbl->{$sub}); $ref = lc($sub); return $cur->[2]->{$ref} if exists($cur->[2]->{$ref}); foreach my $dir (keys(%{$tbl})) { return $dir if lc($dir) eq $ref; } return $dft; } # Find the relevant entries sub _w_find { my ($cur, $pat) = @_; my ($tbl, %tbl); foreach my $nam (keys(%{$tbl = $cur->[1]})) { $tbl{$nam} = 1 if $nam =~ m/$pat/i; } foreach my $nam (keys(%{$tbl = $cur->[2]})) { $tbl{$tbl->{$nam}} = 2 if $nam =~ m/$pat/i; } return (sort keys(%tbl)); } # Normalize the directory entry sub _w_item { return shift; } # Normalize the link sub _w_link { my ($pth) = @_; $pth =~ s{\\}{/}g; $pth =~ s{^/cygdrive/([a-z])/}{\u$1:/}i; $pth =~ s{^([a-z]):}{\u$1:}; $pth =~ s{/}{\\}g if $pth =~ m{^([A-Z]:|//\w+/)}; return $pth; } # Create a native path sub _w_native { my ($cls, @pth) = @_; my ($pth); return (@pth && $pth[0] =~ m/:$/) ? $cls->cat_native(@pth, q{}) : length($pth = join(q{/}, @pth)) ? $pth : q{/}; } # Normalize the path sub _w_norm { my ($pth) = @_; $pth =~ s{\\}{/}g; $pth =~ s{^/cygdrive/([a-z])/}{\u$1:/}i; $pth =~ s{^([a-z]):}{\u$1:}; return $pth; } # --- SDCL extensions --------------------------------------------------------- # Initialize the index control sub _begin_index { my ($pkg) = @_; $pkg->set_top('IDX', $pkg->get_collector->get_registry('IDX', \&new, __PACKAGE__, $pkg->get_agent)); return; } 1; __END__ =head1 SEE ALSO L, L, L, L, L, L, L, L, L, L, L, L, L, L, L =head1 COPYRIGHT NOTICE Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved. =head1 TRADEMARK NOTICE Oracle and Java are registered trademarks of Oracle and/or its affiliates. Other names may be trademarks of their respective owners. =cut