package Data::Traverse;

# $Id: Traverse.pm,v 1.11 2006/01/03 21:14:32 mrodrigu Exp $

use 5.006;
use strict;
use warnings;

use Scalar::Util qw(refaddr);

use UNIVERSAL qw(isa);

our @ISA= qw(Exporter);

our @EXPORT      = qw();
our @EXPORT_OK   = qw(scalars_level refs_level scalars refs);
our %EXPORT_TAGS = ( all      => [@EXPORT_OK],
                     lists    => [ qw(scalars_level refs_level scalars refs)],
                   );


our $VERSION = '0.03';


sub prune()  { 'Data::Traverse::prune' }
sub finish() { 'Data::Traverse::finish' }

sub scalars_level
  { my( $data, $level)= @_;
    my @list= grep { ! ref } _all_data_for_level( $level, [ $data ]);
    return wantarray ? @list : \@list;
  }

sub refs_level
  { my( $data, $ref, $level)= @_;
    my @list= grep { isa( $_, $ref) } _all_data_for_level( $level, [ $data ]);
    return wantarray ? @list : \@list;
  }

sub _all_data_for_level
  { my( $level, $list)= @_;
    unless( $level) { return wantarray ? @$list : $list; }
    my $new_list= _push_refs( $list);
    return _all_data_for_level( --$level, $new_list);
  }


sub scalars
  { my( $data, $level)= @_;
    my @list= grep { ! ref } _all_data( $level, [$data], [ $data ]);
    return wantarray ? @list : \@list;
  }

sub refs
  { my( $data, $ref, $level)= @_;
    my @list= grep { isa( $_, $ref) } _all_data( $level, [$data], [ $data ]);
    return wantarray ? @list : \@list;
  }
    
sub _all_data
  { my( $level, $list, $level_list)= @_;
    if( (defined $level && !$level) || !@$level_list) { return wantarray ? @$list : $list; }
    my $new_list= _push_refs( $level_list);
    push @$list, @$new_list;
    return _all_data( --$level, $list, $new_list);
  }

sub _push_refs
  { my( $list)= @_;
    my $new_list=[];
    foreach my $item (@$list)
      { if( isa( $item, 'ARRAY'))    { push @$new_list, @$item; }
        elsif(  isa( $item, 'HASH')) { push @$new_list, map { $item->{$_} } sort keys %$item; }
      }
    return $new_list;
  }
 

sub new
  { my( $class, $data)= @_;
    $class= ref $class || $class;
    my $self= bless { data  => $data, }, $class;
    $self->_reset;
    return $self;
  }

sub _reset
 { my( $self)= @_;
   $self->{_ancestor_stack}  = [];  # refs to ancestors
   $self->{_key_stack}       = [];  # stack of hash and array indexes to the item
   $self->{_seen}            = {};  # item => 1 if already traversed
   $self->{_current}         = undef;
   return $self;
 }
   
  
sub traverse
  { my( $self, $handler)= @_;
    $self->_reset;
    $self->_traverse( $handler, $self->{data});
  }

   
sub _traverse
  { my( $self, $handler, $item)= @_;

    return 1 if( refaddr( $item) && exists $self->{_seen}->{refaddr( $item)});
    $self->{_seen}->{refaddr( $item)}=1 if( refaddr( $item));
  
    local $_= $item;
    
    my $result= $handler->( $self, $_[2]);
    
    if( $result eq prune ) { return 1;              }
    if( $result eq finish) { $self->_reset; return; }
    if( !ref( $item)     ) { return 1;              }
      
    push @{$self->{_ancestor_stack}}, $item;

    if( isa( $item, 'ARRAY'))
      { push @{$self->{_key_stack}}, -1;
        foreach my $array_elt (@$item)
          { $self->{_key_stack}->[-1]++;
            $self->_traverse( $handler, $array_elt) || return;
          }
       pop @{$self->{_key_stack}};
      }
    elsif( isa( $item, 'HASH'))
      { foreach my $key (sort keys %$item)
          { push @{$self->{_key_stack}}, $key;
            $self->_traverse( $handler, $item->{$key}) || return;
            pop @{$self->{_key_stack}};
          }
      }
    pop @{$self->{_ancestor_stack}};
    return 1;
  }

sub path_matches
  { my( $self, $exp)= @_;
    my $path_string= $self->path;
    return $path_string=~ /$exp/;
  }

sub path
  { my( $self)= @_;
    my @keys= @{$self->{_key_stack}};
    my $path_string='/';
    $path_string.= @keys ? join( '/', @keys) : '';
  }

sub parent
  { my( $self)= @_;
    return $self->{_ancestor_stack}->[-1];
  }

sub ancestors
  { my( $self)= @_;
    return @{$self->{_ancestor_stack}};
  }

sub item_index
  { my( $self)= @_;
    if( isa( $self->parent, 'ARRAY')) { return $self->{_key_stack}->[-1]; }
    else                              { return -1;                       }
  }

sub item_key
  { my( $self)= @_;
    if( isa( $self->parent, 'HASH'))  { return $self->{_key_stack}->[-1]; }
    else                              { return '';                        }
  }
  
sub parent_key
  { my( $self)= @_;
    my $grand_parent= $self->{_ancestor_stack}->[-2] || return '';
    return isa( $grand_parent, 'HASH') ? $self->{_key_stack}->[-2] : '';
  }

sub parent_index
  { my( $self)= @_;
    my $grand_parent= $self->{_ancestor_stack}->[-2] || return -1;
    return isa( $grand_parent, 'ARRAY') ? $self->{_key_stack}->[-2] : -1;
  }

sub level
  { my( $self)= @_;
    return scalar @{$self->{_key_stack}};
  }

sub is_scalar { return !ref }
    
         
1;
__END__

=head1 NAME

Data::Traverse - Perl extension for traversing complex data structures

=head1 SYNOPSIS

Data::Traverse supports 2 modes: a simple procedural interface and an 
object-oriented interface.

=head2 The procedural interface

The procedural interface can be used to retrieve parts of a complex data structure.

It is used through C<use Data::Traverse qw(:lists)>

  use Data::Traverse qw(:lists);
  
  my $data= ...;                           # a complex data structure

  my @values= scalars( $data);             # all scalars in the structure
  my @values= refs( $data, 'LWP::Simple'); # all LWP::Simple objects in the structure

=head2 The OO interface

The OO interface is used to write iterators that go through a data structure.

  my $iter= Data::Traverse->new( $data);
  $iter->traverse( sub { my( $iter, $item)= @_;
                         print "$item\n" if( $iter->item_key eq 'id');
                         return $iter->prune if( $iter->path_matches( '/foo/bar'));
                       }
                  );
                  
  $iter->traverse( sub { $_[1]++ if( $_[0]->is_scalar); }); # changes the data

More methods are available to get information on the current context.

=head1 DESCRIPTION

Data::Traverse lets you traverse complex data structures without needing
to know all about them.

It can be used for example with the data structure created by XML::Simple

=head2 Procedural Interface

=over 4

=item refs ($data, $ref, $optional_level)

return the list of references of the C<$ref> type (as per
C<UNIVERSAL::isa( $field, $ref)>) in the data structure, in the
order of traversal (hashes are traversed through the dictionary
order of their keys).

The C<$optional_level> argument can be used to limit the depth
in the data structure, 0 being <$data> itself. 

=item scalars ($data, $optional_level)

return the list of scalar values in the data structure, in the
order of traversal (hashes are traversed through the dictionary
order of their keys). 

The C<$optional_level> argument can be used to limit the depth
in the data structure, 0 being <$data> itself..

=item refs_level ($data, $ref, $level)

return the list of references to C<$ref> at C<$level> in the 
data structure.

=item scalars_level ($data, $level)

return the list of scalar values at C<$level> in the data structure.

=back

=head2 Object-Oriented Interface

The Object-Oriented interface provides iterators on arbitrary data 
structures. A handler is associated with the iterator and is called
for every item in the data structure. Within the handler a host
of methods can be called to get information about the current context.

=over 4

=item new ($data)

create an iterator on C<$data>

=item traverse ($handler)

traverse the data structure and apply the handler to all item of
the data structure. An item is anything in the data structure, scalar,
arrayref or hashref.

the handler is called with the iterator and the current item as arguments.
Use C<$_[1]> if you want to update the original data structure.

the handler also receives the item as C<$_>

in the handler you can use the following functions:

=over 4

=item path

the current path to an item in the data structure is built from the
hash keys or array indexes to get to the item, joined with 'C</>'. 

For example if C<< $data= { foo => [ qw/a B<b> c/], bar => []} >>
when the iterator gets to B<b> the path (C<< $t->path >> in the handler)
will be C</foo/1>  

=item path_matches ($exp)

C<$exp> is a regular expression. The path is matched against that regexp.

=item parent

the parent of the current item (a hashref or arrayref that includes
the item)

=item ancestors

the list (root first) of ancestors of the current item

=item item_index

if the item is an element of an array then this is the item index in the array, 
otherwise -1 is returned

=item item_key

if the item is a value in a hash then this is the item key in the hash, 
otherwise the empty string is returned

=item parent_key

if the parent of the item is a value in a hash then this is the associated key, 
otherwise the empty string is returned

=item parent_index

if the parent of the item is a value in an array then this is its index, otherwise
-1 is returned

=item level

the level of depth at which the item is found in the data structure (the size of
the ancestors stack)

=item is_scalar

return true if the item is a scalar (this is just C<!ref> but reads better).

=item prune

if the handler returns prune then the children of the current item are
not traversed

=item finish

ends the traversal and returns

=back

=back

=head1 BUGS/TODO

At this point cycles in the data structure are not properly processed:

- the procedural interface will most likely enter deep recursion, 
        
- the OO interface will only get once to each item, but then testing the
  context will only occur once

The procedural interface does a breadth-first traversal of the data,
the OO interface does a depth-first traversal, it would be nice to be
able to choose the algorithm.

More tests need to be written (isn't this always the case? ;--)

It would be nice to have more generic methods to query the context (XPath-based?)

=head1 SEE ALSO

=head1 AUTHOR

Michel Rodriguez, E<lt>mirod@cpan.orgE<gt>

Feedback and comments welcome!

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004-2006 by Michel Rodriguez

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=cut
