#!/usr/bin/perl -w
# $Id: gen_benchmark,v 1.2 2003/09/09 18:56:23 mrodrigu Exp $
use strict;

# ---------------------------------------------------------------------------
# Generation parameters
# ---------------------------------------------------------------------------

# generation stops once at least this many regular (el_*) elements were output
my $ELT_NUMBER= 10000;

my $INDENT= 2;               # not referenced anywhere in the visible code

# regular elements are named <basename><level><variant><n>, n in 1..$ELT_VARIANT_NB
my $ELT_BASENAME   ="el_";
my $ELT_VARIANT    ="_var_";
my $ELT_VARIANT_NB = 5;

# a regular element at depth 'level' gets text instead of sub elements with
# probability 1/($SUB_ELT_PROB - level + 2)
my $SUB_ELT_PROB   = 7;
# an element that has sub elements gets between 0 and $NB_SUB_ELT of them
my $NB_SUB_ELT     = 6;

my $ATT_BASENAME   = "att_";
my $ATT_VARIANT_NB = 5;      # not referenced anywhere in the visible code

# tag of the text-only elements a benchmark is expected to extract
my $ELT_TO_EXTRACT = "message";

# tag of the elements a benchmark is expected to process, the attribute that
# carries the action, and the possible actions
my $ELT_TO_PROCESS      = "process";
my $ACTION_ATT          = 'action';
my @ACTION              = qw( delete prefix add_att change_tag duplicate erase);

# (min => max) bounds handed to random_text for element text / attribute values
my %TEXT_LENGTH = ( 8 => 35);
my %ATT_LENGTH  = ( 1 => 2);

# ---------------------------------------------------------------------------
# Counters, reported on STDERR at the end of the run
# ---------------------------------------------------------------------------
my $extract_nb = 0;          # <message> elements generated
my $process_nb = 0;          # <process> elements generated
my %process_nb;              # action => number of <process> elements using it

print '<?xml version="1.0" encoding="UTF-8" ?>', "\n";
print "<test>";

my $elt_number=0;            # regular elements generated so far (updated by gen_elt)
my $top_level = 0;           # direct children of <test>; starts at 0 so the
                             # report matches the number of gen_elt( 1) calls

while( $elt_number < $ELT_NUMBER)
  { gen_elt( 1);
    $top_level++;
  }

print "\n</test>\n";
print STDERR "elements: $elt_number\n  - top_level: $top_level\n  - to_extract: $extract_nb\n  - to_process: $process_nb\n";
# sorted so that the report is stable from one run to the next
foreach my $action (sort keys %process_nb)
  { printf STDERR "      - action_%-12s: %s\n", $action, $process_nb{$action}; }
exit;

# Prints one randomly chosen element (and, recursively, its content) to STDOUT.
#
#   $level      - depth of the element, 1 for the children of the root
#   $no_process - when true, no <process> element is generated at this point
#                 or below it
#
# A single draw picks the kind of element:
#   - 0: a text-only $ELT_TO_EXTRACT element
#   - 1: (unless $no_process) a $ELT_TO_PROCESS element carrying a random action,
#        whose sub elements are generated with $no_process set
#   - anything else: a regular element with random attributes, holding either
#        sub elements or text
# Updates the file-level counters $extract_nb, $process_nb, %process_nb and
# $elt_number (the latter only for regular elements).
sub gen_elt
  { my( $level, $no_process)= @_;

    my $kind= random( 0 => $ELT_VARIANT_NB +2);

    if( $kind == 0)
      { my $text= random_text( %TEXT_LENGTH);
        print "<$ELT_TO_EXTRACT>", $text, "</$ELT_TO_EXTRACT>";
        $extract_nb++;
      }
    elsif( ($kind == 1) && !$no_process)
      { my $action= $ACTION[ random( 0, $#ACTION) ];
        print qq{<$ELT_TO_PROCESS $ACTION_ATT="$action">};
        $process_nb++;
        $process_nb{$action}++;

        # no <process> element inside a <process> element
        my $children_left= random( 0 => ($NB_SUB_ELT));
        while( $children_left-- > 0)
          { gen_elt( $level+1, 1); }

        print "</$ELT_TO_PROCESS>";
      }
    else
      { my $variant = random( 1 => $ELT_VARIANT_NB);
        my $elt_name= join( '', $ELT_BASENAME, $level, $ELT_VARIANT, $variant);
        my $atts    = gen_atts( $elt_name);
        print '<' . $elt_name . $atts . '>';

        # a 0 draw means text content, the deeper the element the likelier
        my $has_children= random( 0 => ($SUB_ELT_PROB-$level+1));
        if( !$has_children)
          { print random_text( %TEXT_LENGTH); }
        else
          { my $children_left= random( 0 => ($NB_SUB_ELT));
            while( $children_left-- > 0)
              { gen_elt( $level+1, $no_process); }
          }

        print '</' . $elt_name . '>';
        $elt_number++;
      }
  }
    
# Returns the attribute part of a start tag for element $elt_name: a string of
# ' name= "value"' chunks (empty when no attribute was drawn).
# Attributes specific to the element (named <elt_name>_<att basename><n>) come
# first, then the generic ones (<att basename><n>); the numbers come from
# random_range( 0 => 2) and each value from random_text( %ATT_LENGTH).
sub gen_atts
  { my( $elt_name)= @_;

    my $specific_prefix= $elt_name . '_' . $ATT_BASENAME;

    my @att_names;
    push @att_names, map { $specific_prefix . $_ } random_range( 0 => 2);
    push @att_names, map { $ATT_BASENAME . $_ }    random_range( 0 => 2);

    my $atts= '';
    foreach my $att_name (@att_names)
      { $atts .= qq{ $att_name= "} . random_text( %ATT_LENGTH) . qq{"}; }

    return $atts;
  }

# Returns a random integer between $start and $end, both included.
#
# The offset is truncated before $start is added: int() truncates toward zero,
# so truncating after the shift would skew ranges that include negative
# numbers (and could even return a value above $end).
sub random
  { my( $start, $end)= @_;
    return $start + int( rand( $end-$start+1));
  }

# Draws N with random( $start => $end) and returns the list ( 0 .. N), i.e.
# N+1 items, except that a draw of 0 gives the empty list.
# Callers use the result either for its values (attribute numbers) or just
# for its length (number of words to generate).
sub random_range
  { my( $start, $end)= @_;
    my $upper= random( $start => $end);
    return $upper ? ( 0 .. $upper) : ();
  }

# The word list is loaded in a BEGIN block: the main code at the top of the
# file runs to its 'exit' (calling random_text on the way) without ever
# reaching this point, so the list has to be filled at compile time.
# @words is private to the block and shared by the 2 subs defined in it.
BEGIN {
    my @words;

    # use the first dictionary that can be opened
    my $words_fh;
    my $opened= 0;
    foreach my $dict ( "/usr/dict/words", "/usr/share/dict/words") {
        if( open( $words_fh, '<', $dict)) { $opened= 1; last; }
    }
    die "Can't open /usr/dict/words or /usr/share/dict/words: $!" unless $opened;

    # keep only entries made of word characters (no apostrophe, dash, space...)
    # so they can be output in the XML without escaping
    while( my $word= <$words_fh>) {
        chomp $word;
        push @words, $word if $word =~ /^\w+$/;
    }
    close $words_fh;

    # without words get_word would only return empty strings (plus a warning)
    die "No usable word found in the dictionary\n" unless @words;

    srand (time ^ $$);

    # get a random word from the dictionary, lowercased
    sub get_word {
        return lc $words[int(rand(scalar(@words)))];
    }

    # get random words joined by a single space
    # the arguments are a (min => max) pair passed as is to random_range,
    # one word is generated for each item it returns
    sub random_text
      { my( %length_range)= @_;
        my $sep = " ";
        return join($sep, map { get_word() } random_range( %length_range));
      }
}


