################################################################################
#
#  $Header: ChronosPageHash.pm 14-jun-2005.12:47:45 adosani Exp $
#
#
# Copyright (c) 2001, 2005, Oracle. All rights reserved.  
#
#    NAME
#      ChronosPageHash.pm - Chronos Page Hash
#
#    DESCRIPTION
#
#    NOTES
#
#    MODIFIED   (MM/DD/YY)
#     adosani    06/14/05 - bug 4429320 - fix cleanup so it does not skip 
#                           entries 
#     adosani    06/13/05 - bug 4429326 - when more than 1 page in entry, 
#                           delete only completed page (after a gif request) 
#                           instead of whole entry 
#     adosani    06/01/05 - add requests and incomplete pages counts
#     adosani    05/25/05 - fix bug 4307822 - incomplete pages not output 
#                           during cleanup 
#     adosani    05/18/05 - change delimiters for page hash file 
#     adosani    03/20/05 - fix output of page URL for P-R mapping 
#                           and incomplete loads tables
#     adosani    03/15/05 - add post parameters for each page
#     adosani    02/17/05 - group frames together for incomplete loads
#     adosani    02/14/05 - fix server time calculation
#     adosani    02/14/05 - bug 4148884 - fix reg exp in is_page_valid to make 
#                           cme work with frames traffic
#     snakhoda   09/03/04 - add hash for incomplete page loads 
#     snakhoda   05/24/04 - bug 3644485 - call insert_single_chunk with the 
#                           correct number of arguments 
#     snakhoda   04/22/04 - bug 3262971 - enable splitting of large data
#     snakhoda   11/06/03 - improve on bug 3217848 - pass dbmInfo into new
#     snakhoda   10/29/03 - bug 3217842: osd changes for non-Solaris 
#     snakhoda   10/19/03 - wrap eval around hash update 
#     snakhoda   08/28/03 - fix bug 3120829 - no variables in regex'es
#     mashukla   06/04/03 - change regexp to not have urls
#     mashukla   02/15/03 - fixes
#     mashukla   02/04/03 - remove origin server trips
#     mashukla   11/19/02 - fix algo
#     mashukla   11/18/02 - fix after code review
#     mashukla   11/18/02 - fix st calculation
#     mashukla   11/07/02 - add server timings
#     mashukla   10/28/02 - add validate all pgs routine
#     mashukla   10/24/02 - fix page ref arr ref
#     mashukla   10/23/02 - fix delete entry
#     mashukla   10/21/02 - add pg hash update xts for pg
#     mashukla   10/17/02 - fix entry invalidation
#     mashukla   10/10/02 - fix hash entry for x-time-start
#     mashukla   07/10/02 - add page index in entry routine
#     mashukla   07/07/02 - fix lk var
#     asawant    02/20/02 - Temporary fix for Solaris page hash limitation.
#     asawant    12/04/01 - Adding code review fixes.
#     asawant    11/20/01 - Creation
#
#
################################################################################

package ChronosPageHash;
use strict;
use Time::Local;
use ChronosLogging qw($DEBUG_FATAL $DEBUG_LOW $DEBUG_MEDIUM $DEBUG_HIGH);
use ChronosDbmTypes;
use ChronosRequestsHash;
use ChronosIncompleteLoadHash;
use ChronosPageParams;
use vars qw(@ISA);
@ISA = qw(ChronosHash);

# This is the character used to separate data fields in the hash elements
# (it also joins the parts of the hash key).
# There are instances of the character without ref to the var in the code too!
# NOTE: '|' is a regex metacharacter, so it has to be quoted or placed inside
# a character class whenever it is interpolated into a pattern.
my $data_separation_char = '|';

# This is the character used to separate pages in the page list. This character
# CANNOT be the same as the $data_separation_char
# There are instances of the character without ref to the var in the code too!
my $page_separation_char = "\n";    # previously '!'

# Char to separate the fields of one page record (the page, its x-time-start,
# the other per-page fields and the page's elements)
my $time_separation_char = "\t";     # previously ','

# The status of an entry (valid = 1, invalid = 0)
my $VALID_STAT   = 1;
my $INVALID_STAT = 0;

# The type of a page (non-frame page, frame page, or frameset page)
my $NON_FRAME_PAGE = 0;
my $FRAME_PAGE = 1;
my $FRAMESET_PAGE = 2;

# Page-count threshold for an entry: add_page_to_entry reports and empties an
# entry whose nPages exceeds this value before adding the new page.
my $MAX_NUM_PAGES_IN_ENTRY = 20;

#################################################################################
#			STATIC HELPER FUNCTIONS (Internal use only)
#################################################################################
#################################################################################
# NAME: convert_tm_2_sec()
# INPUT: date/time string (format YYYY-MM-DD HH:MM:SS)
# OUTPUT: the number of sec from 1970
# FUNCTION: transform date and time to the equivalent number of sec. No checking
# is performed to assure that the date/time format is correct!
#################################################################################
sub convert_tm_2_sec
{
  # Turn a 'YYYY-MM-DD HH:MM:SS' stamp into seconds since 1970, interpreted
  # in the local time zone. The format of the stamp is not validated.
  my ($stamp) = @_;
  my ($ymd, $hms) = split(/\s+/, $stamp);
  my ($year, $month, $day) = split(/-/, $ymd);
  my ($hour, $minute, $second) = split(/:/, $hms);

  # timelocal() expects (sec, min, hour, mday, mon, year) with a zero-based
  # month and the year given as an offset from 1900
  return(timelocal($second, $minute, $hour, $day, $month - 1, $year - 1900));
}

################################################################################
#				CLASS METHODS
################################################################################

#####################################################################
# DESCRIPTION : constructor
# RETURN VALUE : 
#####################################################################
sub new
{
    # Constructor. The first five arguments after the class are kept on the
    # object; every remaining argument is handed to the parent constructor.
    # Returns the object blessed into $class, with both output counters at 0.
    my ($class, $requestHash, $incompleteLoadHash, $pageParams,
        $queryDelimiterRegexp, $parameterizedUrl) = splice(@_, 0, 6);

    # @_ now holds only the arguments meant for the parent class
    my $self = $class->SUPER::new(@_);

    $self->{REQUESTHASH}                = $requestHash;
    $self->{INCOMPLETELOADHASH}         = $incompleteLoadHash;
    $self->{PAGEPARAMS}                 = $pageParams;
    $self->{QUERYDELIMITERREGEXP}       = $queryDelimiterRegexp;
    $self->{PARAMETERIZEDURL}           = $parameterizedUrl;
    $self->{OUT_INCOMPLETE_PAGES_COUNT} = 0;
    $self->{OUT_PAGE_REQUESTS_COUNT}    = 0;

    return bless($self, $class);
}



#####################################################################
# DESCRIPTION : add the given entry to the hash.
# RETURN VALUE : 0 on success and 1 on failure.
#####################################################################

sub add_entry
{
  # Serialize $entry and store it in the page hash.
  #   $entry  : entry hash ref; submit_tm, visitor_ip and status must be defined
  #   $log_tm : log time string, stored in $entry->{log_tm} as seconds
  # Returns 0 on success and 1 on failure. If the first insert fails the
  # hash is recreated with fresh_hash() and the insert is attempted once more.
  my ($self, $entry, $log_tm) = @_;

  my $is_complete = defined($entry) && defined($entry->{submit_tm}) &&
                    defined($entry->{visitor_ip}) && defined($entry->{status});
  if (!$is_complete)
  {
    $self->{debug}->PrintLog($DEBUG_FATAL,
        'Attempt to insert invalid entry in Page Hash.');
    return(1);
  }

  $entry->{log_tm} = convert_tm_2_sec($log_tm);

  # key is V:ST:GC from 9.0.4 onwards
  my $key = join($data_separation_char,
                 $entry->{submit_tm}, $entry->{visitor_ip}, $entry->{glue_cookie});

  # Header fields; the trailing empty string produces the separator that
  # precedes the page list.
  my $data = join($data_separation_char,
                  $entry->{status}, $entry->{entry_page},
                  $entry->{log_tm}, $entry->{nPages}, '');

  my $pageDataRef = $entry->{page_data_hash_ref};
  my @page_records = ();
  foreach my $page (keys %{$pageDataRef})
  {
      my $page_info = $pageDataRef->{$page};

      # remove tab or new line (defensive programming. CME does not accept uri's with spaces in access log)
      # NOTE(review): inside a character class the '|' is a literal, so pipe
      # characters are stripped as well - confirm whether that is intended.
      $page =~ s/[$time_separation_char|$page_separation_char]//g;
      $page_info->{post_params} =~ s/[$time_separation_char|$page_separation_char]//g;

      my @fields = ($page,
                    $page_info->{type},
                    $page_info->{x_time_start},
                    $page_info->{index},
                    $page_info->{referrer_index},
                    $page_info->{post_params});

      my @page_elems = @{$page_info->{elem_array}};
      foreach my $elem_info (@page_elems)
      {
          # remove tab or new line (the stored element itself is cleaned)
          $elem_info->{elem} =~ s/[$time_separation_char|$page_separation_char]//g;
          push(@fields, $elem_info->{elem}.'^'.$elem_info->{xts}.'^'.$elem_info->{xte}.'^'.$elem_info->{x_cache});
      }
      push(@page_records, join($time_separation_char, @fields));
  }
  $data .= join($page_separation_char, @page_records);

  if ($self->insert_value($key, $data, $self->{max_entry_size}, $self->{debug}) == 0)
  {
      return 0;
  }

  ## delete the entire hash and try to re-insert
  $self->{debug}->PrintLog($DEBUG_LOW, "Insert of [$key,$data] failed. Deleting hash and trying again.");
  if ($self->fresh_hash())
  {
      $self->{debug}->PrintLog($DEBUG_FATAL, "Couldn't open hash afresh.");
      return 1;
  }
  if ($self->insert_value($key, $data, $self->{max_entry_size}, $self->{debug}))
  {
      $self->{debug}->PrintLog($DEBUG_LOW, "Insert of [$key,$data] failed after re-trying. Returning FAILURE");
      return 1;
  }
  return 0;
}

#####################################################################
# DESCRIPTION : deletes the given entry from the hash
#####################################################################

sub delete_entry
{
  # Remove $entry from the page hash. Unless $emptyEntry is true, the pages
  # still held by the entry are first passed to update_gu_hashes().
  my ($self, $entry, $emptyEntry) = @_;
  my ($requestHash, $incompleteLoadHash) =
      ($self->{REQUESTHASH}, $self->{INCOMPLETELOADHASH});

  $self->update_gu_hashes($entry, $requestHash, $incompleteLoadHash)
      unless $emptyEntry;

  # same key layout as used when the entry was added
  my $key = join($data_separation_char,
                 $entry->{submit_tm}, $entry->{visitor_ip}, $entry->{glue_cookie});
  $self->delete_entry_by_key($key);
}

################################################################################
# DESCRIPTION : delete page from entry
################################################################################

sub delete_page_from_entry
{
  # Remove $page from $entry, optionally reporting it first.
  #   $isFrame          : true when $page is a frameset page; the frame pages
  #                       that refer to it are reported and removed with it
  #   $reportRequests   : true to add the page's requests to the request hash
  #   $reportIncomplete : true to add the page to the incomplete loads hash
  # Returns 1 when the entry ended up empty and was deleted, 0 otherwise.
  my ($self, $entry, $page, $isFrame, $reportRequests, $reportIncomplete) = @_;

  my $requestHash        = $reportRequests   ? $self->{REQUESTHASH}        : undef;
  my $incompleteLoadHash = $reportIncomplete ? $self->{INCOMPLETELOADHASH} : undef;
  my $pageDataHashRef    = $entry->{page_data_hash_ref};

  # add page component requests to request hash before deleting it.
  if ($requestHash && $isFrame)
  {
    $self->add_frameset_pages_to_request_hash($entry, $requestHash, $page);
  }
  elsif ($requestHash)
  {
    $self->add_page_to_request_hash($page, $pageDataHashRef->{$page}->{elem_array},
                                    $pageDataHashRef->{$page}->{x_time_start},
                                    $pageDataHashRef->{$page}->{post_params}, $requestHash);
  }

  # if incompletely loaded page (and gu enabled), add page to incomplete pages hash.
  if ($incompleteLoadHash && $isFrame)
  {
    $self->add_frameset_pages_to_incomplete_hash($entry, $incompleteLoadHash, $page);
  }
  elsif ($incompleteLoadHash)
  {
    $self->add_page_to_incomplete_hash($entry, $page, 0, $incompleteLoadHash);
  }

  # the index has to be looked up while the page is still part of the entry
  my $page_index = page_index_in_entry($entry, $page);
  delete $entry->{page_data_hash_ref}->{$page};

  if ($isFrame)
  {
    # drop every frame page that points back at the removed frameset
    my @frame_children = grep {
        ($entry->{page_data_hash_ref}->{$_}->{type} == $FRAME_PAGE) &&
        ($entry->{page_data_hash_ref}->{$_}->{referrer_index} == $page_index)
    } keys %{$entry->{page_data_hash_ref}};
    delete @{$entry->{page_data_hash_ref}}{@frame_children};
  }

  # entry still holds pages: keep it and report that it still exists
  return 0 if (get_page_count_in_entry($entry));

  # no pages left in entry after deleting page: delete the entry itself
  $self->delete_entry($entry, 1);
  return 1;
}

#########################################################################
# DESCRIPTION : checks if an entry exists with the given submit_tm, 
# visitor_ip and glue_cookie. undef is returned if the entry does not exist.
#########################################################################

sub get_entry
{
  # Look up the entry stored for (submit_tm, visitor_ip, glue_cookie) and
  # return it decoded; the result is undef when nothing is stored.
  my ($self, $submit_tm, $visitor_ip, $glue_cookie) = @_;

  my $key = join($data_separation_char, $submit_tm, $visitor_ip, $glue_cookie);
  my $stored = $self->get_value($key);

  return($self->get_entry_($submit_tm, $visitor_ip, $glue_cookie, $stored));
}

sub get_entry_
{
    # Decode a stored hash value into an entry structure.
    #   $value : serialized entry as written by add_entry
    # Returns the entry hash ref, or undef when $value is false.
    my ($self, $submit_tm, $visitor_ip, $glue_cookie, $value) = @_;

    # an explicit undef keeps a one-element result in list context
    return undef unless ($value);

    my $entry = $self->new_entry($submit_tm, $visitor_ip, $glue_cookie);

    # four header fields; everything after the fourth separator is page data
    my $page_data_str;
    ($entry->{status}, $entry->{entry_page}, $entry->{log_tm},
        $entry->{nPages}, $page_data_str) = split(/[$data_separation_char]/, $value, 5);

    foreach my $page_record (split(/$page_separation_char/, $page_data_str))
    {
        # six fixed fields, then one field per element of the page
        my ($page, $type, $x_time_start, $index, $referrer_index,
            $post_params, @elem_strs) = split(/$time_separation_char/, $page_record);

        my @elem_array = map {
            # this takes care of element uri having '^' characters in it.
            my ($elem, $xts, $xte, $x_cache) = $_ =~ m/(.+)\^(.*?)\^(.*?)\^(.*?)$/o;
            +{
                elem => $elem,
                xts => $xts,
                xte => $xte,
                x_cache => $x_cache
            };
        } @elem_strs;

        $entry->{page_data_hash_ref}->{$page} =
        {
            type => $type,
            x_time_start => $x_time_start,
            index => $index,
            referrer_index => $referrer_index,
            post_params => $post_params,
            elem_array => \@elem_array
        };
    }
    return $entry;
}


#####################################################################
# DESCRIPTION : cleans up the hash based on a time and a maximum age
# INPUT :
#   source_hash_file : the full file name of the source hash
#   target_hash_file : the full file name of the target hash
#   current_tm : current time string (format is: YYYY-MM-DD HH:MM:SS)
#   max_age : number in minutes for max age
# RETURN : 0 on success 1 otherwise
#####################################################################

sub cleanup
{
    # Expire old entries: every entry whose log time lies more than $max_age
    # minutes before $current_tm is passed to update_gu_hashes() and then
    # removed from the page hash.
    #   $current_tm : current time string (format is: YYYY-MM-DD HH:MM:SS)
    #   $max_age    : maximum age in minutes
    # Always returns 0.
    my ($self, $current_tm, $max_age) = @_;
    my $current_tm_sec = convert_tm_2_sec($current_tm);

    my $incompleteLoadHash = $self->{INCOMPLETELOADHASH};
    my $requestHash = $self->{REQUESTHASH};

    # multiply max_age by 60 to transform it in seconds
    $max_age *= 60;

    # Visit the keys from the end of the list, as the pop() loop used to, but
    # with foreach so that a key evaluating to false cannot end the scan early.
    foreach my $key (reverse($self->get_keys()))
    {
        next if (!ChronosHash::is_key_valid($key));

        my $value = $self->get_value($key);

        # The separator is '|', a regex metacharacter: unquoted it forms an
        # empty alternation and the key would be split into single characters.
        # The limit of 3 keeps a glue cookie containing the separator intact.
        my ($submit_tm, $visitor_ip, $glue_cookie) =
            split(/\Q$data_separation_char\E/, $key, 3);
        my $entry = $self->get_entry_($submit_tm, $visitor_ip, $glue_cookie, $value);

        # get_entry_ returns undef for a false value; such a key carries no
        # log time and is treated as log time 0, i.e. as expired.
        my $log_tm = (defined($entry) && defined($entry->{log_tm}))
                     ? $entry->{log_tm} : 0;

        if ($current_tm_sec - $log_tm > $max_age)
        {
            # only a decoded entry has pages that can be reported
            $self->update_gu_hashes($entry, $requestHash, $incompleteLoadHash)
                if (defined($entry));
            $self->delete_entry_by_key($key);
        }
    }
    return 0;
}

################################################################################
# updates a page already in page hash. reports existing page as incomplete,
# then overwrites with new page
################################################################################

sub update_page_in_entry
{
  # Replace a page that is already part of $entry. The existing page is
  # removed through delete_page_from_entry() with both reporting flags set
  # (requests and incomplete load), then a fresh non-frame page record with
  # the original index is stored under the same name.
  # Does nothing (plain return) when $page is not part of the entry.
  my ($self, $entry, $page, $x_time_start, $x_time_end,
      $x_cache, $referrer, $post_params) = @_;

  my $pageDataHashRef = $entry->{page_data_hash_ref};
  return unless(exists($pageDataHashRef->{$page}));

  my $elem_page = get_output_page($page, $post_params, $self->{PARAMETERIZEDURL},
                                  $self->{QUERYDELIMITERREGEXP}, $self->{PAGEPARAMS});

  # remember what is needed from the old record before it is deleted
  my $index = $pageDataHashRef->{$page}->{index};
  my $isFrame = ($pageDataHashRef->{$page}->{type} == $FRAMESET_PAGE);
  mark_frame_pages_for_framesets($entry, $page);

  # delete page, report as incomplete and report the component requests.
  $self->delete_page_from_entry($entry, $page, $isFrame, 1, 1);

  # Resolve the referrer in scalar context (as mark_frame_page does) so the
  # call cannot add or drop elements of the hash constructor below.
  my $referrer_index = page_index_in_entry($entry, $referrer);

  # the page request itself is the first element of the new record
  my @elem_array = (
    {
      elem => $elem_page,
      xts => $x_time_start,
      xte => $x_time_end,
      x_cache => $x_cache
    }
  );
  $entry->{page_data_hash_ref}->{$page} =
    {
      index => $index,
      x_time_start => $x_time_start,
      type => $NON_FRAME_PAGE,
      referrer_index => $referrer_index,
      post_params => $post_params,
      elem_array => \@elem_array
    };
}

#####################################################################
# DESCRIPTION : Add pages in entry to incomplete loads hash and 
#               page-request mapping to request hash
#               Group frame pages under frameset for both.
#####################################################################

sub update_gu_hashes
{
    # Report every page of $entry: page-request mappings go to $requestHash
    # and incomplete loads to $incompleteLoadHash. Either hash may be false,
    # in which case that kind of report is skipped. Frame pages are grouped
    # under their frameset page for both reports.
    my ($self, $entry, $requestHash, $incompleteLoadHash) = @_;

    my @framesets = get_frameset_pages($entry);
    mark_frame_pages_for_framesets($entry, @framesets);

    if ($requestHash)
    {
      $self->add_frameset_pages_to_request_hash($entry, $requestHash, @framesets);
      $self->add_nonframe_pages_to_request_hash($entry, $requestHash);
    }

    if ($incompleteLoadHash)
    {
      $self->add_frameset_pages_to_incomplete_hash($entry, $incompleteLoadHash, @framesets);
      $self->add_nonframe_pages_to_incomplete_hash($entry, $incompleteLoadHash);
    }
}

################################################################################
# DESCRIPTION : add given frameset pages server timings to incomplete loads hash
################################################################################

sub add_frameset_pages_to_incomplete_hash
{
  # Report each page of @frameset_pages through add_page_to_incomplete_hash()
  # with the frame flag set. Nothing happens when $incompleteLoadHash is false.
  my ($self, $entry, $incompleteLoadHash, @frameset_pages) = @_;

  if ($incompleteLoadHash)
  {
    foreach my $frameset_page (@frameset_pages)
    {
      $self->add_page_to_incomplete_hash($entry, $frameset_page, 1, $incompleteLoadHash);
    }
  }
}

################################################################################
# DESCRIPTION : add all non-frame pages server timings to incomplete loads hash
################################################################################

sub add_nonframe_pages_to_incomplete_hash
{
  # Report every page of $entry whose type is the non-frame type through
  # add_page_to_incomplete_hash(). Nothing happens when $incompleteLoadHash
  # is false.
  my ($self, $entry, $incompleteLoadHash) = @_;

  if ($incompleteLoadHash)
  {
    foreach my $page (keys %{$entry->{page_data_hash_ref}})
    {
      # frame and frameset pages are handled by the frameset routine
      next unless ($entry->{page_data_hash_ref}->{$page}->{type} == $NON_FRAME_PAGE);
      $self->add_page_to_incomplete_hash($entry, $page, 0, $incompleteLoadHash);
    }
  }
}


################################################################################
# DESCRIPTION : add a particular page's server timings to the incomplete load hash
################################################################################

sub add_page_to_incomplete_hash
{
  # Add one page of $entry to the incomplete loads hash, together with the
  # server time computed by calculate_server_timings() and the page's
  # x_time_start, then bump the incomplete pages counter.
  # Nothing happens when $incompleteLoadHash is false.
  my ($self, $entry, $page, $isFrame, $incompleteLoadHash) = @_;

  if ($incompleteLoadHash)
  {
    # only the first value (total server time) of the result is needed;
    # no closing gif time is available for an incomplete page, hence the 0
    my ($server_time) = calculate_server_timings($entry, $page, $isFrame, 0);

    my $post_params = $entry->{page_data_hash_ref}->{$page}->{post_params};
    my $output_page = get_output_page($page, $post_params,
                $self->{PARAMETERIZEDURL}, $self->{QUERYDELIMITERREGEXP}, $self->{PAGEPARAMS});

    $incompleteLoadHash->add_entry($output_page, $server_time,
        $entry->{page_data_hash_ref}->{$page}->{x_time_start});

    # update total of incomplete pages output
    $self->incr_out_incomplete_pages();
  }
}

################################################################################
# DESCRIPTION : add a particular page's elements to the request hash
################################################################################

sub add_page_to_request_hash
{
  # Add the elements in @$pageElemArrayRef to $requestHash under the output
  # form of $page, and add their number to the page requests counter.
  my ($self, $page, $pageElemArrayRef, $timeStampUSecs, $postParams, $requestHash) = @_;

  my $output_page = get_output_page($page, $postParams, $self->{PARAMETERIZEDURL},
                          $self->{QUERYDELIMITERREGEXP}, $self->{PAGEPARAMS});
  $requestHash->add_elements($output_page, $pageElemArrayRef, $timeStampUSecs);

  # update total of page requests output
  my $request_count = scalar(@{$pageElemArrayRef});
  $self->incr_out_page_requests($request_count);
}


################################################################################
# DESCRIPTION : add component requests of given frameset pages to request hash
################################################################################

sub add_frameset_pages_to_request_hash
{
  # For each page of @frameset_pages, add to $requestHash the elements of the
  # frameset page itself and of every frame page that refers to it. All of
  # them are reported under the frameset page, with the frameset's
  # x_time_start and post_params. Nothing happens when $requestHash is false.
  my ($self, $entry, $requestHash, @frameset_pages) = @_;
  my $pageDataHashRef = $entry->{page_data_hash_ref};

  if ($requestHash)
  {
    foreach my $frameset_page (@frameset_pages)
    {
      my $frameset_index = $pageDataHashRef->{$frameset_page}->{index};
      foreach my $page (keys %{$pageDataHashRef})
      {
        # a member is a frame page referring to the frameset ...
        my $is_member =
            ($pageDataHashRef->{$page}->{type} == $FRAME_PAGE) &&
            ($pageDataHashRef->{$page}->{referrer_index} == $frameset_index);
        # ... or the page that carries the frameset's own index
        if (!$is_member)
        {
          $is_member = ($pageDataHashRef->{$page}->{index} == $frameset_index);
        }
        next unless ($is_member);

        $self->add_page_to_request_hash($frameset_page, $pageDataHashRef->{$page}->{elem_array},
                                 $pageDataHashRef->{$frameset_page}->{x_time_start},
                                 $pageDataHashRef->{$frameset_page}->{post_params}, $requestHash);
      }
    }
  }
}

################################################################################
# DESCRIPTION : add page-request mappings for all non-frame pages in entry to 
#               request hash
################################################################################

sub add_nonframe_pages_to_request_hash
{
  # Add the page-request mapping of every non-frame page of $entry to
  # $requestHash. Nothing happens when $requestHash is false.
  my ($self, $entry, $requestHash) = @_;
  my $pageDataHashRef = $entry->{page_data_hash_ref};

  if ($requestHash)
  {
    foreach my $page (keys %{$entry->{page_data_hash_ref}})
    {
      # frame and frameset pages are handled by the frameset routine
      next unless ($pageDataHashRef->{$page}->{type} == $NON_FRAME_PAGE);

      $self->add_page_to_request_hash($page, $pageDataHashRef->{$page}->{elem_array},
                               $pageDataHashRef->{$page}->{x_time_start},
                               $pageDataHashRef->{$page}->{post_params}, $requestHash);
    }
  }
}

################################################################################
# DESCRIPTION : creates a new entry with the given parameters. See that entries
# can be created in an incomplete fashion, but they can't be added to the page
# hash if incomplete!
################################################################################

sub new_entry
{
  # Build a new entry structure (it is not stored in the page hash here).
  # When $page is true, the entry starts with that page as a non-frame page
  # with index 0 and referrer index -1, whose first element is the page
  # request itself. The entry is created valid and with nPages set to 1.
  my ($self, $submit_tm, $visitor_ip, $glue_cookie, $page, $x_time_start,
      $entry_page, $x_time_end, $x_cache, $post_params) = @_;

  my $pages = {};

  if ($page)
  {
      # the element name is derived without post parameters
      my $no_post_params = "";
      my $elem_page = get_output_page($page, $no_post_params, $self->{PARAMETERIZEDURL},
                               $self->{QUERYDELIMITERREGEXP}, $self->{PAGEPARAMS});

      my $first_elem =
      {
          elem => $elem_page,
          xts => $x_time_start,
          xte => $x_time_end,
          x_cache => $x_cache
      };
      $pages->{$page} =
      {
          x_time_start => $x_time_start,
          type => $NON_FRAME_PAGE,
          index => 0,
          referrer_index => -1,
          post_params => $post_params,
          elem_array => [ $first_elem ]
      };
  }

  return({
    submit_tm => $submit_tm,
    visitor_ip => $visitor_ip,
    status => $VALID_STAT,
    glue_cookie => $glue_cookie,
    page_data_hash_ref => $pages,
    entry_page => $entry_page,
    nPages => 1
  });
}

################################################################################
# routine to add new page:x-time-start to page list
################################################################################

sub add_page_to_entry
{
  # Add $page to $entry as a non-frame page whose first element is the page
  # request itself.
  #   $referrer   : referring page, resolved to its index in the entry
  #   $current_tm : current time string (format is: YYYY-MM-DD HH:MM:SS)
  #   $max_age    : maximum life of an entry in minutes
  # When the entry holds more than $MAX_NUM_PAGES_IN_ENTRY pages or is older
  # than $max_age, its pages are reported through update_gu_hashes() and the
  # entry is emptied before the new page is added.
  my ($self, $entry, $page, $x_time_start, $x_time_end,
      $x_cache, $referrer, $post_params, $current_tm, $max_age) = @_;

  my $current_tm_sec = convert_tm_2_sec($current_tm);
  # multiply max_age by 60 to transform it in seconds
  $max_age *= 60;

  my $elem_page = get_output_page($page, "", $self->{PARAMETERIZEDURL},
                               $self->{QUERYDELIMITERREGEXP}, $self->{PAGEPARAMS});
  my @page_elem_array = (
    {
      elem => $elem_page,
      xts => $x_time_start,
      xte => $x_time_end,
      x_cache => $x_cache
    }
  );

  # if number of pages more than max number of pages in entry or
  # entry older than max life of page, report all pages as incomplete
  # and start over with an empty page list
  if (($entry->{nPages} > $MAX_NUM_PAGES_IN_ENTRY) ||
      ($current_tm_sec - $entry->{log_tm} > $max_age))
  {
      $self->update_gu_hashes($entry, $self->{REQUESTHASH}, $self->{INCOMPLETELOADHASH});
      # a real empty hash: "= ()" stored undef and depended on the page list
      # being autovivified again by a later access
      $entry->{page_data_hash_ref} = {};
      $entry->{nPages} = 0;
  }

  # Resolve the referrer in scalar context (as mark_frame_page does) so the
  # call cannot add or drop elements of the hash constructor below.
  my $referrer_index = page_index_in_entry($entry, $referrer);

  $entry->{page_data_hash_ref}->{$page} =
  {
      index => $entry->{nPages},
      x_time_start => $x_time_start,
      type => $NON_FRAME_PAGE,
      referrer_index => $referrer_index,
      post_params => $post_params,
      elem_array => \@page_elem_array
  };
  $entry->{nPages} ++;
}

################################################################################
# routine to update server timings of entry already in hash
################################################################################

sub update_server_timings
{
  # Append the request $request_elem, with its timings and cache flag, to the
  # element list of page $ref_page in $entry.
  # Does nothing (plain return) when $ref_page is not part of the entry.
  my ($self, $entry, $request_elem, $x_time_start, $ref_page,
      $x_time_end, $x_cache) = @_;

  my $pageDataHashRef = $entry->{page_data_hash_ref};
  return unless(exists($pageDataHashRef->{$ref_page}));

  my $elem_page = get_output_page($request_elem, "", $self->{PARAMETERIZEDURL},
                               $self->{QUERYDELIMITERREGEXP}, $self->{PAGEPARAMS});

  my $target_elems = $pageDataHashRef->{$ref_page}->{elem_array};
  push(@{$target_elems},
       {
         elem => $elem_page,
         xts => $x_time_start,
         xte => $x_time_end,
         x_cache => $x_cache
       });
}

################################################################################
# increment output page requests count
################################################################################

sub incr_out_page_requests
{
  # Add $num_requests to the running total of page requests output.
  my ($self, $num_requests) = @_;
  $self->{OUT_PAGE_REQUESTS_COUNT} += $num_requests;
}

################################################################################
# increment output incomplete pages count
################################################################################

sub incr_out_incomplete_pages
{
  # Add one to the running total of incomplete pages output.
  my ($self) = @_;
  $self->{OUT_INCOMPLETE_PAGES_COUNT} += 1;
}

################################################################################
# retrieve output page requests count
################################################################################

sub get_output_page_requests
{
  # Return the running total of page requests output.
  my $self = shift;
  return $self->{OUT_PAGE_REQUESTS_COUNT};
}

################################################################################
# retrieve output incomplete pages count
################################################################################

sub get_output_incomplete_pages
{
  # Return the running total of incomplete pages output.
  my $self = shift;
  return $self->{OUT_INCOMPLETE_PAGES_COUNT};
}

################################################################################
#				STATIC FUNCTIONS
################################################################################
# routine to calculate server timings of entry already in hash for txn
################################################################################

sub calculate_server_timings
{
  # Compute the server time spent on a page of $entry as the total length of
  # the union of its elements' [xts, xte] intervals (overlaps count once).
  #   $page            : page name; for a frameset, the frameset page
  #   $isFrame         : true to include the elements of the frame pages that
  #                      refer to the frameset, in addition to its own
  #   $closing_gif_xts : start time of the closing gif request, or false when
  #                      there is none (no upper bound is applied then)
  # Returns (total server time, earliest element xts, end of last interval),
  # or (0, 0, 0) when the page is unknown or no element qualifies.
  my ($entry, $page, $isFrame, $closing_gif_xts) = @_;
  my $pageDataHashRef = $entry->{page_data_hash_ref};

  return (0,0,0) unless(exists($pageDataHashRef->{$page}));

  # pages whose elements take part in the calculation
  my @pages_to_scan = ($page);
  if ($isFrame)
  {
    my $frameset_index = $pageDataHashRef->{$page}->{index};
    # include page if marked as frame for which gif received
    # or if it's the frameset page
    @pages_to_scan = grep {
        (($pageDataHashRef->{$_}->{type} == $FRAME_PAGE) &&
         ($pageDataHashRef->{$_}->{referrer_index} == $frameset_index)) ||
        ($pageDataHashRef->{$_}->{index} == $frameset_index)
    } keys %{$pageDataHashRef};
  }

  my @intervals = ();
  foreach my $scan_page (@pages_to_scan)
  {
    my $page_start = $pageDataHashRef->{$scan_page}->{x_time_start};
    # a page without an element list contributes nothing
    foreach my $elem (@{ $pageDataHashRef->{$scan_page}->{elem_array} || [] })
    {
      # page elements can only be requested after the page itself ...
      next unless ($elem->{xts} >= $page_start);
      # ... and not after the page has been loaded (closing gif request)
      next unless (!$closing_gif_xts || ($elem->{xts} <= $closing_gif_xts));
      push(@intervals, {xts => $elem->{xts}, xte => $elem->{xte}});
    }
  }
  return (0, 0, 0) unless (@intervals);

  @intervals = sort {$a->{xts} <=> $b->{xts}} @intervals;

  # sweep over the intervals in start order, merging those that overlap
  my $tot_server_time = 0;
  my $first_start = $intervals[0]->{xts};
  my ($curr_start, $curr_end) = ($intervals[0]->{xts}, $intervals[0]->{xte});
  foreach my $next (@intervals[1 .. $#intervals])
  {
    if ($curr_end >= $next->{xts})
    {
      # overlap: extend the current interval when the next one ends later
      $curr_end = $next->{xte} unless ($curr_end > $next->{xte});
    }
    else
    {
      # gap: close the current interval and start a new one
      $tot_server_time += ($curr_end - $curr_start);
      ($curr_start, $curr_end) = ($next->{xts}, $next->{xte});
    }
  }
  $tot_server_time += ($curr_end - $curr_start);

  return ($tot_server_time, $first_start, $curr_end);
}

################################################################################
# DESCRIPTION : mark page as frame page (after gif request received) 
################################################################################

sub mark_frame_page
{
  # Flag $frame_page of $entry as a frame page. When $frameset_page is true,
  # the frame's referrer index is set to the index of that page in the entry.
  # A page that is not part of the entry is left alone.
  my ($entry, $frame_page, $frameset_page) = @_;

  if (exists($entry->{page_data_hash_ref}->{$frame_page}))
  {
      $entry->{page_data_hash_ref}->{$frame_page}->{type} = $FRAME_PAGE;

      $entry->{page_data_hash_ref}->{$frame_page}->{referrer_index} =
          page_index_in_entry($entry, $frameset_page)
              if ($frameset_page);
  }
}

################################################################################
# DESCRIPTION : mark page as frameset page
################################################################################

sub mark_frameset_page
{
  # Set the type of $page to $FRAMESET_PAGE, provided the page is present in
  # the entry's page_data_hash_ref. Unknown pages are ignored.
  my ($entry, $page) = @_;

  $entry->{page_data_hash_ref}{$page}{type} = $FRAMESET_PAGE
      if exists($entry->{page_data_hash_ref}{$page});
}

################################################################################
# DESCRIPTION : invalidate the given entry
################################################################################

sub invalidate_entry
{
  # Overwrite the entry's status field with $INVALID_STAT.
  my $entry = shift;
  $entry->{status} = $INVALID_STAT;
}

################################################################################
# DESCRIPTION : check if the given entry is valid
################################################################################

sub is_valid
{
  # Returns 1 when the entry's status is numerically equal to $VALID_STAT,
  # 0 otherwise.
  my $entry = shift;
  return(($entry->{status} == $VALID_STAT) ? 1 : 0);
}

################################################################################
# DESCRIPTION : check if the given page is part of the entry's list of pages
################################################################################

sub is_page_in_entry
{
  # True when $page is a key of the entry's page_data_hash_ref, false
  # otherwise (the raw result of exists is returned).
  my $entry = shift;
  my $page  = shift;
  return exists $entry->{page_data_hash_ref}{$page};
}

################################################################################
# DESCRIPTION : return entry_page flag 
################################################################################

sub entry_page
{
  # Accessor: returns whatever is stored in the entry's entry_page field
  # (mark_entry_page stores 1 there).
  my $entry = shift;
  return $entry->{entry_page};
}

################################################################################
# DESCRIPTION: mark hash entry as entry page
################################################################################

sub mark_entry_page
{
  # Set the entry_page flag of the given hash entry to 1.
  my $entry = shift;
  $entry->{entry_page} = 1;
}

################################################################################
# DESCRIPTION : check if the given page is part of the entry's list of pages
#               and return its x-time-start (0 if the page is not in the entry)
################################################################################

sub get_time_start
{
  # Returns the x_time_start recorded for $page in the entry's
  # page_data_hash_ref, or 0 when the page is not part of the entry.
  my ($entry, $page) = @_;

  return exists($entry->{page_data_hash_ref}{$page})
       ? $entry->{page_data_hash_ref}{$page}{x_time_start}
       : 0;
}

################################################################################
# DESCRIPTION : check if the given page is in the entry's list of pages;
#               return its index if so, else return -1
################################################################################

sub page_index_in_entry
{
  # Returns the index stored for $page in the entry's page_data_hash_ref,
  # or -1 when the page is not part of the entry.
  my ($entry, $page) = @_;

  # unknown page: report the "not found" sentinel
  return(-1) unless exists($entry->{page_data_hash_ref}{$page});

  return $entry->{page_data_hash_ref}{$page}{index};
}

################################################################################
# DESCRIPTION : get number of frames in entry
################################################################################

sub get_frame_count_in_entry
{
  # Counts the pages of the entry whose type equals $FRAME_PAGE and whose
  # referrer_index equals the index of $frameset_page (as returned by
  # page_index_in_entry). Both comparisons are numeric.
  my ($entry, $frameset_page) = @_;
  my $frameset_page_index = page_index_in_entry($entry, $frameset_page);

  # grep in scalar assignment yields the number of matching pages
  my $numFrames = grep {
    ($entry->{page_data_hash_ref}{$_}{type} == $FRAME_PAGE) &&
    ($entry->{page_data_hash_ref}{$_}{referrer_index} == $frameset_page_index)
  } keys %{$entry->{page_data_hash_ref}};

  return $numFrames;
}

################################################################################
# DESCRIPTION : get list of frameset pages in entry
################################################################################

sub get_frameset_pages
{
  # Returns the list of page keys in the entry whose type is numerically
  # equal to $FRAMESET_PAGE. The order follows the hash's key order.
  my ($entry) = @_;

  my @frameset_pages =
      grep { $entry->{page_data_hash_ref}{$_}{type} == $FRAMESET_PAGE }
      keys %{$entry->{page_data_hash_ref}};

  return @frameset_pages;
}

################################################################################
# DESCRIPTION : mark all pages in entry that belong to one of the frameset 
#               pages in the list as frame pages
################################################################################

sub mark_frame_pages_for_framesets
{
  # For every page of the entry that is_page_in_framesets attributes to one
  # of the given frameset pages, call mark_frame_page (without a frameset
  # argument, so only the page type is changed). Does nothing when the list
  # of frameset pages is empty. Called for its side effect only.
  my ($entry, @frameset_pages) = @_;

  return if @frameset_pages == 0;

  foreach my $page (keys %{$entry->{page_data_hash_ref}})
  {
    mark_frame_page($entry, $page)
        if is_page_in_framesets($entry, $page, @frameset_pages);
  }
}


################################################################################
# DESCRIPTION : if parameterized url turned on, get output page from page uri 
#               (including any query parameters) and any post parameters from  
#               PageParams Object (which has list of selected parameters of interest).
################################################################################

sub get_output_page
{
  # Builds the page name to output for $page.
  #   $page                 - page uri, possibly carrying a query string
  #   $postParams           - post parameters, handed through to getOutputPage
  #   $parameterizedUrl     - true when parameterized urls are turned on
  #   $queryDelimiterRegexp - regexp separating the uri from its query string
  #   $pageParamsObjRef     - object providing getOutputPage (only used when
  #                           parameterization is on)
  # Returns the uri without its query string when parameterization is off,
  # otherwise whatever getOutputPage produces from the query string, the bare
  # uri and the post parameters.
  my ($page, $postParams, $parameterizedUrl,
      $queryDelimiterRegexp, $pageParamsObjRef) = @_;

  # split the uri into its bare form and the query string
  my ($pageWithoutQuery, $queryStr) = CleanupRequest($page, $queryDelimiterRegexp);

  # scalar assignment keeps getOutputPage in scalar context
  my $outputPage = $parameterizedUrl
      ? $pageParamsObjRef->getOutputPage($queryStr, $pageWithoutQuery, $postParams)
      : $pageWithoutQuery;

  return $outputPage;
}


################################################################################
# DESCRIPTION : get number of pages in entry
################################################################################

sub get_page_count_in_entry
{
  # Returns the number of pages held in the entry's page_data_hash_ref.
  #
  # keys is forced into scalar context: a bare "return (keys ...)" hands
  # back the list of page names rather than their count whenever the sub is
  # called in list context (e.g. as a function argument or in a list
  # assignment). Scalar-context callers see the same value as before.
  my ($entry) = @_;
  return scalar(keys %{$entry->{page_data_hash_ref}});
}

################################################################################
# DESCRIPTION : get post parameters for page in entry
################################################################################

sub get_page_post_params_in_entry
{
  # Returns the post_params stored for $page in the entry's
  # page_data_hash_ref, or the empty string when the page is not part of
  # the entry.
  my ($entry, $page) = @_;

  return "" unless exists($entry->{page_data_hash_ref}{$page});

  return $entry->{page_data_hash_ref}{$page}{post_params};
}

################################################################################
# DESCRIPTION : check if the given page belongs to one of the frameset pages
#               in the list
################################################################################

sub is_page_in_framesets
{
  # Returns 1 when the referrer_index of $page equals the index of one of
  # the pages named in @frameset_pages, 0 otherwise.
  #
  # Missing values are never treated as a match: a numeric == would compare
  # an undefined (or empty) referrer_index as 0 and thereby attribute every
  # page without a referrer to a frameset page whose index is 0. Pages are
  # also looked up with exists first, so that asking about a page that is
  # not in the entry does not autovivify an empty record for it in
  # page_data_hash_ref.
  my ($entry, $page, @frameset_pages) = @_;

  my $pageData = $entry->{page_data_hash_ref};
  return 0 unless $pageData && exists($pageData->{$page});

  my $referrer_index = $pageData->{$page}->{referrer_index};
  return 0 unless defined($referrer_index) && $referrer_index ne '';

  foreach my $frameset_page (@frameset_pages)
  {
    # skip frameset pages that are not part of this entry
    next unless exists($pageData->{$frameset_page});

    my $frameset_index = $pageData->{$frameset_page}->{index};
    next unless defined($frameset_index) && $frameset_index ne '';

    return 1 if $frameset_index == $referrer_index;
  }
  return 0;
}

######################################################################################## 
#     CleanupRequest: Clean up page url so as to rid url of params etc.
######################################################################################## 

sub CleanupRequest
{
  # Splits a request url into the part before the query delimiter and the
  # query string that follows it.
  #   $requestVal           - the request url
  #   $queryDelimiterRegexp - regexp matching the delimiter (e.g. '\?')
  # Returns ($urlWithoutQuery, $queryStr). When the url carries no
  # (non-empty) query string, the url is returned unchanged together with
  # an empty query string.
  my ($requestVal, $queryDelimiterRegexp) = @_;
  my $queryStr = "";

  # Without a usable delimiter there is nothing to split on; an empty
  # delimiter would otherwise cut the url after its first character.
  return ($requestVal, $queryStr)
      unless defined($requestVal) &&
             defined($queryDelimiterRegexp) && length($queryDelimiterRegexp);

  # Remove all parameters following the delimiter from the request field.
  # The results are taken over only on a successful match, so $queryStr
  # stays "" (not undef) for urls without a query string. No /o modifier:
  # it would freeze the first delimiter ever passed in for the lifetime of
  # the process.
  if ($requestVal =~ m/^(\S+?)$queryDelimiterRegexp(\S+)/)
  {
    ($requestVal, $queryStr) = ($1, $2);
  }

  # return requestVal after cleanup
  return ($requestVal, $queryStr);
}

################################################################################
# DESCRIPTION : This is a debugging helper function that is used to trace an 
# entry's contents. The entry contents are dumped in the trace file as pointed 
# by the debug object passed in. In case no debug object is passed in, the 
# contents are printed to stdout.
################################################################################

sub print_entry_to_trace_file
{
  # Dumps the fields of $entry (submit_tm, visitor_ip, glue_cookie,
  # entry_page, status, log_tm) followed by every element of its
  # page_list_ref.
  #   $entry - hash reference of the entry to dump
  #   $debug - optional object providing PrintTrace; when defined, each line
  #            is handed to $debug->PrintTrace, otherwise the dump goes to
  #            the currently selected output handle via print.
  my ($entry, $debug) = @_;

  # tolerate an entry that carries no page list
  my @pages = @{ $entry->{page_list_ref} || [] };

  if(defined($debug))
  {
    $debug->PrintTrace('  Begin entry');
    $debug->PrintTrace('  submit time: ' . $entry->{submit_tm});
    $debug->PrintTrace('  visitor ip: ' . $entry->{visitor_ip});
    $debug->PrintTrace('  glue cookie: ' . $entry->{glue_cookie});
    $debug->PrintTrace('  entry_page: ' . $entry->{entry_page});
    $debug->PrintTrace('  status: ' . $entry->{status});
    $debug->PrintTrace('  log_tm: ' . $entry->{log_tm});
    $debug->PrintTrace('  Begin page');
    foreach my $page (@pages)
    {
      $debug->PrintTrace('  page: ' . $page);
    }
    $debug->PrintTrace('  End page');
    $debug->PrintTrace('  End entry');
  }
  else
  {
    print("Begin entry\n");
    print('  submit time: ' . $entry->{submit_tm} . "\n");
    print('  visitor ip: ' . $entry->{visitor_ip} . "\n");
    # glue cookie and entry_page are newline-terminated like every other
    # field, so they no longer run into the status line
    print('  glue cookie: ' . $entry->{glue_cookie} . "\n");
    print('  entry_page: ' . $entry->{entry_page} . "\n");
    print('  status: ' . $entry->{status} . "\n");
    print('  log_tm: ' . $entry->{log_tm} . "\n");
    print("  Begin page\n");
    foreach my $page (@pages)
    {
      print('    page: ' . $page . "\n");
    }
    print("  End page\n");
    print("End entry\n");
  }
}

################################################################################
# DESCRIPTION : This is a debugging/testing helper function 
################################################################################

sub dump_all_entries
{
  # Debugging/testing helper that walks all key/value pairs delivered by
  # $self->each_key_value() and dumps them.
  #   $dump_raw - when true (the default if not defined) each pair is output
  #               as a single "key: ... ; value: ..." line; when false the
  #               key is split on $data_separation_char, the entry is
  #               fetched through $self->get_entry and dumped with
  #               print_entry_to_trace_file.
  #   $debug    - optional object providing PrintTrace; raw lines go there
  #               when it is defined and to print otherwise. In non-raw
  #               mode it is passed on to print_entry_to_trace_file.
  my ($self, $dump_raw, $debug) = @_;

  # raw output unless the caller says otherwise
  $dump_raw = 1 unless defined($dump_raw);

  my $entry = {};
  if(!$dump_raw)
  {
    while(my ($key, $value) = $self->each_key_value())
    {
      # the key is made up of submit time, visitor ip and glue cookie
      ($entry->{submit_time}, $entry->{visitor_ip},$entry->{glue_cookie}) = split(/$data_separation_char/, $key);

      $entry = $self->get_entry($entry->{submit_time}, $entry->{visitor_ip},$entry->{glue_cookie});

      print_entry_to_trace_file($entry, $debug);
    }
    return;
  }

  while(my ($key, $value) = $self->each_key_value())
  {
    my $line = "key: $key ; value: $value\n";
    if(defined($debug))
    {
      $debug->PrintTrace($line);
    }
    else
    {
      print($line);
    }
  }
}

# So the require or use succeeds
1;

