#############################################################################
# $Header: chronos_mining.pl 19-sep-2005.17:54:30 adosani Exp $
# Copyright (c) 2001, 2005, Oracle. All rights reserved.  
#
# Author      : Manu Shukla
# Description : This script mines log files to generate chronos
#               latency data.
#
#  MODIFIED    (MM/DD/YY)
#     adosani   09/19/05 - bug 4582772 - set dbtime to 0 if 10.2beta oms, else 
#                          null 
#     adosani   08/31/05 - fix bug 4569542 -- truncate browser version to 16 
#                          characters if longer 
#     adosani   08/27/05 - add oms version input for compatibility 
#     adosani   08/19/05 - fix bug 4564334 - change strftime format string
#                          to make it more standard (work on NT).
#     adosani   06/28/05 - fix bug 4441067 - recognize invalid apache log 
#                          lines 
#     adosani   06/13/05 - bug 4429330 - mark as entry page instead of 
#                          invalidating entry when page referred from a page 
#                          on another server 
#     adosani   06/13/05 - bug 4429326 - when more than 1 page in entry, 
#                          delete only completed page (after a gif request) 
#                          instead of whole entry 
#     adosani   03/31/05 - add more chronos run metrics 
#     adosani   05/26/05 - fix bug 4394136 - visitor domain not output 
#                          correctly in some cases 
#     adosani   05/26/05 - fix bug 4391481 - update marker offset even if no 
#                          valid lines in run 
#     adosani   05/25/05 - change max age of page to 10 min from 30 min 
#     adosani   04/25/05 - ignore beacon requests
#     adosani   03/30/05 - handle uri's with page fragments 
#     adosani   03/07/05 - use complete uri (with any query strings) when 
#                        - reading/writing to/from page hash to avoid losing pages
#     adosani   02/17/05 - group frames together for incomplete loads
#     adosani   02/14/05 - fix response/server time calculation after 
#                          interrupted page loads (clickaheads/backs/refreshes/etc)
#     adosani   01/28/05 - add rotation of log and trace files 
#     snakhoda  01/26/05 - fix bugs 4143912 and 4145284 
#     adosani   01/06/05 - add option for (not) archiving processed log files 
#     snakhoda  12/14/04 - 
#     eporter   12/02/04 - bug 3849061: add visitor_ip_num column 
#     adosani   11/30/04 - fix reg exp for unknown header field case 
#     adosani   10/20/04 - fix run metric output, set warnloglines to 500 
#                          when maxloglines <= 0
#     adosani   09/26/04 - bug 3898184 - fix user agent parsing 
#     adosani   09/22/04 - bug 2447655 - fix webcache target name handling to 
#                          take care of problematic characters such as '/' 
#     snakhoda  09/01/04 - add gmtoffset to marker 
#     snakhoda  08/31/04 - use GMT-Offset when present to offset timestamp 
#     eporter   07/29/04 - making it work with apache, not just calypso
#     snakhoda  08/18/04 - Grand Unification: implement request breakdown, 
#                          etc. 
#     snakhoda  06/17/04 - Make CME work with Apache logs for Forms SDK 
#     snakhoda  05/12/04 - bug 3617911 - fix marker offset calculation to not 
#                          use string length
#     snakhoda  04/21/04 - move deleteHash to ChronosDbmTypes.pm
#     snakhoda  01/30/04 - bug 3374505 - fix regex to find file to archive
#     snakhoda  12/19/03 - post parameter logging - handle marker regexes
#     snakhoda  12/08/03 - add code to handle post parameter logging 
#     snakhoda  11/16/03 - bug 3254271 - fix args to localtime 
#     snakhoda  11/11/03 - bug 3236955 - write "fields" field into marker file
#                          after reading dirty marker
#     snakhoda  11/10/03 - improve 3217842:remove defaultDbm; allow dbmmodule,
#                          dbmmaxdatasize,dbmexts as command line params
#     snakhoda  11/06/03 - bug 3236776 - do sdbm hash copy one key at a time 
#     snakhoda  10/29/03 - bug 3217842: osd changes for non-Solaris 
#     snakhoda  10/28/03 - do not include parameters in URL_FILENAME for Sdk
#     snakhoda  10/24/03 - disable latency hash check for Sdk requests 
#     snakhoda  10/23/03 - add support for Chronos API for Forms 
#     snakhoda  10/19/03 - add run metric columns 
#     snakhoda  10/16/03 - archive files only when they have been completely
#                          parsed (i.e. don't archive when interrupted by 
#                          maxloglines)
#     snakhoda  10/16/03 - produce warning when progress is not made after
#                          traversing <warnloglines> lines.
#     snakhoda  10/14/03 - bug 3178092 - soften error message when lock not
#                          acquired 
#     snakhoda  10/09/03 - bug 3176672 - fix regexp for cs(Host)
#     snakhoda  10/01/03 - bug 3133488 - update marker data after every log 
#     snakhoda  09/29/03 - use substring match for urls for pageidparams 
#     snakhoda  09/24/03 - bug 3093525 - fix parameterized url support 
#     snakhoda  09/12/03 - make hash copying/moving dbm-library-independent
#     snakhoda  09/04/03 - bug 3129615 - recognize 304s as page requests 
#     snakhoda  08/26/03 - add usage information for -priority 
#     snakhoda  08/21/03 - allow CME priority to be set from command line 
#     snakhoda  08/28/03 - add usage info and default max log lines 
#     snakhoda  08/24/03 - add support for limit on number of lines processed 
#     snakhoda  08/18/03 - Bug 3103887: copy hash files by dumping 
#                          in-memory hash table 
#     mashukla  06/27/03 - remove csv support
#     mashukla  06/18/03 - chronos_lite_changes
#     mashukla  06/06/03 - change user agent format
#     mashukla  05/19/03 - nslookup perf changes
#     mashukla  05/02/03 - rotation log
#     mashukla  04/21/03 - fix err msgs
#     mashukla  04/16/03 - fix err msgs
#     mashukla  04/02/03 - add offset
#     mashukla  03/31/03 - more skip hdr changes
#     mashukla  03/28/03 - skip headers
#     mashukla  02/14/03 - make server time cols 0
#     mashukla  02/12/03 - test pre reg changes
#     mashukla  02/04/03 - remove origin server trips 
#     mashukla  11/26/02 - use time-taken instead of x-time-end
#     mashukla  11/19/02 - cleanup page element processing
#     mashukla  11/19/02 - add origin server trips
#     mashukla  11/13/02 - check server timing errors
#     mashukla  11/07/02 - add server timings
#     mashukla  11/04/02 - server side numbers
#     mashukla  11/22/02 - extend emdchratrep fix
#     mashukla  11/20/02 - add check for EmChartBean type scenario
#     mashukla  10/31/02 - 902 adjustments
#     mashukla  10/16/02 - fix post review 
#     mashukla  10/15/02 - cleanup regexp
#     mashukla  10/15/02 - fix dynamic regexp
#     mashukla  10/11/02 - fix dynamic regexp return val strs
#     mashukla  10/28/02 - more entrypg chagnges
#     mashukla  10/28/02 - changes for validating pages after update
#     mashukla  10/24/02 - fix hashing mech
#     mashukla  10/23/02 - entry deletion after output
#     mashukla  10/20/02 - more entrypg changes
#     mashukla  10/17/02 - change entry page also
#     mashukla  10/02/02 - implement entry page solution
#     mashukla  09/24/02 - fix time vals
#     mashukla  09/19/02 - fix param structs
#     mashukla  09/15/02 - clean vals
#     mashukla  09/12/02 - clean params
#     mashukla  09/20/02 - grabtrans 'mashukla_run_metric'
#     mashukla  09/03/02 - add fix for time-taken field
#     mashukla  09/03/02 - modify urls
#     mashukla  09/03/02 - add parameterized url handling
#     mashukla  09/19/02 - fix last and first log times
#     mashukla  09/03/02 - add fix for time-taken field
#     mashukla  09/13/02 - fix run metric nums
#     mashukla  09/12/02 - add new mining fields
#     mashukla  09/11/02 - fix run metric
#     mashukla  09/10/02 - create run metric file
#     mashukla  08/14/02 - add header fields parsing
#     mashukla  07/10/02 - add referrer - processing
#     mashukla  07/07/02 - add lk var
#     asawant   02/23/02 - Adding fixes for the prevention of rotated log skip
#     asawant   02/21/02 - Adding flag to ingnore old files on first run.
#     asawant   02/21/02 - comparison of referrer webserver and ST cookie's webserver.
#     mashukla  02/22/02 - add active file check post sync.
#     mashukla  02/20/02 - prevent skipping of rotated log.
#     mashukla  02/19/02 - add discreet file changes.
#     mashukla  01/04/02 - fix page extension comparing.
#     asawant   01/04/02 - Addgin target_guid parameter..
#     mashukla  12/24/01 - dq ref fix.
#     mashukla  12/24/01 - dq algo changes.
#     mashukla  12/20/01 - changes for dq algo fix.
#     asawant   12/07/01 - Clean up code.
#     mashukla  12/03/01 - change date checking.
#     asawant   11/20/01 - Adding new algorithm to improve data Quality.
#     asawant   11/19/01 - Indenting comments..
#     asawant   10/31/01 - CME must exit when no input files are found (bug # 2090266).
#     njagathe  11/07/01 - Adding extra params to ChronosTableOut::new() call.
#     njagathe  11/05/01 - EMD functionality changes (bug # 1980893).
#     jmarfati  10/29/01 - fix -ve latency bug for GIT.
#     jmarfati  10/21/01 - change name in gif request
#     jmarfati  10/19/01 - changing request format for oracle_smp_chronos.gif
#     rpinnama  10/18/01 - Backing off (partially) the timezone changes.
#     asawant   10/08/01 - Fixing bug 2009494_and_2038459
#     mashukla  10/07/01 - fix log file iteration issue in PreParseLogFiles
#     mashukla  09/20/01 - changes to fix megre issues in logging
#     mashukla  09/17/01 - changes to print failures in log file
#     mashukla  09/11/01 - changes to return error codes to emd
#     asawant   09/10/01 - Fixing composite names.
#     asawant   09/09/01 - Disabling ck idx generation when ck output is off
#     jmarfati  09/05/01 - remove extra print statement
#     asawant   08/31/01 - Adding timezone parameter.
#     asawant   08/27/01 - Debug default reported value is inconsistent with actual
#     mashukla  08/27/01 - change to accept only blank space between request 
#     mashukla  08/19/01 - add new ChronosLogging object
#     mashukla  08/13/01 - add multi level logging
#     mashukla  08/09/01 - remove ';' and trailing characters from refererVal
#     mashukla  08/09/01 - changes to accept '-' in referer and cookie fields 
#     asawant   08/06/01 - Adding aging and retry policy to nslookup.
#     mashukla  07/26/01 - accept both http and https as valid requests
#     mashukla  07/23/01 - change default marker gif name and marker date format.
#     jmarfati  07/09/01 - change cookie names
#     mashukla  07/09/01 - changes for first implementation of frames algo
#     asawant   06/27/01 - Emd integration.
#     asawant   06/22/01 - Adding filters.
#     asawant   06/18/01 - Adding capability for multiple output formats.
#     asawant   06/08/01 - Adding nslookup timeout option.
#     mashukla  06/08/01 - changes in DoRecovery to delete latency hash files 
#     mashukla  06/06/01 - fix cookie regexps merge issues
#     mashukla  06/05/01 - changes in parse user agent
#     mashukla  06/05/01 - changes in submit and load_time extract regexps
#     mashukla  06/01/01 - changes for turning  cookie output on/off 
#     jmarfati  05/25/01 - moving from source safe.
# 
#
##################################################################################
use strict;
use Time::Local;
use Fcntl ':flock';
use ChronosDbmTypes qw(@dbmTypes);
use Getopt::Long;                      # set up to accept user input
use File::Copy;
use Config;
use ChronosFilter;
use ChronosLogging;
use ChronosTableOut;
use ChronosHash;
use ChronosPageHash;
use ChronosPageParams;
use ChronosRequestsHash;
use ChronosIncompleteLoadHash;
use POSIX;
#Do not do a 'use' on any module that in turn does a 'use' on a dbm
#module (NDBM_File, SDBM_File, etc). Do a require on those modules
#after a dbmtype has been determined and/or a dbmmodule has been
#loaded (below).

################################################################################# 
# Main program.
#################################################################################

# Constant definitions

# Stage/pipeline return codes. ExecutePipeLine compares each stage's return
# value against these, and the main program uses them to decide on cleanup
# and on the process exit code.
my $SUCCESS              = 10;
my $WARNING              = 20;
my $FAILURE              = 30;
my $FAILURE_WITH_MESSAGE = 35;
my $CATASTROPHIC_FAILURE = 40;

# status definitions from lower level subroutines
my ($STATUS_WARN, $STATUS_SUCCESS, $STATUS_FAIL, $STATUS_ERROR) = (2, 1, 0, -1);

# same path separator for both nt and unix
my $pathSeperator = "/";

# shared logging object used throughout the script
my $debug = ChronosLogging->new();

# status definitions for nslookups
my ($LKUP_UNDEF, $LKUP_RES, $LKUP_UNRES) = (0, 1, 2);

# the extension used for the xml files when done
my $XML_SUCCESS_EXT = '.suc';

# the flag possible states
my ($FLAG_DIRTY, $FLAG_CLEAN) = (1, 0);

# limit used for post parameter logging
# NOTE(review): units (bytes vs. characters) are not visible here -- confirm
my $POST_PARAM_LOG_LIMIT = 8160;

## Sdk Metric Parameter names
my $SDK_METRIC_TOT_TIME  = "tot_t";
my $SDK_METRIC_SRV_TIME  = "srv_t";
my $SDK_METRIC_DB_TIME   = "db_t";
my $SDK_METRIC_TIMESTAMP = "tstamp";

# default oms version (if not specified by user); the value is the dotted
# version with the dots removed, e.g. 10.2.0.0.0 => 102000
my $OMS_102000 = "102000";

# create a unique run number (this is unique for every run);
# it is simply the epoch time at which the script started
my $run_id = time();

# Build the top-level list of stages and the (initially empty) context hash
# that is handed to every stage.
my $stages = BuildMainPipeLine();
my %ctx = ();
my $returnVal = $SUCCESS;

# the juice, execute top level main pipeline
# other pipeline is line pipeline
$returnVal = ExecutePipeLine($stages, \%ctx);

# Ordinary failures are cleaned up here; a catastrophic failure has already
# been cleaned up inside ExecutePipeLine.
DoCleanup(\%ctx)
    if ($returnVal == $FAILURE) || ($returnVal == $FAILURE_WITH_MESSAGE);

# Only catastrophic failures and failures carrying a message are reported
# through a non-zero exit code.
exit 1
    if ($returnVal == $CATASTROPHIC_FAILURE) || ($returnVal == $FAILURE_WITH_MESSAGE);

# exit with success
exit 0;


########################## END of MAIN #####################################
############################################################################# 
# NAME: GetGMTLocalOffset()
# FUNCTION: Get the offset in seconds of the local time zone from GMT
# e.g. for PDT (during daylight savings), this should be -7*3600
############################################################################# 
sub GetGMTLocalOffset()
{
   # Take the local broken-down time for "now" and convert it back to epoch
   # seconds as if it were GMT; the difference from the real epoch value is
   # the local zone's offset from GMT in seconds (negative west of GMT).
   my $now        = time();
   my $localAsGmt = timegm(localtime($now));
   return $localAsGmt - $now;
}



############################################################################# 
# NAME: GetCurrentTime()
# OUTPUT: Current/Input parameter Date/Time returned as HH:MM:SS YYYY-MM-DD
# FUNCTION: Get the current (or input parameter) date and time as a string
############################################################################# 
sub GetCurrentTime
{
  # Optional argument: epoch seconds to format; defaults to the current time.
  my ($epochSecs) = @_;
  $epochSecs = time unless defined($epochSecs);

  # Only the first six localtime fields are needed.
  my ($sec, $min, $hour, $mday, $mon, $year) = (localtime($epochSecs))[0 .. 5];

  # localtime reports a 0-based month and a year relative to 1900.
  # Every field except the year is zero-padded to two digits.
  return sprintf("%02d:%02d:%02d %d-%02d-%02d",
                 $hour, $min, $sec, $year + 1900, $mon + 1, $mday);
}

############################################################################# 
#  Execute Pipeline
#
#  Executes each of the stages in the pipeline. 
#  Each stage is expected to return either of the following 
#  return values.
#     CATASTROPHIC_FAILURE  : There is a severe error and all the pipeline 
#           will exit and program will terminate after performing cleanup.
#     FAILURE  : There is a severe error and the pipeline will exit.
#     WARNING: The stage was not able to do a perfect job. But the 
#              pipeline can continue.
#     SUCCESS: returns when everything goes through as expected.
#
#  IN : 
#      pipeline : A list of stages
#      context  : A context that will be passed to each stage of the 
#                 pipeline.
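#  OUT: 
#      SUCCESS              : every stage returned SUCCESS or WARNING.
#      FAILURE              : one of the stages in the pipeline failed.
#      FAILURE_WITH_MESSAGE : one of the stages failed with a message.
#      CATASTROPHIC_FAILURE : one of the stages failed fatally; cleanup
#                             has already been performed.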
########################################################################## 

sub ExecutePipeLine
{
  # $pipeline : reference to list of stages; each stage is a hash reference
  #             with a "NAME" and a "FNCPTR" (code reference) entry.
  # $context  : reference to a hashtable, handed unchanged to every stage.
  my ($pipeline, $context) = @_;

  my $stageIdx = 0;
  while ($stageIdx < @$pipeline)
  {
    my $stageRef = $pipeline->[$stageIdx];
    $stageIdx++;

    my $stageName = $stageRef->{"NAME"};
    my $stageCode = $stageRef->{"FNCPTR"};

    # run the stage and act on its return code
    my $stageRetVal = $stageCode->($context);

    if ($stageRetVal == $FAILURE)
    {
       $debug->PrintLog($DEBUG_HIGH,"Error in Stage $stageName: ");
       $debug->PrintLog($DEBUG_HIGH,$context->{"FILEVALUES"}->{"errorVal"});
       return $FAILURE;
    }
    elsif ($stageRetVal == $FAILURE_WITH_MESSAGE)
    {
       # same as FAILURE, but PrintLog is given an extra trailing argument
       $debug->PrintLog($DEBUG_HIGH,"Error in Stage $stageName: ");
       $debug->PrintLog($DEBUG_HIGH,$context->{"FILEVALUES"}->{"errorVal"},1);
       return $FAILURE_WITH_MESSAGE;
    }
    elsif ($stageRetVal == $CATASTROPHIC_FAILURE)
    {
       # Error written to stderr should consist of only one line in order to
       # show up on the console.
       $debug->PrintLog($DEBUG_FATAL,"Fatal Error in Stage $stageName: " . $context->{"FILEVALUES"}->{"errorVal"});

       # unlike the other failure codes, cleanup is done here, not by the caller
       DoCleanup($context);

       return $CATASTROPHIC_FAILURE;
    }
    elsif ($stageRetVal == $WARNING)
    {
       # a warning is logged but does not stop the pipeline
       $debug->PrintLog($DEBUG_HIGH,"Warning in Stage $stageName: ".
                $context->{"FILEVALUES"}->{"errorVal"});
    }
  }

  # every stage returned something other than a failure code
  return $SUCCESS;
}

########################################################################### 
# ProcessCommandLineInput : Processes command line parameters.
#                      Note that $workingDir, $privateDir and 
#                      $destinationDir are further qualified 
#                       by $targetName
# Context in: -
# Context out:
#                     $CommandLineInput.inFileName
#                     $CommandLineInput.outFileName
#                     $CommandLineInput.inDirName
#                     $CommandLineInput.workingDirName
#                     $CommandLineInput.privateDirName
#                     $CommandLineInput.targetName
#                     $CommandLineInput.targetType
#                     $CommandLineInput.resolve
#                     $CommandLineInput.hostLookupTimeOut
#                     $CommandLineInput.apacheSessionId
#                     $CommandLineInput.jservSessionId
#                     $CommandLineInput.gifFileName
#                     $CommandLineInput.debug
#                     $CommandLineInput.outfiletype
#                     $CommandLineInput.chronosRoot
#                     $CommandLineInput.emdRoot
#
###################################################################### 

sub ProcessCommandLineInput
{
  my ($ctxRef) = @_;
  my $markerFileSufix="_LogFileMarker";
  my $errorFileSufix="_chronos.trace";
  my $filterFileSufix="_filter.txt";
  my $lockFileSufix=".lk";
  my $latencyFileSufix="_Latency";
  my $hostListFileSufix="_hostList";
  my $pageHashFileSufix="_pageHash";
  my $reqHashFileSuffix = "_requests";
  my $gifFile = "oracle_smp_chronos.gif";
  my $sdkGifFile = "oracle_smp_chronos_sdk.gif";
  my %CommandLineInput;
  my $error="";
  my $status=$STATUS_SUCCESS;
  my $pageExtensions="";
  my $targetType_Name;
  my $chronosLogName="apmeum.log";
  my $sysFileDir="sysman";
  my $chronosLogDir="log";
  my $pxIndex="";
  my @static_fields;

  # a URI with any of these extensions is a page
  # NOTE: These strings  must be lower case !!!!
  # we use this table like a set with "if ( defined ..."
  # NOTE: there are other ways a hit can  be a page

  my %defaultPageNameExtensions = ( "htm"   => 1,
                                    "html"  => 1,
                                    "txt"   => 1,
                                    "jhtml" => 1,
                                    "shtml" => 1,
                                    "jsp"   => 1,
                                    "asp"   => 1 );

  # Hash for all fields in log line we are intersted in and associate formats with
  my %FieldHash = ( "c-ip"            => '(\S+)', 
                    "c-dns"           => '(\S+)', 
                    "s-ip"            => '(\S+)', 
                    "c-auth-id"       => '(\S+)', 
                    "date"            => '(\S+)',
                    "time"            => '(\S+)', 
                    "cs-method"       => '(\S+)', 
                    "cs-uri"          => '(\S+)', 
                    "sc-status"       => '(\d+)',
                    "c-dns"           => '(\S+)', 
                    "c-auth-id"       => '(\S+)', 
                    "x-auth-id"       => '(\S+)', 
                    "x-date-start"    => '(\S+)', 
                    "x-time-start"    => '(\S+)',
                    "cs(Cookie)"      => '(\".+?\"|\-)',
                    "cs(Host)"        => '(\".+?\"|\-)',
                    "cs(Referer)"     => '(\".+?\"|\-)',
                    "bytes"           => '(\S+)', 
                    "cs-bytes"        => '(\S+)', 
                    "x-req-type"      => '(\S+)',
                    "x-protocol"      => '(\S+)',
                    "x-ecid"          => '(\S+)', 
                    "x-esi-info"      => '(\S+)',
                    "x-time-delay"    => '(\S+)', 
                    "x-time-end"      => '(\S+)', 
                    "x-date-end"      => '(\S+)', 
                    "x-cache"         => '(\S+)',
                    "time-taken"      => '(\S+)',
                    "r-time-taken"    => '(\S+)',
                    "us-time-taken"   => '(\d+)',
                    "x-os-timeout"    => '(\S+)', 
                    "x-cookie(ORACLE_SMP_CHRONOS_ST)"  => '(\S+)', 
                    "x-cookie(ORACLE_SMP_CHRONOS_LT)"  => '(\S+)', 
                    "x-cookie(ORACLE_SMP_CHRONOS_GL)"  => '(\S+)', 
                    "cs(User-Agent)"  => '(\".+\"|\-)',
                    "cs(Content-Length)" => '(\".+?\"|\-)',
                    "x-post-params" => '(\".+?\"|\-)'
                   );
  

  # put first command line parameter in variable progname
  my $progname = $0;

  # this is the first task of the script, so take a time stamp marking the
  # begining of run  
  $CommandLineInput{"startTime"}=$run_id;
  $CommandLineInput{"startTimeStamp"} = GetCurrentTime($run_id);

  # Keep arguments in a new var as GetOptions() will destroy @ARGV
  my @arguments = @ARGV;
 
  my %cmdLine = ();

  # This routine from Getopt package gets all the command line parameters
  GetOptions (\%cmdLine,
              "infile=s",
              "indir=s",
              "targetname=s",
              "targettype=s",
              "targetguid=s",
              "pageext=s",
              "outfile=s",
              "outfiletype=s",
              "trace!",
              "apchsess=s",
              "jservsess=s",
	      "apachelogformat=s",
              "giffile=s",
              "cookieoutput!",
              "resolve!",
              "usegmtoffset!",
              "timeout=i",
              "debug=i",
              "unresolvedage=i",
              "resolvedage=i",
              "maxundeftime=i",
              "timezone=s",
              "emdURL=s",
              "chronosroot=s",
              "advdtfilter!",
              "-advdtfilterage=i",
              "gu!",
              "cacheentries!",
              "procoldfiles!",
              "emdroot=s",
              "priority=i",
              "nosetpriority",
              "maxloglines=i",
              "warnloglines=i",
              "dbmtype=s",
              "dbmmaxdatasize=i",
              "dbmmodule=s",
              "dbmexts=s",
              "querydelimiters=s",
              "qparamdelimiters=s",
              "pageidparams=s",
	      "maxlogfilesizekb=i",
	      "numlogfiles=i",
	      "numtracefiles=i",
	      "archive!",
              "omsversion=s");
 

  # infile is required else print usage and exit
  if (!$cmdLine{"infile"}) 
  { 
     Usage($progname, 'Missing infile') ;
     return $CATASTROPHIC_FAILURE;
  } else {
     $CommandLineInput{"inFileName"}  = $cmdLine{"infile"};
  }

  # check remaining args
  if ( $cmdLine{"indir"} ) 
  {
    $CommandLineInput{"inDirName"} = $cmdLine{"indir"};
  } else {
    Usage($progname, 'Missing indir') ;
    return $CATASTROPHIC_FAILURE;
  }

  # put path to infile and put result in inputFileName
  $CommandLineInput{"inputFileName"} = $CommandLineInput{"inDirName"}.
     $pathSeperator.$CommandLineInput{"inFileName"};

  # accept target name 
  if ( $cmdLine{"targetname"} )
  {
    $CommandLineInput{"targetName"} = $cmdLine{"targetname"};
  } else {
    Usage($progname, 'Missing targetname') ;
    return $CATASTROPHIC_FAILURE;
  }

  # accept target type 
  if ( $cmdLine{"targettype"} )
  {
    $CommandLineInput{"targetType"} = $cmdLine{"targettype"};
  } else {
    Usage($progname, 'Missing targettype') ;
    return $CATASTROPHIC_FAILURE;
  }

  # accept target guid
  if ( $cmdLine{"targetguid"} )
  {
    $CommandLineInput{"targetGUID"} = $cmdLine{"targetguid"};
  } else {
    Usage($progname, 'Missing targetguid') ;
    return $CATASTROPHIC_FAILURE;
  }

  # chronos root directory
  if ( $cmdLine{"chronosroot"} )
  {
    $CommandLineInput{"chronosRoot"} = $cmdLine{"chronosroot"};
  } else {
    Usage($progname, 'Missing chronosroot') ;
    return $CATASTROPHIC_FAILURE;
  }

  # emd root directory
  if ( $cmdLine{"emdroot"} )
  {
    $CommandLineInput{"emdRoot"} = $cmdLine{"emdroot"};
  } else {
    Usage($progname, 'Missing emdroot') ;
    return $CATASTROPHIC_FAILURE;
  }

  # OMS Version. Default: 10.2
  if(exists $cmdLine{'omsversion'})
  {
    $CommandLineInput{'omsversion'} = $cmdLine{'omsversion'};
    $CommandLineInput{'omsversion'} =~ 
        s/[.]//g;    # converts 10.2.0.0.0 => 102000 or 10.1.0.3.0 => 101030

    # if oms version is not 10.2, -nogu option will be used 
    # to suppress the grand unification output. this is done
    # in the grand unification option setting part below. 
  }
  else
  {
    $CommandLineInput{'omsversion'} = $OMS_102000;
  }

  my $queryDelimiters = $cmdLine{"querydelimiters"} ? 
                        $cmdLine{"querydelimiters"} : '?$\';';


  $CommandLineInput{"queryDelimiterRegexp"} = ChronosPageParams::getDelimiterRegexp($queryDelimiters);

  
  my $qParamDelimiters = $cmdLine{"qparamdelimiters"} ? 
      $cmdLine{"qparamdelimiters"} : '?$&';

  $CommandLineInput{"qParamDelimiterRegexp"} = ChronosPageParams::getDelimiterRegexp($qParamDelimiters);
  #
  # url params
  # they can appear in two flavors
  # either with a url or without
  # if without url they are applied to all urls
  # if with they ones that has the same staring prefix for uris
  # currently they are + seperated with different url prefix 
  # blocks seperated by / and a , seperating uri and params
  #
  $CommandLineInput{"parameterizedURL"}=0;
  if ( $cmdLine{"pageidparams"} )
  {

    $CommandLineInput{"parameterizedURL"}=1;
    $ctxRef->{PAGEPARAMS} = new ChronosPageParams($debug, \$error, $cmdLine{"pageidparams"}, 
                                   $CommandLineInput{"queryDelimiterRegexp"},
                                   $CommandLineInput{"qParamDelimiterRegexp"});
  }


  # put chronos log file in {EMDROOT}/sysman/log directory
  $CommandLineInput{"chronosLogName"} = $CommandLineInput{"emdRoot"}.
    $pathSeperator.$sysFileDir.$pathSeperator.$chronosLogDir.
                            $pathSeperator.$chronosLogName;

  # create merge of type and name to reduce concatanation
  $targetType_Name = 
    $CommandLineInput{"targetType"}.'_'.$CommandLineInput{"targetName"};

  # replace unwanted characters in targetType with '_' 
  # allowed characters include alphanumeric, '-', '_' and '.'
  # a slash ('/') for example in target name causes error in CME

  $targetType_Name =~ s/[^\w\-\_\.]/_/g;

  # trace info turned on if debug level is high 
  if ( exists $cmdLine{"trace"} )
  {
    $CommandLineInput{"traceInfo"} = $cmdLine{"trace"};
    $CommandLineInput{"errorFileName"} = $CommandLineInput{"chronosRoot"}.
        $pathSeperator.$targetType_Name.$errorFileSufix;
   
  } else {
    $CommandLineInput{"traceInfo"} = 0;
  } 
  #
  # additional page and file name extensions can be passed 
  # in beyond the commonly accepted ones which will be put 
  # in the hash by default
  #
  if ( $cmdLine{"pageext"} )
  {
    $pageExtensions = $cmdLine{"pageext"} ;
  }

  if ($pageExtensions)
  {
    my @pageNameExtensions=split /\//,$pageExtensions;
    for $pxIndex (0 .. $#pageNameExtensions)
    {
      $defaultPageNameExtensions{$pageNameExtensions[$pxIndex]} = 1;
    }
  }

  # page and file name extension hashes go into CommandLineInput hash
  $CommandLineInput{"pageNameExtensions"}=\%defaultPageNameExtensions;

  # private area where work files live during a run
  $CommandLineInput{"workingDirName"} = 
    $CommandLineInput{"chronosRoot"}.$pathSeperator.'tmp';
  # private area where files live between runs (e.g. file marker)
  $CommandLineInput{"privateDirName"} =
    $CommandLineInput{"chronosRoot"}.$pathSeperator.'persistent';

  # turn on debug info in chronos log at level provided in command line 
  # parameter, default level is 1
  if (exists $cmdLine{"debug"} )
  {
    # make sure the debugLevel value is valid
    if (($cmdLine{"debug"} >= $DEBUG_LOW) && ($cmdLine{"debug"} <= $DEBUG_HIGH)) 
    {
      $CommandLineInput{"debugLevel"}=$cmdLine{"debug"};
    } else {
      Usage($progname, 'Invalid debug value') ;
      return $CATASTROPHIC_FAILURE;
    }
  } else { 
    $CommandLineInput{"debugLevel"}=$DEBUG_MEDIUM;
  }

  # setup output file name else exit with usage
  if ( $cmdLine{"outfile"} )
  {
     $CommandLineInput{"outFileName"} = $cmdLine{"outfile"};
  } else {
     Usage($progname, 'Missing outfile') ;
     return $CATASTROPHIC_FAILURE;
  }

  # select file format (default is XML)
  if ( $cmdLine{"outfiletype"} )
  {
    unless(($cmdLine{"outfiletype"} =~ m/SQLLDR/i) ||
      ($cmdLine{"outfiletype"} =~ m/XML/i)) 
    {
      Usage($progname, 'Unknown outfiletype') ;
      return $CATASTROPHIC_FAILURE;
    }
    $CommandLineInput{"outFileType"} = $cmdLine{"outfiletype"};
  } else {
    $CommandLineInput{"outFileType"} = 'XML';
  }

  # Add extensions to the output files if ouput type is SQLLDR
  if ($CommandLineInput{"outFileType"} =~ m/SQLLDR/)
  {
    $CommandLineInput{"cookieOutFileName"} = 
      $CommandLineInput{"outFileName"}.".cookies.ctl";
    $CommandLineInput{"outFileName"} = 
      $CommandLineInput{"outFileName"}.".ctl";
  }


  # default latency and hostlist hash name
  $CommandLineInput{"latencyHashName"} = $targetType_Name.$latencyFileSufix;
  $CommandLineInput{"hostListHashName"} = $targetType_Name.$hostListFileSufix;


  # timezone
  if ( $cmdLine{"timezone"} )
  {
    $CommandLineInput{"timeZone"} = ' '.$cmdLine{"timezone"};
  } else {
    # Usage($progname, 'Missing timezone.') ;
    # return $CATASTROPHIC_FAILURE;
    $CommandLineInput{"timeZone"} = '';
  }

  # emdURL
  if ( $cmdLine{"emdURL"} )
  {
    $CommandLineInput{"emdURL"} = $cmdLine{"emdURL"};
  } else {
    Usage($progname, 'Missing emdURL.') ;
    return $CATASTROPHIC_FAILURE;
  }

  # name of the gif file that is requested in our stamp entry in log file. This
  # is not a "user supported" option and will be soon totally desupported.
  if ( $cmdLine{"giffile"} )
  {
    $CommandLineInput{"OEM_GIF"} = $cmdLine{"giffile"};
  } else {
    $CommandLineInput{"OEM_GIF"} = $gifFile;
  }
  # name of the SDK gif file that is requested in our stamp entry in log file
  if ( $cmdLine{"sdkgiffile"} )
  {
    $CommandLineInput{"SDK_OEM_GIF"} = $cmdLine{"sdkgiffile"};
  } else {
    $CommandLineInput{"SDK_OEM_GIF"} = $sdkGifFile;
  }

  # option to turn cookie file output on/off
  if (exists $cmdLine{"cookieoutput"} )
  {
    $CommandLineInput{"cookieOutput"} = $cmdLine{"cookieoutput"};
  } else {
    $CommandLineInput{"cookieOutput"} = 0;
  }
 
  # Advanced data filtering (turn on/off Chronos data quality algo)
  if(exists $cmdLine{'advdtfilter'})
  {
    $CommandLineInput{'advDtFilter'} = $cmdLine{'advdtfilter'};
  }
  else
  {
    $CommandLineInput{"advDtFilter"} = 1;
  }

  # Grand Unification support - incomplete page loads & request mapping
  if(exists $cmdLine{'gu'})
  {
    $CommandLineInput{'gu'} = $cmdLine{'gu'};
  }
  else
  {
    $CommandLineInput{"gu"} = 1;
  }

  # if OMS version is not 10.2, disable grand unification
  if($CommandLineInput{'omsversion'} < $OMS_102000)
  {
    $CommandLineInput{"gu"} = 0;
  } 

# Archive processed access log files
  if(exists $cmdLine{'archive'})
  {
    $CommandLineInput{'archive'} = $cmdLine{'archive'};
  }
  else
  {
    $CommandLineInput{"archive"} = 1;
  }

  # Apache log format name ("combined" or "chronos"); empty by default,
  # which selects the default regexp
  if(exists $cmdLine{'apachelogformat'})
  {
    $CommandLineInput{'apachelogformat'} = $cmdLine{'apachelogformat'};
  }
  else
  {
    $CommandLineInput{"apachelogformat"} = "";
  }

  # Max size (in KB) of rotated Log files (apmeum.log)
  if(exists $cmdLine{'maxlogfilesizekb'})
  {
    $CommandLineInput{'maxlogfilesizekb'} = $cmdLine{'maxlogfilesizekb'};
  }
  else
  {
    $CommandLineInput{"maxlogfilesizekb"} = 5120; # 5 KB by default
  }

  # Number of Log files (apmeum.log) to keep
  if(exists $cmdLine{'numlogfiles'})
  {
    $CommandLineInput{'numlogfiles'} = $cmdLine{'numlogfiles'};
  }
  else
  {
    $CommandLineInput{"numlogfiles"} = 10;
  }

  # Number of Trace files to keep
  if(exists $cmdLine{'numtracefiles'})
  {
    $CommandLineInput{'numtracefiles'} = $cmdLine{'numtracefiles'};
  }
  else
  {
    $CommandLineInput{"numtracefiles"} = 10;
  }

  # if advanced filtering is on
  if($CommandLineInput{'advDtFilter'})
  {
    # set the page hash file name
    $CommandLineInput{'pageHashFileName'} = 
      $CommandLineInput{'privateDirName'}.$pathSeperator . 
      $targetType_Name.$pageHashFileSufix;
    # set the page hash tmp file name
    $CommandLineInput{'tmpPageHashFileName'} = 
      $CommandLineInput{'workingDirName'}.$pathSeperator . 
      $targetType_Name.$pageHashFileSufix;

    # aging
    if(exists $cmdLine{'advdtfilterage'})
    {
      $CommandLineInput{'advDtFilterAge'} = $cmdLine{'advdtfilterage'};
    }
    else
    {
      $CommandLineInput{'advDtFilterAge'} = 10;
    }

    # cache entries 
    if(exists $cmdLine{'cacheentries'})
    {
      $CommandLineInput{'cacheEntries'} = $cmdLine{'cacheentries'};
    }
    else
    {
      $CommandLineInput{'cacheEntries'} = 0;
    }
  }

  # proc old files for first run or not
  if(exists $cmdLine{"procoldfiles"})
  {
    $CommandLineInput{"procOldFiles"} = $cmdLine{"procoldfiles"};
  } else {
    $CommandLineInput{"procOldFiles"} = 0;
  }

  # resolve ip addrs or not, set to true by default
  if (exists $cmdLine{"resolve"} )
  {
      $CommandLineInput{"resolve"} = $cmdLine{"resolve"};
  } else {
      $CommandLineInput{"resolve"} = 1;
  }

  # usegmtoffset option, set to true by default
  if (exists $cmdLine{"usegmtoffset"} )
  {
      $CommandLineInput{"usegmtoffset"} = $cmdLine{"usegmtoffset"};
  } else {
      $CommandLineInput{"usegmtoffset"} = 1;
  }

  # host lookup time out set to 2 seconds by default
  if ( $cmdLine{"timeout"} )
  {
      # make sure the timeout value is valid
      if($cmdLine{"timeout"} < 1) 
      {
        Usage($progname, 'Invalid timeout value') ;
        return $CATASTROPHIC_FAILURE;
      }
      $CommandLineInput{"timeout"} = $cmdLine{"timeout"};
  } else {
      $CommandLineInput{"timeout"} = 2;
  }

  # unresolved aging set to 1 month (31 * 24 * 60 = 44640 minutes) by default
  if ( $cmdLine{"unresolvedage"} )
  {
      # make sure the unresolvedage value is valid
      if($cmdLine{"unresolvedage"} < 1) 
      {
        Usage($progname, 'Invalid unresolvedage value') ;
        return $CATASTROPHIC_FAILURE;
      }
      $CommandLineInput{"unresolvedage"} = $cmdLine{"unresolvedage"};
  } else {
      $CommandLineInput{"unresolvedage"} = 44640;
  }

  # resolved aging set to 1 week (7 * 24 * 60 = 10080 minutes) by default
  if ( $cmdLine{"resolvedage"} )
  {
      # make sure the resolvedage value is valid
      if($cmdLine{"resolvedage"} < 1) 
      {
        Usage($progname, 'Invalid resolvedage value') ;
        return $CATASTROPHIC_FAILURE;
      }
      $CommandLineInput{"resolvedage"} = $cmdLine{"resolvedage"};
  } else {
      $CommandLineInput{"resolvedage"} = 10080;
  }

  # undefined max time set to 2 hours (2 * 60 = 120 minutes) by default
  if ( $cmdLine{"maxundeftime"} )
  {
      # make sure the maxundeftime value is valid
      if($cmdLine{"maxundeftime"} < 1) {
        Usage($progname, 'Invalid maxundeftime value') ;
        return $CATASTROPHIC_FAILURE;
      }
      $CommandLineInput{"maxundeftime"} = $cmdLine{"maxundeftime"};
  } else {
      $CommandLineInput{"maxundeftime"} = 120;
  }

  # filter file name
  $CommandLineInput{"filterfile"} =
    $CommandLineInput{"privateDirName"}.$pathSeperator.
    $targetType_Name.$filterFileSufix;

  # name of session tracking cookies
  if ( $cmdLine{"apchsess"} )
  {
      $CommandLineInput{"ApacheSessionId"} = $cmdLine{"apchsess"};
  } else {
      $CommandLineInput{"ApacheSessionId"}="Apache";
  }
  if ( $cmdLine{"jservsess"} )
  {
      $CommandLineInput{"JservSessionId"} = $cmdLine{"jservsess"};
  } else {
      $CommandLineInput{"JservSessionId"}="JServSessionIdoraclejsp";
  }

  # additional page and file name extensions can be passed in beyond the 
  # commonly accepted ones which will be put in the hash by default

  if ( $cmdLine{"pageext"} )
  {
       $pageExtensions = $cmdLine{"pageext"} ;
  }

  if ( $cmdLine{"fileext"} )
  {
      my $fileExtensions = $cmdLine{"fileext"}; 
  }

  if( $cmdLine{"priority"} )
  {
      $CommandLineInput{"priority"} = $cmdLine{"priority"};     
  } 
  else {
      $CommandLineInput{"priority"} = 50;
  }
  
  if( $cmdLine{"nosetpriority"} )
  {
      $CommandLineInput{"nosetpriority"} = $cmdLine{"nosetpriority"};
  }

  $ctxRef->{OSNAME} = $Config{'osname'};
  my $dbmModule = $cmdLine{"dbmmodule"};
  my $dbmInfoFound = 0;
  my $dbmInfoRef;
  if($dbmModule)
  {
      eval {
         require $dbmModule;
      };
      if ($@)
      {
          $ctxRef->{"FILEVALUES"}->{"errorVal"} = "$@";
          return $CATASTROPHIC_FAILURE;
      }
      # find the dbmTypes entry for this module
      
      foreach my $dbmInfo (@dbmTypes)
      {
          if(($dbmInfo->{module} eq $dbmModule) || ($dbmInfo->{implname} eq "*"))
          {
              $dbmInfoRef = $dbmInfo;
              $dbmInfoFound = 1;
              last;
          }
      }
      if(!$dbmInfoFound)
      {
          $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Cannot handle DBM Module $dbmModule. Check ChronosDbmTypes.pm\n";
          return $CATASTROPHIC_FAILURE;         
      }
  } 
  else 
  {
      my $dbmType = $cmdLine{"dbmtype"};
      if($dbmType) 
      {
          $CommandLineInput{"dbmtype"} = $dbmType;
          #find the dbmTypes entry for this dbmtype (implname)     
          foreach my $dbmInfo (@dbmTypes)
          {
              if($dbmInfo->{implname} eq $dbmType)
              {   
                  $dbmInfoRef = $dbmInfo;
                  $dbmInfoFound = 1;
                  last;
              } 
          }
          if(!$dbmInfoFound)
          {
              $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Cannot handle DBM implementation $dbmType. Check ChronosDbmTypes.pm\n";
              return $CATASTROPHIC_FAILURE;
              
          } 
      }
      else
      {
          foreach my $dbmInfo (@dbmTypes)
          {
              if(!($dbmInfo->{implname} eq '*'))
              {
                 eval {
                   require $dbmInfo->{module}
                 };
                 if(!$@) {
                    $dbmModule = $dbmInfo->{module};
                    $dbmInfoFound = 1;
                    $dbmInfoRef = $dbmInfo;
                    last;
                 }
               }
           }
          if(!$dbmInfoFound)
          {
              $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Could not find a DBM implementation. Check ChronosDbmTypes.pm";
              return $CATASTROPHIC_FAILURE;           
          } 
          
      }
  }
  $CommandLineInput{dbmInfo}->{"module"} = $dbmModule;

  $CommandLineInput{dbmInfo}->{"implname"} = $dbmInfoRef->{"implname"};
  $CommandLineInput{dbmInfo}->{"exts"} = $dbmInfoRef->{exts} if($dbmInfoRef->{exts});

  if(exists $cmdLine{dbmmaxdatasize})
  {
      $CommandLineInput{dbmInfo}->{"maxdatasize"} =  $cmdLine{dbmmaxdatasize};
  } else
  {
      my $osname = $ctxRef->{OSNAME};
      my $maxdatasize = 0;
      my $ostokenFound = 0;
 
      foreach my $ostoken (keys (%{$dbmInfoRef->{maxdatasize}}))
      {
        if((index($osname, $ostoken) >= 0) || ($ostoken eq '*'))
        {
           $maxdatasize = $dbmInfoRef->{maxdatasize}{$ostoken};
           last;
        }
      }
      $CommandLineInput{dbmInfo}->{"maxdatasize"} = $maxdatasize;
  }
  if(exists $cmdLine{dbmexts})
  {
      @{$CommandLineInput{dbmInfo}->{"exts"}} = split(/,/, $cmdLine{dbmexts});
  }

  my $pgextIndex;

  if ($pageExtensions) 
  {
    my @pageNameExtensions=split /\//,$pageExtensions;
    for $pgextIndex (0 .. $#pageNameExtensions)
    {
      $defaultPageNameExtensions{$pageNameExtensions[$pgextIndex]} = 1;
    }
  }

  # page and file name extension hashes go into CommandLineInput hash
  $CommandLineInput{"pageExtensions"}=\%defaultPageNameExtensions;

  if ( exists $cmdLine{"maxloglines"} )
  {
      if ($cmdLine{"maxloglines"} > 0)
      {
          $CommandLineInput{"maxloglines"} = $cmdLine{"maxloglines"};
      }
  } 
  else
  {
      $CommandLineInput{"maxloglines"} = 100000;
  }

  my $warnLogLines = ((exists $cmdLine{"warnloglines"})&& ($cmdLine{"warnloglines"} > 0)) ?  $cmdLine{"warnloglines"} : 500;
  $CommandLineInput{"warnloglines"} = (($warnLogLines < $CommandLineInput{"maxloglines"}) ||
				       ($CommandLineInput{"maxloglines"} <= 0)) ? $warnLogLines : $CommandLineInput{"maxloglines"};

                                  

  # create working and private directories
  ($status,$error) = CreateDirectories(\%CommandLineInput);
  if ($status==$STATUS_ERROR)
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
    return $CATASTROPHIC_FAILURE;
  }
  $CommandLineInput{"runMetricOutputFile"}=$CommandLineInput{"workingDirName"}.
            $pathSeperator.$targetType_Name.'_run.out';
  # set the outputFileName and cookiesOutputFileName as needed
  if ($CommandLineInput{"outFileType"} =~ /XML/)
  {
    $CommandLineInput{"tmpOutputFileName"} = $CommandLineInput{"workingDirName"}
       . $pathSeperator.$targetType_Name.'xml';
    $CommandLineInput{"outputFileName"} = $CommandLineInput{"outFileName"};
  }
  else  # /SQLLDR/
  {
    # set the output file name (includes full path)
    $CommandLineInput{"outputFileName"} = $CommandLineInput{"workingDirName"}.
       $pathSeperator.$targetType_Name;
    $CommandLineInput{"cookieOutputFileName"} = 
      $CommandLineInput{"outputFileName"}. '.cookies.ctl';
    $CommandLineInput{"outputFileName"} .= '.ctl';
  }

  # initialize file hash with field hash and field header line begining
  my @postParams = ();
  $ctxRef->{"FILEVALUES"}->{"REGEXP"}->{"postParams"} = \@postParams;
  $ctxRef->{"FILEVALUES"}->{"REGEXP"}->{"fieldHash"}=\%FieldHash;
  $ctxRef->{"FILEVALUES"}->{"REGEXP"}->{"fieldLabel"}='#Fields:';
  $ctxRef->{"FILEVALUES"}->{"REGEXP"}->{"gmtLabel"}='#GMT-Offset: (.)([0-9][0-9])([0-9][0-9])';
  $ctxRef->{"FILEVALUES"}->{"REGEXP"}->{"gmtLabelOffset"}='\+0000';
  #
  # Initialize run metric values
  #
  $ctxRef->{FILEVALUES}->{logEntryCount}=0;
  $ctxRef->{FILEVALUES}->{totalLogEntries}=0;
  $ctxRef->{FILEVALUES}->{invalidLogLines} = 0;
  $ctxRef->{FILEVALUES}->{numKnownBeaconRequests} = 0;
  $ctxRef->{FILEVALUES}->{noOutputLines}=0;
  $ctxRef->{FILEVALUES}->{hashEntries}=0;
  $ctxRef->{FILEVALUES}->{hashedPages}=0;
  $ctxRef->{FILEVALUES}->{totalGifRequests} = 0;
  $ctxRef->{FILEVALUES}->{totalGifsNoOutput} = 0;
  $ctxRef->{FILEVALUES}->{totalGifsNoPageFound} = 0;

  $ctxRef->{FILEVALUES}->{"startLogTime"}='1980-01-01 12:00:00';
  $ctxRef->{FILEVALUES}->{"endLogTime"}='1980-01-01 12:00:00';

  # initialize markerSignature
  $ctxRef->{"MARKERVALUES"}->{"newMarkerSignature"}='1980-01-01 12:00:00';
  $ctxRef->{"MARKERVALUES"}->{"markerSignature"}='1980-01-01 12:00:00';

  # make value of timestyle flag for GMT as opposed to local
  # false by default
  $ctxRef->{'FILEVALUES'}->{'gmtFlag'}=0;
  $ctxRef->{"FILEVALUES"}->{"REGEXP"}->{"headerLabel"}='#';

  $ctxRef->{'localGmtOffsetSecs'} = GetGMTLocalOffset();

  $ctxRef->{CLEANEDUP} = 0;

  # lock file that will prevent subsequent runs from clobbering output.
  # Full path is given.
  $CommandLineInput{'lockFileName'}=
    $CommandLineInput{'privateDirName'}.$pathSeperator.
    $targetType_Name.$lockFileSufix;

  # put marker file in private directory
  $CommandLineInput{"markerFileName"} = $CommandLineInput{"privateDirName"}.
    $pathSeperator.$targetType_Name.$markerFileSufix;


  # initialize constants that can be accepted as command line parameters
  $CommandLineInput{"noResolveIP"}="0.0.0.0";
  $CommandLineInput{"submitTimeID"}="ORACLE_SMP_CHRONOS_ST";
  $CommandLineInput{"loadTimeID"}="ORACLE_SMP_CHRONOS_LT";

  #  make all command line input values hash part of main context hash
  $ctxRef->{"COMMANDLINEINPUT"}=\%CommandLineInput;

  # open debug file for appending debug info
  if (!$debug->Initialize(\$error,$CommandLineInput{"errorFileName"},
        $CommandLineInput{"chronosLogName"},$CommandLineInput{"debugLevel"},
        $DEBUG_FATAL,$CommandLineInput{"traceInfo"},
        $CommandLineInput{"targetName"}.":".$CommandLineInput{"targetType"},
        $CommandLineInput{"startTimeStamp"},
	$CommandLineInput{"maxlogfilesizekb"}, $CommandLineInput{"numlogfiles"},
	$CommandLineInput{"numtracefiles"}))
  {
     $ctxRef->{"FILEVALUES"}->{"errorVal"}=
                       "Fatal error while initializing Logging/Tracing";
     return $CATASTROPHIC_FAILURE;
  }

  # Print start timestamp to log file
  $debug->PrintLog($DEBUG_LOW,"CME Run Starting ".
            $ctxRef->{"COMMANDLINEINPUT"}->{"startTimeStamp"});
  $debug->PrintLog($DEBUG_HIGH,"CME in stage CommandLineInput");
  $debug->PrintTrace("Arguments were: @arguments");

  unless( $CommandLineInput{"nosetpriority"})
  {
      eval {
          my $inputPriority = $CommandLineInput{"priority"};
          local $SIG{'__DIE__'} = sub { $debug->PrintLog($DEBUG_HIGH,"Error: CME priority cannot be set on this platform"); die; };
          $debug->PrintLog($DEBUG_HIGH,"Attempting to set CME priority to $inputPriority");
          my $setPrioOutput = setpriority(0, $$, $inputPriority);
          $debug->PrintLog($DEBUG_HIGH, "Error: Could not set CME priority to $inputPriority") unless ($setPrioOutput);
          my $myNewPriority = getpriority(0, $$);
          $debug->PrintLog($DEBUG_HIGH,"CME priority set to $myNewPriority");
      };
  }

  $debug->PrintTrace("Using dbm implementation: dbmtype: $CommandLineInput{dbmInfo}->{implname}, dbmmodule: $CommandLineInput{dbmInfo}->{module}, maxdatasize: $CommandLineInput{dbmInfo}->{maxdatasize}, ". scalar(@{$CommandLineInput{dbmInfo}->{exts}}) . " exts: [@{$CommandLineInput{dbmInfo}->{exts}}]");
  
  #
  # the default regexp, one which is used when there is none stored in marker file
  #
  if($CommandLineInput{'apachelogformat'} eq "combined")
  {
      # "combined" is assumed to be the prepackaged format string:
      # "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{'regExp'}='^(\S+) (\S+) (\S+) \[(.+)\] \"(\S+) (\S+) (\S+)\" (\d+) (\d+) (\".+?\"|-) (\".+?\"|-)$';
      
      # default fields to match default regexp
      @static_fields = ("c-ip", "c-dns","c-auth-id","timeDate","cs-method","cs-uri","cs-protocol", "sc-status","bytes", "cs(Referer)", "cs(User-Agent)");
      # put it in hash
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{'fields'} = \@static_fields;
      # default fields, filled in by values from marker file or left as is from initialization above
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{"numFormatFields"} = 11; 
      $debug->PrintLog($DEBUG_MEDIUM, "Expecting Apache 'combined' log format: \"%h %l %u %t \\\"%r\\\" %>s %b \\\"%{Referer}i\\\" \\\"%{User-Agent}i\\\"\""); 
      # turn off filtering (page hash, etc)
      $CommandLineInput{"advDtFilter"} = 0;
      $debug->PrintLog($DEBUG_HIGH, "Apache log format doesn't work with filtering. Turning filtering off.\n");
  }
  elsif($CommandLineInput{'apachelogformat'} eq "chronos")
    {
      # "chronos" is the format that apache returns
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{'regExp'}='^(\S+) (\S+) (\S+) (\S+) (\".+?\"|\-) (\S+) (\S+) (\S+) (\d+) (\d+) (\d+) (\".+?\"|\"-\") (\".+?\"|\"-\") (\".+?\"|-) (\".+?\"|-) (\".+?\"|-)$';
      
      # default fields to match default regexp
      @static_fields = ("x-date-start", "x-time-start", "c-ip", "s-ip", "cs(Host)", "cs-method","cs-uri","cs-protocol", "sc-status","bytes","us-time-taken", "x-cookie(ORACLE_SMP_CHRONOS_ST)","x-cookie(ORACLE_SMP_CHRONOS_LT)","x-cookie(ORACLE_SMP_CHRONOS_GL)", "cs(Referer)", "cs(User-Agent)");
      # default fields, filled in by values from marker file or left as is from initialization above
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{'fields'} = \@static_fields;
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{"numFormatFields"} = 17;
      $debug->PrintLog($DEBUG_MEDIUM, "Expecting Apache 'chronos' log format");
  
    }
  else 
  {

      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{'regExp'}='^(\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\S+) (\d+) (\d+) (\".+?\"|-) (\".+?\"|-) (\S+) \"(.+)\"$';
      
      # default fields to match default regexp
      @static_fields = ("c-ip", "c-dns","c-auth-id","date","time","cs-method","cs-uri","sc-status","bytes",'cs(Cookie)','cs(Referer)',"time-taken",'cs(User-Agent)');
      # put it in hash
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{'fields'} = \@static_fields;
      
      # default fields, filled in by values from marker file or left as is from initialization above
      $ctxRef->{"FILEVALUES"}->{'REGEXP'}->{"numFormatFields"} = 13;
  }

  # lock variable
  $ctxRef->{"FILEVALUES"}->{"lkUnavailable"} = 0;

  # open output lock file
  if (!open (OUTPUTLOCKFILE, ">".$CommandLineInput{"lockFileName"}))
  { 
       $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
         "Error: Failed To Open Output Lock File".
         $CommandLineInput{"lockFileName"}." $!";
       $ctxRef->{"FILEVALUES"}->{"lkUnavailable"} = 1;
       return $CATASTROPHIC_FAILURE;
  }

  # lock the output lock file
  if (!flock (OUTPUTLOCKFILE,LOCK_EX|LOCK_NB)) 
  {  
    close(OUTPUTLOCKFILE);
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
                "Warning: Cannot Lock Output Lock File: ".
                $CommandLineInput{"lockFileName"}.". $!. ".
                "A previous run of chronos_mining.pl may still be running.";
    $ctxRef->{"FILEVALUES"}->{"lkUnavailable"} = 1;
    return $FAILURE_WITH_MESSAGE;
  }
  $ctxRef->{"FILEVALUES"}->{"lockFileHandle"}=\*OUTPUTLOCKFILE;
  return $SUCCESS;
}

######################################################################
#   CreateDirectories : Create the chronos root, working and private
#                       directories when they do not already exist
#   IN:
#       input parameters hash reference; the keys read are
#       chronosRoot, workingDirName and privateDirName
#
#   OUT:
#       ($status, $error) - $STATUS_SUCCESS with an empty message, or
#       $STATUS_ERROR with a message naming the first directory that
#       could not be created
#
######################################################################
sub CreateDirectories
{
  my ($inputParametersHashRef) = @_;

  # Directories are handled in this order; each row pairs the parameter
  # key with the text that starts the failure message for that directory.
  my @wanted = (
    [ 'chronosRoot',    "Failed to create chronos directory " ],
    [ 'workingDirName', "Failed to create target working  directory " ],
    [ 'privateDirName', "Failed to create target private  directory " ],
  );

  foreach my $entry (@wanted)
  {
    my ($key, $failText) = @{$entry};
    my $dir = "$inputParametersHashRef->{$key}";

    # whatever already exists at that path is left alone
    next if -e $dir;
    next if mkdir($dir, 0777);

    # give up at the first directory that cannot be created
    return ($STATUS_ERROR, $failText . "$dir $!");
  }

  return ($STATUS_SUCCESS, "");
}

################################################################### 
#   PostProcess : Processed log files found in the input directory
#                 are moved to the $inDir/archive directory so
#                 as not to pick them to process every time.
#                 Nothing is moved when the archive option is off.
#                 The file named by inFileName is never archived.
#                 Returns a Warning on failure
#   Context in:
#                    @DiskInput.completedLogs
#                    $CommandLineInput.archive
#                    $CommandLineInput.inDirName
#                    $CommandLineInput.inFileName
#
#   Context out:
#                    $FileValues.errorVal (set on failure only)
#
#   Returns:         $SUCCESS, or $WARNING when the archive directory
#                    cannot be created or a file cannot be moved
#
################################################################### 

sub PostProcess
{
  my ($ctxRef) = @_;
  my $inputParametersHashRef = $ctxRef->{"COMMANDLINEINPUT"};
  my @logFileList = @{$ctxRef->{"DISKINPUT"}->{"completedLogs"}};

  $debug->PrintLog($DEBUG_HIGH,"CME in stage PostProcess");

  # archiving switched off: nothing to do
  return $SUCCESS unless $inputParametersHashRef->{'archive'};

  my $archiveDir =
    $inputParametersHashRef->{"inDirName"}.$pathSeperator."archive";

  # create archive directory under input dir if doesnt exist
  unless (-e $archiveDir)
  {
    if (!mkdir($archiveDir, 0777))
    {
      $ctxRef->{"FILEVALUES"}->{"errorVal"}=
        "Failed to create archive directory ".$archiveDir." $!";
      return $WARNING;
    }
  }

  # move all rotated log files in processed list to archive directory
  foreach my $inputFile (@logFileList)
  {
    # NOTE(review): the split is on a literal '/', not $pathSeperator;
    # confirm the entries in completedLogs are always built with '/'.
    my ($inputDirName, $inputFileName) = $inputFile =~ m/^(.+)\/(.+)$/;

    # entries that cannot be split into directory and file are skipped
    next unless defined $inputDirName;

    # only files that live directly in the input directory are archived
    next unless $inputDirName eq $inputParametersHashRef->{'inDirName'};

    # the file named by inFileName stays where it is
    next if $inputFileName eq $inputParametersHashRef->{"inFileName"};

    # The destination carries the file name explicitly. The previous
    # code appended a variable that was never assigned, so the target
    # was the bare archive directory and the failure message below
    # never named the file.
    unless (move("$inputFile", $archiveDir.$pathSeperator.$inputFileName))
    {
      $ctxRef->{"FILEVALUES"}->{"errorVal"}=
        "Failed to archive log file $inputFileName $!";
      return $WARNING;
    }
    $debug->PrintLog($DEBUG_LOW, "Archived log file $inputFile");
  }

  return $SUCCESS;
}

###################################################################### 
# PrintDebugReport - Writes the end-of-run line counters to the trace
#                    and log output through the $debug object
#
# IN:
#       $fileHashRef            - accepted but not used
#       $debugHashRef           - hash reference; the keys read are
#                                 logEntryCount, noOutputLines and
#                                 noCookieOutputLines
#       $inputParametersHashRef - accepted but not used
# OUT:
#
# RETURNS:
#       $STATUS_SUCCESS
###################################################################### 

sub PrintDebugReport
{
  # only the second argument is consulted
  my $counters = $_[1];

  # should be sent back to the agent if that is who launched it but for now
  # everything goes through the debug object
  $debug->PrintTrace("\nThe Output Was Successfully Generated");

  # one log line per counter: label text followed by the counter value
  my @report = (
    [ "Total Number of Log Lines = ",                     "logEntryCount" ],
    [ "Number Of Lines Written to Output File = ",        "noOutputLines" ],
    [ "Number of lines written to cookie output file = ", "noCookieOutputLines" ],
  );

  foreach my $row (@report)
  {
    my ($label, $key) = @{$row};
    $debug->PrintLog($DEBUG_MEDIUM, $label.$counters->{$key});
  }

  return $STATUS_SUCCESS;
}

###################################################################### 
#  DoCleanup : End-of-run cleanup. Deletes the latency hash, unlocks
#              and closes the output lock file, logs the run time and
#              writes the run metric output file. Only logs a message
#              and returns when the context is already marked as
#              cleaned up.
#
#  Context in:     
#              $CommandLineInput.workingDirName
#              $CommandLineInput.latencyHashName
#              $CommandLineInput.dbmInfo
#              $CommandLineInput.startTime
#              $CommandLineInput.startTimeStamp
#              $CommandLineInput.runMetricOutputFile
#              $CommandLineInput.warnloglines
#              $FileValues (run counters, lkUnavailable, lockFileHandle)
#              $MarkerValues.modified
#              PAGEHASH (optional)
#  Context out:
#              CLEANEDUP
#              $FileValues.endTime
#              $FileValues.errorVal (on failure or warning)
#
#  Returns:    $SUCCESS; $FAILURE when the latency hash cannot be
#              deleted or the lock file cannot be unlocked;
#              $CATASTROPHIC_FAILURE when the run metric output file
#              cannot be opened; $FAILURE_WITH_MESSAGE when the marker
#              was not modified and at least warnloglines lines were
#              processed
###################################################################### 

sub DoCleanup 
{
  my ($ctxRef) = @_;
  my $inputParametersHashRef = $ctxRef->{"COMMANDLINEINPUT"};
  my $fileHashRef = $ctxRef->{"FILEVALUES"};
  my $markerHashRef = $ctxRef->{MARKERVALUES};
  my $pageHash = $ctxRef->{PAGEHASH};
  my $debugMsg="";
  my $returnVal = $SUCCESS;
  my $system=0;
  my $user=0;
  my $cuser=0;
  my $csystem=0;
  my $tot_cpu_time=0;
  my $output_incomplete_pages_count = 0;
  my $output_page_requests_count = 0;
  # gif requests left over after subtracting the three counters below
  my $total_unaccounted_gifs = $fileHashRef->{totalGifRequests} - 
                                 ($fileHashRef->{totalGifsNoOutput} + 
                                  $fileHashRef->{totalGifsNoPageFound} +
                                  $fileHashRef->{noOutputLines});

  $debug->PrintLog($DEBUG_HIGH,"CME in stage DoCleanup");

  # PAGEHASH may be absent from the context; the two counts stay 0 then
  if ($pageHash)
  {
    $output_incomplete_pages_count = $pageHash->get_output_incomplete_pages();
    $output_page_requests_count = $pageHash->get_output_page_requests();
  }

  # a second call only logs a message and reports success
  if($ctxRef->{"CLEANEDUP"})
  {
      $debug->PrintLog($DEBUG_HIGH,"Already cleaned up");
      return $SUCCESS;
  }
  $ctxRef->{"CLEANEDUP"} = 1;
  # only if lock on output lk file was obtained (lkUnavailable not set)
  if (!$ctxRef->{"FILEVALUES"}->{"lkUnavailable"})
  {
    # only remove the DBM file if it exists (might not if no input files found...)
    if(ChronosHash::hashExists("$inputParametersHashRef->{'workingDirName'}".$pathSeperator.
		  "$inputParametersHashRef->{'latencyHashName'}",
		  $inputParametersHashRef->{'dbmInfo'}))
    {
      # delete the latency hash under the working directory
      unless(ChronosHash::deleteHash("$inputParametersHashRef->{'workingDirName'}".$pathSeperator.
			"$inputParametersHashRef->{'latencyHashName'}",
			$inputParametersHashRef->{'dbmInfo'}))
      {
        # NOTE(review): the message joins the path with a literal "/"
        # while the calls above use $pathSeperator
        $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
          "Error Unable to Delete DBM File ".
          "$inputParametersHashRef->{'workingDirName'}".
          "/$inputParametersHashRef->{'latencyHashName'}".
          ": $!\n";
        $returnVal = $FAILURE;
      }
    }
  
    # unlock and close lock file
    if (exists $fileHashRef->{"lockFileHandle"}) 
    {
      my $lockFileHandle=$fileHashRef->{"lockFileHandle"};
      if (!flock($lockFileHandle,LOCK_UN))
      {
        # replaces any errorVal stored by the hash deletion above
        $ctxRef->{"FILEVALUES"}->{"errorVal"}=
          "Cannot Unlock Output Lock File: $!\n";
        $returnVal = $FAILURE;
      }
      # the handle is closed whether or not the unlock succeeded
      close($lockFileHandle);
    }
  }

  # print end time to debug log file
  my $endTimeStamp = GetCurrentTime();
  my $runTime=time-$inputParametersHashRef->{"startTime"};
  # startTime + runTime, i.e. the value of time read on the line above
  $fileHashRef->{"endTime"} = $inputParametersHashRef->{"startTime"}+$runTime;

  # print debug information
  $debugMsg="CME Total run time ".$runTime." seconds";
  $debug->PrintLog($DEBUG_MEDIUM,$debugMsg);

  # debug messages
  $debugMsg="CME Run Ending ".$endTimeStamp;
  $debug->PrintLog($DEBUG_LOW,$debugMsg);
  #
  # output run metric numbers to file later read by run metric collection script
  # here cpu time is the sum of all four values returned by times
  #

  # cpu timings
  ($user,$system,$cuser,$csystem)=times; 
  $tot_cpu_time=$user+$system+$cuser+$csystem;

  # run output line printout
  if (!open(RUNOUTFILE,'>'.$inputParametersHashRef->{"runMetricOutputFile"}))
  {
      $ctxRef->{"FILEVALUES"}->{"errorVal"} =
        "Error failed To Open Run Metric Output File $inputParametersHashRef->{'runMetricOutputFile'} $!";
      # NOTE(review): end_force() is defined outside this chunk; presumably
      # it finishes the output object early - confirm
      $fileHashRef->{"outFileObject"}->end_force() if $fileHashRef->{"outFileObject"};
      return $CATASTROPHIC_FAILURE;
  }

  # guard against division by zero when the run took less than a second
  my $cpu_util_pct = ($runTime == 0) ? 0 : (($tot_cpu_time/$runTime) * 100);
  # write to run output file: '|' separated fields in the order below,
  # ending with the cpu utilization percentage printed with two decimals
  # (no newline is written)
  print RUNOUTFILE $fileHashRef->{totalLogEntries}.'|'.
                   $fileHashRef->{invalidLogLines}.'|'.
                   $fileHashRef->{numKnownBeaconRequests}.'|'.
                   $fileHashRef->{noOutputLines}.'|'.
                   $output_incomplete_pages_count.'|'.
                   $output_page_requests_count.'|'.
                   $fileHashRef->{totalGifRequests}.'|'.
                   $fileHashRef->{totalGifsNoOutput}.'|'.
                   $fileHashRef->{totalGifsNoPageFound}.'|'.
                   $total_unaccounted_gifs.'|'.
                   $inputParametersHashRef->{"startTimeStamp"}.'|'.
                   $runTime.'|'.
                   $fileHashRef->{"startLogTime"}.'|'.
                   $fileHashRef->{"endLogTime"}.'|'.
                   $tot_cpu_time.'|';
  printf RUNOUTFILE ("%.2f", $cpu_util_pct);

  # close run output file
  close (RUNOUTFILE);

  # marker not modified although at least warnloglines lines were
  # processed: report it as a warning; this takes precedence over any
  # $FAILURE recorded in $returnVal above
  if(!$markerHashRef->{"modified"} && 
     ($fileHashRef->{logEntryCount} >= $inputParametersHashRef->{warnloglines}))
  {
      $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Warning: No valid log entries were found in the $fileHashRef->{logEntryCount} lines processed. Does the access log have a valid format?";
      # To ensure that the process exits with status 1
      return $FAILURE_WITH_MESSAGE;
  }
  return $returnVal;
}

##################################################################### 
#  InitializeEnvironment : Creates the objects used by the rest of
#                          the run: the filter, the output object
#                          and, when advanced filtering is on, the
#                          page hash (plus the request and incomplete
#                          load hashes when gu is on)
#
#  Context in:
#                    $CommandLineInput.filterfile
#                    $CommandLineInput.emdURL
#                    $CommandLineInput.outFileType
#                    $CommandLineInput.gu
#                    $CommandLineInput.cookieOutput
#                    $CommandLineInput.outputFileName
#                    $CommandLineInput.tmpOutputFileName
#                    $CommandLineInput.cookieOutputFileName
#                    $CommandLineInput.omsversion
#                    $CommandLineInput.advDtFilter
#                    $CommandLineInput.queryDelimiterRegexp
#                    $CommandLineInput.parameterizedURL
#                    $CommandLineInput.tmpPageHashFileName
#                    $CommandLineInput.pageHashFileName
#                    $CommandLineInput.dbmInfo
#                    PAGEPARAMS
#  Context out:      
#                    FILTER
#                    $FileValues.outFileObject
#                    REQUESTHASH, INCOMPLETELOADHASH, PAGEHASH
#                    $FileValues.errorVal (on failure)
#
#  Returns:          $SUCCESS, or $CATASTROPHIC_FAILURE when the
#                    output object or the page hash cannot be created
##################################################################### 

sub InitializeEnvironment
{
  my ($ctxRef) = @_;
  my $opts     = $ctxRef->{"COMMANDLINEINPUT"};
  my $fileVals = $ctxRef->{"FILEVALUES"};
  my $error    = "";

  $debug->PrintLog($DEBUG_HIGH,"CME in stage InitializeEnvironment");

  # filter object; the result is stored without being checked, so a
  # missing or unreadable filter file does not stop the run here
  $ctxRef->{"FILTER"} = ChronosFilter->new(\$error, $opts->{"filterfile"});

  # output object
  my $tableOut = ChronosTableOut->new(\$error,
                                      $opts->{"emdURL"},
                                      $opts->{"outFileType"},
                                      $opts->{"gu"},
                                      $opts->{"cookieOutput"},
                                      $opts->{"outputFileName"},
                                      $opts->{"tmpOutputFileName"},
                                      $opts->{"cookieOutputFileName"},
                                      $opts->{"omsversion"});
  $fileVals->{"outFileObject"} = $tableOut;
  if (!$tableOut)
  {
    $error = "Error failed on creation of output object.\n$error";
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = $error;
    return $CATASTROPHIC_FAILURE;
  }

  # without advanced filtering there is no page hash to set up
  return $SUCCESS unless $opts->{'advDtFilter'};

  # request and incomplete-load hashes exist only when gu is on
  if ($opts->{'gu'})
  {
    $ctxRef->{'REQUESTHASH'} =
      ChronosRequestsHash->new($fileVals->{"outFileObject"});
    $ctxRef->{'INCOMPLETELOADHASH'} =
      ChronosIncompleteLoadHash->new($fileVals->{"outFileObject"});
  }

  $ctxRef->{'PAGEHASH'} = ChronosPageHash->new(
    $ctxRef->{'REQUESTHASH'},
    $ctxRef->{'INCOMPLETELOADHASH'},
    $ctxRef->{'PAGEPARAMS'},
    $opts->{'queryDelimiterRegexp'},
    $opts->{'parameterizedURL'},
    $debug,
    $opts->{'tmpPageHashFileName'},
    $opts->{'pageHashFileName'},
    $opts->{'dbmInfo'},
    \$ctxRef->{FILEVALUES}->{'errorVal'},
  );
  return $SUCCESS if defined($ctxRef->{'PAGEHASH'});

  # the constructor may already have stored a message in errorVal,
  # so this text is appended rather than assigned
  $ctxRef->{FILEVALUES}->{'errorVal'} .= "\nFailed opening tmp page hash. $!\n";
  return $CATASTROPHIC_FAILURE;
}

######################################################################### 
#  DoRecovery : Moves output by previous run from working
#               to destination directory if marker file flag found dirty,
#               then sets dirty flag off. Deletes latency hash
#
#  Context in:
#                    $CommandLineInput.privateDirName
#                    $CommandLineInput.workingDirName
#  Context out:      -
######################################################################### 

sub DoRecovery 
{
  # Recovery stage. When a marker file exists, its contents are read and the
  # saved regexp / GMT settings are copied into the FILEVALUES hash. If the
  # marker is flagged dirty, the previous run's output is moved to its
  # destination and a clean marker is written back. Finally any latency hash
  # left in the working directory is deleted.
  #
  # IN : $ctxRef - main context hash reference
  # OUT: $SUCCESS, $FAILURE (latency hash could not be deleted) or
  #      $CATASTROPHIC_FAILURE (marker read / move / marker write failed);
  #      on failure the message is stored in FILEVALUES.errorVal
  my ($ctxRef) = @_;
  my $cmdLine  = $ctxRef->{"COMMANDLINEINPUT"};
  my $fileVals = $ctxRef->{"FILEVALUES"};

  $debug->PrintLog($DEBUG_HIGH,"CME in stage DoRecovery");

  if (-e $cmdLine->{"markerFileName"})
  { 
    # pull everything the previous run recorded in the marker file
    my ($isDirty, $stampName, $offset, $prevRunOutFile, $stamp,
        $savedRegExp, $savedFormatVals, $existed, $gmtFlag, $gmtOffsetSecs,
        $rc, $msg) = ReadMarkerFile($cmdLine->{"markerFileName"});

    if ($rc==$STATUS_ERROR)
    {
      $ctxRef->{"FILEVALUES"}->{"errorVal"}=$msg;
      return $CATASTROPHIC_FAILURE;
    }

    # carry the saved parsing state over to this run
    $fileVals->{'REGEXP'}->{'regExp'} = $savedRegExp if ($savedRegExp ne '');
    $fileVals->{'gmtFlag'} = $gmtFlag if ($gmtFlag ne '');
    $fileVals->{'gmtOffsetSecs'} = $gmtOffsetSecs;

    if ($isDirty == $FLAG_DIRTY)
    {
      # previous run stopped between the dirty and the clean marker write:
      # finish moving its output first
      ($rc,$msg) = MoveOutputFilesToDestination($cmdLine, $prevRunOutFile,
                     $cmdLine->{"cookieOutFileName"});
      if ($rc==$STATUS_ERROR)
      {
        $ctxRef->{"FILEVALUES"}->{"errorVal"}=$msg;
        return $CATASTROPHIC_FAILURE;
      }

      # then put a clean marker back ('-' = no pending output file)
      ($rc,$msg) = WriteMarkerInfo($cmdLine, $fileVals, $FLAG_CLEAN,
                     $stampName, $offset,
                     $fileVals->{'REGEXP'}->{'regExp'}, '-', $stamp,
                     $savedFormatVals);
      if ($rc==$STATUS_ERROR)
      {
        $ctxRef->{"FILEVALUES"}->{"errorVal"}=$msg;
        return $CATASTROPHIC_FAILURE;
      }
    }
  }

  # drop the latency hash left in the working directory, if there is one
  my $latencyHashPath = "$cmdLine->{'workingDirName'}".$pathSeperator.
                        "$cmdLine->{'latencyHashName'}";
  if (ChronosHash::hashExists($latencyHashPath, $cmdLine->{'dbmInfo'}) &&
      !ChronosHash::deleteHash($latencyHashPath, $cmdLine->{'dbmInfo'}))
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
      "Error Unable to Delete DBM File ".
      "$cmdLine->{'workingDirName'}".
      "/$cmdLine->{'latencyHashName'}".
      ": $!\n";
    return $FAILURE;
  }

  return $SUCCESS;
}
###################################################################
#   WriteMarkerInfo : write passed information to marker file
#
#   IN: input parameters reference and marker info string
#   OUT: status message on success or failure
#
###################################################################

sub WriteMarkerInfo
{
  # Writes one '|'-separated line to the marker file named by
  # $inputParametersHashRef->{"markerFileName"}:
  #   dirtyFlag|stampName|offset|prevRunOutFile|signature|regExp|fields|gmt
  #
  # IN : $inputParametersHashRef - command line input hash (markerFileName)
  #      $fileHashRef            - file values hash (REGEXP fields, gmtFlag,
  #                                gmtOffsetSecs)
  #      $dirtyFlag, $markerFileStampName, $markerOffset, $lastRegExp,
  #      $prevRunOutFileName, $markerSignature - written as given
  #      $logFields (optional)   - ready-made ':'-separated field list; when
  #                                false the list is rebuilt from
  #                                $fileHashRef->{'REGEXP'}->{'fields'}
  # OUT: ($status, $error) - $STATUS_SUCCESS with '' or $STATUS_ERROR with
  #      a message
  my ($inputParametersHashRef,$fileHashRef,$dirtyFlag,
      $markerFileStampName, $markerOffset,$lastRegExp, 
      $prevRunOutFileName, $markerSignature, $logFields)=@_;
  my $markerFile=$inputParametersHashRef->{"markerFileName"};
  my $error="";
  my $fields = "";

  if($logFields)
  {
      $fields  = $logFields;
  }
  else
  {
      # rebuild the ':'-separated list from the parsed format fields
      my $numFields = $fileHashRef->{'REGEXP'}->{'numFormatFields'} || 0;
      $fields = join(':',
                     map { $fileHashRef->{'REGEXP'}->{'fields'}[$_] }
                     (0 .. $numFields - 1));
  }

  # Last column: the GMT flag if set, else the signed GMT offset, else 0.
  my $gmtField = 0;
  if ($fileHashRef->{'gmtFlag'})
  {
      $gmtField = $fileHashRef->{'gmtFlag'};
  }
  elsif ($fileHashRef->{'gmtOffsetSecs'})
  {
      # The offset may arrive as a string that already carries its sign
      # (ReadMarkerFile captures e.g. "+3600"). Convert it to a number first
      # so a positive value is written with exactly one '+' instead of
      # "++3600", which would not be read back as the same offset.
      my $offsetSecs = $fileHashRef->{'gmtOffsetSecs'} + 0;
      $gmtField = ($offsetSecs > 0) ? '+'.$offsetSecs : $offsetSecs;
  }

  my $markerInfo=$dirtyFlag.'|'.$markerFileStampName.'|'.$markerOffset.'|'.
    $prevRunOutFileName.'|'.$markerSignature.'|'.$lastRegExp.'|'.$fields.'|'.$gmtField;

  # open file handle, write info in a single print and close the file
  if (!open (MARKERFILE, ">$markerFile"))
  {
    $error="Error failed To Open Marker File $markerFile: $!\n" ;
    return ($STATUS_ERROR, $error);
  }
  if (!print MARKERFILE $markerInfo."\n")
  {
    close(MARKERFILE);
    $error="Error in writing to marker file";
    return ($STATUS_ERROR,$error);
  }
  # buffered output is only flushed here, so a failing close means the
  # marker line may not have reached the disk
  if (!close (MARKERFILE))
  {
    $error="Error in closing marker file $markerFile: $!\n";
    return ($STATUS_ERROR,$error);
  }
  return ($STATUS_SUCCESS,$error);
}

######################################################################### 
#
#    DoCommit :  Marker file updated with marker file flag set to dirty,
#                output files moved to destination and marker dirty flag
#                turned off. Marker file is updated as an atomic
#                operation, i.e. in one print statement as opposed to
#                multiple ones.
#    Context in:
#                   $MarkerValues.newMarkerSignature
#                   $MarkerValues.newFirstFileOffset
#                   $CommandLineInput.workingDirName
#                   $CommandLineInput.privateDirName
#    Context out: -
#
######################################################################### 

sub DoCommit
{
  # Commit stage: write the new marker with the dirty flag set, move the
  # output files to their destination, then rewrite the marker with the
  # dirty flag cleared.
  #
  # IN : $ctxRef - main context hash reference
  # OUT: $SUCCESS, $WARNING (new marker data incomplete) or
  #      $CATASTROPHIC_FAILURE; messages are stored in FILEVALUES.errorVal
  my ($ctxRef) = @_;
  my $cmdLine  = $ctxRef->{"COMMANDLINEINPUT"};
  my $marker   = $ctxRef->{"MARKERVALUES"};
  my $fileVals = $ctxRef->{"FILEVALUES"};
  my ($rc, $msg) = ($STATUS_SUCCESS, "");

  $debug->PrintLog($DEBUG_HIGH,"CME in stage DoCommit");

  # A blank value in any of these means all files were rejected during
  # pre-parsing; the old marker is left as it is in that case.
  my @requiredKeys = ("newMarkerFileStampName", "newMarkerSignature",
                      "newMarkerOffset", "newMarkerPrevRunOutFileName");
  if (grep { $marker->{$_} eq '' } @requiredKeys)
  {
     $ctxRef->{"FILEVALUES"}->{"errorVal"}="Invalid marker data\n";

     # output is still moved out of the working directory; the result of
     # the move is not examined on this path
     ($rc,$msg)=MoveOutputFilesToDestination($cmdLine,
       $marker->{"newMarkerPrevRunOutFileName"},
       $cmdLine->{"cookieOutFileName"});

     return $WARNING;
  }

  # step 1: dirty marker naming the output file that is about to be moved
  ($rc,$msg)=WriteMarkerInfo ($cmdLine, $fileVals, $FLAG_DIRTY,
                   $marker->{"newMarkerFileStampName"},
                   $marker->{"newMarkerOffset"},
                   $fileVals->{'REGEXP'}->{"regExp"},
                   $marker->{"newMarkerPrevRunOutFileName"},
                   $marker->{"newMarkerSignature"});
  if ($rc==$STATUS_ERROR)
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=$msg;
    return $CATASTROPHIC_FAILURE;
  }

  # step 2: move output files from the working directory to the destination
  ($rc,$msg)=MoveOutputFilesToDestination($cmdLine,
    $marker->{"newMarkerPrevRunOutFileName"},
    $cmdLine->{"cookieOutFileName"});
  if ($rc==$STATUS_ERROR)
  {
     $ctxRef->{"FILEVALUES"}->{"errorVal"}=$msg;
     return $CATASTROPHIC_FAILURE;
  }

  # step 3: same marker data again, dirty flag off and '-' as output file
  ($rc,$msg)=WriteMarkerInfo ($cmdLine, $fileVals, $FLAG_CLEAN,
                   $marker->{"newMarkerFileStampName"},
                   $marker->{"newMarkerOffset"}, 
                   $fileVals->{'REGEXP'}->{"regExp"},'-',
                   $marker->{"newMarkerSignature"});
  if ($rc==$STATUS_ERROR)
  {
      $ctxRef->{"FILEVALUES"}->{"errorVal"}=$msg;
      return $CATASTROPHIC_FAILURE;
  }

  $debug->PrintLog($DEBUG_MEDIUM,
    "Last valid data processed for ".$marker->{"newMarkerSignature"});
  return $SUCCESS;
}

###########################################################################
#
#   MoveOutputFilesToDestination - move output files from working directory
#                                  to destination directory
#
#   IN: CommandLineInput hash reference
#   OUT: status on success or failure
#
###########################################################################

sub MoveOutputFilesToDestination
{
  # Moves the run's output out of the working area:
  #   1. outputFileName -> $destFile (plus the cookie output file when
  #      cookies are enabled and the output type matches SQLLDR)
  #   2. for XML output, renames $destFile to $destFile . $XML_SUCCESS_EXT
  #   3. in advanced filter mode, moves the tmp page hash to the
  #      persistent page hash
  #
  # IN : $inputParametersHashRef - command line input hash
  #      $destFile               - destination of the main output file
  #      $destCookieFile         - accepted but not consulted; the cookie
  #                                destination is always taken from
  #                                $inputParametersHashRef->{"cookieOutFileName"}
  # OUT: ($status, $error)
  my ($inputParametersHashRef, $destFile, $destCookieFile)=@_;
  my $outType    = $inputParametersHashRef->{"outFileType"};
  my $workingOut = $inputParametersHashRef->{"outputFileName"};

  # The working file is absent when an earlier attempt already moved it
  # (in SQLLDR format this is the only move per file).
  if(-e $workingOut)
  {
    move($workingOut,$destFile)
      or return ($STATUS_ERROR, 
           "Failed to move output file to destination - 1.\n$!");

    # the cookie file follows only if cookies are enabled in SQLLDR format
    if (($inputParametersHashRef->{"cookieOutput"}) && ($outType =~ m/SQLLDR/))
    {
      my $cookieSrc  = $inputParametersHashRef->{"cookieOutputFileName"};
      my $cookieDest = $inputParametersHashRef->{"cookieOutFileName"};
      move($cookieSrc,$cookieDest)
        or return($STATUS_ERROR, 
             "Failed to move cookie output file to destination directory $!");
    }
  }

  # XML output needs a second step: tag the moved file with the success
  # extension
  if($outType =~ m/XML/)
  {
    my $taggedFile = $destFile . $XML_SUCCESS_EXT;
    if(-e $destFile)
    {
      # plain rename: both names are expected to be on the same file system
      rename($destFile, $taggedFile)
        or return ($STATUS_ERROR, 
             "Failed to rename output file extension to $XML_SUCCESS_EXT.\n$!");
    }
    else
    {
      # the file may already have been renamed by a run that died before
      # updating the marker file; just record it
      $debug->PrintLog($DEBUG_HIGH,"XML file not found in recovery phase.");
    }
  }

  # advanced filter mode: promote the tmp page hash once all of its files
  # are present
  if($inputParametersHashRef->{'advDtFilter'} && 
        (ChronosHash::hashExists($inputParametersHashRef->{'tmpPageHashFileName'},
                   $inputParametersHashRef->{'dbmInfo'}) == 
         ChronosHash::getNumHashFiles($inputParametersHashRef->{'dbmInfo'})))
  {
    ChronosHash::moveHash($debug, $inputParametersHashRef->{'tmpPageHashFileName'},
                    $inputParametersHashRef->{'pageHashFileName'},
                    $inputParametersHashRef->{'dbmInfo'})
      or return ($STATUS_ERROR, "Failed to move page hash file(s) from tmp to persistent.\n$!");
  }

  return ($STATUS_SUCCESS,'');
}
########################################################################## 
#
#   BuildLogFileList : Builds a culled list of log files found on
#                 disk that will be processed in the current run using
#                 $markerSignature and $firstFileOffset
#   Context in:
#                    $CommandLineInput.inDirName
#                    $CommandLineInput.inFileName
#   Context out:
#                    @DiskInput.logFileList
#                    $DiskInput.noLogFiles
#                    $MarkerValues.firstFileOffset
#                    $FileValues.fileFormat
#                    $MarkerValues.markerSignature
#
######################################################################### 

sub BuildLogFileList
{
  # Collects the log files to process in this run.
  #   1. lists inDirName for rotated files named "<inFileName>.<suffix>",
  #      sorts them and appends the active file (inputFileName) if present
  #   2. reads the marker file and restores the saved regexp / format fields
  #   3. seeds the "new marker" values with the values just read
  #   4. calls FindStartPosition to cull the list and find the start offset
  #
  # IN : $ctxRef - main context hash reference
  # OUT: $SUCCESS, $FAILURE or $CATASTROPHIC_FAILURE; on failure the message
  #      is stored in FILEVALUES.errorVal. DISKINPUT and MARKERVALUES in the
  #      context are filled in.
  my ($ctxRef) = @_;
  my %diskInput;
  my $inputParametersHashRef=$ctxRef->{"COMMANDLINEINPUT"};
  my $fileHashRef;
  my $markerHashRef;
  my $noLogFiles=0;
  my $status;
  my $error="";
  my @logFileNames;
  my $fileName="";
  my $dirtyFlag;
  my $lastRegExp='';
  my $lastFormatVals='';
  my $i=0;
  my $formatval;
  my @formatVals;

  $fileHashRef=$ctxRef->{"FILEVALUES"};
  $debug->PrintLog($DEBUG_HIGH,"CME in stage BuildLogFileList");
  if (!opendir(LOGFILEDIR,$inputParametersHashRef->{"inDirName"}))
  { 
    $ctxRef->{"FILEVALUES"}->{"errorVal"}="Input Directory ".
           $inputParametersHashRef->{"inDirName"}." Does Not Exist: $!";
    return $CATASTROPHIC_FAILURE;
  }

  # read list of all files from disk which have same base name as provided in 
  # infile command line parameter value and add all rotated files to list.
  # defined() keeps an entry named "0" from ending the scan early, and
  # \Q..\E makes the base name match literally (a '.' in it is not a
  # wildcard).
  while (defined($fileName=readdir(LOGFILEDIR)))
  {
    if ($fileName =~ m/^\Q$inputParametersHashRef->{"inFileName"}\E\..+/o) 
    {
      $logFileNames[$noLogFiles]=$inputParametersHashRef->{"inDirName"}."/$fileName";
      $noLogFiles++;
    }
  }
  closedir(LOGFILEDIR);
  my $inputFile="";
  # this will sort them by datetime stamp appended to log file name
  @logFileNames=sort @logFileNames;
  # add the actual infile name if it exists (always last in the list)
  if (-e $inputParametersHashRef->{"inputFileName"}) 
  {
    $inputFile=$inputParametersHashRef->{"inputFileName"};
    $logFileNames[$noLogFiles]=$inputFile;
    $noLogFiles++;
  }

  $debug->PrintTrace("Number of log files found: $noLogFiles.");

  # read marker file values
  ($dirtyFlag, $ctxRef->{"MARKERVALUES"}->{"markerFileStampName"},
    $ctxRef->{"MARKERVALUES"}->{"markerFileOffset"},
    $ctxRef->{"MARKERVALUES"}->{"markerPrevRunOutFileName"}, 
    $ctxRef->{"MARKERVALUES"}->{"markerFileSignature"},
    $lastRegExp,$lastFormatVals,
    $ctxRef->{"MARKERVALUES"}->{"markerExisted"},$ctxRef->{'FILEVALUES'}->{'gmtFlag'}, $ctxRef->{'FILEVALUES'}->{'gmtOffsetSecs'}, 
    $status, $error) = 
    ReadMarkerFile($ctxRef->{"COMMANDLINEINPUT"}->{"markerFileName"});

  if ($status == $STATUS_ERROR) 
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
    return $CATASTROPHIC_FAILURE;
  }
  # if regexp found in marker
  if ($lastRegExp) 
  {
     $fileHashRef->{'REGEXP'}->{"regExp"}=$lastRegExp;
     if ($lastFormatVals)
     {
       @formatVals=split /:/,$lastFormatVals;
     } else {
       $ctxRef->{"FILEVALUES"}->{"errorVal"}=
                    "Invalid RegExp format fields in marker data";
       return $FAILURE;
     }
     #
     # reconstruct the format string used to extract values in appropriate hashes
     # see command line input routine to see how it maps to the regexp and format string
     # 
     # To avoid code duplication, this should ideally be done by a call to
     # ChangeRegExp, which should be modified so that it can be called
     # from here.
     $i=0;
     foreach $formatval (@formatVals)
     {
       my $postParam; 
       $fileHashRef->{'REGEXP'}->{'fields'}[$i] = $formatval;
       # remember the argument of every x-post-params(...) field
       if(($postParam) = ($formatval =~ m/x-post-params\((.*)\)/o))
       {
           push(@{$fileHashRef->{"REGEXP"}->{"postParams"}}, $postParam); 
       }
       $i++;
     }
     $fileHashRef->{'REGEXP'}->{"numFormatFields"} = $i;
        
  }
  # initialize updated marker values with read marker values in case no new 
  # valid lines are found in this run  
  $ctxRef->{"MARKERVALUES"}->{"newMarkerOffset"}=
                     $ctxRef->{"MARKERVALUES"}->{"markerFileOffset"};  
  $ctxRef->{"MARKERVALUES"}->{"newMarkerSignature"}=
                     $ctxRef->{"MARKERVALUES"}->{"markerFileSignature"};  
  $ctxRef->{"MARKERVALUES"}->{"newMarkerFileStampName"}=
                     $ctxRef->{"MARKERVALUES"}->{"markerFileStampName"};  
  $ctxRef->{"MARKERVALUES"}->{"newMarkerPrevRunOutFileName"}=
                     $inputParametersHashRef->{"outFileName"};
  # make culled log file list and no of log files part of DiskInput hash 
  # and add them to the main context hash

  $diskInput{"logFileList"}=\@logFileNames;  
  $diskInput{"noLogFiles"}=$noLogFiles;

  $ctxRef->{"DISKINPUT"}=\%diskInput;

  $markerHashRef=$ctxRef->{"MARKERVALUES"};

  # Now find out which of these files we will keep and which we will throw out
  # based on what is read in from marker file

  ($ctxRef->{"MARKERVALUES"}->{"markerFileOffset"},
        $ctxRef->{"DISKINPUT"}->{"noLogFiles"},
        $ctxRef->{"FILEVALUES"}->{"fileFormat"},
        $ctxRef->{"FILEVALUES"}->{"lastProcessedFile"},
        $status,$error)= FindStartPosition($markerHashRef,
        $inputParametersHashRef, $fileHashRef, $ctxRef->{"DISKINPUT"},
        $ctxRef->{"MARKERVALUES"}->{"markerExisted"});
  if ($status == $STATUS_FAIL) 
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
    return $FAILURE;
  }
  if ($status == $STATUS_ERROR) 
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
    return $CATASTROPHIC_FAILURE;
  }
  return $SUCCESS; 
}

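######################################################################
#
# ReadMarkerFile : reads the marker file and determines the offset and
#                  signature recorded by the previous run
# IN:
#     $markerFileName
# OUT (list returned on the normal path, in this order):
#      $dirtyFlag
#      $lastFileName
#      $lastFileOffset
#      $prevRunOutFileName
#      $lastFileEntryDate
#      $lastRegExp
#      $lastFormatVals
#      $markerExisted
#      $gmtFlag
#      $gmtOffsetSecs
#      $status
#      $err_msg
#
######################################################################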

sub ReadMarkerFile 
{
  # Reads the first line of the marker file and splits it into its fields.
  # Two layouts are recognised: the current 8-column layout and the older
  # 5-column (9.0.4) layout. When the file does not exist the defaults
  # below are returned with $markerExisted = 0.
  #
  # IN : $markerFileName
  # OUT: always the same 12-element list, so that callers can unpack it
  #      positionally on every path:
  #        ($dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
  #         $lastFileEntryDate, $lastRegExp, $lastFormatVals, $markerExisted,
  #         $gmtFlag, $gmtOffsetSecs, $status, $err_msg)
  #      $status is $STATUS_ERROR when the file cannot be opened, is empty,
  #      or matches neither layout.
  my ($markerFileName)=@_;
  my $lastFileName='';
  my $lastFileOffset=0;
  my $lastFileEntryDate='1980-01-01 12:00:00';
  my $prevRunOutFileName='-';
  my $originalMarkerFileEntry='';
  my $dirtyFlag = $FLAG_CLEAN;
  my $status=$STATUS_SUCCESS;
  my $err_msg='';
  my $markerExisted = 0;
  my $lastRegExp='';
  my $lastFormatVals='';
  my $gmtFlag=0;
  my $gmtOffsetSecs = 0;

  if (-e $markerFileName) 
  {
    $markerExisted = 1;
    if (!open (MARKERFILE, "<$markerFileName"))
    {
       $status = $STATUS_ERROR;
       $err_msg = "Cannot open marker file $markerFileName: $!";
       # full 12-element list: a shorter list would shift $status and
       # $err_msg into the wrong slots at the caller
       return ($dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
         $lastFileEntryDate, $lastRegExp,$lastFormatVals,$markerExisted, $gmtFlag,$gmtOffsetSecs, 
         $status, $err_msg);
    }
    $originalMarkerFileEntry=<MARKERFILE>;
    close (MARKERFILE);
    if ($originalMarkerFileEntry) 
    {
      chomp($originalMarkerFileEntry);
      # parse the fields if possible (bug #2096896 applies)
      if(($dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
        $lastFileEntryDate,$lastRegExp,$lastFormatVals,$gmtFlag) =
        $originalMarkerFileEntry =~ m/^(\d)\|(.+?)\|(\d+)\|(\S+)\|(\S+ \S+)\|(.+)\|(\S+)\|(\S+)$/o)
      {
          # a signed value in the last column is a GMT offset, not a flag
          if(($gmtOffsetSecs) =   ($gmtFlag =~ m/((\+|\-)[0-9]*)/o))
          {
              $gmtFlag = 0;
          }
      } else {
        # regexp did not match for 9.0.4 format
        # (the failed list assignment above left $lastRegExp,
        #  $lastFormatVals and $gmtFlag undefined)
        unless(($dirtyFlag,$lastFileName,$lastFileOffset,$prevRunOutFileName,$lastFileEntryDate) =
           $originalMarkerFileEntry =~ m/^(\d)\|(\S+)\|(\d+)\|(\S+)\|(\S+ \S+)$/o)
        {
          $status = $STATUS_ERROR;
          $err_msg = "Unrecognized format in marker file:\n"."$originalMarkerFileEntry"; 
        }
        return ($dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
          $lastFileEntryDate, $lastRegExp,$lastFormatVals,$markerExisted, $gmtFlag, $gmtOffsetSecs, 
          $status, $err_msg);
      }
      #print "marker read,$lastFileOffset\n";
    } 
    else
    {
      # marker file was empty
      $status = $STATUS_ERROR;
      $err_msg = "Empty marker file.";
      # full 12-element list here as well ($gmtOffsetSecs included)
      return ($dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
        $lastFileEntryDate, $lastRegExp,$lastFormatVals,$markerExisted, $gmtFlag,$gmtOffsetSecs, 
        $status, $err_msg);
    }
  }
 
  $debug->PrintTrace("MARKERINFO:$dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
    $lastFileEntryDate, $lastRegExp,$lastFormatVals,$markerExisted, $status, $err_msg\n");
  return ($dirtyFlag, $lastFileName, $lastFileOffset, $prevRunOutFileName,
    $lastFileEntryDate, $lastRegExp,$lastFormatVals,$markerExisted, $gmtFlag,$gmtOffsetSecs, 
    $status, $err_msg);
}

###########################################################################
#
# FindStartPosition: Uses the marker file information to cull the log file
#                    list and to find the start position in the log file
#
# IN: $markerHashRef, $inputParametersHashRef, $fileHashRef, $diskHashRef,
#     $markerExisted
#     
# OUT: $markerOffset,$noLogFiles,$fileFormat,$lastProcessedFile,$status,$error
#    
###########################################################################

sub FindStartPosition 
{
  # Decides which of the listed log files are processed in this run and at
  # which byte offset processing starts.
  #
  # With a marker: walks the list from the newest file backwards until a
  # file is found whose line at the marker offset carries the marker date
  # (CompareOffsetDates); files that fail both the offset check and the
  # begin-date check are spliced out, as is everything older than the file
  # where the walk stopped.
  # Without a marker: optionally keeps only the active log file, then (when
  # no apachelogformat is given) scans for the first usable field-label
  # line and installs the regexp built from it.
  #
  # IN : $markerHashRef, $inputParametersHashRef, $fileHashRef,
  #      $diskHashRef (logFileList / noLogFiles), $markerExisted
  # OUT: ($markerOffset, $noLogFiles, $fileFormat, $lastProcessedFile,
  #       $status, $error) - the same six values on every return path
  my($markerHashRef,$inputParametersHashRef,$fileHashRef,$diskHashRef,
    $markerExisted)=@_;
  my $markerFileIndex=0;
  my $markerFileFound=0;
  my $markerOffset = $markerHashRef->{"markerFileOffset"};
  #
  # preset the format (APACHE for "combined", CALYPSO otherwise); this
  # eliminates unnecessary pre-parsing
  #
  my $fileFormat = ($inputParametersHashRef->{'apachelogformat'} eq "combined") ? "APACHE" : "CALYPSO";
  my $status=$STATUS_SUCCESS;
  my $error="";
  my $lastProcessedFile="";
  my $logFileEntry='';
  my $validFormat=0;
  my $procOldFiles = $inputParametersHashRef->{"procOldFiles"};
  my $inputFileName = $inputParametersHashRef->{"inputFileName"};
  my $fileIndex=0;
  my $logFile='';
  my $regexp='';

  # start from the last (newest) entry of the list
  my $noLogFiles=$diskHashRef->{"noLogFiles"};
  $markerFileIndex=$noLogFiles-1;
  my $logFileNamesRef=$diskHashRef->{"logFileList"};

  if ($noLogFiles == 0)
  {
    return ($markerOffset,$noLogFiles,$fileFormat,$lastProcessedFile,$status,$error);
  }
  # check to see if marker file exists before you compare marker file info
  # with log file dates
  if($markerExisted)
  {
    while ($markerFileIndex >=0 && !$markerFileFound) 
    {
      my $inputFile=$$logFileNamesRef[$markerFileIndex];
      ($markerOffset,$status,$error)=
        CompareOffsetDates($inputFile,$markerHashRef,$fileHashRef,
                   $inputParametersHashRef,$fileFormat,$markerOffset);
      if ($status==$STATUS_FAIL)
      {
        $debug->PrintTrace($error);
        ($status,$error)=CompareBeginDates($inputFile,$fileHashRef,
          $markerHashRef, $inputParametersHashRef,$fileFormat);
        # neither offset nor begin log file dates matched marker dates
        # splice log file from list
        if ($status==$STATUS_ERROR)
        {
          return ($markerOffset,$noLogFiles,$fileFormat,$lastProcessedFile,$status,$error);
        }
        if ($status==$STATUS_FAIL && ($inputFile ne $inputParametersHashRef->{inputFileName}))
        {
          $debug->PrintTrace($error);
          $debug->PrintTrace("About to splice File: $inputFile");
          $lastProcessedFile=$$logFileNamesRef[$markerFileIndex];
          splice(@{$logFileNamesRef},$markerFileIndex,1);
          $noLogFiles--;
          $markerFileIndex--;
        } else {
          # file stays in the list; remember it unless it is the active log
          if ($$logFileNamesRef[$markerFileIndex] ne $inputParametersHashRef->{'inputFileName'}) 
          {
            $lastProcessedFile=$$logFileNamesRef[$markerFileIndex];
          }
          #print "LAST:$lastProcessedFile,$markerFileIndex\n";
          $markerFileIndex--;
        }
      } elsif ($status==$STATUS_ERROR) {
          return ($markerOffset,$noLogFiles,$fileFormat,$lastProcessedFile,$status,$error);
      } else {
        # line at the marker offset matched the marker date
        $markerFileFound=1;
        $markerFileIndex--;
      }  
    }
    # marker position not found in any file: start at the beginning, unless
    # the marker still carries the default signature
    if (!$markerFileFound && ($markerHashRef->{"markerFileSignature"} ne '1980-01-01 12:00:00')) 
    {
      $markerOffset=0;
    }
    #print "MARKER:$markerFileIndex,$noLogFiles\n";
    # drop every file older than the one where the walk stopped
    if ($markerFileIndex >= 0 )
    {
      for (my $i=$markerFileIndex;$i>=0;$i--)
      {
        #print "splicing file$$logFileNamesRef[$i]\n";
        splice(@{$logFileNamesRef}, $i,1);
        $noLogFiles--;
      }
    }
  } else {
    $debug->PrintTrace("Marker does not exist, Looking for field format line");
    if(!$procOldFiles) # ignore all files except "access_log"
    {
      $markerOffset=0;

      # PreProcess also might have changed $lastProcessedFile.
      # We should actually revisit the issue of PreProcess changing the
      # lastProcessedFile (we shouldn't have it maybe)

      $lastProcessedFile=""; # This is because now pre-process also 

      # check that there is an active log file in the list of files
      if($logFileNamesRef->[$noLogFiles - 1] ne $inputFileName)
      {
        splice(@{$logFileNamesRef}, 0);
        $noLogFiles = 0;
        $debug->PrintLog($DEBUG_HIGH,
          "There is no current log file in the target directory, no ".
          "files will be processed (refer to the -procoldfiles option). \n");
      }
      else  # there is a current access_log file to process
      {
        splice(@{$logFileNamesRef}, 0, $noLogFiles - 1);
        $noLogFiles = 1;
      }
    }
    #
    # no marker so look for first #Fields line in log
    # continue down log file list until #fields found
    # keep splicing files in list till we find it
    #
    if(!$inputParametersHashRef->{'apachelogformat'}) 
    {
        while (($fileIndex < $noLogFiles) && !$validFormat) 
        {
            $debug->PrintTrace("No Marker Found");
            $logFile=$$logFileNamesRef[$fileIndex];
            if (!open (INFILE, "<$logFile")) 
            { 
                $error="Cannot open log file $logFile";
                # return the full six-value list like every other exit of
                # this routine, so the caller sees $STATUS_ERROR in the
                # status slot
                return ($markerOffset,$noLogFiles,$fileFormat,$lastProcessedFile,$STATUS_ERROR,$error);
            }
            while ( <INFILE> ) 
            {
                chomp;
                my $orgLogFileEntry=$_;
                $logFileEntry=\$orgLogFileEntry;
                $$logFileEntry  =~ s/\s+/ /go;
                
                if ($$logFileEntry =~ m/^$fileHashRef->{'REGEXP'}->{fieldLabel}.*$/o)
                {
                    ($regexp,$status)=ChangeRegExp($logFileEntry,$fileHashRef);
                    #
                    # if found complete set of fields stop and exit loop else continue till eof
                    #
                    if ($status == $FAILURE) 
                    {
                        $validFormat=0;
                    } else {
                        $validFormat=1;
                        $fileHashRef->{'REGEXP'}->{'regExp'} = $regexp;
                        $debug->PrintTrace("NEW REG: $regexp");
                        last;
                    }
                }
            }
            close(INFILE);
            if (!$validFormat)
            {
                # the splice shifts the next file into $fileIndex, so the
                # index itself is not advanced
                $debug->PrintTrace("splicing file $logFile");
                splice(@{$logFileNamesRef}, $fileIndex,1);
                $noLogFiles--;
            }
            #$fileIndex++;
        } 
    }
  }
  if ($noLogFiles > 0) 
  {
    $status=$STATUS_SUCCESS;
  }
  return ($markerOffset,$noLogFiles,$fileFormat,$lastProcessedFile,$status,$error);
}

###################################################################################
# CompareBeginDates : compare log files first valid line date with marker
#                     date
# IN:
#    $logFile,$fileHashRef,$markerHashRef,$inputParametersHashRef
# OUT:
#    status for success or failure in case of match or 
#    precedence in log file date
###################################################################################

sub CompareBeginDates 
{
  # Reads $logFile from the top until a line parses under $format, then
  # compares that line's date with the marker date.
  #
  # IN : $logFile, $fileHashRef, $markerHashRef, $inputParametersHashRef,
  #      $format ("CALYPSO", "APACHE" or "CLF")
  # OUT: ($status, $error)
  #        $STATUS_ERROR   - log file could not be opened
  #        $STATUS_FAIL    - no valid line found, or the marker date is
  #                          later than the file's date
  #        $STATUS_SUCCESS - otherwise
  my ($logFile, $fileHashRef, $markerHashRef, $inputParametersHashRef, $format)
    = @_;
  my %parsedLine;
  my $parsedRef = \%parsedLine;
  my ($markerDate, $fileDate);

  unless (open (INFILE, "<$logFile")) 
  { 
      return ($STATUS_ERROR, "Cannot open log file $logFile");
  }

  $fileHashRef->{INFILE}=\*INFILE;
  $debug->PrintTrace("Checking Begin Dates for file: $logFile");

  InitializeLineValues($parsedRef, $inputParametersHashRef->{"omsversion"});

  my $lineNo        = 0;
  my $gotValidLine  = 0;
  my $stillInHeader = 1;

  # The scan goes on only while the line just read was a header line; it
  # also ends at end of file or as soon as a line has been parsed.
  SCAN: while ($stillInHeader) 
  {
      my $rawLine = <INFILE>;
      $lineNo++;
      last SCAN unless ($rawLine);     # end of file

      chomp($rawLine);
      my $entryRef = \$rawLine;
      # collapse every run of white space into a single blank
      $$entryRef =~ s/\s+/ /go;

      $stillInHeader =
        ($$entryRef =~ m/^$fileHashRef->{'REGEXP'}->{'headerLabel'}\S+/go) ? 1 : 0;

      if (($format eq "CALYPSO") || ($format eq "APACHE"))
      {
        unless (ApplyCalypsoRegExp($entryRef,$parsedRef,$fileHashRef,$inputParametersHashRef) == 
                   $STATUS_SUCCESS) 
        {
          $debug->PrintTrace("Rejecting Line in initial begin date " .
            "comparison in file $logFile: $lineNo");
          next SCAN;
        }
        $gotValidLine = 1;
        last SCAN;
      }
      elsif ($format eq "CLF")
      {
        unless (ApplyCLFRegExp($entryRef,$parsedRef) == $STATUS_SUCCESS) 
        {
          $debug->PrintTrace("Rejecting Line in initial begin date ".
            " comparison in file $logFile: $lineNo");
          next SCAN;
        }
        $gotValidLine = 1;
        last SCAN;
      }
      # any other format: nothing is parsed, the loop condition decides
  }
  close (INFILE);

  unless ($gotValidLine)
  {
     return ($STATUS_FAIL, "No valid lines found in file $logFile");
  }
  ($markerDate,$fileDate)=ConvertMarkerLineDates($parsedRef,
                             $markerHashRef,$format);

  # a marker date later than the date of the parsed line counts as a
  # mismatch
  if ($markerDate > $fileDate)
  {
    return ($STATUS_FAIL, "Begin dates do not match in $logFile");
  }
  undef %parsedLine;
  return ($STATUS_SUCCESS, "");
}

#############################################################################
#
# CompareOffsetDates : compare log file date at marker 
#                      offset with marker date
#
# IN:
#    $logFile,$lineHashRef,$inputParametersHashRef,
#    $markerHashRef, $fileHashRef
# OUT:
#     status of success or failure on match
#############################################################################

sub CompareOffsetDates 
{
  # Seeks to $markerOffset in $logFile, parses the line found there and
  # compares its date with the marker date. When they are equal the offset
  # is advanced past that line.
  #
  # IN : $logFile, $markerHashRef, $fileHashRef, $inputParametersHashRef,
  #      $format ("CALYPSO", "APACHE" or "CLF"), $markerOffset
  # OUT: ($markerOffset, $status, $error)
  #        $STATUS_ERROR   - log file could not be opened
  #        $STATUS_FAIL    - file shorter than the offset, line did not
  #                          parse, or dates differ (offset unchanged)
  #        $STATUS_SUCCESS - dates match (offset moved past the line)
  my ($logFile, $markerHashRef, $fileHashRef, $inputParametersHashRef,
      $format, $markerOffset) = @_;
  my %parsedLine;
  my $parsedRef = \%parsedLine;
  my ($markerDate, $fileDate);

  # start from a clean set of line values
  InitializeLineValues($parsedRef, $inputParametersHashRef->{"omsversion"});

  open (INFILE, "<$logFile")
    or return ($markerOffset,$STATUS_ERROR,
               "Cannot open $logFile in Compare Offset Dates");
  $fileHashRef->{INFILE}=\*INFILE;

  # element 7 of stat() is the file size in bytes; a file shorter than the
  # offset cannot be the one the marker points into
  my $fileSize = (stat(INFILE))[7];
  if ($fileSize < $markerOffset) 
  {
      $debug->PrintTrace("Exiting from end date checking in file: $logFile");
      close (INFILE);
      return ($markerOffset,$STATUS_FAIL,
              "Log File $logFile smaller than offset"); 
  }

  # read the line that starts at the marker offset
  seek(INFILE,$markerOffset,0);
  $debug->PrintTrace ("Checking End Dates in file: $logFile");
  my $rawLine = <INFILE>;
  # length is taken before the end-of-line characters are stripped
  my $rawLength = $rawLine ? length($rawLine) : 0;

  chomp($rawLine);
  my $entryRef = \$rawLine;
  # collapse every run of white space into a single blank
  $$entryRef =~ s/\s+/ /go;

  # undef = format not recognised (nothing parsed), 1 = parsed, 0 = rejected
  my $parsed;
  if (($format eq "CALYPSO") || ($format eq "APACHE"))
  {
    $parsed = (ApplyCalypsoRegExp($entryRef,$parsedRef,$fileHashRef,$inputParametersHashRef)
                 == $STATUS_SUCCESS) ? 1 : 0;
  }
  elsif ($format eq "CLF")
  {
    $parsed = (ApplyCLFRegExp($entryRef,$parsedRef) == $STATUS_SUCCESS) ? 1 : 0;
  }

  if (defined($parsed))
  {
    unless ($parsed)
    {
      $debug->PrintTrace("Exiting with failure from end dates comparison in: $logFile");
      close(INFILE);
      return ($markerOffset,$STATUS_FAIL,
              "Failed in offset date checking in $logFile");
    }
    ($markerDate, $fileDate) = ConvertMarkerLineDates($parsedRef,
      $markerHashRef, $format);
  }

  close(INFILE);
  if ($markerDate != $fileDate)
  {
      return ($markerOffset,$STATUS_FAIL,
              "Offset dates do not match in $logFile");
  }

  # this is to unlink and sync file hash
  undef %parsedLine;
  $debug->PrintTrace("Matched end dates in file $logFile"); 

  # dates match: step over the line just checked so the caller's marker
  # offset points past it
  $markerOffset += $rawLength if ($rawLine);

  return ($markerOffset,$STATUS_SUCCESS,"");
}

############################################################################
#
# ConvertMarkerLineDates : Convert to integers two date fields, one from 
#                          log file and
#                          one from marker file. Calls timelocal functions
#                          which returns integers which can be compared
#                          for <,> etc.     
# IN:
#    $lineHashRef,$markerHashRef
# OUT:
#     $markerDate,$fileDate
#############################################################################
sub ConvertMarkerLineDates 
{
  # Turn the timestamp parsed from a log line and the timestamp stored in the
  # marker file signature into epoch seconds (via timelocal) so that the
  # caller can compare them numerically.
  #
  # IN:  $lineHashRef   - parsed line; "date"/"time" are read for CALYPSO,
  #                       "timeDate" for CLF and APACHE
  #      $markerHashRef - "markerFileSignature" holds the marker timestamp
  #      $format        - "CALYPSO", "CLF" or "APACHE"
  # OUT: ($markerDate, $fileDate); both stay 0 for any other $format
  my ($lineHashRef, $markerHashRef, $format) = @_;
  my ($markerDate, $fileDate) = (0, 0);

  # month abbreviation => two digit, zero based month number for timelocal
  my @monthNames = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
  my %monthNumber;
  @monthNumber{@monthNames} = map { sprintf("%02d", $_) } 0 .. $#monthNames;

  if ($format eq "CALYPSO")
  {
    # log line: numeric yyyy-mm-dd date and hh:mm:ss time in separate fields
    my ($lineYear, $lineMonth, $lineDay) =
      $lineHashRef->{"date"} =~ m/^(\d+)-(\d+)-(\d+)/o;
    my ($lineHour, $lineMins, $lineSecs) =
      $lineHashRef->{"time"} =~ m/^(\d+):(\d+):(\d+)/o;
    # timelocal wants the month in the 0..11 range
    $lineMonth = $lineMonth - 1;
    $fileDate = timelocal($lineSecs, $lineMins, $lineHour,
                          $lineDay, $lineMonth, $lineYear);

    # marker signature: "yyyy-mm-dd hh:mm:ss"
    my ($sigYear, $sigMonth, $sigDay, $sigHour, $sigMins, $sigSecs) =
      $markerHashRef->{"markerFileSignature"} =~
      m/^(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/o;
    $sigMonth = $sigMonth - 1;
    $markerDate = timelocal($sigSecs, $sigMins, $sigHour,
                            $sigDay, $sigMonth, $sigYear);

    return ($markerDate, $fileDate);
  }

  if (($format eq "CLF") || ($format eq "APACHE"))
  {
    # log line: dd/Mon/yyyy:hh:mm:ss followed by at least one more character
    my ($lineDay, $lineMonthName, $lineYear, $lineHour, $lineMins, $lineSecs) =
      $lineHashRef->{"timeDate"} =~
      m/^(\d+)\/(\S+)\/(\d+):(\d+):(\d+):(\d+).+/o;
    my $lineMonth = $monthNumber{"$lineMonthName"};
    $fileDate = timelocal($lineSecs, $lineMins, $lineHour,
                          $lineDay, $lineMonth, $lineYear);

    # marker signature: three dash separated pieces, then hh:mm:ss
    my ($firstPiece, $sigMonthName, $thirdPiece, $sigHour, $sigMins, $sigSecs) =
      $markerHashRef->{"markerFileSignature"} =~
      m/^(\d+)-(\S+)-(\d+) (\d+):(\d+):(\d+)/o;

    # APACHE signatures lead with the year, CLF signatures with the day
    my ($sigYear, $sigDay) = ($format eq "APACHE")
      ? ($firstPiece, $thirdPiece)
      : ($thirdPiece, $firstPiece);

    my $sigMonth = $monthNumber{"$sigMonthName"};
    $markerDate = timelocal($sigSecs, $sigMins, $sigHour,
                            $sigDay, $sigMonth, $sigYear);
  }

  return ($markerDate, $fileDate);
}

#############################################################################
# ApplyCLFRegExp - Apply CLF regular expressions to line
#
# IN:
#    $logFileEntry,$lineHashRef
#
# OUT:
#
#    status on failure or success
#    $lineHashRef is updated with extracted values from regexp
#############################################################################
sub ApplyCLFRegExp 
{ 
  # Split one CLF log line into its fields and store them in the line hash.
  #
  # IN:  $logFileEntry - reference to the (whitespace normalized) log line
  #      $lineHashRef  - hash that receives the extracted values
  # OUT: $STATUS_SUCCESS when the line matches the CLF layout,
  #      $STATUS_FAIL otherwise
  my ($logFileEntry, $lineHashRef) = @_;

  # hash keys, listed in the order their values appear on the line
  my @clfFields = ("c-ip", "c-dns", "c-auth-id", "timeDate", "cs-uri",
                   "sc-status", "bytes", "cs(Cookie)", "cs(Referer)",
                   "time-taken");

  my @clfValues = $$logFileEntry =~
    m/^(\S+) (\S+) (.+) \[(.+)\] \"(.+?)\" (\S+) (\S+) \"(.+?)\" \"(.+?)\" (\d+)$/o;

  # a failed match leaves @clfValues empty, so every listed key becomes undef
  @{$lineHashRef}{@clfFields} = @clfValues;

  return $STATUS_FAIL unless (@clfValues);
  return $STATUS_SUCCESS;
}

############################################################################
# ApplyCalypsoRegExp - Apply Calypso format regular expressions to line
#
# IN:
#    $logFileEntry,$lineHashRef
# OUT:
#    status on failure or success
#    $lineHashRef is updated with extracted values from regexp
############################################################################

sub ApplyCalypsoRegExp 
{
  # Parse one Calypso-format log line into $lineHashRef.
  #
  # IN:
  #    $logFileEntry           - reference to the current log line
  #    $lineHashRef            - hash that receives the extracted fields
  #    $fileHashRef            - per-file state; reads REGEXP->{fieldLabel},
  #                              REGEXP->{gmtLabel}, REGEXP->{regExp},
  #                              REGEXP->{fields} and INFILE; updates
  #                              REGEXP->{regExp}, gmtFlag, gmtOffsetSecs,
  #                              startLogTime and endLogTime
  #    $inputParametersHashRef - only 'usegmtoffset' is read here
  # OUT:
  #    $STATUS_SUCCESS - line matched the current regexp and was stored
  #    $STATUS_FAIL    - no regexp available, the line did not match, or a
  #                      date/time value could not be split into its parts
  #    $STATUS_ERROR   - the line was an unusable field header and no usable
  #                      header was found in the rest of the file
  #
  # The patterns built from fieldLabel and gmtLabel carry the /o modifier, so
  # Perl compiles them once; later changes to those hash values are not
  # picked up.

  my($logFileEntry,$lineHashRef,$fileHashRef,$inputParametersHashRef)=@_;
  my $regExp='';
  my $status='';
  my $i=0;
  my $entry='';
  my @entryFields;
  my $index='';

  # time conversion vars
  my $hour=0;
  my $mins=0;
  my $secs=0;
  my $mon=0;
  my $day=0;
  my $year=0;
  my $time_end='';
  my $gmtFlag=0;
  my $validFormat=1;
 
  # a line starting with the field label is a format header: rebuild the
  # line regexp from it instead of using the one cached in the file hash
  if ($$logFileEntry =~ m/^$fileHashRef->{'REGEXP'}->{fieldLabel}.*$/o)
  {
    ($regExp,$status)=ChangeRegExp($logFileEntry,$fileHashRef);
    if ($status == $FAILURE)
    {
      $validFormat=0;
      #
      # incomplete format so keep looking down file till we hit next #Fields or eof
      #
      # note: this consumes lines from the handle stored in the file hash and
      # repoints $logFileEntry at $_
      my $inFileHandle=$fileHashRef->{INFILE};
      while( <$inFileHandle> )
      {
        chomp;
        $logFileEntry=\$_; 
        $$logFileEntry  =~ s/\s+/ /go;
        if ($$logFileEntry =~ m/^$fileHashRef->{'REGEXP'}->{fieldLabel}.*$/o) 
        {
          ($regExp,$status)=ChangeRegExp($logFileEntry,$fileHashRef);
          if ($status==$FAILURE) 
          {
             $validFormat=0;
          } else {
             $validFormat=1;
             last;
          }
        }
      }
      # reached eof without finding a usable header
      if (!$validFormat)
      {
        return $STATUS_ERROR;
      }
    }
    # remember the new regexp for the data lines that follow
    $fileHashRef->{'REGEXP'}->{"regExp"}=$regExp;
  } else {
    $regExp=$fileHashRef->{'REGEXP'}->{"regExp"};
  }
  #
  # calculate gmt offset if timestyle found to be gmt
  #
  # NOTE(review): when the offset is +00:00 only gmtFlag is set; gmtOffsetSecs
  # keeps whatever value it had before and is still subtracted from timegm()
  # below - confirm it cannot be stale.

  if (my($gmtOffsetSign, $gmtOffsetHours, $gmtOffsetMins) = $$logFileEntry =~ m/$fileHashRef->{'REGEXP'}->{"gmtLabel"}/o)
  { 
     if (($gmtOffsetSign eq '+') && ($gmtOffsetMins == 0) && ($gmtOffsetHours == 0))
     {
       $fileHashRef->{'gmtFlag'}=1;
     } else {
        $fileHashRef->{'gmtFlag'}=0;
	$fileHashRef->{'gmtOffsetSecs'} = ($gmtOffsetHours * 3600) + ($gmtOffsetMins * 60);
	$fileHashRef->{'gmtOffsetSecs'} = 0 - $fileHashRef->{'gmtOffsetSecs'} if $gmtOffsetSign eq '-';
     }
  } 
  #
  # calculate new regexp
  #
  # no header has been seen yet, so there is nothing to match the line with
  if ($regExp eq '') 
  {
    return $STATUS_FAIL;
  }

  # a header line presumably does not match its own data regexp and so ends
  # up in the else branch at the bottom - TODO confirm
  if ( @entryFields = $$logFileEntry =~ m/$regExp/ ) 
  { 
    # captures are stored positionally under the field names that
    # ChangeRegExp recorded in REGEXP->{fields}
    $i=0;
    foreach $entry (@entryFields)
    {
      $lineHashRef->{$fileHashRef->{'REGEXP'}->{'fields'}[$i]} = $entry;
      $i++;
    }
    #
    # cleanup some entries that were quoted and which are - when no value for then
    #
    # each value other than "-" is replaced by the text between its
    # surrounding double quotes; a value that is not quoted becomes undef
    if ( $lineHashRef->{"cs(Cookie)"} ne "-")
    {
      ($lineHashRef->{"cs(Cookie)"}) = $lineHashRef->{"cs(Cookie)"} =~ m/^\"(.+)\"$/o;
    }
    if ( $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_ST)"} ne "-")
    {
      ($lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_ST)"}) = 
         $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_ST)"} =~ m/^\"(.+)\"$/o;
    }
    if ( $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"} ne "-")
    {
      ($lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"}) = 
         $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"} =~ m/^\"(.+)\"$/o;
    }
    if ( $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_LT)"} ne "-")
    {
      ($lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_LT)"}) = 
         $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_LT)"} =~ m/^\"(.+)\"$/o;
    }
    if ( $lineHashRef->{"cs(Referer)"} ne "-")
    {
      ($lineHashRef->{"cs(Referer)"}) = $lineHashRef->{"cs(Referer)"} =~ m/^\"(.+)\"$/o;
    }
    if ( $lineHashRef->{"cs(User-Agent)"} ne "-")
    {
      ($lineHashRef->{"cs(User-Agent)"}) = 
         $lineHashRef->{"cs(User-Agent)"} =~ m/^\"(.+)\"$/o;
    }
    #
    #
    # x-date-start is same format as date need for output and comparions with marker
    # x-time-start on the other hand has a micro sec no appended to it that
    # needs to be normalized to a big microsec no using timelocal routine
    # which will be used to hash pages win entries and find latency in case
    # of entry page when ST and LT are missing
    #
    if (($lineHashRef->{'x-time-start'} ne '')  && ($lineHashRef->{'x-date-start'} ne ''))
    {
      $lineHashRef->{'date'}= $lineHashRef->{'x-date-start'};
      # split hh:mm:ss.fraction into 'time' and the fractional digits
      ($lineHashRef->{'time'},$lineHashRef->{'x-time-start'}) = 
              $lineHashRef->{'x-time-start'} =~ m/(\d+:\d+:\d+)\.(\d+)/o;
  
      ($hour,$mins,$secs) = $lineHashRef->{'time'} =~ m/(\d+):(\d+):(\d+)/o;
      ($year,$mon,$day) = $lineHashRef->{'date'} =~ m/(\d+)-(\d+)-(\d+)/o;
      if (!defined($hour) || !defined($year))
      {
          return $STATUS_FAIL;
      }
      # timegm/timelocal expect the month in the 0..11 range
      $mon = $mon - 1;
      # gmt based conversion when requested on the command line or when the
      # file reported a zero offset, local time conversion otherwise
      my $epochSecs = ($inputParametersHashRef->{'usegmtoffset'} || $fileHashRef->{'gmtFlag'}) ? 
         timegm($secs,$mins,$hour,$day,$mon,$year) - $fileHashRef->{'gmtOffsetSecs'} : timelocal($secs,$mins,$hour,$day,$mon,$year);
      $lineHashRef->{'x-time-start'} = ($lineHashRef->{'x-time-start'}) +
                         $epochSecs*1000000;
    }

    #
    # normalize x-time-end and x-date-end same way for server side number
    #
    # only done when the line carries no (non zero) 'time-taken' value
    if (($lineHashRef->{'x-time-end'} ne '')  && ($lineHashRef->{'x-date-end'} ne '') 
           && !$lineHashRef->{'time-taken'})
    {
      ($time_end,$lineHashRef->{'x-time-end'}) = 
              $lineHashRef->{'x-time-end'} =~ m/(\d+:\d+:\d+)\.(\d+)/o;
  
      ($hour,$mins,$secs) = $time_end =~ m/(\d+):(\d+):(\d+)/o;
      ($year,$mon,$day) = $lineHashRef->{'x-date-end'} =~ m/(\d+)-(\d+)-(\d+)/o;
      if (!defined($hour) || !defined($year))
      {
          return $STATUS_FAIL;
      }
      $mon = $mon - 1;
      my $epochSecs = ($inputParametersHashRef->{'usegmtoffset'} || $fileHashRef->{'gmtFlag'}) ? timegm($secs,$mins,$hour,$day,$mon,$year) - $fileHashRef->{'gmtOffsetSecs'} : timelocal($secs,$mins,$hour,$day,$mon,$year);
      $lineHashRef->{'x-time-end'} = ($lineHashRef->{'x-time-end'}) +
                                $epochSecs*1000000;
    } else {
      # otherwise derive the end time from the start time plus the duration
      if ($lineHashRef->{'x-time-start'} ne '') {
	#check to see if t'time-taken' is defined
	#if it isn't, then us-time-taken should be defined
	# 'time-taken' is scaled by 1000000 before being added,
	# 'us-time-taken' is added unscaled
	if($lineHashRef->{'time-taken'} ne '') {
	  $lineHashRef->{'x-time-end'} = $lineHashRef->{'x-time-start'} +
	    $lineHashRef->{'time-taken'}*1000000;
	}
	else {
	  $lineHashRef->{'x-time-end'} = $lineHashRef->{'x-time-start'} +
	    $lineHashRef->{'us-time-taken'};
	}
      }
    }

    # if 'x-time-start' not defined, use 'time' for storing into page hash 
    # for correct server timestamps, but make interval 0 because no server calculations
    # are reported when webcache's resolution is low (accurate to seconds only)
    if (!$lineHashRef->{'x-time-start'}  && !$lineHashRef->{'x-date-start'} && 
        ($lineHashRef->{'time'} ne '')  && ($lineHashRef->{'date'} ne ''))
    {
      $lineHashRef->{'x-date-start'} = $lineHashRef->{'date'};
      $lineHashRef->{'x-time-start'} = $lineHashRef->{'time'};
      ($hour,$mins,$secs) = $lineHashRef->{'time'} =~ m/(\d+):(\d+):(\d+)/o;
      ($year,$mon,$day) = $lineHashRef->{'date'} =~ m/(\d+)-(\d+)-(\d+)/o;
      if (!defined($hour) || !defined($year))
      {
          return $STATUS_FAIL;
      }
      $mon = $mon - 1;
      my $epochSecs = ($inputParametersHashRef->{'usegmtoffset'} || $fileHashRef->{'gmtFlag'}) ? 
         timegm($secs,$mins,$hour,$day,$mon,$year) - $fileHashRef->{'gmtOffsetSecs'} : timelocal($secs,$mins,$hour,$day,$mon,$year);
      $lineHashRef->{'x-time-start'} = $epochSecs*1000000;
      # start and end are identical, giving a zero length interval
      $lineHashRef->{'x-time-end'} = $lineHashRef->{'x-time-start'};      
    }

    # for run metric
    # startLogTime is set by the first matched line only, endLogTime by every
    # matched line
    if ($fileHashRef->{'startLogTime'} eq "")
    {
      $fileHashRef->{'startLogTime'} = $lineHashRef->{"time"}." ".$lineHashRef->{"date"};
    }
    $fileHashRef->{"endLogTime"}= $lineHashRef->{"time"}." ".$lineHashRef->{"date"};

    #DumpCtx($lineHashRef);
    return $STATUS_SUCCESS;
  }
  else
  {
    # line does not fit the current format
    return $STATUS_FAIL;
  }
}

############################################################################
# ChangeRegExp - Extract line format regular expressions from header line
#
# IN:
#    $logFileEntry
# OUT:
#    $regExp
############################################################################
sub ChangeRegExp 
{
  # Build the regular expression for data lines from a field header line.
  #
  # IN:
  #    $logFileEntry - reference to the header line; its tokens are separated
  #                    by single blanks
  #    $fileHashRef  - file hash; REGEXP->{fieldHash} maps known field names
  #                    to their sub-patterns and REGEXP->{fieldLabel} is the
  #                    token that introduces the header
  # OUT:
  #    ($regExp, $SUCCESS) when the header names the minimum set of fields,
  #    ($regExp, $FAILURE) otherwise
  #
  # Side effects on $fileHashRef->{REGEXP}: the names of the known fields are
  # stored in {fields} in header order, {numFormatFields} is set to their
  # count, and the parameter name of every x-post-params(NAME) token is
  # pushed onto {postParams}.
  #
  # The caller assigns the captures of the returned regexp to {fields} by
  # position, so a field that is not in fieldHash has to be matched without
  # capturing anything; otherwise every known field after it would receive
  # the wrong value.
  my ($logFileEntry,$fileHashRef) = @_;
  my $regExpStr = '^';
  my $fieldHashRef = $fileHashRef->{"REGEXP"}->{"fieldHash"};
  my $fieldLabel = $fileHashRef->{'REGEXP'}->{'fieldLabel'};
  my @FormatFields = split / /,$$logFileEntry ;
  my $i = 0;
  my %NewFieldHash;

  #
  # go through all format fields and create a regexp out of it dynamically
  # also put the field names in file hash and read their corresponding format
  # strings in file hash 
  #
  foreach my $formatField (@FormatFields)
  {
      my $knownfield = 0;
      my $fieldRegExp;

      if (my ($postParam) = ($formatField =~ m/x-post-params\((.*)\)/))
      {
          # every x-post-params(NAME) token shares one sub-pattern
          push(@{$fileHashRef->{"REGEXP"}->{"postParams"}}, $postParam); 
          $fieldRegExp = $fieldHashRef->{"x-post-params"};
          $knownfield = 1;
      }
      elsif ($formatField eq $fieldLabel)
      {
          # the label itself is not a field
          next;
      }
      elsif (exists $fieldHashRef->{$formatField})
      {
          $fieldRegExp = $fieldHashRef->{$formatField};
          $knownfield = 1;
      }
      else
      {
          # unknown field: skip over a bare or a quoted value, no captures
          $fieldRegExp = '(?:\S+|\".+?\")';
      }

      # single blank between fields, but never in front of the first one
      $regExpStr = $regExpStr.' ' if ($regExpStr ne '^');
      $regExpStr = $regExpStr.$fieldRegExp;

      if ($knownfield)
      {
          # remember which field the capture at this position belongs to
          $fileHashRef->{'REGEXP'}->{'fields'}[$i] = $formatField;
          $NewFieldHash{$formatField} = $fieldHashRef->{$formatField}; 
          $i++;
      }
  }
  
  # update num fields var
  $fileHashRef->{'REGEXP'}->{numFormatFields}=$i;
  # regular expression terminated with end of line char
  $regExpStr=$regExpStr.'$';     

  # check to see if minimum number of fields required to produce any
  # meaningful output are in format else exit with error
  my $hasDate = exists($NewFieldHash{'date'}) ||
                exists($NewFieldHash{'x-date-start'});
  my $hasTime = exists($NewFieldHash{'time'}) ||
                exists($NewFieldHash{'x-time-start'});
  my $hasCookie = exists($NewFieldHash{'cs(Cookie)'}) ||
                  (exists($NewFieldHash{'x-cookie(ORACLE_SMP_CHRONOS_ST)'}) &&
                   exists($NewFieldHash{'x-cookie(ORACLE_SMP_CHRONOS_LT)'}));

  if (!(exists($NewFieldHash{'c-ip'}) && $hasDate && $hasTime && $hasCookie &&
        exists($NewFieldHash{'cs(Referer)'}) && 
        exists($NewFieldHash{'cs-uri'}) && 
        exists($NewFieldHash{'cs(User-Agent)'})))
  {
      return ($regExpStr,$FAILURE);
  }
  return ($regExpStr,$SUCCESS); 
}
###################################################################### 
#    ProcessLogFiles : Process all log files in file list. Calls
#                 BuildLineStages. At the end, output file and error 
#                 file are created and LineValues context passed to 
#                 BuildLineStages is deleted
#    Context in:
#                    @DiskInput.logFileList
#                    $DiskInput.noLogFiles
#                    $MarkerValues.firstFileOffset
#                    $CommandLineInput.errFileName
#                    $CommandLineInput.workingDirName
#                    $CommandLineInput.resolve
#                    $CommandLineInput.apacheSessionId
#                    $CommandLineInput.jservSessionId
#                    $CommandLineInput.gifFileName
#                    $FileValues.fileFormat
#
#    Context out:    $MarkerValues.newFirstFileOffset
#                    $MarkerValues.newMarkerSignature
###################################################################### 
sub ProcessLogFiles
{
  # Run every line of every log file in DISKINPUT.logFileList through the
  # line pipeline.
  #
  # IN:  $ctxRef - main context; the FILEVALUES, MARKERVALUES,
  #                COMMANDLINEINPUT and DISKINPUT sub hashes are used
  # OUT: $SUCCESS, or $CATASTROPHIC_FAILURE with a description in
  #      FILEVALUES.errorVal
  #
  # While reading, MARKERVALUES.newMarkerOffset / newMarkerSignature /
  # newMarkerFileStampName are moved forward, DISKINPUT.completedLogs
  # collects the files that were read to their end, and the run metric
  # counters in FILEVALUES are reset.  Every failure exit taken after the two
  # DBM hashes have been opened closes them again.
  my ($ctxRef) = @_;
  my $error="";
  my $status=$STATUS_SUCCESS;
  my $returnVal=0;
  my %lineHash;
  my %debugHash;
  my $debugHashRef=\%debugHash;
  my $lineHashRef=\%lineHash;
  my %hostList;
  my $logFileIndex=0;
  my $logFile="";
  my %Latency;
  my $fileHashRef=$ctxRef->{"FILEVALUES"};
  my $markerHashRef=$ctxRef->{"MARKERVALUES"};
  my $inputParametersHashRef=$ctxRef->{"COMMANDLINEINPUT"};
  my $prevOffset = 0;

  my @fileStat;
  my $initFileSize=0;
  my $activeFile=0;
  my $discreetOffset=0;
  my $seekRes=0;
  my $logIndex=0;

  @{$ctxRef->{"DISKINPUT"}->{"completedLogs"}} = ();

  $debug->PrintLog($DEBUG_HIGH,"CME in stage ProcessLogFiles");
  # return if no log files to process
  if ($ctxRef->{"DISKINPUT"}->{"noLogFiles"} == 0)
  {
     return $SUCCESS;
  }

  # build Line pipeline stage into $lineStages
  my $lineStages=BuildLinePipeLine($inputParametersHashRef);

  # initialize some debug info which will be used to print sqlldr
  #  header info in output files 
  $debugHashRef->{"noOutputLines"}=0;
  $debugHashRef->{"noCookieOutputLines"}=0;
  $debugHashRef->{"noFilteredOutLines"}=0;

  # initialize main context hashes
  $ctxRef->{"LINEVALUES"}=\%lineHash;
  $ctxRef->{"DEBUGVALUES"}=$debugHashRef;
  $debugHashRef->{"logEntryCount"}=0;

  # a limit of 0 means "no limit"
  my $logLineLimit = 0;
  if (exists $inputParametersHashRef->{"maxloglines"})
  { 
      $logLineLimit = $inputParametersHashRef->{"maxloglines"};
      $debug->PrintTrace("Maximium number of log lines to be processed: $logLineLimit");
  } else
  {
      $debug->PrintTrace("No maximum number of log lines.  All available log lines beyond marker timestamp will be processed.");
  }

  my $numLogLinesProcessed = 0;
  my $logLineLimitReached = 0;

  # open dbm hostname and latency hashes
  if (!dbmopen (%hostList,$inputParametersHashRef->{"privateDirName"}.
    $pathSeperator.$inputParametersHashRef->{"hostListHashName"},0666))
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Cannot open Host Name Hash in ".
                $inputParametersHashRef->{"privateDirName"}.": $!\n";
    return $CATASTROPHIC_FAILURE;
  }
  if(!dbmopen (%Latency,$inputParametersHashRef->{"workingDirName"}.
    $pathSeperator.$inputParametersHashRef->{"latencyHashName"},0666))
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Cannot open Latency Time Hash in".
      $inputParametersHashRef->{"workingDirName"}." $!\n";
    dbmclose(%hostList);
    return $CATASTROPHIC_FAILURE;
  }
  # assign hashes to main context
  $ctxRef->{"FILEVALUES"}->{"Latency"}=\%Latency;
  $ctxRef->{"FILEVALUES"}->{"hostList"}=\%hostList;

  # initialize run metric values
  $fileHashRef->{"totalLogEntries"}=0;
  $fileHashRef->{"noOutputLines"}=0;
  $fileHashRef->{"hashedPages"}=0;
  $fileHashRef->{"hashEntries"}=0;
  $fileHashRef->{"startLogTime"}="";
  $fileHashRef->{"endLogTime"}="";

  # process all files in logFileList; the list can grow while it is being
  # walked (see SyncLogFileList below), hence the index loop
  for ($logIndex=0;$logIndex < $ctxRef->{"DISKINPUT"}->{"noLogFiles"};$logIndex++)
  {
    $logFile = ${$ctxRef->{"DISKINPUT"}->{"logFileList"}}[$logIndex]; 
    if ($logFile eq $inputParametersHashRef->{"inputFileName"})
    {
      # if active log call sync routine
      ($status,$error) =
            SyncLogFileList($inputParametersHashRef->{"inDirName"},
                  $inputParametersHashRef->{"inputFileName"},
                  $inputParametersHashRef->{"inFileName"},
                  $ctxRef->{"FILEVALUES"}->{"lastProcessedFile"},
                  $ctxRef->{"DISKINPUT"}->{"noLogFiles"},
                  \@{$ctxRef->{"DISKINPUT"}->{"logFileList"}});
      # fatal error occured in sync routine: report it and release the dbm
      # hashes like every other failure exit does
      if ($status == $STATUS_FAIL)
      {
        $ctxRef->{"FILEVALUES"}->{"errorVal"} = $error;
        dbmclose(%hostList);
        dbmclose(%Latency);
        return $CATASTROPHIC_FAILURE;
      } 
      # update no log files in disk context hash, $# returns 1 less then len so add 1
      $ctxRef->{"DISKINPUT"}->{"noLogFiles"} = $#{$ctxRef->{"DISKINPUT"}->{"logFileList"}}+1;  
      # continue from same index as it reflects first new rotated log file if found in sync routine
      $logFile = ${$ctxRef->{"DISKINPUT"}->{"logFileList"}}[$logIndex];
      if ($logFile eq $inputParametersHashRef->{"inputFileName"})
      {
        $activeFile=1;
      } 
    } 
    $fileHashRef->{"logFileName"}=$logFile;
    $fileHashRef->{"logEntryCount"}=0;
    # size of the file before reading starts; used below to stop at that
    # point when the active log keeps growing
    @fileStat=stat $logFile;
    $initFileSize=$fileStat[7];    

    # for storing num of output lines prior to current log file, to compute num of output lines
    # processed in current log later
    my $tmpNoOutputLines = $fileHashRef->{"noOutputLines"};

    $fileHashRef->{"noCookieOutputLines"}=0;
    if (($fileHashRef->{"fileFormat"} eq "CLF") || 
              ($fileHashRef->{"fileFormat"} eq "CALYPSO") ||
              ($fileHashRef->{"fileFormat"} eq "APACHE"))
    {
      $fileHashRef->{"logEntryCount"}=0;
      my $thisFileInterrupted = 0;
      if (!open (INFILE, "<$logFile"))
      { 
        $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
          "Error failed To Open Input File in ProcessLogFiles $logFile: $!" ;
        dbmclose(%hostList);
        dbmclose(%Latency);
        $fileHashRef->{"outFileObject"}->end_force() if $fileHashRef->{"outFileObject"};
        return $CATASTROPHIC_FAILURE;
      }
      $fileHashRef->{INFILE}=\*INFILE;

      # go to the the first files offset
      if ($logFileIndex == 0)
      { 
        $seekRes=seek(INFILE,$markerHashRef->{"markerFileOffset"},0);
        if ($seekRes > 0) 
        {
          $discreetOffset=$markerHashRef->{"markerFileOffset"};
        }
      }
      
      # true while the marker still carries the 1980 placeholder signature;
      # in that state the offset is advanced even for lines without a
      # time stamp
      my $offset_line_invalid = ($markerHashRef->{"newMarkerSignature"} eq '1980-01-01 12:00:00');
      while ( <INFILE> ) 
      {
        # offset at which the line just read starts
        $prevOffset = $discreetOffset;
        # NOTE:
        # chomp removes the newline charachter
        # which is garanteed to exist when not in paragraph mode
        # so the while above chunks on newline
        # this usually only matters at the last line of a file
        # which may or may not be terminiated with a newline.
        chomp;
        chomp;

        $fileHashRef->{"logEntryCount"}++;
        $fileHashRef->{"totalLogEntries"}++;
        $debugHashRef->{"logEntryCount"}++;
        $numLogLinesProcessed ++;

        # NOTE:
        # use a pointer to the current line for readability and speed
        # this is dereferenced with $$logFileEntry later in the code
        my $logFileEntry = \$_;
        # replace multiple white space with single space
        $$logFileEntry  =~ s/\s+/ /go;
        $ctxRef->{"FILEVALUES"}->{"logFileEntry"}=$logFileEntry;
        $returnVal=ExecutePipeLine($lineStages,$ctxRef);

        # a catastriphic error happened somewhere in the pipeline so close 
        # all files and hashes and return from this pipeline
        if ($returnVal == $CATASTROPHIC_FAILURE)
        {
          $fileHashRef->{"outFileObject"}->end_force() if $fileHashRef->{"outFileObject"};
          close(INFILE);
          dbmclose %hostList;
          dbmclose %Latency;
          $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
            "Fatal Error in Line Pipeline for file $logFile" ;
          return $CATASTROPHIC_FAILURE;
        } 

        # update marker out info if a time stamp was captured, so that dates 
        # can be compared if this record gets written to the marker file
        if ($ctxRef->{"LINEVALUES"}->{"dateTime"})
        { 
          $markerHashRef->{"newMarkerOffset"}=$prevOffset;
          $markerHashRef->{"modified"} = 1;
          $markerHashRef->{"newMarkerSignature"}=
            $ctxRef->{"LINEVALUES"}->{"dateTime"};
          $debug->PrintTrace("Marker Signature moved to ".
            $markerHashRef->{"newMarkerSignature"});
          $offset_line_invalid = 0;
        }
        elsif ($offset_line_invalid)
        {
          $markerHashRef->{"newMarkerOffset"}=$prevOffset;
        }

        $discreetOffset = tell(INFILE);
        # do not read past what the active log held when it was opened
        if (($activeFile) && ($discreetOffset > $initFileSize))
        { 
           $thisFileInterrupted = 1; 
           last;
        } 
        if ($logLineLimit && ($numLogLinesProcessed >= $logLineLimit))
        {
            $debug->PrintLog($DEBUG_HIGH, "Reached log lines limit: $logLineLimit");
            $logLineLimitReached = 1;
            $thisFileInterrupted = 1;
            last;
        }
        next; 
      }
      $markerHashRef->{"newMarkerFileStampName"}=$fileHashRef->{"logFileName"};
      # only files that were read to their end count as completed
      if(!$thisFileInterrupted)
      {
          push(@{$ctxRef->{"DISKINPUT"}->{"completedLogs"}}, ($logFile));
      }
    }
    close(INFILE);

    $ctxRef->{"FILEVALUES"}->{"lastProcessedFile"}=$logFile;
    $debug->PrintLog($DEBUG_HIGH,"Finished Processing File $logFile:".
      $markerHashRef->{"newMarkerSignature"});
    $debug->PrintLog($DEBUG_HIGH,"Total Number of Lines Processed in File $logFile = ".
      $fileHashRef->{"logEntryCount"});
    $debug->PrintLog($DEBUG_HIGH,
      "Total Number of Output Lines in File $logFile = ".
        ($fileHashRef->{"noOutputLines"} - $tmpNoOutputLines));
    $debug->PrintLog($DEBUG_HIGH,
      "Total Number of Cookie Output Lines in File $logFile = ".
      $fileHashRef->{"noCookieOutputLines"});
    $logFileIndex++;
    # every file after the first is read from its beginning
    $discreetOffset=0;
    last if $logLineLimitReached;

  }

  # if the advanced data filter is on
  if($inputParametersHashRef->{advDtFilter})
  {
      $ctxRef->{PAGEHASH}->cleanup($markerHashRef->{'newMarkerSignature'},$inputParametersHashRef->{'advDtFilterAge'});

      ## dump grand unification data
      if($inputParametersHashRef->{"gu"})
      {
          $ctxRef->{REQUESTHASH}->createOutput($inputParametersHashRef->{"targetGUID"}, \$error);
          $ctxRef->{INCOMPLETELOADHASH}->createOutput($inputParametersHashRef->{"targetGUID"}, \$error);
      }
      # close page hash; a true return value is treated as failure
      if($ctxRef->{PAGEHASH}->close_hash())
      {
          $ctxRef->{FILEVALUES}->{errorVal} = 
              "Fatal error while closting page hash.\n$!";
          dbmclose %hostList;
          dbmclose %Latency;
          return $CATASTROPHIC_FAILURE;
      }
  }
  
  # close output file, output lock file and error file 
  # close hostname and latency dbm hashes
  # delete latency dbm hashes from disk
  unless($fileHashRef->{"outFileObject"}->end_data(\$error))
  {
    $error = "Fatal Error while calling end_data().\n$error";
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
    dbmclose %hostList;
    dbmclose %Latency;
    return $CATASTROPHIC_FAILURE;
  }
  # print all trace info to error file if trace flag turned on
  $status =
      PrintDebugReport($fileHashRef,$debugHashRef,$inputParametersHashRef);

  dbmclose %hostList;
  dbmclose %Latency;
  delete $fileHashRef->{"Latency"};
  delete $fileHashRef->{"hostList"};
  # delete line hashes
  undef %lineHash;
  undef %Latency;
  delete $ctxRef->{"LINEVALUES"};
  return $SUCCESS; 
}
######################################################################### 
# SyncLogFileList : Sync up log file list at beginning of run with
#                   current log file list on disk. 
#
# IN: Log file List 
#     Log files directory location
#     Base log file name
#
# OUT: Updated log file list
#
######################################################################### 
sub SyncLogFileList
{
  # Bring the caller's log file list up to date with the rotated log files
  # that are on disk now.
  #
  # IN:
  #    $logFileDir       - directory that holds the log files
  #    $logFileName      - path of the active log file
  #    $inFileName       - base name of the log; rotated logs are the
  #                        directory entries named "<base name>.<suffix>"
  #    $lastProcessedLog - path of the log file processed last ("" if none)
  #    $noLogFiles       - number of entries currently in $logFileList
  #    $logFileList      - array ref, updated in place
  # OUT:
  #    ($status, $error) - $STATUS_FAIL plus a message when the directory
  #                        cannot be read, else $STATUS_SUCCESS and an
  #                        undefined $error
  #
  # The last entry of the list is overwritten with the first file that sorts
  # after $lastProcessedLog (or with the final file found when none does),
  # and all files found after that one are appended.
  my ($logFileDir,$logFileName,$inFileName,$lastProcessedLog,$noLogFiles,$logFileList)=@_;
  my $status=$STATUS_SUCCESS;
  my $error;
  my @newLogFileNames;

  # Read log file directory for all files starting with base file name
  if (!opendir(LOGFILEDIR,$logFileDir))
  { 
    $error="Input Directory ".$logFileDir." Does Not Exist: $!";
    return($STATUS_FAIL,$error);
  }
  # defined() keeps an entry named "0" from ending the loop early
  while (defined(my $fileName=readdir(LOGFILEDIR)))
  {
    # the base name is matched literally (\Q..\E), not as a pattern
    if ($fileName =~ m/^\Q$inFileName\E\..+/)
    { 
      push(@newLogFileNames, $logFileDir."/$fileName");
    }
  }
  closedir(LOGFILEDIR);
  @newLogFileNames=sort @newLogFileNames;

  # add active log file to list; it always goes last, after the sort
  if (-e $logFileName) 
  {
    push(@newLogFileNames, $logFileName);
  }
  my $newNoLogFiles=scalar(@newLogFileNames);

  # only one file known and nothing processed so far: list stays as it is
  if (($noLogFiles==1) && ($lastProcessedLog eq ""))
  {
     return ($status,$error);
  } 

  # skip everything up to and including the last processed file, but never
  # skip the final entry
  my $listIndex=0;
  while (($listIndex < $newNoLogFiles-1) &&
         ($newLogFileNames[$listIndex] le $lastProcessedLog))
  {
    $listIndex++;
  }

  # if new files found then append everything following last processed from
  # new list to old list, starting at the old list's last slot
  my $oldListIndex=$noLogFiles-1;
  for (;$listIndex < $newNoLogFiles;$listIndex++)
  {
      $logFileList->[$oldListIndex]=$newLogFileNames[$listIndex];
      $oldListIndex++;
  } 
  return($status,$error);
}
###################################################################### 
#   InitializeLineValues : initialize all entries in context hash
#                          for the line
#   Context in:
#                    $LineValues.submitTime
#                    $LineValues.loadTime
#                    $LineValues.latency
#                    $LineValues.visitorName
#                    $LineValues.visitorIP
#                    $LineValues.visitorDomain
#                    $LineValues.cs(Cookie)
#                    $LineValues.browserName
#                    $LineValues.browserVersion
#                    $LineValues.osName
#                    $LineValues.osVersion
#                    $LineValues.cs(Referrer)
#                    $LineValues.validData
#  Context out:
#                    $FileValues.noOutputLine
#                    $LineValues.submitTime
#                    $LineValues.loadTime
#                    $LineValues.latency
#                    $LineValues.visitorName
#                    $LineValues.visitorIP
#                    $LineValues.visitorDomain
#                    $LineValues.cs(Cookie)
#                    $LineValues.browserName
#                    $LineValues.browserVersion
#                    $LineValues.osName
#                    $LineValues.osVersion
#                    $LineValues.cs(Referrer)
#                    $LineValues.validData
#
###################################################################### 
sub InitializeLineValues
{
  # Reset every per-line entry of the line hash to its start value.
  #
  # IN:  $lineHashRef - line hash to reset; keys not listed here are left
  #                     untouched
  #      $oms_version - "102010" makes dbTime start at 0, any other value
  #                     makes it start as an empty string
  # OUT: nothing
  my ($lineHashRef, $oms_version)=@_;

  # entries that start out as an empty string
  my @blankEntries = (
    "trackingCookie", "loadTime", "submitPage", "refererVal",
    "cs(Referer)", "finalReferer", "cs-uri", "dateTime", "outDateTime",
    "c-ip", "name", "page", "outputpage", "visitorIP", "visitorDomain",
    "webserver", "topLocation", "cs(Cookie)", "cs(User-Agent)",
    "browserName", "browserEngine", "browserVersion", "osName",
    "orgReferer", "osVersion", "date", "time", "timeDate",
    "x-time-start", "x-date-start", "x-time-end", "x-date-end",
    "x-cookie(ORACLE_SMP_CHRONOS_ST)", "x-cookie(ORACLE_SMP_CHRONOS_LT)",
    "x-cookie(ORACLE_SMP_CHRONOS_GL)",
  );
  # entries that start out as numeric zero
  my @zeroEntries = ("latencyTime", "serverInTime", "serverOutTime",
                     "totServerTime", "sdkRequest");

  @{$lineHashRef}{@blankEntries} = ("") x scalar(@blankEntries);
  @{$lineHashRef}{@zeroEntries}  = (0) x scalar(@zeroEntries);

  # the two entries with a start value of their own
  $lineHashRef->{"submitTime"} = "x";
  $lineHashRef->{"validData"}  = $STATUS_FAIL;

  # dbTime start value depends on the oms version
  $lineHashRef->{"dbTime"} = ($oms_version eq "102010") ? 0 : "";

  return;
}
###################################################################### 
#
#   ExtractLineValues : Derives per-line values (date/time, request,
#           referrer, cookie and visitor data) from the fields already
#           stored in the line-values hash.
#
#   Context in (read directly by this routine):
#                      $FileValues.fileFormat
#                      $FileValues.logFileName
#                      $FileValues.logEntryCount
#                      $LineValues.cs(User-Agent)
#                      $LineValues.date, .time, .timeDate
#                      $LineValues.cs-uri
#                      $LineValues.cs(Referer)
#                      $LineValues.cs(Cookie)
#                      $LineValues.x-cookie(ORACLE_SMP_CHRONOS_ST|LT|GL)
#                      $LineValues.c-ip
#
#   Context out:
#                      $LineValues.dateTime
#                      $LineValues.cs-uri, .cs(Referer) (fragment removed)
#                      $LineValues.requestVal, .framesetLoaded, .topLocation
#                      $LineValues.finalReferer, .orgReferer
#                      $LineValues.webserver, .page, .refererVal,
#                                  .refQueryStr, .refUriQuery
#                      $LineValues.submitTime, .submitPage, .loadTime,
#                                  .trackingCookie, .entryPage
#                      $LineValues.visitorIP, .name
#                      $FileValues.errorVal (on failure)
#                      $FileValues.numKnownBeaconRequests
#
#   Returns $SUCCESS, $FAILURE or $CATASTROPHIC_FAILURE.
###################################################################### 
sub ExtractLineValues
{
  my ($ctxRef) = @_;

  my $lineHashRef            = $ctxRef->{"LINEVALUES"};
  my $fileHashRef            = $ctxRef->{"FILEVALUES"};
  my $inputParametersHashRef = $ctxRef->{"COMMANDLINEINPUT"};
  my ($status, $error)       = ($STATUS_SUCCESS, '');

  # Requests issued by the beacon are counted and then dropped.
  if ($lineHashRef->{'cs(User-Agent)'} =~ 
      m/^Mozilla\/4\.0 \(compatible; Windows NT 5\.1\) OracleEMAgentURLTiming\/3\.0$/o) 
  {
    $fileHashRef->{errorVal} = "Request from Beacon ".
      $fileHashRef->{logFileName}.":".$fileHashRef->{"logEntryCount"};
    $fileHashRef->{numKnownBeaconRequests}++;
    return $FAILURE;
  }

  # Date/time of the request.
  $lineHashRef->{dateTime} = ExtractDateFields($fileHashRef->{fileFormat},
                             $lineHashRef->{date},
                             $lineHashRef->{time}, $lineHashRef->{timeDate});

  # Drop a trailing page fragment from the request and the referrer,
  # e.g. "faq.html#2" or "page.html?paramid=param#2" lose the "#2".
  foreach my $uriField ("cs-uri", "cs(Referer)")
  {
    my ($withoutFragment) = ($lineHashRef->{$uriField} =~ m/^(\S+?)#\S+/o);
    $lineHashRef->{$uriField} = $withoutFragment if ($withoutFragment);
  }

  # Request field of the http request.
  @{$lineHashRef}{'requestVal', 'framesetLoaded', 'topLocation'} =
    ExtractRequestField($fileHashRef->{fileFormat},
                        $lineHashRef->{"cs-uri"}, $status, $error);
  if ($status == $STATUS_FAIL)
  {
    $fileHashRef->{errorVal} = $error . ' : ' .
      $fileHashRef->{logFileName}. ' : ' . $fileHashRef->{'logEntryCount'};
    return $FAILURE;
  }

  # The referrer is the top location when one is present (and is not "-"),
  # otherwise the referrer logged with the request.
  my $topLocation = $lineHashRef->{topLocation};
  $lineHashRef->{finalReferer} = ($topLocation && ($topLocation ne '-'))
                               ? $topLocation
                               : $lineHashRef->{"cs(Referer)"};
  $lineHashRef->{'orgReferer'} = $lineHashRef->{finalReferer};

  # Break the referrer into its parts.
  (@{$lineHashRef}{'webserver', 'page', 'refererVal', 'refQueryStr', 'refUriQuery'},
   $status, $error) =
     ExtractRefererVal($lineHashRef, $inputParametersHashRef, $fileHashRef);

  # A $STATUS_WARN result is deliberately not treated as an error here.
  if ($status == $STATUS_FAIL)
  {
    $fileHashRef->{errorVal} = $error;
    return $FAILURE;
  }

  # Cookie data: from the Cookie header when there is one, otherwise from
  # the individually logged chronos cookies.
  if ($lineHashRef->{"cs(Cookie)"} eq "")
  {
    @{$lineHashRef}{'submitTime', 'submitPage', 'entryPage'} =
      SplitSubmitCookie($lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_ST)"});
    $lineHashRef->{'trackingCookie'} = $lineHashRef->{'c-ip'};
    $lineHashRef->{'loadTime'} = $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_LT)"};
  }
  else
  {
    (@{$lineHashRef}{'submitTime', 'submitPage', 'loadTime', 'trackingCookie'},
     $status, $error) =
      ExtractCookieData($lineHashRef, $inputParametersHashRef, $fileHashRef);
  }

  # An entry page without a usable glue cookie is rejected; the GL value
  # is blanked so the line is invalidated.
  if ($lineHashRef->{'entryPage'} &&
      ($lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"} eq "-"))
  {
    $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"} = "";
    $fileHashRef->{errorVal} = "Invalid Glue Cookie ".
      $fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"};
    return $FAILURE;
  }

  # Cookie extraction failure is reported only after the glue check above.
  if ($status == $STATUS_FAIL)
  {
    $fileHashRef->{errorVal} = $error;
    return $FAILURE;
  }

  # Visitor information (IP and/or name).
  (@{$lineHashRef}{'visitorIP', 'name'}, $error, $status) =
    SetVisitorIP($lineHashRef->{"c-ip"}, $inputParametersHashRef, $fileHashRef);
  if ($status == $STATUS_ERROR)
  {
    $fileHashRef->{"errorVal"} = $error;
    return $CATASTROPHIC_FAILURE;
  }

  return $SUCCESS;

} 

###################################################################### 
# SplitSubmitCookie: Split the submit (ST) cookie, which has the form
#                    "<digits>?<page>", into submit time and submit page.
#
#   Argument : the raw cookie value (may be undef, empty or "-")
#   Returns  : ($submitTime, $submitPage, $entryPage)
#              - on a match   : (digits, page, 0)
#              - on no match  : ('x', '', 1), i.e. the request is treated
#                               as an entry page
###################################################################### 

sub SplitSubmitCookie
{
 my ($submitCookie) = @_;

 # An absent cookie is handled like one that does not match, without
 # triggering an "uninitialized value" warning in the pattern match.
 if (defined($submitCookie))
 {
   my ($submitTime, $submitPage) = ($submitCookie =~ m/(\d+)\?(\S+)/);
   if (defined($submitTime))
   {
     return ($submitTime, $submitPage, 0);
   }
 }

 # No usable cookie: the submit page is returned as an empty string (never
 # undef) so callers can compare or concatenate it safely.
 return ('x', '', 1);
}
###################################################################### 
#
#   ExtractLineTokens : Resets the line-values hash and fills it from
#           the current log entry using the parser that matches the
#           file format.
#
#   Context in:
#                      $FileValues.fileFormat ("CLF", "CALYPSO" or "APACHE")
#                      $FileValues.logFileName
#                      $FileValues.logFileEntry
#                      $FileValues.logEntryCount
#                      $CommandLineInput.omsversion
#
#   Context out:
#                      $LineValues.* (reset by InitializeLineValues, then
#                                     filled by the format-specific parser)
#                      $FileValues.errorVal        (on failure)
#                      $FileValues.invalidLogLines (incremented on failure)
#
#   Returns $SUCCESS or $FAILURE.
###################################################################### 
sub ExtractLineTokens
{
  my ($ctxRef) = @_;

  my $fileHashRef            = $ctxRef->{"FILEVALUES"};
  my $lineHashRef            = $ctxRef->{"LINEVALUES"};
  my $inputParametersHashRef = $ctxRef->{"COMMANDLINEINPUT"};
  my $format                 = $fileHashRef->{"fileFormat"};
  my $logFile                = $fileHashRef->{"logFileName"};
  my $logFileEntry           = $fileHashRef->{"logFileEntry"};

  InitializeLineValues($lineHashRef, $inputParametersHashRef->{"omsversion"});

  # Pick the parser for this format; each branch either returns on a
  # successful parse or leaves the text that starts the error message.
  my $failureText;
  if ($format eq "CLF")
  {
    return $SUCCESS
      unless (ApplyCLFRegExp($logFileEntry,$lineHashRef)==$STATUS_FAIL);
    $failureText = "Bad Non-Calypso CLF Line: ";
  }
  elsif (($format eq "CALYPSO") || ($format eq "APACHE"))
  {
    return $SUCCESS
      unless (ApplyCalypsoRegExp($logFileEntry,$lineHashRef,$fileHashRef,
                                 $inputParametersHashRef)==$STATUS_FAIL);
    $failureText = "Bad Calypso Line in: ";
  }
  else
  {
    $failureText = "Unrecognized line format in file: ";
  }

  # Shared failure path: record where the bad line is and count it.
  $fileHashRef->{"errorVal"} =
    $failureText.$logFile.":".$fileHashRef->{"logEntryCount"};
  $fileHashRef->{invalidLogLines}++;
  return $FAILURE;
}
###################################################################### 
#     ComputeDerivedValues : Derive values from extracted fields
#
#     Only OEM gif requests and SDK OEM gif requests are accepted; any
#     other request is rejected.
#
#     Context in (read directly by this routine):
#                     $FileValues.fileFormat
#                     $FileValues.logFileName
#                     $FileValues.logEntryCount
#                     $CommandLineInput.OEM_GIF
#                     $CommandLineInput.SDK_OEM_GIF
#                     $LineValues.cs-uri
#                     $LineValues.cs(Referer)
#                     $LineValues.cs(User-Agent)
#                     $LineValues.c-ip
#                     $LineValues.date, .time, .timeDate
#                     FILTER (optional page filter object)
#
#     Context out:
#                    $LineValues.requestVal, .framesetLoaded, .topLocation
#                    $LineValues.finalReferer
#                    $LineValues.webserver, .page, .refererVal,
#                                .refQueryStr, .refUriQuery
#                    $LineValues.dateTime
#                    $LineValues.submitTime, .submitPage, .loadTime,
#                                .trackingCookie
#                    $LineValues.latencyTime
#                    $LineValues.visitorIP, .name, .visitorDomain
#                    $LineValues.browserName, .browserVersion
#                    $LineValues.osName, .osVersion
#                    $FileValues.errorVal (on failure)
#                    $DebugValues.noFilteredOutLines
#
#     Returns $SUCCESS, $FAILURE or $CATASTROPHIC_FAILURE.
###################################################################### 
sub ComputeDerivedValues
{
  my ($ctxRef) = @_;
  my $lineHashRef=$ctxRef->{"LINEVALUES"};
  my $fileHashRef=$ctxRef->{"FILEVALUES"};
  my $inputParametersHashRef=$ctxRef->{"COMMANDLINEINPUT"};
  my $debugHashRef=$ctxRef->{"DEBUGVALUES"};
  my $filter = $ctxRef->{"FILTER"};
  my $status=$STATUS_SUCCESS;
  my $error="";
  my $latency = 0;

  # extract Request field following http request
  # if framesetLoaded is true, then use topLocation as referer
  # else use referer from log entry
  ($lineHashRef->{"requestVal"},$lineHashRef->{"framesetLoaded"},
    $lineHashRef->{"topLocation"})=
    ExtractRequestField($fileHashRef->{"fileFormat"}, 
    $lineHashRef->{"cs-uri"}, $status, $error);

  # Make sure the request is valid
  if($status == $STATUS_FAIL)
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = $error . ':' .
      $fileHashRef->{"logFileName"}. ':' . $fileHashRef->{"logEntryCount"}; 
    return $FAILURE;
  }

  # check for our beloved OEM GIF
  # and process as if ready to write data to output file
  if ($lineHashRef->{"requestVal"} =~ m/$inputParametersHashRef->{'OEM_GIF'}$/)
  {
     # if the frameset flag is false, ignore this gif request
     if($lineHashRef->{'framesetLoaded'} ne 'true')
     {
       $ctxRef->{FILEVALUES}->{errorVal} = 'Frameset flag is not true :'.
         $fileHashRef->{logFileName}.': '.$fileHashRef->{logEntryCount};
       return $FAILURE;
     }

     # The top location, when logged, takes precedence over the referrer
     if ($lineHashRef->{"topLocation"} ne "-" )
     {
       $lineHashRef->{"finalReferer"} = $lineHashRef->{"topLocation"};
     } else {
       $lineHashRef->{"finalReferer"} = $lineHashRef->{"cs(Referer)"};
     } 
     ($lineHashRef->{"webserver"},$lineHashRef->{"page"},$lineHashRef->{"refererVal"},
      $lineHashRef->{"refQueryStr"},$lineHashRef->{"refUriQuery"}, $status,$error)=
       ExtractRefererVal($lineHashRef,$inputParametersHashRef,$fileHashRef);

     if ($status==$STATUS_FAIL)
     {
        $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
        return $FAILURE;
     }

     # Verify that filters are ON
     if(defined($filter))
     {
       # Verify if requested page belongs to set of monitored pages
       unless($filter->is_uri_valid($lineHashRef->{"page"}))
       {
         $ctxRef->{"FILEVALUES"}->{"errorVal"} = 
            "Page $lineHashRef->{'page'} does not belong to monitored ".
           "pages: ".$fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"}; 
         # Increment the number of lines rejected due to the filters
         $debugHashRef->{"noFilteredOutLines"}++;
         return $FAILURE;
       }
     }

     $lineHashRef->{"dateTime"}=ExtractDateFields($fileHashRef->{"fileFormat"},
                           $lineHashRef->{"date"},$lineHashRef->{"time"},
                           $lineHashRef->{"timeDate"});
     ($lineHashRef->{"submitTime"}, $lineHashRef->{"submitPage"},
      $lineHashRef->{"loadTime"}, $lineHashRef->{"trackingCookie"},
      $status, $error) =
      ExtractCookieData($lineHashRef,$inputParametersHashRef,$fileHashRef);

     if ($status==$STATUS_FAIL)
     {
       $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
       return $FAILURE;
     }

     # Latency is load time minus submit time; zero is rejected as well
     $latency = $lineHashRef->{"loadTime"} - $lineHashRef->{"submitTime"};
     if ($latency > 0)
     { 
       $lineHashRef->{"latencyTime"}= $latency;
     }
     else
     {
       $ctxRef->{"FILEVALUES"}->{"errorVal"}="Rejecting negative latency value".
          $fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"};
       return $FAILURE;
       
     }
     ($lineHashRef->{"visitorIP"},$lineHashRef->{"name"},$error,$status)=
       SetVisitorIP($lineHashRef->{"c-ip"},$inputParametersHashRef,
       $fileHashRef);
     if ($status==$STATUS_ERROR)
     {
       $ctxRef->{"FILEVALUES"}->{"errorVal"}=$error;
       return $CATASTROPHIC_FAILURE;
     }
     $lineHashRef->{"visitorDomain"}=SetVisitorDomain($lineHashRef->{"name"},
       $lineHashRef->{"c-ip"});
     ($lineHashRef->{"browserName"}, $lineHashRef->{"browserVersion"},
       $lineHashRef->{"osName"}, $lineHashRef->{"osVersion"})=
       ParseUserAgent($lineHashRef->{"cs(User-Agent)"}); 
  }
  elsif ($lineHashRef->{requestVal} =~ m/^\/oracle_smp_chronos\/$inputParametersHashRef->{SDK_OEM_GIF}/)
  {
      ($status,$error)=ProcessSdkOemGifRequest($lineHashRef, $fileHashRef, 
                                               $inputParametersHashRef, $debugHashRef, $filter, "");
      if ($status==$FAILURE)
      {
          # ProcessSdkOemGifRequest stores its message in
          # $fileHashRef->{errorVal}, which is the same hash as
          # $ctxRef->{FILEVALUES}.  Only overwrite that message when the
          # call actually handed one back; otherwise an undefined $error
          # would erase the reason for the failure.
          if (defined($error) && ($error ne ''))
          {
              $ctxRef->{FILEVALUES}->{errorVal} = $error;
          }
          return $FAILURE;
      }
  }
  else
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Not an OEM_GIF or page request: ".
      $fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"}; 
    return $FAILURE;
  }
  return $SUCCESS; 
}

###################################################################### 
#     AdvancedComputeDerivedValues : Derive values from extracted fields
#
#     Dispatches the current line, by its request value, to one of:
#       - AdvancedProcessOemGifRequest      (OEM gif request)
#       - ProcessSdkOemGifRequest           (SDK OEM gif request)
#       - AdvancedProcessPageRequest        (page request)
#       - AdvancedProcessPageElementRequest (anything else)
#
#     Context in (read directly by this routine):
#                     $CommandLineInput.OEM_GIF
#                     $CommandLineInput.SDK_OEM_GIF
#                     $LineValues.requestVal
#                     $LineValues.sc-status
#                     FILTER, PAGEHASH, PAGEPARAMS (passed to the handlers)
#
#     Context out:
#                    $FileValues.errorVal (on failure)
#                    plus whatever the selected handler stores in
#                    $LineValues / $FileValues / $DebugValues
#
#     Returns $SUCCESS or $FAILURE.
###################################################################### 

sub AdvancedComputeDerivedValues
{
  my ($ctxRef) = @_;

  my $lineHashRef=$ctxRef->{LINEVALUES};
  my $fileHashRef=$ctxRef->{FILEVALUES};
  my $inputParametersHashRef = $ctxRef->{COMMANDLINEINPUT};
  my $debugHashRef=$ctxRef->{DEBUGVALUES};
  my $filter = $ctxRef->{FILTER};
  my $pageHash = $ctxRef->{PAGEHASH};
  my $pageParams = $ctxRef->{PAGEPARAMS};
  my $status = $STATUS_SUCCESS;
  my $error = '';

  # check for OEM GIF
  if ($lineHashRef->{requestVal} =~ m/$inputParametersHashRef->{OEM_GIF}$/)
  {
    ($status,$error)=AdvancedProcessOemGifRequest($lineHashRef, $fileHashRef, 
        $inputParametersHashRef, $debugHashRef, $filter, $pageHash, $pageParams);
    if ($status==$FAILURE)
    {
      $ctxRef->{FILEVALUES}->{errorVal} = $error;
      return $FAILURE;
    }
  }
  elsif ($lineHashRef->{requestVal} =~ m/^\/oracle_smp_chronos\/$inputParametersHashRef->{SDK_OEM_GIF}/)
  {
    ($status,$error)=ProcessSdkOemGifRequest($lineHashRef, $fileHashRef, 
        $inputParametersHashRef, $debugHashRef, $filter);
    if ($status==$FAILURE)
    {
      # ProcessSdkOemGifRequest stores its message in
      # $fileHashRef->{errorVal}, which is the same hash as
      # $ctxRef->{FILEVALUES}.  Only overwrite that message when the call
      # actually handed one back; otherwise an undefined $error would
      # erase the reason for the failure.
      if (defined($error) && ($error ne ''))
      {
        $ctxRef->{FILEVALUES}->{errorVal} = $error;
      }
      return $FAILURE;
    }
      
  }
  # 
  # Not an OEM_GIF request so check if a page request
  # This is where hash entries are made for pages
  # which will be checked against the referrer field
  # later.
  #
  elsif (PageRequest($lineHashRef->{"requestVal"},
             $lineHashRef->{"sc-status"},$inputParametersHashRef))
  {
    ($status,$error)=AdvancedProcessPageRequest($lineHashRef, $fileHashRef, $inputParametersHashRef, 
        $debugHashRef, $filter, $pageHash);
    if ($status==$FAILURE)
    {
      $ctxRef->{FILEVALUES}->{errorVal} = $error;
      return $FAILURE;
    }
  } else {

    # neither oem_gif or page request so get server side numbers

    ($status,$error)=AdvancedProcessPageElementRequest($lineHashRef, $fileHashRef, 
                     $inputParametersHashRef, $debugHashRef, $filter, $pageHash);
    if ($status==$FAILURE)
    {
      $ctxRef->{FILEVALUES}->{errorVal} = $error;
      return $FAILURE;
    }
  }
  return $SUCCESS;
}

###################################################################### 
#     ProcessSdkOemGifRequest: Process SDK oem_gif request, taking the
#                              metrics from the request's query string
#
#     Arguments:
#                $lineHashRef            - line-values hash (updated)
#                $fileHashRef            - file-values hash (errorVal is
#                                          stored here on failure)
#                $inputParametersHashRef - command line input hash; uses
#                                          queryDelimiterRegexp and
#                                          qParamDelimiterRegexp
#                $debugHashRef           - debug counters
#                                          (noFilteredOutLines)
#                $filter                 - optional page filter object
#
#     Context out:
#                $LineValues.sdkRequest, .finalReferer
#                $LineValues.webserver, .page, .refererVal,
#                            .refQueryStr, .refUriQuery
#                $LineValues.latencyTime, .totServerTime, .dbTime
#                $LineValues.outDateTime
#                $LineValues.visitorIP, .name, .visitorDomain
#                $LineValues.browserName, .browserVersion
#                $LineValues.osName, .osVersion
#                $LineValues.submitTime, .loadTime (both set to 0)
#                $LineValues.trackingCookie, .framesetLoaded
#
#     Returns:
#                list context   : ($SUCCESS | $FAILURE, $error) so that
#                                 callers written as
#                                 ($status,$error) = ProcessSdkOemGifRequest(...)
#                                 receive the failure message
#                scalar context : $SUCCESS | $FAILURE only
###################################################################### 

sub ProcessSdkOemGifRequest
{
    my($lineHashRef, $fileHashRef, $inputParametersHashRef, 
        $debugHashRef, $filter) = @_;

    $lineHashRef->{"sdkRequest"} = 1;

    # Everything after the query delimiter carries the SDK metrics.
    my $queryDelimRegexp = $inputParametersHashRef->{"queryDelimiterRegexp"};
    my($queryStr)= ($lineHashRef->{"requestVal"} =~ m/^\S+?$queryDelimRegexp(\S+)/o);
    # A request without a query string carries no metrics; it is rejected
    # by the latency check below.
    $queryStr = '' unless defined($queryStr);

    my $error = "";
    my $status=$STATUS_SUCCESS;
    
    $lineHashRef->{"finalReferer"} = $lineHashRef->{"cs(Referer)"};

    ($lineHashRef->{"webserver"},$lineHashRef->{"page"},$lineHashRef->{"refererVal"},
     $lineHashRef->{"refQueryStr"},$lineHashRef->{"refUriQuery"}, $status,$error)=
         ExtractRefererVal($lineHashRef,$inputParametersHashRef,$fileHashRef);
    
    if ($status==$STATUS_FAIL)
    {
        $fileHashRef->{"errorVal"}=$error;
        return wantarray ? ($FAILURE, $error) : $FAILURE;
    }
    
    # Verify that filters are ON
    if(defined($filter))
    {
        # Verify if requested page belongs to set of monitored pages
        unless($filter->is_uri_valid($lineHashRef->{"page"}))
        {
            $error = 
                "Page $lineHashRef->{'page'} does not belong to monitored ".
                    "pages: ".$fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"}; 
            $fileHashRef->{"errorVal"} = $error;
            # Increment the number of lines rejected due to the filters
            $debugHashRef->{"noFilteredOutLines"}++;
            return wantarray ? ($FAILURE, $error) : $FAILURE;
        }
    }
    
    # Pick the known metrics out of the name=value pairs.
    my @dataParams = split /$inputParametersHashRef->{qParamDelimiterRegexp}/, $queryStr;
    my $latency = 0;
    my $serverTime = 0;
    my $timestamp = 0;
    my $dbTime = 0;
    foreach my $thisParam (@dataParams)
    {
        my @nvPair = split /\=/, $thisParam;
        if ($nvPair[0] eq $SDK_METRIC_TOT_TIME)
        {
            $latency = $nvPair[1];
        } elsif ($nvPair[0] eq $SDK_METRIC_SRV_TIME)
        {
            $serverTime = $nvPair[1];
        } elsif ($nvPair[0] eq $SDK_METRIC_DB_TIME)
        {
            $dbTime = $nvPair[1];
        }elsif ($nvPair[0] eq $SDK_METRIC_TIMESTAMP)
        {
            $timestamp = $nvPair[1];
        }
        # should have a break condition as well, but we know there
        # will not be many more parameters
    }

    # Both total latency and server time must be positive; db time is
    # stored as given.
    if (($latency > 0) && ($serverTime > 0))
    { 
        $lineHashRef->{"latencyTime"}= $latency;
        $lineHashRef->{"totServerTime"} = $serverTime;
        $lineHashRef->{"dbTime"} = $dbTime;
    }
    else
    {
        $error="Rejecting negative latency, server time or db time value".
            $fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"};
        $fileHashRef->{"errorVal"}=$error;
        return wantarray ? ($FAILURE, $error) : $FAILURE;
    }

    # The timestamp is divided by 1000 before being handed to localtime.
    my ($secs, $mins, $hour, $date, $month, $year, $wday, $yday, $isdst) = 
        localtime($timestamp/1000);
    $year=$year+1900;
    $month = $month+1;
    $lineHashRef->{'outDateTime'}="$year-$month-$date $hour:$mins:$secs";
    
    ($lineHashRef->{"visitorIP"},$lineHashRef->{"name"},$error,$status)=
        SetVisitorIP($lineHashRef->{"c-ip"},$inputParametersHashRef,
                     $fileHashRef);
    if ($status==$STATUS_ERROR)
    {
        $fileHashRef->{"errorVal"}=$error;
        return wantarray ? ($FAILURE, $error) : $FAILURE;
    }
    $lineHashRef->{"visitorDomain"}=SetVisitorDomain($lineHashRef->{"name"},
                                                     $lineHashRef->{"c-ip"});
    ($lineHashRef->{"browserName"}, $lineHashRef->{"browserVersion"},
     $lineHashRef->{"osName"}, $lineHashRef->{"osVersion"})=
         ParseUserAgent($lineHashRef->{"cs(User-Agent)"}); 

    $lineHashRef->{"submitTime"} = 0;
    $lineHashRef->{"loadTime"} = 0;
    
    # To get past the ValidateData stage
    $lineHashRef->{"trackingCookie"} = "-";
    $lineHashRef->{"framesetLoaded"} = "true";

    return wantarray ? ($SUCCESS, '') : $SUCCESS;
}

###################################################################### 
#     AdvancedProcessOemGifRequest: Process oem_gif request using
#                                 advanced algo 
#
#     Looks up the page hash entry that belongs to this gif request,
#     validates it, computes latency and server timings for the referring
#     page and finally removes that page from the entry.
#
#     Arguments:
#                $lineHashRef            - line-values hash (updated)
#                $fileHashRef            - file-values hash (counters)
#                $inputParametersHashRef - command line input hash; uses
#                                          queryDelimiterRegexp,
#                                          cacheEntries, parameterizedURL
#                $debugHashRef           - debug counters
#                $filter                 - optional page filter object
#                $pageHash               - page hash object
#                $pageParams             - page parameter object (used
#                                          only for parameterized URLs)
#     Context out:
#                $LineValues.visitorIP, .name (only when a filter is set)
#                $LineValues.outputpage (only for parameterized URLs)
#                $LineValues.loadTime, .submitTime (entry page algo)
#                $LineValues.totServerTime, .serverInTime, .serverOutTime
#                $LineValues.latencyTime
#                $LineValues.browserName, .browserVersion
#                $LineValues.osName, .osVersion
#                $FileValues.totalGifRequests, .totalGifsNoPageFound,
#                            .totalGifsNoOutput
#                $DebugValues.noFilteredOutLines
#
#     Returns ($SUCCESS | $FAILURE, $error).
###################################################################### 

sub AdvancedProcessOemGifRequest
{
    my($lineHashRef, $fileHashRef, $inputParametersHashRef, 
        $debugHashRef, $filter, $pageHash, $pageParams) = @_;

    my $status = $STATUS_SUCCESS;
    my $error = '';
    my $entry;
    my $refPgTimeStart="";
    my $latency = 0;
    my $postParamString = "";

    # Sixth element of SplitURL's return list for the gif's own referrer.
    # NOTE(review): presumably the referrer URI including its query
    # string -- confirm against SplitURL.
    my $gif_referer = (SplitURL($lineHashRef->{"cs(Referer)"}, 
                                $inputParametersHashRef->{"queryDelimiterRegexp"}))[5];
    # A top location other than "-" means the gif was requested from a frame.
    my $isFrame = ($lineHashRef->{'topLocation'} ne '-');
    my $page = $lineHashRef->{'refUriQuery'};
    
    $fileHashRef->{totalGifRequests}++;

    # Verify if filters are ON
    if(defined($filter))
    {
      # Verify if requested page belongs to set of monitored pages
      unless($filter->is_uri_valid($lineHashRef->{page}))
      {
        $error = "Page $lineHashRef->{'page'} does not belong to monitored ".
          'pages: '.$fileHashRef->{'logFileName'}.' : '.  $fileHashRef->{'logEntryCount'}; 
        # Increment the number of lines rejected due to the filters
        $debugHashRef->{'noFilteredOutLines'}++;
        return ($FAILURE,$error);
      }
      # The status returned here is not checked.
      ($lineHashRef->{'visitorIP'}, $lineHashRef->{'name'}, $error, $status) =
        SetVisitorIP($lineHashRef->{"c-ip"},$inputParametersHashRef, $fileHashRef);
    }

    # Get the entry for this referrer page (if it exists)

    $entry = $pageHash->get_entry($lineHashRef->{'submitTime'}, $lineHashRef->{'visitorIP'}, 
                        $lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"});
    # check if the entry exists or not
    if(! defined($entry))
    {
      # only accept cached pages if caching is allowed
      if(! $inputParametersHashRef->{'cacheEntries'})
      {
        # cache is off, and this entry is possibly from cache
        $fileHashRef->{totalGifsNoPageFound}++;
        $error = 'Possibly cached entry: '.
               $fileHashRef->{logFileName}.' : '.$fileHashRef->{logEntryCount};
        return ($FAILURE,$error);
      }
      # NOTE(review): with caching allowed the code below runs with an
      # undefined $entry -- confirm the ChronosPageHash routines accept that.
    }
    else  # entry exists
    {
      # make sure the page is valid
      if(! ChronosPageHash::is_valid($entry))
      {
        # invalid page, delete entry from hash 
        $pageHash->delete_entry($entry);
        $error = 'Delete invalidated page entry: '.
            $fileHashRef->{logFileName}.' : '.$fileHashRef->{logEntryCount};
        return ($FAILURE,$error);
      }

      # if the frameset flag is false, ignore this gif request
      if($lineHashRef->{framesetLoaded} ne 'true')
      {
        $fileHashRef->{totalGifsNoOutput}++;
        $error = 'Frameset flag is not true :'.
           $fileHashRef->{logFileName}.' : '.$fileHashRef->{logEntryCount};
        # Still record the frame/frameset relationship before rejecting.
        if (ChronosPageHash::is_page_in_entry($entry,$gif_referer))
        {
          ChronosPageHash::mark_frame_page($entry,$gif_referer,$page);
          ChronosPageHash::mark_frameset_page($entry,$page);
          $pageHash->add_entry($entry, $lineHashRef->{'dateTime'});
        }
        return ($FAILURE,$error);
      }
      elsif ($isFrame)
      {
        if (ChronosPageHash::is_page_in_entry($entry,$gif_referer))
        {
          ChronosPageHash::mark_frame_page($entry,$gif_referer,$page);
        }
      }

      # check if the referrer page is among the requested pages for this entry
      #
      # also, put a check in place to check if xyz_css stylesheet type file which 
      # has its referrer as '-' does not invalidate an entire frameset
      # with same hash key (V:ST)
      #
      # snakhoda: did some logic refactoring to prevent code duplication.
      if ((ChronosPageHash::is_page_in_entry($entry, "-")
               && (ChronosPageHash::page_index_in_entry($entry, "-") == 0))
           || !ChronosPageHash::is_page_in_entry($entry, $page))
      {
           # invalid page, delete entry from hash
           $pageHash->delete_entry($entry);
           $error = "Referrer doesn't match pages: ".
               $fileHashRef->{logFileName}.' : '.$fileHashRef->{logEntryCount};
           return ($FAILURE,$error);
      }
      # 
      # if parameters provided with url, then apply those
      # if the url is not listed in parameterized list
      # discard it even if valid transaction
      # 
      if ( $inputParametersHashRef->{"parameterizedURL"})
      {
          $postParamString = ChronosPageHash::get_page_post_params_in_entry($entry, $page);
          $lineHashRef->{"outputpage"} = $pageParams->getOutputPage($lineHashRef->{"refQueryStr"},
                                                        $lineHashRef->{"page"}, $postParamString);
      }
    }
 
    # Calculate latency
    my $numPages = ChronosPageHash::get_page_count_in_entry($entry);
    if ($isFrame)
    {
        #determine whether to use entry page algo for frame 
        my $numFramePages = ChronosPageHash::get_frame_count_in_entry($entry, $page);
        if (($numFramePages + 1) < $numPages)
        {
            ChronosPageHash::mark_entry_page($entry);
            $debug->PrintTrace("Marking entry $entry->{submit_tm}:$entry->{visitor_ip}:$entry->{glue_cookie} ".
                               "as entry page because more than one page set found for same ST cookie");
        }
    }
    else {
        if ($numPages > 1)
        {
            ChronosPageHash::mark_entry_page($entry);
            $debug->PrintTrace("Marking entry $entry->{submit_tm}:$entry->{visitor_ip}:$entry->{glue_cookie} ".
                               "as entry page because more than one page set found for same ST cookie");
        }
    }
    my $entryPage = ChronosPageHash::entry_page($entry);

    # is entry page then we use x-time-start of referrer or top.location else we use
    # ST LT for latency for normal pages
    # page could be marked as entry page in page hash if it was an actual entry page
    # or if we found a scenario where instead of invalidating entire entry we were able
    # to salvage transaction by updating x-time-start of a page in entry and marking the
    # entry as entry page hash entry solely on basis of ST being sticky for txn

    if (!$entryPage)
    {
      $latency = $lineHashRef->{"loadTime"} - $lineHashRef->{"submitTime"};
    } else {
      $refPgTimeStart = ChronosPageHash::get_time_start($entry,$page);
      if ($refPgTimeStart != 0) 
      {
        # x-time-start values are divided by 1000 before being stored
        $latency = ($lineHashRef->{"x-time-start"} - $refPgTimeStart)/1000;
        $lineHashRef->{"loadTime"} = $lineHashRef->{"x-time-start"}/1000;
        $lineHashRef->{"submitTime"} = $refPgTimeStart/1000;
      } else {
        $error="Error Retrieving start time of page $page from hash";
        return ($FAILURE,$error);
      }
    }

    #$pageHash->dump_all_entries(1,$debug);

    # now get server side values
    ($lineHashRef->{"totServerTime"},$lineHashRef->{"serverInTime"},$lineHashRef->{"serverOutTime"}) 
        =  ChronosPageHash::calculate_server_timings($entry, $page, $isFrame, $lineHashRef->{'x-time-start'});

    # if server time more than latency, use entry page algo
    if (($lineHashRef->{"totServerTime"}/1000) > $latency)
    {
      $refPgTimeStart = ChronosPageHash::get_time_start($entry,$page);
      if ($refPgTimeStart != 0) 
      {
        $latency = ($lineHashRef->{"x-time-start"} - $refPgTimeStart)/1000;
        $lineHashRef->{"loadTime"} = $lineHashRef->{"x-time-start"}/1000;
        $lineHashRef->{"submitTime"} = $refPgTimeStart/1000;
      } else {
        $error="Error Retrieving start time of page $page from hash";
        return ($FAILURE,$error);
      }
    }

    # Drop negative latency entries
    if ($latency > 0)
    { 
      $lineHashRef->{"latencyTime"}= $latency;
    }
    else
    {
      $error = 'Negative latency value: '.
        $fileHashRef->{logFileName}.' : '.$fileHashRef->{logEntryCount};
      return ($FAILURE,$error);
    }

    # Get browser and OS info
    ($lineHashRef->{"browserName"}, $lineHashRef->{"browserVersion"},
     $lineHashRef->{"osName"}, $lineHashRef->{"osVersion"}) =
                           ParseUserAgent($lineHashRef->{"cs(User-Agent)"}); 

    $debug->PrintTrace("Deleting page $page from page hash entry ".
                       "$entry->{submit_tm}:$entry->{visitor_ip}:$entry->{glue_cookie}");
    if(!$pageHash->delete_page_from_entry($entry, $page, $isFrame, 1, 0))
    {
      # add entry to page hash if it still exists
      $pageHash->add_entry($entry, $lineHashRef->{'dateTime'});
    }
    return ($SUCCESS,$error);
}

###################################################################### 
#     AdvancedProcessPageRequest: Process a page request using the
#                                 advanced algorithm
#
#     Looks up the page hash entry for the request's submit time,
#     visitor IP and ORACLE_SMP_CHRONOS_GL cookie (creating it when it
#     does not exist), records the requested page in it and saves the
#     entry back with the log line's date/time.
#
#     Context in:
#                lineHashRef             values of the current log line
#                fileHashRef             per-file state and counters
#                inputParametersHashRef  command line parameters
#                debugHashRef            debug counters
#                filter                  optional URI filter object
#                pageHash                page hash object
#     Context out:
#                lineHashRef->{visitorIP}
#                fileHashRef->{pageRequests}, {hashedPages}, {hashEntries}
#                debugHashRef->{noFilteredOutLines}
#     Returns:
#                ($SUCCESS, $error) or ($FAILURE, $error)
###################################################################### 

sub AdvancedProcessPageRequest 
{
    my($lineHashRef, $fileHashRef, $inputParametersHashRef, $debugHashRef, 
       $filter, $pageHash) = @_;
  
    my $entry;
    my $status = $STATUS_SUCCESS;
    my $error = '';
    my $mark_entry_page = 0;
    my $sbmt_pg_wbsrv = '';
    my $sbmt_pg_uri  = '';
    my $sbmt_pg_query = '';
    my $refererPage = '';
    my $postParamString = '';

    $fileHashRef->{"pageRequests"}++;

    if(defined($filter))
    {
      # Verify if requested page belongs to set of monitored pages
      unless($filter->is_uri_valid($lineHashRef->{requestVal}))
      { 
        $error = "Page $lineHashRef->{requestVal} does not belong to monitored pages";
        # Increment the number of lines rejected due to the filters
        $debugHashRef->{'noFilteredOutLines'}++;
        # Return the same (status, error) pair as every other exit of this
        # function so that the caller receives the error text.
        return ($FAILURE, $error);
      }
    }
    
    if($lineHashRef->{"cs-method"} eq "POST")
    {
        if ($lineHashRef->{"cs(Content-Length)"} > $POST_PARAM_LOG_LIMIT)
        {
            # Content length is above $POST_PARAM_LOG_LIMIT: record every
            # tracked parameter with a placeholder value.
            foreach my $postParam (@{$fileHashRef->{"REGEXP"}->{"postParams"}})
            {
                $postParamString .= $postParam."=__UNKNOWN__&";
            }
        }
        else
        {
            # Record only the tracked parameters that have a non-empty
            # quoted value on this log line.
            foreach my $postParam (@{$fileHashRef->{"REGEXP"}->{"postParams"}})
            {
                my ($myParamValue) =
                    ($lineHashRef->{"x-post-params(".$postParam.")"} =~ m/\"(.*)\"/o);
                if ($myParamValue)
                {
                    $postParamString .= $postParam."=".$myParamValue."&";
                }
            }
        }
        # drop the trailing '&'
        chop $postParamString;
    }

    if (!$lineHashRef->{entryPage}) 
    {
      # Split the submit page in its components
      ($status, $error, $sbmt_pg_wbsrv, $sbmt_pg_uri, $sbmt_pg_query) = 
               SplitURL($lineHashRef->{submitPage}, $inputParametersHashRef->{"queryDelimiterRegexp"});
      if($status == $FAILURE)
      {
        return ($FAILURE,$error); 
      }

      # If the submit page's web server and this line's web server do not
      # match, the entry is marked as an entry page further below.
      if(($lineHashRef->{finalReferer} ne '-') &&
            ($sbmt_pg_wbsrv ne $lineHashRef->{webserver}))
      {
        $mark_entry_page = 1;
      }
    }

    # This will need to change once we cut over to doing 'normal' lookups
    $lineHashRef->{visitorIP} = $lineHashRef->{"c-ip"};

    # Try to read the entry from the page hash
    $entry = $pageHash->get_entry($lineHashRef->{submitTime}, 
           $lineHashRef->{visitorIP},$lineHashRef->{"x-cookie(ORACLE_SMP_CHRONOS_GL)"});

    # if referer is '-' then page will be null, so it has to be handled
    # differently
    if ($lineHashRef->{'finalReferer'} eq '-')
    {
       $refererPage = '-';
    } else {
       $refererPage = $lineHashRef->{'page'};
    }

    if(defined($entry))  # entry exists
    {
      if(ChronosPageHash::is_valid($entry))
      {
          if (!ChronosPageHash::is_page_in_entry($entry,$lineHashRef->{requestVal})) 
          {
            $pageHash->add_page_to_entry($entry,$lineHashRef->{'requestVal'},
                         $lineHashRef->{'x-time-start'}, $lineHashRef->{'x-time-end'},
                         $lineHashRef->{'x-cache'}, $refererPage, $postParamString,
                         $lineHashRef->{'dateTime'}, $inputParametersHashRef->{'advDtFilterAge'});
            $fileHashRef->{"hashedPages"}++;
          } else {
            if ($lineHashRef->{'x-time-start'} ne '') 
            {
               # no need to delete entry in case of page conflict, we can salvage the txn
               # by updating xts of page and using that in gif request for latency
               # instead of traditional ST and LT
               $pageHash->update_page_in_entry($entry,$lineHashRef->{'requestVal'},
                        $lineHashRef->{'x-time-start'},$lineHashRef->{'x-time-end'},
                        $lineHashRef->{'x-cache'}, $refererPage, $postParamString);
  
               # mark entry as entry page hash entry
               ChronosPageHash::mark_entry_page($entry);
               $fileHashRef->{"hashedPages"}++; 
            } else { 
               $debug->PrintTrace("Invalidating page hash entry with submit time $entry->{'submit_tm'}");
               ChronosPageHash::invalidate_entry($entry);
            }
          }
      }
    }
    # entry does not exist, create a new one
    else
    {
      # new entry
      $entry = $pageHash->new_entry($lineHashRef->{'submitTime'},
           $lineHashRef->{'visitorIP'}, $lineHashRef->{'x-cookie(ORACLE_SMP_CHRONOS_GL)'},
           $lineHashRef->{'requestVal'}, $lineHashRef->{'x-time-start'}, $lineHashRef->{'entryPage'},
           $lineHashRef->{'x-time-end'},$lineHashRef->{'x-cache'}, $postParamString);

      # if referrer is '-' and there is no x-time-start then invalidate the entry,
      # as opposed to not making entry at all, since more page requests you want
      # to drop are likely to follow
      if(($lineHashRef->{'finalReferer'} eq '-') && 
         ($lineHashRef->{'x-time-start'} eq ''))
      {
        $debug->PrintTrace("Invalidating page hash entry with submit time $entry->{'submit_tm'}");
        ChronosPageHash::invalidate_entry($entry);
      }
      $fileHashRef->{"hashEntries"}++;
    }

    # if page clicked from another server, make it entry page, as ST unreliable.
    if ($mark_entry_page)
    {
      ChronosPageHash::mark_entry_page($entry);
    }

    # Entries are saved every time they're accessed (so that log_tm is latest)
    # now add entry and update its timestamp; a true return value from
    # add_entry is treated as failure.
    if($pageHash->add_entry($entry, $lineHashRef->{'dateTime'}))
    {
        $error = 'Failed adding entry: '.
         $fileHashRef->{'logFileName'}.' : '.$fileHashRef->{'logEntryCount'}; 
        return ($FAILURE,$error);
    } else {
        $fileHashRef->{"hashedPages"}++;
    }

    return ($SUCCESS,$error);
}

###################################################################### 
#     AdvancedProcessPageElementRequest: Process a page element
#                                        request using the advanced
#                                        algorithm
#
#     Finds the page hash entry the element belongs to (keyed by submit
#     time, visitor IP and ORACLE_SMP_CHRONOS_GL cookie), updates its
#     server timings and saves the entry back.
#
#     Context in:
#                lineHashRef, fileHashRef, inputParametersHashRef,
#                debugHashRef, filter, pageHash
#     Context out:
#                lineHashRef->{visitorIP}
#     Returns:
#                ($SUCCESS, $error) or ($FAILURE, $error)
###################################################################### 

sub AdvancedProcessPageElementRequest 
{
    my($lineHashRef, $fileHashRef, $inputParametersHashRef, 
       $debugHashRef, $filter, $pageHash) = @_;

    my $error = '';

    # "<log file> : <line count>" suffix used by every error message below;
    # built only when an error is actually reported.
    my $location = sub {
        return $fileHashRef->{'logFileName'}.' : '.$fileHashRef->{'logEntryCount'};
    };

    unless ($lineHashRef->{entryPage})
    {
        # The submit page must at least be splittable into its components;
        # only the status and error of the split are used here.
        my $splitStatus;
        ($splitStatus, $error) =
            SplitURL($lineHashRef->{submitPage}, $inputParametersHashRef->{"queryDelimiterRegexp"});
        return ($FAILURE, $error) if ($splitStatus == $FAILURE);
    }

    # This will need to change once we cut over to doing 'normal' lookups
    $lineHashRef->{visitorIP} = $lineHashRef->{"c-ip"};

    # Try to read the entry from the page hash
    my $entry = $pageHash->get_entry($lineHashRef->{'submitTime'},
           $lineHashRef->{'visitorIP'}, $lineHashRef->{'x-cookie(ORACLE_SMP_CHRONOS_GL)'});

    # With a '-' referer there is no referring page, so '-' is used as the
    # referer key instead of the referer uri/query.
    my $refHashPage = ($lineHashRef->{'finalReferer'} eq '-')
                    ? '-'
                    : $lineHashRef->{'refUriQuery'};

    unless (defined($entry))
    {
        $error = 'Page element has no corresponding page entry: '.$location->();
        return ($FAILURE, $error);
    }

    unless (ChronosPageHash::is_valid($entry))
    {
        $error = 'Page element has invalid corresponding page entry: '.$location->();
        return ($FAILURE, $error);
    }

    # same referer so expecting a typical frames scenario
    $pageHash->update_server_timings($entry, $lineHashRef->{'requestVal'},
           $lineHashRef->{'x-time-start'}, $refHashPage,
           $lineHashRef->{'x-time-end'}, $lineHashRef->{'x-cache'});

    # a true return value from add_entry is treated as failure
    if ($pageHash->add_entry($entry, $lineHashRef->{'dateTime'}))
    {
        $error = 'Failed adding entry: '.$location->();
        return ($FAILURE, $error);
    }

    return ($SUCCESS, $error);
}




###################################################################### 
#     ValidateData : Check that the current line carries everything
#              needed to output a latency record
#     Context in:
#                       $LineValues.trackingCookie
#                       $LineValues.webserver
#                       $LineValues.page
#                       $LineValues.refererVal
#                       $LineValues.framesetLoaded
#     Context out:
#                       $LineValues.validData
#                       $FileValues.errorVal (on failure only)
#     Returns:
#                       $SUCCESS or $FAILURE
###################################################################### 

sub ValidateData
{
  my ($ctxRef) = @_;
  my $line = $ctxRef->{"LINEVALUES"};
  my $file = $ctxRef->{"FILEVALUES"};

  # Every one of these fields must be non-empty; stop at the first empty one.
  my $complete = 1;
  foreach my $field ("trackingCookie", "webserver", "page", "refererVal")
  {
    if ($line->{$field} eq "")
    {
      $complete = 0;
      last;
    }
  }

  # ... and the request must report the frameset as loaded.
  if ($complete && ($line->{"framesetLoaded"} ne "true"))
  {
    $complete = 0;
  }

  unless ($complete)
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Invalid latency data in file ".
      $file->{"logFileName"}.":".$file->{"logEntryCount"};
    $line->{"validData"} = $STATUS_FAIL;
    return $FAILURE;
  }

  $line->{"validData"} = $STATUS_SUCCESS;
  return $SUCCESS;
}

###################################################################### 
#     OutputLine : Output record line to the output file
#
#     Lines whose validData is not $STATUS_SUCCESS are skipped and
#     $SUCCESS is returned. Otherwise one data line is sent to the
#     output object and, when cookieOutput is set, one cookie line per
#     cookie found in cs(Cookie).
#
#     Context in:
#                     $FileValues.outFileObject
#                     $FileValues.gmtFlag, $FileValues.gmtOffsetSecs
#                     $FileValues.Latency
#                     $FileValues.logFileName, $FileValues.logEntryCount
#                     $CommandLineInput.cookieOutput, .targetName,
#                       .targetGUID, .timeZone, .usegmtoffset, .resolve
#                     $LineValues.validData
#                     $LineValues.dateTime, $LineValues.outDateTime
#                     $LineValues.submitTime, $LineValues.loadTime
#                     $LineValues.latencyTime
#                     $LineValues.page, $LineValues.outputpage
#                     $LineValues.webserver
#                     $LineValues.name, $LineValues.visitorIP
#                     $LineValues.c-ip, $LineValues.cs(Cookie)
#                     $LineValues.trackingCookie, $LineValues.refererVal
#                     $LineValues.sdkRequest
#                     $LineValues.serverInTime, .serverOutTime,
#                       .totServerTime, .dbTime
#                     $LineValues.browserName, .browserVersion
#                     $LineValues.osName, .osVersion
#     Context out:
#                     $LineValues.outDateTime
#                     $LineValues.name (only when resolve is set)
#                     $LineValues.visitorDomain, $LineValues.visitorIPNum
#                     $FileValues.Latency
#                     $FileValues.errorVal (on failure)
#                     $FileValues.noOutputLines, .noCookieOutputLines
#                     $DebugValues.noOutputLines, .noCookieOutputLines
#     Returns:
#                     $SUCCESS, $FAILURE or $CATASTROPHIC_FAILURE
###################################################################### 

sub OutputLine
{
  my ($ctxRef) = @_;
  my $fileHashRef=$ctxRef->{"FILEVALUES"};
  my $lineHashRef=$ctxRef->{"LINEVALUES"};
  my $debugHashRef=$ctxRef->{"DEBUGVALUES"};
  my $inputParametersHashRef=$ctxRef->{"COMMANDLINEINPUT"};
  my $status=$STATUS_SUCCESS;
  my $error="";
  my $metricName="latency";
  my $resolve=$inputParametersHashRef->{"resolve"};
  my $visitor_addr=$lineHashRef->{visitorIP};

  # nothing to output for lines that did not validate
  unless ($lineHashRef->{"validData"}==$STATUS_SUCCESS)
  {
    return $SUCCESS;
  }

  # key that ties the cookie lines to their data line
  my $cookieIndex='';
  if($inputParametersHashRef->{"cookieOutput"})
  {
    $cookieIndex=$inputParametersHashRef->{"targetName"}.
      $lineHashRef->{"submitTime"}.$fileHashRef->{"logEntryCount"};
  }

  # convert gmt time to local time
  unless($lineHashRef->{'outDateTime'})
  {
    $lineHashRef->{'outDateTime'} = $lineHashRef->{'dateTime'};
    if($fileHashRef->{'gmtFlag'} || $inputParametersHashRef->{"usegmtoffset"}) 
    {
      my ($year,$month,$date,$hour,$mins,$secs) = $lineHashRef->{'dateTime'} =~ 
        m/^(\d+)-(\d+)-(\d+).(\d+):(\d+):(\d+).*/o;
      unless (defined($secs))
      {
        # Without this check undefined values would be handed to timegm(),
        # which aborts the whole run instead of rejecting the line.
        $ctxRef->{"FILEVALUES"}->{"errorVal"} = "Invalid date/time in File : ".
          $fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"};
        return $FAILURE;
      }
      # timegm() expects a 0-based month
      my $timegout = timegm($secs,$mins,$hour,$date,$month - 1,$year) -
                     $fileHashRef->{'gmtOffsetSecs'};
      # localtime() returns (sec, min, hour, mday, mon, year, ...), which is
      # the argument order strftime() expects
      ($secs, $mins, $hour, $date, $month, $year) = localtime($timegout);
      $lineHashRef->{'outDateTime'} =
        strftime("%Y-%m-%d %H:%M:%S",$secs, $mins, $hour, $date, $month, $year);
    }
  }

  if ($resolve) 
  {
    my ($name, $resolvedIP);
    ($name,$resolvedIP,$error,$status)=
       DoReverseLookup($visitor_addr, $inputParametersHashRef, $fileHashRef);
    $lineHashRef->{name}=$name;
  }

  # Get the domain
  $lineHashRef->{"visitorDomain"} = SetVisitorDomain($lineHashRef->{"name"},
    $lineHashRef->{"c-ip"});

  #Calculate number representation of IP address
  $lineHashRef->{"visitorIPNum"} = ConvertIPStrToNum($visitor_addr);

  # all different pieces found, so lets log the data as a line in the output 
  # file else throw along with all other debug information in error file

  my $LatencyHashKey = "";
  if(!$lineHashRef->{'sdkRequest'})
  {
    $LatencyHashKey=$lineHashRef->{"trackingCookie"}.":".
      $lineHashRef->{"submitTime"}.":".$lineHashRef->{"refererVal"};
    if ($lineHashRef->{'totServerTime'} ne '') 
    {
      # the three server timings are divided by 1000 before being output
      $lineHashRef->{'totServerTime'} = $lineHashRef->{'totServerTime'}/1000;
      $lineHashRef->{'serverInTime'} = $lineHashRef->{'serverInTime'}/1000;
      $lineHashRef->{'serverOutTime'} = $lineHashRef->{'serverOutTime'}/1000;
    }
    # a record with the same cookie/submit time/referer was already output
    if (defined ($fileHashRef->{"Latency"}->{$LatencyHashKey}))
    {
      $ctxRef->{"FILEVALUES"}->{"errorVal"} ="CONFLICT in File : ".
        $fileHashRef->{"logFileName"}.":".$fileHashRef->{"logEntryCount"}.
          ": Key ".$LatencyHashKey." exists in hash"; 
      return $FAILURE;
    }
  }

  unless($fileHashRef->{"outFileObject"}->add_data_line(\$error,
    [
      $inputParametersHashRef->{"targetGUID"}, $metricName,
      $lineHashRef->{"outDateTime"}."$inputParametersHashRef->{timeZone}",
      $lineHashRef->{"submitTime"}, $lineHashRef->{"loadTime"},
      $lineHashRef->{"latencyTime"}, 
      $lineHashRef->{"outputpage"}?$lineHashRef->{"outputpage"}:$lineHashRef->{"page"},
      $lineHashRef->{"webserver"}, $lineHashRef->{"name"},
      $lineHashRef->{"visitorDomain"}, $lineHashRef->{"visitorIP"},
      $lineHashRef->{"visitorIPNum"},
      $cookieIndex, $lineHashRef->{"serverInTime"},
      $lineHashRef->{"serverOutTime"}, $lineHashRef->{"totServerTime"},
      $lineHashRef->{"dbTime"},
      $lineHashRef->{"browserName"},
      $lineHashRef->{"browserVersion"},$lineHashRef->{"osName"},
      $lineHashRef->{"osVersion"}
    ]))
  {
    $ctxRef->{"FILEVALUES"}->{"errorVal"}=
       "Fatal error while sending data line to output object\n$error";
    $fileHashRef->{"outFileObject"}->end_force() if $fileHashRef->{"outFileObject"} ;
    return $CATASTROPHIC_FAILURE;
  }

  $debug->PrintTrace("SUCCESSFUL Entry: ".$fileHashRef->{"logFileName"}.":".
    $fileHashRef->{"logEntryCount"});
  $fileHashRef->{"Latency"}->{$LatencyHashKey} = 
    $lineHashRef->{"latencyTime"} if(!$lineHashRef->{'sdkRequest'});
  $debugHashRef->{"noOutputLines"}++;
  $fileHashRef->{"noOutputLines"}++;

  if ($inputParametersHashRef->{"cookieOutput"})
  {
    foreach my $cookie (split /\;/,$lineHashRef->{"cs(Cookie)"})
    {
      my ($cookieName, $cookieValue);
      my $separator = index($cookie,"=",0);
      if ($separator < 0)
      {
        # Cookie without '=': the whole token is the name. Feeding the -1
        # from index() to substr() would cut off the last character of the
        # name and report the whole token as the value.
        $cookieName = $cookie;
        $cookieValue = "";
      }
      else
      {
        $cookieName = substr($cookie,0,$separator);
        $cookieValue = substr($cookie,$separator+1);
      }
      # drop the blank that follows the ';' separator
      if (index($cookieName," ") == 0 )
      {
        $cookieName=substr($cookieName,1);
      }
      unless($fileHashRef->{"outFileObject"}->add_cookie_line(\$error,
        [$cookieIndex, $cookieName, $cookieValue]))
      {
         $ctxRef->{"FILEVALUES"}->{"errorVal"}=
            "Fatal error while sending cookie data to output object.\n$error";
         $fileHashRef->{"outFileObject"}->end_force() if $fileHashRef->{"outFileObject"};
         return $CATASTROPHIC_FAILURE;
      }
      $debugHashRef->{"noCookieOutputLines"}++;
      $fileHashRef->{"noCookieOutputLines"}++;
    }
  }

  return $SUCCESS; 
}

###################################################################### 
#  Build Main Pipeline : Build the main pipeline.
#
# Name : BuildMainPipeLine
# Description :  Returns a reference to the ordered list of stages of
#                the main pipeline. Each stage is a hash with a "NAME"
#                and a "FNCPTR" (reference to the stage function).
###################################################################### 

sub BuildMainPipeLine
{
  # stage name / stage function, in execution order
  my @stages = (
    [ "PROCESSCOMMANDLINEINPUT", \&ProcessCommandLineInput ],
    [ "DORECOVERY",              \&DoRecovery ],
    [ "INITIALIZEENVIRONMENT",   \&InitializeEnvironment ],
    [ "BUILDLOGFILELIST",        \&BuildLogFileList ],
    [ "PROCESSLOGFILES",         \&ProcessLogFiles ],
    [ "POSTPROCESS",             \&PostProcess ],
    [ "DOCOMMIT",                \&DoCommit ],
    [ "DOCLEANUP",               \&DoCleanup ],
  );

  my @list = map { +{ "NAME" => $_->[0], "FNCPTR" => $_->[1] } } @stages;

  return \@list;
}

###################################################################### 
# Name : BuildLinePipeLine
# Description :  Returns a reference to the ordered list of stages of
#                the line pipeline. The COMPUTEDERIVEDVALUES stage uses
#                AdvancedComputeDerivedValues when the advDtFilter input
#                parameter is set and ComputeDerivedValues otherwise.
###################################################################### 

sub BuildLinePipeLine
{
  my ($inputParametersHashRef) = @_;

  my $deriveValues = $inputParametersHashRef->{advDtFilter}
                   ? \&AdvancedComputeDerivedValues
                   : \&ComputeDerivedValues;

  # stage name / stage function, in execution order
  my @stages = (
    [ "EXTRACTLINETOKENS",    \&ExtractLineTokens ],
    [ "EXTRACTLINEVALUES",    \&ExtractLineValues ],
    [ "COMPUTEDERIVEDVALUES", $deriveValues ],
    [ "VALIDATEDATA",         \&ValidateData ],
    [ "OUTPUTLINE",           \&OutputLine ],
  );

  my @list = map { +{ "NAME" => $_->[0], "FNCPTR" => $_->[1] } } @stages;
  return \@list;
}

######################################################################## 
# Name : DumpCtx
# Description : Debugging aid that prints the content of a hash to
#               STDOUT between two lines of dashes. Array values are
#               printed as "@key = |elem|elem...", hash values are
#               dumped recursively, plain values as "key  ===> value".
#               References of any other type are not printed.
# 
######################################################################## 

sub DumpCtx
{
  my ($ctxRef) = @_;
  my $separator = "-" x 50 ."\n";

  print $separator;
  foreach my $key (keys %{$ctxRef})
  {
    my $kind = ref($ctxRef->{$key});

    if ($kind eq "ARRAY")
    {
      print "\@$key = ";
      foreach my $element (@{$ctxRef->{$key}})
      {
        print "|".$element;
      }
      print "\n";
    }
    elsif ($kind eq "HASH")
    {
      print "\%$key =\n";
      DumpCtx($ctxRef->{$key});
    }
    elsif (!$kind && exists $ctxRef->{$key})
    {
      print "$key  ===> $ctxRef->{$key}\n";
    }
  }
  print $separator;
}

###########################################################################
# SetVisitorDomain - derive the visitor domain from the visitor name
#
# IN: 
#    $Name - visitor host name or dotted IP address
#    $IP   - visitor IP address, used when $Name yields no domain
#
# OUT: 
#    $VisitorDomain
#
###########################################################################

sub SetVisitorDomain 
{
   my ($name, $ip)=@_;

   # The name is itself a dotted quad: the domain is its first three numbers.
   if ($name =~ m/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/o)
   {
       return "$1.$2.$3";
   }

   # Four or more dot-separated components: keep the last three.
   if ($name =~ m/(\S+)\.(\S+)\.(\S+)\.(\S+)$/o)
   {
       return "$2.$3.$4";
   }

   # Fewer components: everything after the first dot.
   if ($name =~ m/\S+?\.(.+)/o)
   {
       return $1;
   }

   # No domain in the name: fall back to the first three numbers of the IP.
   my ($octet1, $octet2, $octet3) = $ip =~ m/(\d+)\.(\d+)\.(\d+)\.(\d+)/o;
   return "$octet1.$octet2.$octet3";
}

############################################################################
# SetVisitorIP - set visitor ip and name from the visitor address
#
# IN: 
#    $visitor_addr            - address of the visitor
#    $inputParametersHashRef  - the noResolveIP entry is used when
#                               $visitor_addr contains no dotted IP
#    $fileHashRef             - not used
#
# OUT: 
#    $visitorIP,$Name,$error,$status
#    ($Name is always $visitor_addr, $error is always empty and $status
#     is always $STATUS_SUCCESS)
#
############################################################################


sub SetVisitorIP 
{
  my ($visitor_addr, $inputParametersHashRef, $fileHashRef) = @_;

  # An address containing a dotted quad is used as the IP as is; anything
  # else gets the configured placeholder IP.
  my $containsIP = ($visitor_addr =~ m/(\d+)\.(\d+)\.(\d+)\.(\d+)/o);
  my $visitorIP  = $containsIP
                 ? $visitor_addr
                 : $inputParametersHashRef->{"noResolveIP"};

  return ($visitorIP, $visitor_addr, "", $STATUS_SUCCESS);
}

#############################################################################
# ParseUserAgent - extract browser and operating system information from a
#                  user agent string
#
# IN: 
#     $userAgent
#
# OUT: 
#     $BrowserName,$BrowserVersion,$OsName,$OsVersion
#     (four empty strings when $userAgent is empty)
#
#############################################################################

sub ParseUserAgent 
{
  my ($userAgent)=@_;
  my ($browser, $version, $os, $osRelease) = ("", "", "", "");

  # invalid empty browser field 
  return ($browser, $version, $os, $osRelease) if ($userAgent eq "");

  # Gecko based browsers (Netscape 6+, Mozilla, Firefox, Epiphany, etc)
  # typical user agent string:
  # Mozilla/5.0 (Platform; Security; OS-or-CPU; Localization; rv:x.x.x) Gecko/CCYYMMDD Vendor/Version
  if ($userAgent =~ /^Mozilla\/5\.0 \([^;]+;[^;]+;[^;]+;[^;]+;[^;\)]+\) Gecko\/[0-9]{8}/)
  {
    # Vendor/Version after the Gecko build date, when present
    ($browser, $version) = $userAgent =~ /Gecko\/[0-9]{8} ([^\/]+)\/(\S+)/;
    unless ($browser)
    {
      # no vendor token: report Mozilla with the rv: revision
      $browser = "Mozilla";
      ($version) = $userAgent =~ /rv:([^)]+)/;
    }

    # the OS is the third ';' separated field inside the parentheses
    my ($platformInfo) = $userAgent =~ /\(([^)]+)\)/;
    my @fields = split(/;/, $platformInfo);
    ($os, $osRelease) = $fields[2] =~ /(\S+) (.*)/;
  }
  else
  {
    my $malformed = 0;

    if ($userAgent =~ /(Opera)[\/ ](\S*) /)
    {
      # Opera
      ($browser, $version) = ($1, $2);
    }
    elsif (($userAgent =~ /compatible/) && ($userAgent =~ /(MSIE) ([^\s;]+)/))
    {
      # MSIE
      ($browser, $version) = ($1, $2);
    }
    elsif ($userAgent =~ /^Mozilla\/(4\.\S*)/)
    {
      # Netscape (if Mozilla/4.x and not Gecko)
      ($browser, $version) = ("Netscape", $1);
    }
    elsif ($userAgent =~ /^(Mozilla)\/(\S*)/)
    {
      # Any thing else starting with Mozilla, classify as Mozilla
      ($browser, $version) = ($1, $2);
    }
    elsif ($userAgent =~ /(\S+)\/(\S+)/)
    {
      # Report any < >/< > string as <browsername>/<browser version>
      ($browser, $version) = ($1, $2);
    }
    else
    {
      # the user-agent entry is malformed; in case nothing matches, the
      # entire field is returned as the browser name
      $malformed = 1;
      ($browser, $version, $os, $osRelease) = ($userAgent, "", "", "");
    }

    # every recognised non-Gecko agent gets its OS from the generic parser
    ($os, $osRelease) = OSfromUserAgent($userAgent) unless ($malformed);
  }

  # report Windows NT 5.1 as XP and Windows NT 5.0 as 2000
  if ($osRelease =~ /NT 5\.1/)
  {
    $osRelease = "XP";
  }
  elsif ($osRelease =~ /NT 5\.0/)
  {
    $osRelease = "2000";
  }

  # a browser version longer than 16 characters is truncated to 16
  $version = substr($version, 0, 16) if ((length $version) > 16);

  return ($browser, $version, $os, $osRelease);
}

#############################################################################
# OSfromUserAgent - Get OS name and version from User Agent for generic case 
#                   (ie no format)
#
# IN: 
#     $userAgent
#
# OUT: 
#     $OsName,$OsVersion
#     (both empty when no known OS is found; $OsVersion is empty when the
#      OS is recognised but no version can be extracted)
#
#############################################################################

sub OSfromUserAgent 
{
  my ($userAgent)=@_;

  # One row per OS family, checked in this order:
  #   pattern that identifies the family, reported OS name, and the patterns
  #   tried in turn to capture the version.
  my @families = (
    [ qr/Windows/, "Windows", [ qr/Windows\s([^;)]+)/ ] ],
    [ qr/Win/,     "Windows", [ qr/Win([NT0-9]+)/ ] ],
    [ qr/Sun/,     "Sun",     [ qr/Sun\S*\s([^;)]+)/ ] ],
    [ qr/Mac/,     "Mac",     [ qr/(Mac_\S*)/, qr/Mac (OS X|OS)/ ] ],
    [ qr/Linux/,   "Linux",   [ qr/Linux\s?(\S*\s?\S*86)/ ] ],
  );

  foreach my $family (@families)
  {
    my ($detect, $osName, $versionPatterns) = @{$family};
    next unless ($userAgent =~ $detect);

    # first version pattern that matches wins
    foreach my $versionPattern (@{$versionPatterns})
    {
      return ($osName, $1) if ($userAgent =~ $versionPattern);
    }
    return ($osName, "");
  }

  return ("", "");
}

#############################################################################
#  Extract the http request from a log line
#
#  A request of the form
#     <uri>?ORACLE_SMP_CHRONOS=<loadTime>|<framesetLoaded>|<topLocation>
#  is split into its parts; any other request is returned unchanged with
#  framesetLoaded and topLocation undefined.
#  topLocation is top.location from javascript appended to request
#  framesetLoaded is true or false in marker gif request when
#  frames are being loaded
# IN:
#    $format  - "CLF", "CALYPSO" or "APACHE"
#    $request - request field of the log line; for CLF the complete
#               "<method> <uri> <protocol>" string
#    $status, $error - accepted but not used for the result
#  OUT:
#    $RequestVal, $framesetLoaded, $topLocation
#    (three empty strings for any other format)
#############################################################################

sub ExtractRequestField
{
  my ($format, $request, $status, $error)=@_;
  my ($requestUri, $markerPattern);

  if ($format eq "CLF") 
  {
    # the uri is the middle token of "<method> <uri> <protocol>"
    ($requestUri) = $request =~ m/^\S+\s+(\S+)\s+\S+$/o;
    $markerPattern = qr/(\S+?)\?ORACLE_SMP_CHRONOS=(\S+?)\|(\S+?)\|(\S+)/;
  }
  elsif (($format eq "CALYPSO") || ($format eq "APACHE"))
  {
    # the request field already holds just the uri
    $requestUri = $request;
    $markerPattern = qr/^(\S+?)\?ORACLE_SMP_CHRONOS=(\S+?)\|(\S+?)\|(\S+)$/;
  }
  else
  {
    return ("", "", "");
  }

  # $2 is the load time, which is not part of the result
  if ($requestUri =~ $markerPattern)
  {
    return ($1, $3, $4);
  }
  return ($requestUri, undef, undef);
}

##############################################################################
#  Routine to extract date sub fields from date in log line and assemble
#  them into a single timestamp string
#  IN:
#     $format   - "CALYPSO", "CLF" or "APACHE"
#     $DateVal  - date as year-month-day (CALYPSO only)
#     $TimeVal  - time as hour:mins:secs (CALYPSO only)
#     $TimeDate - date and time as day/month/year:hour:mins:secs followed
#                 by at least one more character (CLF and APACHE only)
#  OUT:
#     timestamp string "Year-Month-Date Hour:Mins:Secs"; the month is copied
#     as it appears in the log line
##############################################################################

sub ExtractDateFields 
{
    my($format,$dateVal,$timeVal,$timeDate)=@_;
    my ($year,$month,$day,$hour,$minute,$second);

    # stripping of date subfields from the date string
    if ($format eq "CALYPSO")
    {
      # date and time come in two separate fields
      ($year,$month,$day)      = $dateVal =~ m/^(\d+)-(\d+)-(\d+)/o;
      ($hour,$minute,$second)  = $timeVal =~ m/^(\d+):(\d+):(\d+)/o;
    }
    elsif (grep { $format eq $_ } ("CLF", "APACHE"))
    {
      # one combined field, day first
      ($day,$month,$year,$hour,$minute,$second) =
            $timeDate =~ m/^(\d+)\/(\S+)\/(\d+):(\d+):(\d+):(\d+).+/o;
    }

    # assemble the timestamp
    my $datePart = join('-', $year, $month, $day);
    my $timePart = join(':', $hour, $minute, $second);
    return "$datePart $timePart";
}


##############################################################################
#  Splits a URL into its components (proto_host : uri : query params)
#  The proto_host is everything before the first slash (ignoring of course //)
#  The uri is everything from the first slash (included) up to the first match
#    of the query delimiter regexp, e.g. one of the query chars
#    #, $, ;, ', ? (not included)
#  The query consists of the query delimiter and anything after it
#  A URL without any slash after the host gets "/" as its uri; a query
#  attached directly to the host (http://www.oracle.com?blah=blah) is still
#  split off.
#  IN:
#    full_url
#    queryDelimiterRegexp - regexp matching the start of the query part
#  OUT:
#    status      - $SUCCESS, or $FAILURE when full_url does not start with
#                  http:// or https:// followed by a host
#    error_msg   - empty on success
#    proto_host, uri, query  (query is '' when there is none)
#    uri_query          - uri followed by query
#    url_without_query  - proto_host followed by uri (proto_host alone when
#                         the URL has no slash after the host)
#  Test Cases:
#        ($status, $error, $sbmt_pg_wbsrv, $sbmt_pg_uri, $sbmt_pg_query) = 
#          SplitURL("http://www.oracle.com", $queryDelimiterRegexp);
#        print("Status: $status, Error: $error, Webserver: $sbmt_pg_wbsrv " .
#          "page: $sbmt_pg_uri, query: $sbmt_pg_query\n");
#    
#        ($status, $error, $sbmt_pg_wbsrv, $sbmt_pg_uri, $sbmt_pg_query) = 
#          SplitURL("http://www.oracle.com/", $queryDelimiterRegexp);
#        print("Status: $status, Error: $error, Webserver: $sbmt_pg_wbsrv " .
#          "page: $sbmt_pg_uri, query: $sbmt_pg_query\n");
#    
#        ($status, $error, $sbmt_pg_wbsrv, $sbmt_pg_uri, $sbmt_pg_query) = 
#          SplitURL("https://www.oracle.com/#\$\%^", $queryDelimiterRegexp);
#        print("Status: $status, Error: $error, Webserver: $sbmt_pg_wbsrv " .
#          "page: $sbmt_pg_uri, query: $sbmt_pg_query\n");
#    
#        ($status, $error, $sbmt_pg_wbsrv, $sbmt_pg_uri, $sbmt_pg_query) = 
#          SplitURL("https://www.oracle.com/\$", $queryDelimiterRegexp);
#        print("Status: $status, Error: $error, Webserver: $sbmt_pg_wbsrv " .
#          "page: $sbmt_pg_uri, query: $sbmt_pg_query\n");
#    
#        ($status, $error, $sbmt_pg_wbsrv, $sbmt_pg_uri, $sbmt_pg_query) = 
#          SplitURL("https://www.oracle.com/helloe/world/none.http#\$'!;http",
#                   $queryDelimiterRegexp);
#        print("Status: $status, Error: $error, Webserver: $sbmt_pg_wbsrv " .
#          "page: $sbmt_pg_uri, query: $sbmt_pg_query\n");
#
##############################################################################
sub SplitURL
{
  my ($url, $queryDelimiterRegexp) = @_;
  my $status = $SUCCESS;
  my $error_msg = '';
  my $proto_host = '';
  my $uri = '';
  my $query = '';
  my $uri_query = '';
  my $url_without_query = '';

  # NOTE: results are always copied out of $1/$2 after a successful match.
  # Assigning a failed match to a list, as in
  #   ($proto_host, $query) = $proto_host =~ m/.../
  # would reset the variables on the left to undef.
  # The delimiter patterns are deliberately not compiled with /o, so the
  # delimiter passed by the caller is honoured on every call.

  # split URL into proto_host and uri_query
  unless($url =~ m/^(http[s]?\:\/\/[^\/]+)(\/?\S*)/i)
  {      
      $status = $FAILURE;
      $error_msg = "Failed spliting URL: $url.\n";
      return ($status, $error_msg, $proto_host, $uri, $query,$uri_query, $url_without_query);
  }
  ($proto_host, $uri_query) = ($1, $2);

  if(!$uri_query)
  {
      # no slash after the host
      $uri = "/";
      # check for http://www.oracle.com?blah=blah
      if($proto_host =~ m/(\S+?)($queryDelimiterRegexp\S*)/i)
      {
          ($proto_host, $query) = ($1, $2);
          $uri_query = $uri.$query;
      }
      else
      {
          $uri_query = $uri;
      }
      $url_without_query = $proto_host;
  }
  else 
  {
      # split uri_query into uri and query
      if($uri_query =~ m/(\/\S*?)($queryDelimiterRegexp\S*)/)
      {
          ($uri, $query) = ($1, $2);
      }
      else
      {
          $uri = $uri_query;
      }
      $url_without_query = $proto_host.$uri;
  }
  return ($status, $error_msg, $proto_host, $uri, $query, $uri_query, $url_without_query);
}


##############################################################################
#  Routine to extract all the relevant cookie fields
#  WARNING: Success of this function can be checked in two ways. If you do
#  not care about the load timestamp, do not check the returned status;
#  check for submit_time eq "" instead.
#  IN:
#      $lineHashRef
#  OUT:
#      $TrackingCookie
#      $SubmitTime
#      $LoadTime
##############################################################################
sub ExtractCookieData
{
    # Parse the latency cookies found in the "cs(Cookie)" field of a log line.
    #
    # IN:
    #   $lineHashRef            - parsed log line; reads "cs(Cookie)" and "c-ip"
    #   $inputParametersHashRef - reads "submitTimeID", "loadTimeID",
    #                             "ApacheSessionId" and "JservSessionId"
    #   $fileHashRef            - reads "logFileName" and "logEntryCount"
    #                             (only used to build error messages)
    # RETURNS the list
    #   ($submitTime,$submitPage,$loadTime,$trackingCookie,$status,$error)
    #   $status is $STATUS_FAIL when the cookie has no ';' separator, holds
    #   more than one submit or load time, or has no parsable submit time.
    #   A missing load time only sets $error; $status stays $STATUS_SUCCESS.
    my($lineHashRef,$inputParametersHashRef,$fileHashRef)=@_;
    my $submitTimeID=$inputParametersHashRef->{"submitTimeID"};
    my $loadTimeID=$inputParametersHashRef->{"loadTimeID"};
    my $cookie=$lineHashRef->{"cs(Cookie)"};
    my $domain=$lineHashRef->{"c-ip"};
    my $status=$STATUS_SUCCESS;
    my $error="";

    my $submitTime="";
    my $submitPage="";
    my $loadTime="";
    my $trackingCookie="";
    my $cookieTemp="";
    my $trackingCookieTemp="";

    # now extract data from latency cookies
    # there will be submit times, load times and session tracking cookie
    # referer info will follow them

    # A usable cookie string holds several ';' separated entries. A missing
    # (undefined) cookie is handled like one without any separator.
    if(!defined($cookie) || index($cookie,";") <= 0)
    {
      $trackingCookie=$cookie;
      $status=$STATUS_FAIL;
      $error="Bad Cookie Entries: ".$fileHashRef->{"logFileName"}.":".
                                     $fileHashRef->{"logEntryCount"};
      return ($submitTime,$submitPage,$loadTime,$trackingCookie,$status,$error);
    }

    # collapse every run of white space in the cookie to a single blank
    $cookie  =~ s/\s+/ /go;

    # an entry carrying the submit time id twice is ambiguous: reject it
    if ($cookie=~ m/.*$submitTimeID.*$submitTimeID/o) 
    {
      $error="Multiple Submit times: ".$fileHashRef->{"logFileName"}.":".
                                $fileHashRef->{"logEntryCount"};
      $status=$STATUS_FAIL;
      return ($submitTime,$submitPage,$loadTime,$trackingCookie,$status,$error);
    }

    # same check for the load time id
    if ($cookie=~ m/.*$loadTimeID.*$loadTimeID/o)  
    {
      $error="Multiple Load times: ".$fileHashRef->{"logFileName"}.":".
                                    $fileHashRef->{"logEntryCount"};
      $status=$STATUS_FAIL;
      return ($submitTime,$submitPage,$loadTime,$trackingCookie,$status,$error);
    }

    # check for existence of Submit Time in cookie; its value has the
    # form <digits>?<page>
    if (!(($submitTime,$submitPage)=($cookie =~ 
                      m/.*$submitTimeID=(\d+)\?([^\s\;]+)[\;|\s]*/o)))
    {
      $submitTime="";
      $error="Bad Submit time and SubmitPage: " .
        $fileHashRef->{"logFileName"}.":" . $fileHashRef->{"logEntryCount"};
       
      $status=$STATUS_FAIL;
      return ($submitTime,$submitPage,$loadTime,$trackingCookie,$status,$error);
    }

    # check for existence of Load Time in cookie. A missing load time is
    # reported through $error only; the entry is not failed.
    if (!(($loadTime)=($cookie =~ m/.*$loadTimeID=(\d+)[\;|\s]*/o)))
    {
      $loadTime="";
      $error="Bad Load time: ".$fileHashRef->{"logFileName"}.":".
                                 $fileHashRef->{"logEntryCount"};
    }

    # split single quotes from cookies: keep only what precedes the quote
    if (($cookieTemp)=($cookie =~ m/(\S+?)\'\S+/o))
    {
      $cookie=$cookieTemp;
    } 

    # extracting tracking cookie can be difficult but transactions begin 
    # and end with change in session id and submit time at which time you 
    # hash information about time entries and page deltas
    #
    # The session id value stops at the first ';' or white space. Capturing
    # up to the next blank and then dropping one trailing character returned
    # a wrong id whenever cookies were separated by ';' without a blank.
    if (($trackingCookieTemp)=($cookie =~ 
          m/.*${$inputParametersHashRef}{"ApacheSessionId"}=([^\s\;]+)[\;|\s]*/o)) 
    {
      $trackingCookie=$trackingCookieTemp; 
    }
    elsif (($trackingCookieTemp)=($cookie=~ 
          m/.*${$inputParametersHashRef}{"JservSessionId"}=([^\s\;]+)[\;|\s]*/o))
    {
      $trackingCookie=$trackingCookieTemp; 
    }
    else
    {
      # no session cookie at all: track the visitor by the client address
      $trackingCookie=$domain;
    }
    return ($submitTime,$submitPage,$loadTime,$trackingCookie,$status,$error);
}

###########################################################################
#  Routine to extract all the relevant Referer fields
#  IN:
#      $lineHashRef,$inputParametersHashRef,$fileHashRef
#  OUT:
#      $Webserver,$RefUri,$RefUrlLessQuery,$RefQueryStr,$RefUriQuery,
#      $status,$error
###########################################################################

sub ExtractRefererVal 
{
    # Split the "finalReferer" field of a log line into its components.
    #
    # RETURNS the list
    #   ($webserver,$refUri,$refUrlLessQuery,$refQueryStr,$refUriQuery,
    #    $status,$error)
    #   $status is $STATUS_WARN when the referer is '-', $STATUS_FAIL when
    #   SplitURL does not report $SUCCESS, and $STATUS_SUCCESS otherwise.
    my($lineHashRef,$inputParametersHashRef,$fileHashRef)=@_;
    my $refererField = $lineHashRef->{"finalReferer"};

    # '-' in the referer field: nothing to split, all components stay empty
    if ($refererField eq '-')
    {
      return ("", "", "", "", "", $STATUS_WARN, "");
    }

    my ($splitStatus, $error, $webserver, $refUri, $refQueryStr, 
        $refUriQuery, $refUrlLessQuery) 
        = SplitURL($refererField, 
                   $inputParametersHashRef->{"queryDelimiterRegexp"});

    # NOTE(review): the SplitURL result is compared with $SUCCESS, not with
    # $STATUS_SUCCESS -- confirm that this is the constant SplitURL returns.
    if($splitStatus == $SUCCESS)
    {
        return ($webserver,$refUri,$refUrlLessQuery,$refQueryStr,$refUriQuery,
                $STATUS_SUCCESS,$error);
    }

    # keep the SplitURL message and add the position in the log file
    $error=$error.". Bad Referer: ".$fileHashRef->{"logFileName"}.":".
                     $fileHashRef->{"logEntryCount"};
    return ($webserver,$refUri,$refUrlLessQuery,$refQueryStr,$refUriQuery,
            $STATUS_FAIL,$error);
}

########################################################################
#
#  Instruct the OS to look up a host name based on its IP address. Respect
#  timeout constraints.  The returned status can be a success (in case the 
#  host name can be found within the time constraints) or undefined (which
#  means that either the host name lookup failed or timed out). It is the
#  caller's responsibility to check if the failure should be treated as 
#  a final failure or retried again in the future.
#
#  IN:
#       $IP,$inputParametersHashRef
#  OUT:
#       $status, $host_name
#
########################################################################
sub LookupIP
{
  # Resolve a dotted IP string to a host name, giving up after the number
  # of seconds found in $inputParametersHashRef->{"timeout"}.
  # RETURNS ($LKUP_RES, <host name>) when a name was found, otherwise
  # ($LKUP_UNDEF, <the IP string itself>).
  my ($IP,$inputParametersHashRef)=@_;
  my $lookupTimeout=$inputParametersHashRef->{"timeout"};

  # gethostbyaddr wants the address as 4 packed bytes
  my $packedAddr=pack("C4", split(/\./,$IP));
  my $resolvedName;

  # The ALRM handler dies to break out of a lookup that takes too long;
  # the eval catches that die.
  eval 
  {
    local $SIG{ALRM} = sub {die "alarm\n" };
    alarm $lookupTimeout;                        # arm the timer
    $resolvedName=gethostbyaddr($packedAddr,2);  # 2 is the address family
    alarm 0;                                     # disarm the timer
  };

  # The lookup timed out, failed inside the eval, or returned no name
  if (($@) || (! $resolvedName)) 
  {
    return($LKUP_UNDEF, $IP);
  }

  return($LKUP_RES, $resolvedName);
}

########################################################################
#
#  Convert a string IP address representation to an unsigned integer
#  IN:
#       visitorIP (IP address in string form a.b.c.d e.g. 122.33.22.33)
#  OUT:
#       $visitorIPNum (IP address as an unsigned number defined by 
#             a*256*256*256 + b*256*256 + c*256 + d)
########################################################################
sub ConvertIPStrToNum
{
    # Convert the first dotted quad a.b.c.d found in $ipAddrStr into the
    # number a*2^24 + b*2^16 + c*2^8 + d.
    # RETURNS 0 when the string is undefined or holds no dotted quad.
    my($ipAddrStr) = @_;
    my $ipAddrNum = 0;

    # The dots are escaped: an unescaped '.' matches any character, which
    # let strings such as "1234567" or "1a2b3c4" pass as IP addresses.
    if(defined($ipAddrStr) && $ipAddrStr =~ m/(\d+)\.(\d+)\.(\d+)\.(\d+)/) 
    {
        $ipAddrNum = ($1 << 24) + ($2 << 16) + ($3 << 8) + $4;
    }
    return $ipAddrNum;
}

########################################################################
#
#  Create a reverse lookup  hash (tied to a DBM file)
#  IN:
#       $Visitor_addr,$inputParametersHashRef,$fileHashRef
#  OUT:
#       $Name,$visitorIP,$error,$status
#  NOTE:
#       The hostList hash values are strings made of 3 consecutive fields:
#     offset 0 (1 char)  -> status (can be $LKUP_UNDEF, $LKUP_RES, $LKUP_UNRES)
#     offset 1 (8 chars) -> date (the time, in minutes, the entry was last set).
#     offset 9 (rest)    -> host name (if resolved then the host name, else the
#                           IP address), or the run id when status is $LKUP_UNDEF
#
########################################################################

sub DoReverseLookup
{
  # Return the host name for a visitor IP, using and maintaining the
  # $fileHashRef->{"hostList"} cache.
  #
  # IN:
  #   $visitor_addr           - visitor address; must start with a dotted quad
  #   $inputParametersHashRef - reads "maxundeftime", "resolvedage" and
  #                             "unresolvedage" here, and is passed to LookupIP
  #   $fileHashRef            - holds the "hostList" cache hash
  # RETURNS ($name,$visitorIP,$error,$status); $status is $STATUS_ERROR when
  #   $visitor_addr is not an IP address and $STATUS_SUCCESS otherwise.
  #
  # A cache value is the concatenation of: status (1 char), timestamp in
  # minutes (8 chars), then the host name -- or, for $LKUP_UNDEF entries,
  # the id of the run that last tried to look the address up.
  my ($visitor_addr,$inputParametersHashRef,$fileHashRef)=@_;
  my $d1;
  my $d2;
  my $d3;
  my $d4;
  my $name;
  my $visitorIP;
  my $max_undef_time = $inputParametersHashRef->{"maxundeftime"};
  my $resolved_age = $inputParametersHashRef->{"resolvedage"};
  my $unresolved_age = $inputParametersHashRef->{"unresolvedage"};
  my $error="";

  # Internally used constants that define the locations and lengths of the
  # fields inside a DBM hash entry
  my $STATE_LOC = 0;
  my $STATE_LEN = 1;
  my $DATE_LOC  = 1;
  my $DATE_LEN  = 8;
  my $NAME_LOC  = 9;
  my $RUNID_LOC = 9;

  # Check if visitor_addr is an IP or a hostname
  if (($d1,$d2,$d3,$d4) = $visitor_addr =~ m/^(\d+)\.(\d+)\.(\d+)\.(\d+)/o)
  {
    # visitor_addr is an IP
    my $status;
    my $last_update_timestamp = 0;
    my $current_timestamp = int(time() / 60);  # current time in minutes

    # Check if IP exists in hash
    if(defined $fileHashRef->{"hostList"}->{$visitor_addr}) 
    {
      # Read the hash entry value
      my $hash_value = $fileHashRef->{"hostList"}->{$visitor_addr};

      # Parse out the necessary contents from the hash entry
      $status = substr($hash_value, $STATE_LOC, $STATE_LEN);
      $last_update_timestamp = substr($hash_value, $DATE_LOC, $DATE_LEN);

      # Case of status 
      if($status == $LKUP_UNDEF)
      {
        # Parse the rest of the hash entry -> Get the last run ID
        my $last_run_id = substr($hash_value, $RUNID_LOC);
        
        # If looked up in this run already, don't re-look up
        if($run_id != $last_run_id)
        {
          # IP hasn't been looked up in this run

          # Get the hostname and status 
          ($status, $name) = LookupIP($visitor_addr, $inputParametersHashRef);
  
          # Check if lookup failed again
          if($status == $LKUP_UNDEF)
          {
            # If this IP has been on LKUP_UNDEF state longer than allowed
            if($current_timestamp - $last_update_timestamp >= $max_undef_time)
            {
              # check if unresolved hasn't expired
              if($current_timestamp - $last_update_timestamp < $unresolved_age)
              {
                # Set the status to $LKUP_UNRES
                $status = $LKUP_UNRES;
              }
              else
              {
                # unresolved age has expired
                # we only need to reset the last lookup time and the run id, 
                # as the status will remain the same. 
                # All the fields that need to be updated, that is, 
                # current_timestamp status and run_id are already 
                # appropriately set, so there is no further code required here.
              }
            }
            else   # IP has been on LKUP_UNDEF less than the allowed max
            {
              # Record that this run already tried the address while keeping
              # the time of the first failure. Without this write the stored
              # run id stays stale and the address is looked up again (with
              # the full timeout) for every log line of the run.
              $fileHashRef->{"hostList"}->{$visitor_addr} =
                $status . $last_update_timestamp . $run_id;
              # Reset the first failure time to the original; this also
              # skips the generic hash update at the end
              $current_timestamp = $last_update_timestamp;
            }
          }
        }
        else  # We've already looked up this entry in this run
        {
          # Make sure we don't update the hash
          $current_timestamp = $last_update_timestamp;
          # Set the name to be the IP
          $name = $visitor_addr;
        }
      }
      else   # status is $LKUP_RES or $LKUP_UNRES
      {
        my $max_age;
        # Check if status is $LKUP_RES
        if($status == $LKUP_RES)
        {
          $max_age = $resolved_age;
        }
        else   # status is $LKUP_UNRES
        {
          $max_age = $unresolved_age;
        }

        # Check if hostname resolution time is too old to be used
        if($current_timestamp - $last_update_timestamp >= $max_age)
        {
          # Get the hostname and status and update the hash
          ($status, $name) = LookupIP($visitor_addr, $inputParametersHashRef);
        }
        else
        {
          # Parse the rest of the hash -> Get the hostname from the hash
          $name = substr($hash_value, $NAME_LOC);
          # Assure that we don't rewrite to the hash
          $current_timestamp = $last_update_timestamp;
        }
      }
    }
    else      # IP doesn't exist in hash
    {
      # Lookup host name for the first time
      ($status, $name) = LookupIP($visitor_addr, $inputParametersHashRef);
    }
    # Update hash only if necessary: the branches above set
    # current_timestamp equal to last_update_timestamp to skip this write
    if($last_update_timestamp != $current_timestamp)
    {
       if($status == $LKUP_UNDEF)
       {
         # Undefined entries store the run id instead of a host name
         $fileHashRef->{"hostList"}->{$visitor_addr} =
           $status . $current_timestamp . $run_id;
       }
       else
       {
         $fileHashRef->{"hostList"}->{$visitor_addr} = 
           $status . $current_timestamp . $name;
       }
    }
    $visitorIP=$visitor_addr;
  }
  else    # visitor_addr is a host name
  {
    $error="Fatal error: currently only IP addresses are supported.\n";
    return ($name,$visitorIP,$error,$STATUS_ERROR);
  }
  return ($name,$visitorIP,$error,$STATUS_SUCCESS);
}

#############################################################################
# PageRequest
#
# IN:
#       $Request , $Status as determined in the master Parse function,
#       $inputParametersHashRef
#
# OUT:
#
# RETURNS: 1 for "yes" or 0 for "no"
##############################################################################
sub PageRequest
{
  # Decide whether a request counts as a page.
  # Reads "pageNameExtensions" (hash ref keyed by lower-case extension) and
  # "queryDelimiterRegexp" from $inputParametersHashRef.
  # RETURNS 1 when the request is a page, 0 otherwise.
  my ($request,$status,$inputParametersHashRef) = @_;
  my $pageExtensionsRef=$inputParametersHashRef->{"pageNameExtensions"};
  my $queryDelimiterRegexp=$inputParametersHashRef->{"queryDelimiterRegexp"};

  # Only 2XX (client request successful) and 304 ("Not-Modified", sent when
  # the browser has the page cached and issues an If-Modified-Since
  # request) can be pages.
  unless (( substr($status,0,1) eq "2" ) || ($status eq "304"))
  {
    return 0;
  }

  # a uri carrying a '?' is assumed to be a page
  return 1 if ( $request =~ m/\S+\?\S+/ );

  # not CGI: drop everything from the query delimiter on
  my $uriPath=$request;
  if ( $request =~ m/^(\S+?)$queryDelimiterRegexp\S+/o )
  {
    $uriPath=$1;
  }

  # the extension is whatever follows the last dot
  my ($extension) = ($uriPath =~ m/.*\.(\S+)/);

  # no extension equals servlet page
  return 1 unless defined $extension;

  # pages are the extensions listed in pageNameExtensions
  return (defined $pageExtensionsRef->{lc($extension)}) ? 1 : 0;
}
# end PageRequest

###########################################################################
# Usage - print usage message
#
# IN:
#     $progname -- name of perl program
#     $err_msg  -- error message printed at the end of the usage text
# OUT: -
###########################################################################
sub Usage 
{
    # Print the usage text on STDERR, followed by the error message that
    # caused it to be shown.
    #   $progname -- program name shown in the synopsis and in the example
    #   $err_msg  -- text printed after "Error -> "
    my ($progname,$err_msg)=@_;

    # an absent message is printed as an empty one
    $err_msg = "" unless defined $err_msg;

    print STDERR <<EOM;

usage: $progname   [ -targetname  <target name>        ]
                             [ -targettype  <target type>        ]
                             [ -timezone    <local timezone>     ]
                             [ -emdURL      <local EMD's URL>    ]
                             [ -chronosroot <chronos' root>      ]
                             [ -emdroot   <emd's root>           ]
                             [ -indir       <dir name>           ]
                             [ -infile      <file name>          ]
                             [ -outfile     <file name>          ]
                             [ -outfiletype <output file format> ]
                             [ -cookieoutput|nocookieoutput      ]
                             [ -resolve|noresolve                ]
                             [ -timeout     <timeout in seconds> ]
                             [ -apchsess    <apache session id>  ]
                             [ -jservsess   <jserv session id>   ]
                             [ -debug|nodebug                    ]
                             [ -debug 0|1|2                      ]
                             [ .... other options ......         ]

 Options         Description

 -targetname     Name of target. (mandatory)
 -targettype     Type of target. (mandatory)
 -emdURL         The emd's URL. (mandatory)
 -chronosroot    Path for chronos' root. (mandatory)
 -emdroot        Path for emd's root. (mandatory)
 -indir          The input directory (mandatory).
 -infile         The input log file name (mandatory).
 -outfile        The file name with path (mandatory).
 -timezone       The local time zone (e.g. PDT). (default is empty)
 -outfiletype    The output file format. Current valid formats are XML and 
                 SQLLDR (default is XML). If using SQLLDR format, do not
                 specify extension in outfile.
 -advdtfilter    Advanced data filtering on (default).
 -noadvdtfilter  Advanced data filtering off.
 -advdtfilterage Advanced data filtering aging in minutes (default = 10).
 -cacheentries   Accepts possibly browser cached entries. This option is only
                 used if the advdtfilter option is also set.
 -nocacheentries Does not accept possibly browser cached entries. This option
                 is only used if the advdtfilter options is also set (default).
 -cookieoutput   Output cookie data.
 -nocookieoutput Don't output cookie data (default).
 -resolve        Resolve ip/names of visitors from log file (default).
 -noresolve      Don't resolve names of visitors.
 -timeout        Time waited before aborting attempt to lookup host name.
                 Default is 2 seconds. 
 -resolvedage    Approximate time (in minutes) a successfull hostname lookup
                 is kept in the local cache file. Default is 31 days 
                 (44640 minutes).
 -unresolvedage  Approximate time (in minutes) an unsuccessful hostname lookup
                 is kept in the local cache file. The reported host name is the
                 IP itself. Default is 7 days (10080 minutes).
 -pageidparams   Query string parameters that should be used as part of the 
                 reported URI for a page.
-querydelimiters The character(s) used to delimit the query string from the path
                 part of the URI. More than one character allowed. 
-qparamdelimiters The character used to separate parameters within a query string.
                  More than one character allowed. 
 -maxundeftime   Estimated maximum time (in minutes) a given IP entry should 
                 remain in the undefined state before being considered 
                 unresolved. See the documentation for details. Default is 
                 2 hours (120 minutes).
 -procoldfiles   If this flag is set and this is the first run of the CME (or
                 the log file can't be found) only the main log file will be
                 processed (i.e. all old rotated files are ignored).
 -noprocoldfiles See procoldfiles (above). Default.
 -apchsess       Apache session Cookie id (default is 'Apache=').
 -jservsess      Jserv session id (default is 'JServSessionIdoraclejsp=').
 -trace          Turn tracing on.
 -notrace        Turn tracing off (default).
 -priority       Set the scheduling priority of the process to the specified 
                 level. See the man page for setpriority for more information.
                 On Unix systems the default priority is 0. A higher priority level
                 causes less favorable scheduling for the process. The default is 
                 50 which should be above the highest possible level (least 
                 favorable scheduling) on most systems.
 -nosetpriority  Do not attempt to set priority, regardless of the presence or 
                 absence of the -priority option.
 -maxloglines    Maximum number of access log lines processed per run. Default
                 is 100,000. If set to zero or a negative number,  no maximum 
                 value is considered and the run will progress until all 
                 available log lines have been exhausted. 
 -warnloglines   If this number of log lines are processed and the marker has
                 not moved forward (i.e. no valid line has been found), a 
                 warning is issued. If greater than maxloglines, it will be 
                 set to maxloglines. Default is 500, or maxloglines if 
                 maxloglines is less than 500.
 -dbmtype        The dbm library to use for file-backed hashes. This should be
                 one of the "implname" fields in the dbmTypes hash in 
                 ChronosDbmTypes.pm e.g. ndbm,sdbm. By default the first entry
                 in dbmTypes is used, or, if dbmmodule is specified, the 
                 dbmTypes entry containing the value of dbmmodule.
 -dbmmodule      The module name of the DBM implementation to load 
                 (e.g. NDBM_File.pm, SDBM_File.pm, etc). Defaults to module 
                 field of the dbmTypes entry specified by dbmtype.
 -dbmmaxdatasize The maximum size of the data (key+value) that can be 
                 inserted into a dbm-tied file-backed hash. Defaults to 
                 maxdatasize field of dbmTypes entry specified by dbmtype. 
                 If not found in dbmTypes entry, defaults to unlimited.
 -dbmexts        Comma-separated list of file extensions used by the dbm 
                 implementation chosen. (e.g. ".dir,.pag"). Defaults to exts
                 field of the dbmTypes entry specified by dbmtype.
 -debug          Set debugging level (0, 1, or 2). 2 is maximum detail. Default 
                 is 1.
 -maxlogfilesizekb Set the maximum size in KB of log file. Default 5 KB.
 -numlogfiles    Maximum number of log files to keep in disk. Default 10.
 -numtracefiles  Maximum number of trace files to keep in disk. Default 10.

Example:

   $progname -targetname my_tgt -targettype my_tgt_type -timezone America/Los_Angeles PDT -emdURL http://somehost:8000/ -indir /home/oracle/calypso/log -infile access_log -outfile /home/somedir/out.xml -chronosroot /home/oracle/chronos -emdroot /home/oracle/emd

Error -> $err_msg

EOM
}

__END__

