#
# Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. 
#
#    NAME
#      parse-log1.pl 
#
#    DESCRIPTION
#      This script is used in EMD to parse log files for critical and
#      warning patterns. For each log file the script keeps the last line
#      number and byte position it scanned in a state file, so that the
#      next run resumes where the previous one stopped. The state file is
#      $EMSTATE/sysman/emd/state/parse-log-<target guid>; EMSTATE and a
#      target GUID (environment variable EM_TARGET_GUID, or the -t
#      argument) must therefore be available for the script to run.
#
#
#    Input: inputs are agentConditionContext environment variables, which specify program criteria
#             in terms of log file name, match pattern and ignore pattern
#
#    The output of this script is :
#      For each <file>,<match-pattern> with at least one match, the following
#      record is printed ->
#      "em_result=log_file_name,log_file_match_pattern,log_file_ignore_pattern,timestamp,log_file_match_count,log_file_message"
#       where log_file_message summarizes the scan and, for files whose content
#       is uploaded, also carries the matching lines. Max length = 1024.
#
#
#    MODIFIED   (MM/DD/YY)
#    rgorle      06/22/12 - bug#14214947
#    rgorle      05/09/12 - bug#14027936
#    rgorle      01/06/12 - Supporting command line arguments
#    nasounda    12/05/11 - Bug:13101783
#    nasounda    01/20/11 - Don't expand regular exprs if there is no regex
#                           character
#    nasounda    10/13/10 - Bug:10197471
#    nasounda    09/02/10 - Bug:10052959
#    nasounda    08/12/10 - Bug:9777020 - Changing delimiter to support |
#                           symbol
#    nasounda    03/18/10 - Bug : 9323245 - Fix for small sized log files
#    sejain      05/12/08 - Backport sejain_bug-6790294 from main
#    sreddy      05/08/07 - Backport sreddy_bug-5139454 from main
#    sreddy      05/02/07 - fix bug#5139454
#    sreddy      05/02/07 - fix Windows Platform Handling
#    svrrao      10/10/06 - Porting Changes, added more supported platforms
#    ajayshar    08/09/05 - Bug-4501027-timestamp always in OMS locale (garbled).
#    sreddy      07/06/05 - expand dirs recursively for internal usage
#    sacgoyal    06/22/05 - 
#    sreddy      06/21/05 - Fix bug#4442004
#    sreddy      06/21/05 - lfm_{i,e}files_oracle located under sysman/admin now
#    sreddy      06/20/05 - restrict path expansion to oracle files
#    sreddy      06/20/05 - Suppress OMS warnings for ifiles and efiles entries
#    sreddy      06/20/05 - Add path separator support
#    sacgoyal    05/20/05 - ifiles not applicable for windows, remove support for SQL wildcards
#    sacgoyal    04/22/05 - fix bug#4314511 
#    sacgoyal    03/03/05 - fix bug#4042946
#    sreddy      01/31/05 - upload content via EM_LFM_TEST_MODE flag for internal use
#    sacgoyal    01/26/05 - correct log-file metric Message 
#    sacgoyal    11/23/04 - remove warning
#    sacgoyal    11/04/04 - add timestamp
#    sreddy      11/01/04 - generate separate record for each match pattern
#    sreddy      11/01/04 - support inode based file rotation
#    sreddy      11/01/04 - optimize the file reading and in memory requirements
#    sreddy      11/01/04 - better state management
#    sreddy      10/05/04 - use expandPath 
#    sacgoyal    09/22/04 - sacgoyal_condition_contexts
#    sacgoyal    07/30/04 - Creation for Agent Condition Context, Enterprise Manager, 10.2
#
#-Packages--------------------------------------------------------------------
use strict;
use warnings;
use File::Basename;
use Getopt::Long;
use Time::Local;
use HTTP::Date;
use Digest::MD5 qw(md5_hex);
require "emd_common.pl";
require "semd_common.pl";
require "conditionContext.pl";

# Environment-----------------------------------------------------------------
# Pin PATH to a fixed set of system directories.
$ENV{PATH} = "/bin:/usr/bin:/usr/sbin";
#$ENV{EMAGENT_PERL_TRACE_LEVEL}=1; #uncomment this line to get TRACE messages

#-Global-Variables------------------------------------------------------------
my %key_value_set = ();                 # "file,search,ignore" criteria already registered
our $timestamp = localtime;             # collection time as local-time text
my $timestamp_withoutTZ = $timestamp;   # same text, before the zone suffix is appended
my $previousTimestamp = "";             # timestamp of the previous run, read from the state file

# Offset of local time from GMT. The clock is sampled once so that both sides
# of the subtraction describe the same instant, and the minutes are derived
# from whole seconds. Deriving them from the hours value rounded to one
# decimal turned a :45 offset into :47.
my $tzSampleTime    = time;
my $tzOffsetSeconds = timegm(localtime($tzSampleTime)) - $tzSampleTime;
my $offset  = sprintf "%.1f", $tzOffsetSeconds / 3600;
my $minutes = sprintf "%02d", int((abs($tzOffsetSeconds) % 3600) / 60);
my $tz = sprintf("GM %s%02d", ($tzOffsetSeconds < 0 ? "-" : "+"), int(abs($tzOffsetSeconds) / 3600)) . ":" . $minutes;
$timestamp .= " " . $tz;

# Toggles and per-file flags carried over from the previous run.
our %Flag = ();
# Global context: log file scan criteria and the scan output.
our %GC = ();
# Per log file: set when the content of that file is to be uploaded.
our %uploadContent = ();

# One text buffer per error category; they are merged into a single
# em_warning record per upload.
my ($statErrors, $missingPatternErrors, $missingFileErrors, $fileOpenErrors,
    $directoryErrors, $includeFileErrors, $excludeFileErrors, $readErrors,
    $allErrors, $uncErrors, $regexpErrors, $driveErrors, $unixPathErrors)
  = ("") x 13;

# The current implementation needs stat() to return dev and inode_no;
# this is switched off for Windows further down.
our $statSupportsInode = 1;

# Key columns handed to getConditionContext as Perl regexp columns.
my @perlRegExpColumns = qw(log_file_match_pattern log_file_ignore_pattern);
our $conditionContextAref = getConditionContext(@perlRegExpColumns);

# On Solaris and Linux, perl's stat returns the same <dev_id, inode_id>
# for every hard link of a file and for symbolic links that point to it,
# so that pair serves as the unique signature of the file.
#
# Check whether perl behaves the same way on your platform; if it does,
# $uniqueSignature may stay at 1 for that platform.

my $os = get_osType();
if ($os eq "-1")
{
  &raise_error_and_exit("Unsupported OS", 20);
}

my $uniqueSignature = $ENV{EM_TEST_UNIQUE_SIGNATURE} ? 0 : 1;

# File path separator, and inode support, depend on the platform.
my $fpSep = '/';
if ($os eq "WIN")
{
  $fpSep = '\\';
  $statSupportsInode = 0;
}

# %excludeList and %includeList are keyed by the <dev_id,inode_id> pair
# when $uniqueSignature is 1, and by <filenamepath> otherwise.

my $targetGuid;
my $collInterval;
my $isHost;
my @logFilesInfo = ();

#############################################################################
# parseArguments()
# -c $collection_interval -t $target_guid -h $is_host_flag
#     -d $logfile_name,$match_pattern,$ignore_pattern;$logfile_name,...
# Four arguments, all optional:
#   c - collection interval (defaults to 300)
#   t - target guid (EM_TARGET_GUID, when present, takes precedence)
#   h - Yes if the metric is executed from host, No otherwise (default No)
#   d - log file information: comma separated details of one log file,
#       several log files separated by semicolons
# Fills $collInterval, $targetGuid, $isHost and @logFilesInfo.
#############################################################################
sub parseArguments ()
{
  my $logFilesArg;

  GetOptions('c=i' => \$collInterval,
             't=s' => \$targetGuid,
             'h=s' => \$isHost,
             'd=s' => \$logFilesArg);

  # Fall back to 300 when no collection interval was passed
  unless (defined($collInterval))
  {
    EMD_PERL_WARN("collection interval is not provided through the argument. So, assigning the default value to it.");
    $collInterval = 300;
  }

  # Not run from a host unless told otherwise
  $isHost = 'No' unless defined($isHost);

  # The environment wins over the -t argument
  $targetGuid = $ENV{EM_TARGET_GUID} if (exists $ENV{EM_TARGET_GUID});

  # Nothing more to do without log file arguments
  return unless defined($logFilesArg);

  foreach my $entry (split(';', $logFilesArg))
  {
    my @fields = split(',', $entry);

    # Both the file name and the match pattern are required
    next unless (defined($fields[0]) && defined($fields[1]));

    my $name = $fields[0];
    $name =~ s/\//\\\\/g if ($os eq "WIN");

    push @logFilesInfo, {
      "log_file_name"           => $name,
      "log_file_match_pattern"  => $fields[1],
      "log_file_ignore_pattern" => (defined($fields[2]) ? $fields[2] : ""),
    };
  }
}

#############################################################################
# addArgumentConditionContext()
#    Turns one log file entry received as an argument into a condition
#    context and appends it to $conditionContextAref.
#    $logFileInfo - href with log_file_name, log_file_match_pattern and
#                   log_file_ignore_pattern
#############################################################################
sub addArgumentConditionContext
{
  my ($logFileInfo) = @_;

  # One key column per criteria field; EQ is defined as 0
  my @keyColumns = map {
    +{ "keyName"          => $_,
       "keyOperator"      => "0",
       "keyValueToReturn" => $logFileInfo->{$_},
       "keyValueToMatch"  => $logFileInfo->{$_} }
  } ("log_file_name", "log_file_match_pattern", "log_file_ignore_pattern");

  push @{$conditionContextAref}, {
    "conditionColumnName" => "",
    "conditionOperator"   => "",
    "criticalThreshold"   => "",
    "warningThreshold"    => "",
    "keyColumnAref"       => \@keyColumns,
  };
}

#############################################################################
# addDefaultConditionContext()
#    Adds $EMSTATE/sysman/log/gcagent.log, scanned for "ERROR" with no
#    ignore pattern, as an example of generic log file monitoring.
#    Does nothing when that file does not exist.
#    EMSTATE points to Agent Home, which can be different from ORACLE_HOME
#############################################################################

sub addDefaultConditionContext ()
{
  my $defaultFile = join("$fpSep", "$ENV{EMSTATE}", "sysman", "log", "gcagent.log");

  return unless (-e $defaultFile);

  # [ key column name, value ] in the order the key columns are registered
  my @criteria = ( [ "log_file_name",           $defaultFile ],
                   [ "log_file_match_pattern",  "ERROR" ],
                   [ "log_file_ignore_pattern", "" ] );

  my @keyColumns = ();
  foreach my $pair (@criteria)
  {
    push(@keyColumns, { "keyName"          => $pair->[0],
                        "keyOperator"      => "0",    # EQ is defined as 0
                        "keyValueToReturn" => $pair->[1],
                        "keyValueToMatch"  => $pair->[1] });
  }

  push @{$conditionContextAref}, {
    "conditionColumnName" => "",
    "conditionOperator"   => "",
    "criticalThreshold"   => "",
    "warningThreshold"    => "",
    "keyColumnAref"       => \@keyColumns,
  };
}

parseArguments();
initFlags();

# Register every log file that came in as an argument
for my $argumentFile (@logFilesInfo)
{
  addArgumentConditionContext($argumentFile);
}

# With nothing to monitor on a host, add an example file for monitoring
if (($#$conditionContextAref < 0) && ($isHost =~ /[yY][eE][sS]/))
{
  addDefaultConditionContext();
}

# Still nothing to monitor
if ($#$conditionContextAref < 0)
{
  exit 0;
}


my %excludeList = ();
my %includeList = ();

{
  # One row per signature file, processed in this order:
  #   [ list to fill, root environment variable, directory under sysman,
  #     file name, third getSignatures argument,
  #     read only when the file exists?, needs $uniqueSignature == 1? ]
  my @signatureSources = (
    [ \%excludeList, "ORACLE_HOME", "admin",  "lfm_efiles_oracle", 0, 0, 0 ],
    [ \%excludeList, "ORACLE_HOME", "config", "lfm_efiles",        1, 1, 0 ],
    [ \%excludeList, "EMSTATE",     "config", "lfm_efiles",        1, 1, 0 ],
    [ \%includeList, "ORACLE_HOME", "admin",  "lfm_ifiles_oracle", 0, 0, 0 ],
    [ \%includeList, "ORACLE_HOME", "config", "lfm_ifiles",        1, 1, 1 ],
    [ \%includeList, "EMSTATE",     "config", "lfm_ifiles",        1, 1, 1 ],
  );

  foreach my $source (@signatureSources)
  {
    my ($listRef, $rootVar, $subDir, $leaf, $mode, $onlyIfPresent, $needsUnique) = @$source;

    next if ($needsUnique && $uniqueSignature != 1);

    # The EMSTATE copy is only read when EMSTATE differs from ORACLE_HOME
    next if ($rootVar eq "EMSTATE" && !($ENV{ORACLE_HOME} ne $ENV{EMSTATE}));

    my $signatureFile = "$ENV{$rootVar}"."$fpSep"."sysman"."$fpSep".$subDir."$fpSep".$leaf;
    next if ($onlyIfPresent && ! -e $signatureFile);

    getSignatures($listRef, $signatureFile, $mode);
  }
}

# An excluded file is never treated as included
if ($uniqueSignature == 1)
{
  $includeList{$_} = "" foreach (grep { defined($excludeList{$_}) } keys %includeList);
}

# Walk $conditionContextAref and register every valid
# <file, match pattern, ignore pattern> criteria in %GC

foreach my $conditionHref (@$conditionContextAref)
{
  my $keysAref = $conditionHref->{"keyColumnAref"};
  next if ($#{$keysAref} < 1 );

  my ($fileKeyToReturn, $fileKeyOperator) = ("", "");
  my $searchPatternToReturn = "";
  my $ignorePatternToReturn = "";

  # Pick the values of interest out of the key columns
  foreach my $keyHref (@$keysAref)
  {
    my $keyName = $keyHref->{"keyName"};
    if ($keyName eq "log_file_name")
    {
      $fileKeyToReturn = $keyHref->{"keyValueToReturn"};
      $fileKeyOperator = $keyHref->{"keyOperator"};
    }
    elsif ($keyName eq "log_file_match_pattern")
    {
      $searchPatternToReturn = $keyHref->{"keyValueToReturn"};
    }
    elsif ($keyName eq "log_file_ignore_pattern")
    {
      $ignorePatternToReturn = $keyHref->{"keyValueToReturn"};
    }
    elsif ($keyName ne "timestamp")   # the timestamp key column is ignored
    {
      EMD_PERL_ERROR("Unknown Key Column: $keyName");
    }
  }

  # File name and match pattern are mandatory
  unless ($fileKeyToReturn && $searchPatternToReturn)
  {
    EMD_PERL_ERROR("Skipping, Required Key Columns are null"); 
    next;
  }
  $ignorePatternToReturn = "" unless ($ignorePatternToReturn);

  EMD_PERL_DEBUG("fileKeyToReturn=$fileKeyToReturn, searchPatternToReturn=$searchPatternToReturn, ignorePatternToReturn=$ignorePatternToReturn");

  if ($os eq "WIN")
  {
    # UNC names are rejected on Windows
    if ($fileKeyToReturn =~ /^\\\\/)
    {
      if ($uncErrors)
      {
        $uncErrors .= ", $fileKeyToReturn";
      }
      else
      {
        $uncErrors = "Invalid Path: [UNC filename(s) not supported on Windows]: $fileKeyToReturn";
      }
      next;
    }

    # A Windows path may not start with a separator
    if ($fileKeyToReturn =~ /^\\/ || $fileKeyToReturn =~ /^\//)
    {
      if ($driveErrors)
      {
        $driveErrors .= ", $fileKeyToReturn";
      }
      else
      {
        $driveErrors = "Invalid filename(s): Path must start with a valid drive on Windows: $fileKeyToReturn";
      }
      next;
    }
  }

  # With '/' as separator the path has to start with '/' or '%'
  if ($fpSep eq '/' && $fileKeyToReturn !~ /^\// && $fileKeyToReturn !~ /^\%/)
  {
    if ($unixPathErrors)
    {
      $unixPathErrors .= ", $fileKeyToReturn";
    }
    else
    {
      $unixPathErrors = "Invalid filename(s): Path must start with root('/') on UNIX systems: $fileKeyToReturn";
    }
    next;
  }

  # Operator 0 (EQ): the key is a plain file name
  if ($fileKeyOperator eq "0")
  {
    updateGC($fileKeyToReturn, $searchPatternToReturn, $ignorePatternToReturn);
    next;
  }

  # Any other operator: the key is a pattern that expands to files
  if (isRegexpValid($fileKeyToReturn) == 0)
  {
    if ($regexpErrors)
    {
      $regexpErrors .= ", $fileKeyToReturn";
    }
    else
    {
      $regexpErrors = "Invalid regular expression pattern for file(s): $fileKeyToReturn";
    }
    next;
  }

  foreach my $expandedFile (expandPath($fileKeyToReturn))
  {
    updateGC($expandedFile, $searchPatternToReturn, $ignorePatternToReturn);
  }
}


# Scan each registered log file against its criteria from %GC
foreach my $file (keys %GC)
{
  grepLogFile($file, $GC{$file}, $collInterval);
}

# Merge the per-category errors, in a fixed order, into the single
# em_warning record of this upload
$allErrors = join("; ",
                  grep { $_ }
                    (($statErrors ? $statErrors : $allErrors),
                     $regexpErrors, $driveErrors, $uncErrors,
                     $unixPathErrors, $fileOpenErrors, $missingFileErrors,
                     $directoryErrors, $readErrors, $missingPatternErrors,
                     $excludeFileErrors, $includeFileErrors));

if ($allErrors)
{
  print "em_warning=$allErrors\n" and
    EMD_PERL_WARN("em_warning=$allErrors");
}

# Persist the state for the next run
saveFlags($collInterval);

#############################################################################
#--------------------- sub updateGC -----------------------------------
# Registers one <file, search pattern, ignore pattern> criteria in %GC.
# Input Parameters:
#  $file          - log file to be monitored
#  $searchPattern - pattern to look for
#  $ignorePattern - pattern whose matching lines are skipped ("" for none)
# The criteria is dropped, and the matching error buffer extended, when the
# file is missing, is a directory, is not readable, cannot be opened or
# stat'ed, or carries a signature that is marked in %excludeList.
# Sets $uploadContent{$file} when the signature is marked in %includeList.
#############################################################################
sub updateGC
{
  my ($file, $searchPattern, $ignorePattern) = @_;

  # Return if this criteria is already registered in key_value_set
  my $criteriaKey = "$file,$searchPattern,$ignorePattern";
  return if ( $key_value_set{$criteriaKey} );

  if ( ! -e $file)
  {
    EMD_PERL_ERROR("$file does not exist");
    $missingFileErrors.= ", $file" if($missingFileErrors);
    $missingFileErrors="Missing file(s): $file" if(!$missingFileErrors);
    return ;
  }

  if( -d $file)
  {
    EMD_PERL_ERROR("$file is a directory, not a log file");
    $directoryErrors .= ", $file" if $directoryErrors;
    $directoryErrors =" Following directories can not be monitored as Log Files: $file" if !$directoryErrors;
    return;
  }

  if ( ! -r $file )
  {
    EMD_PERL_ERROR("$file is not readable");
    $readErrors .= ", $file" if $readErrors;
    $readErrors =" Following files are not readable: $file" if !$readErrors;
    return;
  }

  my $file_signature="";

  if ($os eq "WIN")
  {
    # On Windows the signature is the MD5 digest of the first 10240 bytes.
    # Three-argument open on a lexical handle: the file name is taken
    # literally and the global DATA handle is left alone.
    my $signatureHandle;
    if (!open($signatureHandle, '<', $file))
    {
      EMD_PERL_ERROR("$file could not be opened\n");
      $fileOpenErrors.= ", $file" if($fileOpenErrors);
      $fileOpenErrors="Could not open file(s): $file" if(!$fileOpenErrors);
      return;
    }
    my $signatureBytes = "";
    read($signatureHandle, $signatureBytes, 10240);
    close $signatureHandle;
    # A failed read must not hand an undefined value to md5_hex
    $signatureBytes = "" if (!defined($signatureBytes));
    $file_signature = md5_hex($signatureBytes);
  }
  else
  {
    # Elsewhere the signature is the "<dev>,<inode>" pair reported by stat
    my @attr = stat($file);

    if ($#attr == -1)
    {
      EMD_PERL_ERROR("could not stat $file");
      $statErrors .= ", $file" if $statErrors;
      $statErrors = "Could not stat file(s): $file" if !$statErrors;
      return ;
    }
    my $dev = $attr[0];
    my $inode_no = $attr[1];
    $file_signature = "$dev,$inode_no";
  }

  if ($excludeList{$file_signature} && $excludeList{$file_signature} == 1)
  {
    # Raise an error here
    EMD_PERL_ERROR("Following Log Files can not be monitored: $file");
    $excludeFileErrors .= ", $file" if $excludeFileErrors;
    $excludeFileErrors = "Following Log File(s) can not be monitored: $file" if !$excludeFileErrors;
    return;
  }
  $uploadContent{$file} = 1 if (!$uploadContent{$file} && $includeList{$file_signature} && $includeList{$file_signature} == 1);

  # Append a fresh pattern record to the list kept for this file
  my @patterns = ();
  my %pattern = ( "searchPattern" => $searchPattern,
                  "ignorePattern" => $ignorePattern,
                  "matchCount" => 0,
                  "matchedContent" => "");

  @patterns = @{$GC{$file}} if $GC{$file};
  push @patterns, \%pattern;
  $GC{$file} = \@patterns;
  $key_value_set{$criteriaKey} = 1;
}

#############################################################################
#---------------------sub grepLogFile-----------------------------------
# Scans the not yet read part of one log file for its patterns and prints
# one em_result record per pattern that matched.
# Input Parameters:
#  $file - log file to be scanned
#  $patternsAref - Aref to the patterns to be scanned in this file,
#                  where each element is a patternHref
#                  patternHref has the following elements
#                     searchPattern
#                     ignorePattern
#                     matchCount      (incremented here)
#                     matchedContent  (extended here when content is uploaded)
#  $collInterval - collection interval; with 0 a result is always printed
# State kept in $Flag{$file}: bytesRead, signature, signatureLineCount,
# position and line. A change of the MD5 digest over the leading bytes of
# the file is treated as a rotation and restarts the scan at the beginning.
# NOTE(review): the reported line numbers come from split() counts per
# block and look approximate across block boundaries - confirm before
# relying on them.
#############################################################################

sub grepLogFile 
{
  my ($file, $patternsAref,$collInterval) = @_;

  if ($#$patternsAref < 0)
  {
    EMD_PERL_ERROR("No patterns specified for $file");
    $missingPatternErrors.= ", $file" if($missingPatternErrors);
    $missingPatternErrors="No patterns specified for: $file" if(!$missingPatternErrors);
    return;
  }

  my @patterns = @$patternsAref;

  if ( ! -e $file)
  {
    EMD_PERL_ERROR("$file does not exist");
    $missingFileErrors.= ", $file" if($missingFileErrors);
    $missingFileErrors="Missing file(s): $file" if(!$missingFileErrors);
    return ;
  }

  my $beginline = 1;  
  my $beginByte = 0;  
  my $fileRotated = 0;  

  my @attr = stat($file);
  if ($#attr == -1)
  {
    EMD_PERL_ERROR("could not stat $file");
    $statErrors.= ", $file" if($statErrors);
    $statErrors="Could not stat file(s): $file" if(!$statErrors);
    return ;
  }
  my $size = $attr[7];

  return if $size == 0; # Either no log has been generated yet or
                        # The file has just been rotated, new file has no data

  my ($oldSignatureLineCount, $signatureLineCount) = (0, 0);
  my ($oldSignature, $signature);

  $oldSignatureLineCount = $Flag{$file}{signatureLineCount} if (defined($Flag{$file}{signatureLineCount}));
  $oldSignature = $Flag{$file}{signature} if (defined($Flag{$file}{signature}));

  # Three-argument open on a lexical handle: the file name is taken
  # literally, the global DATA handle is left alone, and the handle is
  # closed on every path out of this sub.
  my $logHandle;
  if (!open($logHandle, '<', $file))
  {
    EMD_PERL_ERROR("$file could not be opened\n");
    $fileOpenErrors.= ", $file" if($fileOpenErrors);
    $fileOpenErrors="Could not open file(s): $file" if(!$fileOpenErrors);
    return;
  } 

  # The signature covers as many leading bytes as the previous run read,
  # or 1024 bytes when there is no previous run.
  my $bytesRead = defined($Flag{$file}{bytesRead}) ? $Flag{$file}{bytesRead} : 0;
  my $bytesToRead = ($bytesRead == 0) ? 1024 : $bytesRead;

  my $line1;
  $bytesRead = read($logHandle,$line1,$bytesToRead);
  # A failed read counts as "nothing read"
  $bytesRead = 0 if (!defined($bytesRead));
  $line1 = "" if (!defined($line1));
  $Flag{$file}{bytesRead} = $bytesRead;
 
  if($bytesRead != 0)
  {
    ++$signatureLineCount; 
  }

  if ($oldSignatureLineCount > 0)
  {
    $signature = md5_hex($line1);

    if (!defined($oldSignature) || $signature ne $oldSignature)
    {
      $fileRotated = 1;
      my $oldSignatureText = defined($oldSignature) ? $oldSignature : "";
      EMD_PERL_DEBUG("File Rotated: old sig: $oldSignatureText, new sig: $signature");
    }
  }

  if ($signatureLineCount > 0)
  {
    #Compute the current file signature
    $signature = md5_hex($line1);
  }

  # Resume from the saved position unless the file has been rotated
  if ($fileRotated == 0)
  {
    $beginByte = $Flag{$file}{position} if (defined($Flag{$file}{position}));
    $beginline = $Flag{$file}{line} if (defined($Flag{$file}{line}));
  }

  #Save the new signature into the hash so that it can be persisted
  $Flag{$file}{signature} = $signature ;
  $Flag{$file}{signatureLineCount} = $signatureLineCount ;

  if ($beginByte >= $size)
  {
    EMD_PERL_DEBUG("$file: Nothing to read");
    close $logHandle;
    return;
  }

  seek ($logHandle, $beginByte, 0);

  my $currentLine = $beginline;
  my $lastLine = $beginline;
  my $block_size = 10240;
  my $tmp_block="";   # end of a full block, carried over to the next one
  my $file_contents;

  # Matches a batch of lines against every pattern and keeps the counters
  # and line bookkeeping up to date.
  my $scanLines = sub
  {
    my @file_lines = @_;

    foreach my $line (@file_lines)
    {
      foreach my $patternHref (@patterns)
      {
        my $searchPattern = ${$patternHref}{"searchPattern"};
        my $ignorePattern = ${$patternHref}{"ignorePattern"};
        next if ($ignorePattern ne "" && $ignorePattern ne "%" &&
                 $line =~ /$ignorePattern/);
        next if ($searchPattern eq "" || $searchPattern eq "%");
        if ($line =~ /$searchPattern/i)
        {
          ${$patternHref}{"matchCount"}++;
          ${$patternHref}{"matchedContent"} .= "<line#" . $currentLine . ">: ". $line . "; " if ($uploadContent{$file});
        }
      }
      $currentLine++;
    }
    $lastLine += $#file_lines;
  };

  while(read($logHandle,$file_contents,$block_size))
  {
    my $end_pos = rindex($file_contents,"\n");
    my $match;
    $Flag{$file}{position} = tell($logHandle);
    if(length($file_contents) != $block_size)
    {
        # Short read: take everything that is left
        $match = $tmp_block.$file_contents;
        $tmp_block = "";
    }
    else
    {
        # Full block: cut at the last newline (or, failing that, the last
        # space) and carry the rest over to the next block
        if($end_pos == -1 )
        {
              $end_pos = rindex($file_contents," ");
        }
        $match = $tmp_block.substr($file_contents,0,$end_pos);
        $tmp_block = substr($file_contents,$end_pos,$block_size-$end_pos);
    }
    $scanLines->(split("\n",$match));
  }
  close $logHandle;

  # When the last read filled a whole block, the carried-over end of the
  # file has not been scanned yet, while the saved position is already
  # past it. Scan it now so that it is not lost.
  if ($tmp_block ne "")
  {
    my @tail_lines = split("\n",$tmp_block);
    $scanLines->(@tail_lines) if (@tail_lines);
  }

  my $msg1 = "";
  if( $beginline == $lastLine )
  {
    $msg1 = "Scanned line $beginline in $file."
  }
  else
  {
    $msg1 = "Scanned $file from line $beginline to $lastLine.";
  }
  $msg1 = "$file has been Rotated. $msg1" if $fileRotated;

  foreach my $patternHref (@patterns)
  {
    if (${$patternHref}{"matchCount"} > 0)
    {
      my $searchPattern = ${$patternHref}{"searchPattern"};
      my $ignorePattern = ${$patternHref}{"ignorePattern"};
      my $matchCount = ${$patternHref}{"matchCount"};
      my $msg;
      if (${$patternHref}{"matchCount"} > 1)
      {
        $msg = $msg1 . " Found $matchCount occurences of the pattern [$searchPattern]";
      }
      else
      {
        $msg = $msg1 . " Found 1 occurence of the pattern [$searchPattern]";
      }
      $msg .= " with ignore pattern [" . $ignorePattern ."]" if ${$patternHref}{"ignorePattern"};
      if ($uploadContent{$file})
      {
        $msg .= ";";
        $msg .= " " . ${$patternHref}{"matchedContent"};
        if (length($msg) > 1024)
        {
          my $msg2 = "; truncated ...";
          $msg = substr($msg, 0, 1024-length($msg2)) . $msg2;
        }
      }
      else
      {
        $msg .= ".";
      }
      my $expectedCollectionTime;
      if(!($previousTimestamp))
      {
          #This could happen if either there is no state file or if the fix for : 9852994 is applied for the first time
          $expectedCollectionTime = 0;
      }
      else
      {
          # Add twice the collection interval time to the last collected time stamp and compute the expectedCollection time.
          $expectedCollectionTime = str2time($previousTimestamp) + ($collInterval * 2);
      }

      my $collectionTime = str2time($timestamp_withoutTZ);

      #if the current collection time is less than expected collection time, then print the result or else skip the result.
      if ( ($collectionTime <= $expectedCollectionTime ) || $collInterval == 0 || $expectedCollectionTime == 0)
      {
          my $result="em_result=$file,$searchPattern,$ignorePattern,$timestamp,$matchCount,$msg";

          if (!(defined($isHost) && $isHost =~ /[yY][eE][sS]/))
          {
            # bug14214947 this section added for de-duplication stuff
            # --------------------------------------------------------
            my $dateTimeEndInSec = timelocal(localtime);
            my $dateTimeStartInSec = $dateTimeEndInSec - 1;

            my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($dateTimeStartInSec);
            my $dateTimeStart = sprintf("%.4d-%.2d-%.2d %.2d:%.2d:%.2d", $year+1900, $mon+1, $mday, $hour, $min, $sec);

            ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime($dateTimeEndInSec);
            my $dateTimeEnd = sprintf("%.4d-%.2d-%.2d %.2d:%.2d:%.2d", $year+1900, $mon+1, $mday, $hour, $min, $sec);
      
            if($matchCount <= 1)
            {
              $dateTimeStart = $dateTimeEnd;
            }

            $result=$result.",$dateTimeStart,$dateTimeEnd";
            #-----------------------------------------------------------
          }
         print "$result\n";
         EMD_PERL_DEBUG("$result");
      }
      else
      {
           #log the time stamps and skip the collection
           EMD_PERL_DEBUG("Current collection time stamp [$collectionTime in epoch seconds] is much greater than the scheduled collection time stamp [$expectedCollectionTime in epoch seconds]. This might be due to blackout or the agent might down. So, skipping this result");
      }
    }
  }

  $Flag{$file}{line} = $lastLine;
#  EMD_PERL_DEBUG("Next State[$file]: Line: $Flag{$file}{line}; Position: $Flag{$file}{position}; signatureLineCount: $Flag{$file}{signatureLineCount}; signature: $Flag{$file}{signature}");
}

#############################################################################
#---------------------sub getStateFileName---------------------------------
# Returns the path of the state file:
#   $EMSTATE/sysman/emd/state/parse-log-<target guid>
# Calls raise_error_and_exit when EMSTATE is not set (code 2) or when no
# target guid is available (code 3).
#############################################################################
sub getStateFileName 
{
  unless( defined $ENV{EMSTATE} ) 
  {
    &raise_error_and_exit("The environment variable EMSTATE needs to be set in order to run parse-log1.pl",2);
  }

  # The target guid comes from EM_TARGET_GUID or from the -t argument
  if(!defined($targetGuid) || $targetGuid eq "" ) 
  {
    &raise_error_and_exit("The environment variable EM_TARGET_GUID needs to be set or target guid needs to be passed as an argument with the option -t in order to run parse-log1.pl",3);
  }
  return "$ENV{EMSTATE}"."$fpSep"."sysman"."$fpSep"."emd"."$fpSep"."state"."$fpSep"."parse-log-$targetGuid";
}

#############################################################################
#---------------------sub initFlags-----------------------------------
# Sets $ENV{EM_STATE_FILE} and loads the state of the previous run from it
# into %Flag and $previousTimestamp. Returns quietly when the state file
# cannot be opened.
# Each state line holds seven fields separated by "|||":
#   file, line, position, "signature_line_count:<n>", signature,
#   bytesRead, timestamp
# Lines written by the old code carry dev and inode in the 4th and 5th
# field instead. Every line that does not have seven fields is reported
# as "Bad State" and skipped.
#############################################################################
sub initFlags 
{
  $ENV{EM_STATE_FILE} = getStateFileName();
  
  # Read the state line if we have one. Three-argument open on a lexical
  # handle takes the file name literally.
  my $stateHandle;
  unless( open($stateHandle, '<', $ENV{EM_STATE_FILE}) ) 
  {
#    EMD_PERL_DEBUG("$ENV{EM_STATE_FILE} file couldn't be opened"); 
    return;
  }
  my @stateLines = <$stateHandle>;
  close $stateHandle;

  foreach my $state (@stateLines)
  {
    chomp($state);

    my @tokens = $state ? split ('\|\|\|', $state) : ();
#    EMD_PERL_DEBUG("#tokens = $#tokens");

    # Empty and malformed lines alike are reported, not silently dropped
    if ($#tokens != 6)
    {
      EMD_PERL_ERROR("Bad State: $state");
      next;
    }

    my $file = $tokens[0];
    $Flag{$file}{line} = $tokens[1];
    $Flag{$file}{position} = $tokens[2];
    my @decodedLineCount = split('signature_line_count:',$tokens[3]);
    $previousTimestamp = $tokens[6];
    if ($#decodedLineCount == 1)
    {
      $Flag{$file}{signatureLineCount} = $decodedLineCount[1];
      $Flag{$file}{signature} = $tokens[4];
      $Flag{$file}{bytesRead} = $tokens[5];
    }
    else
    {
      #this can only happen from the old code which stored dev/inode info  
      #in $tokens[3] and $tokens[4]. Following code takes care of
      #potential file rotation when switching to the new md5 digest scheme.
      if ($statSupportsInode)
      {
        my $oldDev = $tokens[3];
        my $oldInode = $tokens[4];
        my $dev = -1;
        my $inode = -1;
        my @attr = stat($file);
        if ($#attr != -1)
        {
          $dev = $attr[0];
          $inode = $attr[1];
          if ($dev ne $oldDev ||
              $inode ne $oldInode)
          {
            #file has been rotated, so set the old signature
            #to a default which would not match with the current
            #MD5 digest
            $Flag{$file}{position} = 0;
            $Flag{$file}{line} = 1;
            $Flag{$file}{signatureLineCount} = 1;
            $Flag{$file}{signature} = "SECRET";
            $Flag{$file}{bytesRead} = 0;
            EMD_PERL_DEBUG("$file rotated while switching to the md5 scheme for the first time");
          }
        }
      }
    }
#    EMD_PERL_DEBUG("Previous State[$file]: Line: $Flag{$file}{line}; Position: $Flag{$file}{position}; signatureLineCount: $Flag{$file}{signatureLineCount}; signature: $Flag{$file}{signature}");
  }
}
#############################################################################
#---------------------sub saveFlags-----------------------------------
#############################################################################
sub saveFlags
{
  # Writes the current scan state held in %Flag to the state file named by
  # $ENV{EM_STATE_FILE}, one '|||'-separated record per log file.
  #
  # Input:
  #  $collInterval - collection interval; 0 marks a real-time call, for
  #                  which no state is saved.
  #
  # Only log files that also have an entry in %GC are written.
  my ($collInterval) = @_;

  EMD_PERL_DEBUG("_collInterval value received:".$collInterval);
  if( $collInterval == 0)
  {
    #Don't save the state if the collection is realtime
    EMD_PERL_DEBUG("This is a real time call. Skipping save state method");
    return;
  }
  EMD_PERL_DEBUG("This is a default collection call. Saving the state");

  # Save the state file.
  my $stateFh;
  unless( open($stateFh, '>', $ENV{EM_STATE_FILE}) )
  {
    EMD_PERL_ERROR("Unable to open state file: $ENV{EM_STATE_FILE}. [$!]");
    return;
  }

  foreach my $logFile (keys %Flag)
  {
    chomp($logFile);
    next unless ($logFile && defined($GC{$logFile}));

    print $stateFh "$logFile|||$Flag{$logFile}{line}|||$Flag{$logFile}{position}|||signature_line_count:$Flag{$logFile}{signatureLineCount}|||$Flag{$logFile}{signature}|||$Flag{$logFile}{bytesRead}|||$timestamp_withoutTZ\n";
  }

  # Buffered write errors only surface when the handle is closed, so the
  # result of close is checked and reported.
  unless( close($stateFh) )
  {
    EMD_PERL_ERROR("Unable to write state file: $ENV{EM_STATE_FILE}. [$!]");
  }
  return;
}

#############################################################################
#-----------------------raise_error_and_exit--------------------      
#############################################################################
sub raise_error_and_exit
{
  # Logs $message through EMD_PERL_ERROR, prints it to STDERR and
  # terminates the script.
  #
  # Inputs:
  #  $message     - text to report
  #  $exit_status - process exit status; defaults to 1 when not supplied so
  #                 that an error never ends the script with status 0
  #
  # The sub is declared without a prototype: it takes two arguments, so an
  # empty "()" prototype would only reject ordinary calls.
  my ($message, $exit_status) = @_;
  $exit_status = 1 unless defined($exit_status);

  EMD_PERL_ERROR($message);
  print STDERR "$message \n";
  exit $exit_status;
}

#############################################################################
#---------------------sub getSignatures -----------------------------------
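# Inputs:
#  $signHref     - hash reference; one key is added per existing file, the
#                  key being that file's signature as returned by getSignature
#  $config_file  - name of the list-file that names the files
#  $customerFile - passed through to getConfiguredFiles
#                  (1 if it is a customer file, else 0)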
#############################################################################
sub getSignatures
{
  # Adds the signature of every existing file named by $config_file to the
  # hash referenced by $signHref. Signatures are stored as keys with the
  # value 1; paths that do not exist are skipped.
  my ($signHref , $config_file, $customerFile) = @_;

  foreach my $candidate (getConfiguredFiles($config_file, $customerFile))
  {
    # Nothing to sign for a path that is not there.
    next unless (-e $candidate);

    my $fileSignature = getSignature($candidate);
    $signHref->{$fileSignature} = 1;
  }
}

#############################################################################
#---------------------sub getConfiguredFiles -----------------------------
# Inputs:
#  $file - configuration file for log file monitoring
#  $customerFile - 1 if it is a customer file, else 0
#                
# Outputs:
#  @configured_files - Array of file-names indicated by the configuration file
#                      (nothing is returned if the file cannot be opened)
#############################################################################

sub getConfiguredFiles
{
  # Reads a log-file-monitoring configuration file and returns the list of
  # file names it designates.
  #
  # Inputs:
  #  $config_file  - path of the configuration file
  #  $customerFile - 1: every entry is taken literally and must be an
  #                     existing file or symbolic link;
  #                  otherwise: every entry is expanded with recursiveExpand
  #
  # Returns nothing when the configuration file cannot be opened; in that
  # case the file name is appended to $fileOpenErrors.
  my ($config_file, $customerFile) = @_;
  my @configured_files = ();

  my $configFh;
  unless (open($configFh, '<', $config_file))
  {
    EMD_PERL_ERROR("Configuration file $config_file could not be opened\n");
    if ($fileOpenErrors)
    {
      $fileOpenErrors .= ", $config_file";
    }
    else
    {
      $fileOpenErrors = "Could not open file(s): $config_file";
    }
    return;
  }
  my @configData = <$configFh>;
  close $configFh;

  foreach my $configLine ( @configData )
  {
    #strip leading and trailing whitespace (this also removes the trailing
    #new-line character), then ignore comments and blank lines
    $configLine =~ s/^\s+//;
    $configLine =~ s/\s+$//;
    next if($configLine =~ /^\#/);
    next if($configLine =~ /^$/);

    #if the line starts with <EMSTATE>, replace it with its value.
    #Only the leading placeholder is substituted; the rest of the line is
    #kept untouched.
    $configLine =~ s/^<EMSTATE>/$ENV{EMSTATE}/;

    #if the line starts with <ORACLE_HOME>, replace it with its value
    $configLine =~ s/^<ORACLE_HOME>/$ENV{ORACLE_HOME}/;

    #On Windows, change / to \\ in the file path specification for oracle
    #shipped configuration files. Replacing with double backslash
    #ensures that wildcard % is not escaped on Windows platform.
    $configLine =~ s/\//\\\\/g
      if ($customerFile == 0 &&
          $os eq "WIN");

    if ($customerFile == 1)
    {
      if(-f $configLine || -l $configLine)
      {
        push (@configured_files, $configLine);
      }
      else
      {
        EMD_PERL_ERROR("$configLine is not a file or symbolic link, bad entry in $config_file");
      }
    }
    else
    {
      push(@configured_files, recursiveExpand($configLine));
    }
  }
  return @configured_files;
}

#############################################################################
#---------------------sub getSignature -----------------------------
# Inputs:
#  $path - absolute path of file/link/directory
#                
# Outputs:
#  $signature - MD5 digest of the first 10240 bytes of the file on Windows
#             - <dev_id,inode_id> pair on UNIX platforms
#             - $path itself when stat() does not return the full attribute list
#############################################################################

sub getSignature
{
  # Returns a signature that identifies the file behind $path, so that two
  # paths leading to the same file can be recognised.
  #
  # Input:
  #  $path - path of the file to sign
  #
  # Returns:
  #  - on Windows, the MD5 hex digest of the first 10240 bytes of the file
  #  - on the other supported platforms, "<dev_id>,<inode_id>" from stat()
  #  - $path itself when the file cannot be opened / stat'ed
  #
  # Prints an em_error line and exits with status 1 when $os is not one of
  # the platforms listed below.
  if ($os ne "LNX" && $os ne "SOL" && $os ne "WIN" && $os ne "HP" && $os ne "AIX" && $os ne "OSF1" && $os ne "MAC OS X")
  {
    EMD_PERL_ERROR("Check if your OS supports unique stat based signature for symbolic and hard links");
    print "em_error=Check if your OS supports unique stat based signature for symbolic and hard links\n";
    exit 1;
  }

  my ($path) = @_;

  if ($os eq "WIN")
  {
    # On Windows Platform, compute MD5 digest over the first 10240 bytes
    # of the given file. This digest would be used as the unique signature
    # for a given file. This approximation on Windows would lead to false
    # positives with respect to 2 files being equivalent. Such false
    # positives are highly unlikely to occur in real customer situations
    # with sensitive files.
    #
    # 'or' (not '||') is required here: '||' would bind to the file name
    # and the fallback would never run, giving every unreadable file the
    # digest of the empty string.
    my $fh;
    open($fh, '<', $path) or return $path;

    my $leadingBytes = "";
    read($fh, $leadingBytes, 10240);
    close $fh;

    my $signature = md5_hex($leadingBytes);
#    EMD_PERL_DEBUG("getSignature: $path: $signature");
    return $signature;
  }

  # stat() returns a 13-element list on success; anything else means the
  # file could not be examined.
  my @attr = stat($path);
  return $path if ($#attr != 12);

  my $dev = $attr[0];
  my $inode_no = $attr[1];
  return "$dev,$inode_no";
}

#############################################################################
#---------------------sub recursiveExpand -----------------------------
# Inputs:
#  path to expand
#                
# Outputs:
#  Returns list of files and symbolic links after recursive expansion
#############################################################################

sub recursiveExpand
{
  # Expands $configLine into the list of files and symbolic links it
  # designates, descending into directories.
  #
  # Input:
  #  $configLine - path to expand
  #
  # Returns the list of paths found (empty when nothing matches).
  #
  # Bug:10384753 - expandPath is only used when the path contains one of
  # the characters '%', '|' or '*'. For any other path a plain recursive
  # directory read is enough, which is a performance improvement because
  # expandPath is comparatively expensive.
  # NOTE(review): the '|' in the character class below is a literal pipe,
  # not an alternation - confirm that this is intended.
  my ($configLine) = @_;
  my @result = ();

  if ($configLine !~ /[\%|\*]/)
  {
    return @result unless (-e $configLine);

    unless (-d $configLine)
    {
      # if its a file, then just add the file entry to the return array.
      push (@result, $configLine);
      return @result;
    }

    # if its a dir, then read the content and recurse.
    my $dh;
    unless (opendir($dh, $configLine))
    {
      EMD_PERL_ERROR("Unable to read directory $configLine. [$!]");
      return @result;
    }
    my @entries = grep { !/^\.{1,2}$/ } readdir($dh);  #skips '.' and '..' entries in a directory.
    closedir($dh);

    # readdir returns only the entry name and not the full path, so each
    # name is prefixed with the directory it was read from.
    foreach my $path (map { $configLine . '/' . $_ } @entries)
    {
      if (-d $path)
      {
        push (@result, recursiveExpand($path));
      }
      elsif (-f $path || -l $path)
      {
        push (@result, $path);
      }
    }
    return @result;
  }

  #else if there is a wildcard character use expandPath subroutine.
  foreach my $path (expandPath($configLine))
  {
    if (-d $path)
    {
      # NOTE: Add two $fpSep after $path, avoids
      # wildcard % being escaped on Windows
      my $dir = "$path"."$fpSep"."$fpSep"."%";
#      EMD_PERL_DEBUG("recursiveExpand: $dir");
      push (@result, recursiveExpand($dir));
    }
    elsif (-f $path || -l $path)
    {
#      EMD_PERL_DEBUG("recursiveExpand: pushing $path");
      push (@result, $path);
    }
  }
  return @result;
}
