# 
# $Header: emas/sysman/admin/scripts/otd/pm/simpleXPath.pm /main/1 2012/04/15 03:56:27 rahgupta Exp $
#
# simpleXPath.pm
# 
# Copyright (c) 2004, 2012, Oracle and/or its affiliates. All rights reserved. 
#
#    NAME
#      simpleXPath.pm - Simple XPath implementation
#
#    DESCRIPTION
#       Wrapper around the XML::Parser module that implements a subset of XPath (as 
#       defined by W3C). The following syntax features are supported in query
#       strings:
#           /<tag name or *>                    select child nodes
#           //<tag name or *>                   select descendant nodes
#           ancestor::<tag name or *>           select ancestor nodes
#           ancestor-or-self::<tag name or *>   select ancestor nodes or self
#           [<number>]                          select a node by index
#           [@<attribute name>="<value>"]       select nodes by attribute value
#           [<tag name>="<value>"]              select nodes by the string values
#                                               of child nodes
#
#       Entry points:
#           simpleXPathQuery
#           simpleXPathQueryForText
#           simpleXPathQueryForNodes
#           ensuredef
#
#    NOTES
#      <other useful comments, qualifications, etc.>
#
#    MODIFIED   (MM/DD/YY)
#    vishagup    09/07/10 - WLS GC12 auto-discovery
#    jsmoler     11/19/08 - remove schema support
#    jsmoler     11/06/08 - some schema support
#    jsmoler     10/28/08 - support queries on pre-parsed files
#    jsmoler     07/18/07 - Backport jsmoler_bug-5736476_test_070712 from
#                           st_emasgc_10.2.0.1.0
#    jsmoler     12/28/06 - add some error checking
#    jsmoler     08/02/06 - Backport jsmoler_bug-5046383 from main
#    jsmoler     03/06/06 - add package statement 
#    jsmoler     10/07/04 - DTD support
#    jsmoler     10/04/04 - jsmoler_ecm01
#    jsmoler     08/18/04 - Creation
# 

package pm::simpleXPath;

use strict;
use XML::Parser;
use Exporter 'import';

our @EXPORT = qw(
    simpleXPathQuery
    simpleXPathQueryForText
    simpleXPathQueryForNodes
    simpleXPathQueryForTextNodes
    ensuredef
);

#
# Performs a query on an XML file. Returns the attribute maps for the
# result nodes.
#
# Arguments:
#   0: XML file name
#   1: XPath query string
#
# Returns: A list of hash references. Each hash reference corresponds to
#          one node in the query result. The hashes map attribute names
#          to values for each node. Nodes are in document order.
#
sub simpleXPathQuery #(for attributes)
{
    my @results = simpleXPathQueryForNodes(@_);
    @results = map { $$_{'attributes'} } @results;
    return @results;
}

#
# Performs a query on an XML file. Returns the attribute maps for the
# result nodes.
#
# Arguments:
#   0: Reference to the root XML node
#   1: XPath query string
#
# Returns: A list of hash references. Each hash reference corresponds to
#          one node in the query result. The hashes map attribute names
#          to values for each node. Nodes are in document order.
#
sub queryForAttributes
{
    my @results = queryForNodes(@_);
    @results = map { $$_{'attributes'} } @results;
    return @results;
}

#
# Performs a query on an XML file. Returns the string values of the result nodes.
#
# Arguments:
#   0: XML file name
#   1: XPath query string
#
# Returns: A list of strings. Each string is the string value (concatenated values
#          of all text descendants) of a result node. Nodes are in document order.
#
sub simpleXPathQueryForText
{
    my @results = simpleXPathQueryForNodes(@_);
    @results = map(getStringValue($_), @results);
    return @results;
}

#
# Performs a query on an XML file. Returns the string values of the result nodes.
#
# Arguments:
#   0: Reference to the root XML node
#   1: XPath query string
#
# Returns: A list of strings. Each string is the string value (concatenated values
#          of all text descendants) of a result node. Nodes are in document order.
#
sub queryForText
{
    my @results = queryForNodes(@_);
    @results = map(getStringValue($_), @results);
    return @results;
}

#
# Performs a query on an XML file. Returns a list of references to hashes 
# representing the result nodes.
#
# Arguments:
#   0: XML file name
#   1: XPath query string
#
# Returns: A list of references to hashes. Each hash is in the form generated
#          by buildTree, below. Nodes are in document order.
#
sub simpleXPathQueryForNodes
{
    my $fileName = shift(@_);
    my $rootNodeRef = parseFile($fileName);
    unshift(@_, $rootNodeRef);
    return queryForNodes(@_);
}

sub simpleXPathQueryForTextNodes
{
    my $fileName = shift(@_);
    my $rootNodeRef = parseText($fileName);
    unshift(@_, $rootNodeRef);
    return queryForNodes(@_);
}

#
# Parses an XML file that can be repeatedly queried.
#
# Arguments
#   0: XML file name
#
# Returns: A reference to a hash of the form generated by buildTree, below,
#          corresponding to the root element of the XML file.
#
sub parseFile
{
    my ( $fileName ) = @_;
    
    # Parse the file
    my $parser = new XML::Parser('Style' => 'Tree', # Generate a parse tree
                                 'ParseParamEnt'  => 1, # Use external DTDs
                                 'Handlers' => {
                                    'ExternEnt' => \&myExternEntHandler
                                 }
                                );
    my $parseResult = $parser->parsefile($fileName);
    
    # Build a nicer tree
    my $rootNodeRef = buildTree($parseResult);

    return $rootNodeRef;
}

sub parseText
{
    my ( $fileName ) = @_;

    # Parse the file
    my $parser = new XML::Parser('Style' => 'Tree', # Generate a parse tree
                                 'ParseParamEnt'  => 1, # Use external DTDs
                                 'Handlers' => {
                                    'ExternEnt' => \&myExternEntHandler
                                 }
                                );
    my $parseResult = $parser->parse($fileName);

    # Build a nicer tree
    my $rootNodeRef = buildTree($parseResult);

    return $rootNodeRef;
}


#
# Performs a query on an XML file. Returns a list of references to hashes 
# representing the result nodes.
#
# Arguments:
#   0: Reference to the root XML node
#   1: XPath query string
#
# Returns: A list of references to hashes. Each hash is in the form generated
#          by buildTree, below. Nodes are in document order.
#
sub queryForNodes
{
    my $rootNodeRef = $_[0];
    my $query = $_[1];
    
    my $query0 = $query;
    my @results = ( $rootNodeRef );
    
    # Check the root tag name against the start of the query
    my $n = nextIndex($query, "/", "[", "@");
    if ($n > 0)
    {
        my $tagName = substr($query, 0, $n);
        $query = substr($query, $n);
        if ($tagName ne $$rootNodeRef{'tagName'})
        {
            # Initial tag name in query does not match root node
            # tag name. Query has failed.
            return ();
        }
    }
    
    # Parse the rest of the query
    while (length($query) > 0)
    {
        if (@results == 0)
        {
            return ();
        }
    
        $n = nextIndex($query, "/", "[", "@");
    
        if ($n > 0)
        {
            my $predicate = substr($query, 0, $n);
            $query = substr($query, $n);
            
            if ($predicate =~ /^(.*)::(.*)$/)
            {
                my $axis = $1;
                my $tagName = $2;
                
                if ($axis eq 'ancestor')
                {
                    @results = map(getAncestors($_, $tagName), map { $$_{'parent'} } @results);
                }
                elsif ($axis eq 'ancestor-or-self')
                {
                    @results = map(getAncestors($_, $tagName), @results);
                }
                else
                {
                    die "Unknown axis: $axis";
                }
            }
            else
            {
                my $tagName = $predicate;
                @results = map(getChildren($_, $tagName), @results);
            }
        }
        elsif (substr($query, 0, 2) eq '//')
        {
            $query = substr($query, 2);
            $n = nextIndex($query, "/", "[", "@");
            my $tagName = substr($query, 0, $n);
            $query = substr($query, $n);
            
            @results = map(getDescendants($_, $tagName), @results);
        }
        else
        {
            my $char = substr($query, 0, 1);
            if ($char eq "/")
            {
                $query = substr($query, 1);
                next;
            }
            elsif ($char eq "[")
            {
                $n = index($query, "]");
                if ($n < 0)
                {
                    die "Expected ']' in query: $query0 $query";
                }
                my $expr = substr($query, 1, $n - 1);
                $query = substr($query, $n + 1);
                
                if ($expr =~ /^\@(.*)="(.*)"$/)
                {
                    # Expression is @<attribute name>="<value>"
                    my $attrName = $1;
                    my $attrVal = $2;
                    @results = filterOnAttribute(\@results, $attrName, $attrVal);
                }
                elsif ($expr =~ /^(.*)="(.*)"$/)
                {
                    # Expression is <tag name>="<value>"
                    my $tagName = $1;
                    my $stringVal = $2;
                    @results = filterOnStringValue(\@results, $tagName, $stringVal);
                }
                elsif ($expr =~ /^[0-9]+$/)
                {
                    # Expression is a numeric index
                    splice(@results, 0, $expr - 1);
                    splice(@results, 1);
                }
                else
                {
                    die "Error parsing query expression: $query0 $expr";
            }
            }
            else
            {
                die "Error parsing query: $query0 $query";
            }
        }
    }
    
    return @results;
}

#
# DTD resolver for XML::Parser
#
sub myExternEntHandler
{
    #my $externEnt = XML::Parser::lwp_ext_ent_handler(@_);
    # LWP handler not found, use file handler instead
    my $externEnt = XML::Parser::file_ext_ent_handler(@_);
    if (!defined($externEnt))
    {
        # The DTD failed to load. Proceed without it.
        return '';
    }
    return $externEnt;
}

#
# Given a set of delimiters, finds the first occurrence of one of them
# in a string.
#
# Arguments
#   0: A string.
#   1... Delimiters to search for.
#
# Returns: The index in the string of the first occurrence of one of the
#          delimiters, or the length of the string if none of the delimiters
#          occur.
#
sub nextIndex
{
    my $str = $_[0];
    my $n = length($str);

    splice(@_, 0, 1);
    for my $delim (@_)
    {
        my $m = index($str, $delim);
        if ($m >= 0 && $m < $n)
        {
            $n = $m;
        }
    }
    
    return $n;
}

#
# Retrieves all the children of a node with the specified tag name.
#
# Arguments
#   0: A reference to a hash describing a node, as returned by buildTree.
#   1: Tag name, or * to retrieve all children.
#
# Returns: A list of references to hashes representing the retrieved child
#          nodes. Hashes are in the form returned by buildTree.
#
sub getChildren
{
    my $nodeRef = $_[0];
    my $tagName = $_[1];
    
    my @result;

    my $childrenRef = $$nodeRef{'children'};
    for (my $i = 0; $i < @{$childrenRef}; $i++)
    {
        my $childRef = $$childrenRef[$i];
        if ($$childRef{'type'} eq 'element' && ($tagName eq "*" || $$childRef{'tagName'} eq $tagName))
        {
            push(@result, $childRef);
        }
    }
    return @result;
}

#
# Retrieves all descendents of a node with the specified tag name.
#
# Arguments
#   0: A reference to a hash representing a node, as returned by buildTree.
#   1: Tag name, or * to retrieve all descendants.
#
# Returns: A list of references to hashes representing the retrieved descendant
#          nodes. Hashes are in the form returned by buildTree.
#
sub getDescendants
{
    my $nodeRef = $_[0];
    my $tagName = $_[1];
    
    my @result;
    
    getDescendantsImpl($nodeRef, $tagName, \@result);

    return @result;
}

#
# Utility method for getDescendants.
#
# Arguments
#   0: Reference to a hash representing a node, as returned by buildTree.
#   1: Tag name, or *.
#   2: Reference to a list in which to place results.
#
sub getDescendantsImpl
{
    my $nodeRef = $_[0];
    my $tagName = $_[1];
    my $resultsRef = $_[2];

    my $childrenRef = $$nodeRef{'children'};
    for (my $i = 0; $i < @{$childrenRef}; $i++)
    {
        my $childRef = $$childrenRef[$i];
        if ($$childRef{'type'} eq 'element')
        {
            if ($tagName eq "*" || $$childRef{'tagName'} eq $tagName)
            {
                push(@{$resultsRef}, $childRef);
            }
            getDescendantsImpl($childRef, $tagName, $resultsRef);
        }
    }
}

#
# Retrieves all ancestors of a node with the specified tag name.
#
# Arguments
#   0: Reference to a hash representing a node, as returned by buildTree.
#   1: Tag name, or * to retrieve all ancestors.
#
# Returns: A list of references to hashes representing the retrieved ancestor
#          nodes. Hashes are in the form returned by buildTree.
#
sub getAncestors
{
    my $nodeRef = $_[0];
    my $tagName = $_[1];
    
    my @results;
    
    while ($nodeRef && $$nodeRef{'type'} eq 'element')
    {
        if ($tagName eq '*' || $tagName eq $$nodeRef{'tagName'})
        {
            unshift(@results, $nodeRef);
        }
        $nodeRef = $$nodeRef{'parent'};
    }
    
    return @results;
}

#
# Retrieves a named attribute from a node.
#
# Arguments
#   0: Reference to a hash representing a node, as returned by buildTree.
#   1: Attribute name.
#
# Returns: The value of the attribute, or the empty string if the attribute is
#          not defined for the node.
#
sub getAttribute
{
    my $nodeRef = $_[0];
    my $attrName = $_[1];
    
    my $attrVal = ${$$nodeRef{'attributes'}}{$attrName};
    if ($attrVal)
    {
        return $attrVal;
    }
    return "";
}

#
# Filters a list of nodes, keeping only nodes for which a particular attribute
# has a particular value.
#
# Arguments
#   0: Reference to list of references to hashes representing nodes, as returned
#      by buildTree.
#   1: The name of the attribute to filter on.
#   2: The desired value of the attribute.
#
# Returns: A filtered copy of the input node list.
#
sub filterOnAttribute
{
    my $listRef = $_[0];
    my $attrName = $_[1];
    my $attrVal = $_[2];
    my @results;
    
    for my $nodeRef (@{$listRef})
    {
        if (getAttribute($nodeRef, $attrName) eq $attrVal)
        {
            push(@results, $nodeRef);
        }
    }
    
    return @results;
}

#
# Calculates the string value of a node, as defined by the W3C. The string
# value of a node is the concatenated values of its text descendants.
#
# Arguments
#   0: A reference to a hash representing a node, as returned by buildTree.
#
# Returns: The computed string value of the node.
#
sub getStringValue
{
    my $nodeRef = $_[0];
    
    if (${$nodeRef}{'type'} eq 'text')
    {
        return ${$nodeRef}{'text'};
    }
    
    my @childText = map(getStringValue($_), @{$$nodeRef{'children'}});
    return join('', @childText);
}

#
# Filters a list of nodes, keeping only nodes that have a child with a particular
# tag name and string value.
#
# Arguments
#   0: Reference to list of references to hashes representing nodes, as returned
#      by buildTree.
#   1: Tag name, or *.
#   2: The desired string value.
#
# Returns: A filtered copy of the input node list.
#
sub filterOnStringValue
{
    my $listRef = $_[0];
    my $tagName = $_[1];
    my $stringVal = $_[2];
    my @results;
    
    for my $nodeRef (@{$listRef})
    {
        my @children = getChildren($nodeRef, $tagName);
        for my $childRef (@children)
        {
            my $childStringVal = getStringValue($childRef);
            if ($childStringVal eq $stringVal)
            {
                push(@results, $nodeRef);
            }
        }
    }
    
    return @results;
}

#
# Converts the output of XML::Parser, run in 'Tree' mode, into a nicer tree.
# The nodes of the result tree are hash references. Each node has a 'parent'
# reference, and a 'type'. The type is either 'text' for text nodes, or
# 'element' for element nodes. Text nodes have a 'text' attribute, which is a string.
# Text nodes have no children. Element nodes have an 'attributes' attribute that
# refers to a hash of the attributes for the element node, and a 'children' attribute
# that refers to a list of references to the child nodes of the element.
#
# Arguments
#   0: A reference to the output of XML::Parser, run in 'Tree' mode.
#   1: A reference to a node to use as the parent of the root node.
#
# Returns: A nice tree, as described above.
#
sub buildTree
{
    my $parseResult = $_[0];
    my $parent = $_[1];
    
    my $tagName = $$parseResult[0];
    
    if ($tagName eq "0")
    {
        my $nodeRef = { 'parent' => $parent,
                        'type' => 'text',
                        'text' => $$parseResult[1] };
        return $nodeRef;
    }
    else
    {
        my $attrHashRef = $$parseResult[1][0];

        my @children;
        
        my $nodeRef = { 'parent' => $parent,
                        'type' => 'element',
                        'tagName' => $tagName,
                        'attributes' => $attrHashRef,
                        'children' => \@children };
        
        my $childrenRef = $$parseResult[1];
        for (my $i = 1; $i < @{$childrenRef}; $i += 2)
        {
            my $childRef1 = [$$childrenRef[$i], $$childrenRef[$i + 1]];
            my $childRef = buildTree($childRef1, $nodeRef);
            push(@children, $childRef);
        }
        
        return $nodeRef;
    }
}

#
# Utility method to guarantee that a value is defined.
#
# Arguments
#   0: A value.
#
# Returns: The empty string if the value is not defined, otherwise the value.
#
sub ensuredef
{
    my $str = $_[0];
    if (!defined($str))
    {
        return '';
    }
    return $str;
}

return 1;
