package INSTALL::chiliXML; use INSTALL::common; use INSTALL::os; use INSTALL::query; use strict; # there are 5 levels of debugging here. # keep increasing the level number here for more verbose output. my $debug = 0; # # This module is a wrapper module for parsing the XML files. Perl provides # a generic XML parser but this is dependent on James Clark's XML Parser # library. So for this generic XML parser to work, we need to have this # expat parser library in the library runtime path. # # To overcome this , we also provide a limited scope version of XML parser, # which considers the XML file as a tag based representation of data in # a tree format. # # If CONST_UseChiliXMLParser flag is set, then we will use Chili's XML Parser # or else we will the Generic XML Parser. my $CONST_UseChiliXMLParser = 1 ; my $CONST_UseGenericXMLParser= (! $CONST_UseChiliXMLParser ); # Make sure that both these flags are NOT set to 1. We do NOT support this. die "Error in Package Configuration:Both Chili's XML and Clark's XML parser cannot be used at a same time.\n" if ($CONST_UseChiliXMLParser && $CONST_UseGenericXMLParser) ; my $pGlobals = \%INSTALL::global::variables; my $pParameters = \%INSTALL::parameter::variables; my $pOS = \%INSTALL::os::Details; #=============================================================================== # Collection of Wrapper XML Parser routines exported to the caller. #=============================================================================== #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # External functions #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #------------------------------------------------------------------------------- # This routine is responsible for parsing the given XML file and then building # a XML tree out of it. #------------------------------------------------------------------------------- sub build_XmlTree { my ($xml_file) = (@_ ); my $routineName = "build_XmlTree" ; # validate the parameters. ($#_ = 1) || die "Insufficient parameter passed for routine $routineName.\n"; unless (-f "$xml_file") { error("Error in $routineName:Input file $xml_file does NOT exist."); return (undef); } # Initialize the variables here. my $pXmlRootNode; if ($CONST_UseGenericXMLParser && (& __perl_configure_forPerlXml)) { require XML::Parser ; require XML::SimpleObject ; # We need to parse this XML based on the root variable and # store these parsed contents with respect to these root # variables. my $parser = new XML::Parser (ErrorContext => 2, Style => "Tree"); $pXmlRootNode = new XML::SimpleObject($parser->parsefile($xml_file)) ; } else { # Initialize the XML Parser Data-Structure. my %xmlParsedInfo = (); open(XML,"$xml_file") || die("Cannot open $xml_file for reading.\nReason: $!"); my @data = ; close(XML); my $data = join('',@data); unless (($pXmlRootNode = __chili_xmlParser(\$data,\%xmlParsedInfo))) { error("Error in $routineName:XML Parser failed"); return (undef); } } return ($pXmlRootNode); } #------------------------------------------------------------------------------- # This routine takes a node as input and goes through all its children and # compares the elementId of the children node with the given elementId. # If a element is found, then it is pushed onto the stack. # Input : node from the XML Parsed tree whose children needs to be queried. # Output: returns undef on error or a list containing the list of nodes. #------------------------------------------------------------------------------- sub queryElement_XmlNode { my ($elementId,$pCurNode) = (@_); my $routineName = "queryElement_XmlTree" ; ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless ($elementId) { error ("Error in $routineName:Element Id cannot be a null value."); return (undef); } unless (ref($pCurNode) eq "HASH") { error("Error in $routineName: Input Node must be a reference to a HASH"); return (undef); } if ($CONST_UseGenericXMLParser) { return ($pCurNode->child($elementId)) ; } else { return (__chili_queryElement($elementId,$pCurNode)); } return (undef); } #------------------------------------------------------------------------------- # This routine takes a node as input and goes through all its children and # compares the elementId of the children node with the given elementId. # If a element is found, then it is pushed onto the stack. # Input : node from the XML Parsed tree whose children needs to be queried. # Output: returns undef on error or a list containing the list of nodes. #------------------------------------------------------------------------------- sub queryAllElements_XmlNode { my ($elementId,$pCurNode) = (@_); my $routineName = "queryAllElements_XmlNode" ; ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless ($elementId) { error ("Error in $routineName:Element Id cannot be a null value."); return (undef); } unless (ref($pCurNode) eq "HASH") { error("Error in $routineName: Input Node must be a reference to a HASH"); return (undef); } if ($CONST_UseGenericXMLParser) { return (\$pCurNode->children($elementId)) } else { return (__chili_queryAllElements($elementId,$pCurNode)); } return (undef); } #=============================================================================== # Collection of internal functions to support chili's XML and Clark's XML Parser #=============================================================================== #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Internal functions #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #=============================================================================== # Collection of routines to support Chili's XML Parser. #=============================================================================== #=============================================================================== # Data structure description: # # 1.In this module,we use a $pXmlParsedInfo ( a reference to a hash structure ) # which contains the parsed XML information in a tree format. # This node contains the following keys:- # my %xmlParsedInfo = # ( # "elementId" => "element name" ( type = scalar ) # "elementAttrs" => "element attributes list" ( type = ref to a hash ) # "children" => "element children list" ( type = ref to a list ) # "child_names" => "name of children list" ( type = ref to a list ) # "parent" => "element's parent node" ( type = ref to a hash ) # ); #=============================================================================== #=============================================================================== # Series of Helper routines to operate upon the nodes which contains the parsed # XML information in a tree format. #=============================================================================== #------------------------------------------------------------------------------- # This routine is responsible for adding a child node to a given parent node. # Input: It takes a reference of a parent and child node. the child node will # be added to the children attributes of the parent node. # Output: returns 1 on success and undef on error. #------------------------------------------------------------------------------- sub __chili_addChild { my ($pParentNode,$pChildNode) = (@_); my $routineName = "__chili_addChild" ; ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless (ref($pParentNode) eq "HASH") { error("Error in $routineName:parentNode node must be a reference to a HASH"); return (undef); } unless (ref($pChildNode) eq "HASH") { error("Error in $routineName:childNode node must be a reference to a HASH"); return (undef); } # add a child either a new or update to the list. my @children ; my $pChildren; if (exists $pParentNode->{children}) { $pChildren = $pParentNode->{children}; } # already at least a child exists to this node. if ($pChildren) { push @{$pChildren},$pChildNode ; } else { push @children,$pChildNode ; $pChildren=\@children ; } # add the child names here. if ($pParentNode->{child_names}) { push @{$pParentNode->{child_names}},$pChildNode->{elementId} ; } else { my @name_list ; push @name_list,$pChildNode->{elementId} ; $pParentNode->{'child_names'} = \@name_list ; } # update the link. $pParentNode->{children} = $pChildren ; $pChildNode->{parent} = $pParentNode; return (1); } #------------------------------------------------------------------------------- # This routine is responsible for adding children nodes to a given parent node. # It iteratively add all the children in the pChildrenNodes to the children # key of the parent Node. # Input: It takes a reference of a parent and to a children node. # be added to the children attributes of the parent node. # Output: returns 1 on success and undef on error. #------------------------------------------------------------------------------- sub __chili_addChildren { my ($pParentNode,$pChildrenNode) = (@_); my $routineName = "__chili_addChildren" ; ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless (ref($pParentNode) eq "HASH") { error("Error in $routineName:parentNode node must be a reference to a HASH"); return (undef); } unless (ref($pChildrenNode) eq "ARRAY") { error("Error in $routineName:childrenNode node must be a reference to a HASH"); return (undef); } # add a child either a new or update to the list. my @children ; my $pChildren; if (exists $pParentNode->{children}) { $pChildren = $pParentNode->{children}; } # already at least a child exists to this node. if ($pChildren) { push @{$pChildren},@{$pChildrenNode} ; } else { push @children,@{$pChildrenNode} ; $pChildren=\@children ; } # update the link. $pParentNode->{children} = $pChildren ; map { $_->{"parent"} = $pParentNode } @{$pChildrenNode} ; return (1); } #------------------------------------------------------------------------------- # This routine takes a node as input and goes through all its children and # compares the elementId of the children node with the given elementId. # If a element is found, then it is pushed onto the stack. # Input : node from the XML Parsed tree. # Output: returns undef on error or a list containing the list of nodes. #------------------------------------------------------------------------------- sub __chili_queryElement { my ($elementId,$pCurNode) = (@_); my $routineName = "__chili_queryElement" ; ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless ($elementId) { error ("Error in $routineName:Element Id cannot be a null value."); return (undef); } unless (ref($pCurNode) eq "HASH") { error ("Error in $routineName:current Node must be a reference to a HASH"); return (undef); } unless (exists $pCurNode->{children} || $pCurNode->{children}) { error ("Error in $routineName:node does not have any children"); return (undef); } my @children = @{$pCurNode->{children}}; my @list ; foreach my $pChild (@children) { print "Element name is $pChild->{elementId}\n" if ($debug > 1) ; if ($pChild->{elementId} eq $elementId) { print "Element found\n" if ($debug) ; push @list,$pChild ; } } return (undef) unless (@list) ; (wantarray) ? return(@list) : return ($list[0]) ; } #------------------------------------------------------------------------------- # This routine takes a node as input and goes through all its children and # compares the elementId of the children node with the given elementId. # If a element is found, then it is pushed onto the stack. # Input : node from the XML Parsed tree. # Output: returns undef on error or a list containing the list of nodes. #------------------------------------------------------------------------------- sub __chili_queryAllElements { my ($elementId,$pCurNode) = (@_); my $routineName = "__chili_queryElement" ; ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless ($elementId) { error ("Error in $routineName:Element Id cannot be a null value."); return (undef); } unless (ref($pCurNode) eq "HASH") { error ("Error in $routineName:current Node must be a reference to a HASH"); return (undef); } unless (exists $pCurNode->{children} || $pCurNode->{children}) { error ("Error in $routineName:node does not have any children"); return (undef); } my @children = @{$pCurNode->{children}}; my @list ; foreach my $pChild (@children) { print "Element name is $pChild->{elementId}\n" if ($debug > 1) ; if ($pChild->{elementId} eq $elementId) { print "Element found\n" if ($debug) ; push @list,$pChild ; } } return (undef) unless (@list) ; return(\@list) } #=============================================================================== # Series of Helper routines to parse a given XML input string and construct # in a tree format where the root element is the root child in the tree. #=============================================================================== #------------------------------------------------------------------------------- # This routine can parse a string with key=value pair and returns the # hash reference to this list of attributes. #------------------------------------------------------------------------------- sub __chili_parseElement_Attributes { my $pSrc = shift ; my $routineName = "__chili_parseElement_Attributes" ; ($#_ = 1) || die "Error in $routineName:Insufficient parameter called.\n"; unless (ref($pSrc) eq "SCALAR") { error ("Error in $routineName:reference to the input string is expected here.\n"); return (undef); } my %attrs ; my @attrs ; push (@attrs,$1),push (@attrs,$2) while ( $$pSrc =~ m { (?:.*?) # Match everything ([\'\"\/a-zA-Z0-9_\-\.]+) # until a valid key is found. (?:\s*=\s*) # and followed by = ([\'\"\/a-zA-Z0-9_\-\.\\]+) # and followed by another valid value. }gcx ); (%attrs) = (@attrs) ; return (\%attrs); } #------------------------------------------------------------------------------- # This routine is the main XML parser engine which does the complete XML parsing # and updates the parsed information in a tree format. The output of this # routine will be a reference to a hash which will contain all the sub-elements # as its children, where the root child will the root tag in the XML file. # It first matches for the root tag and then for all valid tags as its children. # If a tag can have children, then it forks them and separately to collect # the list of children. # Parse either till the end of string or something like that. # Input:takes a reference to a string which should contain the XML string to # be parsed. and a reference to a node which will contain the complete list # of parsed informations in a tree format. # returns root node on success and undef on error. #------------------------------------------------------------------------------- sub __chili_xmlParser { # get the reference of the source and the parent node. #my $pData = shift ; #my $pParentNode = shift ; my ($pData,$pParentNode) = (@_); my $routineName = "__chili_xmlParser" ; # validate these parameters. ($#_ = 2) || die "Insufficient parameters given for routine $routineName.\n"; unless (ref($pData) eq "SCALAR") { error("Error in $routineName: XML input string must be a reference to a SCALAR", "This input variable will contain the input string which you want to parse."); return (undef); } unless (ref($pParentNode) eq "HASH") { error("Error in $routineName: the parent node must be a reference to a HASH.", "This node will contain the complete XML Parsed informations in a tree format."); return (undef); } my $data = $$pData ; my %xmlRootNode ; PARSER: { ($data =~ /\s*<([^\?!][a-zA-Z0-9\-_]+)\s+(?=(.*?)(\/>|>))/gcs) && do { my $element = $1 ; my $attributes = $2; my $tagValue = $3 ; my $pElementAttrs= __chili_parseElement_Attributes(\$attributes) ; # let us print the tags and attributes. if ($debug) { print "tag is $element\n"; foreach my $key (keys %{$pElementAttrs}) { print "key is $key and value is $pElementAttrs->{$key}\n"; } print "\n"; } # at this point, we have a valid XML element and a valid attribute # list. put this together is a list format and add this node in # the XML Parsed tree. my %xmlCurNode ; if ($element && keys %{$pElementAttrs}) { $xmlCurNode{"elementId"} = $element ; $xmlCurNode{"elementAttrs"} = $pElementAttrs; } unless (__chili_addChild($pParentNode,\%xmlCurNode)) { error ("Error in $routineName: Unable to add a parsed XML Node to the Tree."); return (undef); } if ($tagValue eq "/>") { # tags of this type are considered to be child of some parent. # we have nothing to do here. } elsif ($tagValue eq ">") { # tags of this type are considered to be the parent to some child. if ($data =~ /\G\s*(?:(.*?)<\/$element>)/gcs) { my $gobble = $1 ; unless (__chili_xmlParser(\$gobble,\%xmlCurNode)) { error ("Error in $routineName: Unable to recursively parse XML informations."); if ($debug == 5) { error("Error in $routineName: Current XML parsed text is :\n$gobble\n"); } return (undef); } # at this moment the recursive call must have finished. # now this node must be the root node,inform the caller about it. %xmlRootNode = %xmlCurNode ; } } redo PARSER; }; } return (\%xmlRootNode); } #=============================================================================== # Collection of routines to support Perl's Generic XML Parser. #=============================================================================== #------------------------------------------------------------------------------ # This routine is responsible for setting up the perl include paths and the # runtime library informations so that perl and its dependent EXPAT library # can be loaded dynamically at runtime. # It needs a root perl installation location under which it looks for the # perl libraries and its dependent Expat Parser libraries. # If no such files are not found, it looks for the libraries only under # /usr/local/lib location and no where else. # returns undef on error. #------------------------------------------------------------------------------ sub __perl_setupEnv_forPerlXml { #----------------------------------------------------------------------------- # This is a generic routine to locate a file under a directory. If the file # is found at more than one location, it is returned as a list. returns undef # if the file is not found under the given directory. #----------------------------------------------------------------------------- sub locate_file { my ($rootdir,$filename) = @_; my $routineName = "locate_file" ; ($#_ != 2) || die "Invalid number of arguments passed for routine $routineName."; my @list = (); sub isDir { ($#_ == 0) || die "Invalid number of arguments."; my ($dir) = @_; return((-d $dir) && (not -l $dir)); } if (-f "$rootdir/$filename") { my $located_file = INSTALL::common::realpath("$rootdir/$filename"); push @list,INSTALL::common::dirname("$located_file"); return (@list); } else { # Make sure we match a subdirectory with the name apache in it, for # completeness. local *ROOT; if (opendir(ROOT,"$rootdir")) { my @subfiles = readdir(ROOT); closedir(ROOT); foreach (@subfiles) { if ((($_ ne ".") && ($_ ne "..")) && (isDir("$rootdir/$_"))) { push @list,(locate_file("$rootdir/$_",$filename)); } } } return(@list); } } ### Start of setup_perlenv routine. my ($lib_rootdir) = (@_) ; my $routineName = "__perl_setup_perlenv" ; ($#_ == 0) || die "Insufficient parameters for routine $routineName.\n"; return (undef) unless (-d $lib_rootdir) ; # we need to find the location of libexpat.so under the lib root dir. # if this is found, then export this directory to the environment or # look in the /usr/local/lib directory. my $expat_libname = "libexpat.$pGlobals->{lib_ext}" ; my @expat_libdir = locate_file($lib_rootdir,$expat_libname) ; my $status = undef ; unless (@expat_libdir) { $lib_rootdir = "/usr/local/lib" ; @expat_libdir = locate_file($lib_rootdir,$expat_libname) ; } if (@expat_libdir) { INSTALL::common::libpath_append(@expat_libdir); $status = 1; } return ($status); } #------------------------------------------------------------------------------ # Its functionality is limited to find out whether the given perl location # can support Perl. This routine returns 1 if XML is supported or returns undef #------------------------------------------------------------------------------ sub __perl_querySupport_forPerlXml { my $perl = (@_ ); my $routineName = "__perl_querySupport_forPerlXml" ; ($#_ == 0) || die "Invalid number of arguments for routine $routineName.\n"; unless ($perl) { error ("Error in $routineName: Perl executable name cannot be a null."); return (undef); } my $pid = fork ; if (not $pid) { # Child. exec("$perl -e 'require XML::Parser' 2>&1 /dev/null"); exit(1); } else { wait; if ($?) { return (undef); } else { return (1); } } } #------------------------------------------------------------------------------ # This routine is a main wrapper for configuring the Perl's expat parser. # It does this by first making sure that the detected perl version has the # perl support enabled into it and it has the libexpat.so in the runtime # path location.. If all of these has succeded then we return 1 otherise undef. #------------------------------------------------------------------------------ sub __perl_configure_forPerlXml { my $perl = $pGlobals->{perl} ; my $perl_binary = "$pGlobals->{perl_binary}" ; my $perl_rootdir ; # we dont have anything to do here. if (not $CONST_UseGenericXMLParser) { return 1; } if (not __perl_querySupport_forPerlXml($perl)) { return (undef); } # find out if this detected perl has the standard distribution structure. # If not we need to look at the INC dirs in the perl binary. my @perl_locations = (); my @chiliperl_searchdirs = ( "$pGlobals->{tools_lib_dir}", "$pGlobals->{tools_bin_dir}" ); foreach (@chiliperl_searchdirs) { push @perl_locations,"$_" if (-d "$_") ; } unless (@perl_locations) { # well this looks like a standard distribution, look at the inc dirs # and see if my @incdir = @INC ; foreach (@incdir) { next unless ( -d "$_" ); push @perl_locations,"$_" ; } } foreach (@perl_locations) { if (__perl_setupEnv_forPerlXml("$_")) { last ; } } } 1;