diff options
Diffstat (limited to 'chromium/third_party/cygwin/lib/perl5/vendor_perl/5.10/i686-cygwin/XML/LibXML/Parser.pod')
-rw-r--r-- | chromium/third_party/cygwin/lib/perl5/vendor_perl/5.10/i686-cygwin/XML/LibXML/Parser.pod | 683 |
1 files changed, 0 insertions, 683 deletions
diff --git a/chromium/third_party/cygwin/lib/perl5/vendor_perl/5.10/i686-cygwin/XML/LibXML/Parser.pod b/chromium/third_party/cygwin/lib/perl5/vendor_perl/5.10/i686-cygwin/XML/LibXML/Parser.pod deleted file mode 100644 index e8aa32cf449..00000000000 --- a/chromium/third_party/cygwin/lib/perl5/vendor_perl/5.10/i686-cygwin/XML/LibXML/Parser.pod +++ /dev/null @@ -1,683 +0,0 @@ -=head1 NAME - -XML::LibXML::Parser - Parsing XML Data with XML::LibXML - -=head1 SYNOPSIS - - - - use XML::LibXML; - my $parser = XML::LibXML->new(); - - my $doc = $parser->parse_string(<<'EOT'); - <some-xml/> - EOT - my $fdoc = $parser->parse_file( $xmlfile ); - - my $fhdoc = $parser->parse_fh( $xmlstream ); - - my $fragment = $parser->parse_xml_chunk( $xml_wb_chunk ); - - $parser = XML::LibXML->new(); - $doc = $parser->parse_file( $xmlfilename ); - $doc = $parser->parse_fh( $io_fh ); - $doc = $parser->parse_string( $xmlstring); - $doc = $parser->parse_html_file( $htmlfile, \%opts ); - $doc = $parser->parse_html_fh( $io_fh, \%opts ); - $doc = $parser->parse_html_string( $htmlstring, \%opts ); - $fragment = $parser->parse_balanced_chunk( $wbxmlstring ); - $fragment = $parser->parse_xml_chunk( $wbxmlstring ); - $parser->process_xincludes( $doc ); - $parser->processXIncludes( $doc ); - $parser->parse_chunk($string, $terminate); - $parser->start_push(); - $parser->push(@data); - $doc = $parser->finish_push( $recover ); - $parser->validation(1); - $parser->recover(1); - $parser->recover_silently(1); - $parser->expand_entities(0); - $parser->keep_blanks(0); - $parser->pedantic_parser(1); - $parser->line_numbers(1); - $parser->load_ext_dtd(1); - $parser->complete_attributes(1); - $parser->expand_xinclude(1); - $parser->load_catalog( $catalog_file ); - $parser->base_uri( $your_base_uri ); - $parser->gdome_dom(1); - $parser->clean_namespaces( 1 ); - $parser->no_network(1); - -=head1 PARSING - -A XML document is read into a data structure such as a DOM tree by a piece of -software, called a parser. XML::LibXML currently provides four different parser -interfaces: - - -=over 4 - -=item * - -A DOM Pull-Parser - - - -=item * - -A DOM Push-Parser - - - -=item * - -A SAX Parser - - - -=item * - -A DOM based SAX Parser. - - - -=back - - -=head2 Creating a Parser Instance - -XML::LibXML provides an OO interface to the libxml2 parser functions. Thus you -have to create a parser instance before you can parse any XML data. - -=over 4 - -=item B<new> - - $parser = XML::LibXML->new(); - -There is nothing much to say about the constructor. It simply creates a new -parser instance. - -Although libxml2 uses mainly global flags to alter the behaviour of the parser, -each XML::LibXML parser instance has its own flags or callbacks and does not -interfere with other instances. - - - -=back - - -=head2 DOM Parser - -One of the common parser interfaces of XML::LibXML is the DOM parser. This -parser reads XML data into a DOM like data structure, so each tag can get -accessed and transformed. - -XML::LibXML's DOM parser is not only capable to parse XML data, but also -(strict) HTML files. There are three ways to parse documents - as a string, as -a Perl filehandle, or as a filename/URL. The return value from each is a L<<<<<< XML::LibXML DOM Document Class|XML::LibXML DOM Document Class >>>>>> object, which is a DOM object. - -All of the functions listed below will throw an exception if the document is -invalid. To prevent this causing your program exiting, wrap the call in an -eval{} block - -=over 4 - -=item B<parse_file> - - $doc = $parser->parse_file( $xmlfilename ); - -This function parses an XML document from a file or network; $xmlfilename can -be either a filename or an URL. Note that for parsing files, this function is -the fastest choice, about 6-8 times faster then parse_fh(). - - -=item B<parse_fh> - - $doc = $parser->parse_fh( $io_fh ); - -parse_fh() parses a IOREF or a subclass of IO::Handle. - -Because the data comes from an open handle, libxml2's parser does not know -about the base URI of the document. To set the base URI one should use -parse_fh() as follows: - - - - my $doc = $parser->parse_fh( $io_fh, $baseuri ); - - -=item B<parse_string> - - $doc = $parser->parse_string( $xmlstring); - -This function is similar to parse_fh(), but it parses a XML document that is -available as a single string in memory. Again, you can pass an optional base -URI to the function. - - - - my $doc = $parser->parse_string( $xmlstring, $baseuri ); - - -=item B<parse_html_file> - - $doc = $parser->parse_html_file( $htmlfile, \%opts ); - -Similar to parse_file() but parses HTML (strict) documents; $htmlfile can be -filename or URL. - -An optional second argument can be used to pass some options to the HTML parser -as a HASH reference. Possible options are: Possible options are: encoding and -URI for libxml2 < 2.6.27, and for later versions of libxml2 additionally: -recover, suppress_errors, suppress_warnings, pedantic_parser, no_blanks, and -no_network. - - -=item B<parse_html_fh> - - $doc = $parser->parse_html_fh( $io_fh, \%opts ); - -Similar to parse_fh() but parses HTML (strict) streams. - -An optional second argument can be used to pass some options to the HTML parser -as a HASH reference. Possible options are: encoding and URI for libxml2 < -2.6.27, and for later versions of libxml2 additionally: recover, -suppress_errors, suppress_warnings, pedantic_parser, no_blanks, and no_network. -Note: encoding option may not work correctly with this function in libxml2 < -2.6.27 if the HTML file declares charset using a META tag. - - -=item B<parse_html_string> - - $doc = $parser->parse_html_string( $htmlstring, \%opts ); - -Similar to parse_string() but parses HTML (strict) strings. - -An optional second argument can be used to pass some options to the HTML parser -as a HASH reference. Possible options are: encoding and URI for libxml2 < -2.6.27, and for later versions of libxml2 additionally: recover, -suppress_errors, suppress_warnings, pedantic_parser, no_blanks, and no_network. - - - -=back - -Parsing HTML may cause problems, especially if the ampersand ('&') is used. -This is a common problem if HTML code is parsed that contains links to -CGI-scripts. Such links cause the parser to throw errors. In such cases libxml2 -still parses the entire document as there was no error, but the error causes -XML::LibXML to stop the parsing process. However, the document is not lost. -Such HTML documents should be parsed using the I<<<<<< recover >>>>>> flag. By default recovering is deactivated. - -The functions described above are implemented to parse well formed documents. -In some cases a program gets well balanced XML instead of well formed documents -(e.g. a XML fragment from a Database). With XML::LibXML it is not required to -wrap such fragments in the code, because XML::LibXML is capable even to parse -well balanced XML fragments. - -=over 4 - -=item B<parse_balanced_chunk> - - $fragment = $parser->parse_balanced_chunk( $wbxmlstring ); - -This function parses a well balanced XML string into a L<<<<<< XML::LibXML's DOM L2 Document Fragment Implementation|XML::LibXML's DOM L2 Document Fragment Implementation >>>>>>. - - -=item B<parse_xml_chunk> - - $fragment = $parser->parse_xml_chunk( $wbxmlstring ); - -This is the old name of parse_balanced_chunk(). Because it may causes confusion -with the push parser interface, this function should not be used anymore. - - - -=back - -By default XML::LibXML does not process XInclude tags within a XML Document -(see options section below). XML::LibXML allows to post process a document to -expand XInclude tags. - -=over 4 - -=item B<process_xincludes> - - $parser->process_xincludes( $doc ); - -After a document is parsed into a DOM structure, you may want to expand the -documents XInclude tags. This function processes the given document structure -and expands all XInclude tags (or throws an error) by using the flags and -callbacks of the given parser instance. - -Note that the resulting Tree contains some extra nodes (of type -XML_XINCLUDE_START and XML_XINCLUDE_END) after successfully processing the -document. These nodes indicate where data was included into the original tree. -if the document is serialized, these extra nodes will not show up. - -Remember: A Document with processed XIncludes differs from the original -document after serialization, because the original XInclude tags will not get -restored! - -If the parser flag "expand_xincludes" is set to 1, you need not to post process -the parsed document. - - -=item B<processXIncludes> - - $parser->processXIncludes( $doc ); - -This is an alias to process_xincludes, but through a JAVA like function name. - - - -=back - - -=head2 Push Parser - -XML::LibXML provides a push parser interface. Rather than pulling the data from -a given source the push parser waits for the data to be pushed into it. - -This allows one to parse large documents without waiting for the parser to -finish. The interface is especially useful if a program needs to pre-process -the incoming pieces of XML (e.g. to detect document boundaries). - -While XML::LibXML parse_*() functions force the data to be a well-formed XML, -the push parser will take any arbitrary string that contains some XML data. The -only requirement is that all the pushed strings are together a well formed -document. With the push parser interface a program can interrupt the parsing -process as required, where the parse_*() functions give not enough flexibility. - -Different to the pull parser implemented in parse_fh() or parse_file(), the -push parser is not able to find out about the documents end itself. Thus the -calling program needs to indicate explicitly when the parsing is done. - -In XML::LibXML this is done by a single function: - -=over 4 - -=item B<parse_chunk> - - $parser->parse_chunk($string, $terminate); - -parse_chunk() tries to parse a given chunk of data, which isn't necessarily -well balanced data. The function takes two parameters: The chunk of data as a -string and optional a termination flag. If the termination flag is set to a -true value (e.g. 1), the parsing will be stopped and the resulting document -will be returned as the following example describes: - - - - my $parser = XML::LibXML->new; - for my $string ( "<", "foo", ' bar="hello world"', "/>") { - $parser->parse_chunk( $string ); - } - my $doc = $parser->parse_chunk("", 1); # terminate the parsing - - - -=back - -Internally XML::LibXML provides three functions that control the push parser -process: - -=over 4 - -=item B<start_push> - - $parser->start_push(); - -Initializes the push parser. - - -=item B<push> - - $parser->push(@data); - -This function pushes the data stored inside the array to libxml2's parser. Each -entry in @data must be a normal scalar! - - -=item B<finish_push> - - $doc = $parser->finish_push( $recover ); - -This function returns the result of the parsing process. If this function is -called without a parameter it will complain about non well-formed documents. If -$restore is 1, the push parser can be used to restore broken or non well formed -(XML) documents as the following example shows: - - - - eval { - $parser->push( "<foo>", "bar" ); - $doc = $parser->finish_push(); # will report broken XML - }; - if ( $@ ) { - # ... - } - -This can be annoying if the closing tag is missed by accident. The following -code will restore the document: - - - - eval { - $parser->push( "<foo>", "bar" ); - $doc = $parser->finish_push(1); # will return the data parsed - # unless an error happened - }; - - print $doc->toString(); # returns "<foo>bar</foo>" - -Of course finish_push() will return nothing if there was no data pushed to the -parser before. - - - -=back - - -=head2 DOM based SAX Parser - -XML::LibXML provides a DOM based SAX parser. The SAX parser is defined in the -module XML::LibXML::SAX::Parser. As it is not a stream based parser, it parses -documents into a DOM and traverses the DOM tree instead. - -The API of this parser is exactly the same as any other Perl SAX2 parser. See -XML::SAX::Intro for details. - -Aside from the regular parsing methods, you can access the DOM tree traverser -directly, using the generate() method: - - - - my $doc = build_yourself_a_document(); - my $saxparser = $XML::LibXML::SAX::Parser->new( ... ); - $parser->generate( $doc ); - -This is useful for serializing DOM trees, for example that you might have done -prior processing on, or that you have as a result of XSLT processing. - -I<<<<<< WARNING >>>>>> - -This is NOT a streaming SAX parser. As I said above, this parser reads the -entire document into a DOM and serialises it. Some people couldn't read that in -the paragraph above so I've added this warning. - -If you want a streaming SAX parser look at the L<<<<<< XML::LibXML direct SAX parser|XML::LibXML direct SAX parser >>>>>> man page - - -=head1 SERIALIZATION - -XML::LibXML provides some functions to serialize nodes and documents. The -serialization functions are described on the L<<<<<< Abstract Base Class of XML::LibXML Nodes|Abstract Base Class of XML::LibXML Nodes >>>>>> manpage or the L<<<<<< XML::LibXML DOM Document Class|XML::LibXML DOM Document Class >>>>>> manpage. XML::LibXML checks three global flags that alter the serialization -process: - - -=over 4 - -=item * - -skipXMLDeclaration - - - -=item * - -skipDTD - - - -=item * - -setTagCompression - - - -=back - -of that three functions only setTagCompression is available for all -serialization functions. - -Because XML::LibXML does these flags not itself, one has to define them locally -as the following example shows: - - - - local $XML::LibXML::skipXMLDeclaration = 1; - local $XML::LibXML::skipDTD = 1; - local $XML::LibXML::setTagCompression = 1; - -If skipXMLDeclaration is defined and not '0', the XML declaration is omitted -during serialization. - -If skipDTD is defined and not '0', an existing DTD would not be serialized with -the document. - -If setTagCompression is defined and not '0' empty tags are displayed as open -and closing tags rather than the shortcut. For example the empty tag I<<<<<< foo >>>>>> will be rendered as I<<<<<< <foo></foo> >>>>>> rather than I<<<<<< <foo/> >>>>>>. - - -=head1 PARSER OPTIONS - -LibXML options are global (unfortunately this is a limitation of the underlying -implementation, not this interface). They can either be set using -$parser->option(...), or XML::LibXML->option(...), both are treated in the same -manner. Note that even two parser processes will share some of the same -options, so be careful out there! - -Every option returns the previous value, and can be called without parameters -to get the current value. - -=over 4 - -=item B<validation> - - $parser->validation(1); - -Turn validation on (or off). Defaults to off. - - -=item B<recover> - - $parser->recover(1); - -Turn the parsers recover mode on (or off). Defaults to off. - -This allows one to parse broken XML data into memory. This switch will only -work with XML data rather than HTML data. Also the validation will be switched -off automatically. - -The recover mode helps to recover documents that are almost well-formed very -efficiently. That is for example a document that forgets to close the document -tag (or any other tag inside the document). The recover mode of XML::LibXML has -problems restoring documents that are more like well balanced chunks. - -XML::LibXML will only parse until the first fatal error occurs, reporting -recoverable parsing errors as warnings. To suppress these warnings use -$parser->recover_silently(1); or, equivalently, $parser->recover(2). - - -=item B<recover_silently> - - $parser->recover_silently(1); - -Turns the parser warnings off (or on). Defaults to on. - -This allows to switch off warnings printed to STDERR when parsing documents -with recover(1). - -Please note that calling recover_silently(0) also turns the parser recover mode -off and calling recover_silently(1) automatically activates the parser recover -mode. - - -=item B<expand_entities> - - $parser->expand_entities(0); - -Turn entity expansion on or off, enabled by default. If entity expansion is -off, any external parsed entities in the document are left as entities. -Probably not very useful for most purposes. - - -=item B<keep_blanks> - - $parser->keep_blanks(0); - -Allows you to turn off XML::LibXML's default behaviour of maintaining -white-space in the document. - - -=item B<pedantic_parser> - - $parser->pedantic_parser(1); - -You can make XML::LibXML more pedantic if you want to. - - -=item B<line_numbers> - - $parser->line_numbers(1); - -If this option is activated XML::LibXML will store the line number of a node. -This gives more information where a validation error occurred. It could be also -used to find out about the position of a node after parsing (see also -XML::LibXML::Node::line_number()). - -IMPORTANT: Due to limitations in the libxml2 library line numbers greater than -65535 will be returned as 65535. Please see L<<<<<< http://bugzilla.gnome.org/show_bug.cgi?id=325533|http://bugzilla.gnome.org/show_bug.cgi?id=325533 >>>>>> for more details. - -By default line numbering is switched off (0). - - -=item B<load_ext_dtd> - - $parser->load_ext_dtd(1); - -Load external DTD subsets while parsing. - -This flag is also required for DTD Validation, to provide complete attribute, -and to expand entities, regardless if the document has an internal subset. Thus -switching off external DTD loading, will disable entity expansion, validation, -and complete attributes on internal subsets as well. - -If you leave this parser flag untouched, everything will work, because the -default is 1 (activated) - - -=item B<complete_attributes> - - $parser->complete_attributes(1); - -Complete the elements attributes lists with the ones defaulted from the DTDs. -By default, this option is enabled. - - -=item B<expand_xinclude> - - $parser->expand_xinclude(1); - -Expands XIinclude tags immediately while parsing the document. This flag -assures that the parser callbacks are used while parsing the included document. - - -=item B<load_catalog> - - $parser->load_catalog( $catalog_file ); - -Will use $catalog_file as a catalog during all parsing processes. Using a -catalog will significantly speed up parsing processes if many external -resources are loaded into the parsed documents (such as DTDs or XIncludes). - -Note that catalogs will not be available if an external entity handler was -specified. At the current state it is not possible to make use of both types of -resolving systems at the same time. - - -=item B<base_uri> - - $parser->base_uri( $your_base_uri ); - -In case of parsing strings or file handles, XML::LibXML doesn't know about the -base uri of the document. To make relative references such as XIncludes work, -one has to set a separate base URI, that is then used for the parsed documents. - - -=item B<gdome_dom> - - $parser->gdome_dom(1); - -THIS FLAG IS EXPERIMENTAL! - -Although quite powerful XML:LibXML's DOM implementation is limited if one needs -or wants full DOM level 2 or level 3 support. XML::GDOME is based on libxml2 as -well but provides a rather complete DOM implementation by wrapping libgdome. -This allows you to make use of XML::LibXML's full parser options and -XML::GDOME's DOM implementation at the same time. - -To make use of this function, one has to install libgdome and configure -XML::LibXML to use this library. For this you need to rebuild XML::LibXML! - - -=item B<clean_namespaces> - - $parser->clean_namespaces( 1 ); - -libxml2 2.6.0 and later allows to strip redundant namespace declarations from -the DOM tree. To do this, one has to set clean_namespaces() to 1 (TRUE). By -default no namespace cleanup is done. - - -=item B<no_network> - - $parser->no_network(1); - -Turn networking support on or off, enabled by default. If networking is off, -all attempts to fetch non-local resources (such as DTD or external entities) -will fail (unless custom callbacks are defined). It may be necessary to use -$parser->recover(1) for processing documents requiring such resources while -networking is off. - - - -=back - - -=head1 ERROR REPORTING - -XML::LibXML throws exceptions during parsing, validation or XPath processing -(and some other occasions). These errors can be caught by using I<<<<<< eval >>>>>> blocks. The error then will be stored in I<<<<<< $@ >>>>>>. - -XML::LibXML throws errors as they occurs and does not wait if a user test for -them. This is a very common misunderstanding in the use of XML::LibXML. If the -eval is omitted, XML::LibXML will always halt your script by "croaking" (see -Carp man page for details). - -Also note that an increasing number of functions throw errors if bad data is -passed. If you cannot assure valid data passed to XML::LibXML you should eval -these functions. - -Note: since version 1.59, get_last_error() is no longer available in -XML::LibXML for thread-safety reasons. - -=head1 AUTHORS - -Matt Sergeant, -Christian Glahn, -Petr Pajas - - -=head1 VERSION - -1.66 - -=head1 COPYRIGHT - -2001-2007, AxKit.com Ltd; 2002-2006 Christian Glahn; 2006-2008 Petr Pajas, All rights reserved. - -=cut |