// -*- C++ -*-

//=============================================================================
/**
 *  @file    Parser.h
 *
 *  $Id: Parser.h 78907 2007-07-16 10:22:58Z sowayaa $
 *
 *  @author Nanbor Wang <nanbor@cs.wustl.edu>
 *  @author Krishnakumar B <kitty@cs.wustl.edu>
 */
//=============================================================================

#ifndef _ACEXML_BASIC_PARSER_H_
#define _ACEXML_BASIC_PARSER_H_

#include /**/ "ace/pre.h"
#include "ACEXML/parser/parser/Parser_export.h"

#if !defined (ACE_LACKS_PRAGMA_ONCE)
#pragma once
#endif /* ACE_LACKS_PRAGMA_ONCE */

#include "ACEXML/common/XMLReader.h"
#include "ACEXML/common/LocatorImpl.h"
#include "ACEXML/common/NamespaceSupport.h"
#include "ACEXML/common/CharStream.h"
#include "ace/Obstack.h"
#include "ace/Functor.h"
#include "ace/SString.h"
#include "ace/Hash_Map_Manager.h"
#include "ace/Unbounded_Set.h"
#include "ace/Containers_T.h"
#include "ace/Auto_Ptr.h"
#include "ACEXML/parser/parser/Entity_Manager.h"
#include "ACEXML/parser/parser/ParserInternals.h"
#include "ACEXML/parser/parser/ParserContext.h"

/**
 * @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h"
 *
 * @brief A SAX based parser.
 *
 * Implements the ACEXML_XMLReader interface.  Applications register
 * content, DTD and error handlers and an entity resolver through the
 * set*Handler()/setEntityResolver() methods and then call one of the
 * parse() overloads.  The parser does not manage the memory of the
 * registered handlers.
 */
class ACEXML_PARSER_Export ACEXML_Parser : public ACEXML_XMLReader
{
public:
  /// Default constructor.
  ACEXML_Parser (void);

  /// Destructor.
  virtual ~ACEXML_Parser (void);

  /**
   * Initialize the parser state.
   *
   * @param input Input source the parser is initialized with.
   *
   * @retval 0 if parser was initialized correctly else -1.
   */
  int initialize (ACEXML_InputSource* input);

  /**
   * Return the current content handler.
   */
  virtual ACEXML_ContentHandler *getContentHandler (void) const;

  /**
   * Return the current DTD handler.
   */
  virtual ACEXML_DTDHandler *getDTDHandler (void) const;

  /**
   * Return the current entity resolver.
   */
  virtual ACEXML_EntityResolver *getEntityResolver (void) const;

  /**
   * Return the current error handler.
   */
  virtual ACEXML_ErrorHandler *getErrorHandler (void) const;

  /**
   * Look up the value of a feature.  This method allows
   * programmers to check whether a specific feature has been
   * activated in the parser.
   *
   * @param name Name of the feature to look up.
   */
  virtual int getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL);

  /**
   * Activate or deactivate a feature.
   *
   * @param name Name of the feature to change.
   * @param boolean_value New state of the feature.
   */
  virtual void setFeature (const ACEXML_Char *name,
                           int boolean_value ACEXML_ENV_ARG_DECL);

  /**
   * Look up the value of a property.
   */
  virtual void * getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL);

  /**
   * Set the value of a property.
   */
  virtual void setProperty (const ACEXML_Char *name,
                            void *value ACEXML_ENV_ARG_DECL);

  /**
   * Parse an XML document.
   */
  virtual void parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL);

  /**
   * Parse an XML document from a system identifier (URI).
   */
  virtual void parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
    ;

  /**
   * Allow an application to register a content event handler.
   */
  virtual void setContentHandler (ACEXML_ContentHandler *handler);

  /**
   * Allow an application to register a DTD event handler.
   */
  virtual void setDTDHandler (ACEXML_DTDHandler *handler);

  /**
   * Allow an application to register an entity resolver.
   */
  virtual void setEntityResolver (ACEXML_EntityResolver *resolver);

  /**
   * Allow an application to register an error event handler.
   */
  virtual void setErrorHandler (ACEXML_ErrorHandler *handler);



protected:
  /**
   * Parse XML Prolog.
   */
  void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse VersionInfo declaration.
   *
   */
  void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an EncodingDecl declaration.
   *
   */
  void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an XMLDecl declaration.
   *
   */
  void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a TextDecl declaration.
   */
  int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a PI statement.  The first character encountered
   * should always be '?' in the PI prefix "@<?".
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse the DOCTYPE declaration.  The first character encountered
   * should always be 'D' in doctype prefix: "@<@!DOCTYPE".
   */
  int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an XML element.  The first character encountered should
   * be the first character of the element "Name".
   *
   * @param is_root If not 0, then we are expecting to see the "root"
   * element now, and the next element's name needs to match the name
   * defined in DOCTYPE definition, i.e., @a this->doctype_.
   *
   * @todo Instead of simply checking for the root element based on the
   * argument @a is_root, we should instead either pass in some sort
   * of validator or allow the function to return the element name so it
   * can be used in a validator.
   */
  void parse_element (int is_root ACEXML_ENV_ARG_DECL);

  /**
   * Parse a content declaration.
   *
   */
  int parse_content (const ACEXML_Char* startname, const ACEXML_Char*& ns_uri,
                     const ACEXML_Char*& ns_lname, int ns_flag
                     ACEXML_ENV_ARG_DECL);

  /**
   * Parse a character reference, i.e., "&#x20;" or "&#30;".  The first
   * character encountered should be the '#' char.
   *
   * @param buf points to a character buffer for the result.
   *
   * @param len In/out argument which initially specifies the size of the
   * buffer and is later set to the no. of characters in the reference.
   *
   * @retval 0 on success and -1 otherwise.
   */
  int parse_char_reference (ACEXML_Char *buf, size_t& len);

  /**
   * Parse a reference name, i.e., foo in "&foo;" or "%foo;".  The first
   * character encountered should be the character following '&' or '%'.
   * Effectively the same as @sa parse_name but we don't use the parser's
   * obstack.  Caller is responsible for deleting the memory.
   *
   * @retval A pointer to name of reference, 0 otherwise.
   */
  ACEXML_Char* parse_reference_name (void);

  /**
   * Parse a CDATA section.  The first character should always be the first
   * '[' in CDATA definition.
   *
   * @retval 0 on success.
   * @retval -1 if fail.
   */
  int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a "markupdecl" section, this includes both "markupdecl" and
   * "DeclSep" sections in XML specification
   */
  int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Skip over a comment.  The first character encountered should always be
   * the first '-' in the comment prefix "@<@!--".
   */
  int parse_comment (void);

  /**
   * Parse an "ELEMENT" decl.  The first character this method
   * expects is always the 'L' (the second char) in the word
   * "ELEMENT".
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an "ENTITY" decl.  The first character this method expects
   * is always the 'N' (the second char) in the word "ENTITY".
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an "ATTLIST" decl.  The first character this method
   * expects is always the 'A' (the first char) in the word
   * "ATTLIST".
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an AttType declaration.
   *
   */
  int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a "NOTATION" decl.  The first character this method
   * expects is always the 'N' (the first char) in the word
   * "NOTATION".
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an ExternalID or a reference to PUBLIC ExternalID.
   * Possible cases are in the forms of: <code>
   *
   * SYSTEM 'quoted string representing system resource'
   * PUBLIC 'quoted name of public ID' 'quoted resource'
   * PUBLIC 'quoted name we are referring to'
   * </code>
   *
   * The first character this function sees must be either 'S' or 'P'.
   * When the function finishes parsing, the input stream points
   * at the first non-whitespace character.
   *
   * @param publicId returns the unquoted publicId read.  If none
   *        is available, it will be reset to 0.
   * @param systemId returns the unquoted systemId read.  If none
   *        is available, it will be reset to 0.
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId
                         ACEXML_ENV_ARG_DECL);

  /**
   * Parse an external DTD.
   *
   */
  int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an external subset.  This does the actual parsing of an external
   * subset and is called by @sa parse_external_dtd.
   *
   */
  int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a markupDecl section.
   *
   */
  int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a conditionalSect declaration.
   *
   */
  int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an includeSect declaration.
   *
   */
  int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an ignoreSect declaration.
   *
   */
  int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a PEReference.
   *
   */
  int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a Reference.
   *
   */
  int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an entityValue.
   *
   */
  int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL);

  /**
   * Parse a DefaultDecl specification.
   *
   */
  int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL);


  /**
   * Parse the "children" and "Mixed" non-terminals in contentspec.
   *
   * The first character this function sees must be the first
   * open paren '(' in children.
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a @c cp non-terminal.  @c cp can either be a @c seq or a @c choice.
   * This function calls itself recursively.
   *
   * @param skip_open_paren when non-zero, it indicates that the open paren of
   *        the @c seq or @c choice has already been removed from the input
   *        stream.
   *
   * @retval 0 on success, -1 otherwise.
   */
  int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL);

  /**
   * Parse a name from the input CharStream.  If @a ch @!= 0, then we have
   * already consumed the first name character from the input CharStream,
   * otherwise, parse_name will use this->get() to acquire the initial
   * character.
   *
   * @return A pointer to the string in the obstack, 0 if it's not a
   *         valid name.
   */
  ACEXML_Char *parse_name (ACEXML_Char ch = 0);

  /**
   * Parse a NMTOKEN from the input stream.
   *
   * @return A pointer to the string in the obstack, 0 if it's not a valid
   *         NMTOKEN.
   */
  ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0);

  /**
   * Parse the version string in an XML Prolog section.
   *
   * @param str String containing the version number if successful.
   * @return 0 if the string was read successfully.
   *
   * NOTE(review): this comment previously read "0 otherwise" for the
   * failure case as well, which cannot be right.  Sibling functions such
   * as parse_sddecl document -1 on failure -- confirm against the
   * implementation.
   */
  int parse_version (ACEXML_Char*& str);

  /**
   * Parse the version number in a VersionInfo declaration.
   */
  int parse_version_num (ACEXML_Char*& str);

  /**
   * Parse the encoding name in an XML Prolog section.
   *
   * @param str String containing the encoding name if successful.
   * @return 0 if the string was read successfully.
   *
   * NOTE(review): this comment previously read "0 otherwise" for the
   * failure case as well, which cannot be right.  Sibling functions such
   * as parse_sddecl document -1 on failure -- confirm against the
   * implementation.
   */
  int parse_encname (ACEXML_Char*& str);

  /**
   * Parse a SDDecl string.
   *
   * @param str String containing the parsed SDDecl value if successful.
   *        NOTE(review): this was previously described as "the encoding
   *        name", apparently copied from parse_encname -- confirm the
   *        exact contents against the implementation.
   * @return 0 if the string was read successfully, -1 otherwise.
   */
  int parse_sddecl (ACEXML_Char*& str);

  /**
   * Parse an attribute name.
   *
   * @retval str String containing the value of the attribute name
   *         if successful.
   * @retval 0 otherwise.
   */
  ACEXML_Char* parse_attname (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse an attribute value.
   *
   * @param str String containing the value of the attribute if successful.
   * @return 0 if attribute value was read successfully, -1 otherwise.
   */
  int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL);

  /**
   * Parse a tokenized type attribute.
   *
   * @return 0 if attribute type was read successfully, -1 otherwise.
   */
  int parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Parse a SystemLiteral.
   *
   * @param str String containing the SystemLiteral if successful.
   * @return 0 if the string was read successfully.
   *
   * NOTE(review): this comment previously read "0 otherwise" for the
   * failure case as well, which cannot be right.  Sibling functions such
   * as parse_sddecl document -1 on failure -- confirm against the
   * implementation.
   */
  int parse_system_literal (ACEXML_Char*& str);

  /**
   * Parse a PubidLiteral.
   *
   * @param str String containing the PubidLiteral if successful.
   * @return 0 if the string was read successfully.
   *
   * NOTE(review): this comment previously read "0 otherwise" for the
   * failure case as well, which cannot be right.  Sibling functions such
   * as parse_sddecl document -1 on failure -- confirm against the
   * implementation.
   */
  int parse_pubid_literal (ACEXML_Char*& str);

  /**
   * Check if a character @a c is a whitespace.
   *
   * @retval 1 if @a c is a valid white space character. 0 otherwise.
   */
  int is_whitespace (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is a valid Char.
   *
   * @retval 1 if @a c is a valid character. 0 otherwise.
   */
  int isChar (ACEXML_UCS4 c) const;

  /**
   * Check if a character @a c is a valid CharRef character.
   *
   * @retval 1 if @a c is a valid character reference character, 0 otherwise.
   */
  int isCharRef (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is a BaseChar.
   *
   * @retval 1 if @a c is a valid BaseChar character, 0 otherwise.
   */
  int isBasechar (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is an Ideographic.
   *
   * @retval 1 if @a c is a valid Ideographic character, 0 otherwise.
   */
  int isIdeographic (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is a CombiningChar.
   *
   * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise.
   */
  int isCombiningchar (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is a Digit.
   *
   * @retval 1 if @a c is a valid Digit character, 0 otherwise.
   */
  int isDigit (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is an Extender.
   *
   * @retval 1 if @a c is a valid Extender character, 0 otherwise.
   */
  int isExtender (const ACEXML_Char c) const;

  /**
   * Check if a character @a c is a Letter.
   *
   * @retval 1 if @a c is a valid Letter character, 0 otherwise.
   */
  int isLetter (const ACEXML_Char c) const;

  /**
   * Check if a character is an acceptable NameChar.
   *
   * @retval 1 if @a c is a valid NameChar character, 0 otherwise.
   */
  int isNameChar (const ACEXML_Char c) const;

  /**
   * Check if a character is a PubidChar.
   *
   * @retval 1 if @a c is a valid PubidChar character, 0 otherwise.
   */
  int isPubidChar (const ACEXML_Char c) const;

  /// Get a character.
  virtual ACEXML_Char get (void);

  /// Peek a character.
  virtual ACEXML_Char peek (void);

private:

  // *** Helper functions for parsing XML

  /**
   * Skip any whitespaces encountered until the first non-whitespace
   * character is encountered.
   *
   * @return The next non-whitespace character from the CharStream.
   *
   * @sa skip_whitespace_count
   */
  ACEXML_Char skip_whitespace (void);

  /**
   * Skip any whitespaces encountered until the first non-whitespace
   * character.  The first non-whitespace character is not consumed.
   * This method does peek into the input CharStream and therefore
   * is more expensive than @ref skip_whitespace.
   *
   * @param peek If non-null, @a peek points to a ACEXML_Char where
   *        skip_whitespace_count stores the first non-whitespace
   *        character it sees (character is not removed from the stream.)
   *
   * @return The number of whitespace characters consumed.
   *
   * @sa skip_whitespace
   */
  int skip_whitespace_count (ACEXML_Char *peek = 0);

  /**
   * Skip an equal sign.
   *
   * @retval 0 when succeeds, -1 if no equal sign is found.
   */
  int skip_equal (void);

  /**
   * Get a quoted string.  Quoted strings are used to specify
   * attribute values and this routine will replace character and
   * entity references on-the-fly.  Parameter entities are not allowed
   * (or replaced) in this function.  (But regular entities are.)
   *
   * @param str returns the un-quoted string.
   *
   * @retval 0 on success, -1 otherwise.
   */
  int get_quoted_string (ACEXML_Char *&str);

  /**
   * Check if a character @a c is a Digit.
   *
   * NOTE(review): this description is identical to the one on isDigit;
   * how the two checks differ is not visible in this header -- confirm
   * against the implementation and document the distinction.
   *
   * @retval 1 if @a c is a valid Digit character, 0 otherwise.
   */
  int isNormalDigit (const ACEXML_Char c) const;

  /**
   * Dispatch errors to ErrorHandler.
   *
   * @param msg Message describing the error.
   */
  void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL);

  /**
   * Dispatch warnings to ErrorHandler.
   *
   * @param msg Message describing the warning.
   */
  void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL);

  /**
   * Dispatch fatal errors to ErrorHandler.
   *
   * @param msg Message describing the fatal error.
   */
  void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL);

  /**
   * Dispatch prefix mapping calls to the ContentHandler.
   *
   * @param prefix Namespace prefix
   * @param uri Namespace URI
   * @param start 1 => startPrefixMapping 0 => endPrefixMapping
   */
  void prefix_mapping (const ACEXML_Char* prefix,
                       const ACEXML_Char* uri,
                       int start ACEXML_ENV_ARG_DECL);
  /**
   * Parse a keyword.
   */
  int parse_token (const ACEXML_Char* keyword);

  /**
   * Push the current context on to the stack.
   *
   */
  int push_context (ACEXML_Parser_Context* context);

  /**
   * Pop the top element in the stack and replace current context with that.
   */
  size_t pop_context (int GE_ref ACEXML_ENV_ARG_DECL);

  /**
   * Create a new ACEXML_CharStream from @a systemId and @a publicId and
   * replace the current input stream with the newly created stream.
   *
   * NOTE(review): the stream is passed in as @a cstream, so "create"
   * may be inaccurate for this overload -- confirm against the
   * implementation.
   */
  virtual int switch_input (ACEXML_CharStream* cstream,
                            const ACEXML_Char* systemId,
                            const ACEXML_Char* publicId = 0);
  /**
   * Create a new ACEXML_InputSource from @a systemId and @a publicId and
   * replace the current input source with the newly created InputSource.
   *
   * NOTE(review): the input source is passed in as @a input, so "create"
   * may be inaccurate for this overload -- confirm against the
   * implementation.
   */
  virtual int switch_input (ACEXML_InputSource* input,
                            const ACEXML_Char* systemId,
                            const ACEXML_Char* publicId = 0);

  /**
   * Check for a parameter entity reference.  This is used to check for the
   * occurrence of a PE Reference within markupDecl.  Additionally this
   * function consumes any leading or trailing whitespace around the PE
   * Reference.
   *
   * @retval Number of whitespace characters skipped.
   */
  int check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL);

  /**
   * Reset the parser state.
   *
   */
  void reset (void);

  /**
   * Very trivial, non-conformant normalization of a systemid.
   *
   */
  ACEXML_Char* normalize_systemid (const ACEXML_Char* systemId);

  // Feature names:

  /**
   * \addtogroup acexml_parser_features
   * @{
   */

  /**
   * @var simple_parsing_feature_
   *
   * This constant string defines the name of "simple XML parsing"
   * feature.  When this feature is enabled, ACEXML parser is allowed
   * to parse a simple XML stream without mandated XML prolog
   * and no DTD definition.
   */
  static const ACEXML_Char simple_parsing_feature_[];

  /**
   * @var namespaces_feature_
   *
   * This constant string defines the SAX XML Namespace feature.  When this
   * feature is enabled, ACEXML parser allows access by namespace qualified
   * names.
   */
  static const ACEXML_Char namespaces_feature_[];

  /**
   * @var namespace_prefixes_feature_
   *
   * This constant string defines the SAX XML Namespace prefixes feature.
   * Normally the list of attributes returned by the parser will not
   * contain attributes used as namespace declarations (xmlns*).  When this
   * feature is enabled, the list of attributes contains the namespace
   * declarations also.
   */
  static const ACEXML_Char namespace_prefixes_feature_[];

  /**
   * @var validation_feature_
   *
   * This constant string defines the SAX XML Validation feature.  When
   * this feature is enabled, the parser validates the document in
   * addition to checking for well-formedness.
   */
  static const ACEXML_Char validation_feature_[];

  /** @} */

  /// Keeping track of the handlers.  We do not manage the memory for
  /// handlers.
  ACEXML_DTDHandler *dtd_handler_;
  ACEXML_EntityResolver *entity_resolver_;
  ACEXML_ContentHandler *content_handler_;
  ACEXML_ErrorHandler *error_handler_;

  /// Document Type (the name the root element is checked against,
  /// see parse_element)
  ACEXML_Char *doctype_;

  /// Current parser context
  ACEXML_Parser_Context* current_;

  /// Stack used to hold the Parser_Context
  ACE_Unbounded_Stack<ACEXML_Parser_Context*> ctx_stack_;

  /*
   * The following two are essentially chains of references and are used by
   * the parser to determine if there is any recursion.  We keep two of
   * these, one for general entities and one for parameter entities, as they
   * both fall under different namespaces.
   *
   */
  /// Stack used to hold the general entity references that are active.
  ACE_Unbounded_Stack<ACEXML_Char*> GE_reference_;

  /// Stack used to hold the parameter entity references that are active.
  ACE_Unbounded_Stack<ACEXML_Char*> PE_reference_;

  /// Obstack used by the parser to hold all the strings parsed
  ACE_Obstack_T<ACEXML_Char> obstack_;

  /// Alternative obstack used to hold any strings when the original is in use
  ACE_Obstack_T<ACEXML_Char> alt_stack_;

  /// Namespace stack used by the parser to implement support for Namespaces
  ACEXML_NamespaceSupport xml_namespace_;

  /// T => We are processing a nested namespace
  int nested_namespace_;

  /// Set of internal parsed general entities in the document
  ACEXML_Entity_Manager internal_GE_;

  /// Set of external parsed general entities in the document
  ACEXML_Entity_Manager external_GE_;

  /// Set of unparsed entities in the document
  ACEXML_Entity_Manager unparsed_entities_;

  /// Set of predefined entities used by the parser
  ACEXML_Entity_Manager predef_entities_;

  /// Set of internal parsed parameter entities in the document
  ACEXML_Entity_Manager internal_PE_;

  /// Set of external parsed parameter entities in the document
  ACEXML_Entity_Manager external_PE_;

  /// Set of notations declared in the document
  ACEXML_Entity_Manager notations_;

  /// State of the parser when it encounters a reference.
  ACEXML_ParserInt::ReferenceState ref_state_;

  /// T => We are parsing an external subset
  int external_subset_;

  /// T => We are parsing an external entity value
  int external_entity_;

  /// T => Internal DTD has parameter entity references
  int has_pe_refs_;

  /// If set, the document is a standalone XML document
  int standalone_;

  /// If set, the document has an external DTD subset
  int external_dtd_;

  /// If set, the document has an internal DTD
  int internal_dtd_;

  // Feature flags

  /// If set, the parser should parse a document without a prolog
  int simple_parsing_;

  /// If set, the parser should also validate
  int validate_;

  /// If set, the parser should allow access by namespace qualified names.
  int namespaces_;

  /// If set, the parser should include namespace declarations in the list
  /// of attributes of an element.
  int namespace_prefixes_;

};

#if defined (__ACEXML_INLINE__)
# include "ACEXML/parser/parser/Parser.inl"
#endif /* __ACEXML_INLINE__ */

#include /**/ "ace/post.h"

#endif /* _ACEXML_BASIC_PARSER_H_ */