00001 // -*- C++ -*- 00002 00003 //============================================================================= 00004 /** 00005 * @file Parser.h 00006 * 00007 * Parser.h,v 1.14 2006/02/09 22:53:20 kitty Exp 00008 * 00009 * @author Nanbor Wang <nanbor@cs.wustl.edu> 00010 * @author Krishnakumar B <kitty@cs.wustl.edu> 00011 */ 00012 //============================================================================= 00013 00014 #ifndef _ACEXML_BASIC_PARSER_H_ 00015 #define _ACEXML_BASIC_PARSER_H_ 00016 00017 #include /**/ "ace/pre.h" 00018 #include "ACEXML/parser/parser/Parser_export.h" 00019 00020 #if !defined (ACE_LACKS_PRAGMA_ONCE) 00021 #pragma once 00022 #endif /* ACE_LACKS_PRAGMA_ONCE */ 00023 00024 #include "ACEXML/common/XMLReader.h" 00025 #include "ACEXML/common/LocatorImpl.h" 00026 #include "ACEXML/common/NamespaceSupport.h" 00027 #include "ACEXML/common/CharStream.h" 00028 #include "ace/Obstack.h" 00029 #include "ace/Functor.h" 00030 #include "ace/SString.h" 00031 #include "ace/Hash_Map_Manager.h" 00032 #include "ace/Unbounded_Set.h" 00033 #include "ace/Containers_T.h" 00034 #include "ace/Auto_Ptr.h" 00035 #include "ACEXML/parser/parser/Entity_Manager.h" 00036 #include "ACEXML/parser/parser/ParserInternals.h" 00037 #include "ACEXML/parser/parser/ParserContext.h" 00038 00039 /** 00040 * @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h" 00041 * 00042 * @brief A SAX based parser. 00043 * 00044 */ 00045 class ACEXML_PARSER_Export ACEXML_Parser : public ACEXML_XMLReader 00046 { 00047 public: 00048 /// Default constructor. 00049 ACEXML_Parser (void); 00050 00051 /// Destructor. 00052 virtual ~ACEXML_Parser (void); 00053 00054 /** 00055 * Initialize the parser state. 00056 * 00057 * @retval 0 if parser was initialized correctly else -1. 00058 */ 00059 int initialize (ACEXML_InputSource* input); 00060 00061 /** 00062 * Return the current content handler. 00063 */ 00064 virtual ACEXML_ContentHandler *getContentHandler (void) const; 00065 00066 /* 00067 * Return the current DTD handler. 00068 */ 00069 virtual ACEXML_DTDHandler *getDTDHandler (void) const; 00070 00071 /* 00072 * Return the current entity resolver. 00073 */ 00074 virtual ACEXML_EntityResolver *getEntityResolver (void) const; 00075 00076 /* 00077 * Return the current error handler. 00078 */ 00079 virtual ACEXML_ErrorHandler *getErrorHandler (void) const; 00080 00081 /** 00082 * Look up the value of a feature. This method allows 00083 * programmers to check whether a specific feature has been 00084 * activated in the parser. 00085 */ 00086 virtual int getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) 00087 ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, 00088 ACEXML_SAXNotSupportedException)) ; 00089 00090 /** 00091 * Activating or deactivating a feature. 00092 */ 00093 virtual void setFeature (const ACEXML_Char *name, 00094 int boolean_value ACEXML_ENV_ARG_DECL) 00095 ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, 00096 ACEXML_SAXNotSupportedException)) ; 00097 00098 /* 00099 * Look up the value of a property. 00100 */ 00101 virtual void * getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL) 00102 ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, 00103 ACEXML_SAXNotSupportedException)) ; 00104 00105 /* 00106 * Set the value of a property. 00107 */ 00108 virtual void setProperty (const ACEXML_Char *name, 00109 void *value ACEXML_ENV_ARG_DECL) 00110 ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException, 00111 ACEXML_SAXNotSupportedException)) ; 00112 00113 /* 00114 * Parse an XML document. 00115 */ 00116 virtual void parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL) 00117 ACE_THROW_SPEC ((ACEXML_SAXException)) ; 00118 00119 /* 00120 * Parse an XML document from a system identifier (URI). 00121 */ 00122 virtual void parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL) 00123 ACE_THROW_SPEC ((ACEXML_SAXException)) 00124 ; 00125 00126 /* 00127 * Allow an application to register a content event handler. 00128 */ 00129 virtual void setContentHandler (ACEXML_ContentHandler *handler); 00130 00131 /* 00132 * Allow an application to register a DTD event handler. 00133 */ 00134 virtual void setDTDHandler (ACEXML_DTDHandler *handler); 00135 00136 /* 00137 * Allow an application to register an entity resolver. 00138 */ 00139 virtual void setEntityResolver (ACEXML_EntityResolver *resolver); 00140 00141 /* 00142 * Allow an application to register an error event handler. 00143 */ 00144 virtual void setErrorHandler (ACEXML_ErrorHandler *handler); 00145 00146 00147 00148 protected: 00149 /** 00150 * Parse XML Prolog. 00151 */ 00152 void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL) 00153 ACE_THROW_SPEC ((ACEXML_SAXException)); 00154 00155 /** 00156 * Parse VersionInfo declaration. 00157 * 00158 */ 00159 void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL) 00160 ACE_THROW_SPEC ((ACEXML_SAXException)); 00161 00162 /** 00163 * Parse a EncodingDecl declaration. 00164 * 00165 */ 00166 void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00167 ACE_THROW_SPEC ((ACEXML_SAXException)); 00168 00169 /** 00170 * Parse a XMLDecl declaration. 00171 * 00172 */ 00173 void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00174 ACE_THROW_SPEC ((ACEXML_SAXException)); 00175 00176 /** 00177 * Parse a TextDecl declaration. 00178 */ 00179 int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00180 ACE_THROW_SPEC ((ACEXML_SAXException)); 00181 00182 /** 00183 * Parse a PI statement. The first character encountered 00184 * should always be '?' in the PI prefix "@<?". 00185 * 00186 * @retval 0 on success, -1 otherwise. 00187 */ 00188 int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL) 00189 ACE_THROW_SPEC ((ACEXML_SAXException)); 00190 00191 /** 00192 * Parse the DOCTYPE declaration. The first character encountered 00193 * should always be 'D' in doctype prefix: "@<@!DOCTYPE". 00194 */ 00195 int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL) 00196 ACE_THROW_SPEC ((ACEXML_SAXException)); 00197 00198 /** 00199 * Parse an XML element. The first character encountered should 00200 * be the first character of the element "Name". 00201 * 00202 * @param is_root If not 0, then we are expecting to see the "root" 00203 * element now, and the next element's name need to match the name 00204 * defined in DOCTYPE definition, i.e., @a this->doctype_. 00205 * 00206 * @todo Instead of simply checking for the root element based on the 00207 * argument @a is_root, we should instead either pass in some sort 00208 * of validator or allow the function to return the element name so it 00209 * can be used in a validator. 00210 */ 00211 void parse_element (int is_root ACEXML_ENV_ARG_DECL) 00212 ACE_THROW_SPEC ((ACEXML_SAXException)); 00213 00214 /** 00215 * Parse a content declaration. 00216 * 00217 */ 00218 int parse_content (const ACEXML_Char* startname, const ACEXML_Char*& ns_uri, 00219 const ACEXML_Char*& ns_lname, int ns_flag 00220 ACEXML_ENV_ARG_DECL) 00221 ACE_THROW_SPEC ((ACEXML_SAXException)); 00222 00223 /** 00224 * Parse a character reference, i.e., " " or "". The first 00225 * character encountered should be the '#' char. 00226 * 00227 * @param buf points to a character buffer for the result. 00228 * 00229 * @param len In/out argument which initially specifies the size of the 00230 * buffer and is later set to the no. of characters in the reference. 00231 * 00232 * @retval 0 on success and -1 otherwise. 00233 */ 00234 int parse_char_reference (ACEXML_Char *buf, size_t& len); 00235 00236 /** 00237 * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first 00238 * character encountered should be the character following '&' or '%'. 00239 * Effectively the same as @sa parse_name but we don't use the parser's 00240 * obstack. Caller is responsible for deleting the memory. 00241 * 00242 * @retval A pointer to name of reference, 0 otherwise. 00243 */ 00244 ACEXML_Char* parse_reference_name (void); 00245 00246 /** 00247 * Parse a CDATA section. The first character should always be the first 00248 * '[' in CDATA definition. 00249 * 00250 * @retval 0 on success. 00251 * @retval -1 if fail. 00252 */ 00253 int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL) 00254 ACE_THROW_SPEC ((ACEXML_SAXException)); 00255 00256 /** 00257 * Parse a "markupdecl" section, this includes both "markupdecl" and 00258 * "DeclSep" sections in XML specification 00259 */ 00260 int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL) 00261 ACE_THROW_SPEC ((ACEXML_SAXException)); 00262 00263 /** 00264 * Skip over a comment. The first character encountered should always be 00265 * the first '-' in the comment prefix "@<@!--". 00266 */ 00267 int parse_comment (void); 00268 00269 /** 00270 * Parse an "ELEMENT" decl. The first character this method 00271 * expects is always the 'L' (the second char) in the word 00272 * "ELEMENT". 00273 * 00274 * @retval 0 on success, -1 otherwise. 00275 */ 00276 int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00277 ACE_THROW_SPEC ((ACEXML_SAXException)); 00278 00279 /** 00280 * Parse an "ENTITY" decl. The first character this method expects 00281 * is always the 'N' (the second char) in the word "ENTITY". 00282 * 00283 * @retval 0 on success, -1 otherwise. 00284 */ 00285 int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00286 ACE_THROW_SPEC ((ACEXML_SAXException)); 00287 00288 /** 00289 * Parse an "ATTLIST" decl. Thse first character this method 00290 * expects is always the 'A' (the first char) in the word 00291 * "ATTLIST". 00292 * 00293 * @retval 0 on success, -1 otherwise. 00294 */ 00295 int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00296 ACE_THROW_SPEC ((ACEXML_SAXException)); 00297 00298 /** 00299 * Parse a AttType declaration. 00300 * 00301 */ 00302 int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL) 00303 ACE_THROW_SPEC ((ACEXML_SAXException)); 00304 00305 /** 00306 *Parse a "NOTATION" decl. The first character this method 00307 * expects is always the 'N' (the first char) in the word 00308 * "NOTATION". 00309 * 00310 * @retval 0 on success, -1 otherwise. 00311 */ 00312 int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00313 ACE_THROW_SPEC ((ACEXML_SAXException)); 00314 00315 /** 00316 * Parse an ExternalID or a reference to PUBLIC ExternalID. 00317 * Possible cases are in the forms of: <code> 00318 * 00319 * SYSTEM 'quoted string representing system resource' 00320 * PUBLIC 'quoted name of public ID' 'quoted resource' 00321 * PUBLIC 'quoted name we are referring to' 00322 * </code> 00323 * 00324 * The first character this function sees must be either 'S' or 'P'. 00325 * When the function finishes parsing, the input stream points 00326 * at the first non-whitespace character. 00327 * 00328 * @param publicId returns the unquoted publicId read. If none 00329 * is available, it will be reset to 0. 00330 * @param systemId returns the unquoted systemId read. If none 00331 * is available, it will be reset to 0. 00332 * 00333 * @retval 0 on success, -1 otherwise. 00334 */ 00335 int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId 00336 ACEXML_ENV_ARG_DECL) 00337 ACE_THROW_SPEC ((ACEXML_SAXException)); 00338 00339 /** 00340 * Parse an external DTD. 00341 * 00342 */ 00343 int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL) 00344 ACE_THROW_SPEC ((ACEXML_SAXException)); 00345 00346 /** 00347 * Parse an external subset. This does the actual parsing of an external 00348 * subset and is called by @sa parse_external_dtd. 00349 * 00350 */ 00351 int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL) 00352 ACE_THROW_SPEC ((ACEXML_SAXException)); 00353 00354 /** 00355 * Parse a markupDecl section. 00356 * 00357 */ 00358 int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL) 00359 ACE_THROW_SPEC ((ACEXML_SAXException)); 00360 00361 /** 00362 * Parse a conditionalSect declaration. 00363 * 00364 */ 00365 int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL) 00366 ACE_THROW_SPEC ((ACEXML_SAXException)); 00367 00368 /** 00369 * Parse a includeSect declaration. 00370 * 00371 */ 00372 int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL) 00373 ACE_THROW_SPEC ((ACEXML_SAXException)); 00374 00375 /** 00376 * 00377 * Parse a ignoreSect declaration. 00378 */ 00379 int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL) 00380 ACE_THROW_SPEC ((ACEXML_SAXException)); 00381 00382 /** 00383 * Parse a PEReference. 00384 * 00385 */ 00386 int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL) 00387 ACE_THROW_SPEC ((ACEXML_SAXException)); 00388 00389 /** 00390 * Parse a Reference. 00391 * 00392 */ 00393 int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL) 00394 ACE_THROW_SPEC ((ACEXML_SAXException)); 00395 00396 /** 00397 * Parse an entityValue. 00398 * 00399 */ 00400 int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL) 00401 ACE_THROW_SPEC ((ACEXML_SAXException)); 00402 00403 /** 00404 * Parse a DefaultDecl specification. 00405 * 00406 */ 00407 int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL) 00408 ACE_THROW_SPEC ((ACEXML_SAXException)); 00409 00410 00411 /** 00412 * Parse the "children" and "Mixed" non-terminals in contentspec. 00413 * 00414 * The first character this function sees must be the first 00415 * open paren '(' in children. 00416 * 00417 * @retval 0 on success, -1 otherwise. 00418 */ 00419 int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL) 00420 ACE_THROW_SPEC ((ACEXML_SAXException)); 00421 00422 /** 00423 * Parse a @c cp non-terminal. @c cp can either be a @c seq or a @c choice. 00424 * This function calls itself recursively. 00425 * 00426 * @param skip_open_paren when non-zero, it indicates that the open paren of 00427 * the @c seq or @c choice has already been removed from the input 00428 * stream. 00429 * 00430 * @retval 0 on success, -1 otherwise. 00431 */ 00432 int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL) 00433 ACE_THROW_SPEC ((ACEXML_SAXException)); 00434 00435 /** 00436 * Parse a name from the input CharStream. If @a ch @!= 0, then we have 00437 * already consumed the first name character from the input CharStream, 00438 * otherwise, parse_name will use this->get() to acquire the initial 00439 * character. 00440 * 00441 * @return A pointer to the string in the obstack, 0 if it's not a 00442 * valid name. 00443 */ 00444 ACEXML_Char *parse_name (ACEXML_Char ch = 0); 00445 00446 /** 00447 * Parse a NMTOKEN from the input stream. 00448 * 00449 * @return A pointer to the string in the obstack, 0 if it's not a valid 00450 * NMTOKEN. 00451 */ 00452 ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0); 00453 00454 /** 00455 * Parse the version string in an XML Prolog section. 00456 * 00457 * @param str String containing the version number if successful. 00458 * @return 0 if the string was read successfully, 0 otherwise. 00459 */ 00460 int parse_version (ACEXML_Char*& str); 00461 00462 /** 00463 * Parse the version number in a VersionInfo declaration. 00464 */ 00465 int parse_version_num (ACEXML_Char*& str); 00466 00467 /** 00468 * Parse the encoding name in an XML Prolog section. 00469 * 00470 * @param str String containing the encoding name if successful. 00471 * @return 0 if the string was read successfully, 0 otherwise. 00472 */ 00473 int parse_encname (ACEXML_Char*& str); 00474 00475 /** 00476 * Parse a SDDecl string. 00477 * 00478 * @param str String containing the encoding name if successful. 00479 * @return 0 if the string was read successfully, -1 otherwise. 00480 */ 00481 int parse_sddecl (ACEXML_Char*& str); 00482 00483 /** 00484 * Parse an attribute name. 00485 * 00486 * @retval str String containing the value of the attribute name 00487 * if successful. 00488 * @retval 0 otherwise. 00489 */ 00490 ACEXML_Char* parse_attname (ACEXML_ENV_SINGLE_ARG_DECL) 00491 ACE_THROW_SPEC ((ACEXML_SAXException)); 00492 00493 /** 00494 * Parse an attribute value. 00495 * 00496 * @param str String containing the value of the attribute if successful. 00497 * @return 0 if attribute value was read successfully, -1 otherwise. 00498 */ 00499 int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL) 00500 ACE_THROW_SPEC ((ACEXML_SAXException)); 00501 00502 /** 00503 * Parse a tokenized type attribute. 00504 * 00505 * @return 0 if attribute type was read successfully, -1 otherwise. 00506 */ 00507 int parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL) 00508 ACE_THROW_SPEC ((ACEXML_SAXException)); 00509 00510 /** 00511 * Parse a SystemLiteral. 00512 * 00513 * @param str String containing the SystemLiteral if successful. 00514 * @return 0 if the string was read successfully, 0 otherwise. 00515 */ 00516 int parse_system_literal (ACEXML_Char*& str); 00517 00518 /** 00519 * Parse a PubidLiteral. 00520 * 00521 * @param str String containing the PubidLiteral if successful. 00522 * @return 0 if the string was read successfully, 0 otherwise. 00523 */ 00524 int parse_pubid_literal (ACEXML_Char*& str); 00525 00526 /** 00527 * Check if a character @a c is a whitespace. 00528 * 00529 * @retval 1 if @a c is a valid white space character. 0 otherwise. 00530 */ 00531 int is_whitespace (const ACEXML_Char c) const; 00532 00533 /** 00534 * Check if a character @a c is a valid Char. 00535 * 00536 * @retval 1 if @a c is a valid character. 0 otherwise. 00537 */ 00538 int isChar (ACEXML_UCS4 c) const; 00539 00540 /** 00541 * Check if a character @a c is a valid CharRef character. 00542 * 00543 * @retval 1 if @a c is a valid character reference character, 0 otherwise. 00544 */ 00545 int isCharRef (const ACEXML_Char c) const; 00546 00547 /** 00548 * Check if a character @a c is a BaseChar. 00549 * 00550 * @retval 1 if @a c is a valid BaseChar character, 0 otherwise. 00551 */ 00552 int isBasechar (const ACEXML_Char c) const; 00553 00554 /** 00555 * Check if a character @a c is a Ideographic. 00556 * 00557 * @retval 1 if @a c is a valid Ideographic character, 0 otherwise. 00558 */ 00559 int isIdeographic (const ACEXML_Char c) const; 00560 00561 /** 00562 * Check if a character @a c is a CombiningChar. 00563 * 00564 * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise. 00565 */ 00566 int isCombiningchar (const ACEXML_Char c) const; 00567 00568 /** 00569 * Check if a character @a c is a Digit. 00570 * 00571 * @retval 1 if @a c is a valid Digit character, 0 otherwise. 00572 */ 00573 int isDigit (const ACEXML_Char c) const; 00574 00575 /** 00576 * Check if a character @a c is an Extender. 00577 * 00578 * @retval 1 if @a c is a valid Extender character, 0 otherwise. 00579 */ 00580 int isExtender (const ACEXML_Char c) const; 00581 00582 /** 00583 * Check if a character @a c is a Letter. 00584 * 00585 * @retval 1 if @a c is a valid Letter character, 0 otherwise. 00586 */ 00587 int isLetter (const ACEXML_Char c) const; 00588 00589 /** 00590 * Check if a character is an acceptable NameChar. 00591 * 00592 * @retval 1 if @a c is a valid NameChar character, 0 otherwise. 00593 */ 00594 int isNameChar (const ACEXML_Char c) const; 00595 00596 /** 00597 * Check if a character is a PubidChar. 00598 * 00599 * @retval 1 if @a c is a valid PubidChar character, 0 otherwise. 00600 */ 00601 int isPubidChar (const ACEXML_Char c) const; 00602 00603 /// Get a character. 00604 virtual ACEXML_Char get (void); 00605 00606 /// Peek a character. 00607 virtual ACEXML_Char peek (void); 00608 00609 private: 00610 00611 // *** Helper functions for parsing XML 00612 00613 /** 00614 * Skip any whitespaces encountered until the first non-whitespace 00615 * character is encountered. 00616 * 00617 * @return The next non-whitespace character from the CharStream. 00618 * 00619 * @sa skip_whitespace_count 00620 */ 00621 ACEXML_Char skip_whitespace (void); 00622 00623 /** 00624 * Skip any whitespaces encountered until the first non-whitespace 00625 * character. The first non-whitespace character is not consumed. 00626 * This method does peek into the input CharStream and therefore 00627 * is more expensive than @ref skip_whitespace. 00628 * 00629 * @param peek If non-null, @a peek points to a ACEXML_Char where 00630 * skip_whitespace_count stores the first non-whitespace 00631 * character it sees (character is not removed from the stream.) 00632 * 00633 * @return The number of whitespace characters consumed. 00634 * 00635 * @sa skip_whitespace 00636 */ 00637 int skip_whitespace_count (ACEXML_Char *peek = 0); 00638 00639 /** 00640 * Skip an equal sign. 00641 * 00642 * @retval 0 when succeeds, -1 if no equal sign is found. 00643 */ 00644 int skip_equal (void); 00645 00646 /** 00647 * Get a quoted string. Quoted strings are used to specify 00648 * attribute values and this routine will replace character and 00649 * entity references on-the-fly. Parameter entities are not allowed 00650 * (or replaced) in this function. (But regular entities are.) 00651 * 00652 * @param str returns the un-quoted string. 00653 * 00654 * @retval 0 on success, -1 otherwise. 00655 */ 00656 int get_quoted_string (ACEXML_Char *&str); 00657 00658 /** 00659 * Check if a character @a c is a Digit. 00660 * 00661 * @retval 1 if @a c is a valid Digit character, 0 otherwise. 00662 */ 00663 int isNormalDigit (const ACEXML_Char c) const; 00664 00665 /** 00666 * Dispatch errors to ErrorHandler. 00667 * 00668 */ 00669 void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) 00670 ACE_THROW_SPEC ((ACEXML_SAXException)); 00671 00672 /** 00673 * Dispatch warnings to ErrorHandler. 00674 * 00675 */ 00676 void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) 00677 ACE_THROW_SPEC ((ACEXML_SAXException)); 00678 00679 /** 00680 * Dispatch fatal errors to ErrorHandler. 00681 * 00682 */ 00683 void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL) 00684 ACE_THROW_SPEC ((ACEXML_SAXException)); 00685 00686 /** 00687 * Dispatch prefix mapping calls to the ContentHandler. 00688 * 00689 * @param prefix Namespace prefix 00690 * @param uri Namespace URI 00691 * @param name Local name 00692 * @param start 1 => startPrefixMapping 0 => endPrefixMapping 00693 */ 00694 void prefix_mapping (const ACEXML_Char* prefix, 00695 const ACEXML_Char* uri, 00696 int start ACEXML_ENV_ARG_DECL) 00697 ACE_THROW_SPEC ((ACEXML_SAXException)); 00698 /** 00699 * Parse a keyword. 00700 */ 00701 int parse_token (const ACEXML_Char* keyword); 00702 00703 /** 00704 * Push the current context on to the stack. 00705 * 00706 */ 00707 int push_context (ACEXML_Parser_Context* context); 00708 00709 /** 00710 * Pop the top element in the stack and replace current context with that. 00711 */ 00712 int pop_context (int GE_ref ACEXML_ENV_ARG_DECL); 00713 00714 /** 00715 * Create a new ACEXML_CharStream from @a systemId and @a publicId and 00716 * replace the current input stream with the newly created stream. 00717 */ 00718 virtual int switch_input (ACEXML_CharStream* cstream, 00719 const ACEXML_Char* systemId, 00720 const ACEXML_Char* publicId = 0); 00721 /** 00722 * Create a new ACEXML_InputSource from @a systemId and @a publicId and 00723 * replace the current input source with the newly created InputSource. 00724 */ 00725 virtual int switch_input (ACEXML_InputSource* input, 00726 const ACEXML_Char* systemId, 00727 const ACEXML_Char* publicId = 0); 00728 00729 /** 00730 * Check for a parameter entity reference. This is used to check for the 00731 * occurence of a PE Reference withing markupDecl. Additionally this 00732 * function consumes any leading or trailing whitespace around the PE 00733 * Reference. 00734 * 00735 * @retval Number of whitespace characters skipped. 00736 */ 00737 int check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL); 00738 00739 /** 00740 * Reset the parser state. 00741 * 00742 */ 00743 void reset (void); 00744 00745 /** 00746 * Very trivial, non-conformant normalization of a systemid. 00747 * 00748 */ 00749 ACEXML_Char* normalize_systemid (const ACEXML_Char* systemId); 00750 00751 // Feature names: 00752 00753 /** 00754 * \addtogroup acexml_parser_features 00755 * @{ 00756 */ 00757 00758 /** 00759 * @var simple_parsing_feature_ 00760 * 00761 * This constant string defines the name of "simple XML parsing" 00762 * feature. When this feature is enabled, ACEXML parser is allowed 00763 * to parse a simple XML stream without mandated XML prolog 00764 * and no DTD defintion. 00765 */ 00766 static const ACEXML_Char simple_parsing_feature_[]; 00767 00768 /** 00769 * @var namespaces_feature_ 00770 * 00771 * This constant string defines the SAX XML Namespace feature. When this 00772 * feature is enabled, ACEXML parser allows access by namespace qualified 00773 * names. 00774 */ 00775 static const ACEXML_Char namespaces_feature_[]; 00776 00777 /** 00778 * @var namespace_prefixes_feature_ 00779 * 00780 * This constant string defines the SAX XML Namespace prefixes feature. 00781 * Normally the list of attributes returned by the parser will not 00782 * contain attributes used as namespace declarations (xmlns*). When this 00783 * feature is enabled, the list of attributes contains the namespace 00784 * declarations also. 00785 */ 00786 static const ACEXML_Char namespace_prefixes_feature_[]; 00787 00788 /** 00789 * @var validation_feature_ 00790 * 00791 * This constant string defines the SAX XML Validation feature. When 00792 * this feature is enabled, the parser validates the document in 00793 * addition to checking for well-formedness. 00794 */ 00795 static const ACEXML_Char validation_feature_[]; 00796 00797 /* @} */ 00798 00799 /// Keeping track of the handlers. We do not manage the memory for 00800 /// handlers. 00801 ACEXML_DTDHandler *dtd_handler_; 00802 ACEXML_EntityResolver *entity_resolver_; 00803 ACEXML_ContentHandler *content_handler_; 00804 ACEXML_ErrorHandler *error_handler_; 00805 00806 /// Document Type 00807 ACEXML_Char *doctype_; 00808 00809 /// Current parser context 00810 ACEXML_Parser_Context* current_; 00811 00812 /// Stack used to hold the Parser_Context 00813 ACE_Unbounded_Stack<ACEXML_Parser_Context*> ctx_stack_; 00814 00815 /* 00816 * The following two are essentially chains of references and is used by 00817 * the parser to determine if there is any recursion. We keep two of 00818 * these one for general entities and one for parameter entities, as they 00819 * both fall under different namespaces. 00820 * 00821 */ 00822 /// Set used to hold the general entity references that are active. 00823 ACE_Unbounded_Stack<ACEXML_Char*> GE_reference_; 00824 00825 /// Set used to hold the parameter entity references that are active. 00826 ACE_Unbounded_Stack<ACEXML_Char*> PE_reference_; 00827 00828 /// Obstack used by the parser to hold all the strings parsed 00829 ACE_Obstack_T<ACEXML_Char> obstack_; 00830 00831 /// Alternative obstack used to hold any strings when the original is in use 00832 ACE_Obstack_T<ACEXML_Char> alt_stack_; 00833 00834 /// Namespace stack used by the parser to implement support for Namespaces 00835 ACEXML_NamespaceSupport xml_namespace_; 00836 00837 /// T => We are processing a nested namespace 00838 int nested_namespace_; 00839 00840 /// Set of internal parsed general entities in the document 00841 ACEXML_Entity_Manager internal_GE_; 00842 00843 /// Set of external parsed general entities in the document 00844 ACEXML_Entity_Manager external_GE_; 00845 00846 /// Set of unparsed entities in the document 00847 ACEXML_Entity_Manager unparsed_entities_; 00848 00849 /// Set of predefined entities used by the parser 00850 ACEXML_Entity_Manager predef_entities_; 00851 00852 /// Set of internal parsed parameter entities in the document 00853 ACEXML_Entity_Manager internal_PE_; 00854 00855 /// Set of external parsed parameter entities in the document 00856 ACEXML_Entity_Manager external_PE_; 00857 00858 /// Set of notations declared in the document 00859 ACEXML_Entity_Manager notations_; 00860 00861 /// State of the parser when it encounters a reference. 00862 ACEXML_ParserInt::ReferenceState ref_state_; 00863 00864 /// T => We are parsing an external subset 00865 int external_subset_; 00866 00867 /// T => We are parsing an external entity value 00868 int external_entity_; 00869 00870 /// T => Internal DTD has parameter entity references 00871 int has_pe_refs_; 00872 00873 /// If set, the document is a standalone XML document 00874 int standalone_; 00875 00876 /// If set, the document has an external DTD subset 00877 int external_dtd_; 00878 00879 /// If set, the document has an internal DTD 00880 int internal_dtd_; 00881 00882 /// Feature flags 00883 /// If set, the parser should parse a document without a prolog 00884 int simple_parsing_; 00885 00886 /// If set, the parser should also validate 00887 int validate_; 00888 00889 /// If set, the parser should allow access by namespace qualified names. 00890 int namespaces_; 00891 00892 /// If set, the parser should include namespace declarations in the list 00893 /// of attributes of an element. 00894 int namespace_prefixes_; 00895 00896 }; 00897 00898 #if defined (__ACEXML_INLINE__) 00899 # include "ACEXML/parser/parser/Parser.i" 00900 #endif /* __ACEXML_INLINE__ */ 00901 00902 #include /**/ "ace/post.h" 00903 00904 #endif /* _ACEXML_BASIC_PARSER_H_ */