Parser.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 //=============================================================================
00004 /**
00005  *  @file    Parser.h
00006  *
00007  *  $Id: Parser.h 78907 2007-07-16 10:22:58Z sowayaa $
00008  *
00009  *  @author Nanbor Wang <nanbor@cs.wustl.edu>
00010  *  @author Krishnakumar B <kitty@cs.wustl.edu>
00011  */
00012 //=============================================================================
00013 
00014 #ifndef _ACEXML_BASIC_PARSER_H_
00015 #define _ACEXML_BASIC_PARSER_H_
00016 
00017 #include /**/ "ace/pre.h"
00018 #include "ACEXML/parser/parser/Parser_export.h"
00019 
00020 #if !defined (ACE_LACKS_PRAGMA_ONCE)
00021 #pragma once
00022 #endif /* ACE_LACKS_PRAGMA_ONCE */
00023 
00024 #include "ACEXML/common/XMLReader.h"
00025 #include "ACEXML/common/LocatorImpl.h"
00026 #include "ACEXML/common/NamespaceSupport.h"
00027 #include "ACEXML/common/CharStream.h"
00028 #include "ace/Obstack.h"
00029 #include "ace/Functor.h"
00030 #include "ace/SString.h"
00031 #include "ace/Hash_Map_Manager.h"
00032 #include "ace/Unbounded_Set.h"
00033 #include "ace/Containers_T.h"
00034 #include "ace/Auto_Ptr.h"
00035 #include "ACEXML/parser/parser/Entity_Manager.h"
00036 #include "ACEXML/parser/parser/ParserInternals.h"
00037 #include "ACEXML/parser/parser/ParserContext.h"
00038 
00039 /**
00040  * @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h"
00041  *
00042  * @brief A SAX based parser.
00043  *
00044  */
00045 class ACEXML_PARSER_Export ACEXML_Parser : public ACEXML_XMLReader
00046 {
00047 public:
00048   /// Default constructor.
00049   ACEXML_Parser (void);
00050 
00051   /// Destructor.
00052   virtual ~ACEXML_Parser (void);
00053 
00054   /**
00055    *  Initialize the parser state.
00056    *
00057    *  @retval 0 if parser was initialized correctly else -1.
00058    */
00059   int initialize (ACEXML_InputSource* input);
00060 
00061    /**
00062    * Return the current content handler.
00063    */
00064   virtual ACEXML_ContentHandler *getContentHandler (void) const;
00065 
00066   /**
00067    * Return the current DTD handler.
00068    */
00069   virtual ACEXML_DTDHandler *getDTDHandler (void) const;
00070 
00071   /**
00072    * Return the current entity resolver.
00073    */
00074   virtual ACEXML_EntityResolver *getEntityResolver (void) const;
00075 
00076   /**
00077    * Return the current error handler.
00078    */
00079   virtual ACEXML_ErrorHandler *getErrorHandler (void) const;
00080 
00081   /**
00082    * Look up the value of a feature.  This method allows
00083    * programmers to check whether a specific feature has been
00084    * activated in the parser.
00085    */
00086   virtual int getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL);
00087 
00088   /**
00089    * Activate or deactivate a feature.
00090    */
00091   virtual void setFeature (const ACEXML_Char *name,
00092                            int boolean_value ACEXML_ENV_ARG_DECL);
00093 
00094   /**
00095    * Look up the value of a property.
00096    */
00097   virtual void * getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL);
00098 
00099   /**
00100    * Set the value of a property.
00101    */
00102   virtual void setProperty (const ACEXML_Char *name,
00103                             void *value ACEXML_ENV_ARG_DECL);
00104 
00105   /**
00106    * Parse an XML document.
00107    */
00108   virtual void parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL);
00109 
00110   /**
00111    * Parse an XML document from a system identifier (URI).
00112    */
00113   virtual void parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
00114     ;
00115 
00116   /**
00117    * Allow an application to register a content event handler.
00118    */
00119   virtual void setContentHandler (ACEXML_ContentHandler *handler);
00120 
00121   /**
00122    * Allow an application to register a DTD event handler.
00123    */
00124   virtual void setDTDHandler (ACEXML_DTDHandler *handler);
00125 
00126   /**
00127    * Allow an application to register an entity resolver.
00128    */
00129   virtual void setEntityResolver (ACEXML_EntityResolver *resolver);
00130 
00131   /**
00132    * Allow an application to register an error event handler.
00133    */
00134   virtual void setErrorHandler (ACEXML_ErrorHandler *handler);
00135 
00136 
00137 
00138 protected:
00139   /**
00140    * Parse XML Prolog.
00141    */
00142   void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL);
00143 
00144   /**
00145    * Parse VersionInfo declaration.
00146    *
00147    */
00148   void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL);
00149 
00150   /**
00151    *  Parse an EncodingDecl declaration.
00152    *
00153    */
00154   void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00155 
00156   /**
00157    *  Parse an XMLDecl declaration.
00158    *
00159    */
00160   void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00161 
00162   /**
00163    *  Parse a TextDecl declaration.
00164    */
00165   int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00166 
00167   /**
00168    * Parse a PI statement.  The first character encountered
00169    * should always be '?' in the PI prefix "@<?".
00170    *
00171    * @retval 0 on success, -1 otherwise.
00172    */
00173   int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL);
00174 
00175   /**
00176    * Parse the DOCTYPE declaration.  The first character encountered
00177    * should always be  'D' in doctype prefix: "@<@!DOCTYPE".
00178    */
00179   int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL);
00180 
00181   /**
00182    * Parse an XML element.  The first character encountered should
00183    * be the first character of the element "Name".
00184    *
00185    * @param is_root If not 0, then we are expecting to see the "root"
00186    * element now, and the next element's name needs to match the name
00187    * defined in DOCTYPE definition, i.e., @a this->doctype_.
00188    *
00189    * @todo Instead of simply checking for the root element based on the
00190    * argument @a is_root, we should instead either pass in some sort
00191    * of validator or allow the function to return the element name so it
00192    * can be used in a validator.
00193    */
00194   void parse_element (int is_root ACEXML_ENV_ARG_DECL);
00195 
00196   /**
00197    *  Parse a content declaration.
00198    *
00199    */
00200   int parse_content (const ACEXML_Char* startname, const ACEXML_Char*& ns_uri,
00201                      const ACEXML_Char*& ns_lname, int ns_flag
00202                      ACEXML_ENV_ARG_DECL);
00203 
00204   /**
00205    * Parse a character reference, i.e., "&#x20;" or "&#30;".   The first
00206    * character encountered should be the '#' char.
00207    *
00208    * @param buf points to a character buffer for the result.
00209    *
00210    * @param len In/out argument which initially specifies the size of the
00211    * buffer and is later set to the no. of characters in the reference.
00212    *
00213    * @retval 0 on success and -1 otherwise.
00214    */
00215   int parse_char_reference (ACEXML_Char *buf, size_t& len);
00216 
00217   /**
00218    * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first
00219    * character encountered should be the character following '&' or '%'.
00220    * Effectively the same as @sa parse_name but we don't use the parser's
00221    * obstack. Caller is responsible for deleting the memory.
00222    *
00223    * @retval  A pointer to name of reference, 0 otherwise.
00224    */
00225   ACEXML_Char* parse_reference_name (void);
00226 
00227   /**
00228    * Parse a CDATA section.  The first character should always be the first
00229    * '[' in CDATA definition.
00230    *
00231    * @retval 0 on success.
00232    * @retval -1 if fail.
00233    */
00234   int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL);
00235 
00236   /**
00237    * Parse a "markupdecl" section, this includes both "markupdecl" and
00238    * "DeclSep" sections in XML specification
00239    */
00240   int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL);
00241 
00242   /**
00243    *  Skip over a comment. The first character encountered should always be
00244    *  the first '-' in the comment prefix "@<@!--".
00245    */
00246   int parse_comment (void);
00247 
00248   /**
00249    * Parse an "ELEMENT" decl.  The first character this method
00250    * expects is always the 'L' (the second char) in the word
00251    * "ELEMENT".
00252    *
00253    * @retval 0 on success, -1 otherwise.
00254    */
00255   int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00256 
00257   /**
00258    * Parse an "ENTITY" decl.  The first character this method expects
00259    * is always the 'N' (the second char) in the word "ENTITY".
00260    *
00261    * @retval 0 on success, -1 otherwise.
00262    */
00263   int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00264 
00265   /**
00266    * Parse an "ATTLIST" decl.  The first character this method
00267    * expects is always the 'A' (the first char) in the word
00268    * "ATTLIST".
00269    *
00270    * @retval 0 on success, -1 otherwise.
00271    */
00272   int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00273 
00274   /**
00275    * Parse an AttType declaration.
00276    *
00277    */
00278   int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL);
00279 
00280   /**
00281    * Parse a "NOTATION" decl.  The first character this method
00282    * expects is always the 'N' (the first char) in the word
00283    * "NOTATION".
00284    *
00285    * @retval 0 on success, -1 otherwise.
00286    */
00287   int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00288 
00289   /**
00290    * Parse an ExternalID or a reference to PUBLIC ExternalID.
00291    * Possible cases are in the forms of: <code>
00292    *
00293    * SYSTEM 'quoted string representing system resource'
00294    * PUBLIC 'quoted name of public ID' 'quoted resource'
00295    * PUBLIC 'quoted name we are referring to'
00296    * </code>
00297    *
00298    * The first character this function sees must be either 'S' or 'P'.
00299    * When the function finishes parsing, the input stream points
00300    * at the first non-whitespace character.
00301    *
00302    * @param publicId returns the unquoted publicId read.  If none
00303    *        is available, it will be reset to 0.
00304    * @param systemId returns the unquoted systemId read.  If none
00305    *        is available, it will be reset to 0.
00306    *
00307    * @retval 0 on success, -1 otherwise.
00308    */
00309   int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId
00310                          ACEXML_ENV_ARG_DECL);
00311 
00312   /**
00313    *  Parse an external DTD.
00314    *
00315    */
00316   int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL);
00317 
00318   /**
00319    *  Parse an external subset. This does the actual parsing of an external
00320    *  subset and is called by @sa parse_external_dtd.
00321    *
00322    */
00323   int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL);
00324 
00325   /**
00326    *  Parse a markupDecl section.
00327    *
00328    */
00329   int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL);
00330 
00331   /**
00332    *  Parse a conditionalSect declaration.
00333    *
00334    */
00335   int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL);
00336 
00337   /**
00338    *  Parse an includeSect declaration.
00339    *
00340    */
00341   int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL);
00342 
00343   /**
00344    *
00345    *  Parse an ignoreSect declaration.
00346    */
00347   int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL);
00348 
00349   /**
00350    * Parse a PEReference.
00351    *
00352    */
00353   int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL);
00354 
00355   /**
00356    *  Parse a Reference.
00357    *
00358    */
00359   int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL);
00360 
00361   /**
00362    *  Parse an entityValue.
00363    *
00364    */
00365   int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL);
00366 
00367   /**
00368    *  Parse a DefaultDecl specification.
00369    *
00370    */
00371   int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL);
00372 
00373 
00374   /**
00375    * Parse the "children" and "Mixed" non-terminals in contentspec.
00376    *
00377    * The first character this function sees must be the first
00378    * open paren '(' in children.
00379    *
00380    * @retval 0 on success, -1 otherwise.
00381    */
00382   int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL);
00383 
00384   /**
00385    * Parse a @c cp non-terminal.  @c cp can either be a @c seq or a @c choice.
00386    * This function calls itself recursively.
00387    *
00388    * @param skip_open_paren when non-zero, it indicates that the open paren of
00389    *        the @c seq or @c choice has already been removed from the input
00390    *        stream.
00391    *
00392    * @retval 0 on success, -1 otherwise.
00393    */
00394   int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL);
00395 
00396   /**
00397    *  Parse a name from the input CharStream. If @a ch @!= 0, then we have
00398    *  already consumed the first name character from the input CharStream,
00399    *  otherwise, parse_name will use this->get() to acquire the initial
00400    *  character.
00401    *
00402    *  @return A pointer to the string in the obstack, 0 if it's not a
00403    *  valid name.
00404    */
00405   ACEXML_Char *parse_name (ACEXML_Char ch = 0);
00406 
00407   /**
00408    *  Parse a NMTOKEN from the input stream.
00409    *
00410    * @return A pointer to the string in the obstack, 0 if it's not a valid
00411    * NMTOKEN.
00412    */
00413   ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0);
00414 
00415   /**
00416    *  Parse the version string in an XML Prolog section.
00417    *
00418    *  @param str String containing the version number if successful.
00419    *  @return 0 if the string was read successfully, presumably -1 otherwise (TODO confirm).
00420    */
00421   int parse_version (ACEXML_Char*& str);
00422 
00423   /**
00424    *  Parse the version number in a VersionInfo declaration.
00425    */
00426   int parse_version_num (ACEXML_Char*& str);
00427 
00428   /**
00429    *  Parse the encoding name in an XML Prolog section.
00430    *
00431    *  @param str String containing the encoding name if successful.
00432    *  @return 0 if the string was read successfully, presumably -1 otherwise (TODO confirm).
00433    */
00434   int parse_encname (ACEXML_Char*& str);
00435 
00436   /**
00437    *  Parse a SDDecl string.
00438    *
00439    *  @param str String containing the SDDecl value if successful.
00440    *  @return 0 if the string was read successfully, -1 otherwise.
00441    */
00442   int parse_sddecl (ACEXML_Char*& str);
00443 
00444   /**
00445    *  Parse an attribute name.
00446    *
00447    *  @retval str String containing the value of the attribute name
00448    *             if successful.
00449    *  @retval 0 otherwise.
00450    */
00451   ACEXML_Char* parse_attname (ACEXML_ENV_SINGLE_ARG_DECL);
00452 
00453   /**
00454    *  Parse an attribute value.
00455    *
00456    *  @param str String containing the value of the attribute if successful.
00457    *  @return 0 if attribute value was read successfully, -1 otherwise.
00458    */
00459   int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL);
00460 
00461   /**
00462    *  Parse a tokenized type attribute.
00463    *
00464    *  @return 0 if attribute type was read successfully, -1 otherwise.
00465    */
00466   int parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL);
00467 
00468   /**
00469    *  Parse a SystemLiteral.
00470    *
00471    *  @param str String containing the SystemLiteral if successful.
00472    *  @return 0 if the string was read successfully, presumably -1 otherwise (TODO confirm).
00473    */
00474   int parse_system_literal (ACEXML_Char*& str);
00475 
00476    /**
00477    *  Parse a PubidLiteral.
00478    *
00479    *  @param str String containing the PubidLiteral if successful.
00480    *  @return 0 if the string was read successfully, presumably -1 otherwise (TODO confirm).
00481    */
00482   int parse_pubid_literal (ACEXML_Char*& str);
00483 
00484   /**
00485    * Check if a character @a c is a whitespace.
00486    *
00487    * @retval 1 if @a c is a valid white space character. 0 otherwise.
00488    */
00489   int is_whitespace (const ACEXML_Char c) const;
00490 
00491   /**
00492    * Check if a character @a c is a valid Char.
00493    *
00494    * @retval 1 if @a c is a valid character. 0 otherwise.
00495    */
00496   int isChar (ACEXML_UCS4 c) const;
00497 
00498   /**
00499    * Check if a character @a c is a valid CharRef character.
00500    *
00501    * @retval 1 if @a c is a valid character reference character, 0 otherwise.
00502    */
00503   int isCharRef (const ACEXML_Char c) const;
00504 
00505   /**
00506    * Check if a character @a c is a BaseChar.
00507    *
00508    * @retval 1 if @a c is a valid BaseChar character, 0 otherwise.
00509    */
00510   int isBasechar (const ACEXML_Char c) const;
00511 
00512   /**
00513    * Check if a character @a c is an Ideographic.
00514    *
00515    * @retval 1 if @a c is a valid Ideographic character, 0 otherwise.
00516    */
00517   int isIdeographic (const ACEXML_Char c) const;
00518 
00519   /**
00520    * Check if a character @a c is a CombiningChar.
00521    *
00522    * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise.
00523    */
00524   int isCombiningchar (const ACEXML_Char c) const;
00525 
00526   /**
00527    * Check if a character @a c is a Digit.
00528    *
00529    * @retval 1 if @a c is a valid Digit character, 0 otherwise.
00530    */
00531   int isDigit (const ACEXML_Char c) const;
00532 
00533   /**
00534    * Check if a character @a c is an Extender.
00535    *
00536    * @retval 1 if @a c is a valid Extender character, 0 otherwise.
00537    */
00538   int isExtender (const ACEXML_Char c) const;
00539 
00540   /**
00541    * Check if a character @a c is a Letter.
00542    *
00543    * @retval 1 if @a c is a valid Letter character, 0 otherwise.
00544    */
00545   int isLetter (const ACEXML_Char c) const;
00546 
00547   /**
00548    * Check if a character is an acceptable NameChar.
00549    *
00550    * @retval 1 if @a c is a valid NameChar character, 0 otherwise.
00551    */
00552   int isNameChar (const ACEXML_Char c) const;
00553 
00554   /**
00555    * Check if a character is a PubidChar.
00556    *
00557    * @retval 1 if @a c is a valid PubidChar character, 0 otherwise.
00558    */
00559   int isPubidChar (const ACEXML_Char c) const;
00560 
00561   /// Get a character.
00562   virtual ACEXML_Char get (void);
00563 
00564   /// Peek a character.
00565   virtual ACEXML_Char peek (void);
00566 
00567 private:
00568 
00569   // *** Helper functions for parsing XML
00570 
00571   /**
00572    * Skip any whitespaces encountered until the first non-whitespace
00573    * character is encountered.
00574    *
00575    * @return The next non-whitespace character from the CharStream.
00576    *
00577    * @sa skip_whitespace_count
00578    */
00579   ACEXML_Char skip_whitespace (void);
00580 
00581   /**
00582    * Skip any whitespaces encountered until the first non-whitespace
00583    * character.  The first non-whitespace character is not consumed.
00584    * This method does peek into the input CharStream and therefore
00585    * is more expensive than @ref skip_whitespace.
00586    *
00587    * @param peek If non-null, @a peek points to a ACEXML_Char where
00588    *        skip_whitespace_count stores the first non-whitespace
00589    *        character it sees (character is not removed from the stream.)
00590    *
00591    * @return The number of whitespace characters consumed.
00592    *
00593    * @sa skip_whitespace
00594    */
00595   int skip_whitespace_count (ACEXML_Char *peek = 0);
00596 
00597   /**
00598    * Skip an equal sign.
00599    *
00600    * @retval 0 when succeeds, -1 if no equal sign is found.
00601    */
00602   int skip_equal (void);
00603 
00604   /**
00605    * Get a quoted string.  Quoted strings are used to specify
00606    * attribute values and this routine will replace character and
00607    * entity references on-the-fly.  Parameter entities are not allowed
00608    * (or replaced) in this function.  (But regular entities are.)
00609    *
00610    * @param str returns the un-quoted string.
00611    *
00612    * @retval 0 on success, -1 otherwise.
00613    */
00614   int get_quoted_string (ACEXML_Char *&str);
00615 
00616   /**
00617    * Check if a character @a c is a Digit.
00618    *
00619    * @retval 1 if @a c is a valid Digit character, 0 otherwise.
00620    */
00621   int isNormalDigit (const ACEXML_Char c) const;
00622 
00623   /**
00624    * Dispatch errors to ErrorHandler.
00625    *
00626    */
00627   void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL);
00628 
00629   /**
00630    * Dispatch warnings to ErrorHandler.
00631    *
00632    */
00633   void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL);
00634 
00635   /**
00636    * Dispatch fatal errors to ErrorHandler.
00637    *
00638    */
00639   void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL);
00640 
00641   /**
00642    * Dispatch prefix mapping calls to the ContentHandler.
00643    *
00644    * @param prefix Namespace prefix
00645    * @param uri Namespace URI
00646    * @param start 1 => dispatch startPrefixMapping,
00647    *              0 => dispatch endPrefixMapping
00648    */
00649   void prefix_mapping (const ACEXML_Char* prefix,
00650                        const ACEXML_Char* uri,
00651                        int start ACEXML_ENV_ARG_DECL);
00652   /**
00653    *  Parse a keyword.
00654    */
00655   int parse_token (const ACEXML_Char* keyword);
00656 
00657   /**
00658    *  Push the current context on to the stack.
00659    *
00660    */
00661   int push_context (ACEXML_Parser_Context* context);
00662 
00663   /**
00664    *  Pop the top element in the stack and replace current context with that.
00665    */
00666   size_t pop_context (int GE_ref ACEXML_ENV_ARG_DECL);
00667 
00668   /**
00669    *  Create a new ACEXML_CharStream from @a systemId and @a publicId and
00670    *  replace the current input stream with the newly created stream.
00671    */
00672   virtual int switch_input (ACEXML_CharStream* cstream,
00673                             const ACEXML_Char* systemId,
00674                             const ACEXML_Char* publicId = 0);
00675   /**
00676    *  Create a new ACEXML_InputSource from @a systemId and @a publicId and
00677    *  replace the current input source with the newly created InputSource.
00678    */
00679   virtual int switch_input (ACEXML_InputSource* input,
00680                             const ACEXML_Char* systemId,
00681                             const ACEXML_Char* publicId = 0);
00682 
00683   /**
00684    * Check for a parameter entity reference. This is used to check for the
00685    * occurrence of a PE Reference within markupDecl. Additionally this
00686    * function consumes any leading or trailing whitespace around the PE
00687    * Reference.
00688    *
00689    * @return Number of whitespace characters skipped.
00690    */
00691   int check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL);
00692 
00693   /**
00694    *  Reset the parser state.
00695    *
00696    */
00697   void reset (void);
00698 
00699   /**
00700    * Very trivial, non-conformant normalization of a systemid.
00701    *
00702    */
00703   ACEXML_Char* normalize_systemid (const ACEXML_Char* systemId);
00704 
00705   // Feature names:
00706 
00707   /**
00708    * \addtogroup acexml_parser_features
00709    * @{
00710    */
00711 
00712   /**
00713    * @var simple_parsing_feature_
00714    *
00715    * This constant string defines the name of "simple XML parsing"
00716    * feature.  When this feature is enabled, ACEXML parser is allowed
00717    * to parse a simple XML stream without mandated XML prolog
00718    * and no DTD definition.
00719    */
00720   static const ACEXML_Char simple_parsing_feature_[];
00721 
00722   /**
00723    * @var namespaces_feature_
00724    *
00725    * This constant string defines the SAX XML Namespace feature. When this
00726    * feature is enabled, ACEXML parser allows access by namespace qualified
00727    * names.
00728    */
00729   static const ACEXML_Char namespaces_feature_[];
00730 
00731   /**
00732    *  @var namespace_prefixes_feature_
00733    *
00734    *  This constant string defines the SAX XML Namespace prefixes feature.
00735    *  Normally the list of attributes returned by the parser will not
00736    *  contain attributes used as namespace declarations (xmlns*). When this
00737    *  feature is enabled, the list of attributes contains the namespace
00738    *  declarations also.
00739    */
00740   static const ACEXML_Char namespace_prefixes_feature_[];
00741 
00742   /**
00743    *  @var validation_feature_
00744    *
00745    *  This constant string defines the SAX XML Validation feature. When
00746    *  this feature is enabled, the parser validates the document in
00747    *  addition to checking for well-formedness.
00748    */
00749   static const ACEXML_Char validation_feature_[];
00750 
00751   /** @} */
00752 
00753   /// Keeping track of the handlers. We do not manage the memory for
00754   /// handlers.
00755   ACEXML_DTDHandler *dtd_handler_;
00756   ACEXML_EntityResolver *entity_resolver_;
00757   ACEXML_ContentHandler *content_handler_;
00758   ACEXML_ErrorHandler *error_handler_;
00759 
00760   /// Document Type
00761   ACEXML_Char *doctype_;
00762 
00763   /// Current parser context
00764   ACEXML_Parser_Context* current_;
00765 
00766   /// Stack used to hold the Parser_Context
00767   ACE_Unbounded_Stack<ACEXML_Parser_Context*> ctx_stack_;
00768 
00769   /*
00770    * The following two are essentially chains of references and are used by
00771    * the parser to determine if there is any recursion. We keep two of
00772    * these, one for general entities and one for parameter entities, as they
00773    * both fall under different namespaces.
00774    *
00775    */
00776   /// Stack used to hold the general entity references that are active.
00777   ACE_Unbounded_Stack<ACEXML_Char*> GE_reference_;
00778 
00779   /// Stack used to hold the parameter entity references that are active.
00780   ACE_Unbounded_Stack<ACEXML_Char*> PE_reference_;
00781 
00782   /// Obstack used by the parser to hold all the strings parsed
00783   ACE_Obstack_T<ACEXML_Char> obstack_;
00784 
00785   /// Alternative obstack used to hold any strings when the original is in use
00786   ACE_Obstack_T<ACEXML_Char> alt_stack_;
00787 
00788   /// Namespace stack used by the parser to implement support for Namespaces
00789   ACEXML_NamespaceSupport xml_namespace_;
00790 
00791   /// T => We are processing a nested namespace
00792   int nested_namespace_;
00793 
00794   /// Set of internal parsed general entities in the document
00795   ACEXML_Entity_Manager internal_GE_;
00796 
00797   /// Set of external parsed general entities in the document
00798   ACEXML_Entity_Manager external_GE_;
00799 
00800   /// Set of unparsed entities in the document
00801   ACEXML_Entity_Manager unparsed_entities_;
00802 
00803   /// Set of predefined entities used by the parser
00804   ACEXML_Entity_Manager predef_entities_;
00805 
00806   /// Set of internal parsed parameter entities in the document
00807   ACEXML_Entity_Manager internal_PE_;
00808 
00809   /// Set of external parsed parameter entities in the document
00810   ACEXML_Entity_Manager external_PE_;
00811 
00812   /// Set of notations declared in the document
00813   ACEXML_Entity_Manager notations_;
00814 
00815   /// State of the parser when it encounters a reference.
00816   ACEXML_ParserInt::ReferenceState ref_state_;
00817 
00818   /// T => We are parsing an external subset
00819   int external_subset_;
00820 
00821   /// T => We are parsing an external entity value
00822   int external_entity_;
00823 
00824   /// T => Internal DTD has parameter entity references
00825   int has_pe_refs_;
00826 
00827   /// If set, the document is a standalone XML document
00828   int standalone_;
00829 
00830   /// If set, the document has an external DTD subset
00831   int external_dtd_;
00832 
00833   /// If set, the document has an internal DTD
00834   int internal_dtd_;
00835 
00836   /// Feature flags
00837   /// If set, the parser should parse a document without a prolog
00838   int simple_parsing_;
00839 
00840   /// If set, the parser should also validate
00841   int validate_;
00842 
00843   /// If set, the parser should allow access by namespace qualified names.
00844   int namespaces_;
00845 
00846   /// If set, the parser should include namespace declarations in the list
00847   /// of attributes of an element.
00848   int namespace_prefixes_;
00849 
00850 };
00851 
00852 #if defined (__ACEXML_INLINE__)
00853 # include "ACEXML/parser/parser/Parser.inl"
00854 #endif /* __ACEXML_INLINE__ */
00855 
00856 #include /**/ "ace/post.h"
00857 
00858 #endif /* _ACEXML_BASIC_PARSER_H_ */

Generated on Sun Jan 27 13:04:15 2008 for ACEXML by doxygen 1.3.6