Parser.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 //=============================================================================
00004 /**
00005  *  @file    Parser.h
00006  *
00007  *  Parser.h,v 1.14 2006/02/09 22:53:20 kitty Exp
00008  *
00009  *  @author Nanbor Wang <nanbor@cs.wustl.edu>
00010  *  @author Krishnakumar B <kitty@cs.wustl.edu>
00011  */
00012 //=============================================================================
00013 
00014 #ifndef _ACEXML_BASIC_PARSER_H_
00015 #define _ACEXML_BASIC_PARSER_H_
00016 
00017 #include /**/ "ace/pre.h"
00018 #include "ACEXML/parser/parser/Parser_export.h"
00019 
00020 #if !defined (ACE_LACKS_PRAGMA_ONCE)
00021 #pragma once
00022 #endif /* ACE_LACKS_PRAGMA_ONCE */
00023 
00024 #include "ACEXML/common/XMLReader.h"
00025 #include "ACEXML/common/LocatorImpl.h"
00026 #include "ACEXML/common/NamespaceSupport.h"
00027 #include "ACEXML/common/CharStream.h"
00028 #include "ace/Obstack.h"
00029 #include "ace/Functor.h"
00030 #include "ace/SString.h"
00031 #include "ace/Hash_Map_Manager.h"
00032 #include "ace/Unbounded_Set.h"
00033 #include "ace/Containers_T.h"
00034 #include "ace/Auto_Ptr.h"
00035 #include "ACEXML/parser/parser/Entity_Manager.h"
00036 #include "ACEXML/parser/parser/ParserInternals.h"
00037 #include "ACEXML/parser/parser/ParserContext.h"
00038 
00039 /**
00040  * @class ACEXML_Parser Parser.h "ACEXML/parser/parser/Parser.h"
00041  *
00042  * @brief A SAX based parser.
00043  *
00044  */
00045 class ACEXML_PARSER_Export ACEXML_Parser : public ACEXML_XMLReader
00046 {
00047 public:
00048   /// Default constructor.
00049   ACEXML_Parser (void);
00050 
00051   /// Destructor.
00052   virtual ~ACEXML_Parser (void);
00053 
00054   /**
00055    *  Initialize the parser state.
00056    *
00057    *  @return 0 if the parser was initialized correctly, -1 otherwise.
00058    */
00059   int initialize (ACEXML_InputSource* input);
00060 
00061    /**
00062    * Return the current content handler.
00063    */
00064   virtual ACEXML_ContentHandler *getContentHandler (void) const;
00065 
00066   /*
00067    * Return the current DTD handler.
00068    */
00069   virtual ACEXML_DTDHandler *getDTDHandler (void) const;
00070 
00071   /*
00072    * Return the current entity resolver.
00073    */
00074   virtual ACEXML_EntityResolver *getEntityResolver (void) const;
00075 
00076   /*
00077    * Return the current error handler.
00078    */
00079   virtual ACEXML_ErrorHandler *getErrorHandler (void) const;
00080 
00081   /**
00082    * Look up the value of a feature.  This method allows
00083    * programmers to check whether a specific feature has been
00084    * activated in the parser.
00085    */
00086   virtual int getFeature (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
00087           ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00088                            ACEXML_SAXNotSupportedException)) ;
00089 
00090   /**
00091    * Activating or deactivating a feature.
00092    */
00093   virtual void setFeature (const ACEXML_Char *name,
00094                            int boolean_value ACEXML_ENV_ARG_DECL)
00095     ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00096                      ACEXML_SAXNotSupportedException)) ;
00097 
00098   /*
00099    * Look up the value of a property.
00100    */
00101   virtual void * getProperty (const ACEXML_Char *name ACEXML_ENV_ARG_DECL)
00102     ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00103                      ACEXML_SAXNotSupportedException)) ;
00104 
00105   /*
00106    * Set the value of a property.
00107    */
00108   virtual void setProperty (const ACEXML_Char *name,
00109                             void *value ACEXML_ENV_ARG_DECL)
00110     ACE_THROW_SPEC ((ACEXML_SAXNotRecognizedException,
00111                      ACEXML_SAXNotSupportedException)) ;
00112 
00113   /*
00114    * Parse an XML document.
00115    */
00116   virtual void parse (ACEXML_InputSource *input ACEXML_ENV_ARG_DECL)
00117     ACE_THROW_SPEC ((ACEXML_SAXException)) ;
00118 
00119   /*
00120    * Parse an XML document from a system identifier (URI).
00121    */
00122   virtual void parse (const ACEXML_Char *systemId ACEXML_ENV_ARG_DECL)
00123         ACE_THROW_SPEC ((ACEXML_SAXException))
00124     ;
00125 
00126   /*
00127    * Allow an application to register a content event handler.
00128    */
00129   virtual void setContentHandler (ACEXML_ContentHandler *handler);
00130 
00131   /*
00132    * Allow an application to register a DTD event handler.
00133    */
00134   virtual void setDTDHandler (ACEXML_DTDHandler *handler);
00135 
00136   /*
00137    * Allow an application to register an entity resolver.
00138    */
00139   virtual void setEntityResolver (ACEXML_EntityResolver *resolver);
00140 
00141   /*
00142    * Allow an application to register an error event handler.
00143    */
00144   virtual void setErrorHandler (ACEXML_ErrorHandler *handler);
00145 
00146 
00147 
00148 protected:
00149   /**
00150    * Parse XML Prolog.
00151    */
00152   void parse_xml_prolog (ACEXML_ENV_SINGLE_ARG_DECL)
00153     ACE_THROW_SPEC ((ACEXML_SAXException));
00154 
00155   /**
00156    * Parse VersionInfo declaration.
00157    *
00158    */
00159   void parse_version_info (ACEXML_ENV_SINGLE_ARG_DECL)
00160     ACE_THROW_SPEC ((ACEXML_SAXException));
00161 
00162   /**
00163    *  Parse an EncodingDecl declaration.
00164    *
00165    */
00166   void parse_encoding_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00167     ACE_THROW_SPEC ((ACEXML_SAXException));
00168 
00169   /**
00170    *  Parse an XMLDecl declaration.
00171    *
00172    */
00173   void parse_xml_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00174     ACE_THROW_SPEC ((ACEXML_SAXException));
00175 
00176   /**
00177    *  Parse a TextDecl declaration.
00178    */
00179   int parse_text_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00180     ACE_THROW_SPEC ((ACEXML_SAXException));
00181 
00182   /**
00183    * Parse a PI statement.  The first character encountered
00184    * should always be '?' in the PI prefix "@<?".
00185    *
00186    * @retval 0 on success, -1 otherwise.
00187    */
00188   int parse_processing_instruction (ACEXML_ENV_SINGLE_ARG_DECL)
00189     ACE_THROW_SPEC ((ACEXML_SAXException));
00190 
00191   /**
00192    * Parse the DOCTYPE declaration.  The first character encountered
00193    * should always be  'D' in doctype prefix: "@<@!DOCTYPE".
00194    */
00195   int parse_doctypedecl (ACEXML_ENV_SINGLE_ARG_DECL)
00196         ACE_THROW_SPEC ((ACEXML_SAXException));
00197 
00198   /**
00199    * Parse an XML element.  The first character encountered should
00200    * be the first character of the element "Name".
00201    *
00202    * @param is_root If not 0, then we are expecting to see the "root"
00203    * element now, and the next element's name needs to match the name
00204    * defined in DOCTYPE definition, i.e., @a this->doctype_.
00205    *
00206    * @todo Instead of simply checking for the root element based on the
00207    * argument @a is_root, we should instead either pass in some sort
00208    * of validator or allow the function to return the element name so it
00209    * can be used in a validator.
00210    */
00211   void parse_element (int is_root ACEXML_ENV_ARG_DECL)
00212     ACE_THROW_SPEC ((ACEXML_SAXException));
00213 
00214   /**
00215    *  Parse a content declaration.
00216    *
00217    */
00218   int parse_content (const ACEXML_Char* startname, const ACEXML_Char*& ns_uri,
00219                      const ACEXML_Char*& ns_lname, int ns_flag
00220                      ACEXML_ENV_ARG_DECL)
00221     ACE_THROW_SPEC ((ACEXML_SAXException));
00222 
00223   /**
00224    * Parse a character reference, i.e., "&#x20;" or "&#30;".   The first
00225    * character encountered should be the '#' char.
00226    *
00227    * @param buf points to a character buffer for the result.
00228    *
00229    * @param len In/out argument which initially specifies the size of the
00230    * buffer and is later set to the no. of characters in the reference.
00231    *
00232    * @retval 0 on success and -1 otherwise.
00233    */
00234   int parse_char_reference (ACEXML_Char *buf, size_t& len);
00235 
00236   /**
00237    * Parse a reference name, i.e., foo in "&foo;" or "%foo;". The first
00238    * character encountered should be the character following '&' or '%'.
00239    * Effectively the same as @sa parse_name but we don't use the parser's
00240    * obstack. Caller is responsible for deleting the memory.
00241    *
00242    * @return A pointer to the name of the reference, 0 otherwise.
00243    */
00244   ACEXML_Char* parse_reference_name (void);
00245 
00246   /**
00247    * Parse a CDATA section.  The first character should always be the first
00248    * '[' in CDATA definition.
00249    *
00250    * @retval 0 on success.
00251    * @retval -1 if fail.
00252    */
00253   int parse_cdata (ACEXML_ENV_SINGLE_ARG_DECL)
00254     ACE_THROW_SPEC ((ACEXML_SAXException));
00255 
00256   /**
00257    * Parse a "markupdecl" section, this includes both "markupdecl" and
00258    * "DeclSep" sections in XML specification
00259    */
00260   int parse_internal_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
00261     ACE_THROW_SPEC ((ACEXML_SAXException));
00262 
00263   /**
00264    *  Skip over a comment. The first character encountered should always be
00265    *  the first '-' in the comment prefix "@<@!--".
00266    */
00267   int parse_comment (void);
00268 
00269   /**
00270    * Parse an "ELEMENT" decl.  The first character this method
00271    * expects is always the 'L' (the second char) in the word
00272    * "ELEMENT".
00273    *
00274    * @retval 0 on success, -1 otherwise.
00275    */
00276   int parse_element_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00277     ACE_THROW_SPEC ((ACEXML_SAXException));
00278 
00279   /**
00280    * Parse an "ENTITY" decl.  The first character this method expects
00281    * is always the 'N' (the second char) in the word "ENTITY".
00282    *
00283    * @retval 0 on success, -1 otherwise.
00284    */
00285   int parse_entity_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00286     ACE_THROW_SPEC ((ACEXML_SAXException));
00287 
00288   /**
00289    * Parse an "ATTLIST" decl.  The first character this method
00290    * expects is always the 'A' (the first char) in the word
00291    * "ATTLIST".
00292    *
00293    * @retval 0 on success, -1 otherwise.
00294    */
00295   int parse_attlist_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00296     ACE_THROW_SPEC ((ACEXML_SAXException));
00297 
00298   /**
00299    * Parse an AttType declaration.
00300    *
00301    */
00302   int parse_atttype (ACEXML_ENV_SINGLE_ARG_DECL)
00303     ACE_THROW_SPEC ((ACEXML_SAXException));
00304 
00305   /**
00306    * Parse a "NOTATION" decl.  The first character this method
00307    * expects is always the 'N' (the first char) in the word
00308    * "NOTATION".
00309    *
00310    * @retval 0 on success, -1 otherwise.
00311    */
00312   int parse_notation_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00313     ACE_THROW_SPEC ((ACEXML_SAXException));
00314 
00315   /**
00316    * Parse an ExternalID or a reference to PUBLIC ExternalID.
00317    * Possible cases are in the forms of: <code>
00318    *
00319    * SYSTEM 'quoted string representing system resource'
00320    * PUBLIC 'quoted name of public ID' 'quoted resource'
00321    * PUBLIC 'quoted name we are referring to'
00322    * </code>
00323    *
00324    * The first character this function sees must be either 'S' or 'P'.
00325    * When the function finishes parsing, the input stream points
00326    * at the first non-whitespace character.
00327    *
00328    * @param publicId returns the unquoted publicId read.  If none
00329    *        is available, it will be reset to 0.
00330    * @param systemId returns the unquoted systemId read.  If none
00331    *        is available, it will be reset to 0.
00332    *
00333    * @retval 0 on success, -1 otherwise.
00334    */
00335   int parse_external_id (ACEXML_Char *&publicId, ACEXML_Char *&systemId
00336                          ACEXML_ENV_ARG_DECL)
00337     ACE_THROW_SPEC ((ACEXML_SAXException));
00338 
00339   /**
00340    *  Parse an external DTD.
00341    *
00342    */
00343   int parse_external_dtd (ACEXML_ENV_SINGLE_ARG_DECL)
00344     ACE_THROW_SPEC ((ACEXML_SAXException));
00345 
00346   /**
00347    *  Parse an external subset. This does the actual parsing of an external
00348    *  subset and is called by @sa parse_external_dtd.
00349    *
00350    */
00351   int parse_external_subset (ACEXML_ENV_SINGLE_ARG_DECL)
00352     ACE_THROW_SPEC ((ACEXML_SAXException));
00353 
00354   /**
00355    *  Parse a markupDecl section.
00356    *
00357    */
00358   int parse_markup_decl (ACEXML_ENV_SINGLE_ARG_DECL)
00359     ACE_THROW_SPEC ((ACEXML_SAXException));
00360 
00361   /**
00362    *  Parse a conditionalSect declaration.
00363    *
00364    */
00365   int parse_conditional_section (ACEXML_ENV_SINGLE_ARG_DECL)
00366     ACE_THROW_SPEC ((ACEXML_SAXException));
00367 
00368   /**
00369    *  Parse an includeSect declaration.
00370    *
00371    */
00372   int parse_includesect (ACEXML_ENV_SINGLE_ARG_DECL)
00373     ACE_THROW_SPEC ((ACEXML_SAXException));
00374 
00375   /**
00376    *
00377    *  Parse an ignoreSect declaration.
00378    */
00379   int parse_ignoresect (ACEXML_ENV_SINGLE_ARG_DECL)
00380     ACE_THROW_SPEC ((ACEXML_SAXException));
00381 
00382   /**
00383    * Parse a PEReference.
00384    *
00385    */
00386   int parse_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL)
00387     ACE_THROW_SPEC ((ACEXML_SAXException));
00388 
00389   /**
00390    *  Parse a Reference.
00391    *
00392    */
00393   int parse_entity_reference (ACEXML_ENV_SINGLE_ARG_DECL)
00394     ACE_THROW_SPEC ((ACEXML_SAXException));
00395 
00396   /**
00397    *  Parse an entityValue.
00398    *
00399    */
00400   int parse_entity_value (ACEXML_Char *&str ACEXML_ENV_ARG_DECL)
00401     ACE_THROW_SPEC ((ACEXML_SAXException));
00402 
00403   /**
00404    *  Parse a DefaultDecl specification.
00405    *
00406    */
00407   int parse_defaultdecl (ACEXML_ENV_SINGLE_ARG_DECL)
00408     ACE_THROW_SPEC ((ACEXML_SAXException));
00409 
00410 
00411   /**
00412    * Parse the "children" and "Mixed" non-terminals in contentspec.
00413    *
00414    * The first character this function sees must be the first
00415    * open paren '(' in children.
00416    *
00417    * @retval 0 on success, -1 otherwise.
00418    */
00419   int parse_children_definition (ACEXML_ENV_SINGLE_ARG_DECL)
00420     ACE_THROW_SPEC ((ACEXML_SAXException));
00421 
00422   /**
00423    * Parse a @c cp non-terminal.  @c cp can either be a @c seq or a @c choice.
00424    * This function calls itself recursively.
00425    *
00426    * @param skip_open_paren when non-zero, it indicates that the open paren of
00427    *        the @c seq or @c choice has already been removed from the input
00428    *        stream.
00429    *
00430    * @retval 0 on success, -1 otherwise.
00431    */
00432   int parse_child (int skip_open_paren ACEXML_ENV_ARG_DECL)
00433     ACE_THROW_SPEC ((ACEXML_SAXException));
00434 
00435   /**
00436    *  Parse a name from the input CharStream. If @a ch @!= 0, then we have
00437    *  already consumed the first name character from the input CharStream,
00438    *  otherwise, parse_name will use this->get() to acquire the initial
00439    *  character.
00440    *
00441    *  @return A pointer to the string in the obstack, 0 if it's not a
00442    *  valid name.
00443    */
00444   ACEXML_Char *parse_name (ACEXML_Char ch = 0);
00445 
00446   /**
00447    *  Parse a NMTOKEN from the input stream.
00448    *
00449    * @return A pointer to the string in the obstack, 0 if it's not a valid
00450    * NMTOKEN.
00451    */
00452   ACEXML_Char* parse_nmtoken (ACEXML_Char ch = 0);
00453 
00454   /**
00455    *  Parse the version string in an XML Prolog section.
00456    *
00457    *  @param str String containing the version number if successful.
00458    *  @return 0 if the string was read successfully, -1 otherwise.
00459    */
00460   int parse_version (ACEXML_Char*& str);
00461 
00462   /**
00463    *  Parse the version number in a VersionInfo declaration.
00464    */
00465   int parse_version_num (ACEXML_Char*& str);
00466 
00467   /**
00468    *  Parse the encoding name in an XML Prolog section.
00469    *
00470    *  @param str String containing the encoding name if successful.
00471    *  @return 0 if the string was read successfully, -1 otherwise.
00472    */
00473   int parse_encname (ACEXML_Char*& str);
00474 
00475   /**
00476    *  Parse an SDDecl (standalone document declaration) string.
00477    *
00478    *  @param str String containing the standalone declaration value if successful.
00479    *  @return 0 if the string was read successfully, -1 otherwise.
00480    */
00481   int parse_sddecl (ACEXML_Char*& str);
00482 
00483   /**
00484    *  Parse an attribute name.
00485    *
00486    *  @return String containing the attribute name if
00487    *          successful.
00488    *  @retval 0 otherwise.
00489    */
00490   ACEXML_Char* parse_attname (ACEXML_ENV_SINGLE_ARG_DECL)
00491     ACE_THROW_SPEC ((ACEXML_SAXException));
00492 
00493   /**
00494    *  Parse an attribute value.
00495    *
00496    *  @param str String containing the value of the attribute if successful.
00497    *  @return 0 if attribute value was read successfully, -1 otherwise.
00498    */
00499   int parse_attvalue (ACEXML_Char*& str ACEXML_ENV_ARG_DECL)
00500     ACE_THROW_SPEC ((ACEXML_SAXException));
00501 
00502   /**
00503    *  Parse a tokenized type attribute.
00504    *
00505    *  @return 0 if attribute type was read successfully, -1 otherwise.
00506    */
00507   int parse_tokenized_type (ACEXML_ENV_SINGLE_ARG_DECL)
00508     ACE_THROW_SPEC ((ACEXML_SAXException));
00509 
00510   /**
00511    *  Parse a SystemLiteral.
00512    *
00513    *  @param str String containing the SystemLiteral if successful.
00514    *  @return 0 if the string was read successfully, -1 otherwise.
00515    */
00516   int parse_system_literal (ACEXML_Char*& str);
00517 
00518    /**
00519    *  Parse a PubidLiteral.
00520    *
00521    *  @param str String containing the PubidLiteral if successful.
00522    *  @return 0 if the string was read successfully, -1 otherwise.
00523    */
00524   int parse_pubid_literal (ACEXML_Char*& str);
00525 
00526   /**
00527    * Check if a character @a c is a whitespace.
00528    *
00529    * @retval 1 if @a c is a valid white space character. 0 otherwise.
00530    */
00531   int is_whitespace (const ACEXML_Char c) const;
00532 
00533   /**
00534    * Check if a character @a c is a valid Char.
00535    *
00536    * @retval 1 if @a c is a valid character. 0 otherwise.
00537    */
00538   int isChar (ACEXML_UCS4 c) const;
00539 
00540   /**
00541    * Check if a character @a c is a valid CharRef character.
00542    *
00543    * @retval 1 if @a c is a valid character reference character, 0 otherwise.
00544    */
00545   int isCharRef (const ACEXML_Char c) const;
00546 
00547   /**
00548    * Check if a character @a c is a BaseChar.
00549    *
00550    * @retval 1 if @a c is a valid BaseChar character, 0 otherwise.
00551    */
00552   int isBasechar (const ACEXML_Char c) const;
00553 
00554   /**
00555    * Check if a character @a c is an Ideographic character.
00556    *
00557    * @retval 1 if @a c is a valid Ideographic character, 0 otherwise.
00558    */
00559   int isIdeographic (const ACEXML_Char c) const;
00560 
00561   /**
00562    * Check if a character @a c is a CombiningChar.
00563    *
00564    * @retval 1 if @a c is a valid CombiningChar character, 0 otherwise.
00565    */
00566   int isCombiningchar (const ACEXML_Char c) const;
00567 
00568   /**
00569    * Check if a character @a c is a Digit.
00570    *
00571    * @retval 1 if @a c is a valid Digit character, 0 otherwise.
00572    */
00573   int isDigit (const ACEXML_Char c) const;
00574 
00575   /**
00576    * Check if a character @a c is an Extender.
00577    *
00578    * @retval 1 if @a c is a valid Extender character, 0 otherwise.
00579    */
00580   int isExtender (const ACEXML_Char c) const;
00581 
00582   /**
00583    * Check if a character @a c is a Letter.
00584    *
00585    * @retval 1 if @a c is a valid Letter character, 0 otherwise.
00586    */
00587   int isLetter (const ACEXML_Char c) const;
00588 
00589   /**
00590    * Check if a character is an acceptable NameChar.
00591    *
00592    * @retval 1 if @a c is a valid NameChar character, 0 otherwise.
00593    */
00594   int isNameChar (const ACEXML_Char c) const;
00595 
00596   /**
00597    * Check if a character is a PubidChar.
00598    *
00599    * @retval 1 if @a c is a valid PubidChar character, 0 otherwise.
00600    */
00601   int isPubidChar (const ACEXML_Char c) const;
00602 
00603   /// Get a character.
00604   virtual ACEXML_Char get (void);
00605 
00606   /// Peek a character.
00607   virtual ACEXML_Char peek (void);
00608 
00609 private:
00610 
00611   // *** Helper functions for parsing XML
00612 
00613   /**
00614    * Skip any whitespaces encountered until the first non-whitespace
00615    * character is encountered.
00616    *
00617    * @return The next non-whitespace character from the CharStream.
00618    *
00619    * @sa skip_whitespace_count
00620    */
00621   ACEXML_Char skip_whitespace (void);
00622 
00623   /**
00624    * Skip any whitespaces encountered until the first non-whitespace
00625    * character.  The first non-whitespace character is not consumed.
00626    * This method does peek into the input CharStream and therefore
00627    * is more expensive than @ref skip_whitespace.
00628    *
00629    * @param peek If non-null, @a peek points to a ACEXML_Char where
00630    *        skip_whitespace_count stores the first non-whitespace
00631    *        character it sees (character is not removed from the stream.)
00632    *
00633    * @return The number of whitespace characters consumed.
00634    *
00635    * @sa skip_whitespace
00636    */
00637   int skip_whitespace_count (ACEXML_Char *peek = 0);
00638 
00639   /**
00640    * Skip an equal sign.
00641    *
00642    * @retval 0 when succeeds, -1 if no equal sign is found.
00643    */
00644   int skip_equal (void);
00645 
00646   /**
00647    * Get a quoted string.  Quoted strings are used to specify
00648    * attribute values and this routine will replace character and
00649    * entity references on-the-fly.  Parameter entities are not allowed
00650    * (or replaced) in this function.  (But regular entities are.)
00651    *
00652    * @param str returns the un-quoted string.
00653    *
00654    * @retval 0 on success, -1 otherwise.
00655    */
00656   int get_quoted_string (ACEXML_Char *&str);
00657 
00658   /**
00659    * Check if a character @a c is a Digit.
00660    *
00661    * @retval 1 if @a c is a valid Digit character, 0 otherwise.
00662    */
00663   int isNormalDigit (const ACEXML_Char c) const;
00664 
00665   /**
00666    * Dispatch errors to ErrorHandler.
00667    *
00668    */
00669   void error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
00670     ACE_THROW_SPEC ((ACEXML_SAXException));
00671 
00672   /**
00673    * Dispatch warnings to ErrorHandler.
00674    *
00675    */
00676   void warning (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
00677     ACE_THROW_SPEC ((ACEXML_SAXException));
00678 
00679   /**
00680    * Dispatch fatal errors to ErrorHandler.
00681    *
00682    */
00683   void fatal_error (const ACEXML_Char* msg ACEXML_ENV_ARG_DECL)
00684     ACE_THROW_SPEC ((ACEXML_SAXException));
00685 
00686   /**
00687    * Dispatch prefix mapping calls to the ContentHandler.
00688    *
00689    * @param prefix Namespace prefix
00690    * @param uri Namespace URI
00691    * @param start 1 => startPrefixMapping,
00692    *              0 => endPrefixMapping
00693    */
00694   void prefix_mapping (const ACEXML_Char* prefix,
00695                        const ACEXML_Char* uri,
00696                        int start ACEXML_ENV_ARG_DECL)
00697     ACE_THROW_SPEC ((ACEXML_SAXException));
00698   /**
00699    *  Parse a keyword.
00700    */
00701   int parse_token (const ACEXML_Char* keyword);
00702 
00703   /**
00704    *  Push the current context on to the stack.
00705    *
00706    */
00707   int push_context (ACEXML_Parser_Context* context);
00708 
00709   /**
00710    *  Pop the top element in the stack and replace current context with that.
00711    */
00712   int pop_context (int GE_ref ACEXML_ENV_ARG_DECL);
00713 
00714   /**
00715    *  Create a new ACEXML_CharStream from @a systemId and @a publicId and
00716    *  replace the current input stream with the newly created stream.
00717    */
00718   virtual int switch_input (ACEXML_CharStream* cstream,
00719                             const ACEXML_Char* systemId,
00720                             const ACEXML_Char* publicId = 0);
00721   /**
00722    *  Create a new ACEXML_InputSource from @a systemId and @a publicId and
00723    *  replace the current input source with the newly created InputSource.
00724    */
00725   virtual int switch_input (ACEXML_InputSource* input,
00726                             const ACEXML_Char* systemId,
00727                             const ACEXML_Char* publicId = 0);
00728 
00729   /**
00730    * Check for a parameter entity reference. This is used to check for the
00731    * occurrence of a PE Reference within markupDecl. Additionally this
00732    * function consumes any leading or trailing whitespace around the PE
00733    * Reference.
00734    *
00735    * @return Number of whitespace characters skipped.
00736    */
00737   int check_for_PE_reference (ACEXML_ENV_SINGLE_ARG_DECL);
00738 
00739   /**
00740    *  Reset the parser state.
00741    *
00742    */
00743   void reset (void);
00744 
00745   /**
00746    * Very trivial, non-conformant normalization of a systemid.
00747    *
00748    */
00749   ACEXML_Char* normalize_systemid (const ACEXML_Char* systemId);
00750 
00751   // Feature names:
00752 
00753   /**
00754    * \addtogroup acexml_parser_features
00755    * @{
00756    */
00757 
00758   /**
00759    * @var simple_parsing_feature_
00760    *
00761    * This constant string defines the name of "simple XML parsing"
00762    * feature.  When this feature is enabled, ACEXML parser is allowed
00763    * to parse a simple XML stream without the mandated XML prolog
00764    * and without a DTD definition.
00765    */
00766   static const ACEXML_Char simple_parsing_feature_[];
00767 
00768   /**
00769    * @var namespaces_feature_
00770    *
00771    * This constant string defines the SAX XML Namespace feature. When this
00772    * feature is enabled, ACEXML parser allows access by namespace qualified
00773    * names.
00774    */
00775   static const ACEXML_Char namespaces_feature_[];
00776 
00777   /**
00778    *  @var namespace_prefixes_feature_
00779    *
00780    *  This constant string defines the SAX XML Namespace prefixes feature.
00781    *  Normally the list of attributes returned by the parser will not
00782    *  contain attributes used as namespace declarations (xmlns*). When this
00783    *  feature is enabled, the list of attributes contains the namespace
00784    *  declarations also.
00785    */
00786   static const ACEXML_Char namespace_prefixes_feature_[];
00787 
00788   /**
00789    *  @var validation_feature_
00790    *
00791    *  This constant string defines the SAX XML Validation feature. When
00792    *  this feature is enabled, the parser validates the document in
00793    *  addition to checking for well-formedness.
00794    */
00795   static const ACEXML_Char validation_feature_[];
00796 
00797   /** @} */
00798 
00799   /// Keeping track of the handlers. We do not manage the memory for
00800   /// handlers.
00801   ACEXML_DTDHandler *dtd_handler_;
00802   ACEXML_EntityResolver *entity_resolver_;
00803   ACEXML_ContentHandler *content_handler_;
00804   ACEXML_ErrorHandler *error_handler_;
00805 
00806   /// Document Type
00807   ACEXML_Char *doctype_;
00808 
00809   /// Current parser context
00810   ACEXML_Parser_Context* current_;
00811 
00812   /// Stack used to hold the Parser_Context
00813   ACE_Unbounded_Stack<ACEXML_Parser_Context*> ctx_stack_;
00814 
00815   /*
00816    * The following two are essentially chains of references and are used by
00817    * the parser to determine if there is any recursion. We keep two of
00818    * these, one for general entities and one for parameter entities, as they
00819    * fall under different namespaces.
00820    *
00821    */
00822   /// Stack used to hold the general entity references that are active.
00823   ACE_Unbounded_Stack<ACEXML_Char*> GE_reference_;
00824 
00825   /// Stack used to hold the parameter entity references that are active.
00826   ACE_Unbounded_Stack<ACEXML_Char*> PE_reference_;
00827 
00828   /// Obstack used by the parser to hold all the strings parsed
00829   ACE_Obstack_T<ACEXML_Char> obstack_;
00830 
00831   /// Alternative obstack used to hold any strings when the original is in use
00832   ACE_Obstack_T<ACEXML_Char> alt_stack_;
00833 
00834   /// Namespace stack used by the parser to implement support for Namespaces
00835   ACEXML_NamespaceSupport xml_namespace_;
00836 
00837   /// T => We are processing a nested namespace
00838   int nested_namespace_;
00839 
00840   /// Set of internal parsed general entities in the document
00841   ACEXML_Entity_Manager internal_GE_;
00842 
00843   /// Set of external parsed general entities in the document
00844   ACEXML_Entity_Manager external_GE_;
00845 
00846   /// Set of unparsed entities in the document
00847   ACEXML_Entity_Manager unparsed_entities_;
00848 
00849   /// Set of predefined entities used by the parser
00850   ACEXML_Entity_Manager predef_entities_;
00851 
00852   /// Set of internal parsed parameter entities in the document
00853   ACEXML_Entity_Manager internal_PE_;
00854 
00855   /// Set of external parsed parameter entities in the document
00856   ACEXML_Entity_Manager external_PE_;
00857 
00858   /// Set of notations declared in the document
00859   ACEXML_Entity_Manager notations_;
00860 
00861   /// State of the parser when it encounters a reference.
00862   ACEXML_ParserInt::ReferenceState ref_state_;
00863 
00864   /// T => We are parsing an external subset
00865   int external_subset_;
00866 
00867   /// T => We are parsing an external entity value
00868   int external_entity_;
00869 
00870   /// T => Internal DTD has parameter entity references
00871   int has_pe_refs_;
00872 
00873   /// If set, the document is a standalone XML document
00874   int standalone_;
00875 
00876   /// If set, the document has an external DTD subset
00877   int external_dtd_;
00878 
00879   /// If set, the document has an internal DTD
00880   int internal_dtd_;
00881 
00882   /// Feature flags
00883   /// If set, the parser should parse a document without a prolog
00884   int simple_parsing_;
00885 
00886   /// If set, the parser should also validate
00887   int validate_;
00888 
00889   /// If set, the parser should allow access by namespace qualified names.
00890   int namespaces_;
00891 
00892   /// If set, the parser should include namespace declarations in the list
00893   /// of attributes of an element.
00894   int namespace_prefixes_;
00895 
00896 };
00897 
00898 #if defined (__ACEXML_INLINE__)
00899 # include "ACEXML/parser/parser/Parser.i"
00900 #endif /* __ACEXML_INLINE__ */
00901 
00902 #include /**/ "ace/post.h"
00903 
00904 #endif /* _ACEXML_BASIC_PARSER_H_ */

Generated on Thu Nov 9 11:45:38 2006 for ACEXML by doxygen 1.3.6