00001 //============================================================================= 00002 /** 00003 * @file Parser.inl 00004 * 00005 * $Id: Parser.inl 82513 2008-08-05 18:52:53Z parsons $ 00006 * 00007 * @author Nanbor Wang <nanbor@cs.wustl.edu> 00008 */ 00009 //============================================================================= 00010 00011 ACEXML_INLINE ACEXML_ContentHandler * 00012 ACEXML_Parser::getContentHandler (void) const 00013 { 00014 return this->content_handler_; 00015 } 00016 00017 ACEXML_INLINE ACEXML_DTDHandler * 00018 ACEXML_Parser::getDTDHandler (void) const 00019 { 00020 return this->dtd_handler_; 00021 } 00022 00023 ACEXML_INLINE ACEXML_EntityResolver * 00024 ACEXML_Parser::getEntityResolver (void) const 00025 { 00026 return this->entity_resolver_; 00027 } 00028 00029 ACEXML_INLINE ACEXML_ErrorHandler * 00030 ACEXML_Parser::getErrorHandler (void) const 00031 { 00032 return this->error_handler_; 00033 } 00034 00035 ACEXML_INLINE void 00036 ACEXML_Parser::setContentHandler (ACEXML_ContentHandler *handler) 00037 { 00038 this->content_handler_ = handler; 00039 } 00040 00041 ACEXML_INLINE void 00042 ACEXML_Parser::setDTDHandler (ACEXML_DTDHandler *handler) 00043 { 00044 this->dtd_handler_ = handler; 00045 } 00046 00047 ACEXML_INLINE void 00048 ACEXML_Parser::setEntityResolver (ACEXML_EntityResolver *resolver) 00049 { 00050 this->entity_resolver_ = resolver; 00051 } 00052 00053 ACEXML_INLINE void 00054 ACEXML_Parser::setErrorHandler (ACEXML_ErrorHandler *handler) 00055 { 00056 this->error_handler_ = handler; 00057 } 00058 00059 ACEXML_INLINE int 00060 ACEXML_Parser::isChar (ACEXML_UCS4 c) const 00061 { 00062 return (c == 0x9 00063 || c == 0xA 00064 || c == 0xD 00065 || (c >= 0x20 && c <= 0xD7FF) 00066 || (c >= 0xE000 && c <= 0xFFFD) 00067 || (c >= 0x10000 && c <= 0x10FFFF)); 00068 } 00069 00070 ACEXML_INLINE int 00071 ACEXML_Parser::isCharRef (const ACEXML_Char c) const 00072 { 00073 return ((c >= 'a' && c <= 'f') 00074 || (c >= 'A' && c <= 'F')); 00075 } 00076 00077 ACEXML_INLINE int 00078 ACEXML_Parser::isNormalDigit (const ACEXML_Char c) const 00079 { 00080 return (c >= '\x30' && c <= '\x39'); 00081 } 00082 00083 ACEXML_INLINE int 00084 ACEXML_Parser::isBasechar (const ACEXML_Char c) const 00085 { 00086 #if defined (ACE_USES_WCHAR) 00087 return ACEXML_ParserInt::isBasechar_i (c); 00088 #else 00089 return ACEXML_ParserInt::base_char_table_[(int) c]; 00090 #endif /* ACE_USES_WCHAR */ 00091 } 00092 00093 ACEXML_INLINE int 00094 ACEXML_Parser::isIdeographic (const ACEXML_Char c) const 00095 { 00096 #if defined (ACE_USES_WCHAR) 00097 return ACEXML_ParserInt::isIdeographic_i (c); 00098 #else 00099 ACE_UNUSED_ARG (c); 00100 return 0; 00101 #endif /* ACE_USES_WCHAR */ 00102 } 00103 00104 ACEXML_INLINE int 00105 ACEXML_Parser::isCombiningchar (const ACEXML_Char c) const 00106 { 00107 #if defined (ACE_USES_WCHAR) 00108 return ACEXML_ParserInt::isCombiningchar_i (c); 00109 #else 00110 ACE_UNUSED_ARG (c); 00111 return 0; 00112 #endif /* ACE_USES_WCHAR */ 00113 } 00114 00115 ACEXML_INLINE int 00116 ACEXML_Parser::isDigit (const ACEXML_Char c) const 00117 { 00118 #if defined (ACE_USES_WCHAR) 00119 return ACEXML_ParserInt::isDigit_i (c); 00120 #else 00121 return (this->isNormalDigit (c)); 00122 #endif /* ACE_USES_WCHAR */ 00123 } 00124 00125 ACEXML_INLINE int 00126 ACEXML_Parser::isExtender (const ACEXML_Char c) const 00127 { 00128 #if defined (ACE_USES_WCHAR) 00129 return ACEXML_ParserInt::isExtender_i (c); 00130 #else 00131 return (c == '\xB7'); 00132 #endif /* ACE_USES_WCHAR */ 00133 } 00134 00135 ACEXML_INLINE int 00136 ACEXML_Parser::isLetter (const ACEXML_Char c) const 00137 { 00138 return (this->isBasechar (c) || this->isIdeographic (c)); 00139 } 00140 00141 ACEXML_INLINE int 00142 ACEXML_Parser::isNameChar (const ACEXML_Char c) const 00143 { 00144 return (this->isLetter (c) || this->isDigit (c) || c == '.' || c == '-' || 00145 c == '_' || c == ':' || this->isCombiningchar (c) || 00146 this->isExtender (c)); 00147 } 00148 00149 ACEXML_INLINE int 00150 ACEXML_Parser::isPubidChar (const ACEXML_Char c) const 00151 { 00152 return (c == '\x20' || c == '\x0D' || c == '\x0A' || 00153 (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || 00154 (c >= '0' && c <= '9') || c == '-' || c == '\'' || c == '(' || 00155 c == ')' || c == '+' || c == ',' || c == '.' || c == '/' || 00156 c == ':' || c == '=' || c == '?' || c == ';' || c == '!' || 00157 c == '*' || c == '#' || c == '@' || c == '$' || c == '_' || 00158 c == '%'); 00159 } 00160 00161 00162 ACEXML_INLINE int 00163 ACEXML_Parser::is_whitespace (const ACEXML_Char c) const 00164 { 00165 switch (c) 00166 { 00167 case '\x0A': case '\x20': 00168 case '\x09': case '\x0D': 00169 return 1; 00170 default: 00171 return 0; 00172 } 00173 } 00174 00175 ACEXML_INLINE ACEXML_Char 00176 ACEXML_Parser::skip_whitespace (void) 00177 { 00178 ACEXML_Char ch = this->get(); 00179 while (this->is_whitespace (ch)) 00180 ch = this->get (); 00181 return ch; 00182 } 00183 00184 00185 ACEXML_INLINE int 00186 ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky) 00187 { 00188 int wscount = 0; 00189 ACEXML_Char dummy; 00190 ACEXML_Char &forward = (peeky == 0 ? dummy : *peeky); 00191 00192 for (;this->is_whitespace ((forward = this->peek ())); ++wscount) 00193 this->get (); 00194 return wscount; 00195 } 00196 00197 ACEXML_INLINE int 00198 ACEXML_Parser::skip_equal (void) 00199 { 00200 if (this->skip_whitespace() != '=') 00201 return -1; 00202 while (this->is_whitespace (this->peek())) 00203 this->get(); 00204 return 0; 00205 } 00206 00207 ACEXML_INLINE ACEXML_Char 00208 ACEXML_Parser::get (void) 00209 { 00210 ACEXML_Char ch = 0; 00211 const ACEXML_InputSource* ip = this->current_->getInputSource(); 00212 ACEXML_CharStream* instream = ip->getCharStream(); 00213 00214 if (instream->get (ch) != -1) 00215 { 00216 this->current_->getLocator()->incrColumnNumber(); 00217 // Normalize white-space 00218 if (ch == '\x0D') 00219 { 00220 if (instream->peek() == 0x0A) 00221 instream->get (ch); 00222 ch = '\x0A'; 00223 } 00224 if (ch == '\x0A') 00225 { 00226 // Reset column number and increment Line Number. 00227 this->current_->getLocator()->incrLineNumber(); 00228 this->current_->getLocator()->setColumnNumber (0); 00229 } 00230 return ch; 00231 } 00232 return 0; 00233 } 00234 00235 ACEXML_INLINE ACEXML_Char 00236 ACEXML_Parser::peek (void) 00237 { 00238 // Using an extra level of indirection so we can 00239 // manage document location in the future. 00240 ACEXML_Char ch = 0; 00241 const ACEXML_InputSource* ip = this->current_->getInputSource(); 00242 ACEXML_CharStream* instream = ip->getCharStream(); 00243 ch = static_cast<ACEXML_Char> (instream->peek ()); 00244 return (ch > 0 ? ch : 0); 00245 } 00246 00247 ACEXML_INLINE int 00248 ACEXML_Parser::parse_token (const ACEXML_Char* keyword) 00249 { 00250 if (keyword == 0) 00251 return -1; 00252 const ACEXML_Char* ptr = keyword; 00253 for (; *ptr != 0 && (this->get() == *ptr); ++ptr) 00254 ; 00255 if (*ptr == 0) 00256 return 0; 00257 else 00258 return -1; 00259 }