00001 //============================================================================= 00002 /** 00003 * @file Parser.i 00004 * 00005 * Parser.i,v 1.11 2006/03/14 21:20:41 sjiang Exp 00006 * 00007 * @author Nanbor Wang <nanbor@cs.wustl.edu> 00008 */ 00009 //============================================================================= 00010 00011 ACEXML_INLINE ACEXML_ContentHandler * 00012 ACEXML_Parser::getContentHandler (void) const 00013 { 00014 return this->content_handler_; 00015 } 00016 00017 ACEXML_INLINE ACEXML_DTDHandler * 00018 ACEXML_Parser::getDTDHandler (void) const 00019 { 00020 return this->dtd_handler_; 00021 } 00022 00023 ACEXML_INLINE ACEXML_EntityResolver * 00024 ACEXML_Parser::getEntityResolver (void) const 00025 { 00026 return this->entity_resolver_; 00027 } 00028 00029 ACEXML_INLINE ACEXML_ErrorHandler * 00030 ACEXML_Parser::getErrorHandler (void) const 00031 { 00032 return this->error_handler_; 00033 } 00034 00035 ACEXML_INLINE void 00036 ACEXML_Parser::setContentHandler (ACEXML_ContentHandler *handler) 00037 { 00038 this->content_handler_ = handler; 00039 } 00040 00041 ACEXML_INLINE void 00042 ACEXML_Parser::setDTDHandler (ACEXML_DTDHandler *handler) 00043 { 00044 this->dtd_handler_ = handler; 00045 } 00046 00047 ACEXML_INLINE void 00048 ACEXML_Parser::setEntityResolver (ACEXML_EntityResolver *resolver) 00049 { 00050 this->entity_resolver_ = resolver; 00051 } 00052 00053 ACEXML_INLINE void 00054 ACEXML_Parser::setErrorHandler (ACEXML_ErrorHandler *handler) 00055 { 00056 this->error_handler_ = handler; 00057 } 00058 00059 ACEXML_INLINE int 00060 ACEXML_Parser::isChar (ACEXML_UCS4 c) const 00061 { 00062 return (c == 0x9 || c == 0xA || c == 0xD || 00063 c >= 0x20 && c <= 0xD7FF || 00064 c >= 0xE000 && c <= 0xFFFD || 00065 c >= 0x10000 && c <= 0x10FFFF); 00066 } 00067 00068 ACEXML_INLINE int 00069 ACEXML_Parser::isCharRef (const ACEXML_Char c) const 00070 { 00071 return ((c >= 'a' && c <= 'f') || 00072 (c >= 'A' && c <= 'F')); 00073 } 00074 00075 ACEXML_INLINE int 00076 ACEXML_Parser::isNormalDigit (const ACEXML_Char c) const 00077 { 00078 return (c >= '\x30' && c <= '\x39'); 00079 } 00080 00081 ACEXML_INLINE int 00082 ACEXML_Parser::isBasechar (const ACEXML_Char c) const 00083 { 00084 #if defined (ACE_USES_WCHAR) 00085 return ACEXML_ParserInt::isBasechar_i (c); 00086 #else 00087 return ACEXML_ParserInt::base_char_table_[(int) c]; 00088 #endif /* ACE_USES_WCHAR */ 00089 } 00090 00091 ACEXML_INLINE int 00092 ACEXML_Parser::isIdeographic (const ACEXML_Char c) const 00093 { 00094 #if defined (ACE_USES_WCHAR) 00095 return ACEXML_ParserInt::isIdeographic_i (c); 00096 #else 00097 ACE_UNUSED_ARG (c); 00098 return 0; 00099 #endif /* ACE_USES_WCHAR */ 00100 } 00101 00102 ACEXML_INLINE int 00103 ACEXML_Parser::isCombiningchar (const ACEXML_Char c) const 00104 { 00105 #if defined (ACE_USES_WCHAR) 00106 return ACEXML_ParserInt::isCombiningchar_i (c); 00107 #else 00108 ACE_UNUSED_ARG (c); 00109 return 0; 00110 #endif /* ACE_USES_WCHAR */ 00111 } 00112 00113 ACEXML_INLINE int 00114 ACEXML_Parser::isDigit (const ACEXML_Char c) const 00115 { 00116 #if defined (ACE_USES_WCHAR) 00117 return ACEXML_ParserInt::isDigit_i (c); 00118 #else 00119 return (this->isNormalDigit (c)); 00120 #endif /* ACE_USES_WCHAR */ 00121 } 00122 00123 ACEXML_INLINE int 00124 ACEXML_Parser::isExtender (const ACEXML_Char c) const 00125 { 00126 #if defined (ACE_USES_WCHAR) 00127 return ACEXML_ParserInt::isExtender_i (c); 00128 #else 00129 return (c == '\xB7'); 00130 #endif /* ACE_USES_WCHAR */ 00131 } 00132 00133 ACEXML_INLINE int 00134 ACEXML_Parser::isLetter (const ACEXML_Char c) const 00135 { 00136 return (this->isBasechar (c) || this->isIdeographic (c)); 00137 } 00138 00139 ACEXML_INLINE int 00140 ACEXML_Parser::isNameChar (const ACEXML_Char c) const 00141 { 00142 return (this->isLetter (c) || this->isDigit (c) || c == '.' || c == '-' || 00143 c == '_' || c == ':' || this->isCombiningchar (c) || 00144 this->isExtender (c)); 00145 } 00146 00147 ACEXML_INLINE int 00148 ACEXML_Parser::isPubidChar (const ACEXML_Char c) const 00149 { 00150 return (c == '\x20' || c == '\x0D' || c == '\x0A' || 00151 (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || 00152 (c >= '0' && c <= '9') || c == '-' || c == '\'' || c == '(' || 00153 c == ')' || c == '+' || c == ',' || c == '.' || c == '/' || 00154 c == ':' || c == '=' || c == '?' || c == ';' || c == '!' || 00155 c == '*' || c == '#' || c == '@' || c == '$' || c == '_' || 00156 c == '%'); 00157 } 00158 00159 00160 ACEXML_INLINE int 00161 ACEXML_Parser::is_whitespace (const ACEXML_Char c) const 00162 { 00163 switch (c) 00164 { 00165 case '\x0A': case '\x20': 00166 case '\x09': case '\x0D': 00167 return 1; 00168 default: 00169 return 0; 00170 } 00171 } 00172 00173 ACEXML_INLINE ACEXML_Char 00174 ACEXML_Parser::skip_whitespace (void) 00175 { 00176 ACEXML_Char ch = this->get(); 00177 while (this->is_whitespace (ch)) 00178 ch = this->get (); 00179 return ch; 00180 } 00181 00182 00183 ACEXML_INLINE int 00184 ACEXML_Parser::skip_whitespace_count (ACEXML_Char *peeky) 00185 { 00186 int wscount = 0; 00187 ACEXML_Char dummy; 00188 ACEXML_Char &forward = (peeky == 0 ? dummy : *peeky); 00189 00190 for (;this->is_whitespace ((forward = this->peek ())); ++wscount) 00191 this->get (); 00192 return wscount; 00193 } 00194 00195 ACEXML_INLINE int 00196 ACEXML_Parser::skip_equal (void) 00197 { 00198 if (this->skip_whitespace() != '=') 00199 return -1; 00200 while (this->is_whitespace (this->peek())) 00201 this->get(); 00202 return 0; 00203 } 00204 00205 ACEXML_INLINE ACEXML_Char 00206 ACEXML_Parser::get (void) 00207 { 00208 ACEXML_Char ch = 0; 00209 const ACEXML_InputSource* ip = this->current_->getInputSource(); 00210 ACEXML_CharStream* instream = ip->getCharStream(); 00211 00212 if (instream->get (ch) != -1) 00213 { 00214 this->current_->getLocator()->incrColumnNumber(); 00215 // Normalize white-space 00216 if (ch == '\x0D') 00217 { 00218 if (instream->peek() == 0x0A) 00219 instream->get (ch); 00220 ch = '\x0A'; 00221 } 00222 if (ch == '\x0A') 00223 { 00224 // Reset column number and increment Line Number. 00225 this->current_->getLocator()->incrLineNumber(); 00226 this->current_->getLocator()->setColumnNumber (0); 00227 } 00228 return ch; 00229 } 00230 return 0; 00231 } 00232 00233 ACEXML_INLINE ACEXML_Char 00234 ACEXML_Parser::peek (void) 00235 { 00236 // Using an extra level of indirection so we can 00237 // manage document location in the future. 00238 ACEXML_Char ch = 0; 00239 const ACEXML_InputSource* ip = this->current_->getInputSource(); 00240 ACEXML_CharStream* instream = ip->getCharStream(); 00241 ch = static_cast<ACEXML_Char> (instream->peek ()); 00242 return (ch > 0 ? ch : 0); 00243 } 00244 00245 ACEXML_INLINE int 00246 ACEXML_Parser::parse_token (const ACEXML_Char* keyword) 00247 { 00248 if (keyword == 0) 00249 return -1; 00250 const ACEXML_Char* ptr = keyword; 00251 for (; *ptr != 0 && (this->get() == *ptr); ++ptr) 00252 ; 00253 if (*ptr == 0) 00254 return 0; 00255 else 00256 return -1; 00257 }