00001
00002
00003 #include "ACEXML/parser/parser/ParserInternals.h"
00004
00005
00006 const ACEXML_Char* ACEXML_ParserInt::predef_ent_[] = {
00007 ACE_TEXT ("amp"),
00008 ACE_TEXT ("lt"),
00009 ACE_TEXT ("gt"),
00010 ACE_TEXT ("apos"),
00011 ACE_TEXT ("quot")
00012 };
00013
00014 const ACEXML_Char* ACEXML_ParserInt::predef_val_[] = {
00015 ACE_TEXT ("&"),
00016 ACE_TEXT ("<"),
00017 ACE_TEXT (">"),
00018 ACE_TEXT ("'"),
00019 ACE_TEXT ("\"")
00020 };
00021
00022
00023
00024
00025 const ACEXML_Char ACEXML_ParserInt::base_char_table_[] = {
00026 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00027 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00028 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00029 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00030 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00031 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
00032 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00033 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
00034 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00035 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00036 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00037 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
00038 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00039 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
00040 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
00041 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
00042 };
00043
00044 #if defined (ACE_USES_WCHAR)
00045
00046 int
00047 ACEXML_ParserInt::isBasechar_i (const ACEXML_Char c)
00048 {
00049 if (c < 0x0100)
00050 return ACEXML_ParserInt::base_char_table_[c];
00051 else if (c < 0x0905)
00052 return ((c >= 0x0100 && c <= 0x0131) ||
00053 (c >= 0x0134 && c <= 0x013E) ||
00054 (c >= 0x0141 && c <= 0x0148) ||
00055 (c >= 0x014A && c <= 0x017E) ||
00056 (c >= 0x0180 && c <= 0x01C3) ||
00057 (c >= 0x01CD && c <= 0x01F0) ||
00058 (c >= 0x01F4 && c <= 0x01F5) ||
00059 (c >= 0x01FA && c <= 0x0217) ||
00060 (c >= 0x0250 && c <= 0x02A8) ||
00061 (c >= 0x02BB && c <= 0x02C1) ||
00062 (c == 0x0386) ||
00063 (c >= 0x0388 && c <= 0x038A) ||
00064 (c == 0x038C) ||
00065 (c >= 0x038E && c <= 0x03A1) ||
00066 (c >= 0x03A3 && c <= 0x03CE) ||
00067 (c >= 0x03D0 && c <= 0x03D6) ||
00068 (c == 0x03DA) ||
00069 (c == 0x03DC) ||
00070 (c == 0x03DE) ||
00071 (c == 0x03E0) ||
00072 (c >= 0x03E2 && c <= 0x03F3) ||
00073 (c >= 0x0401 && c <= 0x040C) ||
00074 (c >= 0x040E && c <= 0x044F) ||
00075 (c >= 0x0451 && c <= 0x045C) ||
00076 (c >= 0x045E && c <= 0x0481) ||
00077 (c >= 0x0490 && c <= 0x04C4) ||
00078 (c >= 0x04C7 && c <= 0x04C8) ||
00079 (c >= 0x04CB && c <= 0x04CC) ||
00080 (c >= 0x04D0 && c <= 0x04EB) ||
00081 (c >= 0x04EE && c <= 0x04F5) ||
00082 (c >= 0x04F8 && c <= 0x04F9) ||
00083 (c >= 0x0531 && c <= 0x0556) ||
00084 (c == 0x0559) ||
00085 (c >= 0x0561 && c <= 0x0586) ||
00086 (c >= 0x05D0 && c <= 0x05EA) ||
00087 (c >= 0x05F0 && c <= 0x05F2) ||
00088 (c >= 0x0621 && c <= 0x063A) ||
00089 (c >= 0x0641 && c <= 0x064A) ||
00090 (c >= 0x0671 && c <= 0x06B7) ||
00091 (c >= 0x06BA && c <= 0x06BE) ||
00092 (c >= 0x06C0 && c <= 0x06CE) ||
00093 (c >= 0x06D0 && c <= 0x06D3) ||
00094 (c == 0x06D5) ||
00095 (c >= 0x06E5 && c <= 0x06E6));
00096 else if (c < 0x10A0)
00097 return ((c >= 0x0905 && c <= 0x0939) ||
00098 (c == 0x093D) ||
00099 (c >= 0x0958 && c <= 0x0961) ||
00100 (c >= 0x0985 && c <= 0x098C) ||
00101 (c >= 0x098F && c <= 0x0990) ||
00102 (c >= 0x0993 && c <= 0x09A8) ||
00103 (c >= 0x09AA && c <= 0x09B0) ||
00104 (c == 0x09B2) ||
00105 (c >= 0x09B6 && c <= 0x09B9) ||
00106 (c >= 0x09DC && c <= 0x09DD) ||
00107 (c >= 0x09DF && c <= 0x09E1) ||
00108 (c >= 0x09F0 && c <= 0x09F1) ||
00109 (c >= 0x0A05 && c <= 0x0A0A) ||
00110 (c >= 0x0A0F && c <= 0x0A10) ||
00111 (c >= 0x0A13 && c <= 0x0A28) ||
00112 (c >= 0x0A2A && c <= 0x0A30) ||
00113 (c >= 0x0A32 && c <= 0x0A33) ||
00114 (c >= 0x0A35 && c <= 0x0A36) ||
00115 (c >= 0x0A38 && c <= 0x0A39) ||
00116 (c >= 0x0A59 && c <= 0x0A5C) ||
00117 (c == 0x0A5E) ||
00118 (c >= 0x0A72 && c <= 0x0A74) ||
00119 (c >= 0x0A85 && c <= 0x0A8B) ||
00120 (c == 0x0A8D) ||
00121 (c >= 0x0A8F && c <= 0x0A91) ||
00122 (c >= 0x0A93 && c <= 0x0AA8) ||
00123 (c >= 0x0AAA && c <= 0x0AB0) ||
00124 (c >= 0x0AB2 && c <= 0x0AB3) ||
00125 (c >= 0x0AB5 && c <= 0x0AB9) ||
00126 (c == 0x0ABD || c == 0x0AE0) ||
00127 (c >= 0x0B05 && c <= 0x0B0C) ||
00128 (c >= 0x0B0F && c <= 0x0B10) ||
00129 (c >= 0x0B13 && c <= 0x0B28) ||
00130 (c >= 0x0B2A && c <= 0x0B30) ||
00131 (c >= 0x0B32 && c <= 0x0B33) ||
00132 (c >= 0x0B36 && c <= 0x0B39) ||
00133 (c == 0x0B3D) ||
00134 (c >= 0x0B5C && c <= 0x0B5D) ||
00135 (c >= 0x0B5F && c <= 0x0B61) ||
00136 (c >= 0x0B85 && c <= 0x0B8A) ||
00137 (c >= 0x0B8E && c <= 0x0B90) ||
00138 (c >= 0x0B92 && c <= 0x0B95) ||
00139 (c >= 0x0B99 && c <= 0x0B9A) ||
00140 (c == 0x0B9C) ||
00141 (c >= 0x0B9E && c <= 0x0B9F) ||
00142 (c >= 0x0BA3 && c <= 0x0BA4) ||
00143 (c >= 0x0BA8 && c <= 0x0BAA) ||
00144 (c >= 0x0BAE && c <= 0x0BB5) ||
00145 (c >= 0x0BB7 && c <= 0x0BB9) ||
00146 (c >= 0x0C05 && c <= 0x0C0C) ||
00147 (c >= 0x0C0E && c <= 0x0C10) ||
00148 (c >= 0x0C12 && c <= 0x0C28) ||
00149 (c >= 0x0C2A && c <= 0x0C33) ||
00150 (c >= 0x0C35 && c <= 0x0C39) ||
00151 (c >= 0x0C60 && c <= 0x0C61) ||
00152 (c >= 0x0C85 && c <= 0x0C8C) ||
00153 (c >= 0x0C8E && c <= 0x0C90) ||
00154 (c >= 0x0C92 && c <= 0x0CA8) ||
00155 (c >= 0x0CAA && c <= 0x0CB3) ||
00156 (c >= 0x0CB5 && c <= 0x0CB9) ||
00157 (c == 0x0CDE) ||
00158 (c >= 0x0CE0 && c <= 0x0CE1) ||
00159 (c >= 0x0D05 && c <= 0x0D0C) ||
00160 (c >= 0x0D0E && c <= 0x0D10) ||
00161 (c >= 0x0D12 && c <= 0x0D28) ||
00162 (c >= 0x0D2A && c <= 0x0D39) ||
00163 (c >= 0x0D60 && c <= 0x0D61) ||
00164 (c >= 0x0E01 && c <= 0x0E2E) ||
00165 (c == 0x0E30) ||
00166 (c >= 0x0E32 && c <= 0x0E33) ||
00167 (c >= 0x0E40 && c <= 0x0E45) ||
00168 (c >= 0x0E81 && c <= 0x0E82) ||
00169 (c == 0x0E84) ||
00170 (c >= 0x0E87 && c <= 0x0E88) ||
00171 (c == 0x0E8A || c == 0x0E8D) ||
00172 (c >= 0x0E94 && c <= 0x0E97) ||
00173 (c >= 0x0E99 && c <= 0x0E9F) ||
00174 (c >= 0x0EA1 && c <= 0x0EA3) ||
00175 (c == 0x0EA5 || c == 0x0EA7) ||
00176 (c >= 0x0EAA && c <= 0x0EAB) ||
00177 (c >= 0x0EAD && c <= 0x0EAE) ||
00178 (c == 0x0EB0) ||
00179 (c >= 0x0EB2 && c <= 0x0EB3) ||
00180 (c == 0x0EBD) ||
00181 (c >= 0x0EC0 && c <= 0x0EC4) ||
00182 (c >= 0x0F40 && c <= 0x0F47) ||
00183 (c >= 0x0F49 && c <= 0x0F69));
00184 else
00185 return ((c >= 0x10A0 && c <= 0x10C5) ||
00186 (c >= 0x10D0 && c <= 0x10F6) ||
00187 (c == 0x1100) ||
00188 (c >= 0x1102 && c <= 0x1103) ||
00189 (c >= 0x1105 && c <= 0x1107) ||
00190 (c == 0x1109) ||
00191 (c >= 0x110B && c <= 0x110C) ||
00192 (c >= 0x110E && c <= 0x1112) ||
00193 (c == 0x113C || c == 0x113E || c == 0x1140) ||
00194 (c == 0x114C || c == 0x114E || c == 0x1150) ||
00195 (c >= 0x1154 && c <= 0x1155) ||
00196 (c == 0x1159) ||
00197 (c >= 0x115F && c <= 0x1161) ||
00198 (c == 0x1163) ||
00199 (c == 0x1165) ||
00200 (c == 0x1167) ||
00201 (c == 0x1169) ||
00202 (c >= 0x116D && c <= 0x116E) ||
00203 (c >= 0x1172 && c <= 0x1173) ||
00204 (c == 0x1175) ||
00205 (c == 0x119E) ||
00206 (c == 0x11A8) ||
00207 (c == 0x11AB) ||
00208 (c >= 0x11AE && c <= 0x11AF) ||
00209 (c >= 0x11B7 && c <= 0x11B8) ||
00210 (c == 0x11BA) ||
00211 (c >= 0x11BC && c <= 0x11C2) ||
00212 (c == 0x11EB) ||
00213 (c == 0x11F0) ||
00214 (c == 0x11F9) ||
00215 (c >= 0x1E00 && c <= 0x1E9B) ||
00216 (c >= 0x1EA0 && c <= 0x1EF9) ||
00217 (c >= 0x1F00 && c <= 0x1F15) ||
00218 (c >= 0x1F18 && c <= 0x1F1D) ||
00219 (c >= 0x1F20 && c <= 0x1F45) ||
00220 (c >= 0x1F48 && c <= 0x1F4D) ||
00221 (c >= 0x1F50 && c <= 0x1F57) ||
00222 (c == 0x1F59) ||
00223 (c == 0x1F5B) ||
00224 (c == 0x1F5D) ||
00225 (c >= 0x1F5F && c <= 0x1F7D) ||
00226 (c >= 0x1F80 && c <= 0x1FB4) ||
00227 (c >= 0x1FB6 && c <= 0x1FBC) ||
00228 (c == 0x1FBE) ||
00229 (c >= 0x1FC2 && c <= 0x1FC4) ||
00230 (c >= 0x1FC6 && c <= 0x1FCC) ||
00231 (c >= 0x1FD0 && c <= 0x1FD3) ||
00232 (c >= 0x1FD6 && c <= 0x1FDB) ||
00233 (c >= 0x1FE0 && c <= 0x1FEC) ||
00234 (c >= 0x1FF2 && c <= 0x1FF4) ||
00235 (c >= 0x1FF6 && c <= 0x1FFC) ||
00236 (c == 0x2126) ||
00237 (c >= 0x212A && c <= 0x212B) ||
00238 (c == 0x212E) ||
00239 (c >= 0x2180 && c <= 0x2182) ||
00240 (c >= 0x3041 && c <= 0x3094) ||
00241 (c >= 0x30A1 && c <= 0x30FA) ||
00242 (c >= 0x3105 && c <= 0x312C) ||
00243 (c >= 0xAC00 && c <= 0xD7A3));
00244 };
00245
00246 int
00247 ACEXML_ParserInt::isIdeographic_i (const ACEXML_Char c)
00248 {
00249 return ((c >= 0x4E00 && c <= 0x9FA5) ||
00250 (c == 3007) ||
00251 (c >= 0x3021 && c <= 0x3029));
00252 }
00253
00254 int
00255 ACEXML_ParserInt::isCombiningchar_i (const ACEXML_Char c)
00256 {
00257 if (c < 0x0901)
00258 return ((c >= 0x0300 && c <= 0x0345) ||
00259 (c >= 0x0360 && c <= 0x0361) ||
00260 (c >= 0x0483 && c <= 0x0486) ||
00261 (c >= 0x0591 && c <= 0x05A1) ||
00262 (c >= 0x05A3 && c <= 0x05B9) ||
00263 (c >= 0x05BB && c <= 0x05BD) ||
00264 (c == 0x05BF) ||
00265 (c >= 0x05C1 && c <= 0x05C2) ||
00266 (c == 0x05C4) ||
00267 (c >= 0x064B && c <= 0x0652) ||
00268 (c == 0x0670) ||
00269 (c >= 0x06D6 && c <= 0x06DC) ||
00270 (c >= 0x06DD && c <= 0x06DF) ||
00271 (c >= 0x06E0 && c <= 0x06E4) ||
00272 (c >= 0x06E7 && c <= 0x06E8) ||
00273 (c >= 0x06EA && c <= 0x06ED));
00274 else
00275 return ((c >= 0x0901 && c <= 0x0903) ||
00276 (c == 0x093C) ||
00277 (c >= 0x093E && c <= 0x094C) ||
00278 (c == 0x094D) ||
00279 (c >= 0x0951 && c <= 0x0954) ||
00280 (c >= 0x0962 && c <= 0x0963) ||
00281 (c >= 0x0981 && c <= 0x0983) ||
00282 (c == 0x09BC) ||
00283 (c == 0x09BE) ||
00284 (c == 0x09BF) ||
00285 (c >= 0x09C0 && c <= 0x09C4) ||
00286 (c >= 0x09C7 && c <= 0x09C8) ||
00287 (c >= 0x09CB && c <= 0x09CD) ||
00288 (c == 0x09D7) ||
00289 (c >= 0x09E2 && c <= 0x09E3) ||
00290 (c == 0x0A02) ||
00291 (c == 0x0A3C) ||
00292 (c == 0x0A3E) ||
00293 (c == 0x0A3F) ||
00294 (c >= 0x0A40 && c <= 0x0A42) ||
00295 (c >= 0x0A47 && c <= 0x0A48) ||
00296 (c >= 0x0A4B && c <= 0x0A4D) ||
00297 (c >= 0x0A70 && c <= 0x0A71) ||
00298 (c >= 0x0A81 && c <= 0x0A83) ||
00299 (c == 0x0ABC) ||
00300 (c >= 0x0ABE && c <= 0x0AC5) ||
00301 (c >= 0x0AC7 && c <= 0x0AC9) ||
00302 (c >= 0x0ACB && c <= 0x0ACD) ||
00303 (c >= 0x0B01 && c <= 0x0B03) ||
00304 (c == 0x0B3C) ||
00305 (c >= 0x0B3E && c <= 0x0B43) ||
00306 (c >= 0x0B47 && c <= 0x0B48) ||
00307 (c >= 0x0B4B && c <= 0x0B4D) ||
00308 (c >= 0x0B56 && c <= 0x0B57) ||
00309 (c >= 0x0B82 && c <= 0x0B83) ||
00310 (c >= 0x0BBE && c <= 0x0BC2) ||
00311 (c >= 0x0BC6 && c <= 0x0BC8) ||
00312 (c >= 0x0BCA && c <= 0x0BCD) ||
00313 (c == 0x0BD7) ||
00314 (c >= 0x0C01 && c <= 0x0C03) ||
00315 (c >= 0x0C3E && c <= 0x0C44) ||
00316 (c >= 0x0C46 && c <= 0x0C48) ||
00317 (c >= 0x0C4A && c <= 0x0C4D) ||
00318 (c >= 0x0C55 && c <= 0x0C56) ||
00319 (c >= 0x0C82 && c <= 0x0C83) ||
00320 (c >= 0x0CBE && c <= 0x0CC4) ||
00321 (c >= 0x0CC6 && c <= 0x0CC8) ||
00322 (c >= 0x0CCA && c <= 0x0CCD) ||
00323 (c >= 0x0CD5 && c <= 0x0CD6) ||
00324 (c >= 0x0D02 && c <= 0x0D03) ||
00325 (c >= 0x0D3E && c <= 0x0D43) ||
00326 (c >= 0x0D46 && c <= 0x0D48) ||
00327 (c >= 0x0D4A && c <= 0x0D4D) ||
00328 (c == 0x0D57) ||
00329 (c == 0x0E31) ||
00330 (c >= 0x0E34 && c <= 0x0E3A) ||
00331 (c >= 0x0E47 && c <= 0x0E4E) ||
00332 (c == 0x0EB1) ||
00333 (c >= 0x0EB4 && c <= 0x0EB9) ||
00334 (c >= 0x0EBB && c <= 0x0EBC) ||
00335 (c >= 0x0EC8 && c <= 0x0ECD) ||
00336 (c >= 0x0F18 && c <= 0x0F19) ||
00337 (c == 0x0F35) ||
00338 (c == 0x0F37) ||
00339 (c == 0x0F39) ||
00340 (c == 0x0F3E) ||
00341 (c == 0x0F3F) ||
00342 (c >= 0x0F71 && c <= 0x0F84) ||
00343 (c >= 0x0F86 && c <= 0x0F8B) ||
00344 (c >= 0x0F90 && c <= 0x0F95) ||
00345 (c == 0x0F97) ||
00346 (c >= 0x0F99 && c <= 0x0FAD) ||
00347 (c >= 0x0FB1 && c <= 0x0FB7) ||
00348 (c == 0x0FB9) ||
00349 (c >= 0x20D0 && c <= 0x20DC) ||
00350 (c == 0x20E1) ||
00351 (c >= 0x302A && c <= 0x302F) ||
00352 (c == 0x3099) ||
00353 (c == 0x309A));
00354 }
00355
00356 int
00357 ACEXML_ParserInt::isDigit_i (const ACEXML_Char c)
00358 {
00359 if (c < 0x0040)
00360 return (c >= 0x0030 && c <= 0x0039);
00361 else
00362 return ((c >= 0x0660 && c <= 0x0669) ||
00363 (c >= 0x06F0 && c <= 0x06F9) ||
00364 (c >= 0x0966 && c <= 0x096F) ||
00365 (c >= 0x09E6 && c <= 0x09EF) ||
00366 (c >= 0x0A66 && c <= 0x0A6F) ||
00367 (c >= 0x0AE6 && c <= 0x0AEF) ||
00368 (c >= 0x0B66 && c <= 0x0B6F) ||
00369 (c >= 0x0BE7 && c <= 0x0BEF) ||
00370 (c >= 0x0C66 && c <= 0x0C6F) ||
00371 (c >= 0x0CE6 && c <= 0x0CEF) ||
00372 (c >= 0x0D66 && c <= 0x0D6F) ||
00373 (c >= 0x0E50 && c <= 0x0E59) ||
00374 (c >= 0x0ED0 && c <= 0x0ED9) ||
00375 (c >= 0x0F20 && c <= 0x0F29));
00376 }
00377
00378 int
00379 ACEXML_ParserInt::isExtender_i (const ACEXML_Char c)
00380 {
00381
00382 switch (c)
00383 {
00384 case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: case 0x0640:
00385 case 0x0E46: case 0x0EC6: case 0x3005: case 0x3031: case 0x3032:
00386 case 0x3033: case 0x3034: case 0x3035: case 0x309D: case 0x309E:
00387 case 0x30FC: case 0x30FD: case 0x30FE:
00388 return 1;
00389 default:
00390 return 0;
00391 }
00392 }
00393
00394 #endif