ParserInternals.cpp

Go to the documentation of this file.
00001 // ParserInternals.cpp,v 1.2 2003/05/30 03:31:49 kitty Exp
00002 
00003 #include "ACEXML/parser/parser/ParserInternals.h"
00004 
00005 
00006 const ACEXML_Char* ACEXML_ParserInt::predef_ent_[] = {
00007   ACE_TEXT ("amp"),
00008   ACE_TEXT ("lt"),
00009   ACE_TEXT ("gt"),
00010   ACE_TEXT ("apos"),
00011   ACE_TEXT ("quot")
00012 };
00013 
00014 const ACEXML_Char* ACEXML_ParserInt::predef_val_[] = {
00015   ACE_TEXT ("&"),
00016   ACE_TEXT ("<"),
00017   ACE_TEXT (">"),
00018   ACE_TEXT ("'"),
00019   ACE_TEXT ("\"")
00020 };
00021 
00022 // Optimize away the most common cases. Any compiler worth it's salt should
00023 // give generate a single memory access.
00024 
00025 const ACEXML_Char ACEXML_ParserInt::base_char_table_[] = {
00026   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0000 - 0x000F */
00027   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0010 - 0x001F */
00028   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0020 - 0x002F */
00029   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0030 - 0x003F */
00030   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0040 - 0x004F */
00031   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0050 - 0x005F */
00032   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x0060 - 0x006F */
00033   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 0x0070 - 0x007F */
00034   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0080 - 0x008F */
00035   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x0090 - 0x009F */
00036   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00A0 - 0x00AF */
00037   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00B0 - 0x00BF */
00038   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00C0 - 0x00CF */
00039   1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00D0 - 0x00DF */
00040   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00E0 - 0x00EF */
00041   1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00F0 - 0x00FF */
00042 };
00043 
00044 #if defined (ACE_USES_WCHAR)
00045 
00046 int
00047 ACEXML_ParserInt::isBasechar_i (const ACEXML_Char c)
00048 {
00049   if (c < 0x0100)
00050     return ACEXML_ParserInt::base_char_table_[c];
00051   else if (c < 0x0905)
00052     return ((c >= 0x0100 && c <= 0x0131) ||
00053             (c >= 0x0134 && c <= 0x013E) ||
00054             (c >= 0x0141 && c <= 0x0148) ||
00055             (c >= 0x014A && c <= 0x017E) ||
00056             (c >= 0x0180 && c <= 0x01C3) ||
00057             (c >= 0x01CD && c <= 0x01F0) ||
00058             (c >= 0x01F4 && c <= 0x01F5) ||
00059             (c >= 0x01FA && c <= 0x0217) ||
00060             (c >= 0x0250 && c <= 0x02A8) ||
00061             (c >= 0x02BB && c <= 0x02C1) ||
00062             (c == 0x0386) ||
00063             (c >= 0x0388 && c <= 0x038A) ||
00064             (c == 0x038C) ||
00065             (c >= 0x038E && c <= 0x03A1) ||
00066             (c >= 0x03A3 && c <= 0x03CE) ||
00067             (c >= 0x03D0 && c <= 0x03D6) ||
00068             (c == 0x03DA) ||
00069             (c == 0x03DC) ||
00070             (c == 0x03DE) ||
00071             (c == 0x03E0) ||
00072             (c >= 0x03E2 && c <= 0x03F3) ||
00073             (c >= 0x0401 && c <= 0x040C) ||
00074             (c >= 0x040E && c <= 0x044F) ||
00075             (c >= 0x0451 && c <= 0x045C) ||
00076             (c >= 0x045E && c <= 0x0481) ||
00077             (c >= 0x0490 && c <= 0x04C4) ||
00078             (c >= 0x04C7 && c <= 0x04C8) ||
00079             (c >= 0x04CB && c <= 0x04CC) ||
00080             (c >= 0x04D0 && c <= 0x04EB) ||
00081             (c >= 0x04EE && c <= 0x04F5) ||
00082             (c >= 0x04F8 && c <= 0x04F9) ||
00083             (c >= 0x0531 && c <= 0x0556) ||
00084             (c == 0x0559) ||
00085             (c >= 0x0561 && c <= 0x0586) ||
00086             (c >= 0x05D0 && c <= 0x05EA) ||
00087             (c >= 0x05F0 && c <= 0x05F2) ||
00088             (c >= 0x0621 && c <= 0x063A) ||
00089             (c >= 0x0641 && c <= 0x064A) ||
00090             (c >= 0x0671 && c <= 0x06B7) ||
00091             (c >= 0x06BA && c <= 0x06BE) ||
00092             (c >= 0x06C0 && c <= 0x06CE) ||
00093             (c >= 0x06D0 && c <= 0x06D3) ||
00094             (c == 0x06D5) ||
00095             (c >= 0x06E5 && c <= 0x06E6));
00096   else if (c < 0x10A0)
00097     return ((c >= 0x0905 && c <= 0x0939) ||
00098             (c == 0x093D) ||
00099             (c >= 0x0958 && c <= 0x0961) ||
00100             (c >= 0x0985 && c <= 0x098C) ||
00101             (c >= 0x098F && c <= 0x0990) ||
00102             (c >= 0x0993 && c <= 0x09A8) ||
00103             (c >= 0x09AA && c <= 0x09B0) ||
00104             (c == 0x09B2) ||
00105             (c >= 0x09B6 && c <= 0x09B9) ||
00106             (c >= 0x09DC && c <= 0x09DD) ||
00107             (c >= 0x09DF && c <= 0x09E1) ||
00108             (c >= 0x09F0 && c <= 0x09F1) ||
00109             (c >= 0x0A05 && c <= 0x0A0A) ||
00110             (c >= 0x0A0F && c <= 0x0A10) ||
00111             (c >= 0x0A13 && c <= 0x0A28) ||
00112             (c >= 0x0A2A && c <= 0x0A30) ||
00113             (c >= 0x0A32 && c <= 0x0A33) ||
00114             (c >= 0x0A35 && c <= 0x0A36) ||
00115             (c >= 0x0A38 && c <= 0x0A39) ||
00116             (c >= 0x0A59 && c <= 0x0A5C) ||
00117             (c == 0x0A5E) ||
00118             (c >= 0x0A72 && c <= 0x0A74) ||
00119             (c >= 0x0A85 && c <= 0x0A8B) ||
00120             (c == 0x0A8D) ||
00121             (c >= 0x0A8F && c <= 0x0A91) ||
00122             (c >= 0x0A93 && c <= 0x0AA8) ||
00123             (c >= 0x0AAA && c <= 0x0AB0) ||
00124             (c >= 0x0AB2 && c <= 0x0AB3) ||
00125             (c >= 0x0AB5 && c <= 0x0AB9) ||
00126             (c == 0x0ABD || c == 0x0AE0) ||
00127             (c >= 0x0B05 && c <= 0x0B0C) ||
00128             (c >= 0x0B0F && c <= 0x0B10) ||
00129             (c >= 0x0B13 && c <= 0x0B28) ||
00130             (c >= 0x0B2A && c <= 0x0B30) ||
00131             (c >= 0x0B32 && c <= 0x0B33) ||
00132             (c >= 0x0B36 && c <= 0x0B39) ||
00133             (c == 0x0B3D) ||
00134             (c >= 0x0B5C && c <= 0x0B5D) ||
00135             (c >= 0x0B5F && c <= 0x0B61) ||
00136             (c >= 0x0B85 && c <= 0x0B8A) ||
00137             (c >= 0x0B8E && c <= 0x0B90) ||
00138             (c >= 0x0B92 && c <= 0x0B95) ||
00139             (c >= 0x0B99 && c <= 0x0B9A) ||
00140             (c == 0x0B9C) ||
00141             (c >= 0x0B9E && c <= 0x0B9F) ||
00142             (c >= 0x0BA3 && c <= 0x0BA4) ||
00143             (c >= 0x0BA8 && c <= 0x0BAA) ||
00144             (c >= 0x0BAE && c <= 0x0BB5) ||
00145             (c >= 0x0BB7 && c <= 0x0BB9) ||
00146             (c >= 0x0C05 && c <= 0x0C0C) ||
00147             (c >= 0x0C0E && c <= 0x0C10) ||
00148             (c >= 0x0C12 && c <= 0x0C28) ||
00149             (c >= 0x0C2A && c <= 0x0C33) ||
00150             (c >= 0x0C35 && c <= 0x0C39) ||
00151             (c >= 0x0C60 && c <= 0x0C61) ||
00152             (c >= 0x0C85 && c <= 0x0C8C) ||
00153             (c >= 0x0C8E && c <= 0x0C90) ||
00154             (c >= 0x0C92 && c <= 0x0CA8) ||
00155             (c >= 0x0CAA && c <= 0x0CB3) ||
00156             (c >= 0x0CB5 && c <= 0x0CB9) ||
00157             (c == 0x0CDE) ||
00158             (c >= 0x0CE0 && c <= 0x0CE1) ||
00159             (c >= 0x0D05 && c <= 0x0D0C) ||
00160             (c >= 0x0D0E && c <= 0x0D10) ||
00161             (c >= 0x0D12 && c <= 0x0D28) ||
00162             (c >= 0x0D2A && c <= 0x0D39) ||
00163             (c >= 0x0D60 && c <= 0x0D61) ||
00164             (c >= 0x0E01 && c <= 0x0E2E) ||
00165             (c == 0x0E30) ||
00166             (c >= 0x0E32 && c <= 0x0E33) ||
00167             (c >= 0x0E40 && c <= 0x0E45) ||
00168             (c >= 0x0E81 && c <= 0x0E82) ||
00169             (c == 0x0E84) ||
00170             (c >= 0x0E87 && c <= 0x0E88) ||
00171             (c == 0x0E8A || c == 0x0E8D) ||
00172             (c >= 0x0E94 && c <= 0x0E97) ||
00173             (c >= 0x0E99 && c <= 0x0E9F) ||
00174             (c >= 0x0EA1 && c <= 0x0EA3) ||
00175             (c == 0x0EA5 || c == 0x0EA7) ||
00176             (c >= 0x0EAA && c <= 0x0EAB) ||
00177             (c >= 0x0EAD && c <= 0x0EAE) ||
00178             (c == 0x0EB0) ||
00179             (c >= 0x0EB2 && c <= 0x0EB3) ||
00180             (c == 0x0EBD) ||
00181             (c >= 0x0EC0 && c <= 0x0EC4) ||
00182             (c >= 0x0F40 && c <= 0x0F47) ||
00183             (c >= 0x0F49 && c <= 0x0F69));
00184   else
00185     return ((c >= 0x10A0 && c <= 0x10C5) ||
00186             (c >= 0x10D0 && c <= 0x10F6) ||
00187             (c == 0x1100) ||
00188             (c >= 0x1102 && c <= 0x1103) ||
00189             (c >= 0x1105 && c <= 0x1107) ||
00190             (c == 0x1109) ||
00191             (c >= 0x110B && c <= 0x110C) ||
00192             (c >= 0x110E && c <= 0x1112) ||
00193             (c == 0x113C || c == 0x113E || c == 0x1140) ||
00194             (c == 0x114C || c == 0x114E || c == 0x1150) ||
00195             (c >= 0x1154 && c <= 0x1155) ||
00196             (c == 0x1159) ||
00197             (c >= 0x115F && c <= 0x1161) ||
00198             (c == 0x1163) ||
00199             (c == 0x1165) ||
00200             (c == 0x1167) ||
00201             (c == 0x1169) ||
00202             (c >= 0x116D && c <= 0x116E) ||
00203             (c >= 0x1172 && c <= 0x1173) ||
00204             (c == 0x1175) ||
00205             (c == 0x119E) ||
00206             (c == 0x11A8) ||
00207             (c == 0x11AB) ||
00208             (c >= 0x11AE && c <= 0x11AF) ||
00209             (c >= 0x11B7 && c <= 0x11B8) ||
00210             (c == 0x11BA) ||
00211             (c >= 0x11BC && c <= 0x11C2) ||
00212             (c == 0x11EB) ||
00213             (c == 0x11F0) ||
00214             (c == 0x11F9) ||
00215             (c >= 0x1E00 && c <= 0x1E9B) ||
00216             (c >= 0x1EA0 && c <= 0x1EF9) ||
00217             (c >= 0x1F00 && c <= 0x1F15) ||
00218             (c >= 0x1F18 && c <= 0x1F1D) ||
00219             (c >= 0x1F20 && c <= 0x1F45) ||
00220             (c >= 0x1F48 && c <= 0x1F4D) ||
00221             (c >= 0x1F50 && c <= 0x1F57) ||
00222             (c == 0x1F59) ||
00223             (c == 0x1F5B) ||
00224             (c == 0x1F5D) ||
00225             (c >= 0x1F5F && c <= 0x1F7D) ||
00226             (c >= 0x1F80 && c <= 0x1FB4) ||
00227             (c >= 0x1FB6 && c <= 0x1FBC) ||
00228             (c == 0x1FBE) ||
00229             (c >= 0x1FC2 && c <= 0x1FC4) ||
00230             (c >= 0x1FC6 && c <= 0x1FCC) ||
00231             (c >= 0x1FD0 && c <= 0x1FD3) ||
00232             (c >= 0x1FD6 && c <= 0x1FDB) ||
00233             (c >= 0x1FE0 && c <= 0x1FEC) ||
00234             (c >= 0x1FF2 && c <= 0x1FF4) ||
00235             (c >= 0x1FF6 && c <= 0x1FFC) ||
00236             (c == 0x2126) ||
00237             (c >= 0x212A && c <= 0x212B) ||
00238             (c == 0x212E) ||
00239             (c >= 0x2180 && c <= 0x2182) ||
00240             (c >= 0x3041 && c <= 0x3094) ||
00241             (c >= 0x30A1 && c <= 0x30FA) ||
00242             (c >= 0x3105 && c <= 0x312C) ||
00243             (c >= 0xAC00 && c <= 0xD7A3));
00244 };
00245 
00246 int
00247 ACEXML_ParserInt::isIdeographic_i (const ACEXML_Char c)
00248 {
00249   return ((c >= 0x4E00 && c <= 0x9FA5) ||
00250           (c == 3007) ||
00251           (c >= 0x3021 && c <= 0x3029));
00252 }
00253 
00254 int
00255 ACEXML_ParserInt::isCombiningchar_i (const ACEXML_Char c)
00256 {
00257   if (c < 0x0901)
00258     return ((c >= 0x0300 && c <= 0x0345) ||
00259             (c >= 0x0360 && c <= 0x0361) ||
00260             (c >= 0x0483 && c <= 0x0486) ||
00261             (c >= 0x0591 && c <= 0x05A1) ||
00262             (c >= 0x05A3 && c <= 0x05B9) ||
00263             (c >= 0x05BB && c <= 0x05BD) ||
00264             (c == 0x05BF) ||
00265             (c >= 0x05C1 && c <= 0x05C2) ||
00266             (c == 0x05C4) ||
00267             (c >= 0x064B && c <= 0x0652) ||
00268             (c == 0x0670) ||
00269             (c >= 0x06D6 && c <= 0x06DC) ||
00270             (c >= 0x06DD && c <= 0x06DF) ||
00271             (c >= 0x06E0 && c <= 0x06E4) ||
00272             (c >= 0x06E7 && c <= 0x06E8) ||
00273             (c >= 0x06EA && c <= 0x06ED));
00274   else
00275     return ((c >= 0x0901 && c <= 0x0903) ||
00276             (c == 0x093C) ||
00277             (c >= 0x093E && c <= 0x094C) ||
00278             (c == 0x094D) ||
00279             (c >= 0x0951 && c <= 0x0954) ||
00280             (c >= 0x0962 && c <= 0x0963) ||
00281             (c >= 0x0981 && c <= 0x0983) ||
00282             (c == 0x09BC) ||
00283             (c == 0x09BE) ||
00284             (c == 0x09BF) ||
00285             (c >= 0x09C0 && c <= 0x09C4) ||
00286             (c >= 0x09C7 && c <= 0x09C8) ||
00287             (c >= 0x09CB && c <= 0x09CD) ||
00288             (c == 0x09D7) ||
00289             (c >= 0x09E2 && c <= 0x09E3) ||
00290             (c == 0x0A02) ||
00291             (c == 0x0A3C) ||
00292             (c == 0x0A3E) ||
00293             (c == 0x0A3F) ||
00294             (c >= 0x0A40 && c <= 0x0A42) ||
00295             (c >= 0x0A47 && c <= 0x0A48) ||
00296             (c >= 0x0A4B && c <= 0x0A4D) ||
00297             (c >= 0x0A70 && c <= 0x0A71) ||
00298             (c >= 0x0A81 && c <= 0x0A83) ||
00299             (c == 0x0ABC) ||
00300             (c >= 0x0ABE && c <= 0x0AC5) ||
00301             (c >= 0x0AC7 && c <= 0x0AC9) ||
00302             (c >= 0x0ACB && c <= 0x0ACD) ||
00303             (c >= 0x0B01 && c <= 0x0B03) ||
00304             (c == 0x0B3C) ||
00305             (c >= 0x0B3E && c <= 0x0B43) ||
00306             (c >= 0x0B47 && c <= 0x0B48) ||
00307             (c >= 0x0B4B && c <= 0x0B4D) ||
00308             (c >= 0x0B56 && c <= 0x0B57) ||
00309             (c >= 0x0B82 && c <= 0x0B83) ||
00310             (c >= 0x0BBE && c <= 0x0BC2) ||
00311             (c >= 0x0BC6 && c <= 0x0BC8) ||
00312             (c >= 0x0BCA && c <= 0x0BCD) ||
00313             (c == 0x0BD7) ||
00314             (c >= 0x0C01 && c <= 0x0C03) ||
00315             (c >= 0x0C3E && c <= 0x0C44) ||
00316             (c >= 0x0C46 && c <= 0x0C48) ||
00317             (c >= 0x0C4A && c <= 0x0C4D) ||
00318             (c >= 0x0C55 && c <= 0x0C56) ||
00319             (c >= 0x0C82 && c <= 0x0C83) ||
00320             (c >= 0x0CBE && c <= 0x0CC4) ||
00321             (c >= 0x0CC6 && c <= 0x0CC8) ||
00322             (c >= 0x0CCA && c <= 0x0CCD) ||
00323             (c >= 0x0CD5 && c <= 0x0CD6) ||
00324             (c >= 0x0D02 && c <= 0x0D03) ||
00325             (c >= 0x0D3E && c <= 0x0D43) ||
00326             (c >= 0x0D46 && c <= 0x0D48) ||
00327             (c >= 0x0D4A && c <= 0x0D4D) ||
00328             (c == 0x0D57) ||
00329             (c == 0x0E31) ||
00330             (c >= 0x0E34 && c <= 0x0E3A) ||
00331             (c >= 0x0E47 && c <= 0x0E4E) ||
00332             (c == 0x0EB1) ||
00333             (c >= 0x0EB4 && c <= 0x0EB9) ||
00334             (c >= 0x0EBB && c <= 0x0EBC) ||
00335             (c >= 0x0EC8 && c <= 0x0ECD) ||
00336             (c >= 0x0F18 && c <= 0x0F19) ||
00337             (c == 0x0F35) ||
00338             (c == 0x0F37) ||
00339             (c == 0x0F39) ||
00340             (c == 0x0F3E) ||
00341             (c == 0x0F3F) ||
00342             (c >= 0x0F71 && c <= 0x0F84) ||
00343             (c >= 0x0F86 && c <= 0x0F8B) ||
00344             (c >= 0x0F90 && c <= 0x0F95) ||
00345             (c == 0x0F97) ||
00346             (c >= 0x0F99 && c <= 0x0FAD) ||
00347             (c >= 0x0FB1 && c <= 0x0FB7) ||
00348             (c == 0x0FB9) ||
00349             (c >= 0x20D0 && c <= 0x20DC) ||
00350             (c == 0x20E1) ||
00351             (c >= 0x302A && c <= 0x302F) ||
00352             (c == 0x3099) ||
00353             (c == 0x309A));
00354 }
00355 
00356 int
00357 ACEXML_ParserInt::isDigit_i (const ACEXML_Char c)
00358 {
00359   if (c < 0x0040)
00360     return (c >= 0x0030 && c <= 0x0039);
00361   else
00362     return ((c >= 0x0660 && c <= 0x0669) ||
00363             (c >= 0x06F0 && c <= 0x06F9) ||
00364             (c >= 0x0966 && c <= 0x096F) ||
00365             (c >= 0x09E6 && c <= 0x09EF) ||
00366             (c >= 0x0A66 && c <= 0x0A6F) ||
00367             (c >= 0x0AE6 && c <= 0x0AEF) ||
00368             (c >= 0x0B66 && c <= 0x0B6F) ||
00369             (c >= 0x0BE7 && c <= 0x0BEF) ||
00370             (c >= 0x0C66 && c <= 0x0C6F) ||
00371             (c >= 0x0CE6 && c <= 0x0CEF) ||
00372             (c >= 0x0D66 && c <= 0x0D6F) ||
00373             (c >= 0x0E50 && c <= 0x0E59) ||
00374             (c >= 0x0ED0 && c <= 0x0ED9) ||
00375             (c >= 0x0F20 && c <= 0x0F29));
00376 }
00377 
00378 int
00379 ACEXML_ParserInt::isExtender_i (const ACEXML_Char c)
00380 {
00381   // The compiler should generate a jump table and index into it directly.
00382   switch (c)
00383     {
00384       case 0x00B7: case 0x02D0: case 0x02D1: case 0x0387: case 0x0640:
00385       case 0x0E46: case 0x0EC6: case 0x3005: case 0x3031: case 0x3032:
00386       case 0x3033: case 0x3034: case 0x3035: case 0x309D: case 0x309E:
00387       case 0x30FC: case 0x30FD: case 0x30FE:
00388         return 1;
00389       default:
00390         return 0;
00391     }
00392 }
00393 
00394 #endif /* ACE_USES_WCHAR */

Generated on Thu Nov 9 11:45:38 2006 for ACEXML by doxygen 1.3.6