#include <ACEXML/common/Transcode.h>
Public Types | |
| enum | { ACEXML_SUCCESS = 0, ACEXML_DESTINATION_TOO_SHORT = -1, ACEXML_END_OF_SOURCE = -2, ACEXML_INVALID_ARGS = -3, ACEXML_IS_SURROGATE = -4, ACEXML_NON_UNICODE = -5 } |
Static Public Member Functions | |
| int | utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len) |
| int | ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len) |
| int | ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len) |
| int | surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UTF8 *dst, size_t len) |
| int | surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UCS4 &dst) |
| int | utf82ucs4 (const ACEXML_UTF8 *src, size_t len, ACEXML_UCS4 &dst) |
| int | utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst) |
| int | utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len) |
| int | utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len) |
Public Attributes | |
| enum ACEXML_Transcoder:: { ... } | ACEXML_STATUS |
Wrapper class for performing transcoding among different Unicode encodings.
Definition at line 36 of file Transcode.h.
|
|
Definition at line 42 of file Transcode.h.
00043 {
00044 ACEXML_SUCCESS = 0,
00045 ACEXML_DESTINATION_TOO_SHORT = -1,
00046 ACEXML_END_OF_SOURCE = -2,
00047 ACEXML_INVALID_ARGS = -3,
00048 ACEXML_IS_SURROGATE = -4,
00049 ACEXML_NON_UNICODE = -5
00050 } ACEXML_STATUS;
|
|
int ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UCS4 &dst) [static]
|
Definition at line 133 of file Transcode.cpp. References ACEXML_INVALID_ARGS, ACEXML_SUCCESS, ACEXML_UCS4, and ACEXML_UTF16. Referenced by utf162ucs4().
00136 {
00137 if ((high >= 0xD800 && high < 0xDC00) ||
00138 (low >= 0xDC00 && low < 0xE000))
00139 return ACEXML_INVALID_ARGS;
00140
00141 dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00142 return ACEXML_SUCCESS;
00143 }
|
|
int ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UTF8 *dst, size_t len) [static]
|
Definition at line 113 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, and ACEXML_UTF8.
00117 {
00118 if (len < 3)
00119 return ACEXML_DESTINATION_TOO_SHORT;
00120
00121 if (dst == 0 ||
00122 (high >= 0xD800 && high < 0xDC00) ||
00123 (low >= 0xDC00 && low < 0xE000))
00124 return ACEXML_INVALID_ARGS;
00125
00126 ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00127 *dst = static_cast<ACEXML_UTF8> (0xD800 | (src / 0x400));
00128 *(dst+1) = static_cast<ACEXML_UTF8> (0xDC00 | (src % 0x400));
00129 return 2;
00130 }
|
|
int ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len) [static]
|
Definition at line 80 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_NON_UNICODE, ACEXML_UCS4, and ACEXML_UTF16. Referenced by ACEXML_Parser::parse_char_reference(), and utf8s2utf16s().
00083 {
00084 if (dst == 0)
00085 return ACEXML_INVALID_ARGS;
00086
00087 if (src < 0x10000)
00088 {
00089 if (len < 1)
00090 return ACEXML_DESTINATION_TOO_SHORT;
00091
00092 if (src >= 0xD800 && src < 0xE000)
00093 return ACEXML_NON_UNICODE; // Surrogates are not valid unicode value
00094
00095 *dst = static_cast<ACEXML_UTF16> (src);
00096 return 1;
00097 }
00098 else if (src >= 0x100000 && src < 0x110000)
00099 // Scalar values are encoded into surrogates
00100 {
00101 if (len < 2)
00102 return ACEXML_DESTINATION_TOO_SHORT;
00103
00104 *dst = 0xD800 | (static_cast<ACEXML_UTF16> (src) / 0x400);
00105 *(dst+1) = 0xDC00 | (static_cast<ACEXML_UTF16> (src) % 0x400);
00106 return 2;
00107 }
00108
00109 return ACEXML_NON_UNICODE;
00110 }
|
|
int ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len) [static]
|
Definition at line 50 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_IS_SURROGATE, ACEXML_NON_UNICODE, ACEXML_UCS4, ACEXML_UTF8, and utf162utf8(). Referenced by ACEXML_Parser::parse_char_reference(), and utf16s2utf8s().
00053 {
00054 if (src < 0x10000)
00055 {
00056 int retv = ACEXML_Transcoder::utf162utf8
00057 (static_cast<ACEXML_UTF16> (src),
00058 dst, len);
00059 return (retv == ACEXML_IS_SURROGATE ? ACEXML_NON_UNICODE : retv);
00060 }
00061 else if (src >= 0x100000 && src < 0x110000)
00062 {
00063 if (len < 4)
00064 return ACEXML_DESTINATION_TOO_SHORT;
00065
00066 if (dst == 0)
00067 return ACEXML_INVALID_ARGS;
00068
00069 *dst = 0xf0 | (static_cast<ACEXML_UTF8> (src / 0x40000));
00070 *(dst+1) = 0x80 | ((static_cast<ACEXML_UTF8> (src % 0x40000)) / 0x1000);
00071 *(dst+2) = 0x80 | ((static_cast<ACEXML_UTF8> (src % 0x1000)) / 0x40);
00072 *(dst+3) = 0x80 | (static_cast<ACEXML_UTF8> (src % 0x40));
00073 return 4;
00074 }
00075 return ACEXML_NON_UNICODE;
00076 }
|
|
int ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst) [static]
|
Definition at line 211 of file Transcode.cpp. References ACEXML_END_OF_SOURCE, ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, and surrogate2ucs4(). Referenced by utf16s2utf8s().
00214 {
00215 if (src == 0)
00216 return ACEXML_INVALID_ARGS;
00217
00218 size_t forward = 1;
00219 if (*src >= 0xDC00 && *src < 0xE000)
00220 {
00221 if (len < 2)
00222 return ACEXML_END_OF_SOURCE;
00223 return ACEXML_Transcoder::surrogate2ucs4 (*src,
00224 *(src+1),
00225 dst);
00226 }
00227 else
00228 {
00229 if (len < 1)
00230 return ACEXML_END_OF_SOURCE;
00231 dst = *src;
00232 }
00233
00234 return forward;
00235 }
|
|
int ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len) [static]
|
Definition at line 7 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_IS_SURROGATE, ACEXML_UTF16, and ACEXML_UTF8. Referenced by ucs42utf8().
00010 {
00011 // Check for valid argument first...
00012
00013 if (dst == 0)
00014 return ACEXML_INVALID_ARGS;
00015
00016 if (src < 0x80)
00017 {
00018 if (len < 1)
00019 return ACEXML_DESTINATION_TOO_SHORT;
00020
00021 *dst = static_cast<ACEXML_UTF8> (src);
00022 return 1;
00023 }
00024 else if (src < 0x800)
00025 {
00026 if (len < 2)
00027 return ACEXML_DESTINATION_TOO_SHORT;
00028
00029 *dst = 0xc0 | (static_cast<ACEXML_UTF8> (src) / 0x40);
00030 *(dst+1) = 0x80 | (static_cast<ACEXML_UTF8> (src) % 0x40);
00031 return 2;
00032 }
00033 else
00034 {
00035 if (len < 3)
00036 return ACEXML_DESTINATION_TOO_SHORT;
00037
00038 // Surrogates (0xD800 - 0xDFFF) are not valid unicode values
00039 if (src >= 0xD800 && src < 0xE000)
00040 return ACEXML_IS_SURROGATE;
00041
00042 *dst = 0xe0 | (static_cast<ACEXML_UTF8> (src) / 0x1000);
00043 *(dst+1) = 0x80 | ((static_cast<ACEXML_UTF8> (src) % 0x1000) / 0x40);
00044 *(dst+2) = 0x80 | (static_cast<ACEXML_UTF8> (src) % 0x40);
00045 return 3;
00046 }
00047 }
|
|
int ACEXML_Transcoder::utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len) [static]
|
Definition at line 275 of file Transcode.cpp. References ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, ucs42utf8(), and utf162ucs4().
00278 {
00279 if (src == 0 || dst == 0)
00280 return ACEXML_INVALID_ARGS;
00281
00282 size_t src_len = 1;
00283 for (const ACEXML_UTF16 *p = src; *p++ != 0; ++src_len)
00284 ;
00285
00286 size_t total_len = 0;
00287 int forward;
00288 ACEXML_UCS4 temp;
00289
00290 while (src_len > 0)
00291 {
00292 if ((forward = ACEXML_Transcoder::utf162ucs4 (src,
00293 src_len,
00294 temp)) <= 0)
00295 return forward;
00296
00297 src += forward;
00298 src_len -= forward;
00299
00300 if ((forward = ACEXML_Transcoder::ucs42utf8 (temp,
00301 dst,
00302 len)) <= 0)
00303 return forward;
00304
00305 total_len += forward;
00306 dst += forward;
00307 len -= forward;
00308 }
00309
00310 return static_cast<int> (total_len);
00311 }
|
|
int ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *the_src, size_t len, ACEXML_UCS4 &dst) [static]
|
Definition at line 146 of file Transcode.cpp. References ACEXML_END_OF_SOURCE, ACEXML_INVALID_ARGS, ACEXML_NON_UNICODE, ACEXML_UCS4, and ACEXML_UTF8. Referenced by utf8s2utf16s().
00149 {
00150 if (the_src == 0)
00151 return ACEXML_INVALID_ARGS;
00152
00153 const unsigned char *src = reinterpret_cast<const unsigned char *> (the_src);
00154
00155 size_t forward = 1;
00156
00157 if (forward > len)
00158 return ACEXML_END_OF_SOURCE;
00159
00160 if (static_cast<unsigned char> (*src) < 0x80)
00161 dst = *src;
00162 else if ((*src & 0xE0) == 0xC0)
00163 {
00164 dst = (*(src++) & 0x1f) * 0x40;
00165 if (++forward > len)
00166 return ACEXML_END_OF_SOURCE;
00167 if ((*src & 0xC0) != 0x80)
00168 return ACEXML_NON_UNICODE; // Error transcoding unicode scalar
00169 dst += *src & 0x3f;
00170 }
00171 else if ((*src & 0xF0) == 0xE0)
00172 {
00173 dst = (*src++ & 0x0f) * 0x40;
00174 if (++forward > len)
00175 return ACEXML_END_OF_SOURCE;
00176 if ((*src & 0xC0) != 0x80)
00177 return ACEXML_NON_UNICODE;
00178 dst = (dst + (*src++ & 0x3f)) * 0x40;
00179 if (++forward > len)
00180 return ACEXML_END_OF_SOURCE;
00181 if ((*src & 0xC0) != 0x80)
00182 return ACEXML_NON_UNICODE;
00183 dst += *src & 0x3f;
00184 }
00185 else if ((*src & 0xF8) == 0xF0)
00186 {
00187 dst = (*src++ & 0x0f) * 0x40;
00188 if (++forward > len)
00189 return ACEXML_END_OF_SOURCE;
00190 if ((*src & 0xC0) != 0x80)
00191 return ACEXML_NON_UNICODE;
00192 dst = (dst + (*src++ & 0x3f)) * 0x40;
00193 if (++forward > len)
00194 return ACEXML_END_OF_SOURCE;
00195 if ((*src & 0xC0) != 0x80)
00196 return ACEXML_NON_UNICODE;
00197 dst = (dst + (*src++ & 0x3f)) * 0x40;
00198 if (++forward > len)
00199 return ACEXML_END_OF_SOURCE;
00200 if ((*src & 0xC0) != 0x80)
00201 return ACEXML_NON_UNICODE;
00202 dst += *src & 0x3f;
00203 }
00204 else
00205 return ACEXML_NON_UNICODE;
00206
00207 return forward;
00208 }
|
|
int ACEXML_Transcoder::utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len) [static]
|
Definition at line 238 of file Transcode.cpp. References ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, ACE_OS::strlen(), ucs42utf16(), and utf82ucs4().
00241 {
00242 if (src == 0 || dst == 0)
00243 return ACEXML_INVALID_ARGS;
00244
00245 size_t src_len = ACE_OS::strlen (src) + 1;
00246
00247 size_t total_len = 0;
00248 int forward;
00249 ACEXML_UCS4 temp;
00250
00251 while (src_len > 0)
00252 {
00253 if ((forward = ACEXML_Transcoder::utf82ucs4 (src,
00254 src_len,
00255 temp)) <= 0)
00256 return forward;
00257
00258 src += forward;
00259 src_len -= forward;
00260
00261 if ((forward = ACEXML_Transcoder::ucs42utf16 (temp,
00262 dst,
00263 len)) <= 0)
00264 return forward;
00265
00266 total_len += forward;
00267 dst += forward;
00268 len -= forward;
00269 }
00270
00271 return static_cast<int> (total_len);
00272 }
|
|
|
|
1.3.6