#include <ACEXML/common/Transcode.h>
Public Types | |
enum | { ACEXML_SUCCESS = 0, ACEXML_DESTINATION_TOO_SHORT = -1, ACEXML_END_OF_SOURCE = -2, ACEXML_INVALID_ARGS = -3, ACEXML_IS_SURROGATE = -4, ACEXML_NON_UNICODE = -5 } |
Static Public Member Functions | |
int | utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len) |
int | ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len) |
int | ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len) |
int | surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UTF8 *dst, size_t len) |
int | surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UCS4 &dst) |
int | utf82ucs4 (const ACEXML_UTF8 *src, size_t len, ACEXML_UCS4 &dst) |
int | utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst) |
int | utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len) |
int | utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len) |
Public Attributes | |
enum ACEXML_Transcoder:: { ... } | ACEXML_STATUS |
Wrapper class for transcoding among the different Unicode encodings (UTF-8, UTF-16 and UCS-4).
Definition at line 36 of file Transcode.h.
|
Definition at line 42 of file Transcode.h.
/// Status codes returned by the transcoding functions.  Success is zero and
/// every failure code is negative, so positive return values remain free to
/// carry a count of units consumed or produced.
///
/// Note that ACEXML_STATUS is declared as an object of this unnamed
/// enumeration type; it is not a type name.
enum
{
  /// Operation completed.
  ACEXML_SUCCESS = 0,

  /// The destination buffer cannot hold the encoded result.
  ACEXML_DESTINATION_TOO_SHORT = -1,

  /// The source ran out before a complete character was read.
  ACEXML_END_OF_SOURCE = -2,

  /// A null pointer or otherwise unusable argument was supplied.
  ACEXML_INVALID_ARGS = -3,

  /// The UTF-16 code unit is a surrogate and cannot be encoded on its own.
  ACEXML_IS_SURROGATE = -4,

  /// The value is not a valid Unicode scalar or the byte sequence is malformed.
  ACEXML_NON_UNICODE = -5
} ACEXML_STATUS;
|
Definition at line 133 of file Transcode.cpp. References ACEXML_INVALID_ARGS, ACEXML_SUCCESS, ACEXML_UCS4, and ACEXML_UTF16. Referenced by utf162ucs4().
00136 { 00137 if ((high >= 0xD800 && high < 0xDC00) || 00138 (low >= 0xDC00 && low < 0xE000)) 00139 return ACEXML_INVALID_ARGS; 00140 00141 dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000; 00142 return ACEXML_SUCCESS; 00143 } |
|
Definition at line 113 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, and ACEXML_UTF8.
00117 { 00118 if (len < 3) 00119 return ACEXML_DESTINATION_TOO_SHORT; 00120 00121 if (dst == 0 || 00122 (high >= 0xD800 && high < 0xDC00) || 00123 (low >= 0xDC00 && low < 0xE000)) 00124 return ACEXML_INVALID_ARGS; 00125 00126 ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000; 00127 *dst = static_cast<ACEXML_UTF8> (0xD800 | (src / 0x400)); 00128 *(dst+1) = static_cast<ACEXML_UTF8> (0xDC00 | (src % 0x400)); 00129 return 2; 00130 } |
|
Definition at line 80 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_NON_UNICODE, ACEXML_UCS4, and ACEXML_UTF16. Referenced by ACEXML_Parser::parse_char_reference(), and utf8s2utf16s().
00083 { 00084 if (dst == 0) 00085 return ACEXML_INVALID_ARGS; 00086 00087 if (src < 0x10000) 00088 { 00089 if (len < 1) 00090 return ACEXML_DESTINATION_TOO_SHORT; 00091 00092 if (src >= 0xD800 && src < 0xE000) 00093 return ACEXML_NON_UNICODE; // Surrogates are not valid unicode value 00094 00095 *dst = static_cast<ACEXML_UTF16> (src); 00096 return 1; 00097 } 00098 else if (src >= 0x100000 && src < 0x110000) 00099 // Scalar values are encoded into surrogates 00100 { 00101 if (len < 2) 00102 return ACEXML_DESTINATION_TOO_SHORT; 00103 00104 *dst = 0xD800 | (static_cast<ACEXML_UTF16> (src) / 0x400); 00105 *(dst+1) = 0xDC00 | (static_cast<ACEXML_UTF16> (src) % 0x400); 00106 return 2; 00107 } 00108 00109 return ACEXML_NON_UNICODE; 00110 } |
|
Definition at line 50 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_IS_SURROGATE, ACEXML_NON_UNICODE, ACEXML_UCS4, ACEXML_UTF8, and utf162utf8(). Referenced by ACEXML_Parser::parse_char_reference(), and utf16s2utf8s().
00053 { 00054 if (src < 0x10000) 00055 { 00056 int retv = ACEXML_Transcoder::utf162utf8 00057 (static_cast<ACEXML_UTF16> (src), 00058 dst, len); 00059 return (retv == ACEXML_IS_SURROGATE ? ACEXML_NON_UNICODE : retv); 00060 } 00061 else if (src >= 0x100000 && src < 0x110000) 00062 { 00063 if (len < 4) 00064 return ACEXML_DESTINATION_TOO_SHORT; 00065 00066 if (dst == 0) 00067 return ACEXML_INVALID_ARGS; 00068 00069 *dst = 0xf0 | (static_cast<ACEXML_UTF8> (src / 0x40000)); 00070 *(dst+1) = 0x80 | ((static_cast<ACEXML_UTF8> (src % 0x40000)) / 0x1000); 00071 *(dst+2) = 0x80 | ((static_cast<ACEXML_UTF8> (src % 0x1000)) / 0x40); 00072 *(dst+3) = 0x80 | (static_cast<ACEXML_UTF8> (src % 0x40)); 00073 return 4; 00074 } 00075 return ACEXML_NON_UNICODE; 00076 } |
|
Definition at line 211 of file Transcode.cpp. References ACEXML_END_OF_SOURCE, ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, and surrogate2ucs4(). Referenced by utf16s2utf8s().
00214 { 00215 if (src == 0) 00216 return ACEXML_INVALID_ARGS; 00217 00218 size_t forward = 1; 00219 if (*src >= 0xDC00 && *src < 0xE000) 00220 { 00221 if (len < 2) 00222 return ACEXML_END_OF_SOURCE; 00223 return ACEXML_Transcoder::surrogate2ucs4 (*src, 00224 *(src+1), 00225 dst); 00226 } 00227 else 00228 { 00229 if (len < 1) 00230 return ACEXML_END_OF_SOURCE; 00231 dst = *src; 00232 } 00233 00234 return forward; 00235 } |
|
Definition at line 7 of file Transcode.cpp. References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_IS_SURROGATE, ACEXML_UTF16, and ACEXML_UTF8. Referenced by ucs42utf8().
00010 { 00011 // Check for valid argument first... 00012 00013 if (dst == 0) 00014 return ACEXML_INVALID_ARGS; 00015 00016 if (src < 0x80) 00017 { 00018 if (len < 1) 00019 return ACEXML_DESTINATION_TOO_SHORT; 00020 00021 *dst = static_cast<ACEXML_UTF8> (src); 00022 return 1; 00023 } 00024 else if (src < 0x800) 00025 { 00026 if (len < 2) 00027 return ACEXML_DESTINATION_TOO_SHORT; 00028 00029 *dst = 0xc0 | (static_cast<ACEXML_UTF8> (src) / 0x40); 00030 *(dst+1) = 0x80 | (static_cast<ACEXML_UTF8> (src) % 0x40); 00031 return 2; 00032 } 00033 else 00034 { 00035 if (len < 3) 00036 return ACEXML_DESTINATION_TOO_SHORT; 00037 00038 // Surrogates (0xD800 - 0xDFFF) are not valid unicode values 00039 if (src >= 0xD800 && src < 0xE000) 00040 return ACEXML_IS_SURROGATE; 00041 00042 *dst = 0xe0 | (static_cast<ACEXML_UTF8> (src) / 0x1000); 00043 *(dst+1) = 0x80 | ((static_cast<ACEXML_UTF8> (src) % 0x1000) / 0x40); 00044 *(dst+2) = 0x80 | (static_cast<ACEXML_UTF8> (src) % 0x40); 00045 return 3; 00046 } 00047 } |
|
Definition at line 275 of file Transcode.cpp. References ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, ucs42utf8(), and utf162ucs4().
00278 { 00279 if (src == 0 || dst == 0) 00280 return ACEXML_INVALID_ARGS; 00281 00282 size_t src_len = 1; 00283 for (const ACEXML_UTF16 *p = src; *p++ != 0; ++src_len) 00284 ; 00285 00286 size_t total_len = 0; 00287 int forward; 00288 ACEXML_UCS4 temp; 00289 00290 while (src_len > 0) 00291 { 00292 if ((forward = ACEXML_Transcoder::utf162ucs4 (src, 00293 src_len, 00294 temp)) <= 0) 00295 return forward; 00296 00297 src += forward; 00298 src_len -= forward; 00299 00300 if ((forward = ACEXML_Transcoder::ucs42utf8 (temp, 00301 dst, 00302 len)) <= 0) 00303 return forward; 00304 00305 total_len += forward; 00306 dst += forward; 00307 len -= forward; 00308 } 00309 00310 return static_cast<int> (total_len); 00311 } |
|
Definition at line 146 of file Transcode.cpp. References ACEXML_END_OF_SOURCE, ACEXML_INVALID_ARGS, ACEXML_NON_UNICODE, ACEXML_UCS4, and ACEXML_UTF8. Referenced by utf8s2utf16s().
00149 { 00150 if (the_src == 0) 00151 return ACEXML_INVALID_ARGS; 00152 00153 const unsigned char *src = reinterpret_cast<const unsigned char *> (the_src); 00154 00155 size_t forward = 1; 00156 00157 if (forward > len) 00158 return ACEXML_END_OF_SOURCE; 00159 00160 if (static_cast<unsigned char> (*src) < 0x80) 00161 dst = *src; 00162 else if ((*src & 0xE0) == 0xC0) 00163 { 00164 dst = (*(src++) & 0x1f) * 0x40; 00165 if (++forward > len) 00166 return ACEXML_END_OF_SOURCE; 00167 if ((*src & 0xC0) != 0x80) 00168 return ACEXML_NON_UNICODE; // Error transcoding unicode scalar 00169 dst += *src & 0x3f; 00170 } 00171 else if ((*src & 0xF0) == 0xE0) 00172 { 00173 dst = (*src++ & 0x0f) * 0x40; 00174 if (++forward > len) 00175 return ACEXML_END_OF_SOURCE; 00176 if ((*src & 0xC0) != 0x80) 00177 return ACEXML_NON_UNICODE; 00178 dst = (dst + (*src++ & 0x3f)) * 0x40; 00179 if (++forward > len) 00180 return ACEXML_END_OF_SOURCE; 00181 if ((*src & 0xC0) != 0x80) 00182 return ACEXML_NON_UNICODE; 00183 dst += *src & 0x3f; 00184 } 00185 else if ((*src & 0xF8) == 0xF0) 00186 { 00187 dst = (*src++ & 0x0f) * 0x40; 00188 if (++forward > len) 00189 return ACEXML_END_OF_SOURCE; 00190 if ((*src & 0xC0) != 0x80) 00191 return ACEXML_NON_UNICODE; 00192 dst = (dst + (*src++ & 0x3f)) * 0x40; 00193 if (++forward > len) 00194 return ACEXML_END_OF_SOURCE; 00195 if ((*src & 0xC0) != 0x80) 00196 return ACEXML_NON_UNICODE; 00197 dst = (dst + (*src++ & 0x3f)) * 0x40; 00198 if (++forward > len) 00199 return ACEXML_END_OF_SOURCE; 00200 if ((*src & 0xC0) != 0x80) 00201 return ACEXML_NON_UNICODE; 00202 dst += *src & 0x3f; 00203 } 00204 else 00205 return ACEXML_NON_UNICODE; 00206 00207 return forward; 00208 } |
|
Definition at line 238 of file Transcode.cpp. References ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, ACE_OS::strlen(), ucs42utf16(), and utf82ucs4().
00241 { 00242 if (src == 0 || dst == 0) 00243 return ACEXML_INVALID_ARGS; 00244 00245 size_t src_len = ACE_OS::strlen (src) + 1; 00246 00247 size_t total_len = 0; 00248 int forward; 00249 ACEXML_UCS4 temp; 00250 00251 while (src_len > 0) 00252 { 00253 if ((forward = ACEXML_Transcoder::utf82ucs4 (src, 00254 src_len, 00255 temp)) <= 0) 00256 return forward; 00257 00258 src += forward; 00259 src_len -= forward; 00260 00261 if ((forward = ACEXML_Transcoder::ucs42utf16 (temp, 00262 dst, 00263 len)) <= 0) 00264 return forward; 00265 00266 total_len += forward; 00267 dst += forward; 00268 len -= forward; 00269 } 00270 00271 return static_cast<int> (total_len); 00272 } |
|
|