00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "tao/Codeset/UTF16_BOM_Translator.h"
00016 #include "ace/OS_Memory.h"
00017 #include "tao/debug.h"
00018 #include "ace/Log_Msg.h"
00019
00020 ACE_RCSID (Codeset,
00021 TAO_UTF16_BOM_Translator,
00022 "$Id: UTF16_BOM_Translator.cpp 82545 2008-08-06 19:15:30Z parsons $")
00023
00024
00025
00026
00027
00028 typedef ACE_CDR::UShort ACE_UTF16_T;
00029 static const size_t ACE_UTF16_CODEPOINT_SIZE = sizeof (ACE_UTF16_T);
00030 static const unsigned short ACE_UNICODE_BOM_CORRECT = 0xFEFFU;
00031 static const unsigned short ACE_UNICODE_BOM_SWAPPED = 0xFFFEU;
00032
00033 TAO_BEGIN_VERSIONED_NAMESPACE_DECL
00034
00035
00036
00037
00038 TAO_UTF16_BOM_Translator::TAO_UTF16_BOM_Translator (bool forceBE)
00039 : forceBE_(forceBE)
00040 {
00041 if (TAO_debug_level > 1)
00042 ACE_DEBUG((LM_DEBUG,
00043 ACE_TEXT ("TAO (%P|%t) - UTF16_BOM_Translator: ")
00044 ACE_TEXT("forceBE %d\n"), this->forceBE_ ? 1:0 ));
00045 }
00046
00047 TAO_UTF16_BOM_Translator::~TAO_UTF16_BOM_Translator (void)
00048 {
00049 }
00050
00051
00052 ACE_CDR::Boolean
00053 TAO_UTF16_BOM_Translator::read_wchar (ACE_InputCDR &cdr, ACE_CDR::WChar &x)
00054 {
00055 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1 &&
00056 static_cast<ACE_CDR::Short> (this->minor_version (cdr)) == 2)
00057 {
00058 ACE_CDR::Octet len;
00059 if (! this->read_1 (cdr, &len))
00060 return 0;
00061
00062 if (len == 2)
00063 {
00064 ACE_CDR::Short sx;
00065
00066 if (!this->read_array (cdr,
00067 reinterpret_cast<char *> (&sx), 1,1,2))
00068 return 0;
00069
00070 #if defined (ACE_LITTLE_ENDIAN)
00071 ACE_CDR::Short ux;
00072 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&sx),
00073 reinterpret_cast<char *> (&ux));
00074 x = static_cast<ACE_CDR::WChar> (ux);
00075 #else
00076 x = static_cast<ACE_CDR::WChar> (sx);
00077 #endif // ACE_LITTLE_ENDIAN
00078 return 1;
00079 }
00080
00081 ACE_UTF16_T buf[2];
00082 if (len != 4 || !this->read_array (cdr,
00083 reinterpret_cast<char *> (buf),
00084 1,1,4))
00085 return 0;
00086
00087 if (buf[0] == ACE_UNICODE_BOM_CORRECT ||
00088 buf[0] == ACE_UNICODE_BOM_SWAPPED)
00089 {
00090
00091
00092 if (buf[0] == ACE_UNICODE_BOM_SWAPPED)
00093 {
00094 ACE_CDR::Short ux;
00095 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&buf[1]),
00096 reinterpret_cast<char *> (&ux));
00097 x = static_cast<ACE_CDR::WChar> (ux);
00098 }
00099 else
00100 x = static_cast<ACE_CDR::WChar> (buf[1]);
00101 return 1;
00102 }
00103
00104
00105 return 0;
00106 }
00107
00108 ACE_UTF16_T sx;
00109 if (this->read_2 (cdr, &sx))
00110 {
00111 x = static_cast<ACE_CDR::WChar> (sx);
00112 return 1;
00113 }
00114 return 0;
00115 }
00116
00117 ACE_CDR::Boolean
00118 TAO_UTF16_BOM_Translator::read_wstring (ACE_InputCDR &cdr,
00119 ACE_CDR::WChar *&x)
00120 {
00121 ACE_CDR::ULong len;
00122 if (!this->read_4 (cdr, &len))
00123 return 0;
00124
00125
00126
00127
00128 if (len > 0 && len <= cdr.length ())
00129 {
00130 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
00131 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
00132 {
00133 len /= ACE_UTF16_CODEPOINT_SIZE;
00134
00135
00136 ACE_NEW_RETURN (x,
00137 ACE_CDR::WChar [len + 1],
00138 0);
00139
00140 x[len] = L'\x00';
00141 if (this->read_wchar_array_i (cdr, x, len, 1))
00142 {
00143
00144
00145 x[len] = L'\x00';
00146 return 1;
00147 }
00148 }
00149 else
00150 {
00151 ACE_NEW_RETURN (x,
00152 ACE_CDR::WChar [len],
00153 0);
00154 if (this->read_wchar_array (cdr, x, len))
00155 return 1;
00156 }
00157 delete [] x;
00158 }
00159 else if (len == 0)
00160 {
00161
00162
00163 ACE_NEW_RETURN (x,
00164 ACE_CDR::WChar[1],
00165 0);
00166 x[0] = '\x00';
00167 return 1;
00168 }
00169 x = 0;
00170 return 0;
00171 }
00172
00173 ACE_CDR::Boolean
00174 TAO_UTF16_BOM_Translator::read_wchar_array_i (ACE_InputCDR & cdr,
00175 ACE_CDR::WChar *x,
00176 ACE_CDR::ULong &length,
00177 int adjust_len)
00178 {
00179 int has_bom = 0;
00180 int must_swap = 0;
00181 char* buf;
00182 static const size_t align = ACE_CDR::SHORT_ALIGN;
00183 if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf) == 0)
00184 {
00185
00186 ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
00187 if (*sb == ACE_UNICODE_BOM_CORRECT || *sb == ACE_UNICODE_BOM_SWAPPED)
00188 {
00189 must_swap = (*sb == ACE_UNICODE_BOM_SWAPPED);
00190 has_bom = 1;
00191 }
00192 else
00193 {
00194 #if defined (ACE_LITTLE_ENDIAN)
00195 must_swap = 1;
00196 #endif // ACE_LITTLE_ENDIAN
00197 }
00198
00199 if (has_bom)
00200 {
00201 buf += ACE_UTF16_CODEPOINT_SIZE;
00202 ++sb;
00203
00204 if (adjust_len)
00205 length -= 1;
00206 }
00207
00208 for (size_t i = 0; i < length; ++i)
00209 #if defined (ACE_DISABLE_SWAP_ON_READ)
00210 x[i] = static_cast<ACE_CDR::WChar> (sb[i]);
00211 #else
00212 if (!must_swap)
00213 {
00214 x[i] = static_cast<ACE_CDR::WChar> (sb[i]);
00215 }
00216 else
00217 {
00218 ACE_CDR::UShort sx;
00219 ACE_CDR::swap_2 (&buf[i*2], reinterpret_cast<char *> (&sx));
00220 x[i] = static_cast<ACE_CDR::WChar> (sx);
00221 }
00222 #endif
00223
00224 if (has_bom && !adjust_len)
00225 {
00226 cdr.adjust (ACE_UTF16_CODEPOINT_SIZE, align, buf);
00227 }
00228 return 1;
00229 }
00230 return 0;
00231 }
00232
00233 ACE_CDR::Boolean
00234 TAO_UTF16_BOM_Translator::read_wchar_array (ACE_InputCDR & cdr,
00235 ACE_CDR::WChar *x,
00236 ACE_CDR::ULong length)
00237 {
00238 if (length == 0)
00239 return 1;
00240
00241 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
00242 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
00243 {
00244 for (size_t i = 0; i < length; ++i)
00245 if (!this->read_wchar (cdr, x[i]))
00246 return 0;
00247
00248 return 1;
00249 }
00250 else
00251 return this->read_wchar_array_i (cdr, x, length);
00252 }
00253
00254 ACE_CDR::Boolean
00255 TAO_UTF16_BOM_Translator::write_wchar (ACE_OutputCDR &cdr,
00256 ACE_CDR::WChar x)
00257 {
00258 return this->write_wchar_i (cdr, x, true);
00259 }
00260
00261 ACE_CDR::Boolean
00262 TAO_UTF16_BOM_Translator::write_wchar_i (ACE_OutputCDR &cdr,
00263 ACE_CDR::WChar x,
00264 bool allow_BOM)
00265 {
00266 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
00267 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
00268 {
00269 int len = 0;
00270 ACE_CDR::UShort buffer[2];
00271
00272 if( allow_BOM && cdr.byte_order())
00273 {
00274 len = 2;
00275 #if defined (ACE_LITTLE_ENDIAN)
00276 if (this->forceBE_)
00277 {
00278
00279 buffer[0] = ACE_UNICODE_BOM_SWAPPED;
00280 ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x),
00281 reinterpret_cast<char *> (&buffer[1]));
00282 }
00283 else
00284 #endif
00285 {
00286
00287 buffer[0] = ACE_UNICODE_BOM_CORRECT;
00288 buffer[1] = static_cast<ACE_CDR::Short> (x);
00289 }
00290 }
00291 else
00292 {
00293
00294
00295 len = 1;
00296 if (cdr.byte_order ())
00297 ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x),
00298 reinterpret_cast<char *> (buffer));
00299 else
00300 buffer[0] = static_cast<ACE_CDR::Short> (x);
00301 }
00302
00303 unsigned char tcsize =
00304 static_cast<unsigned char> (len * ACE_UTF16_CODEPOINT_SIZE);
00305
00306 if (this->write_1 (cdr, &tcsize))
00307 return this->write_array(cdr, &buffer, tcsize, 1, 1);
00308 else
00309 return 0;
00310 }
00311 else if (static_cast<ACE_CDR::Short> (this->minor_version (cdr)) != 0)
00312 {
00313
00314 ACE_UTF16_T sx = static_cast<ACE_UTF16_T> (x);
00315 return this->write_2 (cdr, &sx);
00316 }
00317 else
00318 {
00319 errno = EINVAL;
00320 return 0;
00321 }
00322 }
00323
00324 ACE_CDR::Boolean
00325 TAO_UTF16_BOM_Translator::write_wstring (ACE_OutputCDR & cdr,
00326 ACE_CDR::ULong len,
00327 const ACE_CDR::WChar *x)
00328 {
00329
00330 ACE_ASSERT (x != 0 || len == 0);
00331 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
00332 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
00333 {
00334 if (len == 0)
00335
00336 return this->write_4(cdr, &len);
00337
00338 if (this->forceBE_ && cdr.byte_order())
00339 {
00340 ACE_CDR::ULong l = (len+1) *
00341 static_cast<ACE_CDR::ULong> (
00342 ACE_UTF16_CODEPOINT_SIZE);
00343 if (this->write_4 (cdr, &l) &&
00344 this->write_2 (cdr, &ACE_UNICODE_BOM_SWAPPED) &&
00345 x != 0)
00346 return this->write_swapped_wchar_array_i (cdr, x, len);
00347 }
00348 else
00349 {
00350 ACE_CDR::ULong l = (len+1) *
00351 static_cast<ACE_CDR::ULong> (
00352 ACE_UTF16_CODEPOINT_SIZE);
00353 if (this->write_4 (cdr, &l) &&
00354 this->write_2 (cdr, &ACE_UNICODE_BOM_CORRECT) &&
00355 x != 0)
00356 return this->write_wchar_array_i (cdr, x, len);
00357 }
00358 }
00359 else
00360 {
00361
00362 ACE_CDR::ULong l = len + 1;
00363
00364 if (this->write_4 (cdr, &l))
00365 {
00366 if (x != 0)
00367 {
00368 return this->write_wchar_array_i (cdr, x, len + 1);
00369 }
00370 else
00371 {
00372 ACE_UTF16_T s = 0;
00373 return this->write_2 (cdr,&s);
00374 }
00375 }
00376 }
00377
00378 return 0;
00379 }
00380
00381 ACE_CDR::Boolean
00382 TAO_UTF16_BOM_Translator::write_wchar_array (ACE_OutputCDR & cdr,
00383 const ACE_CDR::WChar *x,
00384 ACE_CDR::ULong length)
00385 {
00386 if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
00387 && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1)
00388 {
00389 for (size_t i = 0; i < length; ++i)
00390 if (this->write_wchar_i (cdr, x[i], false) == 0)
00391 return 0;
00392
00393 return 1;
00394 }
00395
00396 return this->write_wchar_array_i (cdr, x, length);
00397 }
00398
00399 ACE_CDR::Boolean
00400 TAO_UTF16_BOM_Translator::write_wchar_array_i (ACE_OutputCDR & cdr,
00401 const ACE_CDR::WChar *x,
00402 ACE_CDR::ULong length)
00403 {
00404 if (length == 0)
00405 return 1;
00406 char* buf;
00407 static const size_t align = ACE_CDR::SHORT_ALIGN;
00408 if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf)
00409 != 0)
00410 {
00411 return 0;
00412 }
00413
00414 ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
00415
00416 for (size_t i = 0; i < length; ++i)
00417 {
00418 sb[i] = static_cast<ACE_UTF16_T> (x[i]);
00419 }
00420 return 1;
00421
00422 }
00423
00424 ACE_CDR::Boolean
00425 TAO_UTF16_BOM_Translator::write_swapped_wchar_array_i (ACE_OutputCDR & cdr,
00426 const ACE_CDR::WChar *x,
00427 ACE_CDR::ULong length)
00428 {
00429 if (length == 0)
00430 return 1;
00431 char* buf;
00432 static const size_t align = ACE_CDR::SHORT_ALIGN;
00433 if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf)
00434 != 0)
00435 {
00436 return 0;
00437 }
00438
00439 ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
00440
00441 for (size_t i = 0; i < length; ++i)
00442 {
00443 ACE_CDR::swap_2 (reinterpret_cast<const char*> (&x[i]),
00444 reinterpret_cast<char *> (&sb[i]));
00445 }
00446 return 1;
00447 }
00448
00449 TAO_END_VERSIONED_NAMESPACE_DECL