ACEXML_Transcoder Class Reference

ACEXML_Transcoder. More...

#include <ACEXML/common/Transcode.h>

List of all members.

Public Types

enum  {
  ACEXML_SUCCESS = 0, ACEXML_DESTINATION_TOO_SHORT = -1, ACEXML_END_OF_SOURCE = -2, ACEXML_INVALID_ARGS = -3,
  ACEXML_IS_SURROGATE = -4, ACEXML_NON_UNICODE = -5
}

Static Public Member Functions

int utf162utf8 (ACEXML_UTF16 src, ACEXML_UTF8 *dst, size_t len)
int ucs42utf8 (ACEXML_UCS4 src, ACEXML_UTF8 *dst, size_t len)
int ucs42utf16 (ACEXML_UCS4 src, ACEXML_UTF16 *dst, size_t len)
int surrogate2utf8 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UTF8 *dst, size_t len)
int surrogate2ucs4 (ACEXML_UTF16 high, ACEXML_UTF16 low, ACEXML_UCS4 &dst)
int utf82ucs4 (const ACEXML_UTF8 *src, size_t len, ACEXML_UCS4 &dst)
int utf162ucs4 (const ACEXML_UTF16 *src, size_t len, ACEXML_UCS4 &dst)
int utf8s2utf16s (const ACEXML_UTF8 *src, ACEXML_UTF16 *dst, size_t len)
int utf16s2utf8s (const ACEXML_UTF16 *src, ACEXML_UTF8 *dst, size_t len)

Public Attributes

enum { ... }  ACEXML_STATUS


Detailed Description

ACEXML_Transcoder.

Wrapper class for transcoding among different Unicode encodings (UTF-8, UTF-16, and UCS-4).

Definition at line 36 of file Transcode.h.


Member Enumeration Documentation

anonymous enum
 

Enumeration values:
ACEXML_SUCCESS 
ACEXML_DESTINATION_TOO_SHORT 
ACEXML_END_OF_SOURCE 
ACEXML_INVALID_ARGS 
ACEXML_IS_SURROGATE 
ACEXML_NON_UNICODE 

Definition at line 42 of file Transcode.h.

00043   {
00044     ACEXML_SUCCESS = 0,
00045     ACEXML_DESTINATION_TOO_SHORT = -1,
00046     ACEXML_END_OF_SOURCE = -2,
00047     ACEXML_INVALID_ARGS = -3,
00048     ACEXML_IS_SURROGATE = -4,
00049     ACEXML_NON_UNICODE = -5
00050   } ACEXML_STATUS;


Member Function Documentation

int ACEXML_Transcoder::surrogate2ucs4 (ACEXML_UTF16 high,
ACEXML_UTF16 low,
ACEXML_UCS4 &dst)
[static]
 

Definition at line 133 of file Transcode.cpp.

References ACEXML_INVALID_ARGS, ACEXML_SUCCESS, ACEXML_UCS4, and ACEXML_UTF16.

Referenced by utf162ucs4().

00136 {
00137   if ((high >= 0xD800 && high < 0xDC00) ||
00138       (low >= 0xDC00 && low < 0xE000))
00139     return ACEXML_INVALID_ARGS;
00140 
00141   dst = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00142   return ACEXML_SUCCESS;
00143 }

int ACEXML_Transcoder::surrogate2utf8 (ACEXML_UTF16 high,
ACEXML_UTF16 low,
ACEXML_UTF8 *dst,
size_t len)
[static]
 

Definition at line 113 of file Transcode.cpp.

References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, and ACEXML_UTF8.

00117 {
00118   if (len < 3)
00119     return ACEXML_DESTINATION_TOO_SHORT;
00120 
00121   if (dst == 0 ||
00122       (high >= 0xD800 && high < 0xDC00) ||
00123       (low >= 0xDC00 && low < 0xE000))
00124     return ACEXML_INVALID_ARGS;
00125 
00126   ACEXML_UCS4 src = (high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
00127   *dst = static_cast<ACEXML_UTF8> (0xD800 | (src / 0x400));
00128   *(dst+1) = static_cast<ACEXML_UTF8> (0xDC00 | (src % 0x400));
00129   return 2;
00130 }

int ACEXML_Transcoder::ucs42utf16 (ACEXML_UCS4 src,
ACEXML_UTF16 *dst,
size_t len)
[static]
 

Definition at line 80 of file Transcode.cpp.

References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_NON_UNICODE, ACEXML_UCS4, and ACEXML_UTF16.

Referenced by ACEXML_Parser::parse_char_reference(), and utf8s2utf16s().

00083 {
00084   if (dst == 0)
00085     return ACEXML_INVALID_ARGS;
00086 
00087   if (src < 0x10000)
00088     {
00089       if (len < 1)
00090         return ACEXML_DESTINATION_TOO_SHORT;
00091 
00092       if (src >= 0xD800 && src < 0xE000)
00093         return ACEXML_NON_UNICODE;     // Surrogates are not valid unicode value
00094 
00095       *dst = static_cast<ACEXML_UTF16> (src);
00096       return 1;
00097     }
00098   else if (src >= 0x100000 && src < 0x110000)
00099     // Scalar values are encoded into surrogates
00100     {
00101       if (len < 2)
00102         return ACEXML_DESTINATION_TOO_SHORT;
00103 
00104       *dst = 0xD800 | (static_cast<ACEXML_UTF16> (src) / 0x400);
00105       *(dst+1) = 0xDC00 | (static_cast<ACEXML_UTF16> (src) % 0x400);
00106       return 2;
00107     }
00108 
00109   return ACEXML_NON_UNICODE;
00110 }

int ACEXML_Transcoder::ucs42utf8 (ACEXML_UCS4 src,
ACEXML_UTF8 *dst,
size_t len)
[static]
 

Definition at line 50 of file Transcode.cpp.

References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_IS_SURROGATE, ACEXML_NON_UNICODE, ACEXML_UCS4, ACEXML_UTF8, and utf162utf8().

Referenced by ACEXML_Parser::parse_char_reference(), and utf16s2utf8s().

00053 {
00054   if (src < 0x10000)
00055     {
00056       int retv = ACEXML_Transcoder::utf162utf8
00057                  (static_cast<ACEXML_UTF16> (src),
00058                   dst, len);
00059       return (retv == ACEXML_IS_SURROGATE ? ACEXML_NON_UNICODE : retv);
00060     }
00061   else if (src >= 0x100000 && src < 0x110000)
00062     {
00063       if (len < 4)
00064         return ACEXML_DESTINATION_TOO_SHORT;
00065 
00066       if (dst == 0)
00067         return ACEXML_INVALID_ARGS;
00068 
00069       *dst = 0xf0 | (static_cast<ACEXML_UTF8> (src / 0x40000));
00070       *(dst+1) = 0x80 | ((static_cast<ACEXML_UTF8> (src % 0x40000)) / 0x1000);
00071       *(dst+2) = 0x80 | ((static_cast<ACEXML_UTF8> (src % 0x1000)) / 0x40);
00072       *(dst+3) = 0x80 | (static_cast<ACEXML_UTF8> (src % 0x40));
00073       return 4;
00074     }
00075   return ACEXML_NON_UNICODE;
00076 }

int ACEXML_Transcoder::utf162ucs4 (const ACEXML_UTF16 *src,
size_t len,
ACEXML_UCS4 &dst)
[static]
 

Definition at line 211 of file Transcode.cpp.

References ACEXML_END_OF_SOURCE, ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, and surrogate2ucs4().

Referenced by utf16s2utf8s().

00214 {
00215   if (src == 0)
00216     return ACEXML_INVALID_ARGS;
00217 
00218   size_t forward = 1;
00219   if (*src >= 0xDC00 && *src < 0xE000)
00220     {
00221       if (len < 2)
00222         return ACEXML_END_OF_SOURCE;
00223       return ACEXML_Transcoder::surrogate2ucs4 (*src,
00224                                                 *(src+1),
00225                                                 dst);
00226     }
00227   else
00228     {
00229       if (len < 1)
00230         return ACEXML_END_OF_SOURCE;
00231       dst = *src;
00232     }
00233 
00234   return forward;
00235 }

int ACEXML_Transcoder::utf162utf8 (ACEXML_UTF16 src,
ACEXML_UTF8 *dst,
size_t len)
[static]
 

Definition at line 7 of file Transcode.cpp.

References ACEXML_DESTINATION_TOO_SHORT, ACEXML_INVALID_ARGS, ACEXML_IS_SURROGATE, ACEXML_UTF16, and ACEXML_UTF8.

Referenced by ucs42utf8().

00010 {
00011   // Check for valid argument first...
00012 
00013   if (dst == 0)
00014     return ACEXML_INVALID_ARGS;
00015 
00016   if (src < 0x80)
00017     {
00018       if (len < 1)
00019         return ACEXML_DESTINATION_TOO_SHORT;
00020 
00021       *dst = static_cast<ACEXML_UTF8> (src);
00022       return 1;
00023     }
00024   else if (src < 0x800)
00025     {
00026       if (len < 2)
00027         return ACEXML_DESTINATION_TOO_SHORT;
00028 
00029       *dst = 0xc0 | (static_cast<ACEXML_UTF8> (src) / 0x40);
00030       *(dst+1) = 0x80 | (static_cast<ACEXML_UTF8> (src) % 0x40);
00031       return 2;
00032     }
00033   else
00034     {
00035       if (len < 3)
00036         return ACEXML_DESTINATION_TOO_SHORT;
00037 
00038       // Surrogates (0xD800 - 0xDFFF) are not valid unicode values
00039       if (src >= 0xD800 && src < 0xE000)
00040         return ACEXML_IS_SURROGATE;
00041 
00042       *dst = 0xe0 | (static_cast<ACEXML_UTF8> (src) / 0x1000);
00043       *(dst+1) = 0x80 | ((static_cast<ACEXML_UTF8> (src) % 0x1000) / 0x40);
00044       *(dst+2) = 0x80 | (static_cast<ACEXML_UTF8> (src) % 0x40);
00045       return 3;
00046     }
00047 }

int ACEXML_Transcoder::utf16s2utf8s (const ACEXML_UTF16 *src,
ACEXML_UTF8 *dst,
size_t len)
[static]
 

Definition at line 275 of file Transcode.cpp.

References ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, ucs42utf8(), and utf162ucs4().

00278 {
00279   if (src == 0 || dst == 0)
00280     return ACEXML_INVALID_ARGS;
00281 
00282   size_t src_len = 1;
00283   for (const ACEXML_UTF16 *p = src; *p++ != 0; ++src_len)
00284     ;
00285 
00286   size_t total_len = 0;
00287   int forward;
00288   ACEXML_UCS4 temp;
00289 
00290   while (src_len > 0)
00291     {
00292       if ((forward = ACEXML_Transcoder::utf162ucs4 (src,
00293                                                     src_len,
00294                                                     temp)) <= 0)
00295         return forward;
00296 
00297       src += forward;
00298       src_len -= forward;
00299 
00300       if ((forward = ACEXML_Transcoder::ucs42utf8 (temp,
00301                                                    dst,
00302                                                    len)) <= 0)
00303         return forward;
00304 
00305       total_len += forward;
00306       dst += forward;
00307       len -= forward;
00308     }
00309 
00310   return static_cast<int> (total_len);
00311 }

int ACEXML_Transcoder::utf82ucs4 (const ACEXML_UTF8 *src,
size_t len,
ACEXML_UCS4 &dst)
[static]
 

Definition at line 146 of file Transcode.cpp.

References ACEXML_END_OF_SOURCE, ACEXML_INVALID_ARGS, ACEXML_NON_UNICODE, ACEXML_UCS4, and ACEXML_UTF8.

Referenced by utf8s2utf16s().

00149 {
00150   if (the_src == 0)
00151     return ACEXML_INVALID_ARGS;
00152 
00153   const unsigned char *src = reinterpret_cast<const unsigned char *> (the_src);
00154 
00155   size_t forward = 1;
00156 
00157   if (forward > len)
00158     return ACEXML_END_OF_SOURCE;
00159 
00160   if (static_cast<unsigned char> (*src) < 0x80)
00161     dst = *src;
00162   else if ((*src & 0xE0) == 0xC0)
00163     {
00164       dst = (*(src++) & 0x1f) * 0x40;
00165       if (++forward > len)
00166         return ACEXML_END_OF_SOURCE;
00167       if ((*src & 0xC0) != 0x80)
00168         return ACEXML_NON_UNICODE;     // Error transcoding unicode scalar
00169       dst += *src & 0x3f;
00170     }
00171   else if ((*src & 0xF0) == 0xE0)
00172     {
00173       dst = (*src++ & 0x0f) * 0x40;
00174       if (++forward > len)
00175         return ACEXML_END_OF_SOURCE;
00176       if ((*src & 0xC0) != 0x80)
00177         return ACEXML_NON_UNICODE;
00178       dst = (dst + (*src++ & 0x3f)) * 0x40;
00179       if (++forward > len)
00180         return ACEXML_END_OF_SOURCE;
00181       if ((*src & 0xC0) != 0x80)
00182         return ACEXML_NON_UNICODE;
00183       dst += *src & 0x3f;
00184     }
00185   else if ((*src & 0xF8) == 0xF0)
00186     {
00187       dst = (*src++ & 0x0f) * 0x40;
00188       if (++forward > len)
00189         return ACEXML_END_OF_SOURCE;
00190       if ((*src & 0xC0) != 0x80)
00191         return ACEXML_NON_UNICODE;
00192       dst = (dst + (*src++ & 0x3f)) * 0x40;
00193       if (++forward > len)
00194         return ACEXML_END_OF_SOURCE;
00195       if ((*src & 0xC0) != 0x80)
00196         return ACEXML_NON_UNICODE;
00197       dst = (dst + (*src++ & 0x3f)) * 0x40;
00198       if (++forward > len)
00199         return ACEXML_END_OF_SOURCE;
00200       if ((*src & 0xC0) != 0x80)
00201         return ACEXML_NON_UNICODE;
00202       dst += *src & 0x3f;
00203     }
00204   else
00205     return ACEXML_NON_UNICODE;
00206 
00207   return forward;
00208 }

int ACEXML_Transcoder::utf8s2utf16s (const ACEXML_UTF8 *src,
ACEXML_UTF16 *dst,
size_t len)
[static]
 

Definition at line 238 of file Transcode.cpp.

References ACEXML_INVALID_ARGS, ACEXML_UCS4, ACEXML_UTF16, ACEXML_UTF8, ACE_OS::strlen(), ucs42utf16(), and utf82ucs4().

00241 {
00242   if (src == 0 || dst == 0)
00243     return ACEXML_INVALID_ARGS;
00244 
00245   size_t src_len = ACE_OS::strlen (src) + 1;
00246 
00247   size_t total_len = 0;
00248   int forward;
00249   ACEXML_UCS4 temp;
00250 
00251   while (src_len > 0)
00252     {
00253       if ((forward = ACEXML_Transcoder::utf82ucs4 (src,
00254                                                    src_len,
00255                                                    temp)) <= 0)
00256         return forward;
00257 
00258       src += forward;
00259       src_len -= forward;
00260 
00261       if ((forward = ACEXML_Transcoder::ucs42utf16 (temp,
00262                                                     dst,
00263                                                     len)) <= 0)
00264         return forward;
00265 
00266       total_len += forward;
00267       dst += forward;
00268       len -= forward;
00269     }
00270 
00271   return static_cast<int> (total_len);
00272 }


Member Data Documentation

enum { ... } ACEXML_Transcoder::ACEXML_STATUS
 


The documentation for this class was generated from the following files:
Transcode.h
Transcode.cpp
Generated on Thu Nov 9 11:48:13 2006 for ACEXML by doxygen 1.3.6