00001 // -*- C++ -*- 00002 00003 //============================================================================= 00004 /** 00005 * @file Transcode.h 00006 * 00007 * This file declares functions to convert char string among different 00008 * unicode encoding (utf8, utf16, utf32) 00009 * 00010 * Transcode.h,v 1.11 2005/01/05 14:06:12 jwillemsen Exp 00011 * 00012 * @author Nanbor Wang <nanbor@cs.wustl.edu> 00013 */ 00014 //============================================================================= 00015 00016 #ifndef _ACEXML_TRANSCODE_H_ 00017 #define _ACEXML_TRANSCODE_H_ 00018 00019 #include /**/ "ace/pre.h" 00020 #include "ACEXML/common/ACEXML_Export.h" 00021 00022 #if !defined (ACE_LACKS_PRAGMA_ONCE) 00023 #pragma once 00024 #endif /* ACE_LACKS_PRAGMA_ONCE */ 00025 00026 #include "ACEXML/common/XML_Types.h" 00027 00028 /** 00029 * @class ACEXML_Transcoder Transcode.h "ACEXML/common/Transcode.h" 00030 * 00031 * @brief ACEXML_Transcoder 00032 * 00033 * Wrapper class for performing transcoding among different UNICODE 00034 * encoding. 00035 */ 00036 class ACEXML_Export ACEXML_Transcoder 00037 { 00038 public: 00039 /* 00040 * Status of the conversion function. 00041 */ 00042 enum 00043 { 00044 ACEXML_SUCCESS = 0, 00045 ACEXML_DESTINATION_TOO_SHORT = -1, 00046 ACEXML_END_OF_SOURCE = -2, 00047 ACEXML_INVALID_ARGS = -3, 00048 ACEXML_IS_SURROGATE = -4, 00049 ACEXML_NON_UNICODE = -5 00050 } ACEXML_STATUS; 00051 00052 00053 // The following functions translate a unicode characters 00054 // into different encoding. Return number of characters put into 00055 // destination or consumed from src if success without 00056 // error, otherwise, return corresponding error code. 00057 /* 00058 * Convert a UTF-16 character into a string in UTF-8 encoding. 00059 * 00060 * @return number of characters the function uses to store the 00061 * converted string if it succeeds or one of the error STATUS 00062 * otherwise. 00063 */ 00064 static int utf162utf8 (ACEXML_UTF16 src, 00065 ACEXML_UTF8 *dst, 00066 size_t len); 00067 00068 /* 00069 * Convert a UCS-4 character into a string in UTF-8 encoding. 00070 * 00071 * @return number of characters the function uses to store the 00072 * converted string if it succeeds or one of the error STATUS 00073 * otherwise. 00074 */ 00075 static int ucs42utf8 (ACEXML_UCS4 src, 00076 ACEXML_UTF8 *dst, 00077 size_t len); 00078 00079 /* 00080 * Convert a UCS-4 character into a string in UTF-16 encoding. 00081 * 00082 * @return number of characters the function uses to store the 00083 * converted string if it succeeds or one of the error STATUS 00084 * otherwise. 00085 */ 00086 static int ucs42utf16 (ACEXML_UCS4 src, 00087 ACEXML_UTF16 *dst, 00088 size_t len); 00089 00090 /* 00091 * Convert a UTF-16 surrogate character pair into a string in UTF-8 encoding. 00092 * 00093 * @return number of characters the function uses to store the 00094 * converted string if it succeeds or one of the error STATUS 00095 * otherwise. 00096 */ 00097 static int surrogate2utf8 (ACEXML_UTF16 high, 00098 ACEXML_UTF16 low, 00099 ACEXML_UTF8 *dst, 00100 size_t len); 00101 00102 /* 00103 * Convert a UTF-16 surrogate character pair into a UCS-4 character. 00104 * 00105 * @return SUCCESS if it succeeds or one of the error STATUS 00106 * otherwise. 00107 */ 00108 static int surrogate2ucs4 (ACEXML_UTF16 high, 00109 ACEXML_UTF16 low, 00110 ACEXML_UCS4 &dst); 00111 00112 /* 00113 * Convert the first UNICODE character in a UTF-8 character string 00114 * into a UCS-4 character. 00115 * 00116 * @return number of characters the function consumed from the 00117 * UTF-8 string if it succeeds or one of the error STATUS 00118 * otherwise. 00119 */ 00120 static int utf82ucs4 (const ACEXML_UTF8 *src, 00121 size_t len, 00122 ACEXML_UCS4 &dst); 00123 00124 /* 00125 * Convert the first UNICODE character in a UTF-16 character string 00126 * into a UCS-4 character. 00127 * 00128 * @return number of characters the function consumed from the 00129 * UTF-16 string if it succeeds or one of the error STATUS 00130 * otherwise. 00131 */ 00132 static int utf162ucs4 (const ACEXML_UTF16 *src, 00133 size_t len, 00134 ACEXML_UCS4 &dst); 00135 00136 // static int utf82utf16 (const ACEXML_UTF8 *src, 00137 // size_t len, 00138 // ACEXML_UTF16 &dst); 00139 // This function does not handle surrogates. 00140 00141 // = The following functions are non-inlined: 00142 00143 /* 00144 * Convert a UTF-8 string into a UTF-16 string. 00145 * 00146 * @param len The length of @a dst string. 00147 * 00148 * @return number of characters the function consumed from the 00149 * UTF-8 string if it succeeds or one of the error STATUS 00150 * otherwise. 00151 */ 00152 static int utf8s2utf16s (const ACEXML_UTF8 *src, 00153 ACEXML_UTF16 *dst, 00154 size_t len); 00155 00156 /* 00157 * Convert a UTF-16 string into a UTF-8 string. 00158 * 00159 * @param len The length of @a dst string. 00160 * 00161 * @return number of characters the function uses in 00162 * UTF-8 string if it succeeds or one of the error STATUS 00163 * otherwise. 00164 */ 00165 static int utf16s2utf8s (const ACEXML_UTF16 *src, 00166 ACEXML_UTF8 *dst, 00167 size_t len); 00168 }; 00169 00170 #include /**/ "ace/post.h" 00171 00172 #endif /* _ACEXML_TRANSCODE_H_ */