Transcode.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 //=============================================================================
00004 /**
00005  *  @file    Transcode.h
00006  *
00007  * This file declares functions to convert character strings among
00008  * different Unicode encodings (UTF-8, UTF-16, UTF-32).
00009  *
00010  *  Transcode.h,v 1.11 2005/01/05 14:06:12 jwillemsen Exp
00011  *
00012  *  @author Nanbor Wang <nanbor@cs.wustl.edu>
00013  */
00014 //=============================================================================
00015 
00016 #ifndef _ACEXML_TRANSCODE_H_
00017 #define _ACEXML_TRANSCODE_H_
00018 
00019 #include /**/ "ace/pre.h"
00020 #include "ACEXML/common/ACEXML_Export.h"
00021 
00022 #if !defined (ACE_LACKS_PRAGMA_ONCE)
00023 #pragma once
00024 #endif /* ACE_LACKS_PRAGMA_ONCE */
00025 
00026 #include "ACEXML/common/XML_Types.h"
00027 
00028 /**
00029  * @class ACEXML_Transcoder Transcode.h "ACEXML/common/Transcode.h"
00030  *
00031  * @brief Static helpers for converting between Unicode encodings.
00032  *
00033  * Wrapper class that groups static functions for transcoding among
00034  * UTF-8, UTF-16 and UCS-4.
00035  */
00036 class ACEXML_Export ACEXML_Transcoder
00037 {
00038 public:
00039   /**
00040    * Status codes of the conversion functions; every error code is negative.
00041    */
00042   enum
00043   {
00044     ACEXML_SUCCESS = 0,
00045     ACEXML_DESTINATION_TOO_SHORT = -1,
00046     ACEXML_END_OF_SOURCE = -2,
00047     ACEXML_INVALID_ARGS = -3,
00048     ACEXML_IS_SURROGATE = -4,
00049     ACEXML_NON_UNICODE = -5
00050   } ACEXML_STATUS;
00051   // NOTE(review): ACEXML_STATUS is declared above as a member of the unnamed
00052   // enum type, not as the enum's name -- confirm this is intended.
00053   // The following functions translate a single Unicode character
00054   // into a different encoding.  On success they return the number of
00055   // characters put into the destination or consumed from src;
00056   // otherwise they return the corresponding error code.
00057   /**
00058    * Convert a UTF-16 character into a string in UTF-8 encoding.
00059    *
00060    * @param len presumably the capacity of @a dst -- TODO confirm.
00061    * @return the number of characters used to store the converted string
00062    *         on success, or one of the negative ACEXML_STATUS codes otherwise.
00063    */
00064   static int utf162utf8 (ACEXML_UTF16 src,
00065                          ACEXML_UTF8 *dst,
00066                          size_t len);
00067 
00068   /**
00069    * Convert a UCS-4 character into a string in UTF-8 encoding.
00070    *
00071    * @param len presumably the capacity of @a dst -- TODO confirm.
00072    * @return the number of characters used to store the converted string
00073    *         on success, or one of the negative ACEXML_STATUS codes otherwise.
00074    */
00075   static int ucs42utf8 (ACEXML_UCS4 src,
00076                         ACEXML_UTF8 *dst,
00077                         size_t len);
00078 
00079   /**
00080    * Convert a UCS-4 character into a string in UTF-16 encoding.
00081    *
00082    * @param len presumably the capacity of @a dst -- TODO confirm.
00083    * @return the number of characters used to store the converted string
00084    *         on success, or one of the negative ACEXML_STATUS codes otherwise.
00085    */
00086   static int ucs42utf16 (ACEXML_UCS4 src,
00087                          ACEXML_UTF16 *dst,
00088                          size_t len);
00089 
00090   /**
00091    * Convert a UTF-16 surrogate character pair into a string in UTF-8 encoding.
00092    *
00093    * @param len presumably the capacity of @a dst -- TODO confirm.
00094    * @return the number of characters used to store the converted string
00095    *         on success, or one of the negative ACEXML_STATUS codes otherwise.
00096    */
00097   static int surrogate2utf8 (ACEXML_UTF16 high,
00098                              ACEXML_UTF16 low,
00099                              ACEXML_UTF8 *dst,
00100                              size_t len);
00101 
00102   /**
00103    * Convert a UTF-16 surrogate character pair into a UCS-4 character.
00104    *
00105    * @return ACEXML_SUCCESS on success, or one of the negative
00106    *         ACEXML_STATUS error codes otherwise.
00107    */
00108   static int surrogate2ucs4 (ACEXML_UTF16 high,
00109                              ACEXML_UTF16 low,
00110                              ACEXML_UCS4 &dst);
00111 
00112   /**
00113    * Convert the first Unicode character in a UTF-8 character string
00114    * into a UCS-4 character.
00115    *
00116    * @param len presumably the length of @a src -- TODO confirm.
00117    * @return the number of characters consumed from the UTF-8 string on
00118    *         success, or one of the negative ACEXML_STATUS codes otherwise.
00119    */
00120   static int utf82ucs4 (const ACEXML_UTF8 *src,
00121                         size_t len,
00122                         ACEXML_UCS4 &dst);
00123 
00124   /**
00125    * Convert the first Unicode character in a UTF-16 character string
00126    * into a UCS-4 character.
00127    *
00128    * @param len presumably the length of @a src -- TODO confirm.
00129    * @return the number of characters consumed from the UTF-16 string on
00130    *         success, or one of the negative ACEXML_STATUS codes otherwise.
00131    */
00132   static int utf162ucs4 (const ACEXML_UTF16 *src,
00133                         size_t len,
00134                         ACEXML_UCS4 &dst);
00135 
00136 //    static int utf82utf16 (const ACEXML_UTF8 *src,
00137 //                           size_t len,
00138 //                           ACEXML_UTF16 &dst);
00139   // This function does not handle surrogates.
00140 
00141   // = The following functions are non-inlined:
00142 
00143   /**
00144    * Convert a UTF-8 string into a UTF-16 string.
00145    *
00146    * @param len The length of the @a dst string.
00147    *
00148    * @return the number of characters consumed from the UTF-8 string
00149    *         on success, or one of the negative ACEXML_STATUS error
00150    *         codes otherwise.
00151    */
00152   static int utf8s2utf16s (const ACEXML_UTF8 *src,
00153                            ACEXML_UTF16 *dst,
00154                            size_t len);
00155 
00156   /**
00157    * Convert a UTF-16 string into a UTF-8 string.
00158    *
00159    * @param len The length of the @a dst string.
00160    *
00161    * @return the number of characters used in the UTF-8 string
00162    *         on success, or one of the negative ACEXML_STATUS error
00163    *         codes otherwise.
00164    */
00165   static int utf16s2utf8s (const ACEXML_UTF16 *src,
00166                            ACEXML_UTF8 *dst,
00167                            size_t len);
00168 };
00169 
00170 #include /**/ "ace/post.h"
00171 
00172 #endif /* _ACEXML_TRANSCODE_H_ */

Generated on Thu Nov 9 11:45:40 2006 for ACEXML by doxygen 1.3.6