Encoding.cpp

Go to the documentation of this file.
00001 // -*- C++ -*-  Encoding.cpp,v 1.3 2003/11/07 20:27:29 shuston Exp
00002 
00003 #include "ACEXML/common/Encoding.h"
00004 #include "ace/OS_NS_string.h"
00005 
00006 const ACEXML_Char* ACEXML_Encoding::encoding_names_[8] = {
00007   ACE_TEXT ("UCS-4BE"),
00008   ACE_TEXT ("UCS-4LE"),
00009   ACE_TEXT ("UCS-4_2143"),
00010   ACE_TEXT ("UCS-4_3412"),
00011   ACE_TEXT ("UTF-16"),
00012   ACE_TEXT ("UTF-16"),
00013   ACE_TEXT ("UTF-8"),
00014   ACE_TEXT ("Unsupported Encoding")
00015 };
00016 
00017 const ACEXML_UTF8 ACEXML_Encoding::byte_order_mark_[][4] = {
00018   { '\x00', '\x00', '\xFE', '\xFF' }, // UCS-4, big-endian  (1234 order)
00019   { '\xFF', '\xFE', '\x00', '\x00' }, // UCS-4, little-endian  (4321 order)
00020   { '\x00', '\x00', '\xFF', '\xFE' }, // UCS-4, unusual octet order (2143)
00021   { '\xFE', '\xFF', '\x00', '\x00' }, // UCS-4, unusual octet order (3412)
00022   { '\xFE', '\xFF', '\xFF', '\xFF' }, // UTF-16, big-endian (3 & 4 != 0)
00023   { '\xFF', '\xFE', '\xFF', '\xFF' }, // UTF-16, little-endian ( 3 & 4 != 0)
00024   { '\xEF', '\xBB', '\xBF', '\xFF' }  // UTF-8
00025 };
00026 
00027 const ACEXML_UTF8 ACEXML_Encoding::magic_values_[][4] = {
00028   { '\x00', '\x00', '\x00', '\x3c' }, //
00029   { '\x3c', '\x00', '\x00', '\x00' }, // UCS-4 and variants
00030   { '\x00', '\x00', '\x3c', '\x00' }, //
00031   { '\x00', '\x3c', '\x00', '\x00' }, //
00032   { '\x00', '\x3c', '\x00', '\x3f' }, // UTF-16BE
00033   { '\x3c', '\x00', '\x3f', '\x00' }, // UTF-16LE
00034   { '\x3c', '\x3f', '\x78', '\x6d' }, // UTF-8
00035 };
00036 
00037 const ACEXML_Char*
00038 ACEXML_Encoding::get_encoding (const char* input)
00039 {
00040   if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16BE][0], input, 2) == 0)
00041       && (input[2] != 0 || input[3] != 0)) // 3 & 4 should not be both zero
00042     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
00043   else if ((ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF16LE][0], input, 2) == 0)
00044     && (input[2] != 0 || input[3] != 0)) // 3 & 4 should not be both zero
00045     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE];
00046   else if (ACE_OS::memcmp (&ACEXML_Encoding::byte_order_mark_[ACEXML_Encoding::UTF8][0], input, 3) == 0)
00047     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
00048   else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16BE][0], input, 4) == 0)
00049     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16BE];
00050   else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF16LE][0], input, 4) == 0)
00051     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF16LE];
00052   else if (ACE_OS::memcmp (&ACEXML_Encoding::magic_values_[ACEXML_Encoding::UTF8][0], input, 4) == 0)
00053     return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
00054   else
00055     {
00056       // ACE_ERROR ((LM_ERROR, "Unknown encoding. Assuming UTF-8\n"));
00057       return ACEXML_Encoding::encoding_names_[ACEXML_Encoding::UTF8];
00058     }
00059 }

Generated on Thu Nov 9 11:45:36 2006 for ACEXML by doxygen 1.3.6