Public Member Functions | Private Member Functions | Private Attributes

TAO_UTF16_BOM_Translator Class Reference

Codeset translation specialization - Manages Byte Order Marker. More...

#include <UTF16_BOM_Translator.h>

Inheritance diagram for TAO_UTF16_BOM_Translator:
Inheritance graph
[legend]
Collaboration diagram for TAO_UTF16_BOM_Translator:
Collaboration graph
[legend]

List of all members.

Public Member Functions

 TAO_UTF16_BOM_Translator (bool forceBE)
virtual ~TAO_UTF16_BOM_Translator (void)
 Virtual destruction.
virtual ACE_CDR::Boolean read_wchar (ACE_InputCDR &, ACE_CDR::WChar &)
virtual ACE_CDR::Boolean read_wstring (ACE_InputCDR &, ACE_CDR::WChar *&)
virtual ACE_CDR::Boolean read_wchar_array (ACE_InputCDR &, ACE_CDR::WChar *, ACE_CDR::ULong)
virtual ACE_CDR::Boolean write_wchar (ACE_OutputCDR &, ACE_CDR::WChar)
virtual ACE_CDR::Boolean write_wstring (ACE_OutputCDR &, ACE_CDR::ULong, const ACE_CDR::WChar *)
virtual ACE_CDR::Boolean write_wchar_array (ACE_OutputCDR &, const ACE_CDR::WChar *, ACE_CDR::ULong)
virtual ACE_CDR::ULong ncs ()
virtual ACE_CDR::ULong tcs ()

Private Member Functions

ACE_CDR::Boolean read_wchar_array_i (ACE_InputCDR &, ACE_CDR::WChar *, ACE_CDR::ULong &, int adjust_len=0)
ACE_CDR::Boolean write_wchar_array_i (ACE_OutputCDR &, const ACE_CDR::WChar *, ACE_CDR::ULong)
ACE_CDR::Boolean write_swapped_wchar_array_i (ACE_OutputCDR &cdr, const ACE_CDR::WChar *x, ACE_CDR::ULong length)
ACE_CDR::Boolean write_wchar_i (ACE_OutputCDR &, ACE_CDR::WChar, bool allow_BOM)

Private Attributes

bool forceBE_
 if this flag is true, force wchar's to big endian order

Detailed Description

Codeset translation specialization - Manages Byte Order Marker.

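This class performs the codeset translation of wide characters, wide-character arrays, and wide strings between their in-memory form and UTF-16 code units on the CDR stream, writing and honoring the byte order mark (BOM) where one is used.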

Definition at line 43 of file UTF16_BOM_Translator.h.


Constructor & Destructor Documentation

TAO_UTF16_BOM_Translator::TAO_UTF16_BOM_Translator ( bool  forceBE  ) 

constructor

Parameters:
forceBE: true forces all wchar, wchar array, and wstring values to big-endian byte order

Definition at line 38 of file UTF16_BOM_Translator.cpp.

  : forceBE_(forceBE)
{
  // The constructor only records the caller's byte-order preference; the
  // rest of the body is diagnostic output, emitted when TAO_debug_level
  // is greater than 1.
  if (TAO_debug_level <= 1)
    return;

  // Log the flag as an integer: 1 when big-endian is forced, 0 otherwise.
  const int forced = this->forceBE_ ? 1 : 0;
  ACE_DEBUG((LM_DEBUG,
             ACE_TEXT ("TAO (%P|%t) - UTF16_BOM_Translator: ")
             ACE_TEXT("forceBE %d\n"), forced));
}

TAO_UTF16_BOM_Translator::~TAO_UTF16_BOM_Translator ( void   )  [virtual]

Virtual destruction.

Definition at line 47 of file UTF16_BOM_Translator.cpp.

{
  // Intentionally empty: this destructor body performs no work of its own.
}


Member Function Documentation

virtual ACE_CDR::ULong TAO_UTF16_BOM_Translator::ncs ( void   )  [inline, virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 70 of file UTF16_BOM_Translator.h.

{
  // Identifier of the native code set handled by this translator.
  // NOTE(review): 0x00010109 is presumably the OSF registry value for
  // UTF-16 -- confirm against the code set registry.
  const ACE_CDR::ULong native_codeset_id = 0x00010109;
  return native_codeset_id;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::read_wchar ( ACE_InputCDR & cdr,
ACE_CDR::WChar & x 
) [virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 53 of file UTF16_BOM_Translator.cpp.

{
  // Reads a single wide character from cdr into x.
  // Returns 1 on success and 0 on failure.
  //
  // When the stream reports version 1.2 the character is framed by a
  // one-octet length:
  //   - length 2: a bare code unit with no BOM; it is byte-swapped on
  //     little-endian builds, i.e. it is treated as big-endian on the wire.
  //   - length 4: a BOM followed by the code unit; a swapped BOM means the
  //     code unit must be byte-swapped as well.
  //   - anything else, or length 4 without a leading BOM, is an error.
  // For any other version a plain 2-octet value is read with read_2.
  //
  // NOTE(review): the array reader and the writers in this class test
  // "minor_version > 1" while this method tests "== 2" -- confirm whether
  // later minor versions are meant to use the framed form here as well.
  if (static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1 &&
      static_cast<ACE_CDR::Short> (this->minor_version (cdr)) == 2)
    {
      ACE_CDR::Octet len;
      if (! this->read_1 (cdr, &len))
        return 0;

      if (len == 2) // no BOM present
        {
          // The code unit is held in an unsigned 16-bit type so that values
          // of 0x8000 and above are not sign-extended when they are widened
          // to an ACE_CDR::WChar that is larger than 16 bits.
          ACE_CDR::UShort sx;

          if (!this->read_array (cdr,
                                 reinterpret_cast<char *> (&sx), 1,1,2))
            return 0;

#if defined (ACE_LITTLE_ENDIAN)
          ACE_CDR::UShort ux;
          ACE_CDR::swap_2 (reinterpret_cast<const char*> (&sx),
                           reinterpret_cast<char *> (&ux));
          x = static_cast<ACE_CDR::WChar> (ux);
#else
          x = static_cast<ACE_CDR::WChar> (sx);
#endif // ACE_LITTLE_ENDIAN
          return 1;
        }

      ACE_UTF16_T buf[2];
      if (len != 4 || !this->read_array (cdr,
                                         reinterpret_cast<char *> (buf),
                                         1,1,4)) // get BO & payload
        return 0;
      // Check for byte order mark, if found, consume and honor it.
      if (buf[0] == ACE_UNICODE_BOM_CORRECT ||
          buf[0] == ACE_UNICODE_BOM_SWAPPED)
        {
          // A swapped BOM means the payload arrived in the opposite byte
          // order, so the code unit has to be swapped too.
          if (buf[0] == ACE_UNICODE_BOM_SWAPPED)
            {
              // Unsigned for the same reason as above: avoid sign extension.
              ACE_CDR::UShort ux;
              ACE_CDR::swap_2 (reinterpret_cast<const char*> (&buf[1]),
                               reinterpret_cast<char *> (&ux));
              x = static_cast<ACE_CDR::WChar> (ux);
            }
          else
            x = static_cast<ACE_CDR::WChar> (buf[1]);
          return 1;
        }
      // The length is 4 but the first word is not a BOM: treat the
      // encoding as invalid and report an error.
      return 0;
    }

  ACE_UTF16_T sx;
  if (this->read_2 (cdr, &sx))
    {
      x = static_cast<ACE_CDR::WChar> (sx);
      return 1;
    }
  return 0;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::read_wchar_array ( ACE_InputCDR & cdr,
ACE_CDR::WChar * x,
ACE_CDR::ULong  length 
) [virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 234 of file UTF16_BOM_Translator.cpp.

{
  // Reads `length` wide characters from cdr into x[0 .. length-1].
  // Returns 1 on success (including the trivial empty case), 0 on failure.
  if (length == 0)
    return 1;

  // For version 1.x with a minor version above 1 every element is read
  // through read_wchar; otherwise the whole array is read in one pass.
  const bool element_wise =
    static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
    && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1;

  if (!element_wise)
    return this->read_wchar_array_i (cdr, x, length);

  size_t index = 0;
  while (index < length)
    {
      // Stop at the first element that cannot be decoded.
      if (!this->read_wchar (cdr, x[index]))
        return 0;
      ++index;
    }

  return 1;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::read_wchar_array_i ( ACE_InputCDR & cdr,
ACE_CDR::WChar * x,
ACE_CDR::ULong & length,
int  adjust_len = 0 
) [private]

Definition at line 174 of file UTF16_BOM_Translator.cpp.

{
  // Reads a run of UTF-16 code units from cdr into x, honoring and then
  // discarding a leading byte order mark (BOM) if one is present.
  //
  // Parameters:
  //   cdr        - input stream positioned at the first code unit.
  //   x          - destination array; receives one WChar per code unit.
  //   length     - in: number of code units to reserve from the stream.
  //                out: reduced by one when a BOM is found and adjust_len
  //                is non-zero (the BOM then counts as one of the units).
  //   adjust_len - non-zero: the BOM is part of `length`;
  //                zero: the BOM is extra and one more code unit is
  //                consumed from the stream.
  // Returns 1 on success, 0 if the stream cannot supply the requested data.

  // Nothing to decode.  Returning here also keeps the BOM probe below from
  // inspecting bytes that were never reserved, and keeps "length -= 1"
  // from wrapping the unsigned length around when it is zero.
  if (length == 0)
    return 1;

  int has_bom = 0;
  int must_swap = 0;
  char* buf;
  static const size_t align = ACE_CDR::SHORT_ALIGN;
  if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, buf) != 0)
    return 0;

  // check for byte order mark.  If found, honor it then discard it
  ACE_UTF16_T *sb = reinterpret_cast<ACE_UTF16_T *> (buf);
  if (*sb == ACE_UNICODE_BOM_CORRECT || *sb == ACE_UNICODE_BOM_SWAPPED)
    {
      must_swap = (*sb == ACE_UNICODE_BOM_SWAPPED);
      has_bom = 1;
    }
  else
    {
      // Without a BOM, little-endian builds byte-swap every code unit.
#if defined (ACE_LITTLE_ENDIAN)
      must_swap = 1;
#endif // ACE_LITTLE_ENDIAN
    }

  if (has_bom)
    {
      // Step over the BOM in both views of the buffer.
      buf += ACE_UTF16_CODEPOINT_SIZE;
      ++sb;

      if (adjust_len)
        {
          length -= 1;
        }
      else
        {
          // The BOM was not counted in `length`, so the last code unit lies
          // one position past the region reserved above.  Reserve it before
          // copying and fail if the stream does not actually contain it.
          char *extra;
          if (cdr.adjust (ACE_UTF16_CODEPOINT_SIZE, align, extra) != 0)
            return 0;
        }
    }

  for (size_t i = 0; i < length; ++i)
    {
#if defined (ACE_DISABLE_SWAP_ON_READ)
      x[i] = static_cast<ACE_CDR::WChar> (sb[i]);
#else
      if (!must_swap)
        {
          x[i] = static_cast<ACE_CDR::WChar> (sb[i]);
        }
      else
        {
          ACE_CDR::UShort sx;
          ACE_CDR::swap_2 (&buf[i*2], reinterpret_cast<char *> (&sx));
          x[i] = static_cast<ACE_CDR::WChar> (sx);
        }
#endif /* ACE_DISABLE_SWAP_ON_READ */
    }

  return 1;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::read_wstring ( ACE_InputCDR & cdr,
ACE_CDR::WChar *&  x 
) [virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 118 of file UTF16_BOM_Translator.cpp.

{
  // Reads a wide string from cdr into a freshly allocated, null-terminated
  // buffer returned through x.  Returns 1 on success; on failure returns 0
  // and, unless an allocation or the initial length read failed, sets x
  // to 0.
  ACE_CDR::ULong len;
  if (!this->read_4 (cdr, &len))
    return 0;

  if (len == 0)
    {
      // A zero length is delivered as an empty, null-terminated string
      // rather than a null pointer. (See bug 58.)
      ACE_NEW_RETURN (x,
                      ACE_CDR::WChar[1],
                      0);
      x[0] = '\x00';
      return 1;
    }

  // Reject lengths larger than what the stream holds before allocating
  // any memory for them.
  if (len > cdr.length ())
    {
      x = 0;
      return 0;
    }

  const bool length_in_octets =
    static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
    && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1;

  if (length_in_octets)
    {
      // Convert the length read from the stream into a code-unit count.
      // NOTE(review): a length of 1 becomes 0 here -- confirm that
      // read_wchar_array_i is safe to call with a zero length.
      len /= ACE_UTF16_CODEPOINT_SIZE;

      // One extra element for the null character needed by applications.
      ACE_NEW_RETURN (x,
                      ACE_CDR::WChar [len + 1],
                      0);

      x[len] = L'\x00';
      if (this->read_wchar_array_i (cdr, x, len, 1))
        {
          // Reading may have shortened len (a BOM was dropped), so the
          // terminator is written again at the final position.
          x[len] = L'\x00';
          return 1;
        }
    }
  else
    {
      // No extra element is allocated on this path; exactly len
      // characters are read into the buffer.
      ACE_NEW_RETURN (x,
                      ACE_CDR::WChar [len],
                      0);
      if (this->read_wchar_array (cdr, x, len))
        return 1;
    }

  // Decoding failed: release the buffer and hand back a null pointer.
  delete [] x;
  x = 0;
  return 0;
}

virtual ACE_CDR::ULong TAO_UTF16_BOM_Translator::tcs (  )  [inline, virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 71 of file UTF16_BOM_Translator.h.

{
  // Identifier of the transmission code set handled by this translator.
  // NOTE(review): 0x00010109 is presumably the OSF registry value for
  // UTF-16 -- confirm against the code set registry.
  const ACE_CDR::ULong transmission_codeset_id = 0x00010109;
  return transmission_codeset_id;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::write_swapped_wchar_array_i ( ACE_OutputCDR & cdr,
const ACE_CDR::WChar * x,
ACE_CDR::ULong  length 
) [private]

Definition at line 425 of file UTF16_BOM_Translator.cpp.

{
  // Writes `length` characters from x to cdr as 2-octet values with the
  // two octets of each value exchanged.  Returns 1 on success (an empty
  // array is trivially successful) and 0 if the stream cannot provide
  // the required space.
  if (length == 0)
    return 1;

  static const size_t align = ACE_CDR::SHORT_ALIGN;
  char *raw;
  const bool reserved =
    cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, raw) == 0;
  if (!reserved)
    return 0;

  ACE_UTF16_T *dst = reinterpret_cast<ACE_UTF16_T *> (raw);
  const ACE_CDR::WChar *src = x;

  // swap_2 takes the first two octets at src and stores them reversed.
  for (ACE_CDR::ULong remaining = length; remaining != 0; --remaining)
    {
      ACE_CDR::swap_2 (reinterpret_cast<const char*> (src),
                       reinterpret_cast<char *> (dst));
      ++src;
      ++dst;
    }
  return 1;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::write_wchar ( ACE_OutputCDR & cdr,
ACE_CDR::WChar  x 
) [virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 255 of file UTF16_BOM_Translator.cpp.

{
  // A stand-alone wide character may carry a byte order mark, so the
  // shared writer is invoked with BOM emission permitted.
  const bool allow_bom = true;
  return this->write_wchar_i (cdr, x, allow_bom);
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::write_wchar_array ( ACE_OutputCDR & cdr,
const ACE_CDR::WChar * x,
ACE_CDR::ULong  length 
) [virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 382 of file UTF16_BOM_Translator.cpp.

{
  // Writes `length` wide characters from x to cdr.
  // Returns 1 on success, 0 as soon as any write fails.
  const bool element_wise =
    static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
    && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1;

  // Older versions write the array in a single block.
  if (!element_wise)
    return this->write_wchar_array_i (cdr, x, length);

  // Otherwise each element is written individually, and array elements
  // never carry a byte order mark.
  size_t index = 0;
  while (index < length)
    {
      if (!this->write_wchar_i (cdr, x[index], false))
        return 0;
      ++index;
    }

  return 1;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::write_wchar_array_i ( ACE_OutputCDR & cdr,
const ACE_CDR::WChar * x,
ACE_CDR::ULong  length 
) [private]

Definition at line 400 of file UTF16_BOM_Translator.cpp.

{
  // Writes `length` characters from x to cdr, converting each one to
  // ACE_UTF16_T with a plain cast and no byte swapping.  Returns 1 on
  // success (an empty array is trivially successful) and 0 if the stream
  // cannot provide the required space.
  if (length == 0)
    return 1;

  static const size_t align = ACE_CDR::SHORT_ALIGN;
  char *raw;
  const bool reserved =
    cdr.adjust (ACE_UTF16_CODEPOINT_SIZE * length, align, raw) == 0;
  if (!reserved)
    return 0;

  ACE_UTF16_T *dst = reinterpret_cast<ACE_UTF16_T *> (raw);
  const ACE_CDR::WChar *src = x;

  for (ACE_CDR::ULong remaining = length; remaining != 0; --remaining)
    {
      *dst = static_cast<ACE_UTF16_T> (*src);
      ++dst;
      ++src;
    }
  return 1;
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::write_wchar_i ( ACE_OutputCDR & cdr,
ACE_CDR::WChar  x,
bool  allow_BOM 
) [private]

Definition at line 262 of file UTF16_BOM_Translator.cpp.

{
  // Writes the single wide character x to cdr.
  //
  //   allow_BOM - when true (and the stream's byte_order() is non-zero) the
  //               character is preceded by a byte order mark.
  //
  // Returns the result of the underlying stream write; returns 0 with
  // errno set to EINVAL when the minor version is 0.
  const bool length_prefixed =
    static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
    && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1;

  if (!length_prefixed)
    {
      if (static_cast<ACE_CDR::Short> (this->minor_version (cdr)) == 0)
        {
          // wchar is not allowed with GIOP 1.0.
          errno = EINVAL;
          return 0;
        }

      // GIOP 1.1 simple support: a bare 2-octet value.
      ACE_UTF16_T plain = static_cast<ACE_UTF16_T> (x);
      return this->write_2 (cdr, &plain);
    }

  // Length-prefixed form: one octet giving the payload size, followed by
  // either [BOM, code unit] or just [code unit].
  ACE_CDR::UShort wire[2];
  int units = 0;

  if (allow_BOM && cdr.byte_order ())
    {
      units = 2;

      // Forcing big-endian output is only meaningful on little-endian
      // builds; elsewhere the native order is always used.
      bool big_endian_forced = false;
#if defined (ACE_LITTLE_ENDIAN)
      big_endian_forced = this->forceBE_;
#endif

      if (big_endian_forced)
        {
          // Both the byte order mark and the data go out byte-swapped.
          wire[0] = ACE_UNICODE_BOM_SWAPPED;
          ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x),
                           reinterpret_cast<char *> (&wire[1]));
        }
      else
        {
          // Both the byte order mark and the data go out in native order.
          wire[0] = ACE_UNICODE_BOM_CORRECT;
          wire[1] = static_cast<ACE_CDR::Short> (x);
        }
    }
  else
    {
      // No byte order mark: the data alone is written, swapped whenever
      // the stream's byte_order() is non-zero so that it is big endian.
      units = 1;
      if (cdr.byte_order ())
        ACE_CDR::swap_2 (reinterpret_cast<const char *> (&x),
                         reinterpret_cast<char *> (wire));
      else
        wire[0] = static_cast<ACE_CDR::Short> (x);
    }

  unsigned char octets =
    static_cast<unsigned char> (units * ACE_UTF16_CODEPOINT_SIZE);

  if (!this->write_1 (cdr, &octets))
    return 0;

  return this->write_array(cdr, &wire, octets, 1, 1);
}

ACE_CDR::Boolean TAO_UTF16_BOM_Translator::write_wstring ( ACE_OutputCDR & cdr,
ACE_CDR::ULong  len,
const ACE_CDR::WChar * x 
) [virtual]

Implements ACE_WChar_Codeset_Translator.

Definition at line 325 of file UTF16_BOM_Translator.cpp.

{
  // Writes the wide string x, holding len characters, to cdr.
  // Returns the result of the final stream write, or 0 if an earlier
  // write fails or x is null for a non-empty string.

  // A null pointer is accepted, but only for an empty string.
  ACE_ASSERT (x != 0 || len == 0);

  const bool length_in_octets =
    static_cast<ACE_CDR::Short> (this->major_version (cdr)) == 1
    && static_cast<ACE_CDR::Short> (this->minor_version (cdr)) > 1;

  if (!length_in_octets)
    {
      // pre GIOP 1.2:  include null terminator in length
      ACE_CDR::ULong count = len + 1;

      if (!this->write_4 (cdr, &count))
        return 0;

      if (x != 0)
        return this->write_wchar_array_i (cdr, x, len + 1);

      // No source buffer: emit just the terminator.
      ACE_UTF16_T terminator = 0;
      return this->write_2 (cdr,&terminator);
    }

  // For zero length strings only a length of zero is written; the BOM is
  // not needed in this case.
  if (len == 0)
    return this->write_4(cdr, &len);

  const bool swapped = this->forceBE_ && cdr.byte_order();

  // The length written to the stream covers the BOM plus the characters.
  ACE_CDR::ULong octets = (len+1) *
                          static_cast<ACE_CDR::ULong> (
                                      ACE_UTF16_CODEPOINT_SIZE);
  if (!this->write_4 (cdr, &octets))
    return 0;

  if (swapped)
    {
      if (!this->write_2 (cdr, &ACE_UNICODE_BOM_SWAPPED))
        return 0;
    }
  else
    {
      if (!this->write_2 (cdr, &ACE_UNICODE_BOM_CORRECT))
        return 0;
    }

  // With no source buffer there is nothing left to write; report failure.
  if (x == 0)
    return 0;

  if (swapped)
    return this->write_swapped_wchar_array_i (cdr, x, len);

  return this->write_wchar_array_i (cdr, x, len);
}


Member Data Documentation

bool TAO_UTF16_BOM_Translator::forceBE_ [private]

If this flag is true, force wchar's to big endian order.

Definition at line 94 of file UTF16_BOM_Translator.h.


The documentation for this class was generated from the following files: UTF16_BOM_Translator.h and UTF16_BOM_Translator.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines