codecvt_specializations.h

Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
00004 //  Free Software Foundation, Inc.
00005 //
00006 // This file is part of the GNU ISO C++ Library.  This library is free
00007 // software; you can redistribute it and/or modify it under the
00008 // terms of the GNU General Public License as published by the
00009 // Free Software Foundation; either version 2, or (at your option)
00010 // any later version.
00011 
00012 // This library is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 
00017 // You should have received a copy of the GNU General Public License along
00018 // with this library; see the file COPYING.  If not, write to the Free
00019 // Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
00020 // USA.
00021 
00022 // As a special exception, you may use this file as part of a free software
00023 // library without restriction.  Specifically, if other files instantiate
00024 // templates or use macros or inline functions from this file, or you compile
00025 // this file and link it with other files to produce an executable, this
00026 // file does not by itself cause the resulting executable to be covered by
00027 // the GNU General Public License.  This exception does not however
00028 // invalidate any other reasons why the executable file might be covered by
00029 // the GNU General Public License.
00030 
00031 //
00032 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00033 //
00034 
00035 // Written by Benjamin Kosnik <bkoz@redhat.com>
00036 
00041   // XXX
00042   // Define this here so codecvt.cc can have _S_max_size definition.
00043 #define _GLIBCXX_USE_ENCODING_STATE 1
00044 
00045 namespace __gnu_cxx
00046 {
00048   // This includes conversions and comparisons between various character
00049   // sets.  This object encapsulates data that may need to be shared between
00050   // char_traits, codecvt and ctype.
class encoding_state
{
  // Bundles the names of an internal/external character-encoding pair with
  // the two iconv conversion descriptors (one per direction) opened for
  // that pair, plus optional byte-order markers and a bytes-per-character
  // hint.  The object owns its descriptors: they are opened by init() and
  // closed by destroy().
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string       _M_int_enc;

  // Name of external character set encoding.
  std::string       _M_ext_enc;

  // Conversion descriptor between external encoding to internal encoding.
  descriptor_type   _M_in_desc;

  // Conversion descriptor between internal encoding to external encoding.
  descriptor_type   _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int               _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int               _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int               _M_bytes;

public:
  // Creates a state with no encodings and no descriptors; good() is false.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state for the given internal/external encoding names and
  // opens both descriptors.  Throws (via std::__throw_runtime_error) if
  // either descriptor cannot be opened; nothing is leaked in that case.
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj)
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { construct(__obj); }

  // Need assignment operator as well.
  // Self-assignment is a no-op, so it does not needlessly close and
  // reopen the descriptors (which could fail and would discard any
  // in-flight conversion state).
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    if (this != &__obj)
      construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True only when both descriptors are open (neither 0 nor the
  // (iconv_t)-1 failure value).
  bool
  good() const throw()
  {
    const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
    bool __test = _M_in_desc && _M_in_desc != __err;
    __test &=  _M_out_desc && _M_out_desc != __err;
    return __test;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is still unset (0), provided both encoding
  // names are non-empty.  If either iconv_open call fails, every
  // descriptor held by this object is released and reset to 0 before the
  // exception is raised.  That way a constructor that throws does not
  // leak the descriptor opened first, and a later init() starts from a
  // clean state instead of finding a stale failure value.
  void
  init()
  {
    const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!_M_in_desc && __have_encodings)
      {
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv input descriptor failed"));
          }
      }
    if (!_M_out_desc && __have_encodings)
      {
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          {
            // Also closes the input descriptor opened just above.
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv output descriptor failed"));
          }
      }
  }

  // Replaces this object's encoding information with a copy of __obj's
  // and opens fresh descriptors for it.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes any open descriptor and resets both members to 0.  The failure
  // value (iconv_t)-1 is never passed to iconv_close, but it is cleared
  // so that init() may try again.
  void
  destroy() throw()
  {
    const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
    if (_M_in_desc && _M_in_desc != __err)
      iconv_close(_M_in_desc);
    _M_in_desc = 0;
    if (_M_out_desc && _M_out_desc != __err)
      iconv_close(_M_out_desc);
    _M_out_desc = 0;
  }
};
00205 
00207   // Custom traits type with encoding_state for the state type, and the
00208   // associated fpos<encoding_state> for the position type, all other
00209   // bits equivalent to the required char_traits instantiations.
00210   template<typename _CharT>
00211     struct encoding_char_traits : public std::char_traits<_CharT>
00212     {
00213       typedef encoding_state                state_type;
00214       typedef typename std::fpos<state_type>        pos_type;
00215     };
00216 } // namespace __gnu_cxx
00217 
00218 namespace std
00219 {
00220   using __gnu_cxx::encoding_state;
00221 
00223   // This partial specialization takes advantage of iconv to provide
00224   // code conversions between a large number of character encodings.
00225   template<typename _InternT, typename _ExternT>
00226     class codecvt<_InternT, _ExternT, encoding_state>
00227     : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
00228     {
00229     public:      
00230       // Types:
00231       typedef codecvt_base::result          result;
00232       typedef _InternT                  intern_type;
00233       typedef _ExternT                  extern_type;
00234       typedef __gnu_cxx::encoding_state         state_type;
00235       typedef state_type::descriptor_type       descriptor_type;
00236 
00237       // Data Members:
00238       static locale::id         id;
00239 
00240       explicit 
00241       codecvt(size_t __refs = 0)
00242       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
00243       { }
00244 
00245       explicit 
00246       codecvt(state_type& __enc, size_t __refs = 0)
00247       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
00248       { }
00249 
00250      protected:
00251       virtual 
00252       ~codecvt() { }
00253 
00254       virtual result
00255       do_out(state_type& __state, const intern_type* __from, 
00256          const intern_type* __from_end, const intern_type*& __from_next,
00257          extern_type* __to, extern_type* __to_end,
00258          extern_type*& __to_next) const;
00259 
00260       virtual result
00261       do_unshift(state_type& __state, extern_type* __to, 
00262          extern_type* __to_end, extern_type*& __to_next) const;
00263 
00264       virtual result
00265       do_in(state_type& __state, const extern_type* __from, 
00266         const extern_type* __from_end, const extern_type*& __from_next,
00267         intern_type* __to, intern_type* __to_end, 
00268         intern_type*& __to_next) const;
00269 
00270       virtual int 
00271       do_encoding() const throw();
00272 
00273       virtual bool 
00274       do_always_noconv() const throw();
00275 
00276       virtual int 
00277       do_length(state_type&, const extern_type* __from, 
00278         const extern_type* __end, size_t __max) const;
00279 
00280       virtual int 
00281       do_max_length() const throw();
00282     };
00283 
00284   template<typename _InternT, typename _ExternT>
00285     locale::id 
00286     codecvt<_InternT, _ExternT, encoding_state>::id;
00287 
00288   // This adaptor works around the signature problems of the second
00289   // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
00290   // uses 'char**', which matches the POSIX 1003.1-2001 standard.
00291   // Using this adaptor, g++ will do the work for us.
template<typename _T>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _T, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  {
    // _T is deduced from the real iconv() prototype, so it is whichever of
    // 'char**' or 'const char**' the platform declares.  The cast adapts
    // our 'char**' to that type; a C-style cast is used because it has to
    // cover both spellings.
    _T __source = (_T)__inbuf;
    return (*__func)(__cd, __source, __inbytes, __outbuf, __outbytes);
  }
00298 
00299   template<typename _InternT, typename _ExternT>
00300     codecvt_base::result
00301     codecvt<_InternT, _ExternT, encoding_state>::
00302     do_out(state_type& __state, const intern_type* __from, 
00303        const intern_type* __from_end, const intern_type*& __from_next,
00304        extern_type* __to, extern_type* __to_end,
00305        extern_type*& __to_next) const
00306     {
00307       result __ret = codecvt_base::error;
00308       if (__state.good())
00309     {
00310       const descriptor_type& __desc = __state.out_descriptor();
00311       const size_t __fmultiple = sizeof(intern_type);
00312       size_t __fbytes = __fmultiple * (__from_end - __from);
00313       const size_t __tmultiple = sizeof(extern_type);
00314       size_t __tbytes = __tmultiple * (__to_end - __to); 
00315       
00316       // Argument list for iconv specifies a byte sequence. Thus,
00317       // all to/from arrays must be brutally casted to char*.
00318       char* __cto = reinterpret_cast<char*>(__to);
00319       char* __cfrom;
00320       size_t __conv;
00321 
00322       // Some encodings need a byte order marker as the first item
00323       // in the byte stream, to designate endian-ness. The default
00324       // value for the byte order marker is NULL, so if this is
00325       // the case, it's not necessary and we can just go on our
00326       // merry way.
00327       int __int_bom = __state.internal_bom();
00328       if (__int_bom)
00329         {     
00330           size_t __size = __from_end - __from;
00331           intern_type* __cfixed = static_cast<intern_type*>
00332         (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
00333           __cfixed[0] = static_cast<intern_type>(__int_bom);
00334           char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
00335           __cfrom = reinterpret_cast<char*>(__cfixed);
00336           __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
00337                                         &__fbytes, &__cto, &__tbytes); 
00338         }
00339       else
00340         {
00341           intern_type* __cfixed = const_cast<intern_type*>(__from);
00342           __cfrom = reinterpret_cast<char*>(__cfixed);
00343           __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 
00344                        &__cto, &__tbytes); 
00345         }
00346 
00347       if (__conv != size_t(-1))
00348         {
00349           __from_next = reinterpret_cast<const intern_type*>(__cfrom);
00350           __to_next = reinterpret_cast<extern_type*>(__cto);
00351           __ret = codecvt_base::ok;
00352         }
00353       else 
00354         {
00355           if (__fbytes < __fmultiple * (__from_end - __from))
00356         {
00357           __from_next = reinterpret_cast<const intern_type*>(__cfrom);
00358           __to_next = reinterpret_cast<extern_type*>(__cto);
00359           __ret = codecvt_base::partial;
00360         }
00361           else
00362         __ret = codecvt_base::error;
00363         }
00364     }
00365       return __ret; 
00366     }
00367 
00368   template<typename _InternT, typename _ExternT>
00369     codecvt_base::result
00370     codecvt<_InternT, _ExternT, encoding_state>::
00371     do_unshift(state_type& __state, extern_type* __to, 
00372            extern_type* __to_end, extern_type*& __to_next) const
00373     {
00374       result __ret = codecvt_base::error;
00375       if (__state.good())
00376     {
00377       const descriptor_type& __desc = __state.in_descriptor();
00378       const size_t __tmultiple = sizeof(intern_type);
00379       size_t __tlen = __tmultiple * (__to_end - __to); 
00380       
00381       // Argument list for iconv specifies a byte sequence. Thus,
00382       // all to/from arrays must be brutally casted to char*.
00383       char* __cto = reinterpret_cast<char*>(__to);
00384       size_t __conv = __iconv_adaptor(iconv,__desc, NULL, NULL,
00385                                           &__cto, &__tlen); 
00386       
00387       if (__conv != size_t(-1))
00388         {
00389           __to_next = reinterpret_cast<extern_type*>(__cto);
00390           if (__tlen == __tmultiple * (__to_end - __to))
00391         __ret = codecvt_base::noconv;
00392           else if (__tlen == 0)
00393         __ret = codecvt_base::ok;
00394           else
00395         __ret = codecvt_base::partial;
00396         }
00397       else 
00398         __ret = codecvt_base::error;
00399     }
00400       return __ret; 
00401     }
00402    
00403   template<typename _InternT, typename _ExternT>
00404     codecvt_base::result
00405     codecvt<_InternT, _ExternT, encoding_state>::
00406     do_in(state_type& __state, const extern_type* __from, 
00407       const extern_type* __from_end, const extern_type*& __from_next,
00408       intern_type* __to, intern_type* __to_end, 
00409       intern_type*& __to_next) const
00410     { 
00411       result __ret = codecvt_base::error;
00412       if (__state.good())
00413     {
00414       const descriptor_type& __desc = __state.in_descriptor();
00415       const size_t __fmultiple = sizeof(extern_type);
00416       size_t __flen = __fmultiple * (__from_end - __from);
00417       const size_t __tmultiple = sizeof(intern_type);
00418       size_t __tlen = __tmultiple * (__to_end - __to); 
00419       
00420       // Argument list for iconv specifies a byte sequence. Thus,
00421       // all to/from arrays must be brutally casted to char*.
00422       char* __cto = reinterpret_cast<char*>(__to);
00423       char* __cfrom;
00424       size_t __conv;
00425 
00426       // Some encodings need a byte order marker as the first item
00427       // in the byte stream, to designate endian-ness. The default
00428       // value for the byte order marker is NULL, so if this is
00429       // the case, it's not necessary and we can just go on our
00430       // merry way.
00431       int __ext_bom = __state.external_bom();
00432       if (__ext_bom)
00433         {     
00434           size_t __size = __from_end - __from;
00435           extern_type* __cfixed =  static_cast<extern_type*>
00436         (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
00437           __cfixed[0] = static_cast<extern_type>(__ext_bom);
00438           char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
00439           __cfrom = reinterpret_cast<char*>(__cfixed);
00440           __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
00441                                        &__flen, &__cto, &__tlen); 
00442         }
00443       else
00444         {
00445           extern_type* __cfixed = const_cast<extern_type*>(__from);
00446           __cfrom = reinterpret_cast<char*>(__cfixed);
00447           __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
00448                                        &__flen, &__cto, &__tlen); 
00449         }
00450 
00451       
00452       if (__conv != size_t(-1))
00453         {
00454           __from_next = reinterpret_cast<const extern_type*>(__cfrom);
00455           __to_next = reinterpret_cast<intern_type*>(__cto);
00456           __ret = codecvt_base::ok;
00457         }
00458       else 
00459         {
00460           if (__flen < static_cast<size_t>(__from_end - __from))
00461         {
00462           __from_next = reinterpret_cast<const extern_type*>(__cfrom);
00463           __to_next = reinterpret_cast<intern_type*>(__cto);
00464           __ret = codecvt_base::partial;
00465         }
00466           else
00467         __ret = codecvt_base::error;
00468         }
00469     }
00470       return __ret; 
00471     }
00472   
00473   template<typename _InternT, typename _ExternT>
00474     int 
00475     codecvt<_InternT, _ExternT, encoding_state>::
00476     do_encoding() const throw()
00477     {
00478       int __ret = 0;
00479       if (sizeof(_ExternT) <= sizeof(_InternT))
00480     __ret = sizeof(_InternT) / sizeof(_ExternT);
00481       return __ret; 
00482     }
00483   
00484   template<typename _InternT, typename _ExternT>
00485     bool 
00486     codecvt<_InternT, _ExternT, encoding_state>::
00487     do_always_noconv() const throw()
00488     { return false; }
00489   
00490   template<typename _InternT, typename _ExternT>
00491     int 
00492     codecvt<_InternT, _ExternT, encoding_state>::
00493     do_length(state_type&, const extern_type* __from, 
00494           const extern_type* __end, size_t __max) const
00495     { return std::min(__max, static_cast<size_t>(__end - __from)); }
00496 
00497   // _GLIBCXX_RESOLVE_LIB_DEFECTS
00498   // 74.  Garbled text for codecvt::do_max_length
00499   template<typename _InternT, typename _ExternT>
00500     int 
00501     codecvt<_InternT, _ExternT, encoding_state>::
00502     do_max_length() const throw()
00503     { return 1; }
00504 } // namespace std
00505 

Generated on Tue Feb 2 16:55:50 2010 for GNU C++ STL by  doxygen 1.4.7