codecvt_specializations.h

Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 2, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // You should have received a copy of the GNU General Public License along
00017 // with this library; see the file COPYING.  If not, write to the Free
00018 // Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
00019 // USA.
00020 
00021 // As a special exception, you may use this file as part of a free software
00022 // library without restriction.  Specifically, if other files instantiate
00023 // templates or use macros or inline functions from this file, or you compile
00024 // this file and link it with other files to produce an executable, this
00025 // file does not by itself cause the resulting executable to be covered by
00026 // the GNU General Public License.  This exception does not however
00027 // invalidate any other reasons why the executable file might be covered by
00028 // the GNU General Public License.
00029 
00030 //
00031 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00032 //
00033 
00034 // Warning: this file is not meant for user inclusion.  Use <locale>.
00035 
00036 // Written by Benjamin Kosnik <bkoz@cygnus.com>
00037 
00038   // XXX
00039   // Define this here so that codecvt.cc can provide the definition of _S_max_size.
00040 #define _GLIBCXX_USE___ENC_TRAITS 1
00041 
00042   // Extension to use icov for dealing with character encodings,
00043   // including conversions and comparisons between various character
00044   // sets.  This object encapsulates data that may need to be shared between
00045   // char_traits, codecvt and ctype.
00046   class __enc_traits
00047   {
00048   public:
00049     // Types: 
00050     // NB: A conversion descriptor subsumes and enhances the
00051     // functionality of a simple state type such as mbstate_t.
00052     typedef iconv_t __desc_type;
00053     
00054   protected:
00055     // Data Members:
00056     // Max size of charset encoding name
00057     static const int    _S_max_size = 32;
00058     // Name of internal character set encoding.
00059     char            _M_int_enc[_S_max_size];
00060     // Name of external character set encoding.
00061     char            _M_ext_enc[_S_max_size];
00062 
00063     // Conversion descriptor between external encoding to internal encoding.
00064     __desc_type     _M_in_desc;
00065     // Conversion descriptor between internal encoding to external encoding.
00066     __desc_type     _M_out_desc;
00067 
00068     // Details the byte-order marker for the external encoding, if necessary.
00069     int         _M_ext_bom;
00070 
00071     // Details the byte-order marker for the internal encoding, if necessary.
00072     int         _M_int_bom;
00073 
00074   public:
00075     explicit __enc_traits() 
00076     : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0) 
00077     {
00078       memset(_M_int_enc, 0, _S_max_size);
00079       memset(_M_ext_enc, 0, _S_max_size);
00080     }
00081 
00082     explicit __enc_traits(const char* __int, const char* __ext, 
00083               int __ibom = 0, int __ebom = 0)
00084     : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(__ebom), _M_int_bom(__ibom)
00085     {
00086       strncpy(_M_int_enc, __int, _S_max_size);
00087       strncpy(_M_ext_enc, __ext, _S_max_size);
00088       _M_init();
00089     }
00090 
00091     // 21.1.2 traits typedefs
00092     // p4
00093     // typedef STATE_T state_type
00094     // requires: state_type shall meet the requirements of
00095     // CopyConstructible types (20.1.3)
00096     // NB: This does not preseve the actual state of the conversion
00097     // descriptor member, but it does duplicate the encoding
00098     // information.
00099     __enc_traits(const __enc_traits& __obj): _M_in_desc(0), _M_out_desc(0)
00100     {
00101       strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size);
00102       strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size);
00103       _M_ext_bom = __obj._M_ext_bom;
00104       _M_int_bom = __obj._M_int_bom;
00105       _M_destroy();
00106       _M_init();
00107     }
00108 
00109     // Need assignment operator as well.
00110     __enc_traits&
00111     operator=(const __enc_traits& __obj)
00112     {
00113       strncpy(_M_int_enc, __obj._M_int_enc, _S_max_size);
00114       strncpy(_M_ext_enc, __obj._M_ext_enc, _S_max_size);
00115       _M_ext_bom = __obj._M_ext_bom;
00116       _M_int_bom = __obj._M_int_bom;
00117       _M_destroy();
00118       _M_init();
00119       return *this;
00120     }
00121 
00122     ~__enc_traits()
00123     { _M_destroy(); } 
00124 
00125     void
00126     _M_init()
00127     {
00128       const __desc_type __err = reinterpret_cast<iconv_t>(-1);
00129       if (!_M_in_desc)
00130     {
00131       _M_in_desc = iconv_open(_M_int_enc, _M_ext_enc);
00132       if (_M_in_desc == __err)
00133         __throw_runtime_error(__N("__enc_traits::_M_init "
00134                   "creating iconv input descriptor failed"));
00135     }
00136       if (!_M_out_desc)
00137     {
00138       _M_out_desc = iconv_open(_M_ext_enc, _M_int_enc);
00139       if (_M_out_desc == __err)
00140         __throw_runtime_error(__N("__enc_traits::_M_init "
00141                   "creating iconv output descriptor failed"));
00142     }
00143     }
00144 
00145     void
00146     _M_destroy()
00147     {
00148       const __desc_type __err = reinterpret_cast<iconv_t>(-1);
00149       if (_M_in_desc && _M_in_desc != __err) 
00150     {
00151       iconv_close(_M_in_desc);
00152       _M_in_desc = 0;
00153     }
00154       if (_M_out_desc && _M_out_desc != __err) 
00155     {
00156       iconv_close(_M_out_desc);
00157       _M_out_desc = 0;
00158     }
00159     }
00160 
00161     bool
00162     _M_good()
00163     { 
00164       const __desc_type __err = reinterpret_cast<iconv_t>(-1);
00165       bool __test = _M_in_desc && _M_in_desc != __err; 
00166       __test &=  _M_out_desc && _M_out_desc != __err;
00167       return __test;
00168     }
00169 
00170     const __desc_type* 
00171     _M_get_in_descriptor()
00172     { return &_M_in_desc; }
00173 
00174     const __desc_type* 
00175     _M_get_out_descriptor()
00176     { return &_M_out_desc; }
00177 
00178     int 
00179     _M_get_external_bom()
00180     { return _M_ext_bom; }
00181 
00182     int 
00183     _M_get_internal_bom()
00184     { return _M_int_bom; }
00185 
00186     const char* 
00187     _M_get_internal_enc()
00188     { return _M_int_enc; }
00189 
00190     const char* 
00191     _M_get_external_enc()
00192     { return _M_ext_enc; }    
00193   };
00194 
00195   // Partial specialization
00196   // This specialization takes advantage of iconv to provide code
00197   // conversions between a large number of character encodings.
00198   template<typename _InternT, typename _ExternT>
00199     class codecvt<_InternT, _ExternT, __enc_traits>
00200     : public __codecvt_abstract_base<_InternT, _ExternT, __enc_traits>
00201     {
00202     public:      
00203       // Types:
00204       typedef codecvt_base::result          result;
00205       typedef _InternT                  intern_type;
00206       typedef _ExternT                  extern_type;
00207       typedef __enc_traits              state_type;
00208       typedef __enc_traits::__desc_type         __desc_type;
00209       typedef __enc_traits              __enc_type;
00210 
00211       // Data Members:
00212       static locale::id         id;
00213 
00214       explicit 
00215       codecvt(size_t __refs = 0)
00216       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
00217       { }
00218 
00219       explicit 
00220       codecvt(__enc_type* __enc, size_t __refs = 0)
00221       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
00222       { }
00223 
00224     protected:
00225       virtual 
00226       ~codecvt() { }
00227 
00228       virtual result
00229       do_out(state_type& __state, const intern_type* __from, 
00230          const intern_type* __from_end, const intern_type*& __from_next,
00231          extern_type* __to, extern_type* __to_end,
00232          extern_type*& __to_next) const;
00233 
00234       virtual result
00235       do_unshift(state_type& __state, extern_type* __to, 
00236          extern_type* __to_end, extern_type*& __to_next) const;
00237 
00238       virtual result
00239       do_in(state_type& __state, const extern_type* __from, 
00240         const extern_type* __from_end, const extern_type*& __from_next,
00241         intern_type* __to, intern_type* __to_end, 
00242         intern_type*& __to_next) const;
00243 
00244       virtual int 
00245       do_encoding() const throw();
00246 
00247       virtual bool 
00248       do_always_noconv() const throw();
00249 
00250       virtual int 
00251       do_length(state_type&, const extern_type* __from, 
00252         const extern_type* __end, size_t __max) const;
00253 
00254       virtual int 
00255       do_max_length() const throw();
00256     };
00257 
00258   template<typename _InternT, typename _ExternT>
00259     locale::id 
00260     codecvt<_InternT, _ExternT, __enc_traits>::id;
00261 
00262   // This adaptor works around the signature problems of the second
00263   // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
00264   // uses 'char**', which matches the POSIX 1003.1-2001 standard.
00265   // Using this adaptor, g++ will do the work for us.
00266   template<typename _T>
00267     inline size_t
00268     __iconv_adaptor(size_t(*__func)(iconv_t, _T, size_t*, char**, size_t*),
00269                     iconv_t __cd, char** __inbuf, size_t* __inbytes,
00270                     char** __outbuf, size_t* __outbytes)
00271     { return __func(__cd, (_T)__inbuf, __inbytes, __outbuf, __outbytes); }
00272 
00273   template<typename _InternT, typename _ExternT>
00274     codecvt_base::result
00275     codecvt<_InternT, _ExternT, __enc_traits>::
00276     do_out(state_type& __state, const intern_type* __from, 
00277        const intern_type* __from_end, const intern_type*& __from_next,
00278        extern_type* __to, extern_type* __to_end,
00279        extern_type*& __to_next) const
00280     {
00281       result __ret = codecvt_base::error;
00282       if (__state._M_good())
00283     {
00284       const __desc_type* __desc = __state._M_get_out_descriptor();
00285       const size_t __fmultiple = sizeof(intern_type);
00286       size_t __fbytes = __fmultiple * (__from_end - __from);
00287       const size_t __tmultiple = sizeof(extern_type);
00288       size_t __tbytes = __tmultiple * (__to_end - __to); 
00289       
00290       // Argument list for iconv specifies a byte sequence. Thus,
00291       // all to/from arrays must be brutally casted to char*.
00292       char* __cto = reinterpret_cast<char*>(__to);
00293       char* __cfrom;
00294       size_t __conv;
00295 
00296       // Some encodings need a byte order marker as the first item
00297       // in the byte stream, to designate endian-ness. The default
00298       // value for the byte order marker is NULL, so if this is
00299       // the case, it's not necessary and we can just go on our
00300       // merry way.
00301       int __int_bom = __state._M_get_internal_bom();
00302       if (__int_bom)
00303         {     
00304           size_t __size = __from_end - __from;
00305           intern_type* __cfixed = static_cast<intern_type*>
00306         (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
00307           __cfixed[0] = static_cast<intern_type>(__int_bom);
00308           char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
00309           __cfrom = reinterpret_cast<char*>(__cfixed);
00310           __conv = __iconv_adaptor(iconv, *__desc, &__cfrom,
00311                                         &__fbytes, &__cto, &__tbytes); 
00312         }
00313       else
00314         {
00315           intern_type* __cfixed = const_cast<intern_type*>(__from);
00316           __cfrom = reinterpret_cast<char*>(__cfixed);
00317           __conv = __iconv_adaptor(iconv, *__desc, &__cfrom, &__fbytes, 
00318                        &__cto, &__tbytes); 
00319         }
00320 
00321       if (__conv != size_t(-1))
00322         {
00323           __from_next = reinterpret_cast<const intern_type*>(__cfrom);
00324           __to_next = reinterpret_cast<extern_type*>(__cto);
00325           __ret = codecvt_base::ok;
00326         }
00327       else 
00328         {
00329           if (__fbytes < __fmultiple * (__from_end - __from))
00330         {
00331           __from_next = reinterpret_cast<const intern_type*>(__cfrom);
00332           __to_next = reinterpret_cast<extern_type*>(__cto);
00333           __ret = codecvt_base::partial;
00334         }
00335           else
00336         __ret = codecvt_base::error;
00337         }
00338     }
00339       return __ret; 
00340     }
00341 
00342   template<typename _InternT, typename _ExternT>
00343     codecvt_base::result
00344     codecvt<_InternT, _ExternT, __enc_traits>::
00345     do_unshift(state_type& __state, extern_type* __to, 
00346            extern_type* __to_end, extern_type*& __to_next) const
00347     {
00348       result __ret = codecvt_base::error;
00349       if (__state._M_good())
00350     {
00351       const __desc_type* __desc = __state._M_get_in_descriptor();
00352       const size_t __tmultiple = sizeof(intern_type);
00353       size_t __tlen = __tmultiple * (__to_end - __to); 
00354       
00355       // Argument list for iconv specifies a byte sequence. Thus,
00356       // all to/from arrays must be brutally casted to char*.
00357       char* __cto = reinterpret_cast<char*>(__to);
00358       size_t __conv = __iconv_adaptor(iconv,*__desc, NULL, NULL,
00359                                           &__cto, &__tlen); 
00360       
00361       if (__conv != size_t(-1))
00362         {
00363           __to_next = reinterpret_cast<extern_type*>(__cto);
00364           if (__tlen == __tmultiple * (__to_end - __to))
00365         __ret = codecvt_base::noconv;
00366           else if (__tlen == 0)
00367         __ret = codecvt_base::ok;
00368           else
00369         __ret = codecvt_base::partial;
00370         }
00371       else 
00372         __ret = codecvt_base::error;
00373     }
00374       return __ret; 
00375     }
00376    
00377   template<typename _InternT, typename _ExternT>
00378     codecvt_base::result
00379     codecvt<_InternT, _ExternT, __enc_traits>::
00380     do_in(state_type& __state, const extern_type* __from, 
00381       const extern_type* __from_end, const extern_type*& __from_next,
00382       intern_type* __to, intern_type* __to_end, 
00383       intern_type*& __to_next) const
00384     { 
00385       result __ret = codecvt_base::error;
00386       if (__state._M_good())
00387     {
00388       const __desc_type* __desc = __state._M_get_in_descriptor();
00389       const size_t __fmultiple = sizeof(extern_type);
00390       size_t __flen = __fmultiple * (__from_end - __from);
00391       const size_t __tmultiple = sizeof(intern_type);
00392       size_t __tlen = __tmultiple * (__to_end - __to); 
00393       
00394       // Argument list for iconv specifies a byte sequence. Thus,
00395       // all to/from arrays must be brutally casted to char*.
00396       char* __cto = reinterpret_cast<char*>(__to);
00397       char* __cfrom;
00398       size_t __conv;
00399 
00400       // Some encodings need a byte order marker as the first item
00401       // in the byte stream, to designate endian-ness. The default
00402       // value for the byte order marker is NULL, so if this is
00403       // the case, it's not necessary and we can just go on our
00404       // merry way.
00405       int __ext_bom = __state._M_get_external_bom();
00406       if (__ext_bom)
00407         {     
00408           size_t __size = __from_end - __from;
00409           extern_type* __cfixed =  static_cast<extern_type*>
00410         (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
00411           __cfixed[0] = static_cast<extern_type>(__ext_bom);
00412           char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
00413           __cfrom = reinterpret_cast<char*>(__cfixed);
00414           __conv = __iconv_adaptor(iconv, *__desc, &__cfrom,
00415                                        &__flen, &__cto, &__tlen); 
00416         }
00417       else
00418         {
00419           extern_type* __cfixed = const_cast<extern_type*>(__from);
00420           __cfrom = reinterpret_cast<char*>(__cfixed);
00421           __conv = __iconv_adaptor(iconv, *__desc, &__cfrom,
00422                                        &__flen, &__cto, &__tlen); 
00423         }
00424 
00425       
00426       if (__conv != size_t(-1))
00427         {
00428           __from_next = reinterpret_cast<const extern_type*>(__cfrom);
00429           __to_next = reinterpret_cast<intern_type*>(__cto);
00430           __ret = codecvt_base::ok;
00431         }
00432       else 
00433         {
00434           if (__flen < static_cast<size_t>(__from_end - __from))
00435         {
00436           __from_next = reinterpret_cast<const extern_type*>(__cfrom);
00437           __to_next = reinterpret_cast<intern_type*>(__cto);
00438           __ret = codecvt_base::partial;
00439         }
00440           else
00441         __ret = codecvt_base::error;
00442         }
00443     }
00444       return __ret; 
00445     }
00446   
00447   template<typename _InternT, typename _ExternT>
00448     int 
00449     codecvt<_InternT, _ExternT, __enc_traits>::
00450     do_encoding() const throw()
00451     {
00452       int __ret = 0;
00453       if (sizeof(_ExternT) <= sizeof(_InternT))
00454     __ret = sizeof(_InternT)/sizeof(_ExternT);
00455       return __ret; 
00456     }
00457   
00458   template<typename _InternT, typename _ExternT>
00459     bool 
00460     codecvt<_InternT, _ExternT, __enc_traits>::
00461     do_always_noconv() const throw()
00462     { return false; }
00463   
00464   template<typename _InternT, typename _ExternT>
00465     int 
00466     codecvt<_InternT, _ExternT, __enc_traits>::
00467     do_length(state_type&, const extern_type* __from, 
00468           const extern_type* __end, size_t __max) const
00469     { return std::min(__max, static_cast<size_t>(__end - __from)); }
00470 
00471   // _GLIBCXX_RESOLVE_LIB_DEFECTS
00472   // 74.  Garbled text for codecvt::do_max_length
00473   template<typename _InternT, typename _ExternT>
00474     int 
00475     codecvt<_InternT, _ExternT, __enc_traits>::
00476     do_max_length() const throw()
00477     { return 1; }
00478 

Generated on Tue Jan 30 17:31:48 2007 for GNU C++ STL by doxygen 1.3.6