FileCharStream.cpp

Go to the documentation of this file.
00001 // FileCharStream.cpp,v 1.25 2006/03/14 21:20:40 sjiang Exp
00002 
00003 #include "ACEXML/common/FileCharStream.h"
00004 #include "ace/ACE.h"
00005 #include "ace/Log_Msg.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_sys_stat.h"
00008 
00009 #if defined (ACE_USES_WCHAR)
00010 #  include "ace/OS_NS_wchar.h"
00011 #endif /* ACE_USES_WCHAR */
00012 
00013 ACEXML_FileCharStream::ACEXML_FileCharStream (void)
00014   : filename_ (0), encoding_ (0), size_ (0), infile_ (0), peek_ (0)
00015 {
00016 }
00017 
00018 ACEXML_FileCharStream::~ACEXML_FileCharStream (void)
00019 {
00020   this->close();
00021 }
00022 
00023 int
00024 ACEXML_FileCharStream::open (const ACEXML_Char *name)
00025 {
00026   delete[] this->filename_;
00027   this->filename_ = 0;
00028 
00029   delete[] this->encoding_;
00030   this->encoding_ = 0;
00031 
00032   this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r"));
00033   if (this->infile_ == 0)
00034     return -1;
00035 
00036   ACE_stat statbuf;
00037   if (ACE_OS::stat (name, &statbuf) < 0)
00038     return -1;
00039 
00040   this->size_ = statbuf.st_size;
00041   this->filename_ = ACE::strnew (name);
00042   return this->determine_encoding();
00043 }
00044 
00045 int
00046 ACEXML_FileCharStream::determine_encoding (void)
00047 {
00048   if (this->infile_ == 0)
00049     return -1;
00050 
00051   char input[4];
00052   int retval = 0;
00053   int i = 0;
00054   for (; i < 4 && retval != -1; ++i)
00055     retval = this->getchar_i(input[i]);
00056   if (i < 4)
00057     return -1;
00058 
00059   // Rewind the stream
00060   ACE_OS::rewind (this->infile_);
00061 
00062   const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00063   if (!temp)
00064     return -1;
00065   else
00066     {
00067       if (this->encoding_)
00068         delete [] this->encoding_;
00069       this->encoding_ = ACE::strnew (temp);
00070 //       ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("File's encoding is %s\n"),
00071 //                   this->encoding_));
00072     }
00073   // Move over the byte-order-mark if present.
00074   char ch;
00075   for (int j = 0; j < 3; ++j)
00076     {
00077       if (this->getchar_i (ch) < 0)
00078         return -1;
00079       if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' ||
00080           ch == '\xBF')
00081         continue;
00082       else
00083         {
00084           ungetc (ch, this->infile_);
00085           break;
00086         }
00087     }
00088   return 0;
00089 }
00090 
00091 void
00092 ACEXML_FileCharStream::rewind()
00093 {
00094   if (this->infile_ == 0)
00095     return;
00096   ACE_OS::rewind (this->infile_);
00097   this->determine_encoding();
00098 }
00099 
00100 int
00101 ACEXML_FileCharStream::available (void)
00102 {
00103   if (this->infile_ == 0)
00104     return -1;
00105 
00106   long curr;
00107   if ((curr = ACE_OS::ftell (this->infile_)) < 0)
00108     return -1;
00109   return (this->size_ - curr);
00110 }
00111 
00112 int
00113 ACEXML_FileCharStream::close (void)
00114 {
00115   if (this->infile_ != 0)
00116     {
00117       ACE_OS::fclose (this->infile_);
00118       this->infile_ = 0;
00119     }
00120   delete[] this->filename_;
00121   this->filename_ = 0;
00122   delete[] this->encoding_;
00123   this->encoding_ = 0;
00124   this->size_ = 0;
00125   this->peek_ = 0;
00126   return 0;
00127 }
00128 
00129 
00130 int
00131 ACEXML_FileCharStream::getchar_i (char& ch)
00132 {
00133   ch = static_cast<char> (ACE_OS::fgetc (this->infile_));
00134   return (feof(this->infile_) ? -1 : 0);
00135 }
00136 
00137 int
00138 ACEXML_FileCharStream::read (ACEXML_Char *str,
00139                              size_t len)
00140 {
00141   if (this->infile_ == 0)
00142     return -1;
00143 
00144   return static_cast<int> (ACE_OS::fread (str, sizeof (ACEXML_Char), len, this->infile_));
00145 }
00146 
00147 int
00148 ACEXML_FileCharStream::get (ACEXML_Char& ch)
00149 {
00150   if (this->infile_ == 0)
00151     return -1;
00152 #if defined (ACE_USES_WCHAR)
00153   return this->get_i (ch);
00154 #else
00155   ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00156   return (feof(this->infile_) ? -1 : 0);
00157 #endif /* ACE_USES_WCHAR */
00158 }
00159 
00160 int
00161 ACEXML_FileCharStream::peek (void)
00162 {
00163   if (this->infile_ == 0)
00164     return -1;
00165 #if defined (ACE_USES_WCHAR)
00166   return this->peek_i();
00167 #else
00168 
00169   ACEXML_Char ch = static_cast<ACEXML_Char> (ACE_OS::fgetc (this->infile_));
00170   ::ungetc (ch, this->infile_);
00171   return ch;
00172 #endif /* ACE_USES_WCHAR */
00173 }
00174 
00175 #if defined (ACE_USES_WCHAR)
00176 int
00177 ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
00178 {
00179   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00180     {
00181       ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00182       return (feof(this->infile_) ? -1 : 0);
00183     }
00184   // If we have a value in peek_, return it.
00185   if (this->peek_ != 0)
00186     {
00187       ch = this->peek_;
00188       this->peek_ = 0;
00189       return 0;
00190     }
00191 
00192   int BE = (ACE_OS::strcmp (this->encoding_,
00193                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00194   ACEXML_Char input[2];
00195   int i = 0;
00196   for (; i < 2 && !feof (this->infile_); ++i)
00197     {
00198       input[i] = ACE_OS::fgetwc (this->infile_);
00199     }
00200   if (i < 2)
00201     {
00202       ch = 0;
00203       return -1;
00204     }
00205   ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00206   return 0;
00207 }
00208 
00209 int
00210 ACEXML_FileCharStream::peek_i (void)
00211 {
00212   // If we are reading a UTF-8 encoded file, just use the plain unget.
00213   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00214     {
00215       ACEXML_Char ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00216       ::ungetc (ch, this->infile_);
00217       return ch;
00218     }
00219 
00220   // If somebody had already called peek() and not consumed it, return the
00221   // value held in this->peek_.
00222   if (this->peek_ != 0)
00223     return this->peek_;
00224 
00225   // Peek into the stream. This reads two characters off the stream, keeps
00226   // it in peek_.
00227   int BE = (ACE_OS::strcmp (this->encoding_,
00228                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00229   ACEXML_Char input[2];
00230   int i = 0;
00231   for (; i < 2 && !feof (this->infile_); ++i)
00232     {
00233       input[i] = ACE_OS::fgetwc (this->infile_);
00234     }
00235   if (i < 2)
00236     {
00237       this->peek_ = 0;
00238       return -1;
00239     }
00240   this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00241   return this->peek_;
00242 }
00243 #endif /* ACE_USES_WCHAR */
00244 
00245 const ACEXML_Char*
00246 ACEXML_FileCharStream::getEncoding (void)
00247 {
00248   return this->encoding_;
00249 }
00250 
00251 const ACEXML_Char*
00252 ACEXML_FileCharStream::getSystemId (void)
00253 {
00254   return this->filename_;
00255 }

Generated on Thu Nov 9 11:45:36 2006 for ACEXML by doxygen 1.3.6