FileCharStream.cpp

Go to the documentation of this file.
00001 // $Id: FileCharStream.cpp 78542 2007-06-03 20:05:12Z olli $
00002 
00003 #include "ACEXML/common/FileCharStream.h"
00004 #include "ace/ACE.h"
00005 #include "ace/Log_Msg.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_sys_stat.h"
00008 #include "ace/Truncate.h"
00009 
00010 #if defined (ACE_USES_WCHAR)
00011 #  include "ace/OS_NS_wchar.h"
00012 #endif /* ACE_USES_WCHAR */
00013 
00014 ACEXML_FileCharStream::ACEXML_FileCharStream (void)
00015   : filename_ (0), encoding_ (0), size_ (0), infile_ (0), peek_ (0)
00016 {
00017 }
00018 
00019 ACEXML_FileCharStream::~ACEXML_FileCharStream (void)
00020 {
00021   this->close();
00022 }
00023 
00024 int
00025 ACEXML_FileCharStream::open (const ACEXML_Char *name)
00026 {
00027   delete[] this->filename_;
00028   this->filename_ = 0;
00029 
00030   delete[] this->encoding_;
00031   this->encoding_ = 0;
00032 
00033   this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r"));
00034   if (this->infile_ == 0)
00035     return -1;
00036 
00037   ACE_stat statbuf;
00038   if (ACE_OS::stat (name, &statbuf) < 0)
00039     return -1;
00040 
00041   this->size_ = ACE_Utils::truncate_cast<ACE_OFF_T> (statbuf.st_size);
00042   this->filename_ = ACE::strnew (name);
00043   return this->determine_encoding();
00044 }
00045 
00046 int
00047 ACEXML_FileCharStream::determine_encoding (void)
00048 {
00049   if (this->infile_ == 0)
00050     return -1;
00051 
00052   char input[4];
00053   int retval = 0;
00054   int i = 0;
00055   for (; i < 4 && retval != -1; ++i)
00056     retval = this->getchar_i(input[i]);
00057   if (i < 4)
00058     return -1;
00059 
00060   // Rewind the stream
00061   ACE_OS::rewind (this->infile_);
00062 
00063   const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00064   if (!temp)
00065     return -1;
00066   else
00067     {
00068       if (this->encoding_)
00069         delete [] this->encoding_;
00070       this->encoding_ = ACE::strnew (temp);
00071 //       ACE_DEBUG ((LM_DEBUG, ACE_TEXT ("File's encoding is %s\n"),
00072 //                   this->encoding_));
00073     }
00074   // Move over the byte-order-mark if present.
00075   char ch;
00076   for (int j = 0; j < 3; ++j)
00077     {
00078       if (this->getchar_i (ch) < 0)
00079         return -1;
00080       if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' ||
00081           ch == '\xBF')
00082         continue;
00083       else
00084         {
00085           ACE_OS::ungetc (ch, this->infile_);
00086           break;
00087         }
00088     }
00089   return 0;
00090 }
00091 
00092 void
00093 ACEXML_FileCharStream::rewind()
00094 {
00095   if (this->infile_ == 0)
00096     return;
00097   ACE_OS::rewind (this->infile_);
00098   this->determine_encoding();
00099 }
00100 
00101 int
00102 ACEXML_FileCharStream::available (void)
00103 {
00104   if (this->infile_ == 0)
00105     return -1;
00106 
00107   long curr;
00108   if ((curr = ACE_OS::ftell (this->infile_)) < 0)
00109     return -1;
00110   return static_cast<int> (this->size_ - curr);
00111 }
00112 
00113 int
00114 ACEXML_FileCharStream::close (void)
00115 {
00116   if (this->infile_ != 0)
00117     {
00118       ACE_OS::fclose (this->infile_);
00119       this->infile_ = 0;
00120     }
00121   delete[] this->filename_;
00122   this->filename_ = 0;
00123   delete[] this->encoding_;
00124   this->encoding_ = 0;
00125   this->size_ = 0;
00126   this->peek_ = 0;
00127   return 0;
00128 }
00129 
00130 
00131 int
00132 ACEXML_FileCharStream::getchar_i (char& ch)
00133 {
00134   ch = static_cast<char> (ACE_OS::fgetc (this->infile_));
00135   return (feof(this->infile_) ? -1 : 0);
00136 }
00137 
00138 int
00139 ACEXML_FileCharStream::read (ACEXML_Char *str,
00140                              size_t len)
00141 {
00142   if (this->infile_ == 0)
00143     return -1;
00144 
00145   return static_cast<int> (ACE_OS::fread (str, sizeof (ACEXML_Char), len, this->infile_));
00146 }
00147 
00148 int
00149 ACEXML_FileCharStream::get (ACEXML_Char& ch)
00150 {
00151   if (this->infile_ == 0)
00152     return -1;
00153 #if defined (ACE_USES_WCHAR)
00154   return this->get_i (ch);
00155 #else
00156   ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00157   return (feof(this->infile_) ? -1 : 0);
00158 #endif /* ACE_USES_WCHAR */
00159 }
00160 
00161 int
00162 ACEXML_FileCharStream::peek (void)
00163 {
00164   if (this->infile_ == 0)
00165     return -1;
00166 #if defined (ACE_USES_WCHAR)
00167   return this->peek_i();
00168 #else
00169 
00170   ACEXML_Char ch = static_cast<ACEXML_Char> (ACE_OS::fgetc (this->infile_));
00171   ACE_OS::ungetc (ch, this->infile_);
00172   return ch;
00173 #endif /* ACE_USES_WCHAR */
00174 }
00175 
00176 #if defined (ACE_USES_WCHAR)
00177 int
00178 ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
00179 {
00180   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00181     {
00182       ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00183       return (feof(this->infile_) ? -1 : 0);
00184     }
00185   // If we have a value in peek_, return it.
00186   if (this->peek_ != 0)
00187     {
00188       ch = this->peek_;
00189       this->peek_ = 0;
00190       return 0;
00191     }
00192 
00193   int BE = (ACE_OS::strcmp (this->encoding_,
00194                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00195   ACEXML_Char input[2];
00196   int i = 0;
00197   for (; i < 2 && !feof (this->infile_); ++i)
00198     {
00199       input[i] = ACE_OS::fgetwc (this->infile_);
00200     }
00201   if (i < 2)
00202     {
00203       ch = 0;
00204       return -1;
00205     }
00206   ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00207   return 0;
00208 }
00209 
00210 int
00211 ACEXML_FileCharStream::peek_i (void)
00212 {
00213   // If we are reading a UTF-8 encoded file, just use the plain unget.
00214   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00215     {
00216       ACEXML_Char ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00217       ACE_OS::ungetc (ch, this->infile_);
00218       return ch;
00219     }
00220 
00221   // If somebody had already called peek() and not consumed it, return the
00222   // value held in this->peek_.
00223   if (this->peek_ != 0)
00224     return this->peek_;
00225 
00226   // Peek into the stream. This reads two characters off the stream, keeps
00227   // it in peek_.
00228   int BE = (ACE_OS::strcmp (this->encoding_,
00229                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00230   ACEXML_Char input[2];
00231   int i = 0;
00232   for (; i < 2 && !feof (this->infile_); ++i)
00233     {
00234       input[i] = ACE_OS::fgetwc (this->infile_);
00235     }
00236   if (i < 2)
00237     {
00238       this->peek_ = 0;
00239       return -1;
00240     }
00241   this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00242   return this->peek_;
00243 }
00244 #endif /* ACE_USES_WCHAR */
00245 
00246 const ACEXML_Char*
00247 ACEXML_FileCharStream::getEncoding (void)
00248 {
00249   return this->encoding_;
00250 }
00251 
00252 const ACEXML_Char*
00253 ACEXML_FileCharStream::getSystemId (void)
00254 {
00255   return this->filename_;
00256 }

Generated on Sun Jan 27 13:04:15 2008 for ACEXML by doxygen 1.3.6