00001
00002
00003 #include "ACEXML/common/FileCharStream.h"
00004 #include "ace/ACE.h"
00005 #include "ace/Log_Msg.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_sys_stat.h"
00008 #include "ace/Truncate.h"
00009
00010 #if defined (ACE_USES_WCHAR)
00011 # include "ace/OS_NS_wchar.h"
00012 #endif
00013
00014 ACEXML_FileCharStream::ACEXML_FileCharStream (void)
00015 : filename_ (0), encoding_ (0), size_ (0), infile_ (0), peek_ (0)
00016 {
00017 }
00018
00019 ACEXML_FileCharStream::~ACEXML_FileCharStream (void)
00020 {
00021 this->close();
00022 }
00023
00024 int
00025 ACEXML_FileCharStream::open (const ACEXML_Char *name)
00026 {
00027 delete[] this->filename_;
00028 this->filename_ = 0;
00029
00030 delete[] this->encoding_;
00031 this->encoding_ = 0;
00032
00033 this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r"));
00034 if (this->infile_ == 0)
00035 return -1;
00036
00037 ACE_stat statbuf;
00038 if (ACE_OS::stat (name, &statbuf) < 0)
00039 return -1;
00040
00041 this->size_ = ACE_Utils::truncate_cast<ACE_OFF_T> (statbuf.st_size);
00042 this->filename_ = ACE::strnew (name);
00043 return this->determine_encoding();
00044 }
00045
00046 int
00047 ACEXML_FileCharStream::determine_encoding (void)
00048 {
00049 if (this->infile_ == 0)
00050 return -1;
00051
00052 char input[4];
00053 int retval = 0;
00054 int i = 0;
00055 for (; i < 4 && retval != -1; ++i)
00056 retval = this->getchar_i(input[i]);
00057 if (i < 4)
00058 return -1;
00059
00060
00061 ACE_OS::rewind (this->infile_);
00062
00063 const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00064 if (!temp)
00065 return -1;
00066 else
00067 {
00068 if (this->encoding_)
00069 delete [] this->encoding_;
00070 this->encoding_ = ACE::strnew (temp);
00071
00072
00073 }
00074
00075 char ch;
00076 for (int j = 0; j < 3; ++j)
00077 {
00078 if (this->getchar_i (ch) < 0)
00079 return -1;
00080 if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' ||
00081 ch == '\xBF')
00082 continue;
00083 else
00084 {
00085 ACE_OS::ungetc (ch, this->infile_);
00086 break;
00087 }
00088 }
00089 return 0;
00090 }
00091
00092 void
00093 ACEXML_FileCharStream::rewind()
00094 {
00095 if (this->infile_ == 0)
00096 return;
00097 ACE_OS::rewind (this->infile_);
00098 this->determine_encoding();
00099 }
00100
00101 int
00102 ACEXML_FileCharStream::available (void)
00103 {
00104 if (this->infile_ == 0)
00105 return -1;
00106
00107 long curr;
00108 if ((curr = ACE_OS::ftell (this->infile_)) < 0)
00109 return -1;
00110 return static_cast<int> (this->size_ - curr);
00111 }
00112
00113 int
00114 ACEXML_FileCharStream::close (void)
00115 {
00116 if (this->infile_ != 0)
00117 {
00118 ACE_OS::fclose (this->infile_);
00119 this->infile_ = 0;
00120 }
00121 delete[] this->filename_;
00122 this->filename_ = 0;
00123 delete[] this->encoding_;
00124 this->encoding_ = 0;
00125 this->size_ = 0;
00126 this->peek_ = 0;
00127 return 0;
00128 }
00129
00130
00131 int
00132 ACEXML_FileCharStream::getchar_i (char& ch)
00133 {
00134 ch = static_cast<char> (ACE_OS::fgetc (this->infile_));
00135 return (feof(this->infile_) ? -1 : 0);
00136 }
00137
00138 int
00139 ACEXML_FileCharStream::read (ACEXML_Char *str,
00140 size_t len)
00141 {
00142 if (this->infile_ == 0)
00143 return -1;
00144
00145 return static_cast<int> (ACE_OS::fread (str, sizeof (ACEXML_Char), len, this->infile_));
00146 }
00147
00148 int
00149 ACEXML_FileCharStream::get (ACEXML_Char& ch)
00150 {
00151 if (this->infile_ == 0)
00152 return -1;
00153 #if defined (ACE_USES_WCHAR)
00154 return this->get_i (ch);
00155 #else
00156 ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00157 return (feof(this->infile_) ? -1 : 0);
00158 #endif
00159 }
00160
00161 int
00162 ACEXML_FileCharStream::peek (void)
00163 {
00164 if (this->infile_ == 0)
00165 return -1;
00166 #if defined (ACE_USES_WCHAR)
00167 return this->peek_i();
00168 #else
00169
00170 ACEXML_Char ch = static_cast<ACEXML_Char> (ACE_OS::fgetc (this->infile_));
00171 ACE_OS::ungetc (ch, this->infile_);
00172 return ch;
00173 #endif
00174 }
00175
00176 #if defined (ACE_USES_WCHAR)
00177 int
00178 ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
00179 {
00180 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00181 {
00182 ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00183 return (feof(this->infile_) ? -1 : 0);
00184 }
00185
00186 if (this->peek_ != 0)
00187 {
00188 ch = this->peek_;
00189 this->peek_ = 0;
00190 return 0;
00191 }
00192
00193 int BE = (ACE_OS::strcmp (this->encoding_,
00194 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00195 ACEXML_Char input[2];
00196 int i = 0;
00197 for (; i < 2 && !feof (this->infile_); ++i)
00198 {
00199 input[i] = ACE_OS::fgetwc (this->infile_);
00200 }
00201 if (i < 2)
00202 {
00203 ch = 0;
00204 return -1;
00205 }
00206 ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00207 return 0;
00208 }
00209
00210 int
00211 ACEXML_FileCharStream::peek_i (void)
00212 {
00213
00214 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00215 {
00216 ACEXML_Char ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00217 ACE_OS::ungetc (ch, this->infile_);
00218 return ch;
00219 }
00220
00221
00222
00223 if (this->peek_ != 0)
00224 return this->peek_;
00225
00226
00227
00228 int BE = (ACE_OS::strcmp (this->encoding_,
00229 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00230 ACEXML_Char input[2];
00231 int i = 0;
00232 for (; i < 2 && !feof (this->infile_); ++i)
00233 {
00234 input[i] = ACE_OS::fgetwc (this->infile_);
00235 }
00236 if (i < 2)
00237 {
00238 this->peek_ = 0;
00239 return -1;
00240 }
00241 this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00242 return this->peek_;
00243 }
00244 #endif
00245
00246 const ACEXML_Char*
00247 ACEXML_FileCharStream::getEncoding (void)
00248 {
00249 return this->encoding_;
00250 }
00251
00252 const ACEXML_Char*
00253 ACEXML_FileCharStream::getSystemId (void)
00254 {
00255 return this->filename_;
00256 }