00001
00002
00003 #include "ACEXML/common/FileCharStream.h"
00004 #include "ace/ACE.h"
00005 #include "ace/Log_Msg.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_sys_stat.h"
00008
00009 #if defined (ACE_USES_WCHAR)
00010 # include "ace/OS_NS_wchar.h"
00011 #endif
00012
00013 ACEXML_FileCharStream::ACEXML_FileCharStream (void)
00014 : filename_ (0), encoding_ (0), size_ (0), infile_ (0), peek_ (0)
00015 {
00016 }
00017
00018 ACEXML_FileCharStream::~ACEXML_FileCharStream (void)
00019 {
00020 this->close();
00021 }
00022
00023 int
00024 ACEXML_FileCharStream::open (const ACEXML_Char *name)
00025 {
00026 delete[] this->filename_;
00027 this->filename_ = 0;
00028
00029 delete[] this->encoding_;
00030 this->encoding_ = 0;
00031
00032 this->infile_ = ACE_OS::fopen (name, ACE_TEXT ("r"));
00033 if (this->infile_ == 0)
00034 return -1;
00035
00036 ACE_stat statbuf;
00037 if (ACE_OS::stat (name, &statbuf) < 0)
00038 return -1;
00039
00040 this->size_ = statbuf.st_size;
00041 this->filename_ = ACE::strnew (name);
00042 return this->determine_encoding();
00043 }
00044
00045 int
00046 ACEXML_FileCharStream::determine_encoding (void)
00047 {
00048 if (this->infile_ == 0)
00049 return -1;
00050
00051 char input[4];
00052 int retval = 0;
00053 int i = 0;
00054 for (; i < 4 && retval != -1; ++i)
00055 retval = this->getchar_i(input[i]);
00056 if (i < 4)
00057 return -1;
00058
00059
00060 ACE_OS::rewind (this->infile_);
00061
00062 const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00063 if (!temp)
00064 return -1;
00065 else
00066 {
00067 if (this->encoding_)
00068 delete [] this->encoding_;
00069 this->encoding_ = ACE::strnew (temp);
00070
00071
00072 }
00073
00074 char ch;
00075 for (int j = 0; j < 3; ++j)
00076 {
00077 if (this->getchar_i (ch) < 0)
00078 return -1;
00079 if (ch == '\xFF' || ch == '\xFE' || ch == '\xEF' || ch == '\xBB' ||
00080 ch == '\xBF')
00081 continue;
00082 else
00083 {
00084 ungetc (ch, this->infile_);
00085 break;
00086 }
00087 }
00088 return 0;
00089 }
00090
00091 void
00092 ACEXML_FileCharStream::rewind()
00093 {
00094 if (this->infile_ == 0)
00095 return;
00096 ACE_OS::rewind (this->infile_);
00097 this->determine_encoding();
00098 }
00099
00100 int
00101 ACEXML_FileCharStream::available (void)
00102 {
00103 if (this->infile_ == 0)
00104 return -1;
00105
00106 long curr;
00107 if ((curr = ACE_OS::ftell (this->infile_)) < 0)
00108 return -1;
00109 return (this->size_ - curr);
00110 }
00111
00112 int
00113 ACEXML_FileCharStream::close (void)
00114 {
00115 if (this->infile_ != 0)
00116 {
00117 ACE_OS::fclose (this->infile_);
00118 this->infile_ = 0;
00119 }
00120 delete[] this->filename_;
00121 this->filename_ = 0;
00122 delete[] this->encoding_;
00123 this->encoding_ = 0;
00124 this->size_ = 0;
00125 this->peek_ = 0;
00126 return 0;
00127 }
00128
00129
00130 int
00131 ACEXML_FileCharStream::getchar_i (char& ch)
00132 {
00133 ch = static_cast<char> (ACE_OS::fgetc (this->infile_));
00134 return (feof(this->infile_) ? -1 : 0);
00135 }
00136
00137 int
00138 ACEXML_FileCharStream::read (ACEXML_Char *str,
00139 size_t len)
00140 {
00141 if (this->infile_ == 0)
00142 return -1;
00143
00144 return static_cast<int> (ACE_OS::fread (str, sizeof (ACEXML_Char), len, this->infile_));
00145 }
00146
00147 int
00148 ACEXML_FileCharStream::get (ACEXML_Char& ch)
00149 {
00150 if (this->infile_ == 0)
00151 return -1;
00152 #if defined (ACE_USES_WCHAR)
00153 return this->get_i (ch);
00154 #else
00155 ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00156 return (feof(this->infile_) ? -1 : 0);
00157 #endif
00158 }
00159
00160 int
00161 ACEXML_FileCharStream::peek (void)
00162 {
00163 if (this->infile_ == 0)
00164 return -1;
00165 #if defined (ACE_USES_WCHAR)
00166 return this->peek_i();
00167 #else
00168
00169 ACEXML_Char ch = static_cast<ACEXML_Char> (ACE_OS::fgetc (this->infile_));
00170 ::ungetc (ch, this->infile_);
00171 return ch;
00172 #endif
00173 }
00174
00175 #if defined (ACE_USES_WCHAR)
00176 int
00177 ACEXML_FileCharStream::get_i (ACEXML_Char& ch)
00178 {
00179 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00180 {
00181 ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00182 return (feof(this->infile_) ? -1 : 0);
00183 }
00184
00185 if (this->peek_ != 0)
00186 {
00187 ch = this->peek_;
00188 this->peek_ = 0;
00189 return 0;
00190 }
00191
00192 int BE = (ACE_OS::strcmp (this->encoding_,
00193 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00194 ACEXML_Char input[2];
00195 int i = 0;
00196 for (; i < 2 && !feof (this->infile_); ++i)
00197 {
00198 input[i] = ACE_OS::fgetwc (this->infile_);
00199 }
00200 if (i < 2)
00201 {
00202 ch = 0;
00203 return -1;
00204 }
00205 ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00206 return 0;
00207 }
00208
00209 int
00210 ACEXML_FileCharStream::peek_i (void)
00211 {
00212
00213 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00214 {
00215 ACEXML_Char ch = (ACEXML_Char) ACE_OS::fgetc (this->infile_);
00216 ::ungetc (ch, this->infile_);
00217 return ch;
00218 }
00219
00220
00221
00222 if (this->peek_ != 0)
00223 return this->peek_;
00224
00225
00226
00227 int BE = (ACE_OS::strcmp (this->encoding_,
00228 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00229 ACEXML_Char input[2];
00230 int i = 0;
00231 for (; i < 2 && !feof (this->infile_); ++i)
00232 {
00233 input[i] = ACE_OS::fgetwc (this->infile_);
00234 }
00235 if (i < 2)
00236 {
00237 this->peek_ = 0;
00238 return -1;
00239 }
00240 this->peek_ = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00241 return this->peek_;
00242 }
00243 #endif
00244
00245 const ACEXML_Char*
00246 ACEXML_FileCharStream::getEncoding (void)
00247 {
00248 return this->encoding_;
00249 }
00250
00251 const ACEXML_Char*
00252 ACEXML_FileCharStream::getSystemId (void)
00253 {
00254 return this->filename_;
00255 }