00001
00002
00003 #include "ace/ACE.h"
00004 #include "ace/ace_wchar.h"
00005 #include "ace/Auto_Ptr.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_string.h"
00008 #include "ACEXML/common/HttpCharStream.h"
00009 #include "ACEXML/common/Encoding.h"
00010
00011 ACE_RCSID (common, HttpCharStream, "HttpCharStream.cpp,v 1.22 2006/03/14 21:20:40 sjiang Exp")
00012
00013
// States of the HTTP response-header scanner used by
// ACEXML_HttpCharStream::get_url().  The anonymous namespace keeps
// them local to this translation unit.
namespace
{
  // --- first (status) line ---
  const int HDST_LINE1_PROTOCOL   = 0;   // reading the protocol token
  const int HDST_LINE1_WHITESPACE = 1;   // blank(s) after the protocol token
  const int HDST_LINE1_STATUS     = 2;   // accumulating status-code digits

  // --- remaining header lines ---
  const int HDST_BOL    = 10;            // beginning of a line
  const int HDST_TEXT   = 11;            // ordinary text inside a line
  const int HDST_LF     = 12;            // just saw "\n"
  const int HDST_CR     = 13;            // just saw "\r"
  const int HDST_CRLF   = 14;            // just saw "\r\n"
  const int HDST_CRLFCR = 15;            // just saw "\r\n\r"
}
00023
00024 ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
00025 : url_(0),
00026 url_addr_(0),
00027 stream_(0),
00028 connector_(0),
00029 size_(0),
00030 data_offset_ (0),
00031 encoding_ (0)
00032 {
00033
00034 }
00035
00036 ACEXML_HttpCharStream::~ACEXML_HttpCharStream (void)
00037 {
00038 this->close ();
00039 }
00040
00041 int
00042 ACEXML_HttpCharStream::open (const ACEXML_Char *url)
00043 {
00044 this->url_ = ACE::strnew (url);
00045
00046 ACE_NEW_RETURN (this->url_addr_, ACEXML_URL_Addr, -1);
00047 ACE_NEW_RETURN (this->stream_, ACEXML_Mem_Map_Stream, -1);
00048
00049 if (this->url_addr_->string_to_addr (this->url_) == -1) {
00050 this->close();
00051 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot convert URL"), -1);
00052 }
00053
00054 ACE_NEW_RETURN (this->connector_,
00055 Connector (0, ACE_NONBLOCK),
00056 -1);
00057
00058 if (this->stream_->open (this->connector_, *this->url_addr_) == -1) {
00059 this->close();
00060 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot open backing store"), -1);
00061 }
00062
00063 int result = this->send_request();
00064 if (result == -1) {
00065 this->close();
00066 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "send_request"), -1);
00067 }
00068
00069 size_t len = 0;
00070 result = this->get_url(len);
00071 if (result == -1) {
00072 this->close();
00073 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "get_url"), -1);
00074 }
00075 if (result != 200) {
00076 this->close();
00077 ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
00078 result,
00079 "Refer HTTP/1.0 error code for details"), -1);
00080 }
00081
00082 this->size_ = static_cast<off_t> (len);
00083 return this->determine_encoding();
00084 }
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113 int
00114 ACEXML_HttpCharStream::get_url (size_t& len)
00115 {
00116 if (this->stream_ == 0)
00117 return -1;
00118
00119 int header_state = HDST_LINE1_PROTOCOL;
00120 int status = 0;
00121 size_t b = 0;
00122 char* buf = 0;
00123 size_t buflen = BUFSIZ;
00124 for (;;)
00125 {
00126 if ((buf = const_cast<char*> (this->stream_->recv (buflen))) == 0)
00127 if (buflen <= 0)
00128 break;
00129
00130 for (b = 0; b < buflen; ++b)
00131 {
00132 switch ( header_state )
00133 {
00134 case HDST_LINE1_PROTOCOL:
00135 switch ( buf[b] )
00136 {
00137 case ' ': case '\t':
00138 header_state = HDST_LINE1_WHITESPACE; break;
00139 case '\n': header_state = HDST_LF ; break;
00140 case '\r': header_state = HDST_CR; break;
00141 }
00142 break;
00143 case HDST_LINE1_WHITESPACE:
00144 switch ( buf[b] )
00145 {
00146 case '0': case '1': case '2': case '3': case '4':
00147 case '5': case '6': case '7': case '8': case '9':
00148 status = buf[b] - '0';
00149 header_state = HDST_LINE1_STATUS;
00150 break;
00151 case '\n': header_state = HDST_LF ; break;
00152 case '\r': header_state = HDST_CR; break;
00153 default: header_state = HDST_TEXT; break;
00154 }
00155 break;
00156 case HDST_LINE1_STATUS:
00157 switch ( buf[b] )
00158 {
00159 case '0': case '1': case '2': case '3': case '4':
00160 case '5': case '6': case '7': case '8': case '9':
00161 status = status * 10 + buf[b] - '0';
00162 break;
00163 case '\n': header_state = HDST_LF ; break;
00164 case '\r': header_state = HDST_CR; break;
00165 default: header_state = HDST_TEXT; break;
00166 }
00167 break;
00168 case HDST_BOL:
00169 switch ( buf[b] )
00170 {
00171 case '\n': header_state = HDST_LF; break;
00172 case '\r': header_state = HDST_CR; break;
00173 default: header_state = HDST_TEXT; break;
00174 }
00175 break;
00176 case HDST_TEXT:
00177 switch ( buf[b] )
00178 {
00179 case '\n': header_state = HDST_LF; break;
00180 case '\r': header_state = HDST_CR; break;
00181 }
00182 break;
00183
00184 case HDST_LF:
00185 switch ( buf[b] )
00186 {
00187 case '\n': goto end_of_headers;
00188 case '\r': header_state = HDST_CR; break;
00189 default: header_state = HDST_TEXT; break;
00190 }
00191 break;
00192
00193 case HDST_CR:
00194 switch ( buf[b] )
00195 {
00196 case '\n': header_state = HDST_CRLF; break;
00197 case '\r': goto end_of_headers;
00198 default: header_state = HDST_TEXT; break;
00199 }
00200 break;
00201
00202 case HDST_CRLF:
00203 switch ( buf[b] )
00204 {
00205 case '\n': goto end_of_headers;
00206 case '\r': header_state = HDST_CRLFCR; break;
00207 default: header_state = HDST_TEXT; break;
00208 }
00209 break;
00210
00211 case HDST_CRLFCR:
00212 switch ( buf[b] )
00213 {
00214 case '\n': case '\r': goto end_of_headers;
00215 default: header_state = HDST_TEXT; break;
00216 }
00217 break;
00218 }
00219 }
00220 }
00221 end_of_headers:
00222 if (b == 0)
00223 return -1;
00224 ++b;
00225
00226
00227 char* data_beg = buf + b;
00228 buflen = BUFSIZ;
00229
00230
00231
00232 while (( buf = const_cast<char*> (this->stream_->recv (buflen))) != 0)
00233 ;
00234
00235
00236 len = this->stream_->recv() - data_beg;
00237
00238
00239 this->stream_->rewind();
00240
00241 this->data_offset_ = data_beg - this->stream_->recv();
00242
00243 if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00244 ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
00245 "Error in seeking to beginning of data"), -1);
00246
00247 return status;
00248 }
00249
00250
00251 int
00252 ACEXML_HttpCharStream::send_request (void)
00253 {
00254 char* path = ACE::strnew (ACE_TEXT_ALWAYS_CHAR (this->url_addr_->get_path_name()));
00255 ACE_Auto_Basic_Array_Ptr<char> path_ptr (path);
00256 size_t commandsize = ACE_OS::strlen (path)
00257 + ACE_OS::strlen (this->url_addr_->get_host_name ())
00258 + 20
00259 + 1
00260 + 16 ;
00261
00262 char* command;
00263 ACE_NEW_RETURN (command, char[commandsize], -1);
00264
00265
00266 ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
00267
00268 int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
00269 bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
00270 this->url_addr_->get_host_name ());
00271 bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
00272
00273 ACE_Time_Value tv (ACE_DEFAULT_TIMEOUT);
00274
00275
00276 int retval = this->stream_->send_n (command, bytes, &tv);
00277 if (retval <= 0)
00278 return -1;
00279 return retval;
00280 }
00281
00282
00283 int
00284 ACEXML_HttpCharStream::available (void)
00285 {
00286 if (this->stream_ == 0)
00287 return -1;
00288 return static_cast<int> (this->stream_->available());
00289 }
00290
00291 int
00292 ACEXML_HttpCharStream::close (void)
00293 {
00294 delete[] this->url_;
00295 this->url_ = 0;
00296
00297 delete this->url_addr_;
00298 this->url_addr_ = 0;
00299
00300 delete this->stream_;
00301 this->stream_ = 0;
00302
00303 delete this->connector_;
00304 this->connector_ = 0;
00305
00306 this->size_ = 0;
00307 this->data_offset_ = 0;
00308
00309 delete[] this->encoding_;
00310 this->encoding_ = 0;
00311
00312 return 0;
00313 }
00314
00315 int
00316 ACEXML_HttpCharStream::determine_encoding (void)
00317 {
00318 if (this->stream_ == 0)
00319 return -1;
00320
00321 char input[4] = {0, 0, 0, 0};
00322 int i = 0;
00323 for (; i < 4 && input[i] != (char)-1; ++i)
00324 input[i] = static_cast<char> (this->stream_->peek_char(i));
00325 if (i < 4)
00326 return -1;
00327 const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00328 if (!temp)
00329 return -1;
00330 else
00331 {
00332 if (this->encoding_)
00333 delete [] this->encoding_;
00334 this->encoding_ = ACE::strnew (temp);
00335
00336 }
00337
00338 for (int j = 0; j < 3; ++j)
00339 {
00340 if (input[i] == '\xFF' || input[i] == '\xFE' || input[i] == '\xEF' ||
00341 input[i] == '\xBB' || input[i] == '\xBF')
00342 {
00343 this->stream_->get_char();
00344 continue;
00345 }
00346 break;
00347 }
00348 return 0;
00349 }
00350
00351 void
00352 ACEXML_HttpCharStream::rewind (void)
00353 {
00354 if (this->stream_ == 0)
00355 return;
00356 this->stream_->rewind();
00357
00358
00359 if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00360 ACE_ERROR ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"));
00361 this->determine_encoding();
00362 }
00363
00364 const ACEXML_Char*
00365 ACEXML_HttpCharStream::getEncoding (void)
00366 {
00367 return this->encoding_;
00368 }
00369
00370 const ACEXML_Char*
00371 ACEXML_HttpCharStream::getSystemId (void)
00372 {
00373 return this->url_;
00374 }
00375
00376
00377 int
00378 ACEXML_HttpCharStream::read (ACEXML_Char *str,
00379 size_t len)
00380 {
00381 if (this->stream_ == 0)
00382 return -1;
00383 len = len * sizeof (ACEXML_Char);
00384 char* temp = const_cast<char*> (this->stream_->recv (len));
00385 str = ACE_TEXT_CHAR_TO_TCHAR (temp);
00386 if (str == 0)
00387 return -1;
00388 return static_cast<int> (len);
00389 }
00390
00391
00392 int
00393 ACEXML_HttpCharStream::get (ACEXML_Char& ch)
00394 {
00395 if (this->stream_ == 0)
00396 return -1;
00397 #if defined (ACE_USES_WCHAR)
00398 return this->get_i (ch);
00399 #else
00400 ch = (ACEXML_Char) this->stream_->get_char();
00401 return (ch == (ACEXML_Char)EOF ? -1 :0);
00402 #endif
00403 }
00404
00405 int
00406 ACEXML_HttpCharStream::peek (void)
00407 {
00408 if (this->stream_ == 0)
00409 return -1;
00410
00411 #if defined (ACE_USES_WCHAR)
00412 return this->peek_i();
00413 #else
00414 return this->stream_->peek_char (0);
00415 #endif
00416 }
00417
00418
00419 #if defined (ACE_USES_WCHAR)
00420 int
00421 ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
00422 {
00423 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00424 {
00425 ch = (ACEXML_Char) this->stream_->get_char();
00426 return (ch == (ACEXML_Char)EOF ? -1 : 0);
00427 }
00428 int BE = (ACE_OS::strcmp (this->encoding_,
00429 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00430 ACEXML_Char input[2] = {0};
00431 int i = 0;
00432 for (; i < 2 && (input[i] = this->stream_->get_char()) > 0; ++i)
00433 ;
00434 if (i < 2)
00435 {
00436 ch = 0;
00437 return input[i];
00438 }
00439 ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00440 return 0;
00441 }
00442
00443 int
00444 ACEXML_HttpCharStream::peek_i (void)
00445 {
00446
00447 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00448 {
00449 ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
00450 return ch;
00451 }
00452
00453 int BE = (ACE_OS::strcmp (this->encoding_,
00454 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00455
00456 ACEXML_Char input[2];
00457 int i = 0;
00458 for (; i < 2 && (input[i] = this->stream_->peek_char (i)) > 0; ++i)
00459 ;
00460 if (i < 2)
00461 return -1;
00462 return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
00463 }
00464 #endif