00001
00002
00003 #include "ace/ACE.h"
00004 #include "ace/ace_wchar.h"
00005 #include "ace/Auto_Ptr.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_string.h"
00008 #include "ACEXML/common/HttpCharStream.h"
00009 #include "ACEXML/common/Encoding.h"
00010
00011 ACE_RCSID (common, HttpCharStream, "$Id: HttpCharStream.cpp 75114 2006-10-27 23:48:24Z ossama $")
00012
00013
// States of the HTTP response-header scanner used by get_url().
// The values are kept as plain ints because the scanner stores its current
// state in an int and switches on it.
namespace
{
  // Status line ("HTTP/1.0 200 OK"): the protocol token, the first
  // whitespace after it, and the digits of the numeric status code.
  int const HDST_LINE1_PROTOCOL = 0;
  int const HDST_LINE1_WHITESPACE = 1;
  int const HDST_LINE1_STATUS = 2;

  // Remaining header lines: beginning of a line and ordinary text.
  int const HDST_BOL = 10;
  int const HDST_TEXT = 11;

  // Line-terminator tracking; the state name spells the terminator
  // characters seen so far.
  int const HDST_LF = 12;
  int const HDST_CR = 13;
  int const HDST_CRLF = 14;
  int const HDST_CRLFCR = 15;
}
00023
00024 ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
00025 : url_(0),
00026 url_addr_(0),
00027 stream_(0),
00028 connector_(0),
00029 size_(0),
00030 data_offset_ (0),
00031 encoding_ (0)
00032 {
00033
00034 }
00035
00036 ACEXML_HttpCharStream::~ACEXML_HttpCharStream (void)
00037 {
00038 this->close ();
00039 }
00040
00041 int
00042 ACEXML_HttpCharStream::open (const ACEXML_Char *url)
00043 {
00044 this->url_ = ACE::strnew (url);
00045
00046 ACE_NEW_RETURN (this->url_addr_, ACEXML_URL_Addr, -1);
00047 ACE_NEW_RETURN (this->stream_, ACEXML_Mem_Map_Stream, -1);
00048
00049 if (this->url_addr_->string_to_addr (this->url_) == -1) {
00050 this->close();
00051 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot convert URL"), -1);
00052 }
00053
00054 ACE_NEW_RETURN (this->connector_,
00055 Connector (0, ACE_NONBLOCK),
00056 -1);
00057
00058 if (this->stream_->open (this->connector_, *this->url_addr_) == -1) {
00059 this->close();
00060 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot open backing store"), -1);
00061 }
00062
00063 int result = this->send_request();
00064 if (result == -1) {
00065 this->close();
00066 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "send_request"), -1);
00067 }
00068
00069 size_t len = 0;
00070 result = this->get_url(len);
00071 if (result == -1) {
00072 this->close();
00073 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "get_url"), -1);
00074 }
00075 if (result != 200) {
00076 this->close();
00077 ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
00078 result,
00079 "Refer HTTP/1.0 error code for details"), -1);
00080 }
00081
00082 this->size_ = static_cast<ACE_OFF_T> (len);
00083 return this->determine_encoding();
00084 }
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113 int
00114 ACEXML_HttpCharStream::get_url (size_t& len)
00115 {
00116 if (this->stream_ == 0)
00117 return -1;
00118
00119 int header_state = HDST_LINE1_PROTOCOL;
00120 int status = 0;
00121 size_t b = 0;
00122 char const * buf = 0;
00123 size_t buflen = BUFSIZ;
00124 for (;;)
00125 {
00126 buf = this->stream_->recv (buflen);
00127
00128 if (buf == 0)
00129 if (buflen == 0)
00130 break;
00131 else
00132 continue;
00133
00134 for (b = 0; b < buflen; ++b)
00135 {
00136 switch ( header_state )
00137 {
00138 case HDST_LINE1_PROTOCOL:
00139 switch ( buf[b] )
00140 {
00141 case ' ': case '\t':
00142 header_state = HDST_LINE1_WHITESPACE; break;
00143 case '\n': header_state = HDST_LF ; break;
00144 case '\r': header_state = HDST_CR; break;
00145 }
00146 break;
00147 case HDST_LINE1_WHITESPACE:
00148 switch ( buf[b] )
00149 {
00150 case '0': case '1': case '2': case '3': case '4':
00151 case '5': case '6': case '7': case '8': case '9':
00152 status = buf[b] - '0';
00153 header_state = HDST_LINE1_STATUS;
00154 break;
00155 case '\n': header_state = HDST_LF ; break;
00156 case '\r': header_state = HDST_CR; break;
00157 default: header_state = HDST_TEXT; break;
00158 }
00159 break;
00160 case HDST_LINE1_STATUS:
00161 switch ( buf[b] )
00162 {
00163 case '0': case '1': case '2': case '3': case '4':
00164 case '5': case '6': case '7': case '8': case '9':
00165 status = status * 10 + buf[b] - '0';
00166 break;
00167 case '\n': header_state = HDST_LF ; break;
00168 case '\r': header_state = HDST_CR; break;
00169 default: header_state = HDST_TEXT; break;
00170 }
00171 break;
00172 case HDST_BOL:
00173 switch ( buf[b] )
00174 {
00175 case '\n': header_state = HDST_LF; break;
00176 case '\r': header_state = HDST_CR; break;
00177 default: header_state = HDST_TEXT; break;
00178 }
00179 break;
00180 case HDST_TEXT:
00181 switch ( buf[b] )
00182 {
00183 case '\n': header_state = HDST_LF; break;
00184 case '\r': header_state = HDST_CR; break;
00185 }
00186 break;
00187
00188 case HDST_LF:
00189 switch ( buf[b] )
00190 {
00191 case '\n': goto end_of_headers;
00192 case '\r': header_state = HDST_CR; break;
00193 default: header_state = HDST_TEXT; break;
00194 }
00195 break;
00196
00197 case HDST_CR:
00198 switch ( buf[b] )
00199 {
00200 case '\n': header_state = HDST_CRLF; break;
00201 case '\r': goto end_of_headers;
00202 default: header_state = HDST_TEXT; break;
00203 }
00204 break;
00205
00206 case HDST_CRLF:
00207 switch ( buf[b] )
00208 {
00209 case '\n': goto end_of_headers;
00210 case '\r': header_state = HDST_CRLFCR; break;
00211 default: header_state = HDST_TEXT; break;
00212 }
00213 break;
00214
00215 case HDST_CRLFCR:
00216 switch ( buf[b] )
00217 {
00218 case '\n': case '\r': goto end_of_headers;
00219 default: header_state = HDST_TEXT; break;
00220 }
00221 break;
00222 }
00223 }
00224 }
00225 end_of_headers:
00226 if (b == 0)
00227 return -1;
00228 ++b;
00229
00230
00231 char const * const data_beg = buf + b;
00232 buflen = BUFSIZ;
00233
00234
00235
00236 while ((buf = this->stream_->recv (buflen)) != 0)
00237 ;
00238
00239
00240 len = this->stream_->recv() - data_beg;
00241
00242
00243 this->stream_->rewind();
00244
00245 this->data_offset_ = data_beg - this->stream_->recv();
00246
00247 if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00248 ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
00249 "Error in seeking to beginning of data"), -1);
00250
00251 return status;
00252 }
00253
00254
00255 int
00256 ACEXML_HttpCharStream::send_request (void)
00257 {
00258 char* path = ACE::strnew (ACE_TEXT_ALWAYS_CHAR (this->url_addr_->get_path_name()));
00259 ACE_Auto_Basic_Array_Ptr<char> path_ptr (path);
00260 size_t commandsize = ACE_OS::strlen (path)
00261 + ACE_OS::strlen (this->url_addr_->get_host_name ())
00262 + 20
00263 + 1
00264 + 16 ;
00265
00266 char* command;
00267 ACE_NEW_RETURN (command, char[commandsize], -1);
00268
00269
00270 ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
00271
00272 int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
00273 bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
00274 this->url_addr_->get_host_name ());
00275 bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
00276
00277 ACE_Time_Value tv (ACE_DEFAULT_TIMEOUT);
00278
00279
00280 int retval = static_cast<int> (this->stream_->send_n (command, bytes, &tv));
00281 if (retval <= 0)
00282 return -1;
00283 return retval;
00284 }
00285
00286
00287 int
00288 ACEXML_HttpCharStream::available (void)
00289 {
00290 if (this->stream_ == 0)
00291 return -1;
00292 return static_cast<int> (this->stream_->available());
00293 }
00294
00295 int
00296 ACEXML_HttpCharStream::close (void)
00297 {
00298 delete[] this->url_;
00299 this->url_ = 0;
00300
00301 delete this->url_addr_;
00302 this->url_addr_ = 0;
00303
00304 delete this->stream_;
00305 this->stream_ = 0;
00306
00307 delete this->connector_;
00308 this->connector_ = 0;
00309
00310 this->size_ = 0;
00311 this->data_offset_ = 0;
00312
00313 delete[] this->encoding_;
00314 this->encoding_ = 0;
00315
00316 return 0;
00317 }
00318
00319 int
00320 ACEXML_HttpCharStream::determine_encoding (void)
00321 {
00322 if (this->stream_ == 0)
00323 return -1;
00324
00325 char input[] = {0, 0, 0, 0};
00326 size_t const len = sizeof (input) / sizeof (input[0]);
00327
00328 size_t i = 0;
00329 for (; i < len && input[i] != static_cast<char> (EOF); ++i)
00330 input[i] = this->stream_->peek_char (i);
00331
00332 if (i < len)
00333 return -1;
00334
00335 ACEXML_Char const * const temp = ACEXML_Encoding::get_encoding (input);
00336
00337 if (!temp)
00338 return -1;
00339 else
00340 {
00341 if (this->encoding_)
00342 delete [] this->encoding_;
00343
00344 this->encoding_ = ACE::strnew (temp);
00345
00346 }
00347
00348
00349 for (size_t j = 0; j < len; ++j)
00350 {
00351 if (input[j] == '\xFF' || input[j] == '\xFE' || input[j] == '\xEF' ||
00352 input[j] == '\xBB' || input[j] == '\xBF')
00353 {
00354 this->stream_->get_char();
00355 continue;
00356 }
00357 break;
00358 }
00359
00360 return 0;
00361 }
00362
00363 void
00364 ACEXML_HttpCharStream::rewind (void)
00365 {
00366 if (this->stream_ == 0)
00367 return;
00368 this->stream_->rewind();
00369
00370
00371 if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00372 ACE_ERROR ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"));
00373 this->determine_encoding();
00374 }
00375
00376 const ACEXML_Char*
00377 ACEXML_HttpCharStream::getEncoding (void)
00378 {
00379 return this->encoding_;
00380 }
00381
00382 const ACEXML_Char*
00383 ACEXML_HttpCharStream::getSystemId (void)
00384 {
00385 return this->url_;
00386 }
00387
00388
00389 int
00390 ACEXML_HttpCharStream::read (ACEXML_Char *str,
00391 size_t len)
00392 {
00393 if (this->stream_ == 0)
00394 return -1;
00395 len = len * sizeof (ACEXML_Char);
00396 char* temp = const_cast<char*> (this->stream_->recv (len));
00397 str = ACE_TEXT_CHAR_TO_TCHAR (temp);
00398 if (str == 0)
00399 return -1;
00400 return static_cast<int> (len);
00401 }
00402
00403
00404 int
00405 ACEXML_HttpCharStream::get (ACEXML_Char& ch)
00406 {
00407 if (this->stream_ == 0)
00408 return -1;
00409 #if defined (ACE_USES_WCHAR)
00410 return this->get_i (ch);
00411 #else
00412 ch = (ACEXML_Char) this->stream_->get_char();
00413 return (ch == (ACEXML_Char)EOF ? -1 :0);
00414 #endif
00415 }
00416
00417 int
00418 ACEXML_HttpCharStream::peek (void)
00419 {
00420 if (this->stream_ == 0)
00421 return -1;
00422
00423 #if defined (ACE_USES_WCHAR)
00424 return this->peek_i();
00425 #else
00426 return this->stream_->peek_char (0);
00427 #endif
00428 }
00429
00430
00431 #if defined (ACE_USES_WCHAR)
00432 int
00433 ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
00434 {
00435 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00436 {
00437 ch = (ACEXML_Char) this->stream_->get_char();
00438 return (ch == (ACEXML_Char)EOF ? -1 : 0);
00439 }
00440 int BE = (ACE_OS::strcmp (this->encoding_,
00441 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00442 ACEXML_Char input[2] = {0};
00443 int i = 0;
00444 for (; i < 2 && (input[i] = this->stream_->get_char()) > 0; ++i)
00445 ;
00446 if (i < 2)
00447 {
00448 ch = 0;
00449 return input[i];
00450 }
00451 ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00452 return 0;
00453 }
00454
00455 int
00456 ACEXML_HttpCharStream::peek_i (void)
00457 {
00458
00459 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00460 {
00461 ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
00462 return ch;
00463 }
00464
00465 int BE = (ACE_OS::strcmp (this->encoding_,
00466 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00467
00468 ACEXML_Char input[2];
00469 int i = 0;
00470 for (; i < 2 && (input[i] = this->stream_->peek_char (i)) > 0; ++i)
00471 ;
00472 if (i < 2)
00473 return -1;
00474 return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
00475 }
00476 #endif