00001
00002
00003 #include "ace/ACE.h"
00004 #include "ace/ace_wchar.h"
00005 #include "ace/Auto_Ptr.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_string.h"
00008 #include "ACEXML/common/HttpCharStream.h"
00009 #include "ACEXML/common/Encoding.h"
00010
00011 ACE_RCSID (common, HttpCharStream, "$Id: HttpCharStream.cpp 83341 2008-10-21 12:45:49Z vzykov $")
00012
00013
// States of the byte-at-a-time scanner that get_url() runs over the HTTP
// response header.  The numeric values are unchanged.
enum
{
  // First line of the response.
  HDST_LINE1_PROTOCOL   = 0,   // initial state: inside the first token
  HDST_LINE1_WHITESPACE = 1,   // a blank or tab followed the first token
  HDST_LINE1_STATUS     = 2,   // accumulating the decimal status code

  // Remaining header text.
  HDST_BOL              = 10,  // handled by get_url(), but no transition there enters it
  HDST_TEXT             = 11,  // somewhere inside a line
  HDST_LF               = 12,  // last byte was '\n'
  HDST_CR               = 13,  // last byte was '\r'
  HDST_CRLF             = 14,  // last two bytes were "\r\n"
  HDST_CRLFCR           = 15   // last three bytes were "\r\n\r"
};
00023
00024 ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
00025 : url_(0),
00026 url_addr_(0),
00027 stream_(0),
00028 connector_(0),
00029 size_(0),
00030 data_offset_ (0),
00031 encoding_ (0)
00032 {
00033
00034 }
00035
00036 ACEXML_HttpCharStream::~ACEXML_HttpCharStream (void)
00037 {
00038 this->close ();
00039 }
00040
00041 int
00042 ACEXML_HttpCharStream::open (const ACEXML_Char *url)
00043 {
00044 this->url_ = ACE::strnew (url);
00045
00046 ACE_NEW_RETURN (this->url_addr_, ACEXML_URL_Addr, -1);
00047 ACE_NEW_RETURN (this->stream_, ACEXML_Mem_Map_Stream, -1);
00048
00049 if (this->url_addr_->string_to_addr (this->url_) == -1) {
00050 this->close();
00051 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot convert URL"), -1);
00052 }
00053
00054 ACE_NEW_RETURN (this->connector_,
00055 Connector (0, ACE_NONBLOCK),
00056 -1);
00057
00058 if (this->stream_->open (this->connector_, *this->url_addr_) == -1) {
00059 this->close();
00060 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot open backing store"), -1);
00061 }
00062
00063 int result = this->send_request();
00064 if (result == -1) {
00065 this->close();
00066 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "send_request"), -1);
00067 }
00068
00069 size_t len = 0;
00070 result = this->get_url(len);
00071 if (result == -1) {
00072 this->close();
00073 ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "get_url"), -1);
00074 }
00075 if (result != 200) {
00076 this->close();
00077 ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
00078 result,
00079 "Refer HTTP/1.0 error code for details"), -1);
00080 }
00081
00082 this->size_ = static_cast<ACE_OFF_T> (len);
00083 return this->determine_encoding();
00084 }
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113 int
00114 ACEXML_HttpCharStream::get_url (size_t& len)
00115 {
00116 if (this->stream_ == 0)
00117 {
00118 return -1;
00119 }
00120
00121 int header_state = HDST_LINE1_PROTOCOL;
00122 int status = 0;
00123 size_t b = 0;
00124 char const * buf = 0;
00125 size_t buflen = BUFSIZ;
00126
00127 for (;;)
00128 {
00129 buf = this->stream_->recv (buflen);
00130
00131 if (buf == 0)
00132 {
00133 if (buflen == 0)
00134 {
00135 break;
00136 }
00137 else
00138 {
00139 continue;
00140 }
00141 }
00142
00143 for (b = 0; b < buflen; ++b)
00144 {
00145 switch ( header_state )
00146 {
00147 case HDST_LINE1_PROTOCOL:
00148 switch ( buf[b] )
00149 {
00150 case ' ': case '\t':
00151 header_state = HDST_LINE1_WHITESPACE; break;
00152 case '\n': header_state = HDST_LF ; break;
00153 case '\r': header_state = HDST_CR; break;
00154 }
00155 break;
00156 case HDST_LINE1_WHITESPACE:
00157 switch ( buf[b] )
00158 {
00159 case '0': case '1': case '2': case '3': case '4':
00160 case '5': case '6': case '7': case '8': case '9':
00161 status = buf[b] - '0';
00162 header_state = HDST_LINE1_STATUS;
00163 break;
00164 case '\n': header_state = HDST_LF ; break;
00165 case '\r': header_state = HDST_CR; break;
00166 default: header_state = HDST_TEXT; break;
00167 }
00168 break;
00169 case HDST_LINE1_STATUS:
00170 switch ( buf[b] )
00171 {
00172 case '0': case '1': case '2': case '3': case '4':
00173 case '5': case '6': case '7': case '8': case '9':
00174 status = status * 10 + buf[b] - '0';
00175 break;
00176 case '\n': header_state = HDST_LF ; break;
00177 case '\r': header_state = HDST_CR; break;
00178 default: header_state = HDST_TEXT; break;
00179 }
00180 break;
00181 case HDST_BOL:
00182 switch ( buf[b] )
00183 {
00184 case '\n': header_state = HDST_LF; break;
00185 case '\r': header_state = HDST_CR; break;
00186 default: header_state = HDST_TEXT; break;
00187 }
00188 break;
00189 case HDST_TEXT:
00190 switch ( buf[b] )
00191 {
00192 case '\n': header_state = HDST_LF; break;
00193 case '\r': header_state = HDST_CR; break;
00194 }
00195 break;
00196
00197 case HDST_LF:
00198 switch ( buf[b] )
00199 {
00200 case '\n': goto end_of_headers;
00201 case '\r': header_state = HDST_CR; break;
00202 default: header_state = HDST_TEXT; break;
00203 }
00204 break;
00205
00206 case HDST_CR:
00207 switch ( buf[b] )
00208 {
00209 case '\n': header_state = HDST_CRLF; break;
00210 case '\r': goto end_of_headers;
00211 default: header_state = HDST_TEXT; break;
00212 }
00213 break;
00214
00215 case HDST_CRLF:
00216 switch ( buf[b] )
00217 {
00218 case '\n': goto end_of_headers;
00219 case '\r': header_state = HDST_CRLFCR; break;
00220 default: header_state = HDST_TEXT; break;
00221 }
00222 break;
00223
00224 case HDST_CRLFCR:
00225 switch ( buf[b] )
00226 {
00227 case '\n': case '\r': goto end_of_headers;
00228 default: header_state = HDST_TEXT; break;
00229 }
00230 break;
00231 }
00232 }
00233 }
00234 end_of_headers:
00235
00236 if (b == 0)
00237 {
00238 return -1;
00239 }
00240
00241 ++b;
00242
00243
00244 char const * const data_beg = buf + b;
00245 buflen = BUFSIZ;
00246
00247
00248
00249 while ((buf = this->stream_->recv (buflen)) != 0)
00250 ;
00251
00252
00253 len = this->stream_->recv() - data_beg;
00254
00255
00256 this->stream_->rewind();
00257
00258 this->data_offset_ =
00259 ACE_Utils::truncate_cast<ACE_OFF_T> (data_beg - this->stream_->recv());
00260
00261
00262 if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00263 {
00264 ACE_ERROR_RETURN ((LM_ERROR,
00265 "%s: %m",
00266 "Error in seeking to beginning of data"),
00267 -1);
00268 }
00269
00270 return status;
00271 }
00272
00273
00274 int
00275 ACEXML_HttpCharStream::send_request (void)
00276 {
00277 char* path = ACE::strnew (ACE_TEXT_ALWAYS_CHAR (this->url_addr_->get_path_name()));
00278 ACE_Auto_Basic_Array_Ptr<char> path_ptr (path);
00279 size_t commandsize = ACE_OS::strlen (path)
00280 + ACE_OS::strlen (this->url_addr_->get_host_name ())
00281 + 20
00282 + 1
00283 + 16 ;
00284
00285 char* command;
00286 ACE_NEW_RETURN (command, char[commandsize], -1);
00287
00288
00289 ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
00290
00291 int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
00292 bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
00293 this->url_addr_->get_host_name ());
00294 bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
00295
00296 ACE_Time_Value tv (ACE_DEFAULT_TIMEOUT);
00297
00298
00299 int retval = static_cast<int> (this->stream_->send_n (command, bytes, &tv));
00300 if (retval <= 0)
00301 return -1;
00302 return retval;
00303 }
00304
00305
00306 int
00307 ACEXML_HttpCharStream::available (void)
00308 {
00309 if (this->stream_ == 0)
00310 return -1;
00311 return static_cast<int> (this->stream_->available());
00312 }
00313
00314 int
00315 ACEXML_HttpCharStream::close (void)
00316 {
00317 delete[] this->url_;
00318 this->url_ = 0;
00319
00320 delete this->url_addr_;
00321 this->url_addr_ = 0;
00322
00323 delete this->stream_;
00324 this->stream_ = 0;
00325
00326 delete this->connector_;
00327 this->connector_ = 0;
00328
00329 this->size_ = 0;
00330 this->data_offset_ = 0;
00331
00332 delete[] this->encoding_;
00333 this->encoding_ = 0;
00334
00335 return 0;
00336 }
00337
00338 int
00339 ACEXML_HttpCharStream::determine_encoding (void)
00340 {
00341 if (this->stream_ == 0)
00342 return -1;
00343
00344 char input[] = {0, 0, 0, 0};
00345 size_t const len = sizeof (input) / sizeof (input[0]);
00346
00347 size_t i = 0;
00348 for (; i < len && input[i] != static_cast<char> (EOF); ++i)
00349 input[i] = this->stream_->peek_char (i);
00350
00351 if (i < len)
00352 return -1;
00353
00354 ACEXML_Char const * const temp = ACEXML_Encoding::get_encoding (input);
00355
00356 if (!temp)
00357 return -1;
00358 else
00359 {
00360 if (this->encoding_)
00361 delete [] this->encoding_;
00362
00363 this->encoding_ = ACE::strnew (temp);
00364
00365 }
00366
00367
00368 for (size_t j = 0; j < len; ++j)
00369 {
00370 if (input[j] == '\xFF' || input[j] == '\xFE' || input[j] == '\xEF' ||
00371 input[j] == '\xBB' || input[j] == '\xBF')
00372 {
00373 this->stream_->get_char();
00374 continue;
00375 }
00376 break;
00377 }
00378
00379 return 0;
00380 }
00381
00382 void
00383 ACEXML_HttpCharStream::rewind (void)
00384 {
00385 if (this->stream_ == 0)
00386 return;
00387 this->stream_->rewind();
00388
00389
00390 if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00391 ACE_ERROR ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"));
00392 this->determine_encoding();
00393 }
00394
00395 const ACEXML_Char*
00396 ACEXML_HttpCharStream::getEncoding (void)
00397 {
00398 return this->encoding_;
00399 }
00400
00401 const ACEXML_Char*
00402 ACEXML_HttpCharStream::getSystemId (void)
00403 {
00404 return this->url_;
00405 }
00406
00407
00408 int
00409 ACEXML_HttpCharStream::read (ACEXML_Char *str,
00410 size_t len)
00411 {
00412 if (this->stream_ == 0)
00413 return -1;
00414 len = len * sizeof (ACEXML_Char);
00415 const char* temp = this->stream_->recv (len);
00416 if (temp == 0)
00417 return -1;
00418 ACE_OS::strncpy (str, ACE_TEXT_CHAR_TO_TCHAR (temp), len);
00419 return static_cast<int> (len);
00420 }
00421
00422
00423 int
00424 ACEXML_HttpCharStream::get (ACEXML_Char& ch)
00425 {
00426 if (this->stream_ == 0)
00427 return -1;
00428 #if defined (ACE_USES_WCHAR)
00429 return this->get_i (ch);
00430 #else
00431 ch = (ACEXML_Char) this->stream_->get_char();
00432 return (ch == (ACEXML_Char)EOF ? -1 :0);
00433 #endif
00434 }
00435
00436 int
00437 ACEXML_HttpCharStream::peek (void)
00438 {
00439 if (this->stream_ == 0)
00440 return -1;
00441
00442 #if defined (ACE_USES_WCHAR)
00443 return this->peek_i();
00444 #else
00445 return this->stream_->peek_char (0);
00446 #endif
00447 }
00448
00449
00450 #if defined (ACE_USES_WCHAR)
00451 int
00452 ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
00453 {
00454 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00455 {
00456 ch = (ACEXML_Char) this->stream_->get_char();
00457 return (ch == (ACEXML_Char)EOF ? -1 : 0);
00458 }
00459 int BE = (ACE_OS::strcmp (this->encoding_,
00460 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00461 ACEXML_Char input[2] = {0};
00462 int i = 0;
00463 for (; i < 2 && (input[i] = this->stream_->get_char()) > 0; ++i)
00464 ;
00465 if (i < 2)
00466 {
00467 ch = 0;
00468 return input[i];
00469 }
00470 ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00471 return 0;
00472 }
00473
00474 int
00475 ACEXML_HttpCharStream::peek_i (void)
00476 {
00477
00478 if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00479 {
00480 ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
00481 return ch;
00482 }
00483
00484 int BE = (ACE_OS::strcmp (this->encoding_,
00485 ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00486
00487 ACEXML_Char input[2];
00488 int i = 0;
00489 for (; i < 2 && (input[i] = this->stream_->peek_char (i)) > 0; ++i)
00490 ;
00491 if (i < 2)
00492 return -1;
00493 return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
00494 }
00495 #endif