HttpCharStream.cpp

Go to the documentation of this file.
00001 // $Id: HttpCharStream.cpp 75114 2006-10-27 23:48:24Z ossama $
00002 
00003 #include "ace/ACE.h"
00004 #include "ace/ace_wchar.h"
00005 #include "ace/Auto_Ptr.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_string.h"
00008 #include "ACEXML/common/HttpCharStream.h"
00009 #include "ACEXML/common/Encoding.h"
00010 
00011 ACE_RCSID (common, HttpCharStream, "$Id: HttpCharStream.cpp 75114 2006-10-27 23:48:24Z ossama $")
00012 
// States of the HTTP response-header parser used by get_url().  The first
// three follow the status line; the remaining ones follow line endings so
// that the blank line closing the header section can be recognised.
enum
{
  HDST_LINE1_PROTOCOL   = 0,
  HDST_LINE1_WHITESPACE = 1,
  HDST_LINE1_STATUS     = 2,
  HDST_BOL              = 10,
  HDST_TEXT             = 11,
  HDST_LF               = 12,
  HDST_CR               = 13,
  HDST_CRLF             = 14,
  HDST_CRLFCR           = 15
};
00023 
00024 ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
00025   : url_(0),
00026     url_addr_(0),
00027     stream_(0),
00028     connector_(0),
00029     size_(0),
00030     data_offset_ (0),
00031     encoding_ (0)
00032 {
00033 
00034 }
00035 
00036 ACEXML_HttpCharStream::~ACEXML_HttpCharStream (void)
00037 {
00038   this->close ();
00039 }
00040 
00041 int
00042 ACEXML_HttpCharStream::open (const ACEXML_Char *url)
00043 {
00044   this->url_ = ACE::strnew (url);
00045 
00046   ACE_NEW_RETURN (this->url_addr_, ACEXML_URL_Addr, -1);
00047   ACE_NEW_RETURN (this->stream_, ACEXML_Mem_Map_Stream, -1);
00048 
00049   if (this->url_addr_->string_to_addr (this->url_) == -1) {
00050     this->close();
00051     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot convert URL"), -1);
00052   }
00053 
00054   ACE_NEW_RETURN (this->connector_,
00055                   Connector (0, ACE_NONBLOCK),
00056                   -1);
00057 
00058   if (this->stream_->open (this->connector_, *this->url_addr_) == -1) {
00059     this->close();
00060     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot open backing store"), -1);
00061   }
00062 
00063   int result = this->send_request();
00064   if (result == -1) {
00065     this->close();
00066     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "send_request"), -1);
00067   }
00068 
00069   size_t len = 0;
00070   result = this->get_url(len);
00071   if (result == -1) {
00072     this->close();
00073     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "get_url"), -1);
00074   }
00075   if (result != 200) {
00076     this->close();
00077     ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
00078                        result,
00079                        "Refer HTTP/1.0 error code for details"), -1);
00080   }
00081 
00082   this->size_ = static_cast<ACE_OFF_T> (len);
00083   return this->determine_encoding();
00084 }
00085 
00086 // The FSM was taken from the implementation of http_get and that falls
00087 // under the following license:
00088 //
00089 // Copyright (c) 2000 by Jef Poskanzer <jef@acme.com>.  All rights reserved.
00090 
00091 // Redistribution and use in source and binary forms, with or without
00092 // modification, are permitted provided that the following conditions
00093 // are met:
00094 // 1. Redistributions of source code must retain the above copyright
00095 //    notice, this list of conditions and the following disclaimer.
00096 // 2. Redistributions in binary form must reproduce the above copyright
00097 //    notice, this list of conditions and the following disclaimer in the
00098 //    documentation and/or other materials provided with the distribution.
00099 
00100 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00101 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00102 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00103 // ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00104 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00105 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00106 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00107 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00108 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00109 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00110 // SUCH DAMAGE.
00111 
00112 
00113 int
00114 ACEXML_HttpCharStream::get_url (size_t& len)
00115 {
00116   if (this->stream_ == 0)
00117     return -1;
00118 
00119   int header_state = HDST_LINE1_PROTOCOL;
00120   int status = 0;
00121   size_t b = 0;
00122   char const * buf = 0;
00123   size_t buflen = BUFSIZ;
00124   for (;;)
00125     {
00126       buf = this->stream_->recv (buflen);
00127 
00128       if (buf == 0)
00129         if (buflen == 0)
00130           break;
00131         else
00132           continue;
00133 
00134       for (b = 0; b < buflen; ++b)
00135         {
00136           switch ( header_state )
00137             {
00138             case HDST_LINE1_PROTOCOL:
00139               switch ( buf[b] )
00140                 {
00141                 case ' ': case '\t':
00142                   header_state = HDST_LINE1_WHITESPACE; break;
00143                 case '\n': header_state = HDST_LF ; break;
00144                 case '\r': header_state = HDST_CR; break;
00145                 }
00146               break;
00147             case HDST_LINE1_WHITESPACE:
00148               switch ( buf[b] )
00149                 {
00150                 case '0': case '1': case '2': case '3': case '4':
00151                 case '5': case '6': case '7': case '8': case '9':
00152                   status = buf[b] - '0';
00153                   header_state = HDST_LINE1_STATUS;
00154                   break;
00155                 case '\n': header_state = HDST_LF ; break;
00156                 case '\r': header_state = HDST_CR; break;
00157                 default: header_state = HDST_TEXT; break;
00158                 }
00159               break;
00160             case HDST_LINE1_STATUS:
00161               switch ( buf[b] )
00162                 {
00163                 case '0': case '1': case '2': case '3': case '4':
00164                 case '5': case '6': case '7': case '8': case '9':
00165                   status = status * 10 + buf[b] - '0';
00166                   break;
00167                 case '\n': header_state = HDST_LF ; break;
00168                 case '\r': header_state = HDST_CR; break;
00169                 default: header_state = HDST_TEXT; break;
00170                 }
00171               break;
00172             case HDST_BOL:
00173               switch ( buf[b] )
00174                 {
00175                 case '\n': header_state = HDST_LF; break;
00176                 case '\r': header_state = HDST_CR; break;
00177                 default: header_state = HDST_TEXT; break;
00178                 }
00179               break;
00180             case HDST_TEXT:
00181               switch ( buf[b] )
00182                 {
00183                 case '\n': header_state = HDST_LF; break;
00184                 case '\r': header_state = HDST_CR; break;
00185                 }
00186               break;
00187 
00188             case HDST_LF:
00189               switch ( buf[b] )
00190                 {
00191                 case '\n': goto end_of_headers;
00192                 case '\r': header_state = HDST_CR; break;
00193                 default: header_state = HDST_TEXT; break;
00194                 }
00195               break;
00196 
00197             case HDST_CR:
00198               switch ( buf[b] )
00199                 {
00200                 case '\n': header_state = HDST_CRLF; break;
00201                 case '\r': goto end_of_headers;
00202                 default: header_state = HDST_TEXT; break;
00203                 }
00204               break;
00205 
00206             case HDST_CRLF:
00207               switch ( buf[b] )
00208                 {
00209                 case '\n': goto end_of_headers;
00210                 case '\r': header_state = HDST_CRLFCR; break;
00211                 default: header_state = HDST_TEXT; break;
00212                 }
00213               break;
00214 
00215             case HDST_CRLFCR:
00216               switch ( buf[b] )
00217                 {
00218                 case '\n': case '\r': goto end_of_headers;
00219                 default: header_state = HDST_TEXT; break;
00220                 }
00221               break;
00222             }
00223         }
00224     }
00225  end_of_headers:
00226   if (b == 0)
00227     return -1;
00228   ++b;
00229   // Store the address of the beginning of data. We will use it to seek to
00230   // beginning of the data in the URL.
00231   char const * const data_beg = buf + b;
00232   buflen = BUFSIZ;
00233 
00234   // Get all of the data. Since this is backed by file store, we won't lose
00235   // any of the data.
00236   while ((buf = this->stream_->recv (buflen)) != 0)
00237     ;
00238 
00239   // Length of data in the URL.
00240   len = this->stream_->recv() - data_beg;
00241 
00242   // Move the pointer to the beginning of the file store.
00243   this->stream_->rewind();
00244 
00245   this->data_offset_ = data_beg - this->stream_->recv();
00246   // Forward to the beginning of data.
00247   if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00248     ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
00249                        "Error in seeking to beginning of data"), -1);
00250 
00251   return status;
00252 }
00253 
00254 
00255 int
00256 ACEXML_HttpCharStream::send_request (void)
00257 {
00258   char* path = ACE::strnew (ACE_TEXT_ALWAYS_CHAR (this->url_addr_->get_path_name()));
00259   ACE_Auto_Basic_Array_Ptr<char> path_ptr (path);
00260   size_t commandsize = ACE_OS::strlen (path)
00261                        + ACE_OS::strlen (this->url_addr_->get_host_name ())
00262                        + 20     // Extra
00263                        + 1      // NUL byte
00264                        + 16 ;   // Protocol filler...
00265 
00266   char* command;
00267   ACE_NEW_RETURN (command, char[commandsize], -1);
00268 
00269   // Ensure that the <command> memory is deallocated.
00270   ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
00271 
00272   int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
00273   bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
00274                             this->url_addr_->get_host_name ());
00275   bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
00276 
00277   ACE_Time_Value tv (ACE_DEFAULT_TIMEOUT);
00278 
00279   // Send the command to the connected server.
00280   int retval = static_cast<int> (this->stream_->send_n (command, bytes, &tv));
00281   if (retval <= 0)
00282     return -1;
00283   return retval;
00284 }
00285 
00286 
00287 int
00288 ACEXML_HttpCharStream::available (void)
00289 {
00290   if (this->stream_ == 0)
00291     return -1;
00292   return static_cast<int> (this->stream_->available());
00293 }
00294 
00295 int
00296 ACEXML_HttpCharStream::close (void)
00297 {
00298   delete[] this->url_;
00299   this->url_ = 0;
00300 
00301   delete this->url_addr_;
00302   this->url_addr_ = 0;
00303 
00304   delete this->stream_;
00305   this->stream_ = 0;
00306 
00307   delete this->connector_;
00308   this->connector_ = 0;
00309 
00310   this->size_ = 0;
00311   this->data_offset_ = 0;
00312 
00313   delete[] this->encoding_;
00314   this->encoding_ = 0;
00315 
00316   return 0;
00317 }
00318 
00319 int
00320 ACEXML_HttpCharStream::determine_encoding (void)
00321 {
00322   if (this->stream_ == 0)
00323     return -1;
00324 
00325   char input[] = {0, 0, 0, 0};
00326   size_t const len = sizeof (input) / sizeof (input[0]);
00327   
00328   size_t i = 0;
00329   for (; i < len && input[i] != static_cast<char> (EOF); ++i)
00330     input[i] = this->stream_->peek_char (i);
00331 
00332   if (i < len)
00333     return -1;
00334 
00335   ACEXML_Char const * const temp = ACEXML_Encoding::get_encoding (input);
00336 
00337   if (!temp)
00338     return -1;
00339   else
00340     {
00341       if (this->encoding_)
00342         delete [] this->encoding_;
00343 
00344       this->encoding_ = ACE::strnew (temp);
00345       //   ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_));
00346     }
00347 
00348   // Move over the byte-order-mark if present.
00349   for (size_t j = 0; j < len; ++j)
00350     {
00351       if (input[j] == '\xFF' || input[j] == '\xFE' || input[j] == '\xEF' ||
00352           input[j] == '\xBB' || input[j] == '\xBF')
00353         {
00354           this->stream_->get_char();
00355           continue;
00356         }
00357       break;
00358     }
00359 
00360   return 0;
00361 }
00362 
00363 void
00364 ACEXML_HttpCharStream::rewind (void)
00365 {
00366   if (this->stream_ == 0)
00367     return;
00368   this->stream_->rewind();
00369 
00370   // Forward to the beginning of data.
00371   if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00372     ACE_ERROR ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"));
00373   this->determine_encoding();
00374 }
00375 
00376 const ACEXML_Char*
00377 ACEXML_HttpCharStream::getEncoding (void)
00378 {
00379   return this->encoding_;
00380 }
00381 
00382 const ACEXML_Char*
00383 ACEXML_HttpCharStream::getSystemId (void)
00384 {
00385   return this->url_;
00386 }
00387 
00388 
00389 int
00390 ACEXML_HttpCharStream::read (ACEXML_Char *str,
00391                              size_t len)
00392 {
00393   if (this->stream_ == 0)
00394     return -1;
00395   len = len * sizeof (ACEXML_Char);
00396   char* temp = const_cast<char*> (this->stream_->recv (len));
00397   str = ACE_TEXT_CHAR_TO_TCHAR (temp);
00398   if (str == 0)
00399     return -1;
00400   return static_cast<int> (len);
00401 }
00402 
00403 
00404 int
00405 ACEXML_HttpCharStream::get (ACEXML_Char& ch)
00406 {
00407   if (this->stream_ == 0)
00408     return -1;
00409 #if defined (ACE_USES_WCHAR)
00410   return this->get_i (ch);
00411 #else
00412   ch = (ACEXML_Char) this->stream_->get_char();
00413   return (ch == (ACEXML_Char)EOF ? -1 :0);
00414 #endif /* ACE_USES_WCHAR */
00415 }
00416 
00417 int
00418 ACEXML_HttpCharStream::peek (void)
00419 {
00420   if (this->stream_ == 0)
00421     return -1;
00422 
00423 #if defined (ACE_USES_WCHAR)
00424   return this->peek_i();
00425 #else
00426   return this->stream_->peek_char (0);
00427 #endif /* ACE_USES_WCHAR */
00428 }
00429 
00430 
00431 #if defined (ACE_USES_WCHAR)
00432 int
00433 ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
00434 {
00435   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00436     {
00437       ch = (ACEXML_Char) this->stream_->get_char();
00438       return (ch == (ACEXML_Char)EOF ? -1 : 0);
00439     }
00440   int BE = (ACE_OS::strcmp (this->encoding_,
00441                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00442   ACEXML_Char input[2] = {0};
00443   int i = 0;
00444   for (; i < 2 && (input[i] = this->stream_->get_char()) > 0; ++i)
00445     ;
00446   if (i < 2)
00447     {
00448       ch = 0;
00449       return input[i];
00450     }
00451   ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00452   return 0;
00453 }
00454 
00455 int
00456 ACEXML_HttpCharStream::peek_i (void)
00457 {
00458   // If we are reading a UTF-8 encoded file, just use the plain unget.
00459   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00460     {
00461       ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
00462       return ch;
00463     }
00464 
00465   int BE = (ACE_OS::strcmp (this->encoding_,
00466                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00467   // Peek into the stream.
00468   ACEXML_Char input[2];
00469   int i = 0;
00470   for (; i < 2 && (input[i] = this->stream_->peek_char (i)) > 0; ++i)
00471     ;
00472   if (i < 2)
00473     return -1;
00474   return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
00475 }
00476 #endif /* ACE_USES_WCHAR */

Generated on Sun Jan 27 13:04:15 2008 for ACEXML by doxygen 1.3.6