HttpCharStream.cpp

Go to the documentation of this file.
00001 // HttpCharStream.cpp,v 1.22 2006/03/14 21:20:40 sjiang Exp
00002 
00003 #include "ace/ACE.h"
00004 #include "ace/ace_wchar.h"
00005 #include "ace/Auto_Ptr.h"
00006 #include "ace/OS_NS_stdio.h"
00007 #include "ace/OS_NS_string.h"
00008 #include "ACEXML/common/HttpCharStream.h"
00009 #include "ACEXML/common/Encoding.h"
00010 
00011 ACE_RCSID (common, HttpCharStream, "HttpCharStream.cpp,v 1.22 2006/03/14 21:20:40 sjiang Exp")
00012 
/* States of the finite state machine that scans the HTTP response
   headers.  The HDST_LINE1_* values are used while the first (status)
   line is being read; the remaining values track line endings while
   looking for the blank line that ends the headers. */
enum
{
  HDST_LINE1_PROTOCOL   = 0,
  HDST_LINE1_WHITESPACE = 1,
  HDST_LINE1_STATUS     = 2,
  HDST_BOL              = 10,
  HDST_TEXT             = 11,
  HDST_LF               = 12,
  HDST_CR               = 13,
  HDST_CRLF             = 14,
  HDST_CRLFCR           = 15
};
00023 
00024 ACEXML_HttpCharStream::ACEXML_HttpCharStream (void)
00025   : url_(0),
00026     url_addr_(0),
00027     stream_(0),
00028     connector_(0),
00029     size_(0),
00030     data_offset_ (0),
00031     encoding_ (0)
00032 {
00033 
00034 }
00035 
00036 ACEXML_HttpCharStream::~ACEXML_HttpCharStream (void)
00037 {
00038   this->close ();
00039 }
00040 
00041 int
00042 ACEXML_HttpCharStream::open (const ACEXML_Char *url)
00043 {
00044   this->url_ = ACE::strnew (url);
00045 
00046   ACE_NEW_RETURN (this->url_addr_, ACEXML_URL_Addr, -1);
00047   ACE_NEW_RETURN (this->stream_, ACEXML_Mem_Map_Stream, -1);
00048 
00049   if (this->url_addr_->string_to_addr (this->url_) == -1) {
00050     this->close();
00051     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot convert URL"), -1);
00052   }
00053 
00054   ACE_NEW_RETURN (this->connector_,
00055                   Connector (0, ACE_NONBLOCK),
00056                   -1);
00057 
00058   if (this->stream_->open (this->connector_, *this->url_addr_) == -1) {
00059     this->close();
00060     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "cannot open backing store"), -1);
00061   }
00062 
00063   int result = this->send_request();
00064   if (result == -1) {
00065     this->close();
00066     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "send_request"), -1);
00067   }
00068 
00069   size_t len = 0;
00070   result = this->get_url(len);
00071   if (result == -1) {
00072     this->close();
00073     ACE_ERROR_RETURN ((LM_ERROR, "%p\n", "get_url"), -1);
00074   }
00075   if (result != 200) {
00076     this->close();
00077     ACE_ERROR_RETURN ((LM_ERROR, "Server returned status %d : %s\n",
00078                        result,
00079                        "Refer HTTP/1.0 error code for details"), -1);
00080   }
00081 
00082   this->size_ = static_cast<off_t> (len);
00083   return this->determine_encoding();
00084 }
00085 
00086 // The FSM was taken from the implementation of http_get and that falls
00087 // under the following license:
00088 //
00089 // Copyright (c) 2000 by Jef Poskanzer <jef@acme.com>.  All rights reserved.
00090 
00091 // Redistribution and use in source and binary forms, with or without
00092 // modification, are permitted provided that the following conditions
00093 // are met:
00094 // 1. Redistributions of source code must retain the above copyright
00095 //    notice, this list of conditions and the following disclaimer.
00096 // 2. Redistributions in binary form must reproduce the above copyright
00097 //    notice, this list of conditions and the following disclaimer in the
00098 //    documentation and/or other materials provided with the distribution.
00099 
00100 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
00101 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00102 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00103 // ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
00104 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00105 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00106 // OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00107 // HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00108 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00109 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00110 // SUCH DAMAGE.
00111 
00112 
00113 int
00114 ACEXML_HttpCharStream::get_url (size_t& len)
00115 {
00116   if (this->stream_ == 0)
00117     return -1;
00118 
00119   int header_state = HDST_LINE1_PROTOCOL;
00120   int status = 0;
00121   size_t b = 0;
00122   char* buf = 0;
00123   size_t buflen = BUFSIZ;
00124   for (;;)
00125     {
00126       if ((buf = const_cast<char*> (this->stream_->recv (buflen))) == 0)
00127         if (buflen <= 0)
00128           break;
00129 
00130       for (b = 0; b < buflen; ++b)
00131         {
00132           switch ( header_state )
00133             {
00134             case HDST_LINE1_PROTOCOL:
00135               switch ( buf[b] )
00136                 {
00137                 case ' ': case '\t':
00138                   header_state = HDST_LINE1_WHITESPACE; break;
00139                 case '\n': header_state = HDST_LF ; break;
00140                 case '\r': header_state = HDST_CR; break;
00141                 }
00142               break;
00143             case HDST_LINE1_WHITESPACE:
00144               switch ( buf[b] )
00145                 {
00146                 case '0': case '1': case '2': case '3': case '4':
00147                 case '5': case '6': case '7': case '8': case '9':
00148                   status = buf[b] - '0';
00149                   header_state = HDST_LINE1_STATUS;
00150                   break;
00151                 case '\n': header_state = HDST_LF ; break;
00152                 case '\r': header_state = HDST_CR; break;
00153                 default: header_state = HDST_TEXT; break;
00154                 }
00155               break;
00156             case HDST_LINE1_STATUS:
00157               switch ( buf[b] )
00158                 {
00159                 case '0': case '1': case '2': case '3': case '4':
00160                 case '5': case '6': case '7': case '8': case '9':
00161                   status = status * 10 + buf[b] - '0';
00162                   break;
00163                 case '\n': header_state = HDST_LF ; break;
00164                 case '\r': header_state = HDST_CR; break;
00165                 default: header_state = HDST_TEXT; break;
00166                 }
00167               break;
00168             case HDST_BOL:
00169               switch ( buf[b] )
00170                 {
00171                 case '\n': header_state = HDST_LF; break;
00172                 case '\r': header_state = HDST_CR; break;
00173                 default: header_state = HDST_TEXT; break;
00174                 }
00175               break;
00176             case HDST_TEXT:
00177               switch ( buf[b] )
00178                 {
00179                 case '\n': header_state = HDST_LF; break;
00180                 case '\r': header_state = HDST_CR; break;
00181                 }
00182               break;
00183 
00184             case HDST_LF:
00185               switch ( buf[b] )
00186                 {
00187                 case '\n': goto end_of_headers;
00188                 case '\r': header_state = HDST_CR; break;
00189                 default: header_state = HDST_TEXT; break;
00190                 }
00191               break;
00192 
00193             case HDST_CR:
00194               switch ( buf[b] )
00195                 {
00196                 case '\n': header_state = HDST_CRLF; break;
00197                 case '\r': goto end_of_headers;
00198                 default: header_state = HDST_TEXT; break;
00199                 }
00200               break;
00201 
00202             case HDST_CRLF:
00203               switch ( buf[b] )
00204                 {
00205                 case '\n': goto end_of_headers;
00206                 case '\r': header_state = HDST_CRLFCR; break;
00207                 default: header_state = HDST_TEXT; break;
00208                 }
00209               break;
00210 
00211             case HDST_CRLFCR:
00212               switch ( buf[b] )
00213                 {
00214                 case '\n': case '\r': goto end_of_headers;
00215                 default: header_state = HDST_TEXT; break;
00216                 }
00217               break;
00218             }
00219         }
00220     }
00221  end_of_headers:
00222   if (b == 0)
00223     return -1;
00224   ++b;
00225   // Store the address of the beginning of data. We will use it to seek to
00226   // beginning of the data in the URL.
00227   char* data_beg = buf + b;
00228   buflen = BUFSIZ;
00229 
00230   // Get all of the data. Since this is backed by file store, we won't lose
00231   // any of the data.
00232   while (( buf = const_cast<char*> (this->stream_->recv (buflen))) != 0)
00233     ;
00234 
00235   // Length of data in the URL.
00236   len = this->stream_->recv() - data_beg;
00237 
00238   // Move the pointer to the beginning of the file store.
00239   this->stream_->rewind();
00240 
00241   this->data_offset_ = data_beg - this->stream_->recv();
00242   // Forward to the beginning of data.
00243   if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00244     ACE_ERROR_RETURN ((LM_ERROR, "%s: %m",
00245                        "Error in seeking to beginning of data"), -1);
00246 
00247   return status;
00248 }
00249 
00250 
00251 int
00252 ACEXML_HttpCharStream::send_request (void)
00253 {
00254   char* path = ACE::strnew (ACE_TEXT_ALWAYS_CHAR (this->url_addr_->get_path_name()));
00255   ACE_Auto_Basic_Array_Ptr<char> path_ptr (path);
00256   size_t commandsize = ACE_OS::strlen (path)
00257                        + ACE_OS::strlen (this->url_addr_->get_host_name ())
00258                        + 20     // Extra
00259                        + 1      // NUL byte
00260                        + 16 ;   // Protocol filler...
00261 
00262   char* command;
00263   ACE_NEW_RETURN (command, char[commandsize], -1);
00264 
00265   // Ensure that the <command> memory is deallocated.
00266   ACE_Auto_Basic_Array_Ptr<char> cmd_ptr (command);
00267 
00268   int bytes = ACE_OS::sprintf (command, "GET %s HTTP/1.0\r\n", path);
00269   bytes += ACE_OS::sprintf (&command[bytes], "Host: %s\r\n",
00270                             this->url_addr_->get_host_name ());
00271   bytes += ACE_OS::sprintf (&command[bytes], "\r\n");
00272 
00273   ACE_Time_Value tv (ACE_DEFAULT_TIMEOUT);
00274 
00275   // Send the command to the connected server.
00276   int retval = this->stream_->send_n (command, bytes, &tv);
00277   if (retval <= 0)
00278     return -1;
00279   return retval;
00280 }
00281 
00282 
00283 int
00284 ACEXML_HttpCharStream::available (void)
00285 {
00286   if (this->stream_ == 0)
00287     return -1;
00288   return static_cast<int> (this->stream_->available());
00289 }
00290 
00291 int
00292 ACEXML_HttpCharStream::close (void)
00293 {
00294   delete[] this->url_;
00295   this->url_ = 0;
00296 
00297   delete this->url_addr_;
00298   this->url_addr_ = 0;
00299 
00300   delete this->stream_;
00301   this->stream_ = 0;
00302 
00303   delete this->connector_;
00304   this->connector_ = 0;
00305 
00306   this->size_ = 0;
00307   this->data_offset_ = 0;
00308 
00309   delete[] this->encoding_;
00310   this->encoding_ = 0;
00311 
00312   return 0;
00313 }
00314 
00315 int
00316 ACEXML_HttpCharStream::determine_encoding (void)
00317 {
00318   if (this->stream_ == 0)
00319     return -1;
00320 
00321   char input[4] = {0, 0, 0, 0};
00322   int i = 0;
00323   for (; i < 4 && input[i] != (char)-1; ++i)
00324     input[i] = static_cast<char> (this->stream_->peek_char(i));
00325   if (i < 4)
00326     return -1;
00327   const ACEXML_Char* temp = ACEXML_Encoding::get_encoding (input);
00328   if (!temp)
00329     return -1;
00330   else
00331     {
00332       if (this->encoding_)
00333         delete [] this->encoding_;
00334       this->encoding_ = ACE::strnew (temp);
00335       //   ACE_DEBUG ((LM_DEBUG, "URI's encoding is %s\n", this->encoding_));
00336     }
00337   // Move over the byte-order-mark if present.
00338   for (int j = 0; j < 3; ++j)
00339     {
00340       if (input[i] == '\xFF' || input[i] == '\xFE' || input[i] == '\xEF' ||
00341           input[i] == '\xBB' || input[i] == '\xBF')
00342         {
00343           this->stream_->get_char();
00344           continue;
00345         }
00346       break;
00347     }
00348   return 0;
00349 }
00350 
00351 void
00352 ACEXML_HttpCharStream::rewind (void)
00353 {
00354   if (this->stream_ == 0)
00355     return;
00356   this->stream_->rewind();
00357 
00358   // Forward to the beginning of data.
00359   if (this->stream_->seek (this->data_offset_, SEEK_SET) == -1)
00360     ACE_ERROR ((LM_ERROR, "%s: %m", "Error in seeking to beginning of data"));
00361   this->determine_encoding();
00362 }
00363 
00364 const ACEXML_Char*
00365 ACEXML_HttpCharStream::getEncoding (void)
00366 {
00367   return this->encoding_;
00368 }
00369 
00370 const ACEXML_Char*
00371 ACEXML_HttpCharStream::getSystemId (void)
00372 {
00373   return this->url_;
00374 }
00375 
00376 
00377 int
00378 ACEXML_HttpCharStream::read (ACEXML_Char *str,
00379                              size_t len)
00380 {
00381   if (this->stream_ == 0)
00382     return -1;
00383   len = len * sizeof (ACEXML_Char);
00384   char* temp = const_cast<char*> (this->stream_->recv (len));
00385   str = ACE_TEXT_CHAR_TO_TCHAR (temp);
00386   if (str == 0)
00387     return -1;
00388   return static_cast<int> (len);
00389 }
00390 
00391 
00392 int
00393 ACEXML_HttpCharStream::get (ACEXML_Char& ch)
00394 {
00395   if (this->stream_ == 0)
00396     return -1;
00397 #if defined (ACE_USES_WCHAR)
00398   return this->get_i (ch);
00399 #else
00400   ch = (ACEXML_Char) this->stream_->get_char();
00401   return (ch == (ACEXML_Char)EOF ? -1 :0);
00402 #endif /* ACE_USES_WCHAR */
00403 }
00404 
00405 int
00406 ACEXML_HttpCharStream::peek (void)
00407 {
00408   if (this->stream_ == 0)
00409     return -1;
00410 
00411 #if defined (ACE_USES_WCHAR)
00412   return this->peek_i();
00413 #else
00414   return this->stream_->peek_char (0);
00415 #endif /* ACE_USES_WCHAR */
00416 }
00417 
00418 
00419 #if defined (ACE_USES_WCHAR)
00420 int
00421 ACEXML_HttpCharStream::get_i (ACEXML_Char& ch)
00422 {
00423   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00424     {
00425       ch = (ACEXML_Char) this->stream_->get_char();
00426       return (ch == (ACEXML_Char)EOF ? -1 : 0);
00427     }
00428   int BE = (ACE_OS::strcmp (this->encoding_,
00429                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00430   ACEXML_Char input[2] = {0};
00431   int i = 0;
00432   for (; i < 2 && (input[i] = this->stream_->get_char()) > 0; ++i)
00433     ;
00434   if (i < 2)
00435     {
00436       ch = 0;
00437       return input[i];
00438     }
00439   ch = BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0];
00440   return 0;
00441 }
00442 
00443 int
00444 ACEXML_HttpCharStream::peek_i (void)
00445 {
00446   // If we are reading a UTF-8 encoded file, just use the plain unget.
00447   if (ACE_OS::strcmp (this->encoding_, ACE_TEXT ("UTF-8")) == 0)
00448     {
00449       ACEXML_Char ch = (ACEXML_Char) this->stream_->peek_char (0);
00450       return ch;
00451     }
00452 
00453   int BE = (ACE_OS::strcmp (this->encoding_,
00454                             ACE_TEXT ("UTF-16BE")) == 0) ? 1 : 0;
00455   // Peek into the stream.
00456   ACEXML_Char input[2];
00457   int i = 0;
00458   for (; i < 2 && (input[i] = this->stream_->peek_char (i)) > 0; ++i)
00459     ;
00460   if (i < 2)
00461     return -1;
00462   return (BE ? input[0] << 8 | input[1] : input[1] << 8 | input[0]);
00463 }
00464 #endif /* ACE_USES_WCHAR */

Generated on Thu Nov 9 11:45:37 2006 for ACEXML by doxygen 1.3.6