Svc_Conf_Lexer.cpp

Go to the documentation of this file.
00001 // $Id: Svc_Conf_Lexer.cpp 81312 2008-04-09 21:01:34Z iliyan $
00002 #include "ace/Svc_Conf_Lexer.h"
00003 
00004 #if (ACE_USES_CLASSIC_SVC_CONF == 1)
00005 
00006 #if defined (ACE_USES_WCHAR)
00007 #include "ace/Encoding_Converter.h"
00008 #include "ace/Encoding_Converter_Factory.h"
00009 #endif /* ACE_USES_WCHAR */
00010 
00011 #include "ace/Svc_Conf_Tokens.h"
00012 #include "ace/Recursive_Thread_Mutex.h"
00013 #include "ace/Static_Object_Lock.h"
00014 #include "ace/OS_NS_stdio.h"
00015 #include "ace/OS_NS_ctype.h"
00016 #include "ace/OS_NS_string.h"
00017 #include "ace/Guard_T.h"
00018 #include "ace/Synch_Traits.h"
00019 #include "ace/os_include/os_ctype.h"
00020 
00021 #if !defined (__GNUG__)
00022 # include "ace/Auto_Ptr.h"
00023 #endif
00024 
00025 ACE_BEGIN_VERSIONED_NAMESPACE_DECL
00026 
// Upper bound, in bytes, on what is requested from the input source
// for one refill of the scanner buffer.
#define ACE_YY_BUF_SIZE 4096
// Unit, in bytes, that read sizes are rounded down to and the factor
// by which the scanner buffer is over-allocated for code set conversion.
#define ACE_MAX_BYTES_PER_CHAR 4
// Capacity of the scanner and conversion buffers.  The product is
// parenthesized so that it stays a single operand wherever the macro
// is expanded (e.g. on the right-hand side of a division).
#define ACE_YY_CONVERSION_SPACE (ACE_YY_BUF_SIZE * ACE_MAX_BYTES_PER_CHAR)
00030 
// ACE_TEMPORARY_STRING (X, SIZE) declares a writable char buffer named
// X with room for SIZE chars that lives until the end of the enclosing
// scope.
//  - With GNU C++ a variable length array is used.
//  - Otherwise a fixed ACE_YY_BUF_SIZE stack buffer is used when it is
//    large enough, and a heap array when it is not.  The heap array is
//    held by an array auto pointer so that it is released with
//    delete [] rather than scalar delete.
// The macro expands to several statements, so it must be used as a
// statement of its own inside a block.
#if defined (__GNUG__)
# define ACE_TEMPORARY_STRING(X,SIZE) \
   __extension__ char X[SIZE]
#else
# define ACE_TEMPORARY_STRING(X,SIZE) \
   char* X = 0; \
   char X ## buf[ACE_YY_BUF_SIZE]; \
   ACE_Auto_Array_Ptr<char> X ## bufp (0); \
   if ((SIZE) > ACE_YY_BUF_SIZE) { \
     X ## bufp.reset (new char[(SIZE)]); \
     X = X ## bufp.get (); \
   } \
   else { \
     X = X ## buf; \
   }
#endif /* __GNUG__ */
00047 
// These are states not covered by the tokens in Svc_Conf_Tokens.h
// ACE_NO_STATE is the initial scanner state and is also what scan()
// returns when it did not produce a token.
#define ACE_NO_STATE -1
// ACE_COMMENT is entered when scan() sees '#' and is left again at the
// next newline.
#define ACE_COMMENT 0
00051 
00052 #if defined (_MSC_VER) && (_MSC_VER >= 1400)
00053 // Visual Studio .NET 2005 (VC8) issues warning C4351 for input_ in the
00054 // constructor initializer list below. Since we like the warned-of new
00055 // behavior (input_ elements will be default-initialized), squash the
00056 // warning here.
00057 #  pragma warning (push)
00058 #  pragma warning (disable:4351)
00059 #endif /* VC8 */
00060 
00061 struct ace_yy_buffer_state
00062 {
00063   ace_yy_buffer_state (void)
00064    : input_ (),
00065      index_ (0),
00066      size_ (0),
00067      start_ (0),
00068      need_more_ (true),
00069      eof_ (false),
00070      state_ (ACE_NO_STATE),
00071      string_start_ (0)
00072 #if defined (ACE_USES_WCHAR)
00073      , converter_ (0)
00074 #endif /* ACE_USES_WCHAR */
00075   {
00076   }
00077 #if defined (_MSC_VER) && (_MSC_VER >= 1400)
00078 #  pragma warning (pop)
00079 #endif /* VC8 */
00080 
00081   ~ace_yy_buffer_state (void)
00082   {
00083 #if defined (ACE_USES_WCHAR)
00084     delete converter_;
00085 #endif /* ACE_USES_WCHAR */
00086   }
00087 
00088   // Input related
00089   char input_[ACE_YY_CONVERSION_SPACE];
00090   size_t index_;
00091   size_t size_;
00092   size_t start_;
00093   bool need_more_;
00094   bool eof_;
00095 
00096   // Parsing related
00097   int state_;
00098   char string_start_;
00099 
00100 #if defined (ACE_USES_WCHAR)
00101   // Code set conversion related
00102   ACE_Encoding_Converter* converter_;
00103 #endif /* ACE_USES_WCHAR */
00104 };
00105 
00106 // ******************************************************************
00107 // Global functions
00108 // ******************************************************************
00109 
// Free-function entry point to the lexer: forwards to
// ACE_Svc_Conf_Lexer::yylex() and returns its result.
int
ace_yylex (YYSTYPE *ace_yylval, void *YYLEX_PARAM)
{
  // NOTE(review): presumably takes the ACE_Static_Object_Lock in
  // multi-threaded builds and returns -1 if it cannot be acquired --
  // the macros are defined outside this file, confirm there.
  ACE_MT (ACE_GUARD_RETURN (ACE_SYNCH_RECURSIVE_MUTEX,
                            ace_mon,
                            *ACE_Static_Object_Lock::instance (),
                            -1));

  // NOTE(review): ACE_SVC_CONF_PARAM is not defined in this file;
  // presumably it yields the YYLEX_PARAM argument as an
  // ACE_Svc_Conf_Param pointer -- confirm against its definition.
  return ACE_Svc_Conf_Lexer::yylex (ace_yylval, ACE_SVC_CONF_PARAM);

}
00121 
// Destroys a scanner buffer.  The buffer's destructor runs, which in
// wide-character builds also deletes its encoding converter.  Passing
// a null pointer is harmless.
void
ace_yy_delete_buffer (ace_yy_buffer_state *buffer)
{
  delete buffer;
}
00127 
00128 // ******************************************************************
00129 // Inline methods
00130 // ******************************************************************
00131 
00132 inline size_t
00133 normalize (size_t length)
00134 {
00135   return (length >= ACE_MAX_BYTES_PER_CHAR ?
00136           (length / ACE_MAX_BYTES_PER_CHAR) * ACE_MAX_BYTES_PER_CHAR :
00137           length);
00138 }
00139 
00140 // ******************************************************************
00141 // Static class methods
00142 // ******************************************************************
00143 
00144 int
00145 ACE_Svc_Conf_Lexer::yylex (YYSTYPE* ace_yylval,
00146                            ACE_Svc_Conf_Param* param)
00147 {
00148 #if defined (ACE_USES_WCHAR)
00149   bool look_for_bom = false;
00150   ACE_Encoding_Converter_Factory::Encoding_Hint hint =
00151                 ACE_Encoding_Converter_Factory::ACE_NONE;
00152 #endif /* ACE_USES_WCHAR */
00153   if (param->buffer == 0)
00154     {
00155 #if defined (ACE_USES_WCHAR)
00156       look_for_bom = true;
00157 #endif /* ACE_USES_WCHAR */
00158       ACE_NEW_RETURN (param->buffer,
00159                       ace_yy_buffer_state,
00160                       -1);
00161     }
00162 
00163   int token = ACE_NO_STATE;
00164   do {
00165     if (param->buffer->need_more_)
00166       {
00167 #if defined (ACE_USES_WCHAR)
00168         size_t skip_bytes = 0;
00169 #endif /* ACE_USES_WCHAR */
00170         param->buffer->need_more_ = false;
00171         size_t amount =
00172                input (param,
00173                       param->buffer->input_ + param->buffer->size_,
00174                       normalize (ACE_YY_BUF_SIZE -
00175                                  param->buffer->size_));
00176         if (amount == 0)
00177           {
00178             param->buffer->eof_ = true;
00179 #if defined (ACE_USES_WCHAR)
00180             skip_bytes = param->buffer->size_;
00181 #endif /* ACE_USES_WCHAR */
00182           }
00183         else
00184           {
00185 #if defined (ACE_USES_WCHAR)
00186             if (look_for_bom)
00187               {
00188                 size_t read_more = 0;
00189 
00190                 look_for_bom = false;
00191                 hint = locate_bom (param->buffer->input_, amount, read_more);
00192 
00193                 if (read_more != 0)
00194                   {
00195                     input (param,
00196                            param->buffer->input_ + amount,
00197                            read_more);
00198                     ACE_OS::memmove (param->buffer->input_,
00199                                      param->buffer->input_ + read_more,
00200                                      amount);
00201                   }
00202               }
00203             skip_bytes = param->buffer->size_;
00204 #endif /* ACE_USES_WCHAR */
00205             param->buffer->size_ += amount;
00206           }
00207 
00208 #if defined (ACE_USES_WCHAR)
00209         if (!convert_to_utf8 (param, skip_bytes, hint))
00210           {
00211             ace_yyerror (++param->yyerrno,
00212                          param->yylineno,
00213                          ACE_TEXT ("Unable to convert input stream to UTF-8"));
00214             return ACE_NO_STATE;
00215           }
00216 #endif /* ACE_USES_WCHAR */
00217       }
00218 
00219     token = scan (ace_yylval, param);
00220   } while (token == ACE_NO_STATE && param->buffer->need_more_);
00221 
00222   return token;
00223 }
00224 
00225 size_t
00226 ACE_Svc_Conf_Lexer::input (ACE_Svc_Conf_Param* param,
00227                            char* buf, size_t max_size)
00228 {
00229   size_t result = 0;
00230 
00231   switch (param->type)
00232     {
00233     case ACE_Svc_Conf_Param::SVC_CONF_FILE:
00234       errno = 0;
00235       while ((result = ACE_OS::fread (buf, 1,
00236                                       max_size, param->source.file)) == 0 &&
00237              ferror (param->source.file))
00238         {
00239           if (errno == EINTR)
00240             {
00241               errno = 0;
00242 #if !defined (ACE_LACKS_CLEARERR)
00243               ACE_OS::clearerr (param->source.file);
00244 #endif /* !ACE_LACKS_CLEARERR */
00245             }
00246           else
00247             {
00248               ACE_OS::fprintf (stderr, "ERROR: input in scanner failed\n");
00249               ACE_OS::exit (2);
00250             }
00251         }
00252       break;
00253     case ACE_Svc_Conf_Param::SVC_CONF_DIRECTIVE:
00254       result = ACE_OS::strlen (param->source.directive +
00255                                param->buffer->start_) * sizeof (ACE_TCHAR);
00256       if (result != 0)
00257         {
00258           // Make sure that the amount we are going to copy
00259           // fits in the buffer
00260           if (result > max_size)
00261             {
00262               result = max_size;
00263             }
00264           ACE_OS::memcpy (buf,
00265                           param->source.directive + param->buffer->start_,
00266                           result);
00267           param->buffer->start_ += (result / sizeof (ACE_TCHAR));
00268         }
00269       break;
00270     default:
00271       ace_yyerror (++param->yyerrno,
00272                    param->yylineno,
00273                    ACE_TEXT ("Invalid Service Configurator type in ")
00274                    ACE_TEXT ("ACE_Svc_Conf_Lexer::input"));
00275     }
00276 
00277   return result;
00278 }
00279 
00280 int
00281 ACE_Svc_Conf_Lexer::scan (YYSTYPE* ace_yylval,
00282                           ACE_Svc_Conf_Param* param)
00283 
00284 {
00285   ace_yy_buffer_state* buffer = param->buffer;
00286 
00287   // If we are not currently in any state, skip over whitespace
00288   if (buffer->state_ == ACE_NO_STATE)
00289     {
00290       while (buffer->index_ < buffer->size_ &&
00291              isspace (buffer->input_[buffer->index_]))
00292         {
00293           // Make sure that we count all of the new lines
00294           if (buffer->input_[buffer->index_] == '\n')
00295             {
00296               ++param->yylineno;
00297             }
00298           ++buffer->index_;
00299         }
00300     }
00301 
00302   size_t current;
00303   size_t last = buffer->size_ + (buffer->eof_ ? 1 : 0);
00304   for (current = buffer->index_; current < last; current++)
00305     {
00306       static const char* separators = " \t\r\n:*(){}";
00307       char c = (buffer->eof_ && current == buffer->size_ ?
00308                                      '\n' : buffer->input_[current]);
00309       switch (buffer->state_)
00310         {
00311           case ACE_COMMENT:
00312             if (c == '\n')
00313               {
00314                 buffer->state_ = ACE_NO_STATE;
00315                 buffer->index_ = current + 1;
00316                 ++param->yylineno;
00317               }
00318             break;
00319           case ACE_STRING:
00320             if (!(c >= ' ' && c <= '~'))
00321               {
00322                 // The character at currrent is definitely not part of
00323                 // the string so we need to move current back one.
00324                 --current;
00325 
00326                 // Get the starting point of our string (skipping the quote)
00327                 char* source = buffer->input_ + buffer->index_ + 1;
00328 
00329                 // Now, we need to move back in the string until we find the
00330                 // same character that started the string
00331                 bool string_end_found = false;
00332                 if (current > buffer->index_)
00333                   {
00334                     for (size_t i = current - buffer->index_; i-- != 0; )
00335                       {
00336                         if (source[i] == buffer->string_start_)
00337                           {
00338                             current = buffer->index_ + i + 1;
00339                             string_end_found = true;
00340                             break;
00341                           }
00342                       }
00343                   }
00344 
00345                 if (!string_end_found)
00346                   {
00347                     ace_yyerror (++param->yyerrno,
00348                                  param->yylineno,
00349                                  ACE_TEXT ("Unable to find the end of the string"));
00350                     return ACE_NO_STATE;
00351                   }
00352 
00353                 size_t amount = (current - buffer->index_) - 1;
00354 #if defined (ACE_USES_WCHAR)
00355                 ACE_TCHAR target[ACE_YY_CONVERSION_SPACE] = ACE_TEXT ("");
00356                 size_t length = 0;
00357                 if (!convert_from_utf8 (buffer->converter_,
00358                                         source,
00359                                         amount,
00360                                         target,
00361                                         ACE_YY_CONVERSION_SPACE,
00362                                         length))
00363                   {
00364                     ace_yyerror (++param->yyerrno,
00365                                  param->yylineno,
00366                                  ACE_TEXT ("Unable to convert string from UTF-8"));
00367                     return ACE_NO_STATE;
00368                   }
00369                 amount = length;
00370 #else
00371                 char* target = source;
00372 #endif /* ACE_USES_WCHAR */
00373                 ace_yylval->ident_ = param->obstack.copy (target, amount);
00374                 buffer->state_ = ACE_NO_STATE;
00375                 buffer->index_ = current + 1;
00376                 return ACE_STRING;
00377               }
00378             break;
00379           case ACE_NO_STATE:
00380             if (c == '"' || c == '\'')
00381               {
00382                 buffer->string_start_ = c;
00383                 buffer->state_ = ACE_STRING;
00384               }
00385             else if (c == '#')
00386               {
00387                 buffer->state_ = ACE_COMMENT;
00388               }
00389             else if (ACE_OS::strchr (separators, c) != 0)
00390               {
00391                 if (c == '\n')
00392                   {
00393                     ++param->yylineno;
00394                   }
00395 
00396                 if (current == buffer->index_ + 1)
00397                   {
00398                     int lower = ACE_OS::ace_tolower (
00399                                           buffer->input_[current - 1]);
00400                     if (c == ':' &&
00401                         (buffer->input_[current - 1] == '%' ||
00402                          (lower >= 'a' && lower <= 'z')))
00403                       {
00404                         // This is considered a path, so we need to
00405                         // skip over the ':' and go around the loop
00406                         // again
00407                         break;
00408                       }
00409                   }
00410 
00411                 if (current == buffer->index_)
00412                   {
00413                     buffer->index_ = current + 1;
00414                     if (isspace (c))
00415                       {
00416                         // This is an empty line.
00417                         // Let's look for something else.
00418                         break;
00419                       }
00420                     else
00421                       {
00422                         return c;
00423                       }
00424                   }
00425 
00426                 // String from buffer->index_ to current (inclusive)
00427                 size_t size = (current - buffer->index_) + 1;
00428                 ACE_TEMPORARY_STRING (str, size);
00429                 ACE_OS::strncpy (str, buffer->input_ + buffer->index_,
00430                                  size - 1);
00431                 str[size - 1] = '\0';
00432 
00433 
00434                 if (ACE_OS::strcmp (str, "dynamic") == 0)
00435                   {
00436                     buffer->index_ = current;
00437                     return ACE_DYNAMIC;
00438                   }
00439                 else if (ACE_OS::strcmp (str, "static") == 0)
00440                   {
00441                     buffer->index_ = current;
00442                     return ACE_STATIC;
00443                   }
00444                 else if (ACE_OS::strcmp (str, "suspend") == 0)
00445                   {
00446                     buffer->index_ = current;
00447                     return ACE_SUSPEND;
00448                   }
00449                 else if (ACE_OS::strcmp (str, "resume") == 0)
00450                   {
00451                     buffer->index_ = current;
00452                     return ACE_RESUME;
00453                   }
00454                 else if (ACE_OS::strcmp (str, "remove") == 0)
00455                   {
00456                     buffer->index_ = current;
00457                     return ACE_REMOVE;
00458                   }
00459                 else if (ACE_OS::strcmp (str, "stream") == 0)
00460                   {
00461                     buffer->index_ = current;
00462                     return ACE_USTREAM;
00463                   }
00464                 else if (ACE_OS::strcmp (str, "Module") == 0)
00465                   {
00466                     buffer->index_ = current;
00467                     return ACE_MODULE_T;
00468                   }
00469                 else if (ACE_OS::strcmp (str, "Service_Object") == 0)
00470                   {
00471                     buffer->index_ = current;
00472                     return ACE_SVC_OBJ_T;
00473                   }
00474                 else if (ACE_OS::strcmp (str, "STREAM") == 0)
00475                   {
00476                     buffer->index_ = current;
00477                     return ACE_STREAM_T;
00478                   }
00479                 else if (ACE_OS::strcmp (str, "active") == 0)
00480                   {
00481                     buffer->index_ = current;
00482                     return ACE_ACTIVE;
00483                   }
00484                 else if (ACE_OS::strcmp (str, "inactive") == 0)
00485                   {
00486                     buffer->index_ = current;
00487                     return ACE_INACTIVE;
00488                   }
00489                 else
00490                   {
00491                     // Get the string and save it in ace_yylval
00492                     int token = ACE_IDENT;
00493                     size_t amount = size - 1;
00494 #if defined (ACE_USES_WCHAR)
00495                     ACE_TCHAR target[ACE_YY_CONVERSION_SPACE] = ACE_TEXT ("");
00496                     size_t length = 0;
00497                     if (!convert_from_utf8 (buffer->converter_,
00498                                             str,
00499                                             amount,
00500                                             target,
00501                                             ACE_YY_CONVERSION_SPACE,
00502                                             length))
00503                       {
00504                         ace_yyerror (++param->yyerrno,
00505                                      param->yylineno,
00506                                      ACE_TEXT ("Unable to convert ")
00507                                      ACE_TEXT ("identifier from UTF-8"));
00508                         return ACE_NO_STATE;
00509                       }
00510                     amount = length;
00511 #else
00512                     char* target = str;
00513 #endif /* ACE_USES_WCHAR */
00514                     ace_yylval->ident_ = param->obstack.copy (target, amount);
00515 
00516                     // Determine the difference between pathname and ident
00517                     if (ACE_OS::ace_isdigit (ace_yylval->ident_[0]))
00518                       {
00519                         token = ACE_PATHNAME;
00520                       }
00521                     else
00522                       {
00523                         static const ACE_TCHAR* path_parts =
00524                                                 ACE_TEXT ("/\\:%.~-");
00525                         for (const ACE_TCHAR* p = path_parts; *p != '\0'; p++)
00526                           {
00527                             if (ACE_OS::strchr (ace_yylval->ident_, *p) != 0)
00528                               {
00529                                 token = ACE_PATHNAME;
00530                                 break;
00531                               }
00532                           }
00533                       }
00534 
00535                     buffer->state_ = ACE_NO_STATE;
00536                     buffer->index_ = current;
00537                     return token;
00538                   }
00539               }
00540             break;
00541           default:
00542             ace_yyerror (++param->yyerrno,
00543                          param->yylineno,
00544                          ACE_TEXT ("Unexpected state in ACE_Svc_Conf_Lexer::scan"));
00545             return ACE_NO_STATE;
00546         }
00547     }
00548 
00549   // We need more from the input source so, we will move the remainder of
00550   // the buffer to the front and signal that we need more
00551   if (!buffer->eof_)
00552     {
00553       buffer->need_more_ = true;
00554       if (buffer->state_ == ACE_COMMENT)
00555         {
00556           buffer->index_ = 0;
00557           buffer->size_  = 0;
00558         }
00559       else
00560         {
00561           buffer->size_ = current - buffer->index_;
00562           if (buffer->size_ != 0 && buffer->index_ != 0)
00563             ACE_OS::memmove (buffer->input_,
00564                              buffer->input_ + buffer->index_, buffer->size_);
00565           buffer->index_ = 0;
00566           buffer->state_ = ACE_NO_STATE;
00567         }
00568     }
00569   return ACE_NO_STATE;
00570 }
00571 
00572 #if defined (ACE_USES_WCHAR)
00573 
00574 bool
00575 ACE_Svc_Conf_Lexer::convert_to_utf8 (
00576                       ACE_Svc_Conf_Param* param,
00577                       size_t skip_bytes,
00578                       ACE_Encoding_Converter_Factory::Encoding_Hint hint)
00579 {
00580   bool status = false;
00581   if (param->buffer->converter_ == 0)
00582     {
00583       param->buffer->converter_ =
00584         ACE_Encoding_Converter_Factory::create (
00585           reinterpret_cast<ACE_Byte*> (param->buffer->input_ + skip_bytes),
00586           param->buffer->size_ - skip_bytes,
00587           hint);
00588     }
00589 
00590   if (param->buffer->converter_ != 0)
00591     {
00592       char target[ACE_YY_CONVERSION_SPACE] = "";
00593       if (param->buffer->converter_->to_utf8 (
00594             param->buffer->input_ + skip_bytes,
00595             param->buffer->size_ - skip_bytes,
00596             reinterpret_cast<ACE_Byte*> (target),
00597             ACE_YY_CONVERSION_SPACE) == ACE_Encoding_Converter::CONVERSION_OK)
00598         {
00599           ACE_OS::strcpy (param->buffer->input_ + skip_bytes, target);
00600           param->buffer->size_ = ACE_OS::strlen (target) + skip_bytes;
00601           status = true;
00602         }
00603     }
00604 
00605   return status;
00606 }
00607 
00608 bool
00609 ACE_Svc_Conf_Lexer::convert_from_utf8 (ACE_Encoding_Converter* converter,
00610                                        const char* source,
00611                                        size_t source_size,
00612                                        ACE_TCHAR* target,
00613                                        size_t target_size,
00614                                        size_t& length)
00615 {
00616   if (converter != 0)
00617     {
00618       if (converter->from_utf8 (
00619             reinterpret_cast <const ACE_Byte*> (source),
00620             source_size,
00621             target,
00622             target_size) != ACE_Encoding_Converter::CONVERSION_OK)
00623         {
00624           return false;
00625         }
00626     }
00627   else
00628     {
00629       ACE_OS::strncpy (target, ACE_TEXT_CHAR_TO_TCHAR (source), source_size);
00630       target[source_size] = 0;
00631     }
00632 
00633   length = ACE_OS::strlen (target);
00634   return true;
00635 }
00636 
00637 ACE_Encoding_Converter_Factory::Encoding_Hint
00638 ACE_Svc_Conf_Lexer::locate_bom (char* source,
00639                                 size_t source_size,
00640                                 size_t& bytes_used)
00641 {
00642   struct bom {
00643     size_t length_;
00644     const char* data_;
00645     ACE_Encoding_Converter_Factory::Encoding_Hint hint_;
00646   };
00647   static const bom boms[] = {
00648     { 4, "\x00\x00\xfe\xff", ACE_Encoding_Converter_Factory::ACE_UTF_32BE },
00649     { 4, "\xff\xfe\x00\x00", ACE_Encoding_Converter_Factory::ACE_UTF_32LE },
00650     { 2, "\xfe\xff",         ACE_Encoding_Converter_Factory::ACE_UTF_16BE },
00651     { 2, "\xff\xfe",         ACE_Encoding_Converter_Factory::ACE_UTF_16LE },
00652     { 3, "\xef\xbb\xbf",     ACE_Encoding_Converter_Factory::ACE_UTF_8    },
00653   };
00654 
00655   for (size_t i = 0; i < sizeof (boms) / sizeof (bom); i++)
00656     {
00657       if (source_size >= boms[i].length_)
00658         {
00659           if (ACE_OS::memcmp (source,
00660                               boms[i].data_, boms[i].length_) == 0)
00661             {
00662               bytes_used = boms[i].length_;
00663               return boms[i].hint_;
00664             }
00665         }
00666     }
00667 
00668   // No BOM was found
00669   bytes_used = 0;
00670   return ACE_Encoding_Converter_Factory::ACE_NONE;
00671 }
00672 
00673 #endif /* ACE_USES_WCHAR */
00674 
00675 ACE_END_VERSIONED_NAMESPACE_DECL
00676 #endif /* ACE_USES_CLASSIC_SVC_CONF = 1 */

Generated on Tue Feb 2 17:18:43 2010 for ACE by  doxygen 1.4.7