Svc_Conf_Lexer.cpp

Go to the documentation of this file.
00001 // Svc_Conf_Lexer.cpp,v 4.6 2006/06/06 21:55:25 shuston Exp
00002 #include "ace/Svc_Conf_Lexer.h"
00003 
00004 #if (ACE_USES_CLASSIC_SVC_CONF == 1)
00005 
00006 #if defined (ACE_USES_WCHAR)
00007 #include "ace/Encoding_Converter.h"
00008 #include "ace/Encoding_Converter_Factory.h"
00009 #endif /* ACE_USES_WCHAR */
00010 
00011 #include "ace/Svc_Conf_Tokens.h"
00012 #include "ace/Recursive_Thread_Mutex.h"
00013 #include "ace/Static_Object_Lock.h"
00014 #include "ace/OS_NS_stdio.h"
00015 #include "ace/OS_NS_ctype.h"
00016 #include "ace/OS_NS_string.h"
00017 #include "ace/Guard_T.h"
00018 #include "ace/Synch_Traits.h"
00019 #include "ace/os_include/os_ctype.h"
00020 
00021 #if !defined (__GNUG__)
00022 # include "ace/Auto_Ptr.h"
00023 #endif
00024 
00025 ACE_BEGIN_VERSIONED_NAMESPACE_DECL
00026 
// Upper bound on the number of buffered bytes for which the lexer still
// asks the input source for more data.
#define ACE_YY_BUF_SIZE 4096
// Granularity, in bytes, to which read lengths are rounded down.
#define ACE_MAX_BYTES_PER_CHAR 4
// Capacity, in elements, of the lexer's input and conversion buffers.
// The product is parenthesized so that the macro keeps its value when it
// appears next to a higher- or equal-precedence operator (e.g. `x % M`).
#define ACE_YY_CONVERSION_SPACE (ACE_YY_BUF_SIZE * ACE_MAX_BYTES_PER_CHAR)
00030 
// ACE_TEMPORARY_STRING (X, SIZE) declares a writable char buffer named X
// with room for SIZE bytes that lives until the enclosing scope ends.
#if defined (__GNUG__)
// g++: rely on the variable-length array extension.
# define ACE_TEMPORARY_STRING(X,SIZE) \
   __extension__ char X[(SIZE)]
#else
// Other compilers: use a fixed-size stack buffer when SIZE fits and fall
// back to a heap array otherwise.  The heap array is held by an
// ACE_Auto_Array_Ptr so that memory obtained with new[] is released with
// delete[] rather than with scalar delete.
# define ACE_TEMPORARY_STRING(X,SIZE) \
   char* X = 0; \
   char X ## buf[ACE_YY_BUF_SIZE]; \
   ACE_Auto_Array_Ptr<char> X ## bufp (0); \
   if ((SIZE) > ACE_YY_BUF_SIZE) { \
     X ## bufp.reset (new char[(SIZE)]); \
     X = X ## bufp.get (); \
   } \
   else { \
     X = X ## buf; \
   }
#endif /* __GNUG__ */
00047 
// Lexer states that have no counterpart among the tokens declared in
// Svc_Conf_Tokens.h.  The negative value is wrapped in parentheses so it
// is always substituted as a single operand.
#define ACE_NO_STATE (-1)
#define ACE_COMMENT 0
00051 
00052 struct ace_yy_buffer_state
00053 {
00054   ace_yy_buffer_state (void)
00055    : input_ (),
00056      index_ (0),
00057      size_ (0),
00058      start_ (0),
00059      need_more_ (true),
00060      eof_ (false),
00061      state_ (ACE_NO_STATE),
00062      string_start_ (0)
00063 #if defined (ACE_USES_WCHAR)
00064      , converter_ (0)
00065 #endif /* ACE_USES_WCHAR */
00066   {
00067   }
00068 
00069   ~ace_yy_buffer_state (void)
00070   {
00071 #if defined (ACE_USES_WCHAR)
00072     delete converter_;
00073 #endif /* ACE_USES_WCHAR */
00074   }
00075 
00076   // Input related
00077   char input_[ACE_YY_CONVERSION_SPACE];
00078   size_t index_;
00079   size_t size_;
00080   size_t start_;
00081   bool need_more_;
00082   bool eof_;
00083 
00084   // Parsing related
00085   int state_;
00086   char string_start_;
00087 
00088 #if defined (ACE_USES_WCHAR)
00089   // Code set conversion related
00090   ACE_Encoding_Converter* converter_;
00091 #endif /* ACE_USES_WCHAR */
00092 };
00093 
00094 // ******************************************************************
00095 // Global functions
00096 // ******************************************************************
00097 
00098 int
00099 ace_yylex (ACE_YYSTYPE *ace_yylval, void *ACE_YYLEX_PARAM)
00100 {
00101   ACE_MT (ACE_GUARD_RETURN (ACE_SYNCH_RECURSIVE_MUTEX,
00102                             ace_mon,
00103                             *ACE_Static_Object_Lock::instance (),
00104                             -1));
00105 
00106   return ACE_Svc_Conf_Lexer::yylex (ace_yylval, ACE_SVC_CONF_PARAM);
00107 
00108 }
00109 
00110 void
00111 ace_yy_delete_buffer (ace_yy_buffer_state *buffer)
00112 {
00113   delete buffer;
00114 }
00115 
00116 // ******************************************************************
00117 // Inline methods
00118 // ******************************************************************
00119 
00120 inline size_t
00121 normalize (size_t length)
00122 {
00123   return (length >= ACE_MAX_BYTES_PER_CHAR ?
00124           (length / ACE_MAX_BYTES_PER_CHAR) * ACE_MAX_BYTES_PER_CHAR :
00125           length);
00126 }
00127 
00128 // ******************************************************************
00129 // Static class methods
00130 // ******************************************************************
00131 
00132 int
00133 ACE_Svc_Conf_Lexer::yylex (ACE_YYSTYPE* ace_yylval,
00134                            ACE_Svc_Conf_Param* param)
00135 {
00136 #if defined (ACE_USES_WCHAR)
00137   bool look_for_bom = false;
00138   ACE_Encoding_Converter_Factory::Encoding_Hint hint =
00139                 ACE_Encoding_Converter_Factory::ACE_NONE;
00140 #endif /* ACE_USES_WCHAR */
00141   if (param->buffer == 0)
00142     {
00143 #if defined (ACE_USES_WCHAR)
00144       look_for_bom = true;
00145 #endif /* ACE_USES_WCHAR */
00146       ACE_NEW_RETURN (param->buffer,
00147                       ace_yy_buffer_state,
00148                       -1);
00149     }
00150 
00151   int token = ACE_NO_STATE;
00152   do {
00153     if (param->buffer->need_more_)
00154       {
00155         size_t skip_bytes = 0;
00156         param->buffer->need_more_ = false;
00157         size_t amount =
00158                input (param,
00159                       param->buffer->input_ + param->buffer->size_,
00160                       normalize (ACE_YY_BUF_SIZE -
00161                                  param->buffer->size_));
00162         if (amount == 0)
00163           {
00164             param->buffer->eof_ = true;
00165             skip_bytes = param->buffer->size_;
00166           }
00167         else
00168           {
00169 #if defined (ACE_USES_WCHAR)
00170             if (look_for_bom)
00171               {
00172                 size_t read_more = 0;
00173 
00174                 look_for_bom = false;
00175                 hint = locate_bom (param->buffer->input_, amount, read_more);
00176 
00177                 if (read_more != 0)
00178                   {
00179                     input (param,
00180                            param->buffer->input_ + amount,
00181                            read_more);
00182                     ACE_OS::memmove (param->buffer->input_,
00183                                      param->buffer->input_ + read_more,
00184                                      amount);
00185                   }
00186               }
00187 #endif /* ACE_USES_WCHAR */
00188             skip_bytes = param->buffer->size_;
00189             param->buffer->size_ += amount;
00190           }
00191 
00192 #if defined (ACE_USES_WCHAR)
00193         if (!convert_to_utf8 (param, skip_bytes, hint))
00194           {
00195             ace_yyerror (++param->yyerrno,
00196                          param->yylineno,
00197                          "Unable to convert input stream to UTF-8");
00198             return ACE_NO_STATE;
00199           }
00200 #endif /* ACE_USES_WCHAR */
00201       }
00202 
00203     token = scan (ace_yylval, param);
00204   } while (token == ACE_NO_STATE && param->buffer->need_more_);
00205 
00206   return token;
00207 }
00208 
00209 size_t
00210 ACE_Svc_Conf_Lexer::input (ACE_Svc_Conf_Param* param,
00211                            char* buf, size_t max_size)
00212 {
00213   size_t result = 0;
00214 
00215   switch (param->type)
00216     {
00217     case ACE_Svc_Conf_Param::SVC_CONF_FILE:
00218       errno = 0;
00219       while ((result = ACE_OS::fread (buf, 1,
00220                                       max_size, param->source.file)) == 0 &&
00221              ferror (param->source.file))
00222         {
00223           if (errno == EINTR)
00224             {
00225               errno = 0;
00226 #if !defined (ACE_LACKS_CLEARERR)
00227               ACE_OS::clearerr (param->source.file);
00228 #endif /* !ACE_LACKS_CLEARERR */
00229             }
00230           else
00231             {
00232               ACE_OS::fprintf (stderr, "ERROR: input in scanner failed\n");
00233               ACE_OS::exit (2);
00234             }
00235         }
00236       break;
00237     case ACE_Svc_Conf_Param::SVC_CONF_DIRECTIVE:
00238       result = ACE_OS::strlen (param->source.directive +
00239                                param->buffer->start_) * sizeof (ACE_TCHAR);
00240       if (result != 0)
00241         {
00242           // Make sure that the amount we are going to copy
00243           // fits in the buffer
00244           if (result > max_size)
00245             {
00246               result = max_size;
00247             }
00248           ACE_OS::memcpy (buf,
00249                           param->source.directive + param->buffer->start_,
00250                           result);
00251           param->buffer->start_ += (result / sizeof (ACE_TCHAR));
00252         }
00253       break;
00254     default:
00255       ace_yyerror (++param->yyerrno,
00256                    param->yylineno,
00257                    "Invalid Service Configurator type in "
00258                    "ACE_Svc_Conf_Lexer::input");
00259     }
00260 
00261   return result;
00262 }
00263 
00264 int
00265 ACE_Svc_Conf_Lexer::scan (ACE_YYSTYPE* ace_yylval,
00266                           ACE_Svc_Conf_Param* param)
00267 
00268 {
00269   ace_yy_buffer_state* buffer = param->buffer;
00270 
00271   // If we are not currently in any state, skip over whitespace
00272   if (buffer->state_ == ACE_NO_STATE)
00273     {
00274       while (buffer->index_ < buffer->size_ &&
00275              isspace (buffer->input_[buffer->index_]))
00276         {
00277           // Make sure that we count all of the new lines
00278           if (buffer->input_[buffer->index_] == '\n')
00279             {
00280               ++param->yylineno;
00281             }
00282           ++buffer->index_;
00283         }
00284     }
00285 
00286   size_t current;
00287   size_t last = buffer->size_ + (buffer->eof_ ? 1 : 0);
00288   for (current = buffer->index_; current < last; current++)
00289     {
00290       static const char* separators = " \t\r\n:*(){}";
00291       char c = (buffer->eof_ && current == buffer->size_ ?
00292                                      '\n' : buffer->input_[current]);
00293       switch (buffer->state_)
00294         {
00295           case ACE_COMMENT:
00296             if (c == '\n')
00297               {
00298                 buffer->state_ = ACE_NO_STATE;
00299                 buffer->index_ = current + 1;
00300                 ++param->yylineno;
00301               }
00302             break;
00303           case ACE_STRING:
00304             if (!(c >= ' ' && c <= '~'))
00305               {
00306                 // The character at currrent is definitely not part of
00307                 // the string so we need to move current back one.
00308                 --current;
00309 
00310                 // Get the starting point of our string (skipping the quote)
00311                 char* source = buffer->input_ + buffer->index_ + 1;
00312 
00313                 // Now, we need to move back in the string until we find the
00314                 // same character that started the string
00315                 bool string_end_found = false;
00316                 for(ssize_t i = (current - 1) - buffer->index_; i >= 0; i--)
00317                   {
00318                     if (source[i] == buffer->string_start_)
00319                       {
00320                         current = buffer->index_ + i + 1;
00321                         string_end_found = true;
00322                         break;
00323                       }
00324                   }
00325 
00326                 if (!string_end_found)
00327                   {
00328                     ace_yyerror (++param->yyerrno,
00329                                  param->yylineno,
00330                                  "Unable to find the end of the string");
00331                     return ACE_NO_STATE;
00332                   }
00333 
00334                 size_t amount = (current - buffer->index_) - 1;
00335 #if defined (ACE_USES_WCHAR)
00336                 ACE_TCHAR target[ACE_YY_CONVERSION_SPACE] = ACE_TEXT ("");
00337                 size_t length = 0;
00338                 if (!convert_from_utf8 (buffer->converter_,
00339                                         source,
00340                                         amount,
00341                                         target,
00342                                         ACE_YY_CONVERSION_SPACE,
00343                                         length))
00344                   {
00345                     ace_yyerror (++param->yyerrno,
00346                                  param->yylineno,
00347                                  "Unable to convert string from UTF-8");
00348                     return ACE_NO_STATE;
00349                   }
00350                 amount = length;
00351 #else
00352                 char* target = source;
00353 #endif /* ACE_USES_WCHAR */
00354                 ace_yylval->ident_ = param->obstack.copy (target, amount);
00355                 buffer->state_ = ACE_NO_STATE;
00356                 buffer->index_ = current + 1;
00357                 return ACE_STRING;
00358               }
00359             break;
00360           case ACE_NO_STATE:
00361             if (c == '"' || c == '\'')
00362               {
00363                 buffer->string_start_ = c;
00364                 buffer->state_ = ACE_STRING;
00365               }
00366             else if (c == '#')
00367               {
00368                 buffer->state_ = ACE_COMMENT;
00369               }
00370             else if (ACE_OS::strchr (separators, c) != 0)
00371               {
00372                 if (c == '\n')
00373                   {
00374                     ++param->yylineno;
00375                   }
00376 
00377                 if (current == buffer->index_ + 1)
00378                   {
00379                     int lower = ACE_OS::ace_tolower (
00380                                           buffer->input_[current - 1]);
00381                     if (c == ':' &&
00382                         (buffer->input_[current - 1] == '%' ||
00383                          (lower >= 'a' && lower <= 'z')))
00384                       {
00385                         // This is considered a path, so we need to
00386                         // skip over the ':' and go around the loop
00387                         // again
00388                         break;
00389                       }
00390                   }
00391 
00392                 if (current == buffer->index_)
00393                   {
00394                     buffer->index_ = current + 1;
00395                     if (isspace (c))
00396                       {
00397                         // This is an empty line.
00398                         // Let's look for something else.
00399                         break;
00400                       }
00401                     else
00402                       {
00403                         return c;
00404                       }
00405                   }
00406 
00407                 // String from buffer->index_ to current (inclusive)
00408                 size_t size = (current - buffer->index_) + 1;
00409                 ACE_TEMPORARY_STRING (str, size);
00410                 ACE_OS::strncpy (str, buffer->input_ + buffer->index_,
00411                                  size - 1);
00412                 str[size - 1] = '\0';
00413 
00414 
00415                 if (ACE_OS::strcmp (str, "dynamic") == 0)
00416                   {
00417                     buffer->index_ = current;
00418                     return ACE_DYNAMIC;
00419                   }
00420                 else if (ACE_OS::strcmp (str, "static") == 0)
00421                   {
00422                     buffer->index_ = current;
00423                     return ACE_STATIC;
00424                   }
00425                 else if (ACE_OS::strcmp (str, "suspend") == 0)
00426                   {
00427                     buffer->index_ = current;
00428                     return ACE_SUSPEND;
00429                   }
00430                 else if (ACE_OS::strcmp (str, "resume") == 0)
00431                   {
00432                     buffer->index_ = current;
00433                     return ACE_RESUME;
00434                   }
00435                 else if (ACE_OS::strcmp (str, "remove") == 0)
00436                   {
00437                     buffer->index_ = current;
00438                     return ACE_REMOVE;
00439                   }
00440                 else if (ACE_OS::strcmp (str, "stream") == 0)
00441                   {
00442                     buffer->index_ = current;
00443                     return ACE_USTREAM;
00444                   }
00445                 else if (ACE_OS::strcmp (str, "Module") == 0)
00446                   {
00447                     buffer->index_ = current;
00448                     return ACE_MODULE_T;
00449                   }
00450                 else if (ACE_OS::strcmp (str, "Service_Object") == 0)
00451                   {
00452                     buffer->index_ = current;
00453                     return ACE_SVC_OBJ_T;
00454                   }
00455                 else if (ACE_OS::strcmp (str, "STREAM") == 0)
00456                   {
00457                     buffer->index_ = current;
00458                     return ACE_STREAM_T;
00459                   }
00460                 else if (ACE_OS::strcmp (str, "active") == 0)
00461                   {
00462                     buffer->index_ = current;
00463                     return ACE_ACTIVE;
00464                   }
00465                 else if (ACE_OS::strcmp (str, "inactive") == 0)
00466                   {
00467                     buffer->index_ = current;
00468                     return ACE_INACTIVE;
00469                   }
00470                 else
00471                   {
00472                     // Get the string and save it in ace_yylval
00473                     int token = ACE_IDENT;
00474                     size_t amount = size - 1;
00475 #if defined (ACE_USES_WCHAR)
00476                     ACE_TCHAR target[ACE_YY_CONVERSION_SPACE] = ACE_TEXT ("");
00477                     size_t length = 0;
00478                     if (!convert_from_utf8 (buffer->converter_,
00479                                             str,
00480                                             amount,
00481                                             target,
00482                                             ACE_YY_CONVERSION_SPACE,
00483                                             length))
00484                       {
00485                         ace_yyerror (++param->yyerrno,
00486                                      param->yylineno,
00487                                      "Unable to convert "
00488                                      "identifier from UTF-8");
00489                         return ACE_NO_STATE;
00490                       }
00491                     amount = length;
00492 #else
00493                     char* target = str;
00494 #endif /* ACE_USES_WCHAR */
00495                     ace_yylval->ident_ = param->obstack.copy (target, amount);
00496 
00497                     // Determine the difference between pathname and ident
00498                     if (ACE_OS::ace_isdigit (ace_yylval->ident_[0]))
00499                       {
00500                         token = ACE_PATHNAME;
00501                       }
00502                     else
00503                       {
00504                         static const ACE_TCHAR* path_parts =
00505                                                 ACE_TEXT ("/\\:%.~-");
00506                         for (const ACE_TCHAR* p = path_parts; *p != '\0'; p++)
00507                           {
00508                             if (ACE_OS::strchr (ace_yylval->ident_, *p) != 0)
00509                               {
00510                                 token = ACE_PATHNAME;
00511                                 break;
00512                               }
00513                           }
00514                       }
00515 
00516                     buffer->state_ = ACE_NO_STATE;
00517                     buffer->index_ = current;
00518                     return token;
00519                   }
00520               }
00521             break;
00522           default:
00523             ace_yyerror (++param->yyerrno,
00524                          param->yylineno,
00525                          "Unexpected state in ACE_Svc_Conf_Lexer::scan");
00526             return ACE_NO_STATE;
00527         }
00528     }
00529 
00530   // We need more from the input source so, we will move the remainder of
00531   // the buffer to the front and signal that we need more
00532   if (!buffer->eof_)
00533     {
00534       buffer->need_more_ = true;
00535       if (buffer->state_ == ACE_COMMENT)
00536         {
00537           buffer->index_ = 0;
00538           buffer->size_  = 0;
00539         }
00540       else
00541         {
00542           buffer->size_ = current - buffer->index_;
00543           if (buffer->size_ != 0 && buffer->index_ != 0)
00544             ACE_OS::memmove (buffer->input_,
00545                              buffer->input_ + buffer->index_, buffer->size_);
00546           buffer->index_ = 0;
00547           buffer->state_ = ACE_NO_STATE;
00548         }
00549     }
00550   return ACE_NO_STATE;
00551 }
00552 
00553 #if defined (ACE_USES_WCHAR)
00554 
00555 bool
00556 ACE_Svc_Conf_Lexer::convert_to_utf8 (
00557                       ACE_Svc_Conf_Param* param,
00558                       size_t skip_bytes,
00559                       ACE_Encoding_Converter_Factory::Encoding_Hint hint)
00560 {
00561   bool status = false;
00562   if (param->buffer->converter_ == 0)
00563     {
00564       param->buffer->converter_ =
00565         ACE_Encoding_Converter_Factory::create (
00566           reinterpret_cast<ACE_Byte*> (param->buffer->input_ + skip_bytes),
00567           param->buffer->size_ - skip_bytes,
00568           hint);
00569     }
00570 
00571   if (param->buffer->converter_ != 0)
00572     {
00573       char target[ACE_YY_CONVERSION_SPACE] = "";
00574       if (param->buffer->converter_->to_utf8 (
00575             param->buffer->input_ + skip_bytes,
00576             param->buffer->size_ - skip_bytes,
00577             reinterpret_cast<ACE_Byte*> (target),
00578             ACE_YY_CONVERSION_SPACE) == ACE_Encoding_Converter::CONVERSION_OK)
00579         {
00580           ACE_OS::strcpy (param->buffer->input_ + skip_bytes, target);
00581           param->buffer->size_ = ACE_OS::strlen (target) + skip_bytes;
00582           status = true;
00583         }
00584     }
00585 
00586   return status;
00587 }
00588 
00589 bool
00590 ACE_Svc_Conf_Lexer::convert_from_utf8 (ACE_Encoding_Converter* converter,
00591                                        const char* source,
00592                                        size_t source_size,
00593                                        ACE_TCHAR* target,
00594                                        size_t target_size,
00595                                        size_t& length)
00596 {
00597   if (converter != 0)
00598     {
00599       if (converter->from_utf8 (
00600             reinterpret_cast <const ACE_Byte*> (source),
00601             source_size,
00602             target,
00603             target_size) != ACE_Encoding_Converter::CONVERSION_OK)
00604         {
00605           return false;
00606         }
00607     }
00608   else
00609     {
00610       ACE_OS::strncpy (target, ACE_TEXT_CHAR_TO_TCHAR (source), source_size);
00611       target[source_size] = 0;
00612     }
00613 
00614   length = ACE_OS::strlen (target);
00615   return true;
00616 }
00617 
00618 ACE_Encoding_Converter_Factory::Encoding_Hint
00619 ACE_Svc_Conf_Lexer::locate_bom (char* source,
00620                                 size_t source_size,
00621                                 size_t& bytes_used)
00622 {
00623   struct bom {
00624     size_t length_;
00625     const char* data_;
00626     ACE_Encoding_Converter_Factory::Encoding_Hint hint_;
00627   };
00628   static const bom boms[] = {
00629     { 4, "\x00\x00\xfe\xff", ACE_Encoding_Converter_Factory::ACE_UTF_32BE },
00630     { 4, "\xff\xfe\x00\x00", ACE_Encoding_Converter_Factory::ACE_UTF_32LE },
00631     { 2, "\xfe\xff",         ACE_Encoding_Converter_Factory::ACE_UTF_16BE },
00632     { 2, "\xff\xfe",         ACE_Encoding_Converter_Factory::ACE_UTF_16LE },
00633     { 3, "\xef\xbb\xbf",     ACE_Encoding_Converter_Factory::ACE_UTF_8    },
00634   };
00635 
00636   for (size_t i = 0; i < sizeof (boms) / sizeof (bom); i++)
00637     {
00638       if (source_size >= boms[i].length_)
00639         {
00640           if (ACE_OS::memcmp (source,
00641                               boms[i].data_, boms[i].length_) == 0)
00642             {
00643               bytes_used = boms[i].length_;
00644               return boms[i].hint_;
00645             }
00646         }
00647     }
00648 
00649   // No BOM was found
00650   bytes_used = 0;
00651   return ACE_Encoding_Converter_Factory::ACE_NONE;
00652 }
00653 
00654 #endif /* ACE_USES_WCHAR */
00655 
00656 ACE_END_VERSIONED_NAMESPACE_DECL
00657 #endif /* ACE_USES_CLASSIC_SVC_CONF = 1 */

Generated on Thu Nov 9 09:42:06 2006 for ACE by doxygen 1.3.6