00001 // -*- C++ -*- 00002 00003 //============================================================================= 00004 /** 00005 * @file Tokenizer_T.h 00006 * 00007 * $Id: Tokenizer_T.h 88793 2010-02-01 17:50:34Z cleeland $ 00008 * 00009 * @author Douglas C. Schmidt (schmidt@cs.wustl.edu) 00010 * @author Nanbor Wang <nanbor@cs.wustl.edu> 00011 */ 00012 //============================================================================= 00013 00014 #ifndef ACE_TOKENIZER_T_H 00015 #define ACE_TOKENIZER_T_H 00016 00017 #include /**/ "ace/pre.h" 00018 00019 #include "ace/Global_Macros.h" 00020 00021 #if !defined (ACE_LACKS_PRAGMA_ONCE) 00022 # pragma once 00023 #endif /* ACE_LACKS_PRAGMA_ONCE */ 00024 00025 ACE_BEGIN_VERSIONED_NAMESPACE_DECL 00026 00027 /** 00028 * @class ACE_Tokenizer_T 00029 * 00030 * @brief Tokenizer 00031 * 00032 * Tokenizes a buffer. Allows application to set delimiters and 00033 * preserve designators. Does not allow special characters, yet 00034 * (e.g., printf ("\"like a quoted string\"")). 00035 */ 00036 template <class ACE_CHAR_T> 00037 class ACE_Tokenizer_T 00038 { 00039 public: 00040 /** 00041 * \a buffer will be parsed. Notice that ACE_Tokenizer_T will modify 00042 * \a buffer if you use <code> delimiter_replace </code> or <code> 00043 * preserve_designators </code> to do character substitution. 00044 * @note You should NOT pass a constant string or string literal 00045 * to this constructor, since ACE_Tokenizer_T will try to modify 00046 * the string. 00047 * \sa preserve_designators 00048 * \sa preserve_designators 00049 */ 00050 ACE_Tokenizer_T (ACE_CHAR_T *buffer); 00051 00052 /** 00053 * \a d is a delimiter. 00054 * \return Returns 0 on success, -1 if there is no memory left. 00055 * 00056 * <B>Example:</B> 00057 * \verbatim 00058 char buf[30]; 00059 ACE_OS::strcpy(buf, "William/Joseph/Hagins"); 00060 00061 ACE_Tokenizer_T tok (buf); 00062 tok.delimiter ('/'); 00063 for (char *p = tok.next (); p; p = tok.next ()) 00064 cout << p << endl; 00065 \endverbatim 00066 * 00067 * This will print out: 00068 * \verbatim 00069 William/Joseph/Hagins 00070 Joseph/Hagins 00071 Hagins \endverbatim 00072 */ 00073 int delimiter (ACE_CHAR_T d); 00074 00075 /** 00076 * \a d is a delimiter and, when found, will be replaced by 00077 * \a replacement. 00078 * \return 0 on success, -1 if there is no memory left. 00079 * 00080 * <B>Example:</B> 00081 * \verbatim 00082 char buf[30]; 00083 ACE_OS::strcpy(buf, "William/Joseph/Hagins"); 00084 00085 ACE_Tokenizer tok (buf); 00086 tok.delimiter_replace ('/', 0); 00087 for (char *p = tok.next (); p; p = tok.next ()) 00088 cout << p << endl; 00089 \endverbatim 00090 * 00091 * This will print out: 00092 * \verbatim 00093 William 00094 Joseph 00095 Hagins \endverbatim 00096 */ 00097 int delimiter_replace (ACE_CHAR_T d, ACE_CHAR_T replacement); 00098 00099 /** 00100 * Extract string between a pair of designator characters. 00101 * For instance, quotes, or '(' and ')'. 00102 * \a start specifies the begin designator. 00103 * \a stop specifies the end designator. 00104 * \a strip If \a strip == 1, then the preserve 00105 * designators will be stripped from the tokens returned by next. 00106 * \return 0 on success, -1 if there is no memory left. 00107 * 00108 * <B>Example with strip = 0:</B> 00109 * \verbatim 00110 char buf[30]; 00111 ACE_OS::strcpy(buf, "William(Joseph)Hagins"); 00112 00113 ACE_Tokenizer tok (buf); 00114 tok.preserve_designators ('(', ')', 0); 00115 for (char *p = tok.next (); p; p = tok.next ()) 00116 cout << p << endl; 00117 \endverbatim 00118 * 00119 * This will print out: 00120 * \verbatim 00121 William(Joseph)Hagins 00122 (Joseph)Hagins 00123 )Hagins \endverbatim 00124 * 00125 * <B>Example with strip = 1:</B> 00126 * \verbatim 00127 char buf[30]; 00128 ACE_OS::strcpy(buf, "William(Joseph)Hagins"); 00129 00130 ACE_Tokenizer tok (buf); 00131 tok.preserve_designators ('(', ')', 1); 00132 for (char *p = tok.next (); p; p = tok.next ()) 00133 cout << p << endl; 00134 \endverbatim 00135 * 00136 * This will print out: 00137 * \verbatim 00138 William 00139 Joseph 00140 Hagins \endverbatim 00141 */ 00142 int preserve_designators (ACE_CHAR_T start, ACE_CHAR_T stop, int strip=1); 00143 00144 /// Returns the next token. 00145 ACE_CHAR_T *next (void); 00146 00147 enum { 00148 MAX_DELIMITERS=16, 00149 MAX_PRESERVES=16 00150 }; 00151 00152 protected: 00153 /// Returns 1 if @a d is a delimiter, 0 otherwise. If @a d should be 00154 /// replaced with @a r, @a replace is set to 1, otherwise 0. 00155 int is_delimiter (ACE_CHAR_T d, int &replace, ACE_CHAR_T &r); 00156 00157 /** 00158 * If @a start is a start preserve designator, returns 1 and sets 00159 * @a stop to the stop designator. Returns 0 if @a start is not a 00160 * preserve designator. 00161 */ 00162 int is_preserve_designator (ACE_CHAR_T start, ACE_CHAR_T &stop, int &strip); 00163 00164 ACE_CHAR_T *buffer_; 00165 int index_; 00166 00167 /** 00168 * @class Preserve_Entry 00169 * 00170 * @brief Preserve Entry 00171 * 00172 * Defines a set of characters that designate an area that 00173 * should not be parsed, but should be treated as a complete 00174 * token. For instance, in: (this is a preserve region), start 00175 * would be a left paren -(- and stop would be a right paren 00176 * -)-. The strip determines whether the designators should be 00177 * removed from the token. 00178 */ 00179 class Preserve_Entry 00180 { 00181 public: 00182 /** 00183 * E.g., "(". 00184 * E.g., ")". 00185 * Whether the designators should be removed from the token. 00186 */ 00187 ACE_CHAR_T start_; 00188 ACE_CHAR_T stop_; 00189 int strip_; 00190 }; 00191 00192 /// The application can specify MAX_PRESERVES preserve designators. 00193 Preserve_Entry preserves_[MAX_PRESERVES]; 00194 00195 /// Pointer to the next free spot in preserves_. 00196 int preserves_index_; 00197 00198 /** 00199 * @class Delimiter_Entry 00200 * 00201 * @brief Delimiter Entry 00202 * 00203 * Describes a delimiter for the tokenizer. 00204 */ 00205 class Delimiter_Entry 00206 { 00207 public: 00208 /** 00209 * Most commonly a space ' '. 00210 * What occurrences of delimiter_ should be replaced with. 00211 * Whether replacement_ should be used. This should be replaced 00212 * with a technique that sets replacement_ = delimiter by 00213 * default. I'll do that next iteration. 00214 */ 00215 ACE_CHAR_T delimiter_; 00216 ACE_CHAR_T replacement_; 00217 int replace_; 00218 }; 00219 00220 /// The tokenizer allows MAX_DELIMITERS number of delimiters. 00221 Delimiter_Entry delimiters_[MAX_DELIMITERS]; 00222 00223 /// Pointer to the next free space in delimiters_. 00224 int delimiter_index_; 00225 }; 00226 00227 typedef ACE_Tokenizer_T <ACE_TCHAR> ACE_Tokenizer; 00228 00229 ACE_END_VERSIONED_NAMESPACE_DECL 00230 00231 #if defined (ACE_TEMPLATES_REQUIRE_SOURCE) 00232 #include "ace/Tokenizer_T.cpp" 00233 #endif /* ACE_TEMPLATES_REQUIRE_SOURCE */ 00234 00235 #if defined (ACE_TEMPLATES_REQUIRE_PRAGMA) 00236 #pragma implementation ("Tokenizer_T.cpp") 00237 #endif /* ACE_TEMPLATES_REQUIRE_PRAGMA */ 00238 00239 #include /**/ "ace/post.h" 00240 00241 #endif /* ACE_TOKENIZER_T_H */