Tokenizer. More...
#include <Tokenizer_T.h>
Classes | |
class | Delimiter_Entry |
Delimiter Entry. More... | |
class | Preserve_Entry |
Preserve Entry. More... | |
Public Types | |
enum | { MAX_DELIMITERS = 16, MAX_PRESERVES = 16 } |
Public Member Functions | |
ACE_Tokenizer_T (ACE_CHAR_T *buffer) | |
int | delimiter (ACE_CHAR_T d) |
int | delimiter_replace (ACE_CHAR_T d, ACE_CHAR_T replacement) |
int | preserve_designators (ACE_CHAR_T start, ACE_CHAR_T stop, int strip=1) |
ACE_CHAR_T * | next (void) |
Returns the next token. | |
Protected Member Functions | |
int | is_delimiter (ACE_CHAR_T d, int &replace, ACE_CHAR_T &r) |
int | is_preserve_designator (ACE_CHAR_T start, ACE_CHAR_T &stop, int &strip) |
Protected Attributes | |
ACE_CHAR_T * | buffer_ |
int | index_ |
Preserve_Entry | preserves_ [MAX_PRESERVES] |
The application can specify MAX_PRESERVES preserve designators. | |
int | preserves_index_ |
Index of the next free spot in preserves_. | |
Delimiter_Entry | delimiters_ [MAX_DELIMITERS] |
The tokenizer allows MAX_DELIMITERS number of delimiters. | |
int | delimiter_index_ |
Index of the next free space in delimiters_. |
Tokenizer.
Tokenizes a buffer. Allows application to set delimiters and preserve designators. Does not allow special characters, yet (e.g., printf ("\"like a quoted string\"")).
Definition at line 37 of file Tokenizer_T.h.
anonymous enum |
Definition at line 147 of file Tokenizer_T.h.
{ MAX_DELIMITERS=16, MAX_PRESERVES=16 };
ACE_Tokenizer_T< ACE_CHAR_T >::ACE_Tokenizer_T | ( | ACE_CHAR_T * | buffer | ) |
buffer will be parsed. Notice that ACE_Tokenizer_T will modify buffer if you use delimiter_replace or preserve_designators to do character substitution.
Definition at line 15 of file Tokenizer_T.cpp.
// Remember the caller's buffer and start at its first character with no
// delimiters and no preserve designators registered.
: buffer_ (buffer), index_ (0), preserves_index_ (0), delimiter_index_ (0) { }
int ACE_Tokenizer_T< ACE_CHAR_T >::delimiter | ( | ACE_CHAR_T | d | ) |
d is a delimiter.
Example:
char buf[30]; ACE_OS::strcpy(buf, "William/Joseph/Hagins"); ACE_Tokenizer_T tok (buf); tok.delimiter ('/'); for (char *p = tok.next (); p; p = tok.next ()) cout << p << endl;
This will print out:
William/Joseph/Hagins Joseph/Hagins Hagins
Definition at line 25 of file Tokenizer_T.cpp.
{
  // Refuse to register more delimiters than the table can hold.  A
  // range check (>=, as delimiter_replace uses) also rejects an index
  // that has moved past the end of the table, instead of writing
  // outside delimiters_.
  if (delimiter_index_ >= MAX_DELIMITERS)
    return -1;

  // Record d as a plain delimiter: no replacement character is
  // associated with it.
  delimiters_[delimiter_index_].delimiter_ = d;
  delimiters_[delimiter_index_].replace_ = 0;
  ++delimiter_index_;

  // 0 signals that the delimiter was registered.
  return 0;
}
int ACE_Tokenizer_T< ACE_CHAR_T >::delimiter_replace | ( | ACE_CHAR_T | d, | |
ACE_CHAR_T | replacement | |||
) |
d is a delimiter and, when found, will be replaced by replacement.
Example:
char buf[30]; ACE_OS::strcpy(buf, "William/Joseph/Hagins"); ACE_Tokenizer tok (buf); tok.delimiter_replace ('/', 0); for (char *p = tok.next (); p; p = tok.next ()) cout << p << endl;
This will print out:
William Joseph Hagins
Definition at line 38 of file Tokenizer_T.cpp.
{
  // Look for an existing entry for d.  Updating an entry in place makes
  // it possible to change replacements on-the-fly, e.g., parse a string
  // up to a certain token count and then copy the rest of the original
  // string.
  int slot = 0;
  while (slot < delimiter_index_ && !(delimiters_[slot].delimiter_ == d))
    ++slot;

  if (slot >= delimiter_index_)
    {
      // d is not registered yet: claim the next free slot, unless the
      // table is already full.
      if (delimiter_index_ >= MAX_DELIMITERS)
        return -1;

      slot = delimiter_index_;
      delimiters_[slot].delimiter_ = d;
      ++delimiter_index_;
    }

  // Whether the entry is new or pre-existing, it now replaces d with
  // the requested character.
  delimiters_[slot].replacement_ = replacement;
  delimiters_[slot].replace_ = 1;
  return 0;
}
int ACE_Tokenizer_T< ACE_CHAR_T >::is_delimiter | ( | ACE_CHAR_T | d, | |
int & | replace, | |||
ACE_CHAR_T & | r | |||
) | [protected] |
Returns 1 if d is a delimiter, 0 otherwise. If d should be replaced with r, replace is set to 1, otherwise 0.
Definition at line 80 of file Tokenizer_T.cpp.
{
  // Default: no replacement requested.  r is only written when a
  // matching entry asks for a replacement.
  replace = 0;

  for (int i = 0; i < delimiter_index_; ++i)
    {
      if (!(delimiters_[i].delimiter_ == d))
        continue;

      // First matching entry wins.
      if (delimiters_[i].replace_)
        {
          r = delimiters_[i].replacement_;
          replace = 1;
        }
      return 1;
    }

  // d is not a registered delimiter.
  return 0;
}
int ACE_Tokenizer_T< ACE_CHAR_T >::is_preserve_designator | ( | ACE_CHAR_T | start, | |
ACE_CHAR_T & | stop, | |||
int & | strip | |||
) | [protected] |
If start is a start preserve designator, returns 1, sets stop to the matching stop designator and sets strip to that designator's strip setting. Returns 0 if start is not a preserve designator.
Definition at line 102 of file Tokenizer_T.cpp.
{
  for (int i = 0; i < preserves_index_; ++i)
    {
      if (!(preserves_[i].start_ == start))
        continue;

      // First matching entry wins: report its stop designator and its
      // strip setting to the caller.
      stop = preserves_[i].stop_;
      strip = preserves_[i].strip_;
      return 1;
    }

  // start does not open a preserved region; stop and strip are left
  // untouched.
  return 0;
}
ACE_CHAR_T * ACE_Tokenizer_T< ACE_CHAR_T >::next | ( | void | ) |
Returns the next token.
Definition at line 119 of file Tokenizer_T.cpp.
{
  // The previous call consumed the last token in the buffer: rewind
  // and report that there are no more tokens.
  if (index_ == -1)
    {
      index_ = 0;
      return 0;
    }

  // Nothing to tokenize if no buffer was supplied.
  if (!buffer_)
    return 0;

  ACE_CHAR_T substitute = 0;
  int must_replace;

  // Skip all leading delimiters.
  while (buffer_[index_] != '\0'
         && this->is_delimiter (buffer_[index_], must_replace, substitute))
    ++index_;

  // Only delimiters (or nothing at all) were left: rewind and return 0.
  if (buffer_[index_] == '\0')
    {
      index_ = 0;
      return 0;
    }

  // buffer_[index_] is now a non-delimiter and not EOS - the start of
  // the token to return.
  ACE_CHAR_T *token = buffer_ + index_;

  ACE_CHAR_T closer;
  int strip_flag;

  // A preserved region is its own token.
  if (this->is_preserve_designator (buffer_[index_], closer, strip_flag))
    {
      // Scan forward for the stop designator.
      for (++index_; index_ != 0; ++index_)
        {
          if (buffer_[index_] == '\0')
            {
              // Unterminated region: this is the last token.
              index_ = -1;
              return token;
            }

          if (buffer_[index_] == closer)
            break;
        }

      if (strip_flag)
        {
          // Skip the start preserve designator.
          token += 1;

          // Zap the stop preserve designator.
          buffer_[index_] = '\0';

          // Move on to the start of the next token.
          ++index_;
        }

      return token;
    }

  // Step through the buffer until the next delimiter or EOS.
  for (++index_; ; ++index_)
    {
      if (this->is_delimiter (buffer_[index_], must_replace, substitute))
        {
          // Replace the delimiter if a replacement was registered.
          if (must_replace != 0)
            buffer_[index_] = substitute;

          // Step past the delimiter and hand back the token.
          ++index_;
          return token;
        }

      // A preserve designator NESTED inside this token cannot be
      // stripped; just skip over the region so that delimiters nested
      // within it aren't seen.
      if (this->is_preserve_designator (buffer_[index_], closer, strip_flag))
        {
          // Skip the starting preserve designator...
          ++index_;

          // ...and every enclosed character up to the stop designator
          // or EOS.
          while (('\0' != buffer_[index_]) && (closer != buffer_[index_]))
            ++index_;
        }

      // End of string: remember that this was the last token.
      if (buffer_[index_] == '\0')
        {
          index_ = -1;
          return token;
        }
    }
}
int ACE_Tokenizer_T< ACE_CHAR_T >::preserve_designators | ( | ACE_CHAR_T | start, | |
ACE_CHAR_T | stop, | |||
int | strip = 1 | |||
) |
Extract string between a pair of designator characters, for instance quotes, or '(' and ')'. start specifies the begin designator. stop specifies the end designator. If strip == 1, then the preserve designators will be stripped from the tokens returned by next.
Example with strip = 0:
char buf[30]; ACE_OS::strcpy(buf, "William(Joseph)Hagins"); ACE_Tokenizer tok (buf); tok.preserve_designators ('(', ')', 0); for (char *p = tok.next (); p; p = tok.next ()) cout << p << endl;
This will print out:
William(Joseph)Hagins (Joseph)Hagins )Hagins
Example with strip = 1:
char buf[30]; ACE_OS::strcpy(buf, "William(Joseph)Hagins"); ACE_Tokenizer tok (buf); tok.preserve_designators ('(', ')', 1); for (char *p = tok.next (); p; p = tok.next ()) cout << p << endl;
This will print out:
William Joseph Hagins
Definition at line 64 of file Tokenizer_T.cpp.
{
  // Refuse to register more designator pairs than the table can hold.
  // A range check (>=) also rejects an index that has moved past the
  // end of the table, instead of writing outside preserves_.
  if (preserves_index_ >= MAX_PRESERVES)
    return -1;

  // Record the start/stop pair together with its strip setting in the
  // next free slot.
  preserves_[preserves_index_].start_ = start;
  preserves_[preserves_index_].stop_ = stop;
  preserves_[preserves_index_].strip_ = strip;
  ++preserves_index_;

  // 0 signals that the pair was registered.
  return 0;
}
ACE_CHAR_T* ACE_Tokenizer_T< ACE_CHAR_T >::buffer_ [protected] |
Definition at line 164 of file Tokenizer_T.h.
int ACE_Tokenizer_T< ACE_CHAR_T >::delimiter_index_ [protected] |
Index of the next free space in delimiters_.
Definition at line 224 of file Tokenizer_T.h.
Delimiter_Entry ACE_Tokenizer_T< ACE_CHAR_T >::delimiters_[MAX_DELIMITERS] [protected] |
The tokenizer allows MAX_DELIMITERS number of delimiters.
Definition at line 221 of file Tokenizer_T.h.
int ACE_Tokenizer_T< ACE_CHAR_T >::index_ [protected] |
Definition at line 165 of file Tokenizer_T.h.
Preserve_Entry ACE_Tokenizer_T< ACE_CHAR_T >::preserves_[MAX_PRESERVES] [protected] |
The application can specify MAX_PRESERVES preserve designators.
Definition at line 193 of file Tokenizer_T.h.
int ACE_Tokenizer_T< ACE_CHAR_T >::preserves_index_ [protected] |
Index of the next free spot in preserves_.
Definition at line 196 of file Tokenizer_T.h.