Tokenizer. More...
#include <Tokenizer_T.h>

Classes | |
| class | Delimiter_Entry |
| Delimiter Entry. More... | |
| class | Preserve_Entry |
| Preserve Entry. More... | |
Public Types | |
| enum | { MAX_DELIMITERS = 16, MAX_PRESERVES = 16 } |
Public Member Functions | |
| ACE_Tokenizer_T (ACE_CHAR_T *buffer) | |
| int | delimiter (ACE_CHAR_T d) |
| int | delimiter_replace (ACE_CHAR_T d, ACE_CHAR_T replacement) |
| int | preserve_designators (ACE_CHAR_T start, ACE_CHAR_T stop, int strip=1) |
| ACE_CHAR_T * | next (void) |
| Returns the next token. | |
Protected Member Functions | |
| int | is_delimiter (ACE_CHAR_T d, int &replace, ACE_CHAR_T &r) |
| int | is_preserve_designator (ACE_CHAR_T start, ACE_CHAR_T &stop, int &strip) |
Protected Attributes | |
| ACE_CHAR_T * | buffer_ |
| int | index_ |
| Preserve_Entry | preserves_ [MAX_PRESERVES] |
| The application can specify MAX_PRESERVES preserve designators. | |
| int | preserves_index_ |
| Index of the next free spot in preserves_. | |
| Delimiter_Entry | delimiters_ [MAX_DELIMITERS] |
| The tokenizer allows MAX_DELIMITERS number of delimiters. | |
| int | delimiter_index_ |
| Index of the next free space in delimiters_. | |
Tokenizer.
Tokenizes a buffer. Allows the application to set delimiters and preserve designators. Does not allow special characters yet (e.g., printf ("\"like a quoted string\"")).
Definition at line 37 of file Tokenizer_T.h.
| anonymous enum |
Definition at line 147 of file Tokenizer_T.h.
{
// Capacity of the delimiters_ table.
MAX_DELIMITERS=16,
// Capacity of the preserves_ table.
MAX_PRESERVES=16
};
| ACE_Tokenizer_T< ACE_CHAR_T >::ACE_Tokenizer_T | ( | ACE_CHAR_T * | buffer | ) |
buffer will be parsed. Notice that ACE_Tokenizer_T will modify buffer if you use delimiter_replace or preserve_designators to do character substitution.
Definition at line 15 of file Tokenizer_T.cpp.
// Remember the caller's buffer (it is not copied), start scanning at
// offset 0, and begin with no delimiters or preserve designators registered.
: buffer_ (buffer), index_ (0), preserves_index_ (0), delimiter_index_ (0) { }
| int ACE_Tokenizer_T< ACE_CHAR_T >::delimiter | ( | ACE_CHAR_T | d | ) |
d is a delimiter.
Example:
char buf[30];
ACE_OS::strcpy(buf, "William/Joseph/Hagins");
ACE_Tokenizer tok (buf);
tok.delimiter ('/');
for (char *p = tok.next (); p; p = tok.next ())
cout << p << endl;
This will print out:
William/Joseph/Hagins
Joseph/Hagins
Hagins
Definition at line 25 of file Tokenizer_T.cpp.
{
  // Registers d as a plain (non-replacing) delimiter in the next free
  // slot of delimiters_.  Returns 0 on success, -1 if the table is full.
  //
  // The capacity test uses >=, matching delimiter_replace(), so that an
  // index that has moved past the table (delimiter_index_ is a protected
  // member) can never be used to write outside delimiters_.
  //
  // NOTE(review): d is appended even if it is already registered.
  // is_delimiter() stops at the first matching entry, so a duplicate
  // added here after delimiter_replace(d, ...) is never consulted.
  if (delimiter_index_ >= MAX_DELIMITERS)
    return -1;

  delimiters_[delimiter_index_].delimiter_ = d;
  delimiters_[delimiter_index_].replace_ = 0;
  ++delimiter_index_;
  return 0;
}
| int ACE_Tokenizer_T< ACE_CHAR_T >::delimiter_replace | ( | ACE_CHAR_T | d, | |
| ACE_CHAR_T | replacement | |||
| ) |
d is a delimiter and, when found, will be replaced by replacement.
Example:
char buf[30];
ACE_OS::strcpy(buf, "William/Joseph/Hagins");
ACE_Tokenizer tok (buf);
tok.delimiter_replace ('/', 0);
for (char *p = tok.next (); p; p = tok.next ())
cout << p << endl;
This will print out:
William
Joseph
Hagins
Definition at line 38 of file Tokenizer_T.cpp.
{
  // Registers d as a delimiter that next() overwrites with replacement.
  // An existing entry for d is updated in place, which lets callers
  // switch replacement on part-way through a parse (e.g. tokenize up to
  // some count, then take the rest of the original string untouched).
  // Returns 0 on success, -1 if d is new and the table is full.
  Delimiter_Entry *entry = 0;
  for (int slot = 0; entry == 0 && slot < delimiter_index_; ++slot)
    if (delimiters_[slot].delimiter_ == d)
      entry = &delimiters_[slot];

  if (entry == 0)
    {
      // Not registered yet: claim the next free slot, if there is one.
      if (delimiter_index_ >= MAX_DELIMITERS)
        return -1;

      entry = &delimiters_[delimiter_index_];
      entry->delimiter_ = d;
      ++delimiter_index_;
    }

  entry->replacement_ = replacement;
  entry->replace_ = 1;
  return 0;
}
| int ACE_Tokenizer_T< ACE_CHAR_T >::is_delimiter | ( | ACE_CHAR_T | d, | |
| int & | replace, | |||
| ACE_CHAR_T & | r | |||
| ) | [protected] |
Returns 1 if d is a delimiter, 0 otherwise. If d should be replaced with r, replace is set to 1, otherwise 0.
Definition at line 80 of file Tokenizer_T.cpp.
{
  // Looks d up in the registered delimiters; the first matching entry
  // decides the answer.  replace is always written (0 unless the entry
  // asks for replacement); r is written only when replace becomes 1.
  replace = 0;

  for (int slot = 0; slot < delimiter_index_; ++slot)
    {
      const Delimiter_Entry &entry = delimiters_[slot];
      if (entry.delimiter_ == d)
        {
          if (entry.replace_)
            {
              r = entry.replacement_;
              replace = 1;
            }
          return 1;
        }
    }

  // d is not a registered delimiter.
  return 0;
}
| int ACE_Tokenizer_T< ACE_CHAR_T >::is_preserve_designator | ( | ACE_CHAR_T | start, | |
| ACE_CHAR_T & | stop, | |||
| int & | strip | |||
| ) | [protected] |
If start is a start preserve designator, returns 1, sets stop to the matching stop designator, and sets strip to that designator's strip flag. Returns 0 (leaving stop and strip untouched) if start is not a preserve designator.
Definition at line 102 of file Tokenizer_T.cpp.
{
  // Scans the registered preserve designators for one whose opening
  // character is start.  On a hit, reports its closing character and
  // strip flag through stop / strip and returns 1.  On a miss the
  // out-parameters are left unmodified and 0 is returned.
  for (int slot = 0; slot < preserves_index_; ++slot)
    {
      const Preserve_Entry &entry = preserves_[slot];
      if (entry.start_ == start)
        {
          stop = entry.stop_;
          strip = entry.strip_;
          return 1;
        }
    }

  return 0;
}
| ACE_CHAR_T * ACE_Tokenizer_T< ACE_CHAR_T >::next | ( | void | ) |
Returns the next token.
Definition at line 119 of file Tokenizer_T.cpp.
{
// Returns a pointer into buffer_ at the start of the next token, or 0
// when there are no more tokens (or no buffer was supplied).
//
// index_ is the scan position inside buffer_.  The value -1 is a
// sentinel meaning "the previous call consumed the end of the string".
// Whenever 0 is returned because the end was reached, index_ is reset
// to 0, so a further call starts scanning from the beginning again.
//
// Check if the previous pass was the last one in the buffer.
if (index_ == -1)
{
index_ = 0;
return 0;
}
// Check if a buffer has been passed
if (!buffer_)
{
return 0;
}
// replace / replacement are filled in by is_delimiter(); replace is
// always assigned there before it is read here.
ACE_CHAR_T replacement = 0;
int replace;
ACE_CHAR_T *next_token = 0;
// Skip all leading delimiters.
for (;;)
{
// Check for end of string.
if (buffer_[index_] == '\0')
{
// If we hit EOS at the start, return 0.
index_ = 0;
return 0;
}
if (this->is_delimiter (buffer_[index_],
replace,
replacement))
++index_;
else
break;
}
// When we reach this point, buffer_[index_] is a non-delimiter and
// not EOS - the start of our next_token.
next_token = buffer_ + index_;
// A preserved region is its own token.
// stop / strip are only read after is_preserve_designator() returned 1,
// which is the only case in which it assigns them.
ACE_CHAR_T stop;
int strip;
if (this->is_preserve_designator (buffer_[index_],
stop,
strip))
{
// Advance past the start designator and scan for the stop designator.
// The loop is left via the break or the goto below.
while (++index_)
{
if (buffer_[index_] == '\0')
{
// Unterminated preserved region: mark the buffer as exhausted and
// return the token as-is (the start designator is not stripped).
index_ = -1;
goto EXIT_LABEL;
}
if (buffer_[index_] == stop)
break;
}
if (strip)
{
// Skip start preserve designator.
next_token += 1;
// Zap the stop preserve designator.
buffer_[index_] = '\0';
// Increment to the next token.
++index_;
}
// When strip is 0, index_ is left on the stop designator, so the next
// call begins its token at that character.
goto EXIT_LABEL;
}
// Step through finding the next delimiter or EOS.
for (;;)
{
// Advance pointer.
++index_;
// Check for delimiter.
if (this->is_delimiter (buffer_[index_],
replace,
replacement))
{
// Replace the delimiter.
if (replace != 0)
buffer_[index_] = replacement;
// Move the pointer up and return.
++index_;
goto EXIT_LABEL;
}
// A preserve designator is NESTED inside this token.
// We can't strip such preserve designators, just skip
// over them so that delimiters nested within aren't seen.
if (this->is_preserve_designator (buffer_[index_],
stop,
strip))
{
++index_; // Skip starting preserve_designator
while (('\0' != buffer_[index_]) && (stop != buffer_[index_]))
++index_; // Skip enclosed character
}
// Check for end of string.
if (buffer_[index_] == '\0')
{
// Remember that the end was reached so the next call returns 0.
index_ = -1;
goto EXIT_LABEL;
}
}
EXIT_LABEL:
return next_token;
}
| int ACE_Tokenizer_T< ACE_CHAR_T >::preserve_designators | ( | ACE_CHAR_T | start, | |
| ACE_CHAR_T | stop, | |||
| int | strip = 1 | |||
| ) |
Extract the string between a pair of designator characters, for instance quotes, or '(' and ')'. start specifies the begin designator and stop specifies the end designator. If strip == 1, the preserve designators are stripped from the tokens returned by next.
Example with strip = 0:
char buf[30];
ACE_OS::strcpy(buf, "William(Joseph)Hagins");
ACE_Tokenizer tok (buf);
tok.preserve_designators ('(', ')', 0);
for (char *p = tok.next (); p; p = tok.next ())
cout << p << endl;
This will print out:
William(Joseph)Hagins
(Joseph)Hagins
)Hagins
Example with strip = 1:
char buf[30];
ACE_OS::strcpy(buf, "William(Joseph)Hagins");
ACE_Tokenizer tok (buf);
tok.preserve_designators ('(', ')', 1);
for (char *p = tok.next (); p; p = tok.next ())
cout << p << endl;
This will print out:
William
Joseph
Hagins
Definition at line 64 of file Tokenizer_T.cpp.
{
  // Registers a start/stop designator pair in the next free slot of
  // preserves_, together with its strip flag.  Returns 0 on success,
  // -1 if the table is full.
  //
  // The capacity test uses >=, matching delimiter_replace(), so that an
  // index that has moved past the table (preserves_index_ is a protected
  // member) can never be used to write outside preserves_.
  if (preserves_index_ >= MAX_PRESERVES)
    return -1;

  preserves_[preserves_index_].start_ = start;
  preserves_[preserves_index_].stop_ = stop;
  preserves_[preserves_index_].strip_ = strip;
  ++preserves_index_;
  return 0;
}
ACE_CHAR_T* ACE_Tokenizer_T< ACE_CHAR_T >::buffer_ [protected] |
Definition at line 164 of file Tokenizer_T.h.
int ACE_Tokenizer_T< ACE_CHAR_T >::delimiter_index_ [protected] |
Index of the next free space in delimiters_.
Definition at line 224 of file Tokenizer_T.h.
Delimiter_Entry ACE_Tokenizer_T< ACE_CHAR_T >::delimiters_[MAX_DELIMITERS] [protected] |
The tokenizer allows MAX_DELIMITERS number of delimiters.
Definition at line 221 of file Tokenizer_T.h.
int ACE_Tokenizer_T< ACE_CHAR_T >::index_ [protected] |
Definition at line 165 of file Tokenizer_T.h.
Preserve_Entry ACE_Tokenizer_T< ACE_CHAR_T >::preserves_[MAX_PRESERVES] [protected] |
The application can specify MAX_PRESERVES preserve designators.
Definition at line 193 of file Tokenizer_T.h.
int ACE_Tokenizer_T< ACE_CHAR_T >::preserves_index_ [protected] |
Index of the next free spot in preserves_.
Definition at line 196 of file Tokenizer_T.h.
1.7.0