public void nextLexeme() {
int state = 0;
int commentLevel = 0;
char c;
while (state >= 0) {
switch (state) {
// Looking for a token, quoted string, or tspecial
case 0:
if (myCurrentIndex >= mySourceLength) {
myLexemeType = EOF_LEXEME;
myLexemeBeginIndex = mySourceLength;
myLexemeEndIndex = mySourceLength;
state = -1;
} else if (Character.isWhitespace
(c = mySource.charAt (myCurrentIndex ++))) {
state = 0;
} else if (c == '\"') {
myLexemeType = QUOTED_STRING_LEXEME;
myLexemeBeginIndex = myCurrentIndex;
state = 1;
} else if (c == '(') {
++ commentLevel;
state = 3;
} else if (c == '/' || c == ';' || c == '=' ||
c == ')' || c == '< ' || c == ' >' ||
c == '@' || c == ',' || c == ':' ||
c == '\\' || c == '[' || c == ']' ||
c == '?') {
myLexemeType = TSPECIAL_LEXEME;
myLexemeBeginIndex = myCurrentIndex - 1;
myLexemeEndIndex = myCurrentIndex;
state = -1;
} else {
myLexemeType = TOKEN_LEXEME;
myLexemeBeginIndex = myCurrentIndex - 1;
state = 5;
}
break;
// In a quoted string
case 1:
if (myCurrentIndex >= mySourceLength) {
myLexemeType = ILLEGAL_LEXEME;
myLexemeBeginIndex = mySourceLength;
myLexemeEndIndex = mySourceLength;
state = -1;
} else if ((c = mySource.charAt (myCurrentIndex ++)) == '\"') {
myLexemeEndIndex = myCurrentIndex - 1;
state = -1;
} else if (c == '\\') {
state = 2;
} else {
state = 1;
}
break;
// In a quoted string, backslash seen
case 2:
if (myCurrentIndex >= mySourceLength) {
myLexemeType = ILLEGAL_LEXEME;
myLexemeBeginIndex = mySourceLength;
myLexemeEndIndex = mySourceLength;
state = -1;
} else {
++ myCurrentIndex;
state = 1;
} break;
// In a comment
case 3: if (myCurrentIndex >= mySourceLength) {
myLexemeType = ILLEGAL_LEXEME;
myLexemeBeginIndex = mySourceLength;
myLexemeEndIndex = mySourceLength;
state = -1;
} else if ((c = mySource.charAt (myCurrentIndex ++)) == '(') {
++ commentLevel;
state = 3;
} else if (c == ')') {
-- commentLevel;
state = commentLevel == 0 ? 0 : 3;
} else if (c == '\\') {
state = 4;
} else { state = 3;
}
break;
// In a comment, backslash seen
case 4:
if (myCurrentIndex >= mySourceLength) {
myLexemeType = ILLEGAL_LEXEME;
myLexemeBeginIndex = mySourceLength;
myLexemeEndIndex = mySourceLength;
state = -1;
} else {
++ myCurrentIndex;
state = 3;
}
break;
// In a token
case 5:
if (myCurrentIndex >= mySourceLength) {
myLexemeEndIndex = myCurrentIndex;
state = -1;
} else if (Character.isWhitespace
(c = mySource.charAt (myCurrentIndex ++))) {
myLexemeEndIndex = myCurrentIndex - 1;
state = -1;
} else if (c == '\"' || c == '(' || c == '/' ||
c == ';' || c == '=' || c == ')' ||
c == '< ' || c == ' >' || c == '@' ||
c == ',' || c == ':' || c == '\\' ||
c == '[' || c == ']' || c == '?') {
-- myCurrentIndex;
myLexemeEndIndex = myCurrentIndex;
state = -1;
} else {
state = 5;
}
break;
}
}
}
|