[antlr-interest] Representing state in lexer
cgodfrey86
cgodfrey at epnet.com
Wed May 14 06:37:06 PDT 2003
Hello,
I am trying to write a grammar file which recognizes a subset of
tokens only if in a specific state.
For example AND is recognized as token AND_OP if NOT appearing within
quotes. If appearing within quotes, AND is recognized as a PATTERN
token. I've included the grammar file which I have defined. Any
suggestions as to what I am doing wrong would be appreciated.
When I run a test program using the generated lexer, tokens are
recognized properly when appearing in quotes:
"WAR AND PEACE";
*************************************************
> lexer mQUOTE; c=="
< lexer mQUOTE; c==w
Token: [""",<17>,line=1,col=1]
Token Type: 17
Token Text: "
> lexer mTERM; c==w
> lexer mALLOWCHARS; c==w
< lexer mALLOWCHARS; c==a
> lexer mALLOWCHARS; c==a
< lexer mALLOWCHARS; c==r
> lexer mALLOWCHARS; c==r
< lexer mALLOWCHARS; c==
< lexer mTERM; c==
Token: ["WAR",<16>,line=1,col=2]
Token Type: 16
Token Text: WAR
> lexer mWS; c==
< lexer mWS; c==a
> lexer mTERM; c==a
> lexer mALLOWCHARS; c==a
< lexer mALLOWCHARS; c==n
> lexer mALLOWCHARS; c==n
< lexer mALLOWCHARS; c==d
> lexer mALLOWCHARS; c==d
< lexer mALLOWCHARS; c==
< lexer mTERM; c==
Token: ["AND",<16>,line=1,col=6]
Token Type: 16
Token Text: AND
> lexer mWS; c==
< lexer mWS; c==p
> lexer mTERM; c==p
> lexer mALLOWCHARS; c==p
< lexer mALLOWCHARS; c==e
> lexer mALLOWCHARS; c==e
< lexer mALLOWCHARS; c==a
> lexer mALLOWCHARS; c==a
< lexer mALLOWCHARS; c==c
> lexer mALLOWCHARS; c==c
< lexer mALLOWCHARS; c==e
> lexer mALLOWCHARS; c==e
< lexer mALLOWCHARS; c=="
< lexer mTERM; c=="
Token: ["PEACE",<16>,line=1,col=10]
Token Type: 16
Token Text: PEACE
> lexer mQUOTE; c=="
< lexer mQUOTE; c==;
Token: [""",<17>,line=1,col=15]
Token Type: 17
Token Text: "
> lexer mSEMI; c==;
< lexer mSEMI; c==
Token: [";",<26>,line=1,col=16]
Token Type: 26
Token Text: ;
done lexing...
*************************************************
When appearing without quotes, tokens are not recognized as expected:
WAR AND PEACE;
*************************************************
> lexer mTERM; c==w
> lexer mWS; c==r
< lexer mWS; c==r
< lexer mTERM; c==w
exception: line 1:1: unexpected char: 'w'
*************************************************
AND PEACE;
*************************************************
> lexer mTERM; c==a
< lexer mTERM; c==
Token: ["AND",<6>,line=1,col=1]
Token Type: 6
Token Text: AND
> lexer mWS; c==
< lexer mWS; c==p
> lexer mTERM; c==p
> lexer mWS; c==a
< lexer mWS; c==a
< lexer mTERM; c==p
exception: line 1:5: unexpected char: 'p'
*************************************************
options
{
language = "CSharp";
}
class UserLexer extends Lexer;
options {
k=3;
caseSensitive=false;
caseSensitiveLiterals=false;
}
tokens {
S_TAG;
OR_OP;
AND_OP;
NOT_OP;
GT_OP;
GE_OP;
LT_OP;
LE_OP;
EQ_OP;
DASH;
W_OP;
N_OP;
PATTERN;
}
{
public bool isQuoted = false;
}
QUOTE : '"' {if (this.isQuoted) {this.isQuoted = false;} else
{this.isQuoted = true;} };
OPEN_PAREN : '(';
CLOSE_PAREN : ')';
TERM :
{!this.isQuoted}?
(
("gt")=> "gt"
{$setType(GT_OP);}
| (">")=> ">"
{$setType(GT_OP);}
|("ge")=> "ge"
{$setType(GE_OP);}
|(">=")=> ">="
{$setType(GE_OP);}
|("lt")=>"lt"
{$setType(LT_OP);}
|("<")=>"<"
{$setType(LT_OP);}
|("le")=>"le"
{$setType(LE_OP);}
|("<=")=>"<="
{$setType(LE_OP);}
|("eq")=>"eq"
{$setType(EQ_OP);}
|("=")=>"="
{$setType(EQ_OP);}
|("-")=>"-"
{$setType(DASH);}
| ("or") => "or"
{$setType(OR_OP);}
| ("and") => "and"
{$setType(AND_OP);}
| ("not") => "not"
{$setType(NOT_OP);}
|(('a'..'z')('a'..'z') WS) => ('a'..'z')('a'..'z')
{
$setType(S_TAG);
}
| ('w'INT)=>'w'INT
{$setType(W_OP);}
| ('n'INT)=>'n'INT
{$setType(N_OP);}
)
|
(ALLOWCHARS)+
{$setType(PATTERN);}
;
protected
REAL : INT'.'INT;
protected
DIGIT : ('0'..'9');
protected
INT : (DIGIT)+;
protected
ALLOWCHARS : ~('"'|'('|')'|'\n'|' '|'\r'|'\t'|';');
WS : (
options {
generateAmbigWarnings=false;
}
: ' '
| '\t'
| '\n' { newline(); }
| "\r\n" { newline(); }
| '\r' { newline(); }
)+
{ $setType(Token.SKIP); }
;
// semi is made special for test here only
SEMI : ';';
Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/
More information about the antlr-interest
mailing list