[antlr-interest] Help with pesky Lexer determinism
Mark Bednarczyk
voytechs at yahoo.com
Fri Jun 10 09:44:50 PDT 2005
While I'm on a roll, I added Ethernet (MAC) address matching code
for the following forms:
XX:XX:XX:XX:XX:XX
dec:dec:dec:dec:dec:dec
XX-XX-XX-XX-XX-XX
dec-dec-dec-dec-dec-dec
And the code if anyone is interested:
/*
 * Disable some rules inherited from java.g by overriding them with
 * empty protected rules. The COLON and IDENT token types are instead
 * assigned inside NUM_INT via $setType.
 */
protected COLON :;
protected IDENT :;
// One to three decimal digits. Used by NUM_INT for each dotted component
// of an IPv4 address. Only the digit count is checked here; the numeric
// value is not range-limited (e.g. "999" matches).
protected NUM_3DIGIT
    :   '0'..'9' ( '0'..'9' ( '0'..'9' )? )?
    ;
// One to four hexadecimal digits. Used by NUM_INT for each group of a
// MAC address or IPv6 address. HEX_DIGIT is not defined in this fragment
// (presumably inherited from java.g -- confirm).
protected NUM_HEX_4DIGIT
    :   HEX_DIGIT ((HEX_DIGIT) ((HEX_DIGIT) (HEX_DIGIT)?)?)?
    ;   // terminator was missing, which made the rule run into NUM_INT
// NUM_INT: combined lexer rule that recognises, in this order of
// alternatives:
//   - IPv4 addresses            -> IP_V4
//   - MAC addresses             -> MAC_ADDRESS
//   - IPv6 addresses            -> IP_V6
//   - a lone ':'                -> COLON
//   - identifiers               -> IDENT
//   - '.' and '.'-led numbers   -> DOT / NUM_FLOAT / NUM_DOUBLE
//   - integer and float numbers -> NUM_INT / NUM_LONG / NUM_FLOAT / NUM_DOUBLE
// Syntactic predicates ((...)=>) are used to pick between the address
// forms, which share common prefixes with each other and with plain
// numbers and identifiers.
//
// Local state:
//   isDecimal - set once the text matched so far may continue as a
//               floating-point literal (guards the float suffix part).
//   t         - the FLOAT_SUFFIX token, if one was matched; its text
//               decides between NUM_FLOAT and NUM_DOUBLE.
NUM_INT
    options {
        testLiterals = true;
    }
    {boolean isDecimal=false; Token t=null; }
    // IPv4 RULE
    :   (NUM_3DIGIT '.' NUM_3DIGIT '.')=>
        (
            NUM_3DIGIT '.' NUM_3DIGIT '.' NUM_3DIGIT '.'
            NUM_3DIGIT
            { $setType(IP_V4); }
        )
    //
    // MAC ADDRESS RULE - exactly 6 COLON/DASH separated ints
    //
    // The predicate ends with ~(':') so that a seventh colon-separated
    // group makes this alternative fail and fall through to IPv6.
    |   (NUM_HEX_4DIGIT (':'|'-') NUM_HEX_4DIGIT (':'|'-')
         NUM_HEX_4DIGIT (':'|'-') NUM_HEX_4DIGIT (':'|'-')
         NUM_HEX_4DIGIT (':'|'-') NUM_HEX_4DIGIT ~(':'))=>
        (
            NUM_HEX_4DIGIT (':'|'-') NUM_HEX_4DIGIT (':'|'-')
            NUM_HEX_4DIGIT (':'|'-') NUM_HEX_4DIGIT (':'|'-')
            NUM_HEX_4DIGIT (':'|'-') NUM_HEX_4DIGIT
        ) { $setType(MAC_ADDRESS); }
    // IPv6 RULE
    |   (NUM_HEX_4DIGIT ':')=>
        (
            // form containing a "::" after one or more leading groups
            ((NUM_HEX_4DIGIT ':')+ ':')=>
            (
                (NUM_HEX_4DIGIT ':')+ ':'
                (NUM_HEX_4DIGIT (':' NUM_HEX_4DIGIT)*)?
            ) { $setType(IP_V6); }
            // form with single colons only
        |   NUM_HEX_4DIGIT (':' NUM_HEX_4DIGIT)+
            { $setType(IP_V6); }
        ) { $setType(IP_V6); }
    // IPv6 starting with "::" followed by at least one group
    |   (':' ':' NUM_HEX_4DIGIT)=>
        ':' ':' NUM_HEX_4DIGIT (':' NUM_HEX_4DIGIT)*
        { $setType(IP_V6); }
    // bare "::"
    |   ':' ':'
        { $setType(IP_V6); }
    // single ':' (the inherited COLON rule is disabled, so it is typed here)
    |   ':'
        { $setType(COLON); }
    // IDENT rule
    |   ('a'..'z'|'A'..'Z'|'_'|'$')
        ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$')*
        { $setType(IDENT); }
    // Number beginning with '.' rule
    |   '.' { $setType(DOT);}
        (   ('0'..'9')+ (EXPONENT)? (f1:FLOAT_SUFFIX {t=f1;})?
            {
                if (t != null &&
                    t.getText().toUpperCase().indexOf('F')>=0) {
                    _ttype = NUM_FLOAT;
                }
                else {
                    _ttype = NUM_DOUBLE; // assume double
                }
            }
        )?
    // Number beginning with a 0 rule
    |   (   '0' {isDecimal = true;} // special case for just '0'
            (   ('x'|'X')
                (   // hex
                    // the 'e'|'E' and float suffix stuff look
                    // like hex digits, hence the (...)+ doesn't
                    // know when to stop: ambig. ANTLR resolves
                    // it correctly by matching immediately. It
                    // is therefore ok to hush warning.
                    options {
                        warnWhenFollowAmbig=false;
                    }
                :   HEX_DIGIT
                )+
            |   //float or double with leading zero
                (('0'..'9')+ ('.'|EXPONENT|FLOAT_SUFFIX)) =>
                ('0'..'9')+
            |   ('0'..'7')+ // octal
            )?
        // A regular number non-zero starting rule
        |   ('1'..'9') ('0'..'9')* {isDecimal=true;} // non-zero decimal
        )
        (   ('l'|'L') { _ttype = NUM_LONG; }
        // only check to see if it's a float if looks like decimal so far
        |   {isDecimal}?
            (   '.' ('0'..'9')* (EXPONENT)? (f2:FLOAT_SUFFIX {t=f2;})?
            |   EXPONENT (f3:FLOAT_SUFFIX {t=f3;})?
            |   f4:FLOAT_SUFFIX {t=f4;}
            )
            {
                if (t != null && t.getText().toUpperCase()
                    .indexOf('F') >= 0) {
                    _ttype = NUM_FLOAT;
                }
                else {
                    _ttype = NUM_DOUBLE; // assume double
                }
            }
        )?
    ;
More information about the antlr-interest
mailing list