[antlr-interest] Trouble getting fuzzy lexer to recognize ID followed by asterisk
Rick Mann
rmann at latencyzero.com
Wed Mar 7 20:06:20 PST 2007
I've got a fuzzy lexer to extract a few interesting symbols from .h
and .d files (C headers and D modules, both similar in syntax). It
does okay getting a function declaration like:
extern Boolean
InvokeNavPreviewUPP(
NavCBRecPtr callBackParms,
void* callBackUD,
NavPreviewUPP userUPP) SOME_MACRO;
But fails if one of the argument types has whitespace before the
asterisk. No matter what I do, I can't seem to get it to work. The
rule that matches on type looks like this:
fragment
TYPE: QID ('[]'| '*'+)?
;
I want to change it to:
fragment
TYPE: QID ('[]'| (WS? '*')+)?
;
But then it can't even match the void* (with no space) or even void
without an asterisk. The entire grammar is reproduced below. I'd sure
appreciate some help. Thanks!
--
Rick
lexer grammar FuzzyD;
// Fuzzy lexer: runs in filter mode and only reports the constructs described
// by the token rules below (modules, classes, structs, typedefs, methods).
options
{
filter=true;
}
/** Matches "module <name>;" where <name> is a QIDStar, and prints the
 *  module name.
 */
MODULE
: 'module' WS name=QIDStar WS? ';'
{System.out.println("Module:\t"+$name.text);}
;
/** Matches "import <name>;" where <name> is a QIDStar. No action is
 *  attached, so nothing is printed for imports.
 */
IMPORT
: 'import' WS name=QIDStar WS? ';'
;
/** Avoids having "return foo;" match as a field.
 *  Consumes 'return' and everything after it (non-greedily) up to the
 *  first ';'.
 */
RETURN
: 'return' (options {greedy=false;}:.)* ';'
;
/** Matches a class header up to and including the opening '{':
 *  "class <ID>", optionally followed by ": <QID>, <QID>, ...".
 *  Whitespace is required after the ':'. Prints the class name.
 */
CLASS
: 'class' WS name=ID WS?
(':' WS QID WS? (',' WS? QID WS?)*)? '{'
{System.out.println("Class:\t"+$name.text);}
;
/** Matches "struct <ID> {" and prints the struct name. */
STRUCT
: 'struct' WS name=ID WS?
'{'
{System.out.println("Struct:\t"+$name.text);}
;
/** Matches "typedef <TYPE> <ID>;" or "alias <TYPE> <ID>;" and prints the
 *  base type and the new name, separated by a tab.
 */
TYPEDEF
: ('typedef'|'alias') WS baseName=TYPE WS name=ID WS? ';'
{System.out.println("Typedef:\t" + $baseName.text + "\t" +
$name.text);}
;
/** Matches a method/function declaration (no body) terminated by ';':
 *  return TYPE, name, a parenthesised comma-separated ARG list (a COMMENT
 *  may follow each comma), an optional 'throws' list, and an optional
 *  trailing ID before the ';' (e.g. a macro name after the ')').
 *  Prints the method name.
 */
METHDECL
: TYPE WS name=ID WS? '(' WS? ( ARG WS? (',' WS? COMMENT? WS?
ARG WS?)* )? ')' WS?
('throws' WS QID WS? (',' WS? QID WS?)*)? WS? ID? ';'
{System.out.println("MethodDecl:\t"+$name.text);}
;
/** Matches a method/function definition header up to and including the
 *  opening '{': return TYPE, name, a parenthesised comma-separated ARG
 *  list, and an optional 'throws' list. Unlike METHDECL, no COMMENT is
 *  accepted between arguments. Prints the method name.
 */
METHOD
: TYPE WS name=ID WS? '(' WS? ( ARG WS? (',' WS? ARG WS?)* )?
')' WS?
('throws' WS QID WS? (',' WS? QID WS?)*)? '{'
{System.out.println("Method:\t"+$name.text);}
;
/** Matches a field/variable declaration: "<TYPE> <ID>", an optional '[]'
 *  directly after the name, then ';' or '='. The print action is
 *  commented out, so fields are matched but not reported.
 */
FIELD
: TYPE WS name=ID '[]'? WS? (';'|'=')
// {System.out.println("Field:\t" + $name.text);}
;
/** Matches 'if', 'while', 'switch' or 'for' followed by '('. No action.
 *  NOTE(review): presumably here so these statements are not picked up by
 *  CALL below -- confirm.
 */
STAT: ('if'|'while'|'switch'|'for') WS? '(' ;
/** Matches the start of a call: a (possibly qualified) name followed by
 *  '('. The print action is commented out, so calls are matched but not
 *  reported.
 */
CALL
: name=QID WS? '('
// Disabled action. It must stay on ONE line: if the '//' comment wraps,
// the tail of the action becomes live grammar text and breaks the rule.
// {/*ignore if this/super */ System.out.println("found call "+ $name.text);}
;
/** Matches a block comment from the opening slash-star to the first
 *  star-slash (non-greedy). The print action is commented out.
 */
COMMENT
: '/*' (options {greedy=false;} : . )* '*/'
// {System.out.println("found comment "+getText());}
;
/** Matches a single-line comment from '//' through the next '\n'
 *  (non-greedy). A terminating '\n' is required by the rule. The print
 *  action is commented out.
 */
SL_COMMENT
: '//' (options {greedy=false;} : . )* '\n'
// {System.out.println("found // comment "+getText());}
;
/** Matches a double-quoted string literal; ESC sequences are consumed as
 *  a unit so an escaped quote does not end the literal.
 */
STRING
: '"' (options {greedy=false;}: ESC | .)* '"'
;
/** Matches a single-quoted literal; ESC sequences are consumed as a unit
 *  so an escaped quote does not end the literal.
 */
CHAR
: '\'' (options {greedy=false;}: ESC | .)* '\''
;
/** One or more spaces, tabs, newlines or carriage returns. Referenced by
 *  the other rules wherever whitespace is required (WS) or allowed (WS?).
 */
WS : (' '|'\t'|'\n'|'\r')+
;
/** Qualified identifier: one or more IDs separated by '.', e.g. "a.b.c".
 *  Fragment rule: only used from other rules.
 */
fragment
QID : ID ('.' ID)*
;
/** Qualified identifier with an optional trailing ".*".
 *
 *  QID cannot see beyond end of token so using QID '.*'? somewhere won't
 *  ever match since k=1 lookahead in the QID loop of '.' will make it loop.
 *  I made this rule to compensate.
 */
fragment
QIDStar
: ID ('.' ID)* '.*'?
;
/** A type reference: a qualified name optionally followed by '[]' or by
 *  one or more '*' markers, each of which may be preceded by whitespace
 *  (e.g. "void", "void*", "void *", "char * *").
 *
 *  The syntactic predicate (WS? '*')=> is what makes the optional
 *  whitespace work. Without it, the loop decision is made on the next
 *  character alone: on seeing WS the lexer commits to another iteration
 *  and then fails when no '*' follows (as in "void callBackUD"). The
 *  predicate speculatively checks that a '*' really follows before the
 *  whitespace is consumed; otherwise TYPE ends and the WS is left for the
 *  caller.
 *
 *  Callers still use "TYPE WS ID", so whitespace is required after the
 *  last '*'; "void *p" (no space after the '*') is not covered by this rule.
 */
fragment
TYPE: QID ( '[]' | ( (WS? '*')=> WS? '*' )+ )?
;
/** One formal argument: an optional 'in', 'out', 'inout' or 'const'
 *  qualifier followed by whitespace, then "<TYPE> <ID>". The print action
 *  is commented out.
 */
fragment
ARG : (('in' | 'out' | 'inout' | 'const') WS)? TYPE WS ID
// Disabled action. It must stay on ONE line: if the '//' comment wraps,
// the tail of the action becomes live grammar text and breaks the rule.
// { System.out.println("Found argument " + $ID.text + " of type " + $TYPE.text); }
;
/** Identifier: a letter or '_' followed by any number of letters, digits
 *  or '_'. Fragment rule: only used from other rules.
 */
fragment
ID : ('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
;
/** Escape sequence inside STRING/CHAR: a backslash followed by a double
 *  quote, a single quote, or another backslash.
 */
fragment
ESC : '\\' ('"'|'\''|'\\')
;
More information about the antlr-interest
mailing list