[antlr-interest] How to use predicate or workaround the bug ?

Ilia Kantor ilia at obnovlenie.ru
Tue Jan 2 06:34:56 PST 2007


I'm using antlr 3b6 build from Tue Jan  2 14:28:20 UTC 2007. Also tried 3b5.

I am trying to use a semantic predicate, but ANTLR fails with the error shown below. Is there a workaround?

Here I am trying to rewrite
'LCURL  exprs? RCURL' into 'TEXT exprs? TEXT' in the common case, and to strip
the LCURL/RCURL when this LCURL/RCURL block is the outermost one, that is, when
LCURL is the first token.

==================== rule with predicate=====
curly_block
	:	
	 {input.index()==0}? LCURL exprs? RCURL -> exprs? |
     LCURL exprs? RCURL -> TEXT[$LCURL] exprs? TEXT[$RCURL]       
===================== error  ===========


ANTLR Parser Generator  Version 3.0b6 (??)  1989-2007
Exception in thread "main" java.lang.StackOverflowError
        at java.util.HashMap$KeyIterator.<init>(HashMap.java:875)
        at java.util.HashMap$KeyIterator.<init>(HashMap.java:875)
        at java.util.HashMap.newKeyIterator(HashMap.java:889)
        at java.util.HashMap$KeySet.iterator(HashMap.java:921)
        at java.util.HashSet.iterator(HashSet.java:154)
        at 
org.antlr.analysis.DFAState.getGatedPredicatesInNFAConfigurations(DFAState.java:695)
        at 
org.antlr.codegen.CodeGenerator.canGenerateSwitch(CodeGenerator.java:1119)
        at 
org.antlr.codegen.ACyclicDFACodeGenerator.walkFixedDFAGeneratingStateMachine(ACyclicDFACodeGenerator.java:72)
        at 
org.antlr.codegen.ACyclicDFACodeGenerator.walkFixedDFAGeneratingStateMachine(ACyclicDFACodeGenerator.java:146)
=..................... the last line repeats many times ..........=



======= Full grammar ===========


/*
 * Placeholder token types.
 *
 * EVAL, TEXT, RULE_PROPERTY and RULE_PART appear only as node types in the
 * parser's rewrite rules; IF, THEN and ELSE are additionally assigned to
 * WORD tokens by the action in the WORD rule.  Every rule here is guarded
 * by an always-false predicate.
 *
 * They are written as lexer rules instead of being listed in a
 * tokens { .. } section because listing them there causes ANTLRWorks bugs
 * such as "...no lexer rule for IF...".
 */
IF            : {false}? . ;
THEN          : {false}? . ;
ELSE          : {false}? . ;
EVAL          : {false}? . ;
TEXT          : {false}? . ;
RULE_PROPERTY : {false}? . ;
RULE_PART     : {false}? . ;

// Single-character punctuation tokens, one token type per character.
LCURL     : '{' ;
RCURL     : '}' ;
RULE_CALL : '#' ;
CONTROL   : '~' ;
SEMI      : ';' ;
DOT       : '.' ;
MINUS     : '-' ;
GT        : '>' ;

// Any one of the remaining punctuation characters; all of them share this
// single token type.
SPECIAL_CHAR
	: '`' | '!' | '@' | '$' | '%' | '^' | '&' | '*' | '(' | ')'
	| '+' | '=' | '[' | ']' | ':' | '\'' | '"' | '\\' | '|' | ',' | '<'
	| '/' | '?'
	;
	


// One whitespace character: space, carriage return, tab, form feed or
// line feed.  Being a fragment, it never produces a token by itself.
fragment WS_CHAR
	: ' ' | '\r' | '\t' | '\u000C' | '\n'
	;

// A run of one or more whitespace characters.
WS	: WS_CHAR+ ;

// A run of one or more characters, none of which is a SPECIAL_CHAR
// character, a WS_CHAR character, or one of the single-character
// punctuation tokens ('{', '~', '}', '#', ';', '.', '-', '>').
WORD
	:
{
	// Remember whether the character immediately preceding this word is
	// '~', i.e. whether the word directly follows a CONTROL character.
	boolean afterControl = input.index()>0 && input.LT(-1)=='~';
}
	(
		~(	'`' | '!' | '@' | '$' | '%' | '^' | '&' | '*' | '(' | ')'
		|	'+' | '=' | '[' | ']' | ':' | '\'' | '"' | '\\' | '|' | ','
		|	'<' | '/' | '?'
		|	'{' | '~' | '}' | '#' | ';' | '.' | '-' | '>'
		|	' ' | '\r' | '\t' | '\u000C' | '\n'
		)
	)+
{
	// Directly after '~', the words if / else / then (in any letter case)
	// are emitted with the corresponding keyword token type instead of WORD.
	if (afterControl) {
		String word = getText();
		if (word.equalsIgnoreCase("if")) { $type=IF; }
		else if (word.equalsIgnoreCase("else")) { $type=ELSE; }
		else if (word.equalsIgnoreCase("then")) { $type=THEN; }
	}
}
	;



// Start rule: a document is a non-empty sequence of expressions.
document
	: exprs
	;

// One or more expressions.
exprs
	: expr+
	;


// A single expression: either a non-semicolon expression, which is passed
// through as-is, or a lone SEMI, which is rewritten to a TEXT node built
// from the SEMI token.
expr
	: expr_no_semi -> expr_no_semi
	| SEMI         -> TEXT[$SEMI]
	;

// One or more expressions, none of which is a bare semicolon.
exprs_no_semi
	: expr_no_semi+
	;

// An expression other than a bare semicolon: a simple expression or a
// curly-brace block.
expr_no_semi
	: expr_simple
	| curly_block
	;



// One or more simple expressions.
exprs_simple
	: expr_simple+
	;

// A simple expression: a command, a rule call or plain text.  It never
// starts with a curly brace and is never a semicolon.
expr_simple
	: command
	| rule_call
	| text
	;


// A command introduced by CONTROL: either an eval block or an if block.
command
	: command_eval
	| command_if
	;


// Conditional command.  Inside the braces come optional expressions, then
// a mandatory "then" part and an optional "else" part.  The result is an
// IF-rooted tree; the CONTROL, LCURL and RCURL tokens are dropped.
command_if
	: CONTROL IF LCURL
		exprs? command_if_part_then command_if_part_else?
	  RCURL
	  -> ^(IF exprs? command_if_part_then command_if_part_else?)
	;


// "else" part of an if command: CONTROL ELSE followed by optional
// expressions, rewritten to an ELSE-rooted tree.
command_if_part_else
	: CONTROL ELSE exprs?
	  -> ^(ELSE exprs?)
	;

// "then" part of an if command: CONTROL THEN followed by optional
// expressions, rewritten to a THEN-rooted tree.
command_if_part_then
	: CONTROL THEN exprs?
	  -> ^(THEN exprs?)
	;



// Eval command: CONTROL immediately followed by a brace-delimited block of
// optional expressions, rewritten to an EVAL-rooted tree.
command_eval
	: CONTROL LCURL exprs? RCURL
	  -> ^(EVAL exprs?)
	;



// Rule call, in short form (RULE_CALL WORD) or long form (RULE_CALL
// followed by braces containing a WORD and any number of qualifier
// parts).  Both forms produce a RULE_CALL-rooted tree.
rule_call
	: RULE_CALL WORD
	  -> ^(RULE_CALL WORD)
	| RULE_CALL LCURL WORD rule_long_def_part* RCURL
	  -> ^(RULE_CALL WORD rule_long_def_part*)
	;

// One qualifier inside a long-form rule call:
//   MINUS GT WORD  becomes  ^(RULE_PROPERTY WORD)
//   DOT WORD       becomes  ^(RULE_PART WORD)
rule_long_def_part
	: MINUS GT WORD
	  -> ^(RULE_PROPERTY WORD)
	| DOT WORD
	  -> ^(RULE_PART WORD)
	;
	

/*
 * A brace-delimited block of optional expressions.
 *
 * Output:
 *   - when the opening LCURL is the very first token of the input (the
 *     outermost block), the braces are stripped and only the inner
 *     expressions are emitted;
 *   - otherwise both braces are kept, converted to TEXT nodes built from
 *     the LCURL and RCURL tokens.
 *
 * Both outputs match exactly the same input, so this is written as ONE
 * alternative with a predicated rewrite.  Writing it as two syntactically
 * identical alternatives told apart only by a semantic predicate gives the
 * tool a decision it cannot resolve by lookahead, and that form made the
 * ANTLR 3.0b5/3.0b6 code generator die with a StackOverflowError.  With a
 * single alternative there is no prediction decision for this rule at all.
 *
 * The rewrite predicate is evaluated after the block has been matched, so
 * it tests the token index recorded on LCURL rather than input.index().
 * NOTE(review): assumes the first token of the stream has token index 0 and
 * that no tokens are sent to a hidden channel - confirm for your lexer.
 */
curly_block
	:	LCURL exprs? RCURL
		-> {$LCURL.getTokenIndex()==0}? exprs?
		-> TEXT[$LCURL] exprs? TEXT[$RCURL]
	;

// Plain text: any one of these tokens, rewritten to a TEXT node built from
// the matched token.
text
	: DOT          -> TEXT[$DOT]
	| MINUS        -> TEXT[$MINUS]
	| WORD         -> TEXT[$WORD]
	| WS           -> TEXT[$WS]
	| GT           -> TEXT[$GT]
	| SPECIAL_CHAR -> TEXT[$SPECIAL_CHAR]
	;





More information about the antlr-interest mailing list