[antlr-interest] Latest antlr (bug?) incorrect parsing

Ilia Kantor ilia at obnovlenie.ru
Wed Dec 27 13:39:38 PST 2006


I am running my old grammars on ANTLR 3.0b5.

Here is a rule:

text returns [String value] :
 (TEXT {$value = $TEXT.text;}) | (WS {$value = $WS.text;})

There is no substitution for $WS in the code generated for the second alternative:

..... java code for first alt ......
TEXT14=(Token)input.LT(1);
.....
retval.value = TEXT14.getText();

..... java code for second alt ......
WS15=(Token)input.LT(1);
...........
retval.value =$WS.text;  // COMPILE ERROR
-----

There clearly should be WS15 instead of $WS in second alt.

Same thing happens with this rule:
expr_no_semi returns [StringRuleResult value]
	:	
		(expr_simple {$value=$expr_simple.value;}) | (curly_block 
{$value=$curly_block.value; }) 
	;

That is, $curly_block is emitted into the Java code untranslated, just like $WS.



============ The whole grammar just in case anyone needs it to test ==========


grammar CommonRule;

options {
    output=AST;
}

@parser::header {
    package grammar.parser;
    
	import grammar.*;	
	import grammar.parser.*;
	import grammar.parser.exception.*;

    import org.apache.log4j.Logger;
    import java.util.Stack;
    import java.util.ArrayList;
}

// Deliberately empty rule-catch section.
// NOTE(review): presumably this removes the generated per-rule catch clause so
// that recognition exceptions propagate to the caller - confirm against the
// ANTLR version in use.
@rulecatch { }

@parser::members {

    // Shared log4j logger for the generated parser; final because it is
    // assigned once and never replaced.
    private static final Logger logger = Logger.getLogger(CommonRuleParser.class);

    // NOTE(review): not referenced by any rule or member visible in this
    // grammar - confirm it is still needed.
    Stack<Integer> caseCountersStack = new Stack<Integer>();

    /**
     * Verifies a condition for a case construct. Does nothing when the
     * condition holds; otherwise reports a case syntax error that names
     * the given token.
     *
     * @param cond  condition that must be true
     * @param token token used in the error message and handed to the exception
     * @throws RuleRecognitionException when the condition is false
     */
    protected void checkCaseCounter(Boolean cond, Token token) throws RuleRecognitionException {
        if (cond) {
            return;
        }
        throw new RuleRecognitionException("Case syntax error near " + token, token);
    }
    
    /**
     * Reports a token mismatch by throwing a MismatchedTokenException built
     * from the expected token type and the input stream. The follow set is
     * accepted but not used.
     * NOTE(review): presumably this replaces the runtime default so that no
     * recovery is attempted - confirm against the ANTLR runtime in use.
     *
     * @param input  stream positioned at the offending token
     * @param ttype  token type that was expected
     * @param follow follow set (ignored)
     * @throws RecognitionException always
     */
    protected void mismatch(IntStream input, int ttype, BitSet follow) throws RecognitionException {
        throw new MismatchedTokenException(ttype, input);
    }
    
    // Collaborator that the rule_call rule invokes (doRule) to evaluate a
    // rule reference. It has no initializer, so it must be supplied through
    // setRuleInterpreter before input containing rule calls is parsed.
    protected RuleInterpreter ruleInterpreter;

    /**
     * Supplies the interpreter used by the rule_call rule.
     *
     * @param ruleInterpreter interpreter to store on this parser
     */
    public void setRuleInterpreter(RuleInterpreter ruleInterpreter) {
        this.ruleInterpreter = ruleInterpreter;
    }
    
    

}



@lexer::header {
    package grammar.parser;	
    import grammar.*;
    
}

// Curly-brace delimiters, used by the document and curly_block rules.
LCURL   : '{' ;
RCURL   : '}' ;

// Tilde token. Not referenced by any parser rule visible in this grammar.
CONTROL : '~' ;

// Literal keyword tokens. Not referenced by any parser rule visible in this
// grammar.
IF      : 'If' ;
THEN    : 'Then' ;
CASE    : 'Case' ;
ELSE    : 'Else' ;


// Plain text: a run of characters that are neither whitespace nor one of
// '~', ';', '#', '{', '}'. At least one character is required ('+' rather
// than '*') so the rule can never match the empty string and hand the parser
// a zero-length token without consuming input.
TEXT : ( ~( '~' | ';' | '#' | '{' | '}' | ' ' | '\r' | '\t' | '\u000C' | '\n' ) )+ ;


// A rule reference: '#' immediately followed by an identifier.
RULE_CALL : '#' ID ;

// One or more whitespace characters (space, CR, tab, form feed, LF).
// This is an ordinary token: the text parser rule matches it.
WS  :   ( ' ' | '\r' | '\t' | '\u000C' | '\n' )+ ;

// Semicolon token, matched by the expr parser rule.
SEMI : ';' ;

// Fragment rules: building blocks for other lexer rules (RULE_CALL uses ID).

// Identifier: one or more name characters.
fragment ID         : NAMECHAR+ ;

// Name character: a letter, a digit or an underscore.
fragment NAMECHAR   : ALPHA_NUM | '_' ;

fragment ALPHA_NUM  : DIGIT | LETTER_ENG ;

fragment DIGIT      : '0'..'9' ;

// ASCII letters only.
fragment LETTER_ENG : 'a'..'z' | 'A'..'Z' ;
    
    

// A document is a sequence of expressions enclosed in curly braces; its value
// is the value of the enclosed expressions. No other rule visible in this
// grammar references it, so it is presumably the start rule.
document returns [StringRuleResult value]
    :   LCURL
        exprs { $value = $exprs.value; }
        RCURL
    ;


// One or more expr, concatenated in input order into a single result.
// The pieces are collected in a StringBuilder instead of by repeated String
// concatenation, which re-copies the accumulated prefix on every iteration.
// NOTE(review): assumes getStringValue() returns a String - confirm.
exprs returns [StringRuleResult value]
@init {
    StringBuilder sum = new StringBuilder();
}
    :   ( e=expr { sum.append($e.value.getStringValue()); } )+
        {
            $value = new StringRuleResult(sum.toString());
            logger.info("exprs value "+$value);
        }
    ;

// One or more expr_no_semi, concatenated in input order into a single result.
// The pieces are collected in a StringBuilder instead of by repeated String
// concatenation, which re-copies the accumulated prefix on every iteration.
// NOTE(review): assumes getStringValue() returns a String - confirm.
exprs_no_semi returns [StringRuleResult value]
@init {
    StringBuilder sum = new StringBuilder();
}
    :   ( e=expr_no_semi { sum.append($e.value.getStringValue()); } )+
        {
            $value = new StringRuleResult(sum.toString());
            logger.info("exprs_no_semi value "+$value);
        }
    ;


// One or more expr_simple, concatenated in input order into a single result.
// The pieces are collected in a StringBuilder instead of by repeated String
// concatenation, which re-copies the accumulated prefix on every iteration.
// NOTE(review): assumes getStringValue() returns a String - confirm.
exprs_simple returns [StringRuleResult value]
@init {
    StringBuilder sum = new StringBuilder();
}
    :   ( e=expr_simple { sum.append($e.value.getStringValue()); } )+
        {
            $value = new StringRuleResult(sum.toString());
            logger.info("exprs_simple value "+$value);
        }
    ;
	
	
// An expression other than a bare semicolon: either a simple expression or a
// brace-delimited block. The value is that of whichever alternative matched.
//
// Each alternative carries an explicit label and is written without the
// redundant surrounding parentheses.
// NOTE(review): the unlabeled, parenthesized form of this rule was reported to
// leave the curly_block reference untranslated in the Java generated by
// ANTLR 3.0b5 (compile error). Labels are intended to sidestep that - confirm
// with the ANTLR version in use.
expr_no_semi returns [StringRuleResult value]
    :   s=expr_simple { $value = $s.value; }
    |   c=curly_block { $value = $c.value; }
    ;

/* A simple expression: one that neither starts with a curly brace nor is a semicolon. */
expr_simple returns [StringRuleResult value]
    :   (
            // Disabled alternative, kept for reference:
            //   command {$value=$command.value;} |
            rule_call { $value = $rule_call.value; }
        |   text      { $value = $text.value; }
        )
        // Runs after either alternative above.
        { logger.info("expr_simple value "+$value); }
    ;
	


// A single expression: either an expr_no_semi, whose value is passed through,
// or a lone semicolon, whose value is a new result wrapping the token text.
expr returns [StringRuleResult value]
    :   e=expr_no_semi { $value = $e.value; }
    |   s=SEMI         { $value = new StringRuleResult($s.text); }
    ;


// A rule reference token. The value is obtained from the rule interpreter,
// which receives the token itself and the token text with its first
// character (the '#') removed.
rule_call returns [StringRuleResult value]
    :   RULE_CALL
        {
            String ruleName = $RULE_CALL.text.substring(1);
            $value = ruleInterpreter.doRule($RULE_CALL, ruleName);
            logger.info("rule_call value " + $value);
        }
    ;


// A brace-delimited block of expressions. The value is the text of the
// opening brace, followed by the string value of the enclosed expressions,
// followed by the text of the closing brace.
curly_block returns [StringRuleResult value]
    :   LCURL
        exprs
        RCURL
        {
            $value = new StringRuleResult(
                $LCURL.text + $exprs.value.getStringValue() + $RCURL.text);
            logger.info("curly_block value "+$value);
        }
    ;


// A piece of literal content: either a TEXT token or a WS token. The value is
// a new result wrapping the matched token text.
//
// The two alternatives are grouped in one parenthesized block so that the
// logging action runs after either of them. Without the grouping the action
// belongs to the WS alternative only, because '|' binds more loosely than
// sequencing, and nothing is logged for TEXT. This matches the layout used by
// expr_simple.
//
// The token references are labelled.
// NOTE(review): the unlabeled WS reference in the parenthesized second
// alternative was reported to be left untranslated in the Java generated by
// ANTLR 3.0b5. Labels are intended to sidestep that - confirm with the ANTLR
// version in use.
text returns [StringRuleResult value]
    :   (   t=TEXT { $value = new StringRuleResult($t.text); }
        |   w=WS   { $value = new StringRuleResult($w.text); }
        )
        { logger.info("text value "+$value); }
    ;














More information about the antlr-interest mailing list