[antlr-interest] Recognizing syntax errors with C#

pragmaik contact at maik-schmidt.de
Mon Sep 26 02:02:05 PDT 2011


I have written a grammar for a small subset of C and my parser does not work
reliably, that is sometimes it reports syntax errors and sometimes it
doesn't. For example, my grammar insists on variable initialization:

bool x; // This is not allowed.
bool y = true; // This is allowed

My problem is that the parser emits an error message for the program above.
But if I simply switch the lines like so:

bool y = true; // This is allowed
bool x; // This is not allowed.

the parser happily creates an AST for the first statement and simply ignores
the second one without noticing me about the syntax error. 

What am I doing wrong? My grammar looks as follows:


grammar MyGrammar;

options {
    language = CSharp3;
    output = AST;
    ASTLabelType = MyAST;
}

tokens {
    VAR_DECL;
	ARG_DECL;
	METHOD_DECL;
	ASSIGN = '=';
	EXPR;
	ELIST;
	BLOCK;
	CALL;
	UNARY_MINUS;
	UNARY_NOT;
}

@lexer::namespace{MyGrammar}
@parser::namespace{MyGrammar}

/******************************************************************************
 *                             Parser section

*****************************************************************************/

public
compilationUnit
    :    (methodDeclaration | variableDeclaration)+
	;

methodDeclaration
    :    returnType IDENTIFIER '(' (formalParameter (',' formalParameter)*)?
')' block -> ^(METHOD_DECL returnType IDENTIFIER formalParameter* block)
    ;

formalParameter
    :    type IDENTIFIER -> ^(ARG_DECL type IDENTIFIER)
    ;

variableDeclaration
    :    type IDENTIFIER '=' expression ';' -> ^(VAR_DECL type IDENTIFIER
expression)
    ;

block 
    :    '{' (statement)* '}' -> ^(BLOCK statement*)
    ;

statement
options { backtrack=true; }
    :    block
	|    variableDeclaration
    |    lhs '=' expression ';' -> ^('=' lhs expression)
	|    'return' expression? ';' -> ^('return' expression?)
	|    'if' '(' expression ')' b1=block
	     ('else' b2=block -> ^('if' expression $b1 $b2)
		 |                -> ^('if' expression $b1)
		 )
    |    postfixExpression ';' -> ^(EXPR postfixExpression)
	|	 ';'!
	;

lhs :    postfixExpression -> ^(EXPR postfixExpression)
    ;

expressionList
    :    expr (',' expr)* -> ^(ELIST expr+)
    |    -> ELIST
    ;

expression
    :    expr -> ^(EXPR expr)
    ;

expr:    logicalOrExpression
    ;

logicalOrExpression
    :    logicalAndExpression ('or'^ logicalAndExpression)*
	;

logicalAndExpression
    :    equalityExpression ('and'^ equalityExpression)*
	;

equalityExpression
    :    relationalExpression (('!='^ | '=='^) relationalExpression)*
    ;

relationalExpression
    :    additiveExpression (('<'^ | '>'^ | '<='^ | '>='^ )
additiveExpression)*
    ;

additiveExpression
    :    multiplicativeExpression (('+'^ | '-'^) multiplicativeExpression)*
    ;

multiplicativeExpression
    :    unaryExpression (('*'^ | '/'^) unaryExpression)*
    ;

unaryExpression
    :    '-' unaryExpression -> ^(UNARY_MINUS unaryExpression)
    |    '+' unaryExpression -> unaryExpression
    |    '!' unaryExpression -> ^(UNARY_NOT unaryExpression)
    |    postfixExpression
    ;

postfixExpression
    :    (atom -> atom)
         (
            '(' expressionList ')' -> ^(CALL["CALL"] $postfixExpression
expressionList)
         )*              
    ;           

atom:    IDENTIFIER
	|	 literal
	|	 '(' expr ')' -> expr
	;

literal
    :    INTLITERAL
    |    LONGLITERAL
    |    DOUBLELITERAL
    |    STRINGLITERAL
    |    'true'
    |    'false'
    ;

returnType
    :    type
	|    'void'
	;

type
    :    primitiveType
	;

primitiveType
    :    'int'
	|    'long'
	|    'double'
	|    'string'
	|    'bool'
	;

/******************************************************************************
 *                               Lexer section

*****************************************************************************/

 TRUE
    :    'true'
    ;

 FALSE
    :    'false'
    ;

LONGLITERAL
    :    IntegerNumber LongSuffix
    ;

INTLITERAL
    :    IntegerNumber
	;

fragment
IntegerNumber
    :    '0'
    |	 '1'..'9' ('0'..'9')*
    |	 '0' ('0'..'7')+
    |    HexPrefix HexDigit+        
    ;

fragment
HexPrefix
    :    '0x' | '0X'
    ;
        
fragment
HexDigit
    :   ('0'..'9'|'a'..'f'|'A'..'F')
    ;

fragment
LongSuffix
    :   'l' | 'L'
    ;

fragment
NonIntegerNumber
    :   ('0' .. '9')+ '.' ('0' .. '9')* Exponent?  
    |   '.' ( '0' .. '9' )+ Exponent?  
    |   ('0' .. '9')+ Exponent  
    |   ('0' .. '9')+ 
    |   
        HexPrefix (HexDigit )* 
        (    () 
        |    ('.' (HexDigit )* ) 
        ) 
        ( 'p' | 'P' ) 
        ( '+' | '-' )? 
        ( '0' .. '9' )+
        ;
        
fragment 
Exponent    
    :   ( 'e' | 'E' ) ( '+' | '-' )? ( '0' .. '9' )+ 
    ;
    
fragment
DoubleSuffix
    :   'd' | 'D'
    ;
        
DOUBLELITERAL
    :   NonIntegerNumber DoubleSuffix?
    ;

STRINGLITERAL
    :   '"' (EscapeSequence | ~( '\\' | '"' | '\r' | '\n' ) )* '"' 
    ;

fragment
EscapeSequence 
    :   '\\' (
                 'b' 
             |   't' 
             |   'n' 
             |   'f' 
             |   'r' 
             |   '\"' 
             |   '\'' 
             |   '\\' 
             |   ('0'..'3') ('0'..'7') ('0'..'7')
             |   ('0'..'7') ('0'..'7') 
             |   ('0'..'7')
             )          
    ;     

IDENTIFIER	:	('a'..'z' |'A'..'Z' |'_' ) ('a'..'z' |'A'..'Z' |'_' |'0'..'9'
)* ;

WS  :   (' ' | '\t' | '\n' | '\r')+ { $channel = 99; } ;

COMMENT
    :   '/*' (options {greedy=false;} : . )*  '*/' { $channel = 99; }
    ;

LINE_COMMENT
    :   '//' ~('\n'|'\r')*  ('\r\n' | '\r' | '\n')  { $channel = 99; }
    |   '//' ~('\n'|'\r')*  { $channel = 99; } // A line comment could
appear at the end of the file without CR/LF
    ;

ANYCHAR : . ;


Maik


--
View this message in context: http://antlr.1301665.n2.nabble.com/Recognizing-syntax-errors-with-C-tp6831210p6831210.html
Sent from the ANTLR mailing list archive at Nabble.com.


More information about the antlr-interest mailing list