[antlr-interest] Recognizing syntax errors with C#

Mon Sep 26 02:06:31 PDT 2011

Hi Maik,

Try adding an EOF at the end of the entry point of your grammar:

compilationUnit
   :    (methodDeclaration | variableDeclaration)+ EOF
   ;

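That way you force your parser to consume the entire token stream. Without the EOF, once at least one declaration has been matched, the (...)+ loop simply exits at the first input it cannot match and the rule returns without reporting an error.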

Regards,

Bart.

On Mon, Sep 26, 2011 at 11:02 AM, pragmaik <contact at maik-schmidt.de> wrote:

> I have written a grammar for a small subset of C and my parser does not work
> reliably; that is, sometimes it reports syntax errors and sometimes it
> doesn't. For example, my grammar insists on variable initialization:
>
> bool x; // This is not allowed.
> bool y = true; // This is allowed
>
> My problem is that the parser emits an error message for the program above.
> But if I simply switch the lines like so:
>
> bool y = true; // This is allowed
> bool x; // This is not allowed.
>
> the parser happily creates an AST for the first statement and simply ignores
> the second one without notifying me about the syntax error.
>
> What am I doing wrong? My grammar looks as follows:
>
>
> grammar MyGrammar;
>
> options {
>    language = CSharp3;
>    output = AST;
>    ASTLabelType = MyAST;
> }
>
> tokens {
>    VAR_DECL;
>        ARG_DECL;
>        METHOD_DECL;
>        ASSIGN = '=';
>        EXPR;
>        ELIST;
>        BLOCK;
>        CALL;
>        UNARY_MINUS;
>        UNARY_NOT;
> }
>
> @lexer::namespace{MyGrammar}
> @parser::namespace{MyGrammar}
>
>
> /******************************************************************************
>  *                             Parser section
>
>
> *****************************************************************************/
>
> public
> compilationUnit
>    :    (methodDeclaration | variableDeclaration)+
>        ;
>
> methodDeclaration
>    :    returnType IDENTIFIER '(' (formalParameter (',' formalParameter)*)?
> ')' block -> ^(METHOD_DECL returnType IDENTIFIER formalParameter* block)
>    ;
>
> formalParameter
>    :    type IDENTIFIER -> ^(ARG_DECL type IDENTIFIER)
>    ;
>
> variableDeclaration
>    :    type IDENTIFIER '=' expression ';' -> ^(VAR_DECL type IDENTIFIER
> expression)
>    ;
>
> block
>    :    '{' (statement)* '}' -> ^(BLOCK statement*)
>    ;
>
> statement
> options { backtrack=true; }
>    :    block
>        |    variableDeclaration
>    |    lhs '=' expression ';' -> ^('=' lhs expression)
>        |    'return' expression? ';' -> ^('return' expression?)
>        |    'if' '(' expression ')' b1=block
>             ('else' b2=block -> ^('if' expression $b1 $b2)
>                 |                -> ^('if' expression $b1)
>                 )
>    |    postfixExpression ';' -> ^(EXPR postfixExpression)
>        |        ';'!
>        ;
>
> lhs :    postfixExpression -> ^(EXPR postfixExpression)
>    ;
>
> expressionList
>    :    expr (',' expr)* -> ^(ELIST expr+)
>    |    -> ELIST
>    ;
>
> expression
>    :    expr -> ^(EXPR expr)
>    ;
>
> expr:    logicalOrExpression
>    ;
>
> logicalOrExpression
>    :    logicalAndExpression ('or'^ logicalAndExpression)*
>        ;
>
> logicalAndExpression
>    :    equalityExpression ('and'^ equalityExpression)*
>        ;
>
> equalityExpression
>    :    relationalExpression (('!='^ | '=='^) relationalExpression)*
>    ;
>
> relationalExpression
>    :    additiveExpression (('<'^ | '>'^ | '<='^ | '>='^ )
> additiveExpression)*
>    ;
>
> additiveExpression
>    :    multiplicativeExpression (('+'^ | '-'^) multiplicativeExpression)*
>    ;
>
> multiplicativeExpression
>    :    unaryExpression (('*'^ | '/'^) unaryExpression)*
>    ;
>
> unaryExpression
>    :    '-' unaryExpression -> ^(UNARY_MINUS unaryExpression)
>    |    '+' unaryExpression -> unaryExpression
>    |    '!' unaryExpression -> ^(UNARY_NOT unaryExpression)
>    |    postfixExpression
>    ;
>
> postfixExpression
>    :    (atom -> atom)
>         (
>            '(' expressionList ')' -> ^(CALL["CALL"] $postfixExpression
> expressionList)
>         )*
>    ;
>
> atom:    IDENTIFIER
>        |        literal
>        |        '(' expr ')' -> expr
>        ;
>
> literal
>    :    INTLITERAL
>    |    LONGLITERAL
>    |    DOUBLELITERAL
>    |    STRINGLITERAL
>    |    'true'
>    |    'false'
>    ;
>
> returnType
>    :    type
>        |    'void'
>        ;
>
> type
>    :    primitiveType
>        ;
>
> primitiveType
>    :    'int'
>        |    'long'
>        |    'double'
>        |    'string'
>        |    'bool'
>        ;
>
>
> /******************************************************************************
>  *                               Lexer section
>
>
> *****************************************************************************/
>
>  TRUE
>    :    'true'
>    ;
>
>  FALSE
>    :    'false'
>    ;
>
> LONGLITERAL
>    :    IntegerNumber LongSuffix
>    ;
>
> INTLITERAL
>    :    IntegerNumber
>        ;
>
> fragment
> IntegerNumber
>    :    '0'
>    |    '1'..'9' ('0'..'9')*
>    |    '0' ('0'..'7')+
>    |    HexPrefix HexDigit+
>    ;
>
> fragment
> HexPrefix
>    :    '0x' | '0X'
>    ;
>
> fragment
> HexDigit
>    :   ('0'..'9'|'a'..'f'|'A'..'F')
>    ;
>
> fragment
> LongSuffix
>    :   'l' | 'L'
>    ;
>
> fragment
> NonIntegerNumber
>    :   ('0' .. '9')+ '.' ('0' .. '9')* Exponent?
>    |   '.' ( '0' .. '9' )+ Exponent?
>    |   ('0' .. '9')+ Exponent
>    |   ('0' .. '9')+
>    |
>        HexPrefix (HexDigit )*
>        (    ()
>        |    ('.' (HexDigit )* )
>        )
>        ( 'p' | 'P' )
>        ( '+' | '-' )?
>        ( '0' .. '9' )+
>        ;
>
> fragment
> Exponent
>    :   ( 'e' | 'E' ) ( '+' | '-' )? ( '0' .. '9' )+
>    ;
>
> fragment
> DoubleSuffix
>    :   'd' | 'D'
>    ;
>
> DOUBLELITERAL
>    :   NonIntegerNumber DoubleSuffix?
>    ;
>
> STRINGLITERAL
>    :   '"' (EscapeSequence | ~( '\\' | '"' | '\r' | '\n' ) )* '"'
>    ;
>
> fragment
> EscapeSequence
>    :   '\\' (
>                 'b'
>             |   't'
>             |   'n'
>             |   'f'
>             |   'r'
>             |   '\"'
>             |   '\''
>             |   '\\'
>             |   ('0'..'3') ('0'..'7') ('0'..'7')
>             |   ('0'..'7') ('0'..'7')
>             |   ('0'..'7')
>             )
>    ;
>
> IDENTIFIER      :       ('a'..'z' |'A'..'Z' |'_' ) ('a'..'z' |'A'..'Z' |'_'
> |'0'..'9'
> )* ;
>
> WS  :   (' ' | '\t' | '\n' | '\r')+ { $channel = 99; } ;
>
> COMMENT
>    :   '/*' (options {greedy=false;} : . )*  '*/' { $channel = 99; }
>    ;
>
> LINE_COMMENT
>    :   '//' ~('\n'|'\r')*  ('\r\n' | '\r' | '\n')  { $channel = 99; }
>    |   '//' ~('\n'|'\r')*  { $channel = 99; } // A line comment could appear at the end of the file without CR/LF
>    ;
>
> ANYCHAR : . ;
>
>
> Maik
>
>
> --
> View this message in context:
> http://antlr.1301665.n2.nabble.com/Recognizing-syntax-errors-with-C-tp6831210p6831210.html
> Sent from the ANTLR mailing list archive at Nabble.com.
>
> List: http://www.antlr.org/mailman/listinfo/antlr-interest
> Unsubscribe:
> http://www.antlr.org/mailman/options/antlr-interest/your-email-address
>