[antlr-interest] Error in generated lexer code when using exclusion operator.

Steve Van der Hoeven svdh_free at yahoo.com
Sun Dec 19 01:50:32 PST 2010


I haven't been able to find any issue related to my problem in the
archives. In case I overlooked it, I would be glad to be pointed to the
relevant information.

 

The issue is the following.

 

I have a lexer rule of the form ~(a|b) ~a*

 

In the generated code, the code for ~(a|b) is fine.

For the loop ~a*

      -the code to set the state logic is correct

      -the code to consume the input is wrong. The error (it seems to me) is that
the test for consuming is not the same as the test for the state logic.

 

The issue seems to be in the ANTLR engine, as the error is the same in the
generated C# and Java code.

 

For information, I'm compiling with ANTLR 3.1.1.

 

Below is the relevant part of the grammar and the generated code.

 

Thanks for your time

Steve

 

==============================

Grammar

==============================

 

SPACE :     SPACECHARS+{$channel=HIDDEN;};

NEWLINE     :     '\r'?'\n'{$channel=HIDDEN;};

      

fragment DELIMS

      :

SPACECHARS|'\r'|'\n'|'#'|'\\'|'@'|'('|')'|'\''|'['|']'|'"'|';';

IDENTIFIER3 

      :     ~('-'|'+'|'/'|'.'|'0'..'9'|DELIMS)

            ~DELIMS*

      ;

 

================================

Generated code

================================

 

public void mIDENTIFIER3(){

      try{

            int _type = IDENTIFIER3;

            int _channel = DEFAULT_TOKEN_CHANNEL;

            

            //  (~ ( '-' | '+' | '/' | '.' | '0' .. '9' | DELIMS ) (~ DELIMS
)* )

                  //  ~ ( '-' | '+' | '/' | '.' | '0' .. '9' | DELIMS ) (~
DELIMS )*

                  {

                        if ( (input.LA(1) >= '\u0000' && input.LA(1) <=

'\b') || input.LA(1) == '\u000B' || (input.LA(1) >= '\u000E' && input.LA(1)
<= '\u001F') || input.LA(1) == '!' || (input.LA(1) >= '$' && input.LA(1) <=

'&') || input.LA(1) == '*' || input.LA(1) == ',' || input.LA(1) == ':' ||

(input.LA(1) >= '<' && input.LA(1) <= '?') || (input.LA(1) >= 'A' &&

input.LA(1) <= 'Z') || (input.LA(1) >= '^' && input.LA(1) <= '\uFFFF') ) {

                        input.Consume();

                  }else{

                              MismatchedSetException mse = new
MismatchedSetException(null,input);

                              Recover(mse);

                              throw mse;}

 

                  // (~ DELIMS )*

                  do{

                      int alt35 = 2;

                      int LA35_0 = input.LA(1);

 

                  //state logic

                      if ( ((LA35_0 >= '\u0000' && LA35_0 <= '\b') || LA35_0
== '\u000B' || (LA35_0 >= '\u000E' && LA35_0 <= '\u001F') || LA35_0 == '!'

|| (LA35_0 >= '$' && LA35_0 <= '&') || (LA35_0 >= '*' && LA35_0 <= ':') 

|| ||

(LA35_0 >= '<' && LA35_0 <= '?') || (LA35_0 >= 'A' && LA35_0 <= 'Z') ||
(LA35_0 >= '^' && LA35_0 <= '\uFFFF')) ){

                          alt35 = 1;

                      }

 

                      switch (alt35) 

                        {

                              case 1 :

                                  //~ DELIMS

                                  {

                                    //consumption code

                                    if ( (input.LA(1) >= '\u0000' &&

input.LA(1) <= ' ') || (input.LA(1) >= '\"' && input.LA(1) <= '\uFFFF') ) {

                                        input.Consume();

 

                                    }else {

                                        MismatchedSetException mse = new
MismatchedSetException(null,input);

                                        Recover(mse);

                                        throw mse;

                              }

 

                                  }

                                  break;

                              default:

                                  goto loop35;

                      }

                  } while (true);

 

                  loop35:

                        ;     // Stops C# compiler whining that label

'loop35' has no statements

 

            }

            state.type = _type;

            state.channel = _channel;

        }

        finally 

      {

        }

    }

 

 



More information about the antlr-interest mailing list