[antlr-interest] Identifiers with Spaces

Mon Nov 29 15:28:44 PST 2010

Sometimes, when the necessary lookahead is small and bounded,
syntactic predicates can be your friend.

See attached.

-------------- next part --------------
// Demo grammar: tokenizes names that may contain embedded spaces (see NAME).
grammar LexerOnly;

@members {
   /** Sample inputs that main runs through the lexer, one string per pass. */
   private static final String[] x = {
      "a8 b c = d   e23      f",
      "a1b2c3   =   xyz",
      "a1 2 b"
   };

   /**
    * Lexes every sample input in turn and prints the resulting tokens.
    * A failure on one input is reported via its stack trace and does not
    * stop the remaining inputs from being processed.
    *
    * @param args command-line arguments; not read
    */
   public static void main(String[] args) {
      for (String input : x) {
         try {
            dumpTokens(input);
         } catch (Exception e) {
            e.printStackTrace();
         }
      }
   }

   /**
    * Prints the input text, then one line per token (ordinal starting at 1,
    * token-type name, matched text) until the lexer returns EOF.
    */
   private static void dumpTokens(String input) {
      System.out.println("about to lex:`"+input+"`");
      LexerOnlyLexer lexer = new LexerOnlyLexer(new ANTLRStringStream(input));

      int ordinal = 1;
      for (Token t = lexer.nextToken();
           t.getType() != LexerOnlyLexer.EOF;
           t = lexer.nextToken()) {
         // NOTE(review): the backslash before each percent sign is kept exactly
         // as in the original; presumably an ANTLR action escape - confirm.
         System.out.format("\%d: type = \%s, text = `\%s`\%n",
                           ordinal,
                           tokenNames[t.getType()],
                           t.getText());
         ordinal++;
      }
   }
}

// Placeholder parser rule: main drives LexerOnlyLexer directly and never
// invokes this rule.
list_of_tokens : .+ EOF ; // dummy, this rule is never used by this test case...

// NUMBER: a run of one or more decimal digits.
NUMBER : DIGIT+ ;
// NAME: one identifier, optionally followed by further identifiers that are
// separated from it by runs of spaces. The syntactic predicate
// (WS ID_HEAD)=> takes the "WS ID" continuation only when the spaces are
// followed by a letter; otherwise the NAME token ends before those spaces.
NAME : ID ( (WS ID_HEAD)=> WS ID )* ;
// EQU: the single equals-sign character.
EQU : '=' ;
// WHITESPACE: a run of spaces matched as its own token and placed on the
// HIDDEN channel.
WHITESPACE : WS { $channel = HIDDEN; };

// Fragment rules: helper patterns referenced by the token rules above.

// ID: one head character followed by zero or more tail characters.
fragment ID : ID_HEAD ID_TAIL* ;
// ID_HEAD: first character of an identifier; must be a letter.
fragment ID_HEAD : ALPHA ;
// ID_TAIL: any later identifier character; a letter or a digit.
fragment ID_TAIL : ALPHA | DIGIT;
// ALPHA: an ASCII letter, lower or upper case.
fragment ALPHA : ('a'..'z')|('A'..'Z') ;
// DIGIT: a single decimal digit.
fragment DIGIT : '0'..'9' ;
// WS: one or more space characters (space only; no tabs or newlines).
fragment WS : ' '+ ;