[antlr-interest] Antlr v3 -- Lexing '..' and '1.'

Sun Jul 30 16:02:25 PDT 2006

Ter and Loring :-

Thanks for your replies!

I now have a working lexer fragment that handles float, '..' and '...' tokens,
attached below.

I hope my implementation matches what you intended for dealing with this.

(I also attach a TestDriver in case anybody wants to reproduce my results.)

Thanks again!
   -jbb

Test.g follows...
//---------------------------begin cut here---------------------------
grammar Test;

//------------------------------------------------------------------------
// code to be incorporated into the generated lexer...

@lexer::header {
import java.util.Vector;
}

@lexer::members {
    // Replacement emit()/nextToken() for the generated lexer so that a single
    // lexer rule can hand more than one token to the parser: emit() appends to
    // a small buffer and nextToken() drains that buffer before lexing again.

    // maximum number of emit() calls inside any rule action
    private static final int MAX_EMIT_COUNT = 2;

    // buffer (queue) to hold the emit()'d tokens
    // NOTE(review): the capacity is fixed and emit() does no bounds check; more
    // than MAX_EMIT_COUNT emit() calls between two resets in nextToken() would
    // index past the end of this array.
    private Token [] myToken = new Token[MAX_EMIT_COUNT];
    private int add_idx = 0; // deposit emit token here
    private int next_idx = 0; // next token to be delivered to parser

    // Records t as the current token and appends it to the buffer.
    public void emit(Token t) {
        token = t; // set flag to avoid automatic emit() at end of rule.
        myToken[add_idx++] = t;
    }

    // Returns the next token for the parser. Buffered tokens are delivered
    // first, in the order they were emitted; when the buffer is empty the
    // lexer is run again to refill it. Returns Token.EOF_TOKEN once the input
    // is exhausted and the buffer is empty.
    public Token nextToken() {
        while (true) {
            if ( add_idx == next_idx ) {
                // Buffer fully drained: clear the state and lex more input.
                token = null;
                add_idx = 0;
                next_idx = 0;
                tokenStartCharIndex = getCharIndex();
                if ( input.LA(1)==CharStream.EOF ) {
                    return Token.EOF_TOKEN;
                }
                try {
                    // Expected to call emit() at least once on success, which
                    // makes the else-branch below fire on the next iteration.
                    mTokens();
                }
                catch (RecognitionException re) {
                    // Report, recover, and let the loop retry; if nothing was
                    // emitted the buffer is still empty and lexing resumes.
                    reportError(re);
                    recover(re);
                }
            } else {
                // Deliver the oldest undelivered token.
                Token result = myToken[next_idx++];
                if ( result != Token.SKIP_TOKEN ) { // discard SKIP tokens
                    return result;
                }
            }
        }
    }
}

//------------------------------------------------------------------------
// next follows the (dummy) parser...

// Placeholder start rule: accepts one or more tokens of any type, then EOF.
// The driver in this post only inspects the lexer's token stream.
test : .+ EOF;

//------------------------------------------------------------------------
// and now, we specify the lexical analyzer...

// Punctuation made of one, two or three consecutive dots.
// RANGE and ELLIPSIS token types are also assigned by the actions in FLOAT
// when a number is immediately followed by '..' or '...'.
DOT : '.' ;
RANGE : '..' ;
ELLIPSIS : '...' ;

// A natural number: one or more decimal digits (see the UINT fragment).
// FLOAT's actions also re-type a leading digit run to NATURAL for "1.."/"1...".
NATURAL : UINT ;

// Floating-point literals, in three shapes:
//   1. digits '.' [digits] [exponent]     e.g. "1.", "1.0", "1.0e+0"
//   2. digits exponent                    e.g. "1e1", "1e-1"
//   3. '.' digits [exponent]              e.g. ".0", ".0e+0"
// Shape 1 also has to cope with "1.." and "1...": after digits and a dot, a
// second dot means the input was a NATURAL followed by RANGE or ELLIPSIS.
// In that case the actions emit two tokens explicitly instead of one FLOAT.
FLOAT :
        ( i=UINT d=DOT
            ( ( UINT? EXPONENT? )
            | ( r=DOT
                    ( ( /*empty*/
                            // digits followed by exactly two dots:
                            // emit the digits as NATURAL, then reuse the
                            // first DOT token, re-typed and re-texted, as RANGE.
                            { i.setType(NATURAL);
                              emit(i);
                              d.setType(RANGE);
                              d.setText("..");
                              emit(d); } )
                    | ( e=DOT
                            // digits followed by three dots: same approach,
                            // with the first DOT token turned into ELLIPSIS.
                            { i.setType(NATURAL);
                              emit(i);
                              d.setType(ELLIPSIS);
                              d.setText("...");
                              emit(d); } )
                    ) )
            ) )
    |   ( UINT EXPONENT )
    |   ( DOT UINT EXPONENT? )
    ;

// Helper fragments: usable from other lexer rules, never tokens themselves.

// Exponent suffix: 'e' or 'E', an optional sign, then one or more digits.
fragment EXPONENT : ( 'e' | 'E' ) ( '+' | '-' )? UINT ;

// A single decimal digit.
fragment DIGIT_10 : '0'..'9' ;
// An unsigned run of one or more decimal digits.
fragment UINT : DIGIT_10+ ;

// Whitespace -- ignored
// One or more spaces, tabs, form feeds, carriage returns or line feeds.
WS  :   (   ' '
        |   '\t'
        |   '\f'
        |   ( '\r' | '\n' ) // handle newlines
        )+
        // Assign channel 99 to the whitespace token rather than dropping it;
        // presumably this keeps it out of the parser's view (TODO confirm
        // against the ANTLR runtime in use).
        { channel=99; }
    ;
//---------------------------end cut here-----------------------------

TestDriver.java follows:
//---------------------------begin cut here---------------------------
import java.io.*;
import java.util.*;

import org.antlr.runtime.*;

/**
 * Table-driven check of the Test grammar's lexer: each case is lexed, the
 * resulting tokens are printed, and their types are compared with the
 * expected sequence.
 */
public class TestDriver {

   /** One test case: the text to lex and the token types it should produce. */
   private static class Pair {
      public String input;
      public int[] expected;

      public Pair(String text, int[] types) {
         this.input = text;
         this.expected = types;
      }
   }

   /** Shorthand for building a case from an input and its expected types. */
   private static Pair expect(String text, int... types) {
      return new Pair(text, types);
   }

   /** All cases, run in order by {@link #main}. */
   private static final Pair[] x = {
      // single tokens
      expect("1", Test.NATURAL),
      expect(".", Test.DOT),
      expect("..", Test.RANGE),
      expect("...", Test.ELLIPSIS),
      expect(".0", Test.FLOAT),
      expect(".0e+0", Test.FLOAT),
      expect("1.0", Test.FLOAT),
      expect("1.", Test.FLOAT),
      expect("1.0", Test.FLOAT),
      expect("1.0e+0", Test.FLOAT),
      expect("1e1", Test.FLOAT),
      expect("1e+1", Test.FLOAT),
      expect("1e-1", Test.FLOAT),

      // naturals separated from the dots by whitespace
      expect("1 . 2", Test.NATURAL, Test.WS, Test.DOT, Test.WS, Test.NATURAL),
      expect("1 .. 2", Test.NATURAL, Test.WS, Test.RANGE, Test.WS, Test.NATURAL),
      expect("1 ... 2", Test.NATURAL, Test.WS, Test.ELLIPSIS, Test.WS, Test.NATURAL),

      // floats with a trailing dot, separated by whitespace
      expect("1. . 2.", Test.FLOAT, Test.WS, Test.DOT, Test.WS, Test.FLOAT),
      expect("1. .. 2.", Test.FLOAT, Test.WS, Test.RANGE, Test.WS, Test.FLOAT),
      expect("1. ... 2.", Test.FLOAT, Test.WS, Test.ELLIPSIS, Test.WS, Test.FLOAT),

      // floats with a fraction, separated by whitespace
      expect("1.1 . 2.2", Test.FLOAT, Test.WS, Test.DOT, Test.WS, Test.FLOAT),
      expect("1.1 .. 2.2", Test.FLOAT, Test.WS, Test.RANGE, Test.WS, Test.FLOAT),
      expect("1.1 ... 2.2", Test.FLOAT, Test.WS, Test.ELLIPSIS, Test.WS, Test.FLOAT),

      // digits immediately followed by dots
      expect("1.", Test.FLOAT),
      expect("1..", Test.NATURAL, Test.RANGE),
      expect("1...", Test.NATURAL, Test.ELLIPSIS),
      expect("1.2", Test.FLOAT),
      expect("1..2", Test.NATURAL, Test.RANGE, Test.NATURAL),
      expect("1...2", Test.NATURAL, Test.ELLIPSIS, Test.NATURAL)
   };

   /**
    * Prints the tokens as "([NAME,text], [NAME,text], ...)", or a
    * "no tokens found!" line when the list is empty.
    */
   private static void printTokens(List<?> l) {
      if (l.isEmpty()) {
         System.out.format("no tokens found!%n");
         return;
      }

      // The first entry opens the parenthesis; later ones are comma-separated.
      String pattern = "([%s,%s]";
      for (Object element : l) {
         Token current = (Token) element;
         System.out.format(pattern,
                           Test.tokenNames[current.getType()],
                           current.getText());
         pattern = ", [%s,%s]";
      }
      System.out.format(")");
   }

   /**
    * Compares the token types in l with the expected types in p, printing a
    * diagnostic for a length mismatch or for every position that differs.
    *
    * @return true when the lengths agree and every type matches
    */
   private static boolean checkTokens(List<?> l, int[] p) {
      if (l.size() != p.length) {
         System.out.format("...wrong number of tokens:%s should be %s; ",
                           l.size(), p.length);
         return false;
      }

      boolean allMatch = true;
      for (int pos = 0; pos < p.length; pos++) {
         int actualType = ((Token) l.get(pos)).getType();
         if (actualType == p[pos]) {
            continue;
         }
         allMatch = false;
         System.out.format(" token number %s, %s should be %s; ",
                           pos,
                           Test.tokenNames[actualType],
                           Test.tokenNames[p[pos]]);
      }
      return allMatch;
   }

   /** Runs every case, printing its tokens and an OK/FAIL verdict. */
   public static void main(String[] args) {
      for (Pair testCase : x) {
         try {
            System.out.format("testing `%s`: ", testCase.input);

            ANTLRStringStream chars = new ANTLRStringStream(testCase.input);
            TestLexer lexer = new TestLexer(chars);
            CommonTokenStream tokens = new CommonTokenStream(lexer);

            List<?> found = tokens.getTokens();
            printTokens(found);

            boolean passed = checkTokens(found, testCase.expected);
            System.out.format(passed ? " --- OK%n" : " --- FAIL%n");

         } catch (Exception e) {
            System.out.println("exception: "+e);
         }
      }
   }
}
//---------------------------end cut here-----------------------------