[antlr-interest] Generate token types file from .tokens file

Andy Tripp antlr at jazillian.com
Tue Oct 16 12:36:01 PDT 2007


Here's a Java class that reads a .tokens file and produces a 
MyGrammarTokenTypes.java file.
It produces code that follows the "Constant Interface antipattern", but 
could be easily changed to
generate an enum. Note that there's a table mapping symbols to names, 
for symbols that aren't allowed
in Java variable names (e.g. "." becomes "DOT").

Sorry, no clever StringTemplate for output generation or even ANTLR for 
parsing.
Just vanilla Java.

---------------------------------------------------
/*
 * GenerateTokenTypes.java
 * Copyright 2007 Andy Tripp
 *
 * This class reads an ANTLR-generated .tokens file and
 * generates a Java class from it. For example, running:
 *
 * $ java GenerateTokenTypes VB > VBTokenTypes.java
 *
 * will read VB.tokens and create VBTokenTypes.java
 * The class contains a bunch of constant ints, one for each token type.
 * This is useful for when you want to specify a particular type in your
 * java code, so you can say:
 * if (token.getType() == VBTokenTypes.StringLiteral)
 * ...rather than...
 * if (token.getType() == 123)
 *
 * Be sure to run this after each run of ANTLR, as each run of ANTLR will
 * re-generate your .tokens file.
 */

import java.io.*;
import java.util.*;

/**
 * Command-line tool that reads an ANTLR-generated <code>.tokens</code> file and
 * prints, on standard output, the source of a Java class holding one
 * <code>int</code> constant per token type.
 *
 * <p>Each non-blank input line has the form <code>NAME=value</code> or
 * <code>'literal'=value</code>. Quoted literals are converted to legal Java
 * identifiers prefixed with <code>LITERAL_</code> (for example <code>'.'</code>
 * becomes <code>LITERAL_DOT</code> and <code>'#IF'</code> becomes
 * <code>LITERAL_HASH_IF</code>). When several lines share the same value, the
 * last one read wins. Constants are printed in ascending order of value.
 */
public class GenerateTokenTypes {
    private final static String INDENT = "    ";

    // map literal chars like '#' to strings
    // that are valid in variables like "HASH"
    private final static String[][] MAPPING = {
        {"=", "ASSIGN"},
        {":=", "COLON_ASSIGN"},
        {"==", "COMPARE"},
        {"\\\\", "BACKSLASH"},
        {"\\r", "CR"},
        {"!", "BANG"},
        {"&", "AMPERSAND"},
        {":", "COLON"},
        {",", "COMMA"},
        {"$", "DOLLAR"},
        {".", "DOT"},
        {"_", "UNDERSCORE"},
        {";", "SEMI"},
        {"~", "TILDE"},
        {"\\n", "NEWLINE"},
        {"^", "BNOT"},
        {">", "GT"},
        {">=", "GE"},
        {"<", "LT"},
        {"<=", "LE"},
        {"<>", "NE"},
        {"?", "QUESTION"},
        {"{", "LCURLY"},
        {"}", "RCURLY"},
        {"[", "LBRACK"},
        {"]", "RBRACK"},
        {"(", "LPAREN"},
        {")", "RPAREN"},
        {"+", "PLUS"},
        {"-", "MINUS"},
        {"*", "STAR"},
        {"%", "MODULO"},
        {"/", "DIV"},
        {"#", "HASH"},
        {"@", "AT"},
    };
    private final static Map<String,String> VALID_NAMES = new HashMap<String,String>();
    static {
        for (int i=0; i<MAPPING.length; i++) {
            VALID_NAMES.put(MAPPING[i][0], MAPPING[i][1]);
        }
    }

    /**
     * Entry point. Expects exactly one argument, the grammar name (optionally
     * with a leading directory); reads <code>&lt;grammarName&gt;.tokens</code>
     * and prints the generated class to standard output. Problems are reported
     * on standard error and nothing is printed to standard output.
     *
     * @param args command-line arguments: the grammar name
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: GenerateTokenTypes <grammarName>");
            return;
        }
        // Only the last path component may appear in the class name.
        String grammarName = new File(args[0]).getName();
        if (grammarName.length() == 0) {
            System.err.println("Usage: GenerateTokenTypes <grammarName>");
            return;
        }
        String fileName = args[0] + ".tokens";
        File file = new File(fileName);
        if (!file.exists()) {
            System.err.println("file does not exist:" + fileName);
            return;
        }

        Map<Integer,String> names;
        try {
            names = parseTokens(readLines(file));
        } catch (IOException e) {
            System.err.println("cannot read file:" + fileName + " (" + e.getMessage() + ")");
            return;
        } catch (IllegalArgumentException e) {
            // parseTokens puts the complete user-facing message in the exception
            System.err.println(e.getMessage());
            return;
        }

        String className = Character.toUpperCase(grammarName.charAt(0)) +
            grammarName.substring(1) +
            "TokenTypes";
        printOutclass(className, names);
    }

    /**
     * Reads every line of the file. Line terminators (LF, CR or CRLF) are
     * stripped by {@link BufferedReader#readLine()}.
     *
     * @param file the file to read
     * @return the lines of the file, in order
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String> readLines(File file) throws IOException {
        List<String> lines = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } finally {
            reader.close();
        }
        return lines;
    }

    /**
     * Parses the lines of a .tokens file into a map from token type to
     * constant name, sorted by token type. Blank lines are ignored.
     *
     * @param lines the lines of the .tokens file
     * @return token type to Java constant name, in ascending key order
     * @throws IllegalArgumentException if a line has no '=', has a malformed
     *         quoted literal, or has a non-integer value; the message is
     *         ready to be shown to the user
     */
    private static Map<Integer,String> parseTokens(List<String> lines) {
        // A sorted map rather than an array: token types need not be bounded
        // by the number of lines in the file.
        Map<Integer,String> names = new TreeMap<Integer,String>();
        for (String rawLine : lines) {
            String line = rawLine.trim();
            if (line.length() == 0) {
                continue;
            }
            // The value is always what follows the LAST '=', so literals such
            // as '=' or '\'' that themselves contain '=' or quotes are safe.
            int i = line.lastIndexOf('=');
            if (i == -1) {
                throw new IllegalArgumentException("Missing '=' on line:" + line);
            }
            String name = line.substring(0, i);
            String value = line.substring(i+1).trim();
            if (name.startsWith("'")) {
                if (name.length() < 2 || !name.endsWith("'")) {
                    throw new IllegalArgumentException("Malformed literal on line:" + line);
                }
                String literal = name.substring(1, name.length()-1);
                name = generateLiteralName(literal);
            }
            try {
                names.put(Integer.valueOf(Integer.parseInt(value)), name);
            } catch (NumberFormatException e) {
                throw new IllegalArgumentException("invalid number on line:" + line);
            }
        }
        return names;
    }

    /**
     * Prints the generated class to standard output.
     *
     * @param className name of the class to generate
     * @param names token type to constant name; iterated in the map's order
     */
    private static void printOutclass(String className, Map<Integer,String> names) {
        System.out.println("public class " + className + " {");
        for (Map.Entry<Integer,String> entry : names.entrySet()) {
            System.out.println(INDENT + "final static int "
                + entry.getValue() + '=' + entry.getKey() + ';');
        }
        System.out.println('}');
    }

    /**
     * Converts the text of a quoted literal into a Java identifier.
     *
     * <p>A literal that appears in {@link #MAPPING} as a whole gets the mapped
     * name. Otherwise the literal is converted character by character:
     * spaces and dots become underscores, characters legal in a Java
     * identifier are kept, any other character with a single-character
     * mapping becomes <code>NAME_</code> (so <code>#IF</code> becomes
     * <code>HASH_IF</code>), and anything else becomes an underscore.
     *
     * @param literal the literal text without its surrounding quotes
     * @return an identifier starting with <code>LITERAL_</code>
     */
    private static String generateLiteralName(String literal) {
        String value = VALID_NAMES.get(literal);
        if (value != null) {
            return "LITERAL_" + value;
        }

        StringBuilder result = new StringBuilder("LITERAL_");
        for (int i=0; i<literal.length(); i++) {
            char c = literal.charAt(i);
            if (c == ' ' || c == '.') {
                // replace spaces and dots with underscores
                result.append('_');
            } else if (Character.isJavaIdentifierPart(c)) {
                result.append(c);
            } else {
                String mapped = VALID_NAMES.get(String.valueOf(c));
                if (mapped != null) {
                    result.append(mapped).append('_'); // #IF becomes HASH_IF
                } else {
                    result.append('_');
                }
            }
        }
        return result.toString();
    }
}


More information about the antlr-interest mailing list