[antlr-interest] Generate token types file from .tokens file
Andy Tripp
antlr at jazillian.com
Tue Oct 16 12:36:01 PDT 2007
Here's a Java class that reads a .tokens file and produces a
MyGrammarTokenTypes.java file.
It produces code that follows the "Constant Interface antipattern", but
could be easily changed to
generate an enum. Note that there's a table mapping symbols to names,
for symbols that aren't allowed
in Java variable names (e.g. "." becomes "DOT").
Sorry, no clever StringTemplate for output generation or even ANTLR for
parsing.
Just vanilla Java.
---------------------------------------------------
/*
* GenerateTokenTypes.java
* Copyright 2007 Andy Tripp
*
* This class reads an ANTLR-generated .tokens file and
* generates a Java class from it. For example, running:
*
* $ java GenerateTokenTypes VB > VBTokenTypes.java
*
* will read VB.tokens and create VBTokenTypes.java
* The class contains a bunch of constant ints, one for each token type.
* This is useful for when you want to specify a particular type in your
* java code, so you can say:
* if (token.getType() == VBTokenTypes.StringLiteral)
* ...rather than...
* if (token.getType() == 123)
*
* Be sure to run this after each run of ANTLR, as each run of ANTLR will
* re-generate your .tokens file.
*/
import java.io.*;
import java.util.*;
/**
 * Command-line tool that reads an ANTLR-generated {@code <grammarName>.tokens}
 * file and prints, on standard output, a Java class holding one int constant
 * per token type.
 *
 * <p>Each non-blank line of the input is expected to look like {@code NAME=123}
 * or {@code 'literal'=123}. Quoted literals are turned into identifiers of the
 * form {@code LITERAL_xxx}; see {@link #generateLiteralName(String)}.
 * When several names share the same token type, the one that appears last in
 * the file is the one emitted.
 */
public class GenerateTokenTypes {
    private final static String INDENT = " ";

    // map literal chars like '#' to strings
    // that are valid in variables like "HASH"
    private final static Map<String,String> VALID_NAMES =
            new HashMap<String,String>();
    private final static String[][] MAPPING = {
        {"=", "ASSIGN"},
        {":=", "COLON_ASSIGN"},
        {"==", "COMPARE"},
        {"\\\\", "BACKSLASH"},
        {"\\r", "CR"},
        {"!", "BANG"},
        {"&", "AMPERSAND"},
        {":", "COLON"},
        {",", "COMMA"},
        {"$", "DOLLAR"},
        {".", "DOT"},
        {"_", "UNDERSCORE"},
        {";", "SEMI"},
        {"~", "TILDE"},
        {"\\n", "NEWLINE"},
        {"^", "BNOT"},
        {">", "GT"},
        {">=", "GE"},
        {"<", "LT"},
        {"<=", "LE"},
        {"<>", "NE"},
        {"?", "QUESTION"},
        {"{", "LCURLY"},
        {"}", "RCURLY"},
        {"[", "LBRACK"},
        {"]", "RBRACK"},
        {"(", "LPAREN"},
        {")", "RPAREN"},
        {"+", "PLUS"},
        {"-", "MINUS"},
        {"*", "STAR"},
        {"%", "MODULO"},
        {"/", "DIV"},
        {"#", "HASH"},
        {"@", "AT"},
    };
    static {
        for (String[] entry : MAPPING) {
            VALID_NAMES.put(entry[0], entry[1]);
        }
    }

    /**
     * Entry point. Expects exactly one argument, the grammar name (optionally
     * with a leading directory path); reads {@code <arg>.tokens} and prints the
     * generated class to standard output. Problems are reported on standard
     * error and the method returns without printing any class.
     *
     * @param args command-line arguments; {@code args[0]} is the grammar name
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: GenerateTokenTypes <grammarName>");
            return;
        }
        String fileName = args[0] + ".tokens";
        File file = new File(fileName);
        if (!file.exists()) {
            System.err.println("file does not exist:" + fileName);
            return;
        }
        // Use only the last path component for the class name, so that
        // "build/VB" still yields "VBTokenTypes" rather than an illegal name.
        String grammarName = new File(args[0]).getName();
        if (grammarName.length() == 0) {
            System.err.println("Usage: GenerateTokenTypes <grammarName>");
            return;
        }

        List<String> lines;
        try {
            lines = readLines(file);
        } catch (IOException e) {
            System.err.println("cannot read file:" + fileName
                    + " (" + e.getMessage() + ")");
            return;
        }

        // Sorted by token type so the output is in ascending numeric order;
        // a map (rather than a fixed-size array) accepts any int value.
        SortedMap<Integer,String> names = new TreeMap<Integer,String>();
        for (String line : lines) {
            if (line.trim().length() == 0) {
                continue; // tolerate blank lines
            }
            // The numeric value always follows the LAST '=', so a literal
            // such as '='=66 or '=='=67 needs no special quote handling.
            int eq = line.lastIndexOf('=');
            if (eq == -1) {
                System.err.println("Missing '=' on line:" + line);
                return;
            }
            String name = line.substring(0, eq);
            String value = line.substring(eq + 1).trim();
            if (name.length() == 0) {
                System.err.println("Missing token name on line:" + line);
                return;
            }
            if (name.startsWith("'")) {
                if (name.length() < 2 || !name.endsWith("'")) {
                    System.err.println("Malformed literal on line:" + line);
                    return;
                }
                String literal = name.substring(1, name.length() - 1);
                name = generateLiteralName(literal);
            }
            try {
                names.put(Integer.parseInt(value), name);
            } catch (NumberFormatException e) {
                System.err.println("invalid number on line:" + line);
                return;
            }
        }

        String className = Character.toUpperCase(grammarName.charAt(0))
                + grammarName.substring(1)
                + "TokenTypes";
        printOutclass(className, names);
    }

    /**
     * Reads the whole file as a list of lines. Line terminators (LF, CR or
     * CRLF) are stripped by {@link BufferedReader#readLine()}.
     * NOTE(review): the file is decoded as UTF-8; confirm this matches how the
     * .tokens file is written if literals can contain non-ASCII characters.
     */
    private static List<String> readLines(File file) throws IOException {
        List<String> lines = new ArrayList<String>();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), "UTF-8"));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        } finally {
            reader.close();
        }
        return lines;
    }

    /**
     * Prints the generated class: one {@code final static int} field per
     * entry, in ascending order of token type.
     */
    private static void printOutclass(String className,
                                      SortedMap<Integer,String> names) {
        System.out.println("public class " + className + " {");
        for (Map.Entry<Integer,String> entry : names.entrySet()) {
            System.out.println(INDENT + "final static int "
                    + entry.getValue() + '=' + entry.getKey() + ';');
        }
        System.out.println('}');
    }

    /**
     * Builds a Java identifier for a quoted token literal.
     *
     * <p>If the whole literal is in the mapping table (e.g. {@code "<="}), the
     * result is {@code LITERAL_} plus the mapped name. Otherwise the literal is
     * converted character by character: spaces and dots become underscores,
     * characters legal in a Java identifier are kept, other characters found
     * in the mapping table become their name followed by an underscore
     * (so {@code #IF} becomes {@code HASH_IF}), and anything else becomes an
     * underscore.
     *
     * @param literal the literal text without its surrounding single quotes
     * @return an identifier starting with {@code LITERAL_}
     */
    private static String generateLiteralName(String literal) {
        String value = VALID_NAMES.get(literal);
        if (value != null) {
            return "LITERAL_" + value;
        }
        StringBuilder result = new StringBuilder("LITERAL_");
        for (int i = 0; i < literal.length(); i++) {
            char c = literal.charAt(i);
            if (c == ' ' || c == '.') {
                result.append('_'); // replace spaces and dots with underscores
            } else if (Character.isJavaIdentifierPart(c)) {
                result.append(c);
            } else {
                String charName = VALID_NAMES.get(String.valueOf(c));
                if (charName != null) {
                    result.append(charName).append('_');
                } else {
                    result.append('_');
                }
            }
        }
        return result.toString();
    }
}
More information about the antlr-interest
mailing list