[antlr-interest] faster expression parsing

Mon Mar 17 14:18:03 PDT 2008

So, I should have been working on something else but got to thinking  
about how annoying it is specifying expressions in recursive descent  
parsers.  You have to have a new rule for each precedence level.   
This is also very slow. Just to match 34 it has to descend about 15  
method calls.  I built a prototype single-rule (plus primary and  
suffix) operator matching thingie today.  I should be able to  
generate this from some metameta syntax in antlr.  Try it out...it's  
amazing (v3.1 required due to tree bulding bug fix).

Ter
---------------
/** Test faster recursive-descent expression parsing.
  *  Goal: avoid recursing for *each* precedence level.
  *  Recurse for changes in precedence, avoiding repeated
  *  tests for each level.  The key is passing into expression
  *  the min precedence level to match, which is based upon
  *  previous operator.
  *
  *  Based upon "precedence climbling" by Theodore Norvell:
  *      http://www.engr.mun.ca/~theo/Misc/exp_parsing.htm
  *
  *  NOTES:
  *      1. Holy crap!  This actually works!
  *      2. Should be able to autogenerate from list of operators,  
precedence
  *         etc...
  */
grammar T;

options { output=AST; ASTLabelType=CommonTree; }

tokens { PREINC; POSTINC; CALL; INDEX; }

@members {
public static final int LEFT = 1;
public static final int RIGHT = 2;
static int[] prec = new int[tokenNames.length];
static int[] uprec = new int[tokenNames.length];
static int[] postprec = new int[tokenNames.length];
static int[] assoc = new int[tokenNames.length];
static {
     for (int i=0; i<tokenNames.length; i++) { assoc[i]=LEFT; }
     prec[PLUS] = 1;
     prec[MINUS] = 1;
     prec[STAR] = 3;
     prec[CARET] = 4;
     prec[DOT] = 6;
     assoc[CARET] = RIGHT;

     uprec[MINUS] = 2;       // sits between +/- binary and * binary ops
     postprec[LPAREN] = 5;   // lower than DOT for p.f()
     postprec[LBRACK] = 5;
     postprec[INC] = 5;
}

int nextp(int p) {
     int prevOpType = input.LA(-1);
     if ( assoc[prevOpType]==LEFT ) return prec[prevOpType]+1;
     else return prec[prevOpType];
}
}

expr : e[0] {System.out.println($e.tree.toStringTree());} ;

/** This could be autogenerated if you give me primary and suffix and  
precedence levels */
e[int p]
     :   (primary->primary)
         (   {prec[input.LA(1)]>=p}?=>     bop r=e[nextp(p)] -> ^(bop  
$e $r)
         |   {postprec[input.LA(1)]>=p}?=> suffix[$e.tree]   ->  
{$suffix.tree}
         )*
     ;

primary
     :   INT
     |   ID
     |   uop^ {int q=uprec[input.LA(-1)];} e[q]
     |   '(' expr ')' -> expr
     ;

suffix[CommonTree lhs]
     :   t='[' expr ']'                -> ^(INDEX[$t] {$lhs} expr)
     |   t='(' (expr (',' expr)*)? ')' -> ^(CALL[$t] {$lhs} expr*)
     |   t='++'                        -> ^(POSTINC[$t] {$lhs})
     ;

bop :   '+' | '-' | '*' | '^' | '.' ;

uop :   '-' | t='++' -> PREINC[$t];

INC : '++';
LPAREN : '(' ;
LBRACK : '[' ;
PLUS: '+';
MINUS: '-';
STAR: '*';
DOT : '.';
CARET:'^';
INT : '0'..'9'+ ;
ID  : 'a'..'z'+ ;
WS  : (' '|'\n')+ ;