[antlr-interest] Faster expression parsing
Sam Harwell
sharwell at pixelminegames.com
Wed Aug 27 20:31:21 PDT 2008
Profiling my grammar showed the expression parsing as one of the slowest
segments. The problem stemmed from the fact that this grammar has ~18
precedence categories of operators, so getting to a primary expression
was unnecessarily slow. Here is an example grammar that shows how you
can flatten groups of operators while maintaining a correct AST. If you
have a large number of precedence groups, and your expressions are
taking a long time to parse, you can try a method like this and see if
it helps. The createPrecedenceTree algorithm is sub-optimal, but it was
only responsible for 1% of the parse time in my recent profiling.
My current stages have simplified to:
assignment_expression
ternary_expression
binary_expression
prefix_expression
postfix_expression
primary_expression
And the time it spent in the expression rule dropped from 54% of the
total parse time to 23%.
Sam
grammar ExpressionParser;
options
{
output=AST;
}
tokens
{
PLUS = '+';
MINUS = '-';
TIMES = '*';
DIVIDE = '/';
LPAREN = '(';
RPAREN = ')';
AST_EXPR;
}
@parser::members
{
boolean isRightToLeft( int type )
{
return false;
}
int getOperatorPrecedence( int type )
{
switch ( type )
{
case TIMES:
case DIVIDE:
return 1;
case PLUS:
case MINUS:
return 2;
default:
return 0;
}
}
int findPivot( List operators, int startIndex, int stopIndex )
{
int pivot = startIndex;
int pivotRank = getOperatorPrecedence(
((Token)operators.get(pivot)).getType() );
for ( int i = startIndex + 1; i <= stopIndex; i++ )
{
int type = ((Token)operators.get(i)).getType();
int current = getOperatorPrecedence( type );
boolean rtl = isRightToLeft(type);
if ( current > pivotRank || (current ==
pivotRank && rtl) )
{
pivot = i;
pivotRank = current;
}
}
return pivot;
}
Tree createPrecedenceTree( List expressions, List operators, int
startIndex, int stopIndex )
{
if ( stopIndex == startIndex )
return (Tree)expressions.get(startIndex);
int pivot = findPivot( operators, startIndex, stopIndex
- 1 );
Tree root = (Tree)adaptor.nil();
Object root_1 = (Object)adaptor.nil();
root_1 = (Object)adaptor.becomeRoot(
(Token)operators.get(pivot), root_1 );
adaptor.addChild( root_1, createPrecedenceTree(
expressions, operators, startIndex, pivot ) );
adaptor.addChild( root_1, createPrecedenceTree(
expressions, operators, pivot + 1, stopIndex ) );
adaptor.addChild( root, root_1 );
return root;
}
Tree createPrecedenceTree( List expressions, List operators )
{
return createPrecedenceTree( expressions, operators, 0,
expressions.size() - 1 );
}
}
complete_expression
: expression EOF
-> expression
;
primary_expression
: INTEGER
-> ^(INTEGER)
| '(' expression ')'
-> expression
;
expression
@init
{
List expressions = new ArrayList();
List operators = new ArrayList();
}
: ( left=primary_expression
{ expressions.add($left.tree); }
)
( operator
right=primary_expression
{
operators.add($operator.start);
expressions.add($right.tree);
}
)*
-> {createPrecedenceTree(expressions,operators)}
;
operator
: '+'
| '-'
| '*'
| '/'
;
///////////////////////////////////////////
// LEXER
fragment
DIGIT : '0'..'9'
;
INTEGER : DIGIT+
;
WS : (' ' | '\t' | '\n' | '\r')+
{ $channel = HIDDEN; }
;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20080827/85d419cf/attachment.html
More information about the antlr-interest
mailing list