[antlr-interest] help with a logical query-like expression grammar

Wed Mar 5 15:33:36 PST 2003

Please see the attached example. It is the beginning of an 
ANTLR/Java-based parser for grammars defined in the standard IEEE BNF 
notation. It parses the BNF expressions into an abstract syntax tree 
defined by a class hierarchy. It is by no means complete but it is a 
small and simple example. This example isn't exactly what you asked for 
but I think it might help. Let me know if you have any questions.

I have included the object model as well. The classes are actually 
defined in a programming language called Nice (nice.sf.net). Nice is is 
its own programming language, but it runs on the JVM. You can think of 
it also as Java with multiple dispatch (ala MultiJava, Cecil, CLOS), 
parametric polymorphism, closures, funtions, and other extra features. 
it is designed to be similar and compatible with Java, so it should be 
simple enough to figure out what it does by reading it (in particular, 
read bnf.g and ast.nice).

- Brian

Michael Hartman wrote:
> I need a grammar to describe logical query-like expression like the
> kinds you'd see in a SQL WHERE clause.  For example,
> 
> A = 1
> 
> A = 1 AND B >= 2
> 
> A = 1 AND (B <= 2)
> 
> A = 1 AND (B != 2 OR C = 3)
> 
> A = 1 AND (B = 2 AND NOT (C = 3 OR D = 4))
> 
> where A, B, C, and D would be identifiers and 1, 2, 3, and 4 would be
> values (essentially quoted strings or numbers but again, just identifiers)
> 
> I don't need to evaluate these expressions but rather parse them into
> an object model I have.
> 
> I can't seem to make the mental connection as to where in the parser
> or lexer I make calls to my object model to create and associate
> objects that represent the elements of the expression.
> 
> For example, A = 1 would translate into an instance of type Expression
> with an object of type Operator representing the equals sign.  Another
> Expression would represent B <= 2 and yet another would represent the
> whole expression A = 1 AND B <= 2 with Operator representing the AND
> and two references to the first two Expression instances.
> 
> I'm not a computer scientist and I've reviewed the ANTLR doc and
> samples but it is very difficult to grasp.  
> 
> THis is what I have so far:
> 
> class FilterParser extends Parser;
> 
> startRule
>     :   
>     ;
> 
> class FilterLexer extends Lexer;
> 
> LPAREN:	'('
> 	;
> 
> RPAREN:	')'
> 	;
> 
> EQUALS: '='
>   ;
> 
> NOTEQUALS: "!="
>   ;
> 
> GT: '>'
>   ;
> 
> GTE: '>='
>   ;
> 
> LT: '<'
>   ;
> 
> LTE: '<='
>   ;
> 
> AND: "AND"
>   ;
> 
> OR: "OR"
>   ;
> 
> ANDNOT: "AND NOT"
>   ;
> 
> ORNOT:  "OR NOT"
>   ;
> 
> ID
> options {
> 	testLiterals = true;
> }
> 	:	('a'..'z'|'A'..'Z'|'_') ('a'..'z'|'A'..'Z'|'_'|'0'..'9')*
> 	;
> 
> 
>  
> 
> Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ 
> 

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ 

-------------- next part --------------
package org.brianlsmith.bnf.parser;

import org.brianlsmith.bnf.*;
import java.io.*;

public class ParserTest {
    public static void main(String [] args) throws FileNotFoundException, IOException,
            antlr.ANTLRException {
        FileInputStream fis = new FileInputStream(args[0]);
        try {
            BnfLexer lexer = new BnfLexer(fis);
            BnfParser parser = new BnfParser(lexer, true);
            java.util.List rules = parser.grammar();
            for (java.util.Iterator i = rules.iterator(); i.hasNext(); ) {
                Rule r = (Rule) i.next();
                System.out.println(org.brianlsmith.bnf.dispatch.prettyprintRule(r));
            }
        } finally {
            fis.close();
        }
    }
}
-------------- next part --------------
header {
    package org.brianlsmith.bnf.parser;
    import org.brianlsmith.bnf.*;
}

class BnfParser extends Parser;

{
    public BnfParser(TokenStream lexer, boolean mustBeTrue) {
        this(filterLexer(lexer));
        tokenNames[BnfParserTokenTypes.EOF] = "end of file";
    }

    private static TokenStream filterLexer(TokenStream lexer) {
        final antlr.TokenStreamBasicFilter filter = new antlr.TokenStreamBasicFilter(lexer);
        filter.discard(BnfParserTokenTypes.WS);
        return filter;
    }

    private static final EpsilonExp epsilon = new EpsilonExp();
}

grammar     returns [java.util.List l = new java.util.ArrayList();]
            { Production r; }
            : ( r=rule  { l.add(r); } )+
            ;

rule        returns [Production r = null]
            : n:NAME EQUALS { Exp e; } e=exp SEMI { r = new Production(n.getText(), e); }
            ;

exp         returns [Exp e = null]
            : e=concatExp  ( { Exp tl; } OR tl=exp    { e = new ChoiceExp(e, tl); } )?
            ;

concatExp   returns [Exp e = null]
            : e=groupedExp ( { Exp tl; } tl=concatExp { e = new ConcatExp(e, tl); } )?
            ;

groupedExp  returns [Exp e = null]
            : LBRACKET e=exp RBRACKET   { e = new OptionalExp(e); }
            | LBRACE e=exp   RBRACE     { e = new RepeatedExp(e); }
            | LPAREN e=exp   RPAREN
            | n:NAME                    { e = new RefExp(n.getText());    }
            | EPSILON                   { e = epsilon;       }
            | s:STRING                  { e = new StringExp(s.getText()); }
            ;

class BnfLexer extends Lexer;
options {
    charVocabulary = '\u0000'..'\uFFFE';
}

protected LETTER : ('a'..'z' | 'A'..'Z');
NAME             : (LETTER)+;

EPSILON options { paraphrase="'#'"  ; } : '#';

LBRACE        options { paraphrase="'{'";  }  : '{';
RBRACE        options { paraphrase="'}'";  }  : '}';
LPAREN        options { paraphrase="'('";  }  : '(';
RPAREN        options { paraphrase="')'";  }  : ')';
LBRACKET      options { paraphrase="'['";  }  : '[';
RBRACKET      options { paraphrase="']'";  }  : ']';
EQUALS        options { paraphrase="'='";  }  : '=';
SEMI          options { paraphrase="';'";  }  : ';';
OR            options { paraphrase="'|'";  }  : '|';

WS          options { paraphrase="whitespace"; }
    :   ( ' ' | '\t' | '\f'
        | ( '\n' (options {greedy=true;} : '\r')?
          | '\r' (options {greedy=true;} : '\n')?
          ) { newline(); }
        )+
        ;

// This is taken directly from the OCL 1.4 specification.
// The bad thing is that it should be using Unicode (and decimal or hex) escapes!!
// difficult syntax to understand: "\12345" -> "\123" "45", "\67890" -> "\67" "890"
// Thus the reason for use of the greedy option.
STRING options { paraphrase="a string literal"; }
    :      '"' ( QUOTED_CHARACTER | '\'' )* '"'
    ;

// Note that QUOTED-CHARACTER doesn't allow single OR double quotes.
protected QUOTED_CHARACTER
                   : (~( '\'' | '"' | '\r'  | '\n' | '\\' ))
             | '\\' ( ( '\'' | '"' | 'n' |  'r'  | 't'  |  'b' |  'f' | '\\' )
                | OCTAL_DIGIT
                  (options {greedy=true;} : OCTAL_DIGIT)?
                  (options {greedy=true;} : OCTAL_DIGIT)?
                )
                   ;

protected OCTAL_DIGIT: '0'..'7'
        ;
-------------- next part --------------
package org.brianlsmith.bnf;

public class Grammar {
    final HashMap<String, Production> productions;
}

public class Production {
    final String name;
    final Exp    definition;
}

public abstract class Exp  { }
public abstract class AtomicExp extends Exp { }

public abstract class ExpContainer extends Exp {
    final Exp fst;
    final Exp snd;
}

public abstract class ExpWrapper extends Exp {
    final Exp exp;
}

public class StringExp extends AtomicExp {
    final String text;
}

public class ConcatExp extends ExpContainer { }
public class ChoiceExp extends ExpContainer { }

public class RefExp extends AtomicExp{
    final String ruleName;
}

public class OptionalExp extends ExpWrapper {
}

public class RepeatedExp extends ExpWrapper {
}

public class EpsilonExp extends AtomicExp { }

-------------- next part --------------
package org.brianlsmith.bnf;

String paren(Exp container, Exp e) =
    precedence(container) > precedence(e)
        ? "( " + prettyprint(e) + " )"
        : prettyprint(e)
        ;

String prettyprint(Exp e);

prettyprint(s at StringExp)  = s.text;
prettyprint(s at RefExp)     = s.ruleName;
prettyprint(s at EpsilonExp) = "\u1d75";
prettyprint(c at ChoiceExp)  = prettyInfix(c, " | ");
prettyprint(c at ConcatExp)  = prettyInfix(c, " ");
prettyprint(c at OptionalExp)= "[ " + prettyprint(exp(c)) + " ]";
prettyprint(c at RepeatedExp)= "{ " + prettyprint(exp(c)) + " }";

String prettyprintProduction(Production r);
prettyprintProduction(r) {
    String prefix = name(r) + " = ";
    return prefixedChoice(definition(r), prefix) + '\n'
        + spaces(prefix.length() - 2) + ";\n";
}

String toString(StringBuffer s) = native String StringBuffer.toString();

String prefixedChoice(Exp e, String prefix);
prefixedChoice(e, prefix) = prefix + prettyprint(e);
prefixedChoice(e at ChoiceExp, prefix) {
    return prefixedChoice(fst(e), prefix) + '\n'
         + prefixedChoice(snd(e), spaces(prefix.length() - 2) + "| ");
}

private String prettyInfix(ExpContainer c, String op) {
    return paren(c, fst(c)) + op + paren(c, snd(c));
}

String spaces(int n) {
    StringBuffer sb = new StringBuffer(n);
    for (int i = 0; i < n; ++i) sb.append(' ');
    return sb.toString();
}
-------------- next part --------------
package org.brianlsmith.bnf;

int precedence(Exp e);

precedence(s at AtomicExp)     = 100;
precedence(r at ExpWrapper)    = 100;
precedence(c at ConcatExp)     = 50;
precedence(c at ChoiceExp)     = 25;