[antlr-interest] Left recursion removal

James Robson james.robson at ymail.com
Mon Jun 15 01:39:43 PDT 2009


Hi,
Below is a fragment from a grammar I've written. I've had to modifiy is so that an expression may or may not be enclosed in parenthesis

However, which ever way I try to remove the recursion I either end up by reducing teh expressivness or by re-introducing the recursion.

Can anyone help point me in the right direction?


grammar SLRR;


expression
  : LEFT_PARENTHESIS (predicate|expression) (SPACE_CHAR logicalOperator SPACE_CHAR (predicate | expression))* RIGHT_PARENTHESIS
  | (predicate|expression) (SPACE_CHAR logicalOperator SPACE_CHAR (predicate | expression))*
  ;
 


///////////////////////////////////////////////////////////////////////
predicate
    :    predicateField SPACE_CHAR searchOperator SPACE_CHAR predicateField
     ; 



 
predicateField
  : TEXT
  ;
  
  
fieldName
  : TEXT

  ;

searchOperator 
  : TEXT
  | TEXTNOSPACES

  //| QUOTATION_MARK urlUnreserved+ (SPACE_CHAR urlUnreserved+)+ QUOTATION_MARK 
  ;

fieldValue 
  : TEXT
  //| quotedQueryParamValue
  ;
  
logicalOperator
       :    LOWER_CASE_LETTER_A
           LOWER_CASE_LETTER_N
           LOWER_CASE_LETTER_D
       |    LOWER_CASE_LETTER_O
           LOWER_CASE_LETTER_R
       ;
  
LOWER_CASE_LETTER_A : 'a' ;
LOWER_CASE_LETTER_B : 'b' ;
LOWER_CASE_LETTER_C : 'c' ;
LOWER_CASE_LETTER_D : 'd' ;
LOWER_CASE_LETTER_E : 'e' ;
LOWER_CASE_LETTER_F : 'f' ;
LOWER_CASE_LETTER_G : 'g' ;
LOWER_CASE_LETTER_H : 'h' ;
LOWER_CASE_LETTER_I : 'i' ;
LOWER_CASE_LETTER_J : 'j' ;
LOWER_CASE_LETTER_K : 'k' ;
LOWER_CASE_LETTER_L : 'l' ;
LOWER_CASE_LETTER_M : 'm' ;
LOWER_CASE_LETTER_N : 'n' ;
LOWER_CASE_LETTER_O : 'o' ;
LOWER_CASE_LETTER_P : 'p' ;
LOWER_CASE_LETTER_Q : 'q' ;
LOWER_CASE_LETTER_R : 'r' ;
LOWER_CASE_LETTER_S : 's' ;
LOWER_CASE_LETTER_T : 't' ;
LOWER_CASE_LETTER_U : 'u' ;
LOWER_CASE_LETTER_V : 'v' ;
LOWER_CASE_LETTER_W : 'w' ;
LOWER_CASE_LETTER_X : 'x' ;
LOWER_CASE_LETTER_Y : 'y' ;
LOWER_CASE_LETTER_Z : 'z' ;
 
CAPITAL_LETTER_A : 'A' ;
CAPITAL_LETTER_B : 'B' ;
CAPITAL_LETTER_C : 'C' ;
CAPITAL_LETTER_D : 'D' ;
CAPITAL_LETTER_E : 'E' ;
CAPITAL_LETTER_F : 'F' ;
CAPITAL_LETTER_G : 'G' ;
CAPITAL_LETTER_H : 'H' ;
CAPITAL_LETTER_I : 'I' ;
CAPITAL_LETTER_J : 'J' ;
CAPITAL_LETTER_K : 'K' ;
CAPITAL_LETTER_L : 'L' ;
CAPITAL_LETTER_M : 'M' ;
CAPITAL_LETTER_N : 'N' ;
CAPITAL_LETTER_O : 'O' ;
CAPITAL_LETTER_P : 'P' ;
CAPITAL_LETTER_Q : 'Q' ;
CAPITAL_LETTER_R : 'R' ;
CAPITAL_LETTER_S : 'S' ;
CAPITAL_LETTER_T : 'T' ;
CAPITAL_LETTER_U : 'U' ;
CAPITAL_LETTER_V : 'V' ;
CAPITAL_LETTER_W : 'W' ;
CAPITAL_LETTER_X : 'X' ;
CAPITAL_LETTER_Y : 'Y' ;
CAPITAL_LETTER_Z : 'Z' ;
 
DIGIT_0 : '0' ;
DIGIT_1 : '1' ;
DIGIT_2 : '2' ;
DIGIT_3 : '3' ;
DIGIT_4 : '4' ;
DIGIT_5 : '5' ;
DIGIT_6 : '6' ;
DIGIT_7 : '7' ;
DIGIT_8 : '8' ;
DIGIT_9 : '9' ;

TEXT        
@init{StringBuilder lBuf = new StringBuilder();}
    :   
           QUOTATION_MARK
           ( escaped=ESC {lBuf.append(getText());} | 
             normal=~(QUOTATION_MARK|'\\'|'\n'|'\r')     {lBuf.appendCodePoint(normal);} )* 
           QUOTATION_MARK    
           {setText("\""+lBuf.toString()+"\"");}
    ;

fragment
ESC
    :   '\\'
        (       'n'    {setText("\n");}
        |       'r'    {setText("\r");}
        |       't'    {setText("\t");}
        |       'b'    {setText("\b");}
        |       'f'    {setText("\f");}
        |       '"'    {setText("\"");}
        |       '\''   {setText("\'");}
        |       '\\'   {setText("\\");}
        |       ('u')+ i=HEX_DIGIT j=HEX_DIGIT k=HEX_DIGIT l=HEX_DIGIT   {setText(this.hextToString(i.getText(),j.getText(),k.getText(),l.getText()));}

        )
    ;
    
fragment
HEX_DIGIT:
  DIGIT_0|DIGIT_1|DIGIT_2|DIGIT_3|DIGIT_4|DIGIT_5|DIGIT_6|DIGIT_7|DIGIT_8|DIGIT_9
  |LOWER_CASE_LETTER_A|LOWER_CASE_LETTER_B|LOWER_CASE_LETTER_C|LOWER_CASE_LETTER_D
  |LOWER_CASE_LETTER_E|LOWER_CASE_LETTER_F|CAPITAL_LETTER_A|CAPITAL_LETTER_B
  |CAPITAL_LETTER_C|CAPITAL_LETTER_D|CAPITAL_LETTER_E|CAPITAL_LETTER_F
  ;
  
fragment
TEXTNOSPACES
  : (HEX_DIGIT|'g'..'z'|'G'..'Z'|'-'|':')+
  ;
  

AMPERSAND : '&' ; // &
APOSTROPHE : '\'' ;
ASTERISK : '*' ;
CIRCUMFLEX_ACCENT : '^' ;
COLON : ':' ;
COMMA : ',' ;
PERCENT_SIGN : '%';
DOLLAR_SIGN : '$';
HYPHEN_MINUS :  '-';
LOW_LINE : '_';
FULL_STOP : '.';
PLUS_SIGN : '+';
EXCLAMATION_MARK : '!';
LEFT_PARENTHESIS : '(';
RIGHT_PARENTHESIS : ')';
SEMICOLON : ';';
SLASH : '/';
QUESTION_MARK : '?';
EQUALS_SIGN :'=';
COMMERCIAL_AT : '@';
HASH_SIGN : '#';
TILDE : '~';
QUOTATION_MARK : '"';
SPACE_CHAR : ' ';


      Need a Holiday? Win a $10,000 Holiday of your choice. Enter now.http://us.lrd.yahoo.com/_ylc=X3oDMTJxN2x2ZmNpBF9zAzIwMjM2MTY2MTMEdG1fZG1lY2gDVGV4dCBMaW5rBHRtX2xuawNVMTEwMzk3NwR0bV9uZXQDWWFob28hBHRtX3BvcwN0YWdsaW5lBHRtX3BwdHkDYXVueg--/SIG=14600t3ni/**http%3A//au.rd.yahoo.com/mail/tagline/creativeholidays/*http%3A//au.docs.yahoo.com/homepageset/%3Fp1=other%26p2=au%26p3=mailtagline
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20090615/dabb0211/attachment.html 


More information about the antlr-interest mailing list