[antlr-interest] left recursion removal

John B. Brodie jbb at acm.org
Wed Jul 6 20:08:05 PDT 2011


see attached.

I, also, am available for hire, if you should opt for that...

   -jbb

On Thu, 2011-07-07 at 01:27 +0200, Sébastien Kirche wrote:
> Le 7 juillet 2011 00:14, John B. Brodie <jbb at acm.org> a écrit :
> > Greetings!
> >
> > [...]
> >
> > unable to reproduce.
> >
> > given your admittedly partial grammar, i tried to construct a complete
> > example by adding the missing elements and creating an AST (so i could
> > know the resultant parse).
> >
> > my test rig is attached.
> >
> > it runs without error when Tool'd, compiled, and executed from the
> > command-line (FWIW i use Ubuntu 11.04 Linux running Sun Java 6 and the
> > Antlr version from the antlr-3.4-complete.jar file).
> >
> > Please try to post the *smallest* yet *complete* example of your
> > problem.
> 
> Sorry for not having posted more complete code before, I thought that
> should have been enough...
> 
> I have worked further on my grammar, trying to define more
> precisely what an expression should be, with operator precedence. This
> made me rewrite the singleStatement by moving the funCall rule to the
> primary rule. But my problem with the if-then-else construct is not
> gone.
> 
> I have stripped down my grammar, keeping only the expression and
> subsequent rules, removed all that define loops, switches, etc.
> Given that simplified grammar, the following script should pass the
> parsing, but it can't yet :
> 
> ---------------------------------------
> string s1, s2
> 
> if s1='42' then s2='421'
> 
> if s2='421' then
> 	string s3
> 	s3='123'
> else
> 	string s4
> 	s4='666'
> end if
> 
> ---------------------------------------
> 
> Also, to Jim Idle : the language I would like to be able to parse is
> Powerbuilder. You can see it as a sort of Basic, although it accepts
> syntax like i++ or s += 'foo'. It also has the ability to accept inline
> sql statements... I tried to play on the EndOfLine delimiter to handle
> the two if-then-else syntaxes but without success.
> 


-------------- next part --------------
// Combined (lexer + parser) ANTLR v3 grammar for a cut-down PowerBuilder-like
// language: variable declarations, assignments, expressions and the two
// IF-THEN-ELSE forms (single-line and multi-line).
grammar pbifthenelse;

options {
   // Make the generated parser build an abstract syntax tree ...
   output = AST;
   // ... whose nodes are typed as CommonTree in the generated code.
   ASTLabelType = CommonTree;
}

@members {
   // Sample scripts fed to the parser by main(); each entry is parsed
   // independently with a fresh lexer and parser.
   private static final String [] x = new String[] {
      // [0] single-line IF followed by a multi-line IF whose branches
      //     contain a declaration and an assignment.
      "string s1, s2\n"+
      "\n"+
      "if s1='42' then s2='421'\n"+
      "\n"+
      "if s2='421' then\n"+
      "        string s3\n"+
      "        s3='123'\n"+
      "else\n"+
      "        string s4\n"+
      "        s4='666'\n"+
      "end if\n",
      // [1] same shape, but with initialised declarations up front and
      //     exactly one assignment in each branch of the multi-line IF.
      "string s1 = '42', s2='999', s3='101'\n"+
      "\n"+
      "if s1='42' then s2='421'\n"+
      "\n"+
      "if s2='421' then\n"+
      "        s3='123'\n"+
      "else\n"+
      "        s3='666'\n"+
      "end if\n"
   };

   // Test driver: parses every sample script from the start rule pgm and
   // prints the script, the number of syntax errors the parser reported,
   // and the resulting AST. An exception while handling one script is
   // printed and does not stop the remaining scripts from being parsed.
   public static void main(String [] args) {
      for( String script : x ) {
         try {
            System.out.println("about to parse:`"+script+"`");

            pbifthenelseLexer lexer =
               new pbifthenelseLexer(new ANTLRStringStream(script));
            CommonTokenStream tokens = new CommonTokenStream(lexer);
            pbifthenelseParser parser = new pbifthenelseParser(tokens);
            pbifthenelseParser.pgm_return p_result = parser.pgm();

            // The generated parser recovers from syntax errors and still
            // returns a tree, with the messages going to stderr only.
            // Print the count so a clean parse can be told apart from a
            // recovered one by looking at stdout alone.
            System.out.println("syntax errors: "
                               + parser.getNumberOfSyntaxErrors());

            // Debug aid: uncomment to dump the token stream.
            // System.out.format("the token stream:\%n");
            // for( int j = 0; j < tokens.size(); ++j ) {
            //    Token token = tokens.get(j);
            //    System.out.format("\%d: type = \%s, text = `\%s`\%n",
            //                      j,
            //                      tokenNames[token.getType()],
            //                      token.getText());
            // }

            CommonTree ast = p_result.tree;
            if( ast == null ) {
               System.out.println("resultant tree: is NULL");
            } else {
               System.out.println("resultant tree: " + ast.toStringTree());
            }
            System.out.println();
         } catch(Exception e) {
            // Keep going with the next script; this is a throw-away rig.
            e.printStackTrace();
         }
      }
   }
}

// Start rule (invoked from main): a whole script is one codeBlock followed by
// end of input. The '!' suffix keeps the EOF token out of the AST.
pgm : codeBlock EOF!;

// A possibly empty sequence of terminated statements.
codeBlock : ( compoundStatement )* ;

// One statement followed by its terminator, which is either ';' or an EOL
// token. The statement part may be empty, so a bare terminator (for example
// a blank line) is also accepted.
compoundStatement
   : (  /* empty */
      | localVariableDeclaration
      | ifStatement
      | singleStatement
      ) ( ';' | EOL )
   ;

// A statement that may appear on the same line as 'then' / 'else' in a
// single-line IF. In this stripped-down grammar it is only an assignment.
singleStatement : assignment ;

// target = expression. OPEQ ('=') is the same token that logicalOP uses for
// the equality comparison.
assignment : IDENTIFIER OPEQ expression ;

// IF statement in its two forms:
//   single-line:  if <expr> then <stmt> [else <stmt>]
//   multi-line:   if <expr> then <codeBlock> [else <codeBlock>] end if
// 'end if' is written as one literal, i.e. a single token containing exactly
// one space.
// NOTE(review): the predicate only looks for EOL after the single-line form,
// whereas compoundStatement also accepts ';' as a terminator -- confirm that
// a single-line IF terminated by ';' is not meant to be supported.
ifStatement
   : 'if' expression 'then'
      ((singleStatement ('else' singleStatement)? EOL)=>
         singleStatement ('else' singleStatement)?
      | codeBlock ('else' codeBlock)? 'end if'
      )
   ;
// The syntactic predicate is needed to distinguish between the
// singleStatement of a single-line IF and the singleStatement inside a
// compoundStatement of a codeBlock that contains just one statement within
// a multi-line IF.

// A type name followed by one or more comma-separated variable declarations,
// e.g.  string s1 = '42', s2
localVariableDeclaration
   : dataType variableDeclaration (',' variableDeclaration)*
   ;

// A variable name with an optional initialiser. The initialiser must be a
// literal, not a general expression.
variableDeclaration
   : IDENTIFIER ( OPEQ literal)?
   ;

// The built-in type keywords recognised by this reduced grammar.
dataType
   : 'int' | 'integer'
   | 'long'
   | 'string'
   ;


// Expression rules. Operator precedence is encoded by rule nesting, from the
// loosest binding (top) to the tightest (bottom):
//   or < and < not < comparison < binary + - < * / < ^
expression : logicalORExpr ;

// a or b or ...
logicalORExpr : logicalANDExpr ('or' logicalANDExpr)* ;

// a and b and ...
logicalANDExpr : logicalNOTExpr ('and' logicalNOTExpr)* ;

// At most one optional 'not' in front of a comparison.
logicalNOTExpr : 'not'? relationalExpr ;

// Operands joined by the comparison operators listed in logicalOP.
relationalExpr : binaryPlusMinus (logicalOP binaryPlusMinus)* ;

// Additive chain. Only the first operand goes through unaryExpr; the
// following operands are binaryMulDiv directly, so a sign immediately after
// a binary '+' or '-' is not accepted by this rule.
binaryPlusMinus : unaryExpr (('+' | '-') binaryMulDiv)* ;

// Optional leading sign on the first operand of an additive chain.
unaryExpr : ('+' | '-')? binaryMulDiv ;

// Multiplicative chain.
binaryMulDiv : exponentExpr (('*' | '/') exponentExpr)* ;

// Exponentiation chain over primaries.
exponentExpr : primary ( '^' primary )* ;

// The operands of an expression: a parenthesised expression, a literal, or
// an identifier optionally followed by one postfix construct.
primary
   : ( '(' expression ')' )
   | literal
   | ( IDENTIFIER postFixExpr? )
   ;

// What may follow an identifier inside primary: '!', '++', '--', or a
// parenthesised (possibly empty) argument list, i.e. a function call.
// NOTE(review): IDENTIFIER allows '-' inside a name, so "i--" written without
// a space looks like it lexes as one IDENTIFIER rather than IDENTIFIER '--'
// -- confirm.
postFixExpr
   : '!'
   | '++'
   | '--'
   | '(' expressionList? ')' // funCall
      // placing funCall here permits expressions like: f(x) + g(y)
   ;

// One or more comma-separated expressions (function-call arguments).
expressionList : expression (',' expression)* ;

// The comparison operators used by relationalExpr (despite the rule's name):
// =, <>, <, <=, >, >= as defined by the OP* lexer rules.
logicalOP
   : OPEQ
   | OPNEQ
   | OPINF
   | OPINFEQ
   | OPSUP
   | OPSUPEQ
   ;
 
// A constant value: a single-quoted string or an unsigned integer.
literal
   : STRING_LITERAL
   | INTEGER_LITERAL
   ;

// A single space, tab or form-feed character; sent to the hidden channel so
// the parser never sees it.
WS : (' '|'\t'|'\u000C') {$channel=HIDDEN;} ;

// A line terminator. Unlike WS it is not hidden: the parser uses EOL as a
// statement terminator (compoundStatement) and in the ifStatement predicate.
EOL
   : '\r' '\n'   // DOS
   | '\r'        // Mac
   | '\n'        // Unix
   ;

// A single-quoted string. Between the quotes each element is either one
// character other than backslash and quote, or the two-character sequence
// backslash-quote. A backslash followed by anything else matches neither
// alternative. The action prints every matched string, prefixed with
// "string>", to stdout.
STRING_LITERAL
   : '\'' ( options{ greedy=false; }: ( ~('\\'|'\'') | ('\\' '\'') ) )* '\''
      {
         System.out.println("string>" + getText());
      }
   ;
    
  
// An unsigned decimal integer: either a lone '0', or a non-zero digit
// followed by any digits (so a literal never starts with a redundant zero).
INTEGER_LITERAL : '0' | ( '1'..'9' ( '0'..'9' )* ) ;

// A name: a LETTER followed by any mix of letters, digits, '$', '#', '%' and
// '-'. Because '-' is allowed inside a name, text such as a-b written without
// spaces matches this rule as a whole. The action prints every matched
// identifier, prefixed with "identifier>", to stdout.
IDENTIFIER
   :  LETTER (LETTER|'0'..'9'|'$'|'#'|'%'|'-')*
      {
         System.out.println("identifier>" + getText());
      }
   ;

// Helper for IDENTIFIER (a fragment, so it never becomes a token on its
// own): an ASCII letter or underscore.
fragment LETTER : ('A'..'Z'|'a'..'z'|'_') ;

// Comparison operator tokens, referenced from logicalOP. OPEQ is also used
// as the assignment / initialiser sign in assignment and variableDeclaration.
// NOTE(review): INF / SUP presumably abbreviate "inferior" / "superior",
// i.e. less-than / greater-than -- naming only, the literals are what count.
OPEQ    : '=' ;
OPNEQ   : '<>' ;
OPINF   : '<' ;
OPINFEQ : '<=' ;
OPSUP   : '>' ;
OPSUPEQ : '>=' ;


More information about the antlr-interest mailing list