[antlr-interest] Refactor grammar without semantic predicates

Mon Oct 24 13:25:51 PDT 2011

Hi,
I'm working on a grammar and I can't figure out how to refactor it so that it no longer needs semantic predicates. Essentially, I want RAW_TEXT to act as a catch-all when none of the tags match; the only way I could make that work was by using semantic predicates. Does anyone have ideas? Thanks in advance. The problem is that I need '{' tokens to be parsed as RAW_TEXT if they do not match any of the other tag types.

Brian
PS. I apologize for hitting the antlr-dev mailing list by accident with this question.

// Combined lexer/parser grammar for Dust templates.
grammar Dust;

options {
  language     = Java;        // generate a Java recognizer
  output       = AST;         // rules build trees via the -> rewrites below
  ASTLabelType = CommonTree;  // node type used for the generated trees
}

/*
 * IMAGINARY TOKENS
 *
 * None of these is matched from the input by a lexer rule; they only serve as
 * node types in the tree rewrites (-> ^(...)) of the parser rules below.
 * The declaration order is left untouched because it determines the token
 * type numbers ANTLR assigns.
 */
tokens {
        TAG;                      // root built by rule `tag`
        SPECIAL_TAG;              // root built by rule `special_tag`
        LOOP_SECTION;             // root built by rule `loop_section`
        EXISTS_SECTION;           // root built by rule `exists_section`
        NOT_EXISTS_SECTION;       // root built by rule `not_exists_section`
        CONTEXT_HELPER_SECTION;   // root built by rule `context_helper_section`
        INLINE_PARTIAL_SECTION;   // root built by rule `inline_partial_section`
        PRAGMA_SECTION;           // root built by rule `pragma_section`
        CUSTOM_SECTION;           // root built by rule `custom_section`
        BLOCK_SECTION;            // root built by rule `block_section`
        END_SECTION;              // not referenced by any rule in this grammar
        PARTIAL;                  // root built by rule `partial`
        BUFFER;                   // root built by rule `buffer` (wraps RAW_TEXT)
        COMMENT;                  // root built by rule `comment` (wraps COMMENT_T)
        QUOTED_PARAM;             // `parameter` whose value is a QUOTED_STR
        IDENT_PARAM;              // `parameter` whose value is an `ident`
        CONTEXT;                  // root built by rule `context`
        SEC_START_TAG;            // root built by rule `section_start_tag`
        SEC_END_TAG;              // root built by rule `section_end_tag`
        BODY;                     // wraps the optional body inside section trees
        IDENT;                    // root built by rule `ident`
        FILTER;                   // root built by rule `filter`
        ELSE_BODY;                // root built by rule `else_section`
        CUSTOM_BLK;               // not referenced by any rule in this grammar
        SELF_CLOSE;               // marker child emitted for self-closing sections
}

@lexer::members{
  // Lexer mode flag: the LD rule sets it to true and the RD rule sets it back
  // to false. Token rules such as ID, QUOTED_STR and RAW_TEXT consult it
  // through gated semantic predicates ({ ... }? =>) to decide whether they
  // are allowed to match at the current position.
  boolean inTag = false;

}

/* LEXER RULES */

// A template comment: '{!' up to the closing '!}'. ANTLR 3 treats '.*' as
// non-greedy, so the match stops at the first '!}' that follows.
COMMENT_T : '{!' .* '!}';

// A single name segment. Only enabled while inTag is true.
ID : { inTag }? => (LETTER | '_' | '$') (LETTER | '_' | DIGIT | '$')*;

// A dotted path with an optional leading '.'. It is gated on inTag exactly
// like ID and QUOTED_STR: without the gate, the optional leading '.' let this
// rule be a candidate for plain text outside a tag, where only RAW_TEXT is
// meant to match.
IDENTIFIER : { inTag }? => '.'? ID ('.' ID)*;

// Literal template text: any run of characters other than the tag
// delimiters. Only enabled while inTag is false.
RAW_TEXT : { !inTag }? =>  ~('{'|'}')+;

fragment DIGIT  : '0'..'9';
fragment LETTER : ('a'..'z' | 'A'..'Z');

// Tag delimiters; they toggle inTag. The assignment sits in an action placed
// after the literal so the flag changes only once the delimiter has actually
// been matched (an @init action runs on rule entry, before any matching).
LD : '{' { inTag = true; } ;
RD : '}' { inTag = false; } ;

// A double-quoted string on a single line (no '"', '\n' or '\r' inside).
// Only enabled while inTag is true.
QUOTED_STR : { inTag }? => '"' ~('\n' | '\r' | '"')* '"';

/* DUST PARSER RULES */

// Entry rule: an optional body followed by end of input.
start
    : body? EOF
    ;

// One or more template elements, in any mix and order.
body
    : ( tag
      | special_tag
      | section
      | partial
      | comment
      | buffer
      )+
    ;

// A run of raw text, wrapped in a BUFFER node.
buffer
    : RAW_TEXT
      -> ^(BUFFER RAW_TEXT)
    ;

// An identifier in any of its three token forms; each is wrapped in an
// IDENT node with the matched token as its only child.
ident
    : IDENTIFIER -> ^(IDENT IDENTIFIER)
    | ID         -> ^(IDENT ID)
    | '.'        -> ^(IDENT '.')
    ;

// '{' '~' ID '}' ; only the ID is kept, under a SPECIAL_TAG root.
special_tag
    : LD '~' ID RD
      -> ^(SPECIAL_TAG ID)
    ;

// '|' followed by an ID; the '|' is dropped from the tree.
filter
    : '|' ID
      -> ^(FILTER ID)
    ;

// '{' ident, any number of filters, '}' ; delimiters are dropped.
tag
    : LD ident filter* RD
      -> ^(TAG ident filter*)
    ;

// A single-space-prefixed 'key=value' pair. The value is either a quoted
// string (QUOTED_PARAM tree) or another ident (IDENT_PARAM tree); the key is
// always the first child.
parameter
    : ' ' key=ident '='
      ( QUOTED_STR -> ^(QUOTED_PARAM $key QUOTED_STR)
      | val=ident  -> ^(IDENT_PARAM $key $val)
      )
    ;

// ':' followed by an ident; the ':' is dropped from the tree.
context
    : ':' ident
      -> ^(CONTEXT ident)
    ;

// '{' '>' ident, optional context, '/' '}' ; only ident and context survive.
partial
    : LD '>' ident context? '/' RD
      -> ^(PARTIAL ident context?)
    ;

// The part of a section's opening tag after the sigil: an ident, an optional
// context and any number of parameters, grouped under SEC_START_TAG.
section_start_tag
    : ident context? parameter*
      -> ^(SEC_START_TAG ident context? parameter*)
    ;

// '{' '/' ident '}' ; only the ident is kept, under SEC_END_TAG.
section_end_tag
    : LD '/' ident RD
      -> ^(SEC_END_TAG ident)
    ;

// '{' ':else' '}' followed by an optional body; the tree keeps only the body.
else_section
    : LD ':else' RD body?
      -> ^(ELSE_BODY body?)
    ;

// Section introduced by '#'. After the shared opening, it is either a block
// form (optional body, optional else section, end tag) or a self-closing
// form ('/' before the closing delimiter).
loop_section
    : LD '#' section_start_tag
      ( RD body? else_section? section_end_tag
          -> ^(LOOP_SECTION section_start_tag ^(BODY body?) else_section? section_end_tag)
      | '/' RD
          -> ^(LOOP_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '?'. After the shared opening, it is either a block
// form (optional body, optional else section, end tag) or a self-closing
// form ('/' before the closing delimiter).
exists_section
    : LD '?' section_start_tag
      ( RD body? else_section? section_end_tag
          -> ^(EXISTS_SECTION section_start_tag ^(BODY body?) else_section? section_end_tag)
      | '/' RD
          -> ^(EXISTS_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '^'. After the shared opening, it is either a block
// form (optional body, optional else section, end tag) or a self-closing
// form ('/' before the closing delimiter).
not_exists_section
    : LD '^' section_start_tag
      ( RD body? else_section? section_end_tag
          -> ^(NOT_EXISTS_SECTION section_start_tag ^(BODY body?) else_section? section_end_tag)
      | '/' RD
          -> ^(NOT_EXISTS_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '@'. Block form (optional body, end tag; no else
// section is accepted here) or self-closing form.
context_helper_section
    : LD '@' section_start_tag
      ( RD body? section_end_tag
          -> ^(CONTEXT_HELPER_SECTION section_start_tag ^(BODY body?) section_end_tag)
      | '/' RD
          -> ^(CONTEXT_HELPER_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '%'. Block form (optional body, end tag; no else
// section is accepted here) or self-closing form.
pragma_section
    : LD '%' section_start_tag
      ( RD body? section_end_tag
          -> ^(PRAGMA_SECTION section_start_tag ^(BODY body?) section_end_tag)
      | '/' RD
          -> ^(PRAGMA_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '+'. Block form (optional body, end tag; no else
// section is accepted here) or self-closing form.
block_section
    : LD '+' section_start_tag
      ( RD body? section_end_tag
          -> ^(BLOCK_SECTION section_start_tag ^(BODY body?) section_end_tag)
      | '/' RD
          -> ^(BLOCK_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '<'. Block form (optional body, end tag; no else
// section is accepted here) or self-closing form.
inline_partial_section
    : LD '<' section_start_tag
      ( RD body? section_end_tag
          -> ^(INLINE_PARTIAL_SECTION section_start_tag ^(BODY body?) section_end_tag)
      | '/' RD
          -> ^(INLINE_PARTIAL_SECTION section_start_tag SELF_CLOSE)
      )
    ;

// Section introduced by '*'. Only the self-closing form is defined.
custom_section
    : LD '*' section_start_tag '/' RD
      -> ^(CUSTOM_SECTION section_start_tag SELF_CLOSE)
    ;
// Any one of the section kinds; each alternative begins with LD followed by
// its own sigil literal.
section
    : loop_section
    | exists_section
    | not_exists_section
    | context_helper_section
    | pragma_section
    | block_section
    | inline_partial_section
    | custom_section
    ;

// A COMMENT_T token, wrapped in a COMMENT node.
comment
    : COMMENT_T
      -> ^(COMMENT COMMENT_T)
    ;