[antlr-interest] skipping EOF.

Thu Nov 7 14:32:43 PST 2002

Hi,

I'm running into an infinite loop because my grammar doesn't detect 
the EOF. Everything else in the grammar works well. The generated 
nextToken() method looks like this:

---code snippet begin---
			try {   // for lexical error handling
				if ((LA(1)=='<') && (LA(2)=='@')) {
					mTAG(true);
					theRetToken=_returnToken;
				}
				else {
					mTOP_TEXT(true);
					theRetToken=_returnToken;
				}

				if ( _returnToken==null ) continue 
tryAgain; // found SKIP token
				_ttype = _returnToken.getType();
				_returnToken.setType(_ttype);
				return _returnToken;
			}
			catch (RecognitionException e) {
				throw new 
TokenStreamRecognitionException(e);
			}
---end code snippet---

I think I need a semantic predicate that keeps the lexer from entering 
the TOP_TEXT rule at end of file, so that nextToken() returns an EOF 
token instead of looping.

Please advise. The grammar starts below.

---grammar begins-----------
header { package tom; }

class SimpleParser extends Parser;

// Parser entry rule: one or more lexer tokens of the listed types,
// followed by the end-of-file token.
template: (TOP_TEXT | TAG | INCLUDE | LOOP | END_LOOP)+ EOF;

class SimpleLexer extends Lexer;
options {
    k=2;
    testLiterals=false;
    charVocabulary = '\3'..'\377' | '\u1000'..'\u1fff';
}

{
    public boolean isEndOfText(boolean stopOnAngleBracket) throws 
antlr.CharStreamException {
        char char1 = LA(1);
        if (char1 == '<') {
            char char2 = LA(2);
            if (char2 == '@') { // "<@"
                return true;
            } else if (char2 == '/') { // "</"
                //ignore any white space
                int i = forwardToNonWhiteSpace(3);

                if (
                    (LA(i++) == 'l') &&
                    (LA(i++) == 'o') &&
                    (LA(i++) == 'o') &&
                    (LA(i++) == 'p')
                   ) {

                   //match whitespace.
                   i = forwardToNonWhiteSpace(i);

                   if (LA(i++) == '>') { // "</loop>"
                        return true;
                   } else {
                        return false;
                   }
               } else {
                    return false;
               }
            } else { // it began with '<' but doesn't have anything 
meaningful after that.
                return false;
            }
        } else if (char1 == EOF_CHAR) { //End-of-file reached, can't 
proceed further.
            return true;
        } else if (stopOnAngleBracket && (LA(1)=='>')) {
            return true;
        } else { //doesn't begin with '<', so it's a normal char, not 
a special one.
            return false;
        }
    }

    /**
     * Finds the first lookahead offset, starting at i, whose character is
     * not a space, tab, or newline. Only LA() lookahead calls are made.
     *
     * @param i lookahead offset at which to start scanning
     * @return the first offset at or after i that is not ' ', '\t' or '\n'
     * @throws antlr.CharStreamException propagated from LA()
     */
    public int forwardToNonWhiteSpace(int i) throws antlr.CharStreamException {
        int offset = i;
        for (;;) {
            char next = LA(offset);
            if (next != ' ' && next != '\t' && next != '\n') {
                return offset;
            }
            offset++;
        }
    }

    /**
     * Reports whether the text inside a tag such as "<@asdf>" ends at the
     * current position: true when isEndOfText(true) is true or LA(1) is '>'.
     *
     * @return true if the tag text ends here
     * @throws antlr.CharStreamException propagated from LA()
     */
    public boolean isEndOfTagText() throws antlr.CharStreamException {
        if (isEndOfText(true)) {
            return true;
        }
        // NOTE(review): isEndOfText(true) already returns true for '>', so
        // this check looks redundant; kept to preserve the original logic.
        return LA(1) == '>';
    }
}

// Helper rule: zero or more spaces, tabs, or newlines. Because of the '*'
// it can match the empty string.
protected
WS : ( ' ' | '\t' | '\n' )* ;

// A tag: "<@", then either an "include" or "loop" body (the token is
// retyped to INCLUDE or LOOP) or one or more pieces of TEXT[true] / nested
// TAG, then the closing '>'.
TAG:     (options 
            {
                generateAmbigWarnings=false;
            }:
                "<@" 
                    (
                        (
                            ("include") => INCLUDE { $setType(INCLUDE); } |
                            ("loop")    => LOOP { $setType(LOOP); } 
                        ) |
                        (
                            // Since this is a greedy loop, stop on seeing the
                            // first '>', hence the semantic predicate below.
                            { LA(1) != '>' }? TEXT[ true ]  | TAG 
                        )+
                    )
                '>'
        );

// Helper rule for the body of an include tag: the literal "include"
// followed by any number of TEXT[true] pieces (taken only while
// isEndOfTagText() is false) or nested TAGs.
protected
INCLUDE: "include" ( { !isEndOfTagText() }?  TEXT[true] | TAG )*;

// Helper rule for the body of a loop tag: the literal "loop" followed by
// any number of TEXT[true] pieces (taken only while isEndOfTagText() is
// false) or nested TAGs. Also reused by END_LOOP.
protected
LOOP: "loop" ( 
                { !isEndOfTagText() }?  TEXT[true] | 
                TAG 
             )*;

// Helper rule for a closing loop tag: "</", a LOOP body, then '>'.
// NOTE(review): isEndOfText() skips white space between "</" and "loop",
// but this rule has nothing between "</" and LOOP that matches white
// space -- confirm whether input such as "</ loop>" is meant to match.
protected
END_LOOP: "</" LOOP '>';

// Top-level text token: delegates to TEXT with stopOnAngleBracket=false,
// so a '>' does not end the text.
TOP_TEXT: TEXT[ false ];

// Helper rule for a run of text. Either matches END_LOOP (selected by a
// syntactic predicate, retyping the token to END_LOOP) or consumes
// characters one at a time for as long as isEndOfText(stopOnAngleBracket)
// is false. After each consumed character, newline() is called if the
// NEXT character, LA(1), is '\n'.
// NOTE(review): the character loop uses '*', so this rule can match zero
// characters. isEndOfText() returns true at EOF_CHAR, so at end of file
// the loop consumes nothing. Since TOP_TEXT is the unconditional else
// branch of the generated nextToken(), this is presumably the cause of
// the reported infinite loop -- confirm against the generated lexer.
protected 
TEXT[ boolean stopOnAngleBracket ]:   (END_LOOP) => END_LOOP { 
$setType(END_LOOP); } |
        (
        options 
            {
                generateAmbigWarnings=false;
            }:
            ( { !isEndOfText(stopOnAngleBracket) }? . { if (LA(1)
=='\n') newline(); } )* 
        );

Thanks in advance.

-Praveen

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/