[antlr-interest] Input parsing ending before end of file

Ymo ymo.mail at gmail.com
Sun Mar 30 09:12:58 PDT 2008


Hi, I am trying to parse an input file, and the generated parser seems to just
quit without any apparent errors:

input file :
package org.test;

import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;

{ // As soon as it sees the braces it just exits  !

// if i put the braces inside the define block everything is fine.
// how do i even stop any kind of text entered between the import & the
define block ?

« define main for Object »

/*
*/

{

//debug test1

fgsadfgsdf

fgsadfgsdf {

fgsadfgsdf    .; x000 x


« enddefine »

grammar:
// Entry rule: an optional package declaration, any number of imports, then
// any number of template declarations.
// The trailing EOF forces the parser to consume the whole token stream.
// Without it the rule is satisfied as soon as the templateDeclaration* loop
// cannot start another iteration, so the parser returns without reporting
// the unmatched input that follows (e.g. a stray '{' after the imports).
templateFile
   :  packageDeclaration?
      importDeclaration*
      templateDeclaration*
      EOF
   ;

// Package header: the keyword 'package', a dotted name, then ';'.
packageDeclaration
   :  'package' qualifiedName ';'
   ;

// One import line: 'import', an optional 'static', a dotted identifier path,
// an optional trailing '.*' wildcard, then ';'.
importDeclaration
   :  'import' 'static'? Identifier ('.' Identifier)* ('.' '*')? ';'
   ;

// A template section: an LG token, any number of text blocks, then zero or
// more (define block, text block) pairs.
// NOTE(review): LG and RG are not defined in this excerpt -- presumably the
// opening and closing guillemet tokens; confirm against the full grammar.
templateDeclaration
   : LG
      textBlock*
     ( defineDeclaration textBlock )*
   ;

// A define block: 'define' <Identifier> 'for' <qualified name>, followed by a
// single textBlock and the closing 'enddefine' keyword. The qualified name
// carries the label 'type'.
defineDeclaration
   :  'define' Identifier 'for' type=qualifiedName
      textBlock
      'enddefine'
   ;

//textDeclaration
//   :  t1=textBlock { System.out.println("t1:"+t1.getTree());}
//      ( t2=textBlock { System.out.println("t2:"+t2.getTree());})*
//   ;

// A text run: an RG token, then every following token that is not LG, then
// an optional LG that closes the run.
textBlock : RG (~LG)* LG?;

// Either a collection type or a simple type.
// NOTE(review): no other rule visible in this excerpt references this rule.
typeDeclaration
   : collectionType | simpleType
   ;

// A plain type reference: a qualified name, labelled 'type'.
simpleType
   : type=qualifiedName
   ;

// A collection type: one of the keywords 'Collection', 'List' or 'Set'
// (labelled 'col'), optionally followed by an element type in square brackets.
collectionType
   : col=( 'Collection' | 'List' | 'Set' )
     ('[' simpleType ']' )?
   ;

// A dotted name: one Identifier followed by zero or more '.' Identifier parts.
qualifiedName
   :  Identifier ('.' Identifier)*
   ;

// LEXER

// Hexadecimal integer: '0x' or '0X', one or more hex digits, optional l/L suffix.
HexLiteral : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;

// Decimal integer: a lone '0', or a non-zero digit followed by any number of
// digits; optional l/L suffix.
DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;

// Octal integer: '0' followed by one or more digits 0-7; optional l/L suffix.
OctalLiteral : '0' ('0'..'7')+ IntegerTypeSuffix? ;

// A single hexadecimal digit: 0-9, a-f or A-F.
// Declared as a fragment, so it only serves as a helper for other lexer rules.
fragment
HexDigit
    :   '0'..'9'
    |   'a'..'f'
    |   'A'..'F'
    ;

// The integer type suffix: a lower- or upper-case 'L'.
// Declared as a fragment, so it only serves as a helper for other lexer rules.
fragment
IntegerTypeSuffix
    :   'l'
    |   'L'
    ;

// Exponent part: 'e' or 'E', an optional sign, then one or more decimal digits.
// NOTE(review): no rule visible in this excerpt references this fragment.
fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;

// The floating-point type suffix: 'f', 'F', 'd' or 'D'.
// Declared as a fragment, so it only serves as a helper for other lexer rules.
fragment
FloatTypeSuffix
    :   'f'
    |   'F'
    |   'd'
    |   'D'
    ;

// Character literal: exactly one escape sequence, or one character other than
// a single quote or backslash, enclosed in single quotes.
CharacterLiteral
    :   '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
    ;

// String literal: double quotes around any number of escape sequences or
// characters other than a backslash or double quote.
StringLiteral
    :  '"' ( EscapeSequence | ~('\\'|'"') )* '"'
    ;

// Backslash escape: a single-character escape (b, t, n, f, r, double quote,
// single quote, backslash), a Unicode escape, or an octal escape.
fragment
EscapeSequence
    :   '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
    |   UnicodeEscape
    |   OctalEscape
    ;

// Octal escape: a backslash followed by one, two or three octal digits.
// The three-digit form restricts its first digit to 0-3.
fragment
OctalEscape
    :   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7') ('0'..'7')
    |   '\\' ('0'..'7')
    ;

// Unicode escape: a backslash, 'u', then exactly four hexadecimal digits.
fragment
UnicodeEscape
    :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;

// Identifier: a Letter followed by any number of Letters or JavaIDDigits.
Identifier
    :   Letter (Letter|JavaIDDigit)*
    ;

/** Characters allowed to start an identifier: '$', A-Z, '_', a-z and the
   listed Latin-1 and higher Unicode ranges.
   I found this char range in JavaCC's grammar, but Letter and Digit overlap
   (the range '\u0100'..'\u1fff' below also contains the JavaIDDigit ranges).
   Still works, but...
 */
fragment
Letter
    :  '\u0024' |
       '\u0041'..'\u005a' |
       '\u005f' |
       '\u0061'..'\u007a' |
       '\u00c0'..'\u00d6' |
       '\u00d8'..'\u00f6' |
       '\u00f8'..'\u00ff' |
       '\u0100'..'\u1fff' |
       '\u3040'..'\u318f' |
       '\u3300'..'\u337f' |
       '\u3400'..'\u3d2d' |
       '\u4e00'..'\u9fff' |
       '\uf900'..'\ufaff'
    ;

// Digit characters allowed after the first character of an identifier:
// ASCII 0-9 plus the listed ten-character digit ranges from other Unicode
// blocks.
fragment
JavaIDDigit
    :  '\u0030'..'\u0039' |
       '\u0660'..'\u0669' |
       '\u06f0'..'\u06f9' |
       '\u0966'..'\u096f' |
       '\u09e6'..'\u09ef' |
       '\u0a66'..'\u0a6f' |
       '\u0ae6'..'\u0aef' |
       '\u0b66'..'\u0b6f' |
       '\u0be7'..'\u0bef' |
       '\u0c66'..'\u0c6f' |
       '\u0ce6'..'\u0cef' |
       '\u0d66'..'\u0d6f' |
       '\u0e50'..'\u0e59' |
       '\u0ed0'..'\u0ed9' |
       '\u1040'..'\u1049'
   ;

// Whitespace: one space, carriage return, tab, form feed or newline per token,
// sent to the HIDDEN channel. The parentheses matter: they make the action
// apply to every alternative rather than only the last one.
WS  :  (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
    ;

// Block comment: '/*' up to the first following '*/' (the wildcard loop is
// non-greedy), sent to the HIDDEN channel.
ML_COMMENT
    :   '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
    ;

// Debug line: the literal '//debug ' followed by the rest of the line and a
// mandatory newline (optionally preceded by '\r'); prints "line debug" when
// matched.
// The string literal in the action is kept on a single line: a Java string
// literal cannot span lines, so the wrapped form would not compile in the
// generated lexer.
// Because the rule is a fragment, the lexer never produces it as a token on
// its own; it only runs if another lexer rule references it.
fragment
LINE_DEBUG : '//debug ' ~('\n'|'\r')* '\r'? '\n' { System.out.println("line debug");};

// Line comment: '//' followed by the rest of the line and its newline
// (optionally preceded by '\r'), sent to the HIDDEN channel.
// The trailing newline is mandatory, so a comment on the last line of the
// input with no newline after it does not match this rule.
LINE_COMMENT
    : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
    ;

// A dummy rule to force the vocabulary to be all characters, except the
// special ones that ANTLR uses internally (0 to 2) and the guillemet
// characters ('\u00ab' and '\u00bb', which the ranges below skip).
//fragment
TextBlockVocab : ('\3'..'\u00aa'|'\u00ac'..'\u00ba'|'\u00bc'..'\ufffe') ;

----------------------------

As you can see, if I put braces { before the guillemet characters, the parser
just exits without a single error. I even tried ANTLRWorks and it's the same
behavior. Ultimately what I want to do is not allow any text entered before
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20080330/73a4b4bf/attachment.html 


More information about the antlr-interest mailing list