[antlr-interest] Input parsing ending before end of file
Ymo
ymo.mail at gmail.com
Sun Mar 30 09:12:58 PDT 2008
Hi, I am trying to parse an input file and the generated parser seems to just
quit without any apparent errors:
input file :
package org.test;
import org.antlr.runtime.ANTLRFileStream;
import org.antlr.runtime.CharStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.RecognitionException;
import org.antlr.runtime.tree.CommonTreeNodeStream;
import org.antlr.runtime.tree.Tree;
{ // As soon as it sees the braces it just exits !
// if i put the braces inside the define block everything is fine.
// how do i even stop any kind of text entered between the import & the
define block ?
« define main for Object »
/*
*/
{
//debug test1
fgsadfgsdf
fgsadfgsdf {
fgsadfgsdf .; x000 x
« enddefine »
grammar:
// Start rule: an optional package declaration, then any number of import
// declarations, then any number of template declarations.
// NOTE(review): this rule does not end with EOF, so the parser is presumably
// free to return successfully as soon as no further templateDeclaration
// matches, leaving the rest of the input unread and reporting no error --
// confirm against the ANTLR 3 documentation and consider appending EOF.
templateFile
: packageDeclaration?
importDeclaration*
templateDeclaration*
;
packageDeclaration
: 'package' qualifiedName ';'
;
importDeclaration
: 'import' 'static'? Identifier ('.' Identifier)* ('.' '*')? ';'
;
templateDeclaration
: LG
textBlock*
( defineDeclaration textBlock )*
;
defineDeclaration
: 'define' Identifier 'for' type=qualifiedName
textBlock
'enddefine'
;
//textDeclaration
// : t1=textBlock { System.out.println("t1:"+t1.getTree());}
// ( t2=textBlock { System.out.println("t2:"+t2.getTree());})*
// ;
// A text block: an RG token, followed by any run of tokens other than LG,
// optionally closed by a trailing LG token.
textBlock : RG (~LG)* LG?;
typeDeclaration
: collectionType | simpleType
;
simpleType
: type=qualifiedName
;
collectionType
: col=( 'Collection' | 'List' | 'Set' )
('[' simpleType ']' )?
;
qualifiedName
: Identifier ('.' Identifier)*
;
// LEXER
HexLiteral : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
DecimalLiteral : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
OctalLiteral : '0' ('0'..'7')+ IntegerTypeSuffix? ;
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
fragment
IntegerTypeSuffix : ('l'|'L') ;
fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
CharacterLiteral
: '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
;
StringLiteral
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
fragment
EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| UnicodeEscape
| OctalEscape
;
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UnicodeEscape
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit
;
Identifier
: Letter (Letter|JavaIDDigit)*
;
/**I found this char range in JavaCC's grammar, but Letter and Digit
overlap.
Still works, but...
*/
fragment
Letter
: '\u0024' |
'\u0041'..'\u005a' |
'\u005f' |
'\u0061'..'\u007a' |
'\u00c0'..'\u00d6' |
'\u00d8'..'\u00f6' |
'\u00f8'..'\u00ff' |
'\u0100'..'\u1fff' |
'\u3040'..'\u318f' |
'\u3300'..'\u337f' |
'\u3400'..'\u3d2d' |
'\u4e00'..'\u9fff' |
'\uf900'..'\ufaff'
;
fragment
JavaIDDigit
: '\u0030'..'\u0039' |
'\u0660'..'\u0669' |
'\u06f0'..'\u06f9' |
'\u0966'..'\u096f' |
'\u09e6'..'\u09ef' |
'\u0a66'..'\u0a6f' |
'\u0ae6'..'\u0aef' |
'\u0b66'..'\u0b6f' |
'\u0be7'..'\u0bef' |
'\u0c66'..'\u0c6f' |
'\u0ce6'..'\u0cef' |
'\u0d66'..'\u0d6f' |
'\u0e50'..'\u0e59' |
'\u0ed0'..'\u0ed9' |
'\u1040'..'\u1049'
;
WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
;
ML_COMMENT
: '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
;
// Matches a line that starts with '//debug ' up to and including its line
// terminator ('\n', optionally preceded by '\r'); the embedded action prints
// "line debug" to standard output.
// NOTE(review): this is a fragment and no rule visible here references it,
// so the lexer will presumably never invoke it on its own -- confirm.
fragment
LINE_DEBUG : '//debug ' ~('\n'|'\r')* '\r'? '\n' { System.out.println("line debug");};
LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
;
// A dummy rule to force the vocabulary to be all characters, except the special
// ones that ANTLR uses internally (0 to 2) and the guillemet characters
// (\u00ab and \u00bb, which the ranges below skip).
//fragment
TextBlockVocab : ('\3'..'\u00aa'|'\u00ac'..'\u00ba'|'\u00bc'..'\ufffe') ;
----------------------------
As you can see, if I put braces { before the guillemet characters, the parser
just exits without a single error. I even tried ANTLRWorks and it's the same
behavior. Ultimately what I want to do is not allow any text entered before
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20080330/73a4b4bf/attachment.html
More information about the antlr-interest
mailing list