[antlr-interest] duplicate token type when collapsing subrule into set, etc.

David Holroyd dave at badgers-in-foil.co.uk
Sat Mar 24 10:52:42 PDT 2007


I just grabbed antlr-03-22-2007.18.tar.gz to see if the
static-lexer-init change gives a speedup when I create large lexer rules
(for unicode identifiers).

Running against my existing grammar, which was processed without
warnings by an ANTLR from source control around the start of Jan, I now
get these errors (among many others):

  error(204): org/asdt/core/internal/antlr/AS3.g3:1526:6: duplicate token type '\\' when collapsing subrule into set
  error(204): org/asdt/core/internal/antlr/AS3.g3:1527:6: duplicate token type '\\' when collapsing subrule into set

The rule in question is,

  fragment OCTAL_ESC
	:   '\\' ('0'..'3') ('0'..'7') ('0'..'7')
	|   '\\' ('0'..'7') ('0'..'7')
	|   '\\' ('0'..'7')
	;

If I left factor the above into this form,

  fragment OCTAL_ESC
	:	'\\'
	(	('0'..'3') ('0'..'7') ('0'..'7')
	|	('0'..'7') ('0'..'7')
	|	('0'..'7')
	)
	;

when I try again, ANTLR just outputs this:

  ANTLR Parser Generator  Version 3.0b7 (??, 2007)  1989-2007
  org/asdt/core/internal/antlr/AS3.g3
  warning(205): org/asdt/core/internal/antlr/AS3.g3:1:10: ANTLR could not analyse this decision in rule Tokens; often this is because of recursive rule references visible from the left edge of alternatives.  ANTLR will re-analyze the decision with a fixed lookahead of k=1.  Consider using "options {k=1;}" for that decision and possibly adding a syntactic predicate.
  warning(205): org/asdt/core/internal/antlr/AS3.g3:1:10: ANTLR could not analyze this decision in rule Tokens; often this is because of recursive rule references visible from the left edge of alternatives.  ANTLR will re-analyze the decision with a fixed lookahead of k=1.  Consider using "options {k=1;}" for that decision and possibly adding a syntactic predicate.
  error(10):  internal error: org.antlr.tool.Grammar.createLookaheadDFA(Grammar.java:857): could not even do k=1 for decision 25 

Note the warnings are reported against line 1.


I attach AS3.g3, including the modification to OCTAL_ESC mentioned.


What could I look at to fix these issues?

ta,
dave

-- 
http://david.holroyd.me.uk/
-------------- next part --------------

	
/**********************************************************
 * ActionScript Development Tool 
 * Copyright (C) 2005 asdt.org 
 * 
 * http://www.asdt.org
 * http://sourceforge.net/projects/aseclipseplugin/
 *
 * This program is free software; 
 * you can redistribute it and/or modify it under the terms of 
 * the GNU General Public License as published by the 
 * Free Software Foundation; either version 2 of the License, 
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, 
 * but WITHOUT ANY WARRANTY; without even the implied warranty of 
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 * See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License 
 * along with this program; if not, write to the 
 * Free Software Foundation, Inc., 
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 
 * 
 * Created on 29 sept 2005
 * 
 **********************************************************


**
 *	@author Martin Schnabel
 */
grammar AS3;

options {
	k = 2;
	output=AST;
	ASTLabelType=LinkedListTree;
}

tokens {
	COMPILATION_UNIT;
	TYPE_BLOCK; METHOD_DEF; VAR_DEF;
	ANNOTATIONS; ANNOTATION; ANNOTATION_PARAMS; ANNOTATION_PARAM;
	MODIFIERS; NAMESPACE_DEF;
	CLASS_DEF; INTERFACE_DEF;
	PARAMS;
	PARAM; TYPE_SPEC;
	BLOCK; EXPR; ELIST;
	EXPR_STMNT;
	ENCPS_EXPR;
	VAR_INIT;
	METHOD_CALL; PROPERTY_OR_IDENTIFIER; PROPERTY_ACCESS; TYPE_NAME;
	ARRAY_ACC;
	UNARY_PLUS; UNARY_MINUS; POST_INC; POST_DEC;
	ARRAY_LITERAL;
	ELEMENT; OBJECT_LITERAL;
	OBJECT_FIELD; FUNC_DEF;
	FOR_INIT; FOR_CONDITION; FOR_ITERATOR;
	FOR_EACH; FOR_IN;
	SWITCH_STATEMENT_LIST;
	IDENTIFIER;
	DEFAULT_XML_NAMESPACE;
	
	VIRTUAL_PLACEHOLDER;
}

scope InOperator {
	boolean allowed;
}

@parser::header {
package org.asdt.core.internal.antlr;

import uk.co.badgersinfoil.metaas.impl.antlr.LinkedListTree;
import uk.co.badgersinfoil.metaas.impl.antlr.LinkedListTreeAdaptor;
import uk.co.badgersinfoil.metaas.impl.antlr.LinkedListToken;
import uk.co.badgersinfoil.metaas.impl.antlr.LinkedListTokenStream;
import uk.co.badgersinfoil.metaas.impl.antlr.LinkedListTokenSource;
import java.io.StringReader;
import java.io.Reader;
import java.io.IOException;
import uk.co.badgersinfoil.metaas.impl.parser.E4XHelper;
import uk.co.badgersinfoil.metaas.impl.parser.RegexSimpleHelper;
import uk.co.badgersinfoil.metaas.impl.TokenBuilder;
import uk.co.badgersinfoil.metaas.impl.ASTUtils;
}
@lexer::header {
package org.asdt.core.internal.antlr;
}

// disable standard error handling; be strict
@rulecatch { }

@parser::members {

	public static final int CHANNEL_PLACEHOLDER = 999;

	protected void mismatch(IntStream input, int ttype, BitSet follow)
		throws RecognitionException
	{
		throw new MismatchedTokenException(ttype, input);
	}
	
	private AS3Lexer lexer;
	private CharStream cs;

	public void setInput(AS3Lexer lexer, CharStream cs) {
		this.lexer = lexer;
		this.cs = cs;
	}

	/**
	 * Handle 'island grammar' for embeded XML-literal elements.
	 */
	private LinkedListTree parseXMLLiteral(LinkedListToken startMarker) throws RecognitionException {
		return E4XHelper.parseXMLLiteral(startMarker, lexer, cs, (LinkedListTokenStream)input);
	}

	/**
	 * Handle 'island grammar' for skipping over embeded
	 * regular-expression-literal values.
	 */
	private LinkedListTree parseRegexpLiteral(LinkedListToken startMarker) throws RecognitionException {
		return RegexSimpleHelper.parseRegexpLiteral(startMarker, lexer, cs, (LinkedListTokenStream)input);
	}

	private boolean virtualSemi() {
		LinkedListToken tok = ((LinkedListToken)input.LT(1)).getPrev();
		while (tok.getType() == WS) {
			tok = tok.getPrev();
		}
		return tok.getType() == NL;
	}
	
	/**
	 * Adds a placeholder token into the stream, corresponding to the
	 * location of the given AST node, if the given AST node has no
	 * children.  Only use this function with 'imaginary' nodes, not with
	 * nodes that already correspond to a concrete Token.
	 */
	private void placeholder(LinkedListTree imaginary) {
		if (imaginary.getChildCount() > 0) {
			// one of this node's children will presumably have
			// some tokens of its own, so there's no need for us
			// to synthesize one,
			return;
		}

		// Since this this AST node is 'imaginary', it does not directly
		// correspond to any Token in the input stream.  Also, since
		// it has no children, there is nothing anchoring it to a
		// position in the token-stream, which will make it difficult to
		// know where new tokens should be inserted if the calling app
		// adds a child to this node.  We therefore insert a virtual
		// token at this point in the stream just so that the AST node
		// can remember its own location.

		LinkedListToken tok = (LinkedListToken)input.LT(1);
		LinkedListToken placeholder = TokenBuilder.newPlaceholder();
		tok.beforeInsert(placeholder);
		imaginary.setStartToken(placeholder);
		imaginary.setStopToken(placeholder);
	}
}


/**
 * this is the start rule for this parser
 */
compilationUnit
	:	(	as2CompilationUnit
		|	as3CompilationUnit
		) -> ^(COMPILATION_UNIT as2CompilationUnit? as3CompilationUnit?)
	;

as2CompilationUnit
	:	importDefinition*
		as2Type
	;

as2Type
	:	annos=annotations! mods=modifiers!
	(	as2ClassDefinition[$annos.tree,$mods.tree]
	|	as2InterfaceDefinition[$annos.tree,$mods.tree]
	)
	;

as3CompilationUnit
	:	packageDecl
		packageBlockEntry*
		EOF!
	;

packageDecl
	:	PACKAGE^ identifier?
		packageBlock
	;

packageBlock
	:	LCURLY
		packageBlockEntry*
		RCURLY
		-> ^(BLOCK packageBlockEntry*)
	;

packageBlockEntry
	:	(	importDefinition
//		|	includeDirective  This is in 'annos' instead
		|	annos=annotations! mods=modifiers!
			(	classDefinition[$annos.tree,$mods.tree]
			|	interfaceDefinition[$annos.tree,$mods.tree]
			|	variableDefinition[$annos.tree,$mods.tree]
			|	methodDefinition[$annos.tree,$mods.tree]
			|	namespaceDefinition[$annos.tree,$mods.tree]
			|	useNamespaceDirective
			)
		|	SEMI!
//		|	statement
		)
	;

endOfFile
	:	EOF!
	;

importDefinition
	:	IMPORT^ identifierStar semi
	;

semi
	:	SEMI!
//	|	{ virtualSemi() }?
	;	

classDefinition[LinkedListTree annos, LinkedListTree mods]
	:	CLASS ident
		classExtendsClause
		implementsClause
		typeBlock
		-> ^(CLASS_DEF {$mods} CLASS ident classExtendsClause implementsClause typeBlock)
	;

as2ClassDefinition[LinkedListTree annos, LinkedListTree mods]
	:	CLASS identifier
		classExtendsClause
		implementsClause
		typeBlock
		-> ^(CLASS_DEF {$mods} CLASS identifier classExtendsClause implementsClause typeBlock)
	;

interfaceDefinition[LinkedListTree annos, Tree mods]
	:	INTERFACE ident
		interfaceExtendsClause
		typeBlock
		-> ^(INTERFACE_DEF {$mods} INTERFACE ident interfaceExtendsClause typeBlock)
	;

as2InterfaceDefinition[LinkedListTree annos, Tree mods]
	:	INTERFACE identifier
		interfaceExtendsClause
		typeBlock
		-> ^(INTERFACE_DEF INTERFACE identifier interfaceExtendsClause typeBlock) // TODO: mods?
	;

classExtendsClause
	:	(EXTENDS^ identifier)?
	;
interfaceExtendsClause
	:	(EXTENDS^ identifier ( COMMA! identifier)*)?
	;
implementsClause
	:	(IMPLEMENTS^ identifier ( COMMA! identifier)*)?
	;
typeBlock
	:	LCURLY
		typeBlockEntry*
		RCURLY
		-> ^(TYPE_BLOCK typeBlockEntry*)
	;

typeBlockEntry
	:	/*statement
	|	(LBRACK)=>*/ annotatableDirective
	|	importDefinition
	|	as2IncludeDirective
//	|	includeDirective  This is in 'annos' instead
	;

annotatableDirective
	:	a=annotations! m=modifiers!
		(	variableDefinition[$a.tree,$m.tree]
		|	methodDefinition[$a.tree,$m.tree]
		)
	;

as2IncludeDirective
	:	INCLUDE_DIRECTIVE
		STRING_LITERAL
	;

includeDirective
	:	'include'
		STRING_LITERAL
		semi
	;


methodDefinition[LinkedListTree annos, Tree mods]
	:	functionDecl ident
		parameterDeclarationList
		typeExpression?
		(block|semi)
		-> ^(METHOD_DEF {$annos} {$mods}
		                functionDecl ident
				parameterDeclarationList
				typeExpression?
				block?)
	;

functionDecl
	:	FUNCTION^ (role=accessorRole)?
	;

accessorRole
	:	GET | SET
	;

namespaceDefinition[LinkedListTree annos, Tree mods]
	:	NAMESPACE ident
		-> ^(NAMESPACE_DEF {$annos} {$mods} NAMESPACE ident)
	;

useNamespaceDirective
	:	USE NAMESPACE ident semi
	;

variableDefinition[LinkedListTree annos, Tree mods]
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	decl=varOrConst variableDeclarator
		(COMMA variableDeclarator)*
		semi
		-> ^(VAR_DEF {$annos} {$mods} $decl variableDeclarator+)
	;

varOrConst
	:	VAR | CONST
	;

variableDeclarator
	:	ident^ typeExpression? variableInitializer?
	;
declaration
	:	varOrConst^ variableDeclarator
		declarationTail
	;

declarationTail
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	(COMMA! variableDeclarator)*
	;

variableInitializer
	:	ASSIGN^ assignmentExpression
	;

// A list of formal parameters
// TODO: shouldn't the 'rest' parameter only be allowed in the last position?
parameterDeclarationList
	:	LPAREN
		(	parameterDeclaration
			(COMMA parameterDeclaration)*
		)?
		RPAREN
		-> ^(PARAMS parameterDeclaration*)
	;


parameterDeclaration
	:	basicParameterDeclaration | parameterRestDeclaration
	;

basicParameterDeclaration
	:	CONST? ident typeExpression? parameterDefault?
		-> ^(PARAM CONST? ident typeExpression? parameterDefault?)
	;

parameterDefault
scope InOperator;
@init {
	$InOperator::allowed = true;
}
		// TODO: can we be more strict about allowed values?
	:	ASSIGN^ assignmentExpression
	;

parameterRestDeclaration
	:	REST ident?
		-> ^(PARAM REST ident?)
	;
block
	:	LCURLY blockEntry* RCURLY
		-> ^(BLOCK blockEntry*)
	;

blockEntry
	:
/*options { k=*; }
	:	a=annotations! m=modifiers!
		(	variableDefinition[$a.tree,$m.tree]
		|	methodDefinition[$a.tree,$m.tree]
		)
	|*/	statement
	;

condition
	:	LPAREN! expression RPAREN!
	;

statement
	:	(LCURLY)=> block
	|	declarationStatement
	|	expressionStatement
	|	ifStatement
	// For statement
	|	forEachStatement
	|	forStatement

	// While statement
	|	whileStatement

	// do-while statement
	|	doWhileStatement
	
	// with statement
	|	withStatement
	
	// switch statement
	|	switchStatement
	
	// get out of a loop (or switch)
	|	breakStatement

	// do next iteration of a loop
	|	continueStatement

	// Return an expression
	|	returnStatement

	// throw an exception
	|	throwStatement
	
	// handle exceptions
	|	tryStatement
	
	// handle 'default xml namespace = "foo"'
	|	defaultXMLNamespaceStatement

	// empty statement
	|	SEMI!
	;

declarationStatement
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	declaration semi
	;

expressionStatement
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	expressionList semi
		-> ^(EXPR_STMNT expressionList)
	;
	
ifStatement
	:	IF^ condition statement
		((ELSE)=>ELSE statement)?
	;

throwStatement
	:	'throw'^ expression semi
	;

tryStatement
	:	'try'
		block
		catchBlock*
		finallyBlock?
	;

catchBlock
	:	'catch' LPAREN! ident typeExpression? RPAREN!
		block
	;

finallyBlock
	:	'finally' block
	;

returnStatement
	:	RETURN^ ((expression)=>expression)? semi
	;
		
continueStatement
	:	CONTINUE^ semi
	;

breakStatement
	:	BREAK^ semi
	;

switchStatement
	:	SWITCH^ condition
		switchBlock
	;

switchBlock
	:	LCURLY
		(caseStatement)*
		(defaultStatement)?
		RCURLY
		-> ^(BLOCK caseStatement* defaultStatement?)
	;

caseStatement
	:	CASE^ expression COLON! switchStatementList
	;
	
defaultStatement
	:	DEFAULT^ COLON! switchStatementList
	;

switchStatementList
	:	statement* -> ^(SWITCH_STATEMENT_LIST statement*)
	;

forEachStatement
	:	FOR EACH
		LPAREN
		forInClause
		RPAREN
		statement
		-> ^(FOR_EACH forInClause statement)
	;
forStatement
scope InOperator;
@init {
	$InOperator::allowed = false;
}
	:	f=FOR
		LPAREN
		(	(forInClauseDecl IN)=>forInClause -> ^(FOR_IN[$f] forInClause)
		|	traditionalForClause -> ^($f traditionalForClause)
		)
		RPAREN
		statement -> $forStatement statement	// statement to loop over
	;
traditionalForClause
	:	a=forInit {placeholder($a.tree);} SEMI!	// initializer
		b=forCond {placeholder($b.tree);} SEMI!	// condition test
		c=forIter {placeholder($c.tree);}		// updater
	;

forInClause
	:	forInClauseDecl IN forInClauseTail
	;

forInClauseDecl
scope InOperator;
@init {
	$InOperator::allowed = false;
}
	:	declaration | ident
	;


forInClauseTail
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	expressionList
	;

// The initializer for a for loop
forInit	
scope InOperator;
@init {
	$InOperator::allowed = false;
}
	:	((declaration)=> declaration | expressionList )?
		-> ^(FOR_INIT declaration? expressionList?)
	;

forCond
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	expressionList?
		-> ^(FOR_CONDITION expressionList?)
	;

forIter
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	expressionList?
		-> ^(FOR_ITERATOR expressionList?)
	;

whileStatement
	:	WHILE^ condition statement
	;

doWhileStatement
	:	DO^ statement WHILE! condition semi
	;

withStatement
	:	WITH^ condition statement
	;

defaultXMLNamespaceStatement
	:	DEFAULT XML NAMESPACE ASSIGN expression semi
		-> ^(DEFAULT_XML_NAMESPACE expression)
	;

typeExpression
	:	c=COLON
		// TODO: identifier allows namespaces, and I suspect a type:spec shouldn't
		(identifier | 'void' | STAR)
		-> ^(TYPE_SPEC[$c] identifier? 'void'? STAR?)
	;	

identifier 
	:	qualifiedIdent
		(	options{greedy=true;}
		: 	DOT qualifiedIdent
		)*
		-> ^(IDENTIFIER qualifiedIdent+)
	;

qualifiedIdent
	:	(namespaceName DBL_COLON)? ident 
	;

namespaceName
	:	IDENT | reservedNamespace
	;

reservedNamespace
	:	PUBLIC
	|	PRIVATE
	|	'protected'
	|	'internal'
	;

identifierStar
	:	ident
		(	options{greedy=true;}
		:	DOT ident
		)* 
		(	DOT STAR
		)?
		-> ^(IDENTIFIER ident+ STAR?)
	;

annotations
	:	(	annotation
		|	includeDirective
		)*
		-> ^(ANNOTATIONS annotation*)
	;

annotation
	:	LBRACK
		ident
		annotationParamList?
		RBRACK
		-> ^(ANNOTATION ident annotationParamList?)
	;

annotationParamList
	:
		LPAREN
		(	annotationParam
			(COMMA annotationParam)*
		)?
		RPAREN
		-> ^(ANNOTATION_PARAMS annotationParam*)
	;

/*
[Inspectable(name="Icon Offset", verbose = 1,type=Boolean, defaultValue=true)]
[Inspectable(defaultValue="circular")]
[Bindable]
[ChangeEvent("event")]
[Inspectable("danger", 1, true, maybe)] 
[InspectableList("flavorStr","colorStr")]
[Event("click")]
[Collection (name="name", variable="varname", collectionClass="mx.utils.CollectionImpl", collectionItem="coll-item-classname", identifier="string")] 
*/

annotationParam
	:
		ident ASSIGN constant -> ^(ASSIGN ident constant)
	|	constant -> constant
	|	ident -> ident
	;

modifiers
	:	modifier*
		-> ^(MODIFIERS modifier*)
	;

modifier
	:	namespaceName
	|	STATIC
	|	'final'
	|	'enumerable'
	|	'explicit'
	|	'override'
	|	DYNAMIC
	|	'intrinsic'
	;

arguments
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	LPAREN expressionList RPAREN
		-> ^(ELIST expressionList)
	|	LPAREN RPAREN
		-> ELIST
	;
// This is an initializer used to set up an array.
arrayLiteral
	:	LBRACK elementList? RBRACK
		-> ^(ARRAY_LITERAL elementList?)
	;
		
elementList
	:	COMMA!
	|	nonemptyElementList
	;
nonemptyElementList
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	assignmentExpression (COMMA! assignmentExpression)*
	;

element
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	assignmentExpression
		-> ^(ELEMENT assignmentExpression)
	;

// This is an initializer used to set up an object.
objectLiteral
	:	LCURLY fieldList? RCURLY
		-> ^(OBJECT_LITERAL fieldList?)
	;
	
fieldList
	:	literalField (COMMA! literalField?)*
	;
	
literalField 
	: 	fieldName COLON element
		-> ^(OBJECT_FIELD fieldName element)
	;
	
fieldName
	:	ident
	|	number
	;

// the mother of all expressions
expression
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	assignmentExpression
		-> ^(EXPR assignmentExpression)
	;

// This is a list of expressions.
expressionList
	:	assignmentExpression (COMMA assignmentExpression)*
		-> ^(ELIST assignmentExpression+)
	;

// assignment expression (level 13)
assignmentExpression
	:	conditionalExpression
	(	(assignmentOperator)=> assignmentOperator
		assignmentExpression
	)*
	;

assignmentOperator
	:	ASSIGN
	| 	STAR_ASSIGN
	|	DIV_ASSIGN
	|	MOD_ASSIGN
	|	PLUS_ASSIGN
	|	MINUS_ASSIGN
	|	SL_ASSIGN
	|	SR_ASSIGN
	|	BSR_ASSIGN
	|	BAND_ASSIGN
	|	BXOR_ASSIGN
	|	BOR_ASSIGN
	|	LAND_ASSIGN
	|	LOR_ASSIGN
	;

// conditional test (level 12)
conditionalExpression
	:	(logicalOrExpression -> logicalOrExpression)
		(
			QUESTION
			conditionalSubExpression
			-> ^(QUESTION $conditionalExpression conditionalSubExpression)
		)?
	;
conditionalSubExpression
	:	assignmentExpression COLON^ assignmentExpression
	;

// TODO: should 'and'/'or' have same precidence as '&&'/'||' ?

// logical or (||)  (level 11)
logicalOrExpression
	:	logicalAndExpression
		(logicalOrOperator^ logicalAndExpression)*
	;

logicalOrOperator
	:	LOR | 'or'
	;

// logical and (&&)  (level 10)
logicalAndExpression
	:	bitwiseOrExpression
		(logicalAndOperator^ bitwiseOrExpression)*
	;

logicalAndOperator
	:	LAND | 'and'
	;

// bitwise or non-short-circuiting or (|)  (level 9)
bitwiseOrExpression
	:	bitwiseXorExpression
		(BOR^ bitwiseXorExpression)*
	;

// exclusive or (^)  (level 8)
bitwiseXorExpression
	:	bitwiseAndExpression
		(BXOR^ bitwiseAndExpression)*
	;

// bitwise or non-short-circuiting and (&)  (level 7)
bitwiseAndExpression
	:	equalityExpression
		(BAND^ equalityExpression)*
	;

// equality/inequality (==/!=) (level 6)
equalityExpression
	:	relationalExpression
	(	equalityOperator^
		relationalExpression
	)*
	;

equalityOperator
	:	STRICT_EQUAL | STRICT_NOT_EQUAL | NOT_EQUAL | EQUAL
	;
	
// boolean relational expressions (level 5)
relationalExpression
	:	shiftExpression
		((relationalOperator)=> relationalOperator^ shiftExpression)*
	;

relationalOperator
	:	{$InOperator::allowed}? IN
	|	LT | GT | LE | GE | IS | AS | 'instanceof'
	;

// bit shift expressions (level 4)
shiftExpression
	:	additiveExpression
		(shiftOperator^ additiveExpression)*
	;

shiftOperator
	:	SL | SR | BSR
	;

// binary addition/subtraction (level 3)
additiveExpression
	:	multiplicativeExpression
		((additiveOperator)=> additiveOperator^ multiplicativeExpression )*
	;

additiveOperator
	:	PLUS | MINUS
	;

// multiplication/division/modulo (level 2)
multiplicativeExpression
	:	unaryExpression
		(	(multiplicativeOperator)=>multiplicativeOperator^
			unaryExpression
		)*
	;

multiplicativeOperator
	:	STAR | DIV | MOD
	;

//	(level 1)
unaryExpression
	:	INC unaryExpression -> ^(INC unaryExpression)
	|	DEC unaryExpression -> ^(DEC unaryExpression)
	|	MINUS unaryExpression -> ^(UNARY_MINUS unaryExpression)
	|	PLUS unaryExpression -> ^(UNARY_PLUS unaryExpression)
	|	unaryExpressionNotPlusMinus
	;

unaryExpressionNotPlusMinus
	:	'delete' postfixExpression -> ^('delete' postfixExpression)
	|	'void' unaryExpression -> ^('void' unaryExpression)
	|	'typeof' unaryExpression -> ^('typeof' unaryExpression)
	|	LNOT unaryExpression -> ^(LNOT unaryExpression)
	|	BNOT unaryExpression -> ^(BNOT unaryExpression)
	|	postfixExpression
	;

// qualified names, array expressions, method invocation, post inc/dec
postfixExpression
	:	(postfixExpressionPrefix -> postfixExpressionPrefix)
		(	(DOT | LBRACK | E4X_DESC | LPAREN)=>
			(	poi=propOrIdent[root_0, retval.start] -> $poi
			|	DOT e4xExpression
			|	LBRACK expression RBRACK -> ^(ARRAY_ACC $postfixExpression expression)
			|	E4X_DESC e4xExpression
			|	arguments -> ^(METHOD_CALL $postfixExpression arguments)
			)
		)*

		(	(INC | DEC)=>
			( 	in=INC -> ^(POST_INC[$in] $postfixExpression)
		 	|	de=DEC -> ^(POST_DEC[$de] $postfixExpression)
		 	)
		)?
 	;

postfixExpressionPrefix
	:	primaryExpression | qualifiedIdent
	;

e4xExpression
	:	STAR^
	|	e4xAttributeIdentifier
	|	e4xFilterPredicate
	;

e4xAttributeIdentifier
	:	E4X_ATTRI
		(	qualifiedIdent
		|	STAR
		|	LBRACK expression RBRACK
		)
	;

e4xFilterPredicate
	:	LPAREN!
		expression
		RPAREN!
	;

primaryExpression
	:	'undefined'
	|	constant
	|	arrayLiteral
	|	objectLiteral
	|	functionDefinition
	|	newExpression
	|	encapsulatedExpression
	|	e4xAttributeIdentifier
	;


//identPrimary
//	:	(qualifiedIdent -> qualifiedIdent)
//		(	options{greedy=true;}
//			/* yuk! tree building doesn't set the start/stop values
//			   we need without some hand-holding, */
//		:	(	poi=propOrIdent[root_0, retval.start] -> $poi
//			)
//		|	(	LBRACK expr=expression RBRACK
//				-> ^(ARRAY_ACC $identPrimary $expr)
//			)
//		)*
//		(	options {greedy=true;}
//		:	a=arguments
//			-> ^(METHOD_CALL $identPrimary $a)
//		)*
//    ;

propOrIdent[LinkedListTree identPrimary, Token startToken]
	:	
		{ retval.start = startToken; }
		DOT propId=qualifiedIdent
		/* without further semantic analysis, we can't
		   tell if a.b is an access of the property 'b'
		   from the var 'a' or a reference to the type
		   'b' in the package 'a'.  (This could be
		   resolved in an AST post-processing step) */
		-> ^(PROPERTY_OR_IDENTIFIER {$identPrimary} $propId)
	;

constant
	:	xmlLiteral
	|	regexpLiteral
	|	number
	|	STRING_LITERAL
	|	TRUE
	|	FALSE
	|	'null'
	;

number	:	HEX_LITERAL
	|	DECIMAL_LITERAL
	|	OCTAL_LITERAL
	|	FLOAT_LITERAL
	;

	
xmlLiteral
	@init {
		LinkedListTree xml = null;
	}
	:	s=LT { xml=parseXMLLiteral((LinkedListToken)$s); }
		-> ^({xml})
	;

regexpLiteral
	@init {
		LinkedListTree regexp = null;
	}
	:	s=DIV { regexp=parseRegexpLiteral((LinkedListToken)$s); }
		-> ^({regexp})
	;

newExpression
	:	'new'^ postfixExpression
	;

encapsulatedExpression
scope InOperator;
@init {
	$InOperator::allowed = true;
}
	:	LPAREN assignmentExpression RPAREN
		-> ^(ENCPS_EXPR assignmentExpression)
	;

// TODO: should anonymous and named functions have seperate definitions so that
// we can dissallow named functions in expressions?

functionDefinition
	:	FUNCTION parameterDeclarationList typeExpression? block
		-> ^(FUNC_DEF parameterDeclarationList typeExpression? block)
	;


ident
	:	IDENT
	|	i=USE -> IDENT[$i]
	|	i=XML -> IDENT[$i]
	|	i=DYNAMIC -> IDENT[$i]
	|	i=NAMESPACE -> IDENT[$i]
	|	i=IS -> IDENT[$i]
	|	i=AS -> IDENT[$i]
	|	i=GET -> IDENT[$i]
	|	i=SET -> IDENT[$i]
	;


PACKAGE		:	'package';
PUBLIC		:	'public';
PRIVATE		:	'private';
FUNCTION	:	'function';
EXTENDS		:	'extends';
IMPLEMENTS	:	'implements';
VAR		:	'var';
STATIC		:	'static';
IF		:	'if';
IMPORT		:	'import';
FOR		:	'for';
EACH		:	'each';
IN		:	'in';
WHILE		:	'while';
DO		:	'do';
SWITCH		:	'switch';
CASE		:	'case';
DEFAULT		:	'default';
ELSE		:	'else';
CONST		:	'const';
CLASS		:	'class';
INTERFACE	:	'interface';
TRUE		:	'true';
FALSE		:	'false';
DYNAMIC		:	'dynamic';
USE		:	'use';
XML		:	'xml';
NAMESPACE	:	'namespace';
IS		:	'is';
AS		:	'as';
GET		:	'get';
SET		:	'set';
WITH		:	'with';
RETURN		:	'return';
CONTINUE	:	'continue';
BREAK		:	'break';

// OPERATORS
QUESTION		:	'?'	;
LPAREN			:	'('	;
RPAREN			:	')'	;
LBRACK			:	'['	;
RBRACK			:	']'	;
LCURLY			:	'{'	;
RCURLY			:	'}'	;
COLON			:	':'	;
DBL_COLON		:	'::'	;
COMMA			:	','	;
ASSIGN			:	'='	;
EQUAL			:	'=='	;
STRICT_EQUAL		:	'==='	;
LNOT			:	'!'	;
BNOT			:	'~'	;
NOT_EQUAL		:	'!='	;
STRICT_NOT_EQUAL	:	'!=='	;
DIV			:	'/'	;
DIV_ASSIGN		:	'/='	;
PLUS			:	'+'	;
PLUS_ASSIGN		:	'+='	;
INC			:	'++'	;
MINUS			:	'-'	;
MINUS_ASSIGN		:	'-='	;
DEC			:	'--'	;
STAR			:	'*'	;
STAR_ASSIGN		:	'*='	;
MOD			:	'%'	;
MOD_ASSIGN		:	'%='	;
SR			:	'>>'	;
SR_ASSIGN		:	'>>='	;
BSR			:	'>>>'	;
BSR_ASSIGN		:	'>>>='	;
GE			:	'>='	;
GT			:	'>'	;
SL			:	'<<'	;
SL_ASSIGN		:	'<<='	;
LE			:	'<='	;
LT			:	'<'	;
BXOR			:	'^'	;
BXOR_ASSIGN		:	'^='	;
BOR			:	'|'	;
BOR_ASSIGN		:	'|='	;
LOR			:	'||'	;
BAND			:	'&'	;
BAND_ASSIGN		:	'&='	;
LAND			:	'&&'	;
LAND_ASSIGN		:	'&&='	;
LOR_ASSIGN		:	'||='	;
E4X_ATTRI		:	'@'	; 
SEMI			:	';'	;


DOT		:	'.'	;
E4X_DESC	:	'..'	;
REST		:	'...'	;

IDENT
	:	UNICODE_IDENTIFIER_START
		UNICODE_IDENTIFIER_PART*
	;

fragment UNICODE_IDENTIFIER_START
	:	'\u0041'..'\u005a' | '\u005f' | '\u0061'..'\u007a' |
		'\u00aa' | '\u00b5' | '\u00ba' | '\u00c0'..'\u00d6' |
		'\u00d8'..'\u00f6' | '\u00f8'..'\u02c1' | '\u02c6'..'\u02d1' |
		'\u02e0'..'\u02e4' | '\u02ee' | '\u037a'..'\u037d' | '\u0386' |
		'\u0388'..'\u038a' | '\u038c' | '\u038e'..'\u03a1' |
		'\u03a3'..'\u03ce' | '\u03d0'..'\u03f5' | '\u03f7'..'\u0481' |
		'\u048a'..'\u0513' | '\u0531'..'\u0556' | '\u0559' |
		'\u0561'..'\u0587' | '\u05d0'..'\u05ea' | '\u05f0'..'\u05f2' |
		'\u0621'..'\u063a' | '\u0640'..'\u064a' | '\u066e'..'\u066f' |
		'\u0671'..'\u06d3' | '\u06d5' | '\u06e5'..'\u06e6' |
		'\u06ee'..'\u06ef' | '\u06fa'..'\u06fc' | '\u06ff' | '\u0710' |
		'\u0712'..'\u072f' | '\u074d'..'\u076d' | '\u0780'..'\u07a5' |
		'\u07b1' | '\u07ca'..'\u07ea' | '\u07f4'..'\u07f5' | '\u07fa' |
		'\u0904'..'\u0939' | '\u093d' | '\u0950' | '\u0958'..'\u0961' |
		'\u097b'..'\u097f' | '\u0985'..'\u098c' | '\u098f'..'\u0990' |
		'\u0993'..'\u09a8' | '\u09aa'..'\u09b0' | '\u09b2' |
		'\u09b6'..'\u09b9' | '\u09bd' | '\u09ce' | '\u09dc'..'\u09dd' |
		'\u09df'..'\u09e1' | '\u09f0'..'\u09f1' | '\u0a05'..'\u0a0a' |
		'\u0a0f'..'\u0a10' | '\u0a13'..'\u0a28' | '\u0a2a'..'\u0a30' |
		'\u0a32'..'\u0a33' | '\u0a35'..'\u0a36' | '\u0a38'..'\u0a39' |
		'\u0a59'..'\u0a5c' | '\u0a5e' | '\u0a72'..'\u0a74' |
		'\u0a85'..'\u0a8d' | '\u0a8f'..'\u0a91' | '\u0a93'..'\u0aa8' |
		'\u0aaa'..'\u0ab0' | '\u0ab2'..'\u0ab3' | '\u0ab5'..'\u0ab9' |
		'\u0abd' | '\u0ad0' | '\u0ae0'..'\u0ae1' | '\u0b05'..'\u0b0c' |
		'\u0b0f'..'\u0b10' | '\u0b13'..'\u0b28' | '\u0b2a'..'\u0b30' |
		'\u0b32'..'\u0b33' | '\u0b35'..'\u0b39' | '\u0b3d' |
		'\u0b5c'..'\u0b5d' | '\u0b5f'..'\u0b61' | '\u0b71' | '\u0b83' |
		'\u0b85'..'\u0b8a' | '\u0b8e'..'\u0b90' | '\u0b92'..'\u0b95' |
		'\u0b99'..'\u0b9a' | '\u0b9c' | '\u0b9e'..'\u0b9f' |
		'\u0ba3'..'\u0ba4' | '\u0ba8'..'\u0baa' | '\u0bae'..'\u0bb9' |
		'\u0c05'..'\u0c0c' | '\u0c0e'..'\u0c10' | '\u0c12'..'\u0c28' |
		'\u0c2a'..'\u0c33' | '\u0c35'..'\u0c39' | '\u0c60'..'\u0c61' |
		'\u0c85'..'\u0c8c' | '\u0c8e'..'\u0c90' | '\u0c92'..'\u0ca8' |
		'\u0caa'..'\u0cb3' | '\u0cb5'..'\u0cb9' | '\u0cbd' | '\u0cde' |
		'\u0ce0'..'\u0ce1' | '\u0d05'..'\u0d0c' | '\u0d0e'..'\u0d10' |
		'\u0d12'..'\u0d28' | '\u0d2a'..'\u0d39' | '\u0d60'..'\u0d61' |
		'\u0d85'..'\u0d96' | '\u0d9a'..'\u0db1' | '\u0db3'..'\u0dbb' |
		'\u0dbd' | '\u0dc0'..'\u0dc6' | '\u0e01'..'\u0e30' |
		'\u0e32'..'\u0e33' | '\u0e40'..'\u0e46' | '\u0e81'..'\u0e82' |
		'\u0e84' | '\u0e87'..'\u0e88' | '\u0e8a' | '\u0e8d' |
		'\u0e94'..'\u0e97' | '\u0e99'..'\u0e9f' | '\u0ea1'..'\u0ea3' |
		'\u0ea5' | '\u0ea7' | '\u0eaa'..'\u0eab' | '\u0ead'..'\u0eb0' |
		'\u0eb2'..'\u0eb3' | '\u0ebd' | '\u0ec0'..'\u0ec4' | '\u0ec6' |
		'\u0edc'..'\u0edd' | '\u0f00' | '\u0f40'..'\u0f47' |
		'\u0f49'..'\u0f6a' | '\u0f88'..'\u0f8b' | '\u1000'..'\u1021' |
		'\u1023'..'\u1027' | '\u1029'..'\u102a' | '\u1050'..'\u1055' |
		'\u10a0'..'\u10c5' | '\u10d0'..'\u10fa' | '\u10fc' |
		'\u1100'..'\u1159' | '\u115f'..'\u11a2' | '\u11a8'..'\u11f9' |
		'\u1200'..'\u1248' | '\u124a'..'\u124d' | '\u1250'..'\u1256' |
		'\u1258' | '\u125a'..'\u125d' | '\u1260'..'\u1288' |
		'\u128a'..'\u128d' | '\u1290'..'\u12b0' | '\u12b2'..'\u12b5' |
		'\u12b8'..'\u12be' | '\u12c0' | '\u12c2'..'\u12c5' |
		'\u12c8'..'\u12d6' | '\u12d8'..'\u1310' | '\u1312'..'\u1315' |
		'\u1318'..'\u135a' | '\u1380'..'\u138f' | '\u13a0'..'\u13f4' |
		'\u1401'..'\u166c' | '\u166f'..'\u1676' | '\u1681'..'\u169a' |
		'\u16a0'..'\u16ea' | '\u16ee'..'\u16f0' | '\u1700'..'\u170c' |
		'\u170e'..'\u1711' | '\u1720'..'\u1731' | '\u1740'..'\u1751' |
		'\u1760'..'\u176c' | '\u176e'..'\u1770' | '\u1780'..'\u17b3' |
		'\u17d7' | '\u17dc' | '\u1820'..'\u1877' | '\u1880'..'\u18a8' |
		'\u1900'..'\u191c' | '\u1950'..'\u196d' | '\u1970'..'\u1974' |
		'\u1980'..'\u19a9' | '\u19c1'..'\u19c7' | '\u1a00'..'\u1a16' |
		'\u1b05'..'\u1b33' | '\u1b45'..'\u1b4b' | '\u1d00'..'\u1dbf' |
		'\u1e00'..'\u1e9b' | '\u1ea0'..'\u1ef9' | '\u1f00'..'\u1f15' |
		'\u1f18'..'\u1f1d' | '\u1f20'..'\u1f45' | '\u1f48'..'\u1f4d' |
		'\u1f50'..'\u1f57' | '\u1f59' | '\u1f5b' | '\u1f5d' |
		'\u1f5f'..'\u1f7d' | '\u1f80'..'\u1fb4' | '\u1fb6'..'\u1fbc' |
		'\u1fbe' | '\u1fc2'..'\u1fc4' | '\u1fc6'..'\u1fcc' |
		'\u1fd0'..'\u1fd3' | '\u1fd6'..'\u1fdb' | '\u1fe0'..'\u1fec' |
		'\u1ff2'..'\u1ff4' | '\u1ff6'..'\u1ffc' | '\u2071' | '\u207f' |
		'\u2090'..'\u2094' | '\u2102' | '\u2107' | '\u210a'..'\u2113' |
		'\u2115' | '\u2119'..'\u211d' | '\u2124' | '\u2126' | '\u2128'
		| '\u212a'..'\u212d' | '\u212f'..'\u2139' | '\u213c'..'\u213f'
		| '\u2145'..'\u2149' | '\u214e' | '\u2160'..'\u2184' |
		'\u2c00'..'\u2c2e' | '\u2c30'..'\u2c5e' | '\u2c60'..'\u2c6c' |
		'\u2c74'..'\u2c77' | '\u2c80'..'\u2ce4' | '\u2d00'..'\u2d25' |
		'\u2d30'..'\u2d65' | '\u2d6f' | '\u2d80'..'\u2d96' |
		'\u2da0'..'\u2da6' | '\u2da8'..'\u2dae' | '\u2db0'..'\u2db6' |
		'\u2db8'..'\u2dbe' | '\u2dc0'..'\u2dc6' | '\u2dc8'..'\u2dce' |
		'\u2dd0'..'\u2dd6' | '\u2dd8'..'\u2dde' | '\u3005'..'\u3007' |
		'\u3021'..'\u3029' | '\u3031'..'\u3035' | '\u3038'..'\u303c' |
		'\u3041'..'\u3096' | '\u309d'..'\u309f' | '\u30a1'..'\u30fa' |
		'\u30fc'..'\u30ff' | '\u3105'..'\u312c' | '\u3131'..'\u318e' |
		'\u31a0'..'\u31b7' | '\u31f0'..'\u31ff' | '\u3400'..'\u4db5' |
		'\u4e00'..'\u9fbb' | '\ua000'..'\ua48c' | '\ua717'..'\ua71a' |
		'\ua800'..'\ua801' | '\ua803'..'\ua805' | '\ua807'..'\ua80a' |
		'\ua80c'..'\ua822' | '\ua840'..'\ua873' | '\uac00'..'\ud7a3' |
		'\uf900'..'\ufa2d' | '\ufa30'..'\ufa6a' | '\ufa70'..'\ufad9' |
		'\ufb00'..'\ufb06' | '\ufb13'..'\ufb17' | '\ufb1d' |
		'\ufb1f'..'\ufb28' | '\ufb2a'..'\ufb36' | '\ufb38'..'\ufb3c' |
		'\ufb3e' | '\ufb40'..'\ufb41' | '\ufb43'..'\ufb44' |
		'\ufb46'..'\ufbb1' | '\ufbd3'..'\ufd3d' | '\ufd50'..'\ufd8f' |
		'\ufd92'..'\ufdc7' | '\ufdf0'..'\ufdfb' | '\ufe70'..'\ufe74' |
		'\ufe76'..'\ufefc' | '\uff21'..'\uff3a' | '\uff41'..'\uff5a' |
		'\uff66'..'\uffbe' | '\uffc2'..'\uffc7' | '\uffca'..'\uffcf' |
		'\uffd2'..'\uffd7' | '\uffda'..'\uffdc'
	;

fragment UNICODE_IDENTIFIER_PART
	:	'\u0000'..'\u0008' | '\u000e'..'\u001b' |
		'\u0030'..'\u0039' | '\u0041'..'\u005a' | '\u005f' |
		'\u0061'..'\u007a' | '\u007f'..'\u009f' | '\u00aa' | '\u00ad' |
		'\u00b5' | '\u00ba' | '\u00c0'..'\u00d6' | '\u00d8'..'\u00f6' |
		'\u00f8'..'\u02c1' | '\u02c6'..'\u02d1' | '\u02e0'..'\u02e4' |
		'\u02ee' | '\u0300'..'\u036f' | '\u037a'..'\u037d' | '\u0386' |
		'\u0388'..'\u038a' | '\u038c' | '\u038e'..'\u03a1' |
		'\u03a3'..'\u03ce' | '\u03d0'..'\u03f5' | '\u03f7'..'\u0481' |
		'\u0483'..'\u0486' | '\u048a'..'\u0513' | '\u0531'..'\u0556' |
		'\u0559' | '\u0561'..'\u0587' | '\u0591'..'\u05bd' | '\u05bf' |
		'\u05c1'..'\u05c2' | '\u05c4'..'\u05c5' | '\u05c7' |
		'\u05d0'..'\u05ea' | '\u05f0'..'\u05f2' | '\u0600'..'\u0603' |
		'\u0610'..'\u0615' | '\u0621'..'\u063a' | '\u0640'..'\u065e' |
		'\u0660'..'\u0669' | '\u066e'..'\u06d3' | '\u06d5'..'\u06dd' |
		'\u06df'..'\u06e8' | '\u06ea'..'\u06fc' | '\u06ff' |
		'\u070f'..'\u074a' | '\u074d'..'\u076d' | '\u0780'..'\u07b1' |
		'\u07c0'..'\u07f5' | '\u07fa' | '\u0901'..'\u0939' |
		'\u093c'..'\u094d' | '\u0950'..'\u0954' | '\u0958'..'\u0963' |
		'\u0966'..'\u096f' | '\u097b'..'\u097f' | '\u0981'..'\u0983' |
		'\u0985'..'\u098c' | '\u098f'..'\u0990' | '\u0993'..'\u09a8' |
		'\u09aa'..'\u09b0' | '\u09b2' | '\u09b6'..'\u09b9' |
		'\u09bc'..'\u09c4' | '\u09c7'..'\u09c8' | '\u09cb'..'\u09ce' |
		'\u09d7' | '\u09dc'..'\u09dd' | '\u09df'..'\u09e3' |
		'\u09e6'..'\u09f1' | '\u0a01'..'\u0a03' | '\u0a05'..'\u0a0a' |
		'\u0a0f'..'\u0a10' | '\u0a13'..'\u0a28' | '\u0a2a'..'\u0a30' |
		'\u0a32'..'\u0a33' | '\u0a35'..'\u0a36' | '\u0a38'..'\u0a39' |
		'\u0a3c' | '\u0a3e'..'\u0a42' | '\u0a47'..'\u0a48' |
		'\u0a4b'..'\u0a4d' | '\u0a59'..'\u0a5c' | '\u0a5e' |
		'\u0a66'..'\u0a74' | '\u0a81'..'\u0a83' | '\u0a85'..'\u0a8d' |
		'\u0a8f'..'\u0a91' | '\u0a93'..'\u0aa8' | '\u0aaa'..'\u0ab0' |
		'\u0ab2'..'\u0ab3' | '\u0ab5'..'\u0ab9' | '\u0abc'..'\u0ac5' |
		'\u0ac7'..'\u0ac9' | '\u0acb'..'\u0acd' | '\u0ad0' |
		'\u0ae0'..'\u0ae3' | '\u0ae6'..'\u0aef' | '\u0b01'..'\u0b03' |
		'\u0b05'..'\u0b0c' | '\u0b0f'..'\u0b10' | '\u0b13'..'\u0b28' |
		'\u0b2a'..'\u0b30' | '\u0b32'..'\u0b33' | '\u0b35'..'\u0b39' |
		'\u0b3c'..'\u0b43' | '\u0b47'..'\u0b48' | '\u0b4b'..'\u0b4d' |
		'\u0b56'..'\u0b57' | '\u0b5c'..'\u0b5d' | '\u0b5f'..'\u0b61' |
		'\u0b66'..'\u0b6f' | '\u0b71' | '\u0b82'..'\u0b83' |
		'\u0b85'..'\u0b8a' | '\u0b8e'..'\u0b90' | '\u0b92'..'\u0b95' |
		'\u0b99'..'\u0b9a' | '\u0b9c' | '\u0b9e'..'\u0b9f' |
		'\u0ba3'..'\u0ba4' | '\u0ba8'..'\u0baa' | '\u0bae'..'\u0bb9' |
		'\u0bbe'..'\u0bc2' | '\u0bc6'..'\u0bc8' | '\u0bca'..'\u0bcd' |
		'\u0bd7' | '\u0be6'..'\u0bef' | '\u0c01'..'\u0c03' |
		'\u0c05'..'\u0c0c' | '\u0c0e'..'\u0c10' | '\u0c12'..'\u0c28' |
		'\u0c2a'..'\u0c33' | '\u0c35'..'\u0c39' | '\u0c3e'..'\u0c44' |
		'\u0c46'..'\u0c48' | '\u0c4a'..'\u0c4d' | '\u0c55'..'\u0c56' |
		'\u0c60'..'\u0c61' | '\u0c66'..'\u0c6f' | '\u0c82'..'\u0c83' |
		'\u0c85'..'\u0c8c' | '\u0c8e'..'\u0c90' | '\u0c92'..'\u0ca8' |
		'\u0caa'..'\u0cb3' | '\u0cb5'..'\u0cb9' | '\u0cbc'..'\u0cc4' |
		'\u0cc6'..'\u0cc8' | '\u0cca'..'\u0ccd' | '\u0cd5'..'\u0cd6' |
		'\u0cde' | '\u0ce0'..'\u0ce3' | '\u0ce6'..'\u0cef' |
		'\u0d02'..'\u0d03' | '\u0d05'..'\u0d0c' | '\u0d0e'..'\u0d10' |
		'\u0d12'..'\u0d28' | '\u0d2a'..'\u0d39' | '\u0d3e'..'\u0d43' |
		'\u0d46'..'\u0d48' | '\u0d4a'..'\u0d4d' | '\u0d57' |
		'\u0d60'..'\u0d61' | '\u0d66'..'\u0d6f' | '\u0d82'..'\u0d83' |
		'\u0d85'..'\u0d96' | '\u0d9a'..'\u0db1' | '\u0db3'..'\u0dbb' |
		'\u0dbd' | '\u0dc0'..'\u0dc6' | '\u0dca' | '\u0dcf'..'\u0dd4' |
		'\u0dd6' | '\u0dd8'..'\u0ddf' | '\u0df2'..'\u0df3' |
		'\u0e01'..'\u0e3a' | '\u0e40'..'\u0e4e' | '\u0e50'..'\u0e59' |
		'\u0e81'..'\u0e82' | '\u0e84' | '\u0e87'..'\u0e88' | '\u0e8a' |
		'\u0e8d' | '\u0e94'..'\u0e97' | '\u0e99'..'\u0e9f' |
		'\u0ea1'..'\u0ea3' | '\u0ea5' | '\u0ea7' | '\u0eaa'..'\u0eab' |
		'\u0ead'..'\u0eb9' | '\u0ebb'..'\u0ebd' | '\u0ec0'..'\u0ec4' |
		'\u0ec6' | '\u0ec8'..'\u0ecd' | '\u0ed0'..'\u0ed9' |
		'\u0edc'..'\u0edd' | '\u0f00' | '\u0f18'..'\u0f19' |
		'\u0f20'..'\u0f29' | '\u0f35' | '\u0f37' | '\u0f39' |
		'\u0f3e'..'\u0f47' | '\u0f49'..'\u0f6a' | '\u0f71'..'\u0f84' |
		'\u0f86'..'\u0f8b' | '\u0f90'..'\u0f97' | '\u0f99'..'\u0fbc' |
		'\u0fc6' | '\u1000'..'\u1021' | '\u1023'..'\u1027' |
		'\u1029'..'\u102a' | '\u102c'..'\u1032' | '\u1036'..'\u1039' |
		'\u1040'..'\u1049' | '\u1050'..'\u1059' | '\u10a0'..'\u10c5' |
		'\u10d0'..'\u10fa' | '\u10fc' | '\u1100'..'\u1159' |
		'\u115f'..'\u11a2' | '\u11a8'..'\u11f9' | '\u1200'..'\u1248' |
		'\u124a'..'\u124d' | '\u1250'..'\u1256' | '\u1258' |
		'\u125a'..'\u125d' | '\u1260'..'\u1288' | '\u128a'..'\u128d' |
		'\u1290'..'\u12b0' | '\u12b2'..'\u12b5' | '\u12b8'..'\u12be' |
		'\u12c0' | '\u12c2'..'\u12c5' | '\u12c8'..'\u12d6' |
		'\u12d8'..'\u1310' | '\u1312'..'\u1315' | '\u1318'..'\u135a' |
		'\u135f' | '\u1380'..'\u138f' | '\u13a0'..'\u13f4' |
		'\u1401'..'\u166c' | '\u166f'..'\u1676' | '\u1681'..'\u169a' |
		'\u16a0'..'\u16ea' | '\u16ee'..'\u16f0' | '\u1700'..'\u170c' |
		'\u170e'..'\u1714' | '\u1720'..'\u1734' | '\u1740'..'\u1753' |
		'\u1760'..'\u176c' | '\u176e'..'\u1770' | '\u1772'..'\u1773' |
		'\u1780'..'\u17d3' | '\u17d7' | '\u17dc'..'\u17dd' |
		'\u17e0'..'\u17e9' | '\u180b'..'\u180d' | '\u1810'..'\u1819' |
		'\u1820'..'\u1877' | '\u1880'..'\u18a9' | '\u1900'..'\u191c' |
		'\u1920'..'\u192b' | '\u1930'..'\u193b' | '\u1946'..'\u196d' |
		'\u1970'..'\u1974' | '\u1980'..'\u19a9' | '\u19b0'..'\u19c9' |
		'\u19d0'..'\u19d9' | '\u1a00'..'\u1a1b' | '\u1b00'..'\u1b4b' |
		'\u1b50'..'\u1b59' | '\u1b6b'..'\u1b73' | '\u1d00'..'\u1dca' |
		'\u1dfe'..'\u1e9b' | '\u1ea0'..'\u1ef9' | '\u1f00'..'\u1f15' |
		'\u1f18'..'\u1f1d' | '\u1f20'..'\u1f45' | '\u1f48'..'\u1f4d' |
		'\u1f50'..'\u1f57' | '\u1f59' | '\u1f5b' | '\u1f5d' |
		'\u1f5f'..'\u1f7d' | '\u1f80'..'\u1fb4' | '\u1fb6'..'\u1fbc' |
		'\u1fbe' | '\u1fc2'..'\u1fc4' | '\u1fc6'..'\u1fcc' |
		'\u1fd0'..'\u1fd3' | '\u1fd6'..'\u1fdb' | '\u1fe0'..'\u1fec' |
		'\u1ff2'..'\u1ff4' | '\u1ff6'..'\u1ffc' | '\u200b'..'\u200f' |
		'\u202a'..'\u202e' | '\u203f'..'\u2040' | '\u2054' |
		'\u2060'..'\u2063' | '\u206a'..'\u206f' | '\u2071' | '\u207f' |
		'\u2090'..'\u2094' | '\u20d0'..'\u20dc' | '\u20e1' |
		'\u20e5'..'\u20ef' | '\u2102' | '\u2107' | '\u210a'..'\u2113' |
		'\u2115' | '\u2119'..'\u211d' | '\u2124' | '\u2126' | '\u2128'
		| '\u212a'..'\u212d' | '\u212f'..'\u2139' | '\u213c'..'\u213f'
		| '\u2145'..'\u2149' | '\u214e' | '\u2160'..'\u2184' |
		'\u2c00'..'\u2c2e' | '\u2c30'..'\u2c5e' | '\u2c60'..'\u2c6c' |
		'\u2c74'..'\u2c77' | '\u2c80'..'\u2ce4' | '\u2d00'..'\u2d25' |
		'\u2d30'..'\u2d65' | '\u2d6f' | '\u2d80'..'\u2d96' |
		'\u2da0'..'\u2da6' | '\u2da8'..'\u2dae' | '\u2db0'..'\u2db6' |
		'\u2db8'..'\u2dbe' | '\u2dc0'..'\u2dc6' | '\u2dc8'..'\u2dce' |
		'\u2dd0'..'\u2dd6' | '\u2dd8'..'\u2dde' | '\u3005'..'\u3007' |
		'\u3021'..'\u302f' | '\u3031'..'\u3035' | '\u3038'..'\u303c' |
		'\u3041'..'\u3096' | '\u3099'..'\u309a' | '\u309d'..'\u309f' |
		'\u30a1'..'\u30fa' | '\u30fc'..'\u30ff' | '\u3105'..'\u312c' |
		'\u3131'..'\u318e' | '\u31a0'..'\u31b7' | '\u31f0'..'\u31ff' |
		'\u3400'..'\u4db5' | '\u4e00'..'\u9fbb' | '\ua000'..'\ua48c' |
		'\ua717'..'\ua71a' | '\ua800'..'\ua827' | '\ua840'..'\ua873' |
		'\uac00'..'\ud7a3' | '\uf900'..'\ufa2d' | '\ufa30'..'\ufa6a' |
		'\ufa70'..'\ufad9' | '\ufb00'..'\ufb06' | '\ufb13'..'\ufb17' |
		'\ufb1d'..'\ufb28' | '\ufb2a'..'\ufb36' | '\ufb38'..'\ufb3c' |
		'\ufb3e' | '\ufb40'..'\ufb41' | '\ufb43'..'\ufb44' |
		'\ufb46'..'\ufbb1' | '\ufbd3'..'\ufd3d' | '\ufd50'..'\ufd8f' |
		'\ufd92'..'\ufdc7' | '\ufdf0'..'\ufdfb' | '\ufe00'..'\ufe0f' |
		'\ufe20'..'\ufe23' | '\ufe33'..'\ufe34' | '\ufe4d'..'\ufe4f' |
		'\ufe70'..'\ufe74' | '\ufe76'..'\ufefc' | '\ufeff' |
		'\uff10'..'\uff19' | '\uff21'..'\uff3a' | '\uff3f' |
		'\uff41'..'\uff5a' | '\uff66'..'\uffbe' | '\uffc2'..'\uffc7' |
		'\uffca'..'\uffcf' | '\uffd2'..'\uffd7' | '\uffda'..'\uffdc' |
		'\ufff9'..'\ufffb'
	;


STRING_LITERAL
	:	'"' (ESC|~('"'|'\\'|'\n'|'\r'))* '"'
	|	'\'' (ESC|~('\''|'\\'|'\n'|'\r'))* '\''
	;

HEX_LITERAL	:	'0' ('x'|'X') HEX_DIGIT+ ;

DECIMAL_LITERAL	:	('0' | '1'..'9' '0'..'9'*) ;

OCTAL_LITERAL	:	'0' ('0'..'7')+ ;

FLOAT_LITERAL
    :   ('0'..'9')+ '.' ('0'..'9')* EXPONENT?
    |   '.' ('0'..'9')+ EXPONENT?
	;


// whitespace -- ignored
WS	:	(
			' '
		|	'\t'
		|	'\f'
		)+
		{$channel=HIDDEN;}
	;
NL	
	:	(
			'\r' '\n'  	// DOS
		|	'\r'    	// Mac
		|	'\n'    	// Unix
		)
		{$channel=HIDDEN;}
	;
	
// skip BOM bytes
BOM	:	(	'\u00EF'  '\u00BB' '\u00BF'
		|	'\uFEFF'
		)
		{ $channel=HIDDEN; };

// might be better to filter this out as a preprocessing step
INCLUDE_DIRECTIVE
	:	'#include'
	;

// single-line comments
SL_COMMENT
	:	'//' (~('\n'|'\r'))* ('\n'|'\r'('\n')?)?
		{$channel=HIDDEN;}
	;
// multiple-line comments
ML_COMMENT
	:	'/*' ( options {greedy=false;} : . )* '*/'
		{$channel=HIDDEN;}
	;

fragment EXPONENT
	:	('e'|'E') ('+'|'-')? ('0'..'9')+
	;
fragment HEX_DIGIT
	:	('0'..'9'|'A'..'F'|'a'..'f')
	;

fragment OCT_DIGIT
	:	'0'..'7'
	;
	
fragment ESC
	:   CTRLCHAR_ESC
	|   UNICODE_ESC
	|   OCTAL_ESC
	;

fragment CTRLCHAR_ESC
	:	'\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
	;

fragment OCTAL_ESC
	:	'\\'
	(	('0'..'3') ('0'..'7') ('0'..'7')
	|	('0'..'7') ('0'..'7')
	|	('0'..'7')
	)
	;

fragment UNICODE_ESC
	:   '\\' 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
	;


More information about the antlr-interest mailing list