[antlr-interest] v3: conformance of current Java 1.5 grammar

Dieter Frej dieter_frej at gmx.net
Fri Aug 25 10:38:31 PDT 2006


Hi everybody,

some time ago, I had some spare time and I checked the conformance
of the current Java 1.5 grammar to the Java Language Specification 3.
And I was really surprised. I attached a file listing the rules and
their corresponding page number in the jls3 pdf. To increase the 
conformity I renamed some rules.

I also attached the changed Java grammar. So far, I have had no time to
look into overriding the method so that exceptions are not
caught by ANTLR. If this is done (has anyone done this yet?), I can
start compiling the JCK again...

- Didi
-- 


Der GMX SmartSurfer hilft bis zu 70% Ihrer Onlinekosten zu sparen!
Ideal für Modem und ISDN: http://www.gmx.net/de/go/smartsurfer
-------------- next part --------------
/**
 * A Java 1.5 grammar for ANTLR v3 derived from the spec
 *
 * This is a very close representation of the spec; the changes
 * are cosmetic (remove left recursion) and also fixes (the spec
 * isn't exactly perfect). I have run this on the 1.4.2 source
 * and some nasty looking enums from 1.5, but have not really
 * tested for 1.5 compatibility.
 *
 * I built this with:
 * java -Xmx300M org.antlr.Tool java.g
 * and got two errors that are ok:
 * java.g:691:9: Decision can match input such as
 *   "'0'..'9'{'E', 'e'}{'+', '-'}'0'..'9'{'D', 'F', 'd', 'f'}"
 *   using multiple alternatives: 3, 4
 * As a result, alternative(s) 4 were disabled for that input
 * java.g:734:35: Decision can match input such as "{'$', 'A'..'Z',
 *   '_', 'a'..'z', '\u00C0'..'\u00D6', '\u00D8'..'\u00F6',
 *   '\u00F8'..'\u1FFF', '\u3040'..'\u318F', '\u3300'..'\u337F',
 *   '\u3400'..'\u3D2D', '\u4E00'..'\u9FFF', '\uF900'..'\uFAFF'}"
 *   using multiple alternatives: 1, 2
 * As a result, alternative(s) 2 were disabled for that input
 *
 * You can turn enum on/off as a keyword :)
 *
 * Version 1.0 -- initial release July 5, 2006 (requires 3.0b2)
 *
 * Primary author: Terence Parr, July 2006
 */
// Combined grammar: the lower-case rules below are parser rules, the
// capitalised rules after the "LEXER" marker are lexer rules.
grammar JavaParser;
options {
	// Fixed lookahead depth of two tokens.
	k=2;
	// Let ANTLR fall back to backtracking where fixed lookahead cannot decide.
	backtrack=true;
	// Cache partial parse results; NOTE(review): presumably enabled to offset the cost of backtracking.
	memoize=true;
}

@lexer::members {
	// Read by the ENUM lexer rule: while this is false, the text 'enum'
	// is re-typed as an ordinary Identifier token instead of a keyword.
	protected boolean enumIsKeyword = false;
}

// Entry rule: one complete Java source file.
compilationUnit
    :   annotations?
        packageDeclaration?
        importDeclaration*
        typeDeclaration*
    ;

// 'package' keyword, a dotted name and the closing ';'.
packageDeclaration
    :   'package' qualifiedIdentifier ';'
    ;

// Plain, static and on-demand ('.*') imports.
importDeclaration
    :   'import' 'static'?
        Identifier ('.' Identifier)* ('.' '*')?
        ';'
    ;

// A top-level type, or a lone ';'.
typeDeclaration
    :   classOrInterfaceDeclaration
    |   ';'
    ;

// Zero or more modifiers followed by a class-like or interface-like declaration.
classOrInterfaceDeclaration
    :   modifier*
        (   classDeclaration
        |   interfaceDeclaration
        )
    ;
	
// Either an ordinary class or an enum.
classDeclaration
    :   normalClassDeclaration
    |   enumDeclaration
    ;

// class Name<T...> extends Super implements I1, I2 { ... }
normalClassDeclaration
    :   'class' Identifier typeParameters?
        ('extends' type)?
        ('implements' typeList)?
        classBody
    ;

// Angle-bracketed, comma-separated list of type parameters.
typeParameters
    :   '<' typeParameter (',' typeParameter)* '>'
    ;

// One type parameter with an optional upper bound.
typeParameter
    :   Identifier ('extends' bound)?
    ;

// Bound types joined by '&'.
bound
    :   type ('&' type)*
    ;

// Uses the ENUM token, which the lexer only emits when 'enum' is treated as a keyword.
enumDeclaration
    :   ENUM Identifier ('implements' typeList)? enumBody
    ;

// Constants, an optional trailing comma, then optional member declarations.
enumBody
    :   '{' enumConstants? ','? enumBodyDeclarations? '}'
    ;

enumConstants
    :   enumConstant (',' enumConstant)*
    ;

// A constant may carry annotations, constructor arguments and its own class body.
enumConstant
    :   annotations? Identifier arguments? classBody?
    ;

// The ';' separates the constants from ordinary class members.
enumBodyDeclarations
    :   ';' classBodyDeclaration*
    ;

// Either an ordinary interface or an annotation type.
interfaceDeclaration
    :   normalInterfaceDeclaration
    |   annotationTypeDeclaration
    ;
	
// interface Name<T...> extends I1, I2 { ... }
// The generic part must be the bracketed list `typeParameters` ('<' ... '>'),
// not a single bare `typeParameter`; otherwise `interface Foo<T>` cannot be
// parsed. This matches normalClassDeclaration.
normalInterfaceDeclaration
    :   'interface' Identifier typeParameters?
        ('extends' typeList)?
        interfaceBody
    ;
	
// Comma-separated list of types.
typeList
    :   type (',' type)*
    ;

// Brace-delimited class members.
classBody
    :   '{' classBodyDeclaration* '}'
    ;

// Brace-delimited interface members.
interfaceBody
    :   '{' interfaceBodyDeclaration* '}'
    ;

// A stray ';', an initializer block (optionally static), or a modified member.
classBodyDeclaration
    :   ';'
    |   'static'? block
    |   modifier* memberDecl
    ;

// Alternative order is significant because the grammar relies on backtracking.
memberDecl
    :   genericMethodOrConstructorDecl
    |   methodDeclaration
    |   fieldDeclaration
    |   'void' Identifier voidMethodDeclaratorRest
    |   Identifier constructorDeclaratorRest
    |   interfaceDeclaration
    |   classDeclaration
    ;

// Method or constructor introduced by its own type parameter list.
genericMethodOrConstructorDecl
    :   typeParameters genericMethodOrConstructorRest
    ;

genericMethodOrConstructorRest
    :   (type | 'void') Identifier methodDeclaratorRest
    |   Identifier constructorDeclaratorRest
    ;

// Non-void method: return type, name, then parameters/throws/body.
methodDeclaration
    :   type Identifier methodDeclaratorRest
    ;

// One or more variables of a common type.
fieldDeclaration
    :   type variableDeclarators ';'
    ;
	
// A stray ';' or a modified interface member.
interfaceBodyDeclaration
    :   ';'
    |   modifier* interfaceMemberDecl
    ;

interfaceMemberDecl
    :   interfaceMethodOrFieldDecl
    |   interfaceGenericMethodDecl
    |   'void' Identifier voidInterfaceMethodDeclaratorRest
    |   interfaceDeclaration
    |   classDeclaration
    ;

// Shared prefix of constants and non-void methods: type and name.
interfaceMethodOrFieldDecl
    :   type Identifier interfaceMethodOrFieldRest
    ;

interfaceMethodOrFieldRest
    :   constantDeclaratorsRest ';'
    |   interfaceMethodDeclaratorRest
    ;

// Class method tail: parameters, optional array dims on the return type,
// optional throws clause, then a body or ';'.
methodDeclaratorRest
    :   formalParameters ('[' ']')*
        ('throws' qualifiedIdentifierList)?
        (   methodBody
        |   ';'
        )
    ;

// Same as methodDeclaratorRest but without the array dimensions.
voidMethodDeclaratorRest
    :   formalParameters
        ('throws' qualifiedIdentifierList)?
        (   methodBody
        |   ';'
        )
    ;

// Interface methods never have a body; they end with ';'.
interfaceMethodDeclaratorRest
    :   formalParameters ('[' ']')*
        ('throws' qualifiedIdentifierList)?
        ';'
    ;

interfaceGenericMethodDecl
    :   typeParameters (type | 'void') Identifier
        interfaceMethodDeclaratorRest
    ;

voidInterfaceMethodDeclaratorRest
    :   formalParameters
        ('throws' qualifiedIdentifierList)?
        ';'
    ;

// Constructors always carry a body.
constructorDeclaratorRest
    :   formalParameters
        ('throws' qualifiedIdentifierList)?
        methodBody
    ;

// Named constant: identifier followed by a mandatory initializer.
constantDeclarator
    :   Identifier constantDeclaratorRest
    ;

variableDeclarators
    :   variableDeclarator (',' variableDeclarator)*
    ;

// Declarator list whose first identifier has already been consumed by the caller.
variableDeclaratorsRest
    :   variableDeclaratorRest (',' variableDeclarator)*
    ;

variableDeclarator
    :   Identifier variableDeclaratorRest
    ;

// Array dimensions and/or an initializer; the last alternative is empty.
variableDeclaratorRest
    :   ('[' ']')+ ('=' variableInitializer)?
    |   '=' variableInitializer
    |   // nothing
    ;

// Constant list whose first identifier has already been consumed by the caller.
constantDeclaratorsRest
    :   constantDeclaratorRest (',' constantDeclarator)*
    ;

// Unlike variableDeclaratorRest, the '=' initializer is required here.
constantDeclaratorRest
    :   ('[' ']')* '=' variableInitializer
    ;

variableDeclaratorId
    :   Identifier ('[' ']')*
    ;

variableInitializer
    :   arrayInitializer
    |   expression
    ;

// Brace-delimited initializer list; a trailing comma is allowed.
arrayInitializer
    :   '{'
        (   variableInitializer (',' variableInitializer)* ','?
        )?
        '}'
    ;

// An annotation or any one of the modifier keywords.
modifier
    :   annotation
    |   'public'
    |   'protected'
    |   'private'
    |   'static'
    |   'abstract'
    |   'final'
    |   'native'
    |   'synchronized'
    |   'transient'
    |   'volatile'
    |   'strictfp'
    ;

// Dotted sequence of identifiers.
packageOrTypeName
    :   Identifier ('.' Identifier)*
    ;

// Bare identifier; referenced by the 'case' alternative of switchLabel.
enumConstantName
    :   Identifier
    ;

// Simple or qualified type name (used after '@' in annotation).
// Written as a single dotted-identifier loop. The previous form
// `packageOrTypeName '.' Identifier` could not match qualified names:
// the greedy ('.' Identifier)* loop inside packageOrTypeName consumes the
// final segment too, leaving nothing for the trailing '.' Identifier.
// The accepted language is the same as the two original alternatives intended.
typeName
    :   Identifier ('.' Identifier)*
    ;

// Reference type (optionally generic and qualified) or primitive type,
// each followed by any number of array dimensions.
type
    :   Identifier typeArguments?
        ('.' Identifier typeArguments?)*
        ('[' ']')*
    |   basicType ('[' ']')*
    ;

// The eight primitive type keywords.
basicType
    :   'byte'
    |   'short'
    |   'char'
    |   'int'
    |   'long'
    |   'float'
    |   'double'
    |   'boolean'
    ;

// Modifiers permitted on parameters and local variables.
variableModifier
    :   'final'
    |   annotation
    ;

typeArguments
    :   '<' typeArgument (',' typeArgument)* '>'
    ;

// A concrete type or a wildcard with an optional extends/super bound.
typeArgument
    :   type
    |   '?'
        (   ('extends' | 'super') type
        )?
    ;

qualifiedIdentifierList
    :   qualifiedIdentifier (',' qualifiedIdentifier)*
    ;

// Parenthesised, possibly empty, parameter list.
formalParameters
    :   '(' formalParameterDecls? ')'
    ;

// One formal parameter, recursively followed by the rest of the list
// (see formalParameterDeclsRest).
//  * `variableModifier*` replaces `'final'? annotations?` so that 'final'
//    and annotations may appear in any order and any number, e.g.
//    `@NonNull final String s`, which the fixed order rejected.
//  * formalParameterDeclsRest is mandatory: it carries the parameter name,
//    and a parameter consisting of a type alone is not valid Java.
formalParameterDecls
    :   variableModifier* type formalParameterDeclsRest
    ;
	
// Parameter name followed by further parameters, or a trailing varargs parameter.
formalParameterDeclsRest
    :   variableDeclaratorId (',' formalParameterDecls)?
    |   '...' variableDeclaratorId
    ;

methodBody
    :   block
    ;

// Dotted sequence of identifiers.
qualifiedIdentifier
    :   Identifier ('.' Identifier)*
    ;

literal
    :   integerLiteral
    |   FloatingPointLiteral
    |   CharacterLiteral
    |   StringLiteral
    |   booleanLiteral
    |   'null'
    ;

// Any of the three integer literal token kinds produced by the lexer.
integerLiteral
    :   DecimalIntegerLiteral
    |   HexIntegerLiteral
    |   OctalIntegerLiteral
    ;

booleanLiteral
    :   'true'
    |   'false'
    ;

// ANNOTATIONS

// One or more consecutive annotations.
annotations
    :   annotation+
    ;

// Annotation use: marker (`@A`), empty-parens (`@A()`), single-element
// (`@A(value)`) and normal (`@A(x = 1, y = 2)`) forms.
// The previous rule required exactly one value inside the parentheses and
// allowed at most one `name =` prefix, so `@A()` and multi-pair annotations
// could not be parsed.
// The two inner alternatives are distinguished by their first two tokens:
// Identifier '=' can only start a pair, because elementValue begins with a
// conditionalExpression, which has no plain '=' operator.
annotation
    :   '@' typeName
        (   '('
            (   Identifier '=' elementValue (',' Identifier '=' elementValue)*
            |   elementValue
            )?
            ')'
        )?
    ;
	
// Value of an annotation element: an expression without assignment,
// a nested annotation, or a brace-delimited list of values.
elementValue
    :   conditionalExpression
    |   annotation
    |   elementValueArrayInitializer
    ;

// Brace-delimited, comma-separated list of element values with an optional
// trailing comma, e.g. `{}`, `{A}`, `{A, B}`, `{A, B,}`.
// The previous rule admitted at most one value, so `{A, B}` was rejected.
elementValueArrayInitializer
    :   '{'
        (   elementValue (',' elementValue)*
        )?
        ','?
        '}'
    ;
	
// @interface Name { ... }
annotationTypeDeclaration
    :   '@' 'interface' Identifier annotationTypeBody
    ;

annotationTypeBody
    :   '{' annotationTypeElementDeclarations? '}'
    ;

// One or more element declarations.
annotationTypeElementDeclarations
    :   annotationTypeElementDeclaration+
    ;

annotationTypeElementDeclaration
    :   modifier* annotationTypeElementRest
    ;

// An element (method-like or constant) or a nested type.
// enumDeclaration and annotationTypeDeclaration are also reachable through
// classDeclaration and interfaceDeclaration respectively.
annotationTypeElementRest
    :   type Identifier annotationMethodOrConstantRest ';'
    |   classDeclaration
    |   interfaceDeclaration
    |   enumDeclaration
    |   annotationTypeDeclaration
    ;

annotationMethodOrConstantRest
    :   annotationMethodRest
    |   annotationConstantRest
    ;
	
// Tail of an annotation element after `type Identifier`: empty parentheses
// and an optional default value.
annotationMethodRest
    :   '(' ')' defaultValue?
    ;

// Tail of an annotation-type constant after `type Identifier`.
// annotationTypeElementRest has already consumed the constant's name, so the
// remainder is variableDeclaratorsRest (dims / initializer of the first
// declarator plus any further declarators). The previous `variableDeclarators`
// demanded a second Identifier, so `int X = 1;` could not be parsed.
annotationConstantRest
    :   variableDeclaratorsRest
    ;

// 'default' keyword followed by the element's default value.
defaultValue
    :   'default' elementValue
    ;

// STATEMENTS / BLOCKS

// Brace-delimited statement sequence.
block
    :   '{' blockStatements '}'
    ;

// Zero or more block-level statements.
blockStatements
    :   blockStatement*
    ;
	
// A local variable declaration, a local class/interface, or a statement.
// Labeled statements are handled by the `Identifier ':' statement`
// alternative of `statement`; the optional `(Identifier ':')?` prefix that
// used to sit here matched exactly the same input a second way, a needless
// ambiguity for the backtracking parser to resolve. The accepted language is
// unchanged.
blockStatement
    :   localVariableDeclarationStatement
    |   classOrInterfaceDeclaration
    |   statement
    ;
	
// Local variable declaration terminated by ';'.
// `variableModifier*` ('final' or an annotation, any number, any order)
// replaces the former `'final'?`, which rejected annotated locals such as
// `@SuppressWarnings("unchecked") List l = ...;`.
localVariableDeclarationStatement
    :   variableModifier* type variableDeclarators ';'
    ;
	
// Every statement form; alternative order matters under backtracking.
statement
    :   block
    |   'assert' expression (':' expression)? ';'
    |   'if' parExpression statement ('else' statement)?
    |   'for' '(' forControl ')' statement
    |   'while' parExpression statement
    |   'do' statement 'while' parExpression ';'
    |   'try' block
        (   catches 'finally' block
        |   catches
        |   'finally' block
        )
    |   'switch' parExpression '{' switchBlockStatementGroups '}'
    |   'synchronized' parExpression block
    |   'return' expression? ';'
    |   'throw' expression ';'
    |   'break' Identifier? ';'
    |   'continue' Identifier? ';'
    |   ';'
    |   statementExpression ';'
    |   Identifier ':' statement
    ;

// One or more catch clauses.
catches
    :   catchClause+
    ;

catchClause
    :   'catch' '(' formalParameter ')' block
    ;

// A single parameter: modifiers, type and name.
formalParameter
    :   variableModifier* type variableDeclaratorId
    ;

switchBlockStatementGroups
    :   switchBlockStatementGroup*
    ;

// A label followed by the statements that belong to it.
switchBlockStatementGroup
    :   switchLabel blockStatement*
    ;

switchLabel
    :   'case' constantExpression ':'
    |   'case' enumConstantName ':'
    |   'default' ':'
    ;

// Zero or more ", expression" continuations.
moreStatementExpressions
    :   (',' statementExpression)*
    ;

// Header of a 'for' statement: either the declaration-led form
// (forVarControl) or the classic init ; condition ; update form.
forControl
    :   forVarControl
    |   forInit ';' expression? ';' forUpdate?
    ;

// Initialiser part of a classic 'for' header: a local variable declaration
// or a comma-separated list of expressions.
// `variableModifier*` replaces the former `'final'?` so annotated loop
// variables are accepted as well.
forInit
    :   variableModifier* type variableDeclarators
    |   statementExpressionList
    ;
	
// 'for' header that starts with a variable declaration; forVarControlRest
// then decides between the classic and the enhanced (':') form.
// `variableModifier*` replaces `'final'? (annotation)?`, which accepted at
// most one annotation and only after 'final' (rejecting `@A final T x`).
forVarControl
    :   variableModifier* type Identifier forVarControlRest
    ;

// After `type Identifier` in a for header: the remainder of a classic loop
// header, or ':' and the iterated expression of an enhanced for.
forVarControlRest
    :   variableDeclaratorsRest ';' expression? ';' forUpdate?
    |   ':' expression
    ;

forUpdate
    :   statementExpressionList
    ;

// EXPRESSIONS

// Parenthesised expression.
parExpression
    :   '(' expression ')'
    ;

statementExpressionList
    :   statementExpression (',' statementExpression)*
    ;

// Plain alias of expression.
statementExpression
    :   expression
    ;

// Plain alias of expression.
constantExpression
    :   expression
    ;

// Assignment is right-recursive through `expression`.
expression
    :   conditionalExpression (assignmentOperator expression)?
    ;
	
// Shift-assignment operators are assembled from single '<' / '>' tokens.
assignmentOperator
    :   '='
    |   '+='
    |   '-='
    |   '*='
    |   '/='
    |   '&='
    |   '|='
    |   '^='
    |   '%='
    |   '<' '<' '='
    |   '>' '>' '='
    |   '>' '>' '>' '='
    ;

// Ternary operator.
conditionalExpression
    :   conditionalOrExpression
        ('?' expression ':' expression)?
    ;

conditionalOrExpression
    :   conditionalAndExpression
        ('||' conditionalAndExpression)*
    ;

conditionalAndExpression
    :   inclusiveOrExpression
        ('&&' inclusiveOrExpression)*
    ;

inclusiveOrExpression
    :   exclusiveOrExpression
        ('|' exclusiveOrExpression)*
    ;

exclusiveOrExpression
    :   andExpression
        ('^' andExpression)*
    ;

andExpression
    :   equalityExpression
        ('&' equalityExpression)*
    ;

equalityExpression
    :   instanceOfExpression
        (   ('==' | '!=') instanceOfExpression
        )*
    ;

instanceOfExpression
    :   relationalExpression ('instanceof' type)?
    ;

relationalExpression
    :   shiftExpression
        (relationalOp shiftExpression)*
    ;

// '<=' and '>=' are assembled from two tokens.
relationalOp
    :   '<' '='
    |   '>' '='
    |   '<'
    |   '>'
    ;

shiftExpression
    :   additiveExpression
        (shiftOp additiveExpression)*
    ;

// TODO: need a sem pred to check column on these >>>
shiftOp
    :   '<' '<'
    |   '>' '>' '>'
    |   '>' '>'
    ;

additiveExpression
    :   multiplicativeExpression
        (   ('+' | '-') multiplicativeExpression
        )*
    ;

multiplicativeExpression
    :   unaryExpression
        (   ('*' | '/' | '%') unaryExpression
        )*
    ;
	
// Prefix operators.
// The operand of prefix '++' / '--' is a full unaryExpression, as for '+'
// and '-'. The previous `'++' primary` allowed no selectors after the
// primary, so valid code such as `++this.count` or `++a.b().c` was rejected.
unaryExpression
    :   '+' unaryExpression
    |   '-' unaryExpression
    |   '++' unaryExpression
    |   '--' unaryExpression
    |   unaryExpressionNotPlusMinus
    ;

// Bitwise/logical negation, casts, and postfix expressions.
unaryExpressionNotPlusMinus
    :   '~' unaryExpression
    |   '!' unaryExpression
    |   castExpression
    |   primary selector* ('++' | '--')?
    ;

// A primitive cast may be followed by any unary expression; the second
// form excludes a leading '+' / '-' operand.
castExpression
    :   '(' basicType ')' unaryExpression
    |   '(' (expression | type) ')' unaryExpressionNotPlusMinus
    ;

primary
    :   parExpression
    |   nonWildcardTypeArguments
        (   explicitGenericInvocationSuffix
        |   'this' arguments
        )
    |   'this' arguments?
    |   'super' superSuffix
    |   literal
    |   'new' creator
    |   Identifier ('.' Identifier)* identifierSuffix?
    |   basicType ('[' ']')* '.' 'class'
    |   'void' '.' 'class'
    ;

// What may directly follow a (qualified) identifier inside primary.
identifierSuffix
    :   ('[' ']')+ '.' 'class'
    |   ('[' expression ']')+ // can also be matched by selector, but do here
    |   arguments
    |   '.' 'class'
    |   '.' explicitGenericInvocation
    |   '.' 'this'
    |   '.' 'super' arguments
    |   '.' 'new' nonWildcardTypeArguments? innerCreator
    ;
	
// Operand of 'new': the created name followed by array or class creation.
creator
    :   nonWildcardTypeArguments? createdName
        (   arrayCreatorRest
        |   classCreatorRest
        )
    ;

createdName
    :   Identifier nonWildcardTypeArguments?
        ('.' Identifier nonWildcardTypeArguments?)*
    |   basicType
    ;

// Used after `. new` for inner-class instantiation.
innerCreator
    :   Identifier classCreatorRest
    ;

// Either empty dimensions with a mandatory initializer, or sized
// dimensions optionally followed by empty ones.
arrayCreatorRest
    :   '['
        (   ']' ('[' ']')* arrayInitializer
        |   expression ']' ('[' expression ']')* ('[' ']')*
        )
    ;

// Constructor arguments plus an optional anonymous class body.
classCreatorRest
    :   arguments classBody?
    ;

explicitGenericInvocation
    :   nonWildcardTypeArguments explicitGenericInvocationSuffix
    ;

nonWildcardTypeArguments
    :   '<' typeList '>'
    ;

explicitGenericInvocationSuffix
    :   'super' superSuffix
    |   Identifier arguments
    ;

// Postfix continuation of a primary.
selector
    :   '.' Identifier arguments?
    |   '.' 'this'
    |   '.' 'super' superSuffix
    |   '.' 'new' nonWildcardTypeArguments? innerCreator
    |   '[' expression ']'
    ;

superSuffix
    :   arguments
    |   '.' Identifier arguments?
    ;

// Parenthesised, possibly empty, argument list.
arguments
    :   '(' statementExpressionList? ')'
    ;



// LEXER

// 0x / 0X prefix, at least one hex digit, optional long suffix.
HexIntegerLiteral
    :   '0' ('x' | 'X') HexDigit+ IntegerTypeSuffix?
    ;

// A lone 0, or a non-zero digit followed by any digits; optional long suffix.
DecimalIntegerLiteral
    :   (   '0'
        |   '1'..'9' '0'..'9'*
        )
        IntegerTypeSuffix?
    ;

// Leading 0 followed by at least one octal digit; optional long suffix.
OctalIntegerLiteral
    :   '0' ('0'..'7')+ IntegerTypeSuffix?
    ;

fragment
HexDigit
    :   '0'..'9'
    |   'a'..'f'
    |   'A'..'F'
    ;

fragment
IntegerTypeSuffix
    :   'l'
    |   'L'
    ;

// Alternatives, in order: digits '.' optional digits; '.' digits;
// digits with a mandatory exponent; digits with a mandatory type suffix.
FloatingPointLiteral
    :   ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
    |   '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
    |   ('0'..'9')+ Exponent FloatTypeSuffix?
    |   ('0'..'9')+ Exponent? FloatTypeSuffix
    ;

// e / E, optional sign, at least one digit.
fragment
Exponent
    :   ('e' | 'E') ('+' | '-')? ('0'..'9')+
    ;

fragment
FloatTypeSuffix
    :   'f'
    |   'F'
    |   'd'
    |   'D'
    ;

// Exactly one escape sequence or one character other than ' and \ between single quotes.
CharacterLiteral
    :   '\''
        (   EscapeSequence
        |   ~('\'' | '\\')
        )
        '\''
    ;

// Any number of escape sequences or characters other than \ and " between double quotes.
StringLiteral
    :   '"'
        (   EscapeSequence
        |   ~('\\' | '"')
        )*
        '"'
    ;

// Single-character escapes, then unicode and octal escapes.
fragment
EscapeSequence
    :   '\\' ('b' | 't' | 'n' | 'f' | 'r' | '\"' | '\'' | '\\')
    |   UnicodeEscape
    |   OctalEscape
    ;

// One to three octal digits; the three-digit form starts with 0..3.
fragment
OctalEscape
    :   '\\' '0'..'3' '0'..'7' '0'..'7'
    |   '\\' '0'..'7' '0'..'7'
    |   '\\' '0'..'7'
    ;

// Backslash, 'u', then exactly four hex digits.
fragment
UnicodeEscape
    :   '\\' 'u' HexDigit HexDigit HexDigit HexDigit
    ;

// Matches the text 'enum'. The embedded action re-types the token as
// Identifier whenever the lexer member enumIsKeyword is false, so 'enum'
// only acts as a keyword when that flag is switched on.
ENUM
	:	'enum' {if ( !enumIsKeyword ) type=Identifier;}
	;
	
// A JavaLetter followed by any mix of JavaLetter and JavaLetterOrDigit characters.
Identifier 
	:	JavaLetter (JavaLetter | JavaLetterOrDigit)*
	;

// I found this char range in JavaCC's grammar, but Letter and Digit overlap.
// Still works, but...
// (The overlap: the range '\u0100'..'\u1fff' below contains every non-ASCII
// digit range listed in JavaLetterOrDigit, e.g. '\u0660'..'\u0669'.)
fragment
JavaLetter
	:	'\u0024'
	|	'\u0041'..'\u005a'
	|	'\u005f'
	|	'\u0061'..'\u007a'
	|	'\u00c0'..'\u00d6'
	|	'\u00d8'..'\u00f6'
	|	'\u00f8'..'\u00ff'
	|	'\u0100'..'\u1fff'
	|	'\u3040'..'\u318f'
	|	'\u3300'..'\u337f'
	|	'\u3400'..'\u3d2d'
	|	'\u4e00'..'\u9fff'
	|	'\uf900'..'\ufaff'
	;

// Despite the name, this fragment lists digit ranges only: ASCII 0-9
// ('\u0030'..'\u0039') followed by further ten-character (or nine-character,
// for '\u0be7'..'\u0bef') ranges in other Unicode blocks.
fragment
JavaLetterOrDigit
	:	'\u0030'..'\u0039'
	|	'\u0660'..'\u0669'
	|	'\u06f0'..'\u06f9'
	|	'\u0966'..'\u096f'
	|	'\u09e6'..'\u09ef'
	|	'\u0a66'..'\u0a6f'
	|	'\u0ae6'..'\u0aef'
	|	'\u0b66'..'\u0b6f'
	|	'\u0be7'..'\u0bef'
	|	'\u0c66'..'\u0c6f'
	|	'\u0ce6'..'\u0cef'
	|	'\u0d66'..'\u0d6f'
	|	'\u0e50'..'\u0e59'
	|	'\u0ed0'..'\u0ed9'
	|	'\u1040'..'\u1049'
	;

// A single whitespace character, placed on channel 99
// (NOTE(review): presumably the channel the parser does not read - confirm).
WS
    :   (   ' '
        |   '\r'
        |   '\t'
        |   '\u000C'
        |   '\n'
        )
        {channel=99;}
    ;

// Block comment; the non-greedy loop stops at the first '*/'.
// Also placed on channel 99.
Comment
    :   '/*'
        (options {greedy=false;} : . )*
        '*/'
        {channel=99;}
    ;

// Line comment: '//' up to, and including, the line terminator when one is
// present. The terminator is optional so that a '//' comment on the very
// last line of a file with no trailing newline still lexes; previously the
// mandatory '\n' made such input a lexer error. Placed on channel 99 like
// WS and Comment.
EndOfLineComment
    :   '//' ~('\n' | '\r')* ('\r'? '\n')? {channel=99;}
    ;


More information about the antlr-interest mailing list