[antlr-interest] Line numbers

Morne Streicher mornes at mosaicsoftware.com
Sun Jan 19 21:39:15 PST 2003


Dear All

I'm pretty new to compiler theory and parser generators. I've managed to successfully define a language (and implement its interpreter as well) using ANTLR. :-)

What I'm doing to interpret the source language is to walk the Abstract Syntax Tree (AST) generated by ANTLR, and "evaluate" that. 

The AST generated by the parser, however, does not include the line and column number of the token each node represents. I need that information to customize run-time error messages, i.e. to include line numbers.

Has anybody got a (simple) example of how I could do that - extend the .g file to make it include line numbers in the AST class? I believe I'll have to extend the AST class, which is fine.

Below is the grammar I'm using.

Thanks,
Morne.


------------------------------------------------------------------
// Parser for the "ots" language. Trees are built automatically from the
// ^ (make this token the subtree root) and ! (leave this token out of the
// tree) annotations on the rules that follow.
// NOTE(review): with the default CommonAST node type the tree nodes do not
// carry line/column information; reporting source positions would need a
// custom AST node class - confirm against the ANTLR 2 tree documentation.
class otsParser extends Parser;

options {
	buildAST = true;	// uses CommonAST by default
	k = 2;	// two tokens of lookahead
	
	defaultErrorHandler = false;     // Don't generate parser error handlers
}

// A script (or nested body) is zero or more commands.
otscript
	:	( command )*
	;
		
// A single statement. The alternatives are kept in their original order;
// SEMI terminators are matched but left out of the tree.
command
	:	assignment_statement
	|	expression SEMI!
	|	IMPORT^ text SEMI!
	|	IDENTIFIER PLUS_EQUAL_TO^ expression SEMI!
	|	variable_decleration SEMI!
	|	foreach_statement
	|	function_decleration
	|	return_statement
	|	if_statement
	;
		
// IF is the subtree root; the else part is optional and END is dropped.
if_statement
	:	IF^ expression if_body ( optional_else )? END!
	;
		
// The "then" branch: BEGIN roots the commands that follow it.
if_body
	:	BEGIN^ otscript
	;
		
// The "else" branch: ELSE roots the commands that follow it.
optional_else
	:	ELSE^ otscript
	;
			
// name = expression ;   (EQUAL_TO is the root, SEMI is dropped)
assignment_statement
	:	IDENTIFIER EQUAL_TO^ expression SEMI!
	;

// FOREACH name IN expression BEGIN ... END
// Only FOREACH, the identifier, the expression and the body stay in the tree.
foreach_statement
	:	FOREACH^ IDENTIFIER IN! expression
		BEGIN! otscript END!
	;
		
// FUNCTION name ( params ) BEGIN ... END, rooted at FUNCTION.
function_decleration
	:	FUNCTION^ IDENTIFIER function_parameters
		BEGIN! otscript END!
	;
		
// Parenthesised formal parameter list; LPAREN is the root, RPAREN is dropped.
function_parameters
	:	LPAREN^ identifier_list RPAREN!
	;

// RETURN with an optional value expression, terminated by SEMI.
return_statement
	:	RETURN^ ( expression )? SEMI!
	;

// A string literal token, made the root of its own subtree.
text
	:	PCDATA^
	;

////////////////////////////////////////////////////////////////////////////////////
// Expressions

// A simple expression, optionally compared against one other simple
// expression. At most one comparison operator is accepted per expression.
expression
	:	simple_expression
		(	( EQUAL^ | NOT_EQUAL^ | LT^ | LE^ | GE^ | GT^ )
			simple_expression
		)?
	;

// Additive level: terms joined by +, - or OR.
simple_expression
	:	term
		(	( PLUS^ | MINUS^ | OR^ ) term
		)*
	;

// Multiplicative level: factors joined by *, /, %, AND or WITH.
term
	:	factor
		(	( MULT^ | DIV^ | MOD^ | AND^ | WITH^ ) factor
		)*
	;

// Operands of an expression. A parenthesised sub-expression is rooted at
// its LPAREN token.
factor
	:	identifier
	|	unsigned_constant
	|	NULL
	|	UNDEFINED
	|	TRUE
	|	FALSE
	|	structure_decleration
	|	list_decleration
	|	function_call
	|	LPAREN^ expression RPAREN!
	;

// A bare name used as an operand.
identifier
	:	IDENTIFIER
	;

// Literal constants: an integer or a string.
unsigned_constant
	:	unsigned_number
	|	text
	;

unsigned_number
	:	INTEGER
	;

// NOTE(review): no rule visible in this grammar references `sign`.
sign
	:	PLUS
	|	MINUS
	;

// VAR a, b, c   (VAR is the root; the separating commas are dropped)
variable_decleration
	:	VAR^ IDENTIFIER
		( COMMA! IDENTIFIER )*
	;

// name ( args )   (the function name is the root; parentheses are dropped)
function_call
	:	IDENTIFIER^ LPAREN! expression_list RPAREN!
	;

// { name = expr, ... }   (rooted at LCURL; RCURL is dropped)
structure_decleration
	:	LCURL^ id_equals_expr_list RCURL!
	;

// Possibly empty, comma-separated list of `name = expression` pairs.
// The separators are only accepted BETWEEN pairs: the whole list is
// optional, rather than just its first element, so input such as
// `{ , a = 1 }` is rejected instead of being silently accepted.
id_equals_expr_list
	:	(	id_equals_expr_pair
			( COMMA! id_equals_expr_pair )*
		)?
	;
				
// One structure member: name = expression. EQUAL_TO is dropped from the tree.
id_equals_expr_pair
	:	IDENTIFIER EQUAL_TO! expression
	;
			
// [ expr, ... ]   (rooted at LBRACK; RBRACK is dropped)
list_decleration
	:	LBRACK^ expression_list RBRACK!
	;
		
// Possibly empty, comma-separated list of expressions.
// The whole list is optional, rather than just its first element, so a
// leading comma (e.g. `f(,a)` or `[,1]`) is a syntax error instead of
// being silently accepted.
expression_list
	:	(	expression
			( COMMA! expression )*
		)?
	;
				
// Possibly empty, comma-separated list of identifiers.
// The whole list is optional, rather than just its first element, so a
// leading comma (e.g. `(,a)`) is a syntax error instead of being
// silently accepted.
identifier_list
	:	(	IDENTIFIER
			( COMMA! IDENTIFIER )*
		)?
	;

// Matches nothing and consumes no tokens.
empty
	:	/* empty */
	;

// Lexer for the "ots" language.
class otsLexer extends Lexer;

options {
	k = 3;	// three characters of lookahead
	// Characters are compared without regard to case when matching the
	// rules; the original spelling is kept in the token text.
	caseSensitive = false;
	// The keyword literals in the tokens section are spelled in mixed case
	// ("OR", "and", "IF", "return", ...). caseSensitive alone does not
	// affect the literals table, so without this option a keyword is only
	// recognised in exactly the case it was declared in (e.g. "if" or
	// "RETURN" would come out as IDENTIFIER).
	caseSensitiveLiterals = false;
	charVocabulary = '\0'..'\377';
}

// Keyword literals. IDENTIFIER (testLiterals=true) checks its text against
// this table and changes its token type on a match.
tokens {
	// operators spelled as words
	OR        = "OR";
	AND       = "and";
	WITH      = "WITH";

	// constant values
	NULL      = "null";
	UNDEFINED = "undefined";
	TRUE      = "true";
	FALSE     = "false";

	// statements and structure
	IMPORT    = "IMPORT";
	VAR       = "VAR";
	FOREACH   = "FOREACH";
	IN        = "IN";
	BEGIN     = "BEGIN";
	END       = "END";
	FUNCTION  = "FUNCTION";
	RETURN    = "return";
	IF        = "IF";
	ELSE      = "ELSE";
}


// Whitespace is skipped. Every line terminator advances the lexer's line
// counter through newline().
WS
	:	(	' '
		|	'\t'
		|	'\f'
		|	(	"\r\n"	// DOS
			|	'\r'	// Macintosh
			|	'\n'	// Unix
			)
			{ newline(); }
		)
		{ $setType(Token.SKIP); }
	;
	
// Single-line comment: "//" up to the end of the line. The token is skipped.
//
// The line terminator is optional so that a comment on the very last line
// of the input (no trailing newline) is still accepted; newline() is only
// called when a terminator was really consumed. The same three terminators
// as in WS are recognised, so a lone '\r' ends the comment too.
SL_COMMENT
	:	"//"
		( ~('\n'|'\r') )*
		(	options { generateAmbigWarnings=false; }
		:	"\r\n"	{ newline(); }
		|	'\r'	{ newline(); }
		|	'\n'	{ newline(); }
		)?
		{ $setType(Token.SKIP); }
	;
	
// Multi-line comment: "/*" ... "*/". The token is skipped.
//
// A '*' only belongs to the body when it is not followed by '/', which is
// what the semantic predicate checks. All three line-terminator forms
// handled by WS ("\r\n", '\r', '\n') advance the line counter exactly once,
// so line numbers stay correct after a comment whatever the file's line
// endings are. The "\r\n" versus '\r' overlap is resolved by alternative
// order, hence the suppressed ambiguity warning.
ML_COMMENT
	:	"/*"
		(	options { generateAmbigWarnings=false; }
		:	{ LA(2)!='/' }? '*'
		|	"\r\n"	{ newline(); }
		|	'\r'	{ newline(); }
		|	'\n'	{ newline(); }
		|	~('*'|'\n'|'\r')
		)*
		"*/"
		{ $setType(Token.SKIP); }
	;

// Punctuation.
POINT	:	'.' ;
COMMA	:	',' ;
SEMI	:	';' ;


// Bracketing tokens.
LPAREN	:	'(' ;
RPAREN	:	')' ;

LCURL	:	'{' ;
RCURL	:	'}' ;

LBRACK	:	'[' ;
RBRACK	:	']' ;

		

// Arithmetic operators.
PLUS	:	'+' ;
MINUS	:	'-' ;
MULT	:	'*' ;
DIV	:	'/' ;



// A lone double quote.
// NOTE(review): no parser rule visible in this grammar uses QUOTE, and it
// starts with the same character as PCDATA - confirm it is still needed.
QUOTE
	:	'"'
	;

// Comparison operators.
EQUAL		:	"==" ;
NOT_EQUAL	:	"!=" ;
//NOT_EQUAL	:	"<>" ;
LT		:	'<' ;
LE		:	"<=" ;
GE		:	">=" ;
GT		:	'>' ;

// Assignment operators.
EQUAL_TO	:	'=' ;
PLUS_EQUAL_TO	:	"+=" ;

// Remainder operator.
MOD	:	'%' ;



// Unsigned decimal integer: one or more digits.
INTEGER
	//options {testLiterals=true;}
	:	( '0'..'9' )+
	;

// Names: a letter or underscore, then letters, digits, underscores or dots.
// The matched text is looked up in the literals table, so keywords come out
// with their own token type instead of IDENTIFIER.
IDENTIFIER
	options { testLiterals=true; }
	:	( 'a'..'z' | '_' )
		( 'a'..'z' | '0'..'9' | '_' | '.' )*
	;

// string literals
// String literal: everything between two double quotes. The quotes
// themselves are left out of the token text (the ! suffix).
//
// A literal may span several lines, so each line terminator inside it must
// advance the line counter; otherwise every token after a multi-line string
// reports a line number that is too small. The terminators stay part of the
// token text. The "\r\n" versus '\r' overlap is resolved by alternative
// order, hence the suppressed ambiguity warning.
PCDATA
	:	'"'!
		(	options { generateAmbigWarnings=false; }
		:	"\r\n"	{ newline(); }
		|	'\r'	{ newline(); }
		|	'\n'	{ newline(); }
		|	~('"'|'\n'|'\r')
		)*
		'"'!
	;

 

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ 



More information about the antlr-interest mailing list