[antlr-interest] newbie faces unexpected tokens

Mo m.axmed at gmail.com
Tue Jul 11 14:33:46 PDT 2006


Hi Scott,

On 7/11/06, Scott Amort <jsamort at gmail.com> wrote:
> Hi Mo,
>

> The following successfully parsed (ignoring whitespace):
>
> on foo{foo.bar.foo;}

Umm, it must be the rest of my grammar then, but it's virtually all commented out!


> Perhaps if you post your full grammar file it may be easier to see what
> is happening.


Anyhow, here it is:

-----------------------------------

class PicycleLexer extends Lexer;
	// Scanner for Picycle policy scripts.
	// Whitespace and all comment forms are skipped; the remaining rules
	// produce the operator, punctuation, identifier, number and string
	// tokens exported through the PicycleScript vocabulary.
	options {
		k = 3;                          // up to three characters of lookahead
		exportVocab = PicycleScript;
		charVocabulary = '\3'..'\377';
		testLiterals = false;           // only rules that opt in consult the literals table
	}

	// Whitespace -- ignored
	WS
	  : ( ' '
	    | '\t'
	    | '\f'

	    // handle newlines
	    | ( "\r\n"  // DOS/Windows
	      | '\r'    // Macintosh
	      | '\n'    // Unix
	      )
	      // increment the line count in the scanner
	      { newline(); }
	    )
	    { $setType(Token.SKIP); }
	  ;

	// Single-line comments: introduced by "//" or '#', running up to (but
	// not including) the end of the line.
	COMMENT
	  : ( ("//" | '#') (~('\n'|'\r'))* )
	    { $setType(Token.SKIP); }
	  ;

	// Multiple-line comments
	ML_COMMENT
	  : "/*"
	    (
	      // '\r' '\n' can be matched in one alternative or by matching
	      // '\r' in one iteration and '\n' in another. I am trying to
	      // handle any flavor of newline that comes in, but the language
	      // that allows both "\r\n" and "\r" and "\n" to all be valid
	      // newline is ambiguous. Consequently, the resulting grammar
	      // must be ambiguous. I'm shutting this warning off.
	      options {
	        generateAmbigWarnings=false;
	      }
	      // a '*' belongs to the comment body only when it is not the
	      // start of the closing "*/"
	      :  { LA(2)!='/' }? '*'
	      | '\r' '\n' {newline();}
	      | '\r' {newline();}
	      | '\n' {newline();}
	      | ~('*'|'\n'|'\r')
	    )*
	    "*/"
	    {$setType(Token.SKIP);}
	  ;

	// Operators and punctuation.
	PLUS	: '+';
	// A '-' immediately followed by a number is re-typed as one NUMBER token.
	// NOTE(review): this means "a-1" is scanned as IDENT NUMBER rather than
	// IDENT DASH NUMBER, so binary minus needs whitespace before the right
	// operand -- confirm this is intended.
	DASH	: '-' (NUMBER { $setType(NUMBER); })?;
	STAR	: '*';
	MOD		: '%';
	SLASH	: '/';
	LPREN	: '(';
	RPREN	: ')';
	LBRACE	: '{';
	RBRACE	: '}';
	COMMA	: ',';
	DOT		: '.';
	IS		: '=';
	EQ		: "==";
	NE		: "!=";
	GE		: ">=";
	LE		: "<=";
	GT		: '>';
	LT		: '<';
	NOT		: '!';
	AND     : "&&";
	OR		: "||";
	SEMI	: ';';
	COLON	: ':';
	ARROW	: "->";

	// Identifiers: a letter followed by letters or digits. The matched text
	// is checked against the literals table so keywords get their own type.
	IDENT
	  options {testLiterals=true;}
	  : ('a'..'z'|'A'..'Z') ('a'..'z'|'A'..'Z'|'0'..'9')*
	  ;

	// Numbers: digits with an optional fractional part, or a leading '.'
	// followed by at least one digit.
	NUMBER options {testLiterals=true;}
	  : ('0'..'9')+ ('.' ('0'..'9')*)? | '.' ('0'..'9')+;

	// Double-quoted strings. The surrounding quotes are dropped from the
	// token text and a doubled quote inside the string stands for a single
	// quote character. Ordinary characters are now accepted in the body;
	// previously the loop only matched doubled quotes, so no string with
	// actual content could be scanned. Line breaks are excluded so that an
	// unterminated string stops at the end of its line.
	STRING
	  : '"'!
	    ( '"' '"'!
	    | ~('"'|'\n'|'\r')
	    )*
	    ( '"'!
	    | // nothing -- write error message
	    )
	  ;



class PicycleParser extends Parser;
	// Parser for Picycle policy scripts. It builds an AST automatically:
	// a token suffixed with ^ becomes the root of the current subtree and
	// a token suffixed with ! is left out of the tree.
	options {
		k = 3;
		buildAST = true;
		exportVocab = PicycleScript;
	}
	tokens {
		ROOT;
	}

	// Entry rule: "policy" NAME { one or more policy members }
	program
		: "policy"^ IDENT LBRACE! (policy)+ RBRACE!
		;

	// A policy member is an event handler, a function definition or a
	// top-level assignment.
	policy
		: event
		| function
		| assignmentStatement
		;

	// function NAME ( params ) { statements [return ...;] }
	function
		: "function"^ IDENT farguments
			LBRACE!
				(statement)*
				(return_expr)?
			RBRACE!
		;

	// Formal parameter list; the opening parenthesis is the subtree root.
	farguments
		: LPREN^ (IDENT (COMMA! IDENT)*)? RPREN!
		;

	// on NAME { symbol ; }
	// The richer body is still disabled, exactly as in the posted grammar.
	event
		: "on"^ IDENT LBRACE!
			symbol SEMI!
		/*  (assignmentStatement)*
		(
			(preconditions)*
			(action)*
			(postconditions)*
		)  */
			RBRACE!
		;

	preconditions
		: "preconditions"^ LBRACE!
			(statement)*
		  RBRACE!
		;

	action
		: "action"^ LBRACE!
			(statement)*
		  RBRACE!
		;

	postconditions
		: "postconditions"^ LBRACE!
			(statement)*
		  RBRACE!
		;

	statement
		: assignmentStatement
		| foreach
		| while_stmt
		| if_stmt
		| function_call SEMI!
		;

	// symbol = expression ;
	assignmentStatement
		: symbol (IS^ expression) SEMI!
		;

	while_stmt
		: "while"^ LPREN! logic RPREN! then_stmt
		;

	// foreach ( [condition] ) then { ... }
	// When the condition is omitted a NUMBER node with text "1" is
	// attached in its place.
	foreach
		: "foreach"^ LPREN!
			( logic
			|
				{ #foreach = #(#foreach, #([NUMBER, "1"])); }
			)
		  RPREN! then_stmt
		;

	if_stmt
		: "if"^ LPREN! logic RPREN! then_stmt (else_stmt)*
		;

	then_stmt
		: "then"^ LBRACE! (statement)* (return_expr)? RBRACE!
		;

	// The continuation after an "elif" branch is optional, so a chain may
	// end on "elif" without a final "else" (it used to be mandatory, which
	// rejected such input). When a continuation is present it still nests
	// under the "elif" node as before; greedy=true states that choice
	// explicitly, because if_stmt's own (else_stmt)* loop could otherwise
	// claim the same tokens.
	else_stmt
		: "elif"^ LPREN! logic RPREN! then_stmt
			( options { greedy = true; } : else_stmt )?
		| "else"^ LBRACE! (statement)* (return_expr)? RBRACE!
		;

	// Arithmetic, lowest precedence first.
	expression
		: expression_2 (PLUS^ expression_2 | DASH^ expression_2)*
		;

	expression_2
		: expression_3 (STAR^ expression_3 | SLASH^ expression_3 | MOD^ expression_3)*
		;

	expression_3
		: atom
		| LPREN! expression RPREN!
		;

	// Boolean connectives.
	logic
		: logic_2
			( AND^ logic_2
			| OR^ logic_2
			)*
		;

	// Comparisons. LE was missing from this list although the lexer
	// produces it, so "<=" could never be parsed.
	logic_2
		: logic_3
			( GT^ logic_3
			| LT^ logic_3
			| GE^ logic_3
			| LE^ logic_3
			| EQ^ logic_3
			| NE^ logic_3
			)*
		;

	logic_3
		: (NOT^)? logic_4
		;

	// A parenthesised logic expression, or a plain arithmetic expression.
	// Both can start with LPREN, so a syntactic predicate tries the
	// parenthesised form first. The alternative now actually consumes
	// LPREN logic RPREN; before, the predicate looked ahead over
	// "LPREN logic" but the alternative itself only matched RPREN, so it
	// failed on the opening parenthesis every time it was selected.
	logic_4
		: ( LPREN logic RPREN ) => LPREN! logic RPREN!
		| expression
		;

	atom
		: NUMBER
		| STRING
		| symbol
		| function_call
		;

	// Dotted name: a.b.c
	symbol
		: IDENT (DOT^ IDENT)*
		;

	// NAME ( [arguments] )
	// With no arguments an IDENT node with text "null" is attached so the
	// call node always has a child.
	function_call
		: ( IDENT^ LPREN!
			( arguments
			| !
				{ #function_call = #(#function_call, #([IDENT, "null"])); }
			)
		  RPREN! )
		;

	// return [atom] ;
	// A bare "return;" gets an IDENT node with text "null" as its child.
	return_expr
		: "return"^
			( atom
			| !
				{ #return_expr = #(#return_expr, #([IDENT, "null"])); }
			)
		  SEMI!
		;

	arguments
		: ( atom (COMMA! atom)* )
		;


---------------

cheers
 mo


More information about the antlr-interest mailing list